diff --git a/.travis.yml b/.travis.yml index 6c59b7de..dc6cfb2c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,7 @@ language: python python: - - "3.6" # oldest rdflib supported by libgit2 - - "3.7" # debian buster (stable) as of 2019-12 + #- "3.6" # oldest rdflib supported by libgit2 + #- "3.7" # debian buster (stable) as of 2019-12 - "3.8-dev" # 3.8 development branch - "nightly" @@ -45,6 +45,7 @@ script: - coverage run -a --source=quit tests/test_helpers.py - coverage run -a --source=quit tests/test_namespace.py - coverage run -a --source=quit tests/test_provenance.py + - coverage run -a --source=quit tests/merges/test_merge_methods.py before_deploy: - mkdir dist diff --git a/quit/core.py b/quit/core.py index 39cbf1b3..1d4ddef7 100644 --- a/quit/core.py +++ b/quit/core.py @@ -9,7 +9,10 @@ from pygit2 import GIT_MERGE_ANALYSIS_NORMAL from pygit2 import GIT_SORT_REVERSE, GIT_RESET_HARD, GIT_STATUS_CURRENT +import rdflib from rdflib import Graph, ConjunctiveGraph, BNode, Literal, URIRef +import rdflib.plugins.parsers.ntriples as ntriples + import re from quit.conf import Feature, QuitGraphConfiguration @@ -189,7 +192,12 @@ def instance(self, reference, force=False): for blob in self.getFilesForCommit(commit): try: (name, oid) = blob - (f, context) = self.getFileReferenceAndContext(blob, commit) + result = self.getFileReferenceAndContext(blob, commit) + try: + (f, context, nameMap) = result + except ValueError: + print(result) + internal_identifier = context.identifier + '-' + str(oid) if force or not self.config.hasFeature(Feature.Persistence): @@ -330,13 +338,15 @@ def changeset(self, commit): blob = (entity.name, entity.oid) try: - f, context = self.getFileReferenceAndContext(blob, commit) + f, context, nameMap = self.getFileReferenceAndContext(blob, commit) except KeyError: graph = Graph(identifier=graphUri) - graph.parse(data=entity.content, format='nt') + parserGraph = ntriples.W3CNTriplesParser(ntriples.NTGraphSink(graph)) + source = 
rdflib.parser.create_input_source(data=entity.content) + parserGraph.parse(source.getCharacterStream()) self._blobs.set( - blob, (FileReference(entity.name, entity.content), graph) + blob, (FileReference(entity.name, entity.content), graph, {}) ) private_uri = QUIT["graph-{}".format(entity.oid)] @@ -413,17 +423,74 @@ def getFileReferenceAndContext(self, blob, commit): content = commit.node(path=name).content graphUri = self._graphconfigs.get(commit.id).getgraphuriforfile(name) graph = Graph(identifier=URIRef(graphUri)) - graph.parse(data=content, format='nt') - quitWorkingData = (FileReference(name, content), graph) + parserGraph = ntriples.W3CNTriplesParser(ntriples.NTGraphSink(graph)) + source = rdflib.parser.create_input_source(data=content) + parserGraph.parse(source.getCharacterStream()) + nameMap = {v: k for k, v in parserGraph._bnode_ids.items()} + quitWorkingData = (FileReference(name, content), graph, nameMap) self._blobs.set(blob, quitWorkingData) return quitWorkingData return self._blobs.get(blob) + def _replaceLabledBlankNodes(self, parsedQuery, parent_commit_ref): + """Replaces blanknodes in parsedQuery with Blanknodes that have the same label in the graph.nt + E.g. We have a Graph with the content: '_:a _:b' + A BNode('a') found in parsedQuery would be replaced by the blanknode _:a found in the graph.nt. + That way, updates can pass Blanknodes as instances and do not have to work on string representations. 
+ """ + def replaceBlankNode(parsedQuery, nameMap): + nameMap = {v: k for k, v in nameMap.items()} + for update in parsedQuery: + for graphURI in update['quads']: + new_triples = [] + for triple in update['quads'][graphURI]: + new_triple_subj = None + new_triple_obj = None + if isinstance(triple[0], rdflib.BNode): + bNode_key = triple[0].n3() + bNode_key = bNode_key[2:] + if bNode_key in nameMap: + new_triple_subj = nameMap[bNode_key] + else: + new_triple_subj = triple[0] + nameMap[bNode_key] = triple[0] + else: + new_triple_subj = triple[0] + if isinstance(triple[2], rdflib.BNode): + bNode_key = triple[2].n3() + bNode_key = bNode_key[2:] + if bNode_key in nameMap: + new_triple_obj = nameMap[bNode_key] + else: + new_triple_obj = triple[2] + nameMap[bNode_key] = triple[2] + else: + new_triple_obj = triple[2] + new_triples.append((new_triple_subj, triple[1], new_triple_obj)) + update['quads'][graphURI] = new_triples + + if parent_commit_ref == None: + return {} + parent_commit = self.repository.revision(parent_commit_ref) + blobs = self.getFilesForCommit(parent_commit) + for blob in blobs: + (name, oid) = blob + if(name == "graph.nt"): + file_reference, context, nameMap = self.getFileReferenceAndContext( + blob, parent_commit) + replaceBlankNode(parsedQuery, nameMap) + return nameMap + return {} + def applyQueryOnCommit(self, parsedQuery, parent_commit_ref, target_ref, query=None, default_graph=[], named_graph=[]): """Apply an update query on the graph and the git repository.""" graph, commitid = self.instance(parent_commit_ref) + triples = {(x.n3(), y.n3(), z.n3()) for x, y, z in graph.store} + nameMap = self._replaceLabledBlankNodes(parsedQuery, parent_commit_ref) resultingChanges, exception = graph.update(parsedQuery) + self._replaceExplicitNamedBlankNodesInChanges(resultingChanges, nameMap) + triples = {(x.n3(), y.n3(), z.n3()) for x, y, z in graph.store} if exception: # TODO need to revert or invalidate the graph at this point. 
pass @@ -432,6 +499,7 @@ def applyQueryOnCommit(self, parsedQuery, parent_commit_ref, target_ref, query=N named_graph=named_graph) if exception: raise exception + triples = {(x.n3(), y.n3(), z.n3()) for x, y, z in graph.store} return oid def commit(self, graph, delta, message, parent_commit_ref, target_ref, query=None, @@ -494,7 +562,7 @@ def commit(self, graph, delta, message, parent_commit_ref, target_ref, query=Non # Update Cache and add new contexts to store blob = fileReference.path, index.stash[fileReference.path][0] - self._blobs.set(blob, (fileReference, graph.store.get_context(identifier))) + self._blobs.set(blob, (fileReference, graph.store.get_context(identifier), {})) blobs_new.add(blob) if graphconfig.mode == 'configuration': index.add('config.ttl', new_config.graphconf.serialize(format='turtle').decode()) @@ -541,12 +609,40 @@ def _build_message(self, message, query, result, default_graph, named_graph, **k out.append('{}: "{}"'.format(k, v.replace('"', "\\\""))) return "\n".join(out) + def _replaceExplicitNamedBlankNodesInChanges(self, changes, nameMap): + """Any changes applied to the update query by _replaceLabledBlankNodes have to be reverted for git deltas. + Otherwise the serialization results in Blanknodes being represented as random hashes instead of their original labels. 
+ """ + def lookUpBNode(bNode, nameMap): + if(bNode in nameMap): + return rdflib.BNode(nameMap[bNode]) + return bNode + + def replaceBNodesByName(triple, nameMap): + new_subject = triple[0] + new_object = triple[2] + if(isinstance(new_subject, BNode)): + new_subject = lookUpBNode(new_subject, nameMap) + if(isinstance(new_object, BNode)): + new_object = lookUpBNode(new_object, nameMap) + return (new_subject, triple[1], new_object) + + if len(nameMap) == 0: + return + for change in changes: + for context in change['delta']: + for payload in change['delta'][context]: + if(isinstance(payload[1], list)): + for i in range(0, len(payload[1])): + payload[1][i] = replaceBNodesByName(payload[1][i], nameMap) + def _applyKnownGraphs(self, delta, blobs, parent_commit, index): blobs_new = set() for blob in blobs: (fileName, oid) = blob try: - file_reference, context = self.getFileReferenceAndContext(blob, parent_commit) + file_reference, context, nameMap = self.getFileReferenceAndContext( + blob, parent_commit) for entry in delta: changeset = entry['delta'].get(context.identifier, None) @@ -558,7 +654,7 @@ def _applyKnownGraphs(self, delta, blobs, parent_commit, index): self._blobs.remove(blob) blob = fileName, index.stash[file_reference.path][0] - self._blobs.set(blob, (file_reference, context)) + self._blobs.set(blob, (file_reference, context, nameMap)) blobs_new.add(blob) except KeyError: pass @@ -580,7 +676,7 @@ def _applyUnknownGraphs(self, delta, known_blobs): n = [ int(m.group(1)) for b in known_blobs for m in [reg.search(b)] if m ] + [0] - fileName = '{}_{}.nt'.format(iri_to_name(identifier), max(n)+1) + fileName = '{}_{}.nt'.format(iri_to_name(identifier), max(n) + 1) new_contexts[identifier] = FileReference(fileName, '') diff --git a/quit/graphs.py b/quit/graphs.py index f74c83ae..558fca9f 100644 --- a/quit/graphs.py +++ b/quit/graphs.py @@ -4,6 +4,7 @@ from rdflib import Graph, ConjunctiveGraph, URIRef from rdflib.graph import ModificationException from rdflib.graph 
import Path +from atomicgraphs.comp_graph import ComparableGraph class RewriteGraph(Graph): @@ -117,6 +118,19 @@ def __repr__(self): len((c for c in self.graphs() if c not in self.store.contexts())) ) + #def update(self, update_object): + # comp_graphA = ComparableGraph(self.store) + # comp_graphB = ComparableGraph(self.store) + # answer = comp_graphB.update(update_object) + # diff_tupel = comp_graphA.diff(comp_graphB) + # for removeGraph in diff_tupel[0]: + # for triple in removeGraph: + # self.remove(triple) + # for additionalGraph in diff_tupel[1]: + # for triple in additionalGraph: + # self.add(additionalGraph) + # return answer + def _graph(self, c): if c is None: return None diff --git a/quit/merge.py b/quit/merge.py index b76973b6..b92b0185 100644 --- a/quit/merge.py +++ b/quit/merge.py @@ -1,9 +1,11 @@ -import os import pygit2 import rdflib +from atomicgraphs import comp_graph import logging from quit.exceptions import QuitMergeConflict, QuitBlobMergeConflict -from rdflib.plugins.serializers.nt import _nt_row as _nt +from rdflib.plugins.serializers.nt import _quoteLiteral as _qLiteral +import rdflib.plugins.parsers as parsers +import rdflib.plugins.parsers.ntriples as ntriples logger = logging.getLogger('quit.merge') @@ -80,11 +82,12 @@ def merge_quit_commits(self, target, branch, favour): mergedTreeBuilder = self._repository.TreeBuilder(targetCommit.tree) logger.debug(diff) - + print("Diff: {}".format(diff)) logger.debug(diff.stats) logger.debug("Diff has following patches") conflicts = {} for p in diff: + print("Patch: {}".format(p)) logger.debug("A Patch") logger.debug(p) logger.debug(p.line_stats) @@ -159,128 +162,275 @@ def _merge_graph_blobs(self, graphAOid, graphBOid, graphBaseOid, favour): return self._merge_context_graph_blobs(graphAOid, graphBOid, graphBaseOid) def _merge_threeway_graph_blobs(self, graphAOid, graphBOid, graphBaseOid): - if str(graphAOid) == pygit2.GIT_OID_HEX_ZERO: - a = set() - else: + aGraph = comp_graph.ComparableGraph() + 
parserGraphA = ntriples.W3CNTriplesParser(ntriples.NTGraphSink(aGraph)) + if not str(graphAOid) == pygit2.GIT_OID_HEX_ZERO: graphAblob = self._repository[graphAOid].data - a = set(graphAblob.decode("utf-8").strip().split("\n")) + source = rdflib.parser.create_input_source(data=graphAblob.decode("utf-8")) + parserGraphA.parse(source.getCharacterStream()) - if str(graphBOid) == pygit2.GIT_OID_HEX_ZERO: - b = set() - else: + bGraph = comp_graph.ComparableGraph() + parserGraphB = ntriples.W3CNTriplesParser(ntriples.NTGraphSink(bGraph)) + if not str(graphBOid) == pygit2.GIT_OID_HEX_ZERO: graphBblob = self._repository[graphBOid].data - b = set(graphBblob.decode("utf-8").strip().split("\n")) + source = rdflib.parser.create_input_source(data=graphBblob.decode("utf-8")) + parserGraphB.parse(source.getCharacterStream()) + nameNodeBaseMap = None if graphBaseOid is not None: graphBaseblob = self._repository[graphBaseOid].data - base = set(graphBaseblob.decode("utf-8").strip().split("\n")) - addA = a - base - addB = b - base - intersect = a.intersection(b) - merged = sorted(intersect.union(addA).union(addB)) + compGraphBase = comp_graph.ComparableGraph() + parserGraphBase = ntriples.W3CNTriplesParser(ntriples.NTGraphSink(compGraphBase)) + source = rdflib.parser.create_input_source(data=graphBaseblob.decode("utf-8")) + parserGraphBase.parse(source.getCharacterStream()) + nameNodeBaseMap = parserGraphBase._bnode_ids + diffA = aGraph.diff(compGraphBase) + diffB = bGraph.diff(compGraphBase) + + diffANewTriples = self._accumulate_triples(diffA[1]) + diffBNewTriples = self._accumulate_triples(diffB[1]) + diffARemovedTriples = self._accumulate_triples(diffA[0]) + diffBRemovedTriples = self._accumulate_triples(diffB[0]) + baseTriples = self._get_triples(compGraphBase) + merged = ((baseTriples - diffARemovedTriples - diffBRemovedTriples) | + diffANewTriples | diffBNewTriples) else: - merged = a.union(b) - print("\n".join(merged)) - + diff = aGraph.diff(bGraph) + merged = 
self._get_triples(aGraph) + merged = merged.union(self._accumulate_triples(diff[0])) + + colourMap = {**(compGraphBase.getBNodeColourMap()), + **(bGraph.getBNodeColourMap()), + **(aGraph.getBNodeColourMap())} + colourToNameMap = self._create_colour_to_name_map(colourMap, parserGraphA._bnode_ids, + parserGraphB._bnode_ids, nameNodeBaseMap) + merged = self._serialize_triple_sets(merged, colourMap, colourToNameMap) blob = self._repository.create_blob(("\n".join(merged) + "\n").encode("utf-8")) + return blob - def _merge_context_graph_blobs(self, graphAOid, graphBOid, graphBaseOid): - if str(graphAOid) == pygit2.GIT_OID_HEX_ZERO: - a = set() + def _accumulate_triples(self, setOfGraphs): + result = set() + for aGraph in setOfGraphs: + result = result.union(self._get_triples(aGraph)) + return result + + def _get_triples(self, graph): + return set(graph.triples((None, None, None))) + + def _serialize_triple_sets(self, tripleSet, colourMap, colourToNameMap): + result = set() + for triple in tripleSet: + result.add("{} {} {} .".format(self._serialize_bNode(triple[0], + colourMap, colourToNameMap), + triple[1].n3(), + self._serialize_bNode(triple[2], + colourMap, + colourToNameMap))) + return sorted(result) + + def _serialize_bNode(self, node, colourMap, colourToNameMap): + if(isinstance(node, rdflib.BNode)): + try: + return colourToNameMap[colourMap[node]] + except KeyError: + return node.n3() else: + return node.n3() + + def _create_colour_to_name_map(self, nodeColourMap, nameNodeMapA, + nameNodeMapB, nameNodeMapC=None): + colourToNameMap = {} + for bNodeName in nameNodeMapA: + colourKey = nodeColourMap[nameNodeMapA[bNodeName]] + if colourKey not in colourToNameMap or bNodeName < colourToNameMap[colourKey]: + colourToNameMap[colourKey] = "_:{}".format(bNodeName) + + for bNodeName in nameNodeMapB: + bNode = nameNodeMapB[bNodeName] + colourKey = nodeColourMap[bNode] + # check if the first two loops already took the label + unusedCheck = bNodeName not in nameNodeMapA + if 
colourKey not in colourToNameMap: + if unusedCheck: + colourToNameMap[colourKey] = "_:{}".format(bNodeName) + else: + colourToNameMap[colourKey] = bNode.n3() + if bNodeName < colourToNameMap[colourKey] and unusedCheck: + colourToNameMap[colourKey] = "_:{}".format(bNodeName) + + if nameNodeMapC is not None: + for bNodeName in nameNodeMapB: + bNode = nameNodeMapB[bNodeName] + colourKey = nodeColourMap[bNode] + # check if the first two loops already took the label + unusedCheck = bNodeName not in nameNodeMapA and bNodeName not in nameNodeMapB + if colourKey not in colourToNameMap: + if unusedCheck: + colourToNameMap[colourKey] = "_:{}".format(bNodeName) + else: + colourToNameMap[colourKey] = bNode.n3() + if bNodeName < colourToNameMap[colourKey] and unusedCheck: + colourToNameMap[colourKey] = "_:{}".format(bNodeName) + + return colourToNameMap + + def _merge_context_graph_blobs(self, graphAOid, graphBOid, graphBaseOid): + graphA = comp_graph.ComparableGraph() + parserGraphA = ntriples.W3CNTriplesParser(ntriples.NTGraphSink(graphA)) + if not str(graphAOid) == pygit2.GIT_OID_HEX_ZERO: graphAblob = self._repository[graphAOid].data - a = set(graphAblob.decode("utf-8").split("\n")) + source = rdflib.parser.create_input_source(data=graphAblob.decode("utf-8")) + parserGraphA.parse(source.getCharacterStream()) - if str(graphBOid) == pygit2.GIT_OID_HEX_ZERO: - b = set() - else: + graphB = comp_graph.ComparableGraph() + parserGraphB = ntriples.W3CNTriplesParser(ntriples.NTGraphSink(graphB)) + if not str(graphBOid) == pygit2.GIT_OID_HEX_ZERO: graphBblob = self._repository[graphBOid].data - b = set(graphBblob.decode("utf-8").split("\n")) + source = rdflib.parser.create_input_source(data=graphBblob.decode("utf-8")) + parserGraphB.parse(source.getCharacterStream()) + nameNodeBaseMap = None if graphBaseOid is not None: graphBaseblob = self._repository[graphBaseOid].data - base = set(graphBaseblob.decode("utf-8").split("\n")) + graphBase = comp_graph.ComparableGraph() + 
parserGraphBase = ntriples.W3CNTriplesParser(ntriples.NTGraphSink(graphBase)) + source = rdflib.parser.create_input_source(data=graphBaseblob.decode("utf-8")) + parserGraphBase.parse(source.getCharacterStream()) + nameNodeBaseMap = parserGraphBase._bnode_ids else: - base = set() - - logger.debug("base") - logger.debug(base) - logger.debug("a") - logger.debug(a) - logger.debug("b") - logger.debug(b) - - addA = a - base - delA = base - a - addB = b - base - delB = base - b - - ok, conflicts = self._merge_context_conflict_detection(addA - addB, delA - delB, - addB - addA, delB - delA) - - logger.debug("intersect and ok, then merged") - logger.debug(a.intersection(b)) - logger.debug(ok) - merged = sorted(a.intersection(b).union(ok)) - logger.debug(merged) - print(merged) + graphBase = comp_graph.ComparableGraph() + + diffA = graphA.diff(graphBase) + diffB = graphB.diff(graphBase) + + colourMap = {**(graphBase.getBNodeColourMap()), + **(graphB.getBNodeColourMap()), + **(graphA.getBNodeColourMap())} + colourToNameMap = self._create_colour_to_name_map(colourMap, parserGraphA._bnode_ids, + parserGraphB._bnode_ids, nameNodeBaseMap) + + # those operations are not ready since they actually need to be done by their colour + diffANewTriples = self._accumulate_triples(diffA[1]) # C+c + diffANewTriples = self._colour_triple_sets(diffANewTriples, colourMap) + diffBNewTriples = self._accumulate_triples(diffB[1]) # C+b + diffBNewTriples = self._colour_triple_sets(diffBNewTriples, colourMap) + diffARemovedTriples = self._accumulate_triples(diffA[0]) # C-c + diffARemovedTriples = self._colour_triple_sets(diffARemovedTriples, colourMap) + diffBRemovedTriples = self._accumulate_triples(diffB[0]) # C-b + diffBRemovedTriples = self._colour_triple_sets(diffBRemovedTriples, colourMap) + baseTriples = self._get_triples(graphBase) + baseTriples = self._colour_triple_sets(baseTriples, colourMap) + ok, conflicts = self._merge_context_conflict_detection(diffANewTriples, diffARemovedTriples, + 
diffBNewTriples, diffBRemovedTriples, + colourToNameMap) + + merged = baseTriples - diffARemovedTriples - \ + diffBRemovedTriples | (diffANewTriples & diffBNewTriples) # P(G') ^ P(G'') + merged = self._convert_colour_to_name_triple_rows(merged, colourToNameMap) + merged = merged.union(ok) if conflicts is not None: - print("raised") - raise QuitBlobMergeConflict('Conflicts, ahhhhh!!', merged, conflicts) + raise QuitBlobMergeConflict("Conflicts, ahhhh", merged, conflicts) blob = self._repository.create_blob("\n".join(merged).encode("utf-8")) return blob - def _merge_context_conflict_detection(self, addA, delA, addB, delB): + def _merge_context_conflict_detection(self, addA, delA, addB, delB, colNameMap): - def conflictSet(graph, conflictingNodes): + def conflictSet(tripleSet, conflictingNodes, colNameMap): ok = set() conflicts = set() - for triple in graph.triples((None, None, None)): - if triple[0] in conflictingNodes or triple[2] in conflictingNodes: - conflicts.add(_nt(triple).rstrip()) + for triple in tripleSet: + conflicted = triple[0] in conflictingNodes or triple[2] in conflictingNodes + if isinstance(triple[0], bytes): + subject = colNameMap[triple[0]] + else: + subject = triple[0].n3() + + if isinstance(triple[2], bytes): + object = colNameMap[triple[2]] + elif isinstance(triple[2], rdflib.Literal): + object = _qLiteral(triple[2]) + else: + object = triple[2].n3() + + cTriple = ("%s %s %s .\n" % (subject, triple[1].n3(), object)).rstrip() + if conflicted: + conflicts.add(cTriple) else: - ok.add(_nt(triple).rstrip()) + ok.add(cTriple) return ok, conflicts - graphAddA = rdflib.ConjunctiveGraph() - graphAddA.parse(data="\n".join(addA), format="nt") - graphAddB = rdflib.ConjunctiveGraph() - graphAddB.parse(data="\n".join(addB), format="nt") - graphDelA = rdflib.ConjunctiveGraph() - graphDelA.parse(data="\n".join(delA), format="nt") - graphDelB = rdflib.ConjunctiveGraph() - graphDelB.parse(data="\n".join(delB), format="nt") - - conflictingNodes = (graphAddA + 
graphDelA).all_nodes().intersection( - (graphAddB + graphDelB).all_nodes()) - print(conflictingNodes) + def collectNodes(tripleSet): + nodes = set() + for triple in tripleSet: + nodes.add(triple[0]) + nodes.add(triple[2]) + return nodes + + addANoB = addA - addB # C+c\b + addANoBNodes = collectNodes(addANoB) + addBNoA = addB - addA # C+b\c + addBNoANodes = collectNodes(addBNoA) + delANoB = delA - delB # C-c\b + delANoBNodes = collectNodes(delANoB) + delBNoA = delB - delA # C-b\c + delBNoANodes = collectNodes(delBNoA) + + conflictingNodes = (addANoBNodes | delANoBNodes).intersection(addBNoANodes | delBNoANodes) logger.debug(conflictingNodes) conflicts = {} ok = set() - for key, graph in [("addA", graphAddA), ("delA", graphDelA), - ("addB", graphAddB), ("delB", graphDelB)]: - newOK, conflict = conflictSet(graph, conflictingNodes) + for key, graph in [("addA", addANoB), ("delA", delANoB), + ("addB", addBNoA), ("delB", delBNoA)]: + newOK, conflict = conflictSet(graph, conflictingNodes, colNameMap) if len(conflict) > 0: conflicts[key] = "\n".join(sorted(conflict)) if key.startswith("add"): ok.update(newOK) - print("list done") - if conflicts: nodes = [] for node in conflictingNodes: logger.debug(node.n3()) - nodes.append(node.n3()) + if isinstance(node, bytes): + nodes.append(colNameMap[node]) + else: + nodes.append(node.n3()) conflicts["nodes"] = nodes - print(conflicts) - - print("OK") - print(ok) return sorted(ok), conflicts or None + + def _colour_triple_sets(self, tripleSet, colourMap): + result = set() + for triple in tripleSet: + subject = triple[0] + object = triple[2] + if isinstance(triple[0], rdflib.BNode) or isinstance(triple[0], rdflib.term.BNode): + subject = colourMap[triple[0]] + if isinstance(triple[2], rdflib.BNode) or isinstance(triple[2], rdflib.term.BNode): + object = colourMap[triple[2]] + result.add((subject, triple[1], object)) + return result + + def _convert_colour_to_name_triple_rows(self, tripleSet, colNameMap): + result = set() + for triple 
in tripleSet: + if isinstance(triple[0], bytes): + subject = colNameMap[triple[0]] + else: + subject = triple[0].n3() + + if isinstance(triple[2], bytes): + object = colNameMap[triple[2]] + elif isinstance(triple[2], rdflib.Literal): + object = _qLiteral(triple[2]) + else: + object = triple[2].n3() + + cTriple = ("%s %s %s .\n" % (subject, triple[1].n3(), object)).rstrip() + result.add(cTriple) + return result diff --git a/requirements.txt b/requirements.txt index b3cc6055..186e5ecd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,7 @@ Flask-Cors pygit2>=1.1.0 sortedcontainers uritools +git+https://github.com/Simaris/Atomic-Graph@master git+https://github.com/RDFLib/rdflib-jsonld@master uwsgi diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..ccfa933e --- /dev/null +++ b/setup.cfg @@ -0,0 +1,15 @@ +[pycodestyle] +format = pylint +exclude = tests/*,quit/tools/* +count = False +ignore = E402 +max-line-length = 100 + +[pylava] +format = pylint +skip = tests/*,quit/tools/* +linters = pep8 +ignore = E402 + +[pylava:pep8] +max_line_length = 100 diff --git a/tests/merges/FirstTest/base.nt b/tests/merges/FirstTest/base.nt new file mode 100644 index 00000000..e8432369 --- /dev/null +++ b/tests/merges/FirstTest/base.nt @@ -0,0 +1 @@ + . diff --git a/tests/merges/FirstTest/branch.nt b/tests/merges/FirstTest/branch.nt new file mode 100644 index 00000000..a0306776 --- /dev/null +++ b/tests/merges/FirstTest/branch.nt @@ -0,0 +1,2 @@ + . + . diff --git a/tests/merges/FirstTest/result.nt b/tests/merges/FirstTest/result.nt new file mode 100644 index 00000000..599e8dc9 --- /dev/null +++ b/tests/merges/FirstTest/result.nt @@ -0,0 +1,3 @@ + . + . + . diff --git a/tests/merges/FirstTest/target.nt b/tests/merges/FirstTest/target.nt new file mode 100644 index 00000000..f57201c4 --- /dev/null +++ b/tests/merges/FirstTest/target.nt @@ -0,0 +1,2 @@ + . + . 
diff --git a/tests/merges/TestA/a_graphs b/tests/merges/TestA/a_graphs new file mode 100644 index 00000000..db476bc0 --- /dev/null +++ b/tests/merges/TestA/a_graphs @@ -0,0 +1,7 @@ +_:a _:b . +_:b _:c . +_:c _:a . +--- +_:a _:b . +_:b _:c . +_:c _:a . diff --git a/tests/merges/TestA/base.nt b/tests/merges/TestA/base.nt new file mode 100644 index 00000000..e69de29b diff --git a/tests/merges/TestA/branch.nt b/tests/merges/TestA/branch.nt new file mode 100644 index 00000000..7d5e7083 --- /dev/null +++ b/tests/merges/TestA/branch.nt @@ -0,0 +1,3 @@ +_:a _:b . +_:b _:c . +_:c _:a . diff --git a/tests/merges/TestA/target.nt b/tests/merges/TestA/target.nt new file mode 100644 index 00000000..1ffaa2ce --- /dev/null +++ b/tests/merges/TestA/target.nt @@ -0,0 +1,3 @@ +_:a _:b . +_:b _:c . +_:c _:a . diff --git a/tests/merges/TestABCD/a_graphs b/tests/merges/TestABCD/a_graphs new file mode 100644 index 00000000..8a541d78 --- /dev/null +++ b/tests/merges/TestABCD/a_graphs @@ -0,0 +1,41 @@ +_:a _:b . +_:b _:c . +_:c _:a . + +_:a _:b . +_:b _:c . +_:c _:d . +_:d _:a . + +_:a _:b . +_:b _:c . +_:c _:d . +_:d _:a . + +_:a _:b . +_:b _:c . +_:c _:d . +_:d _:a . +--- +_:a _:b . +_:b _:c . +_:c _:a . + +_:a _:b . +_:b _:c . +_:c _:a . + +_:a _:b . +_:b _:c . +_:c _:d . +_:d _:a . +_:a _:c . + +_:a _:b . +_:b _:d . +_:d _:e . +_:e _:b . +_:b _:c . +_:c _:d . +_:d _:a . +_:a _:e . diff --git a/tests/merges/TestABCD/base.nt b/tests/merges/TestABCD/base.nt new file mode 100644 index 00000000..e69de29b diff --git a/tests/merges/TestABCD/branch.nt b/tests/merges/TestABCD/branch.nt new file mode 100644 index 00000000..f50b3a6a --- /dev/null +++ b/tests/merges/TestABCD/branch.nt @@ -0,0 +1,18 @@ +_:a _:b . +_:b _:c . +_:c _:a . + +_:a _:b . +_:b _:c . +_:c _:d . +_:d _:a . + +_:a _:b . +_:b _:c . +_:c _:d . +_:d _:a . + +_:a _:b . +_:b _:c . +_:c _:d . +_:d _:a . 
diff --git a/tests/merges/TestABCD/target.nt b/tests/merges/TestABCD/target.nt new file mode 100644 index 00000000..debf19c9 --- /dev/null +++ b/tests/merges/TestABCD/target.nt @@ -0,0 +1,22 @@ +_:a _:b . +_:b _:c . +_:c _:a . + +_:a _:b . +_:b _:c . +_:c _:a . + +_:a _:b . +_:b _:c . +_:c _:d . +_:d _:a . +_:a _:c . + +_:a _:b . +_:b _:d . +_:d _:e . +_:e _:b . +_:b _:c . +_:c _:d . +_:d _:a . +_:a _:e . diff --git a/tests/merges/TestB/a_graphs b/tests/merges/TestB/a_graphs new file mode 100644 index 00000000..62549275 --- /dev/null +++ b/tests/merges/TestB/a_graphs @@ -0,0 +1,14 @@ +_:a _:b . +_:b _:c . +_:c _:d . +_:d _:a . +_:a _:c . +--- +_:a _:b . +_:b _:d . +_:d _:e . +_:e _:b . +_:b _:c . +_:c _:d . +_:d _:a . +_:a _:e . diff --git a/tests/merges/TestB/base.nt b/tests/merges/TestB/base.nt new file mode 100644 index 00000000..e69de29b diff --git a/tests/merges/TestB/branch.nt b/tests/merges/TestB/branch.nt new file mode 100644 index 00000000..d09627a3 --- /dev/null +++ b/tests/merges/TestB/branch.nt @@ -0,0 +1,5 @@ +_:a _:b . +_:b _:c . +_:c _:d . +_:d _:a . +_:a _:c . diff --git a/tests/merges/TestB/debug.png b/tests/merges/TestB/debug.png new file mode 100644 index 00000000..ebe1c3e5 Binary files /dev/null and b/tests/merges/TestB/debug.png differ diff --git a/tests/merges/TestB/debugResult b/tests/merges/TestB/debugResult new file mode 100644 index 00000000..f58e6233 --- /dev/null +++ b/tests/merges/TestB/debugResult @@ -0,0 +1,8 @@ +_:a _:b . +_:b _:c . +_:d _:a . +_:b _:d . +_:a _:c . +_:c _:d . +_:d _:e . +_:e _:a . \ No newline at end of file diff --git a/tests/merges/TestB/debugTarget.png b/tests/merges/TestB/debugTarget.png new file mode 100644 index 00000000..ca9ce15e Binary files /dev/null and b/tests/merges/TestB/debugTarget.png differ diff --git a/tests/merges/TestB/target.nt b/tests/merges/TestB/target.nt new file mode 100644 index 00000000..1f65ee8d --- /dev/null +++ b/tests/merges/TestB/target.nt @@ -0,0 +1,8 @@ +_:a _:b . +_:b _:d . 
+_:d _:e . +_:e _:b . +_:b _:c . +_:c _:d . +_:d _:a . +_:a _:e . diff --git a/tests/merges/TestC/a_graphs b/tests/merges/TestC/a_graphs new file mode 100644 index 00000000..38bfc429 --- /dev/null +++ b/tests/merges/TestC/a_graphs @@ -0,0 +1 @@ +_:a _:a . diff --git a/tests/merges/TestC/base.nt b/tests/merges/TestC/base.nt new file mode 100644 index 00000000..e69de29b diff --git a/tests/merges/TestC/branch.nt b/tests/merges/TestC/branch.nt new file mode 100644 index 00000000..9484383b --- /dev/null +++ b/tests/merges/TestC/branch.nt @@ -0,0 +1,3 @@ +_:a _:b . +_:b _:c . +_:c _:a . diff --git a/tests/merges/TestC/debugResult b/tests/merges/TestC/debugResult new file mode 100644 index 00000000..91c3241e --- /dev/null +++ b/tests/merges/TestC/debugResult @@ -0,0 +1,3 @@ +_:a _:b . +_:b _:c . +_:c _:a . \ No newline at end of file diff --git a/tests/merges/TestC/target.nt b/tests/merges/TestC/target.nt new file mode 100644 index 00000000..8d3badc3 --- /dev/null +++ b/tests/merges/TestC/target.nt @@ -0,0 +1,4 @@ +_:a _:b . +_:b _:c . +_:c _:d . +_:d _:a . diff --git a/tests/merges/TestD/a_graphs b/tests/merges/TestD/a_graphs new file mode 100644 index 00000000..d38ca3b5 --- /dev/null +++ b/tests/merges/TestD/a_graphs @@ -0,0 +1,4 @@ +_:a _:b . +_:b _:a . +--- +_:a _:a . diff --git a/tests/merges/TestD/base.nt b/tests/merges/TestD/base.nt new file mode 100644 index 00000000..e69de29b diff --git a/tests/merges/TestD/branch.nt b/tests/merges/TestD/branch.nt new file mode 100644 index 00000000..c0c4898d --- /dev/null +++ b/tests/merges/TestD/branch.nt @@ -0,0 +1,4 @@ +_:a _:b . +_:b _:c . +_:c _:d . +_:d _:a . diff --git a/tests/merges/TestD/debugResult b/tests/merges/TestD/debugResult new file mode 100644 index 00000000..bde5fdf3 --- /dev/null +++ b/tests/merges/TestD/debugResult @@ -0,0 +1 @@ +_:a _:a . 
\ No newline at end of file diff --git a/tests/merges/TestD/target.nt b/tests/merges/TestD/target.nt new file mode 100644 index 00000000..6ca2abf5 --- /dev/null +++ b/tests/merges/TestD/target.nt @@ -0,0 +1,4 @@ +_:a _:b . +_:b _:c . +_:c _:d . +_:d _:a . diff --git a/tests/merges/TestHouseMerge/a_graphs b/tests/merges/TestHouseMerge/a_graphs new file mode 100644 index 00000000..21c46b5e --- /dev/null +++ b/tests/merges/TestHouseMerge/a_graphs @@ -0,0 +1,41 @@ + _:a . +_:a _:b . +_:b _:d . +_:d _:e . +_:e _:b . +_:b _:c . +_:c _:d . +_:d _:a . +_:a _:e . +_:a . + _:x . +_:x _:y . +_:y _:l . +_:l _:m . +_:m _:y . +_:y _:z . +_:z _:l . +_:l _:x . +_:x _:m . +_:m . +--- + _:a . +_:a _:b . +_:b _:d . +_:d _:e . +_:e _:b . +_:b _:c . +_:c _:d . +_:d _:a . +_:a _:e . +_:a . + _:x . +_:x _:y . +_:y _:l . +_:l _:m . +_:m _:y . +_:y _:z . +_:z _:l . +_:l _:x . +_:x _:m . +_:m . diff --git a/tests/merges/TestHouseMerge/base.nt b/tests/merges/TestHouseMerge/base.nt new file mode 100644 index 00000000..e69de29b diff --git a/tests/merges/TestHouseMerge/branch.nt b/tests/merges/TestHouseMerge/branch.nt new file mode 100644 index 00000000..6da929b2 --- /dev/null +++ b/tests/merges/TestHouseMerge/branch.nt @@ -0,0 +1,20 @@ + _:a . +_:a _:b . +_:b _:d . +_:d _:e . +_:e _:b . +_:b _:c . +_:c _:d . +_:d _:a . +_:a _:e . +_:a . + _:x . +_:x _:y . +_:y _:l . +_:l _:m . +_:m _:y . +_:y _:z . +_:z _:l . +_:l _:x . +_:x _:m . +_:m . diff --git a/tests/merges/TestHouseMerge/target.nt b/tests/merges/TestHouseMerge/target.nt new file mode 100644 index 00000000..5ac5605e --- /dev/null +++ b/tests/merges/TestHouseMerge/target.nt @@ -0,0 +1,20 @@ + _:a . +_:a _:b . +_:b _:d . +_:d _:e . +_:e _:b . +_:b _:c . +_:c _:d . +_:d _:a . +_:a _:e . +_:a . + _:x . +_:x _:y . +_:y _:l . +_:l _:m . +_:m _:y . +_:y _:z . +_:z _:l . +_:l _:x . +_:x _:m . +_:m . 
diff --git a/tests/merges/context.py b/tests/merges/context.py new file mode 100644 index 00000000..a96fe3c1 --- /dev/null +++ b/tests/merges/context.py @@ -0,0 +1,8 @@ +import os +import sys + +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))) + +import quit + +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) diff --git a/tests/merges/test_merge_methods.py b/tests/merges/test_merge_methods.py new file mode 100644 index 00000000..531126d3 --- /dev/null +++ b/tests/merges/test_merge_methods.py @@ -0,0 +1,103 @@ +from context import quit + +import os +from os import listdir +from os.path import isfile, isdir, join +import quit.application as quitApp +from quit.web.app import create_app +import unittest +import pygit2 +from helpers import TemporaryRepositoryFactory +import rdflib +from atomicgraphs.atomic_graph import AtomicGraphFactory as aGraphFactory + + +class GraphMergeTests(unittest.TestCase): + """Test if two graphs on different branches are correctly merged.""" + + def setUp(self): + return + + def tearDown(self): + return + + def testThreeWayMerge(self): + """Test merging two commits. Method: Three-Way""" + testPath = os.path.dirname(os.path.abspath(__file__)) + for d in listdir(testPath): + if d[0:4] == "Test" and isdir(join(testPath, d)): + self._merge_test(join(testPath, d), "three-way") + + def testContextMerge(self): + """Test merging two commits.
Method: Context""" + testPath = os.path.dirname(os.path.abspath(__file__)) + exceptions = ["TestHouseMerge"] # TestHouse actually raises a merge conflict exception + for d in listdir(testPath): + if d[0:4] == "Test" and isdir(join(testPath, d)) and d not in exceptions: + self._merge_test(join(testPath, d), "context") + + def _merge_test(self, dirPath, method): + # Prepare a git Repository + file = open(join(dirPath, "base.nt"), "r") + content = file.read() + file.close() + with TemporaryRepositoryFactory().withGraph("http://example.org/", content) as repo: + # Start Quit + args = quitApp.getDefaults() + args['targetdir'] = repo.workdir + app = create_app(args).test_client() + + app.post("/branch", data={"oldbranch": "master", "newbranch": "componentA"}) + app.post("/branch", data={"oldbranch": "master", "newbranch": "componentB"}) + + self.expand_branch(repo, "componentA", join(dirPath, "branch.nt")) + self.expand_branch(repo, "componentB", join(dirPath, "target.nt")) + + app = create_app(args).test_client() + app.post("/merge", data={"target": "componentB", "branch": "componentA", + "method": method}) + + reference = repo.lookup_reference('refs/heads/%s' % "componentB") + branchOid = reference.resolve().target + branchCommit = repo.get(branchOid) + if isfile(join(dirPath, "a_graphs")): + file = open(join(dirPath, "a_graphs"), "r") + aControllGraphContents = file.read().split("---") + file.close() + resultContent = branchCommit.tree["graph.nt"].data.decode("utf-8") + resultGraph = rdflib.Graph().parse(data=resultContent, format="nt") + aResultGraphs = set(iter(aGraphFactory(resultGraph))) + for aControllGraphContent in aControllGraphContents: + graph = rdflib.Graph().parse(data=aControllGraphContent, format="nt") + for aGraph in aGraphFactory(graph): + message = "Merge test {}:\n Graph {} is not in the set: {}" + resultSetString = {a.__hash__() for a in aResultGraphs} + message = message.format(dirPath, aGraph.__hash__(), resultSetString) + self.assertTrue(aGraph
in aResultGraphs, message) + aResultGraphs.remove(aGraph) + message = "Merge test {}:\n Not all graphs were defined in a_graphs: {}" + message = message.format(dirPath, aResultGraphs) + self.assertEqual(0, len(aResultGraphs), message) + else: + file = open(join(dirPath, "result.nt"), "r") + self.assertEqual(branchCommit.tree["graph.nt"].data.decode("utf-8"), file.read()) + file.close() + + def expand_branch(self, repo, branch, graphFile): + reference = repo.lookup_reference('refs/heads/%s' % branch) + branchOid = reference.resolve().target + branchCommit = repo.get(branchOid) + treeBuilder = repo.TreeBuilder(branchCommit.tree) + file = open(graphFile, "r") + treeBuilder.insert("graph.nt", repo.create_blob(file.read().encode()), 33188) + file.close() + treeOID = treeBuilder.write() + author = pygit2.Signature("test", "test@example.org") + newCommitOid = repo.create_commit("refs/heads/%s" % branch, author, author, + "this is a test", treeOID, [branchOid]) + repo.state_cleanup() + return newCommitOid + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_app.py b/tests/test_app.py index 6b88c23d..6a56953d 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -3841,6 +3841,39 @@ def testDeleteWithWhitespaceFile(self): with open(path.join(repo.workdir, 'graph.nt'), 'r') as f: self.assertEqual('\n', f.read()) + def testUpdateWithBlankNode(self): + # Prepare a git Repository + graphContent = """ . + _:a _:c . + _:c _:d . + """ + with TemporaryRepositoryFactory().withGraph("http://example.org/", graphContent) as repo: + + # Start Quit + args = quitApp.getDefaults() + args['targetdir'] = repo.workdir + app = create_app(args).test_client() + + with open(path.join(repo.workdir, 'graph.nt'), 'r') as f: + self.assertEqual(graphContent, f.read()) + + # execute Update query + update = 'INSERT DATA { GRAPH { _:c _:e .}}' + result = app.post('/sparql', + content_type="application/sparql-update", + data=update) + targetContent = """ + . +_:a _:c . +_:c _:d .
+_:c _:e . +""" + + reference = repo.lookup_reference('refs/heads/%s' % "master") + branchOid = reference.resolve().target + branchCommit = repo.get(branchOid) + self.assertEqual(targetContent, branchCommit.tree["graph.nt"].data.decode("utf-8")) + if __name__ == '__main__': unittest.main()