private ReadGraph g;
private Layer0 b;
+ private boolean mapLiterals = true;
+
public GraphComparator(Resource r1, Resource r2) {
this.r1 = r1;
this.r2 = r2;
this.comparator = comparator;
}
- ArrayList<Statement> ss1 = new ArrayList<Statement>();
- ArrayList<Statement> ss2 = new ArrayList<Statement>();
-
public Comparator<Resource> getResourceComparator() {
return rcomp;
this.maxIter = maxIter;
}
- public void clear() {
+ public boolean isMapLiterals() {
+ return mapLiterals;
+ }
+
+ public void setMapLiterals(boolean mapLiterals) {
+ this.mapLiterals = mapLiterals;
+ }
+
+ private void clear() {
changes1.clear();
changes1Set.clear();
changes2.clear();
processedResources.clear();
}
+ public void dispose() {
+ changes1 = null;
+ changes1Set = null;
+ changes2 = null;
+ changes2Set = null;
+ comparableResources = null;
+ comparableStatements = null;
+ comparator = null;
+ modifications = null;
+ modificationsSet = null;
+ processedResources = null;
+ nonMatchedLeft = null;
+ nonMatchedRight = null;
+ nonTested = null;
+ nonTraversed = null;
+ strong = null;
+ tested = null;
+ traversed = null;
+ }
+
public void test(ReadGraph g) throws DatabaseException {
test(g,null);
}
private boolean compareIter(ReadGraph graph, Stack<Resource> objectsLeft, Stack<Resource> objectsRight, Set<Statement> unreliableLeft, Set<Statement> unreliableRight) throws DatabaseException {
// process compares objects that are identified and searches for more resources to process.
iter = 0;
- boolean c = process(objectsLeft, objectsRight, unreliableLeft, unreliableRight);
+ boolean c = process(objectsLeft, objectsRight, unreliableLeft, unreliableRight,false);
if (objectsLeft.isEmpty()) {
// process unreliable handles cases where unidentified statements subject and object have been identified
c |= processUnreliable(unreliableLeft, unreliableRight);
// process unreliable handles cases where unidentified resources have path of length one to identified resource
if (!c) {
- c |= processUnreliable(unreliableLeft, unreliableRight,objectsLeft,objectsRight);
+ c |= processUnreliable(unreliableLeft, unreliableRight,objectsLeft,objectsRight,false);
+ if (!c && objectsLeft.isEmpty() && unreliableLeft.size() > 0 && unreliableRight.size() > 0) {
+ c |= processUnreliable(unreliableLeft, unreliableRight,objectsLeft,objectsRight,true);
+ }
+ if (!c && objectsLeft.isEmpty() && unreliableLeft.size() > 0 && unreliableRight.size() > 0) {
+ c |= processUnreliable2(unreliableLeft, unreliableRight,objectsLeft,objectsRight,false);
+ }
if (!c && objectsLeft.isEmpty() && unreliableLeft.size() > 0 && unreliableRight.size() > 0) {
- c |= processUnreliable2(unreliableLeft, unreliableRight,objectsLeft,objectsRight);
+ c |= processUnreliable2(unreliableLeft, unreliableRight,objectsLeft,objectsRight,true);
}
if (!c && objectsLeft.isEmpty() && unreliableLeft.size() > 0 && unreliableRight.size() > 0) {
// comparison is ending, but we have still unprocessed unidentified resources left.
}
}
}
+ if (!c)
+ process(objectsLeft, objectsRight, unreliableLeft, unreliableRight,true);
return c;
}
return "Graph compare " + (cr + ch) + " / " + (cr+ch+(Math.max(objectsLeft.size(), objectsRight.size())+Math.max(unreliableLeft.size(), unreliableRight.size())));
}
- private boolean process(Stack<Resource> objectsLeft, Stack<Resource> objectsRight, Set<Statement> unreliableLeft, Set<Statement> unreliableRight) throws DatabaseException {
+ private boolean process(Stack<Resource> objectsLeft, Stack<Resource> objectsRight, Set<Statement> unreliableLeft, Set<Statement> unreliableRight, boolean useBest) throws DatabaseException {
List<Statement> ss1 = new ArrayList<Statement>();
List<Statement> ss2 = new ArrayList<Statement>();
boolean didSomething = false;
ss1 = filter(Collections.singletonList(b.HasProperty), ss1);
ss2 = filter(Collections.singletonList(b.HasProperty), ss2);
- compareStatements(ss1, ss2, null, null,null,null);
+ compareStatements(ss1, ss2, null, null,null,null, useBest);
ss1.clear();
ss2.clear();
}
ss2 = filterAsserted(r2, ss2);
ss1 = filterNonTraversed(ss1);
ss2 = filterNonTraversed(ss2);
- compareStatements(ss1, ss2, objectsLeft, objectsRight,unreliableLeft,unreliableRight);
+ compareStatements(ss1, ss2, objectsLeft, objectsRight,unreliableLeft,unreliableRight, useBest);
ss1.clear();
ss2.clear();
return didSomething;
}
- private boolean processUnreliable(Set<Statement> unreliableLeft, Set<Statement> unreliableRight, Stack<Resource> objectsLeft, Stack<Resource> objectsRight) throws DatabaseException {
+ private boolean processUnreliable(Set<Statement> unreliableLeft, Set<Statement> unreliableRight, Stack<Resource> objectsLeft, Stack<Resource> objectsRight, boolean useBest) throws DatabaseException {
MapList<Resource,Statement> subjectLeft = new MapList<Resource, Statement>();
MapList<Resource,Statement> subjectRight = new MapList<Resource, Statement>();
MapList<Resource,Statement> objectLeft = new MapList<Resource, Statement>();
}
}
- compareStatements(ss1, ss2, objectsLeft, objectsRight,unreliableLeft,unreliableRight);
-
for (Resource similarOl : comparableOLeft.getKeys()) {
List<Statement> similarLeft = comparableOLeft.getValues(similarOl);
if (similarLeft.size() == left.size()) {
}
- private boolean processUnreliable2(Set<Statement> unreliableLeft, Set<Statement> unreliableRight, Stack<Resource> objectsLeft, Stack<Resource> objectsRight) throws DatabaseException {
+ private boolean processUnreliable2(Set<Statement> unreliableLeft, Set<Statement> unreliableRight, Stack<Resource> objectsLeft, Stack<Resource> objectsRight, boolean useBest) throws DatabaseException {
MapList<Resource,Statement> subjectLeft = new MapList<Resource, Statement>();
MapList<Resource,Statement> subjectRight = new MapList<Resource, Statement>();
MapList<Resource,Statement> objectLeft = new MapList<Resource, Statement>();
subjectRight.add(s.getSubject(),s);
objectRight.add(s.getObject(),s);
}
+ Set<Pair<Resource, Resource>> processed = new HashSet<Pair<Resource,Resource>>();
for (Resource ol : objectLeft.getKeys()) {
if (maxIter > 0 && iter > maxIter)
break;
}
if (sLeft.size() == 1 && sRight.size() == 1) {
- List<Statement> ss1 = new ArrayList<Statement>(subjectLeft.getValues(sLeft.iterator().next()));
- List<Statement> ss2 = new ArrayList<Statement>(subjectRight.getValues(sRight.iterator().next()));
+ Resource sl = sLeft.iterator().next();
+ Resource sr = sRight.iterator().next();
+ Pair<Resource, Resource> p = new Pair<Resource, Resource>(sl, sr);
+ if (processed.contains(p))
+ continue;
+ processed.add(p);
+ List<Statement> ss1 = new ArrayList<Statement>(subjectLeft.getValuesSnapshot(sl));
+ List<Statement> ss2 = new ArrayList<Statement>(subjectRight.getValuesSnapshot(sr));
+ for (int i = ss1.size() -1; i >= 0; i--) {
+ if (!unreliableLeft.contains(ss1.get(i)))
+ ss1.remove(i);
+ }
+ for (int i = ss2.size() -1; i >= 0; i--) {
+ if (!unreliableRight.contains(ss2.get(i)))
+ ss2.remove(i);
+ }
- int count = comparableStatements.size();
- compareStatements(ss1, ss2, objectsLeft, objectsRight,unreliableLeft,unreliableRight);
- if (comparableStatements.size() > count) {
+ int ccount = comparableStatements.size();
+ int lcount = changes1.size();
+ int rcount = changes2.size();
+
+ compareStatements(ss1, ss2, objectsLeft, objectsRight,unreliableLeft,unreliableRight, useBest);
+ if (comparableStatements.size() > ccount) {
didSomething = true;
for (Entry<Statement, Statement> entry : comparableStatements.getEntries()) {
unreliableLeft.remove(entry.getKey());
iter++;
}
}
+ if (changes1.size() > lcount) {
+ didSomething = true;
+ for (Statement stm : changes1) {
+ unreliableLeft.remove(stm);
+ iter++;
+ }
+ }
+ if (changes2.size() > rcount) {
+ didSomething = true;
+ for (Statement stm : changes2) {
+ unreliableRight.remove(stm);
+ iter++;
+ }
+ }
}
}
return didSomething;
}
if (matchingPaths.size() > 0) {
if (matchingPaths.size() == 1) {
- didSomething = true;
Resource or = matchingPaths.keySet().iterator().next();
-
- objectsLeft.add(ol);
- objectsRight.add(or);
- addComparable(ol, or);
- Collection<Statement> statementsLeft = objectLeft.getValues(ol);
- Collection<Statement> statementsRight = objectRight.getValues(or);
- unreliableLeft.removeAll(statementsLeft);
- unreliableRight.removeAll(statementsRight);
BijectionMap<Path,Path> map = getMatchingPaths(pathsLeft, matchingPaths.get(or));
+ BijectionMap<Resource,Resource> toMap = new BijectionMap<Resource, Resource>();
+ toMap.map(ol, or);
+ boolean pass = true;
+ // Path matching may create conflicting results, prevent selecting them.
for (Path left : map.getLeftSet()) {
Path right = map.getRight(left);
for (int i = 0; i < left.getLength(); i++) {
- addComparable(left.getStatements().get(i),right.getStatements().get(i));
- iter++;
+ Resource l = left.getStatements().get(i).getObject();
+ Resource r = right.getStatements().get(i).getObject();
+ if (comparableResources.contains(l, r))
+ continue;
+ if (comparableResources.containsLeft(l)) {
+ pass = false;
+ break;
+ }
+ if (comparableResources.containsRight(r)) {
+ pass = false;
+ break;
+ }
+ if (toMap.contains(l,r))
+ continue;
+ if (toMap.containsLeft(l)) {
+ pass = false;
+ break;
+ }
+ if (toMap.containsRight(r)) {
+ pass = false;
+ break;
+ }
+ toMap.map(l, r);
+
}
}
- iter++;
+ if (pass) {
+ didSomething = true;
+
+ objectsLeft.add(ol);
+ objectsRight.add(or);
+ addComparable(ol, or);
+ Collection<Statement> statementsLeft = objectLeft.getValues(ol);
+ Collection<Statement> statementsRight = objectRight.getValues(or);
+ unreliableLeft.removeAll(statementsLeft);
+ unreliableRight.removeAll(statementsRight);
+ for (Path left : map.getLeftSet()) {
+ Path right = map.getRight(left);
+ for (int i = 0; i < left.getLength(); i++) {
+ addComparable(left.getStatements().get(i),right.getStatements().get(i));
+ iter++;
+ }
+ }
+ iter++;
+ } else {
+ iter++;
+ }
}
}
}
}
}
- private void addModification(Resource left, Statement leftstm, Resource right, Statement rightstm) {
+ private void addModification(Resource left, Statement leftstm, Resource right, Statement rightstm) throws DatabaseException {
Modification mod = new Modification(left, right, leftstm, rightstm);
if (!modificationsSet.contains(mod)) {
modificationsSet.add(mod);
modifications.add(mod);
+ if (mapLiterals && leftstm != null && rightstm != null)
+ addComparable(leftstm, rightstm);
}
}
}
}
- private void compareStatements(List<Statement> ss1, List<Statement> ss2, Stack<Resource> objectsLeft, Stack<Resource> objectsRight, Collection<Statement> unreliableLeft, Collection<Statement> unreliableRight) throws DatabaseException {
+ private void compareStatements(List<Statement> ss1, List<Statement> ss2, Stack<Resource> objectsLeft, Stack<Resource> objectsRight, Collection<Statement> unreliableLeft, Collection<Statement> unreliableRight, boolean useBest) throws DatabaseException {
sortStatement(ss1, ss2);
int i1 = 0;
int same2 = sameRel(ss2, i2);
int c = rcomp.compare(ss1.get(i1).getPredicate(),ss2.get(i2).getPredicate());
if (c == 0) {
- compareStatements(ss1, i1, same1, ss2, i2, same2,objectsLeft,objectsRight,unreliableLeft,unreliableRight);
+ compareStatements(ss1, i1, same1, ss2, i2, same2,objectsLeft,objectsRight,unreliableLeft,unreliableRight, useBest);
i1+=same1;
i2+=same2;
} else if (c < 0) {
return comparator.compare(g, o1, o2);
}
- private void compareStatements(List<Statement> ss1, int off1, int len1, List<Statement> ss2, int off2, int len2, Collection<Resource> objectsLeft, Collection<Resource> objectsRight, Collection<Statement> unreliableLeft, Collection<Statement> unreliableRight) throws DatabaseException {
+ private void compareStatements(List<Statement> ss1, int off1, int len1, List<Statement> ss2, int off2, int len2, Collection<Resource> objectsLeft, Collection<Resource> objectsRight, Collection<Statement> unreliableLeft, Collection<Statement> unreliableRight, boolean useBest) throws DatabaseException {
boolean[] used1 = new boolean[len1];
for (int i = 0; i < used1.length; i++) {
used1[i] = false;
Integer[] pris = priorities.getKeys(new Integer[]{});
Arrays.sort(pris);
boolean matchFail = priorityMatching(ss1, off1, len1, ss2, off2, len2, pris, differences, priorities, used1, used2, objectsLeft, objectsRight, false);
- if (matchFail)
- matchFail = priorityMatching(ss1, off1, len1, ss2, off2, len2, pris, differences, priorities, used1, used2, objectsLeft, objectsRight, objectsLeft == null);
+ if (matchFail && objectsLeft == null)
+ matchFail = priorityMatching(ss1, off1, len1, ss2, off2, len2, pris, differences, priorities, used1, used2, objectsLeft, objectsRight,true);
+
if (unreliableLeft != null) {
if (matchFail) {
+ if (useBest)
+ bestMatching(ss1, off1, len1, ss2, off2, len2, differences, priorities, used1, used2, objectsLeft, objectsRight);
// With match failure, statement matching was aborted. We must move all unmatched statements to unreliable.
for (Integer pri : pris) {
if (pri == 0 || pri == Integer.MAX_VALUE)
}
return matchFail;
}
+
+ /**
+ * priority matching has a flaw, if lowest priority produces conflicting results, it is not able to match anything.
+ * This matching logic looks for a best match, and if it is non conflicting applies the match.
+ *
+ * TODO: now this applies all unique lowest priority matches for each object. Should this pick only the object with global lowest priority,
+ * so that new matches improve situation with conflicting (non unique) matches?
+ *
+ * @param ss1
+ * @param off1
+ * @param len1
+ * @param ss2
+ * @param off2
+ * @param len2
+ * @param differences
+ * @param priorities
+ * @param used1
+ * @param used2
+ * @param objectsLeft
+ * @param objectsRight
+ * @throws DatabaseException
+ */
+ private void bestMatching(List<Statement> ss1, int off1, int len1, List<Statement> ss2, int off2, int len2, List<List<Integer>> differences, MapList<Integer, Integer> priorities, boolean used1[],boolean used2[], Collection<Resource> objectsLeft, Collection<Resource> objectsRight) throws DatabaseException {
+ for (int i1 = 0; i1 < differences.size(); i1++) {
+ if (used1[i1])
+ continue;
+ List<Integer> diffs = differences.get(i1);
+ int min = Integer.MAX_VALUE;
+ int minIndex = -1;
+ for (int j = 0; j < diffs.size(); j++) {
+ int p = diffs.get(j);
+ if (p < 0)
+ continue;
+ if (p < min) {
+ min = p;
+ minIndex = j;
+ } else if (p == min) {
+ minIndex = -1;
+ }
+ }
+ if (minIndex >= 0 && min > 0) {
+ int i2 = minIndex;
+ if (!used2[i2]) {
+ used1[i1] = true;
+ used2[i2] = true;
+ Statement s1 = ss1.get(i1+off1);
+ Statement s2 = ss2.get(i2+off2);
+
+ if (objectsLeft != null) {
+ objectsLeft.add(s1.getObject());
+ objectsRight.add(s2.getObject());
+ }
+ addComparable(s1, s2);
+ }
+ }
+ }
+ }
/**
if (!eq) {
addModification(r1,s1,r2,s2);
}
- if (!a1 && !a2)
+ if (mapLiterals && !a1 && !a2)
addComparable(s1, s2);
} else {
// Non literal properties.