/*******************************************************************************
 * Copyright (c) 2007, 2010 Association for Decentralized Information Management
 * in Industry THTH ry.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     VTT Technical Research Centre of Finland - initial API and implementation
 *******************************************************************************/
package fi.vtt.simantics.procore.internal;
//package fi.vtt.simantics.procore.internal2;

//import java.util.ArrayList;
//import java.util.Collection;
//import java.util.HashMap;
//import java.util.HashSet;
//import java.util.Map;

//import org.simantics.db.ReadGraph;
//import org.simantics.db.Resource;
//import org.simantics.db.WriteGraph;
//import org.simantics.db.common.queries.QueryProvider;
//import org.simantics.db.queries.QuerySupport;
//import org.simantics.utils.datastructures.Pair;

//import fi.vtt.simantics.procore.internal2.ClusteringInformation.ReclusterIterator;
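
// Disabled draft of a clustering algorithm for newly created resources. As far
// as the remaining code shows, createContexts() buckets new instances under
// per-type context resources and records a cluster id for each new resource in
// assignment2, which recluster() then applies (falling back to one shared
// default cluster). The coverage-based pipeline in between is retained only as
// commented-out reference code.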
//class ClusteringAlgorithmImpl implements ClusteringAlgorithm {
//
//    HashMap<Integer, NewCluster> assignment;
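//
//    // A cluster under construction: grown id by id around a seed resource
//    // ('root') and optionally tied to a pre-existing cluster ('existing').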
//    class NewCluster {
//
//        // declarations implied by the constructor and by cluster()/combine*() below
//        public long id = 0;
//        public int existing;
//        public int root;
//
//        ArrayList<Integer> ids = new ArrayList<Integer>();
//
//        public NewCluster(int existing, int root) {
//            this.existing = existing;
//            this.root = root;
//        }
//
//        public void grow(Integer id) {
//            ids.add(id);
//            assignment.put(id, this);
//        }
//
//        public void merge(NewCluster other) {
//            assert(other != this);
//            for(int i : other.ids) grow(i);
//        }
//
//        public int size() {
//            return ids.size();
//        }
//    }
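//
//    // Node of the coverage estimation pass: 'coverage' approximates how many
//    // new resources are reachable from 'seed' along dependency statements.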
//    class CoverageNode {
//
//        public final int seed; // declaration implied by uses of node.seed below
//        public int lastCoverage = 1;
//        public int coverage = 1;
//
//        public CoverageNode(int id) {
//            this.seed = id;
//        }
//    }
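//
//    // Predicate classification caches, filled lazily in recluster(): property
//    // relations, DependsOn subrelations, and everything else.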
//    HashSet<Integer> properties;
//    HashSet<Integer> depends;
//    HashSet<Integer> unknown;
//
//    class Statement {
//
//        final public int subject;
//        final public int predicate;
//        final public int object;
//
//        public Statement(int s, int p, int o) {
//            //System.out.println("new Statement(" + s + "," + p + "," + o + ")");
//            subject = s;
//            predicate = p;
//            object = o;
//        }
//    }
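//
//    // Propagates coverage counts from objects to subjects along dependency
//    // statements for a fixed five rounds, giving each node a rough estimate
//    // of the number of new resources hanging below it.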
//    private void computeCoverages(HashMap<Integer, CoverageNode> newNodes, QuerySupport core, ArrayList<Statement> statements) {
//
//        for(int i=0;i<5;i++) {
//
//            for(CoverageNode n : newNodes.values()) {
//                n.lastCoverage = n.coverage;
//            }
//
//            for(Statement s : statements) {
//                if(depends.contains(s.predicate)) {
//                    CoverageNode sn = newNodes.get(s.subject);
//                    CoverageNode on = newNodes.get(s.object);
//                    if(sn != null && on != null) {
//                        sn.coverage += on.lastCoverage;
//                    }
//                }
//            }
//        }
//    }
//
//    private final int MAX_COVERAGE = 5000;
//    private final int MAX_CLUSTER = 5000;
//    private final int MAX_CLUSTER_2 = 15000;
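//
//    // Recursive walk over the dependency graph: nodes covering more than
//    // MAX_COVERAGE resources go to the per-root topCluster, the rest fill
//    // currentCluster, which is replaced with a fresh cluster whenever it
//    // grows past MAX_CLUSTER at a coverage boundary.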
//    private void clusterNode(CoverageNode node, HashMap<Integer, CoverageNode> newNodes, QuerySupport core, HashMap<Integer, Collection<Integer>> deps, HashSet<CoverageNode> visited, NewCluster topCluster, NewCluster currentCluster, ArrayList<NewCluster> clusters) {
//
//        assert(node != null);
//
//        if(visited.contains(node)) return;
//        visited.add(node);
//
//        if(node.coverage > MAX_COVERAGE) {
//            topCluster.grow(node.seed);
//        } else {
//            currentCluster.grow(node.seed);
//        }
//
//        Collection<Integer> dc = deps.get(node.seed);
//        if(dc == null) return;
//
//        for(Integer i : dc) {
//
//            CoverageNode sn = newNodes.get(i);
//            if(sn != null) {
//
//                //System.out.println("traverse: " + node.coverage + " " + sn.coverage + " " + currentCluster.size());
//
//                if(node.coverage > MAX_COVERAGE && sn.coverage < MAX_COVERAGE) {
//                    if(currentCluster.size() > MAX_CLUSTER) {
////                        System.out.println("new cluster: " + node.coverage + " " + sn.coverage + " " + currentCluster.size());
//                        currentCluster = new NewCluster(0, node.seed);
//                        clusters.add(currentCluster);
//                    } else {
////                        System.out.println("continue cluster: " + node.coverage + " " + sn.coverage + " " + currentCluster.size());
//                    }
//                }
//
//                clusterNode(sn, newNodes, core, deps, visited, topCluster, currentCluster, clusters);
//            }
//        }
//    }
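//
//    // Merges undersized clusters that refer to the same pre-existing cluster,
//    // minting a new cluster id each time a merged batch passes MAX_CLUSTER.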
//    private void combineExistingSiblings(ArrayList<NewCluster> clusters, ClusteringSupport support) {
//
//        HashMap<Integer, ArrayList<NewCluster>> siblings = new HashMap<Integer, ArrayList<NewCluster>>();
//        for(NewCluster cluster : clusters) {
//            if(cluster.size() < MAX_CLUSTER && cluster.existing > 0) {
//                ArrayList<NewCluster> list = siblings.get(cluster.existing);
//                if(list == null) {
//                    list = new ArrayList<NewCluster>();
//                    siblings.put(cluster.existing, list);
//                }
//                list.add(cluster);
//            }
//        }
//
//        for(ArrayList<NewCluster> list : siblings.values()) {
//
//            if(list.size() < 2) continue;
//
////            System.out.println("Processing shared root with " + list.size() + " new clusters.");
//
//            NewCluster current = null;
//
//            for(NewCluster cluster : list) {
//
//                if(current == null) {
//                    current = cluster;
//                } else {
//                    //System.out.println("Merging to sibling cluster " + current.size + " <-> " + cluster.size);
//                    current.merge(cluster);
//                }
//
//                if(current.size() > MAX_CLUSTER) {
//                    current.id = support.newClusterId();
//                    current = null; // reset implied by the size guard above
//                }
//            }
//        }
//    }
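//
//    // Same merge pass, but keyed on the seed root, so that small clusters
//    // grown from one root end up in the same cluster.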
//    private void combineRootSiblings(ArrayList<NewCluster> clusters, ClusteringSupport support) {
//
//        HashMap<Integer, ArrayList<NewCluster>> siblings = new HashMap<Integer, ArrayList<NewCluster>>();
//        for(NewCluster cluster : clusters) {
//            if(cluster.size() < MAX_CLUSTER) {
//                ArrayList<NewCluster> list = siblings.get(cluster.root);
//                if(list == null) {
//                    list = new ArrayList<NewCluster>();
//                    siblings.put(cluster.root, list);
//                }
//                list.add(cluster);
//            }
//        }
//
//        for(ArrayList<NewCluster> list : siblings.values()) {
//
//            if(list.size() < 2) continue;
//
////            System.out.println("Processing shared root with " + list.size() + " new clusters.");
//
//            NewCluster current = null;
//
//            for(NewCluster cluster : list) {
//
//                if(current == null) {
//                    current = cluster;
//                } else {
////                    System.out.println("Merging to sibling cluster " + current.size + " <-> " + cluster.size);
//                    current.merge(cluster);
//                }
//
//                if(current.size() > MAX_CLUSTER) {
//                    current.id = support.newClusterId();
//                    current = null; // reset implied by the size guard above
//                }
//            }
//        }
//    }
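//
//    // Top-level pass: grows a cluster pair from every root, runs both sibling
//    // merges, tries to place small leftovers into an existing sibling cluster
//    // that is still below MAX_CLUSTER_2, and finally mints ids for the rest.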
//    private void cluster(HashMap<Integer, Integer> roots, HashMap<Integer, CoverageNode> newNodes, ClusteringSupport support, QuerySupport core, ReadGraph graph, HashMap<Integer, Collection<Integer>> deps) {
//
//        ArrayList<NewCluster> clusters = new ArrayList<NewCluster>();
//        HashSet<CoverageNode> visited = new HashSet<CoverageNode>();
//        for(Map.Entry<Integer, Integer> e : roots.entrySet()) {
//            NewCluster topCluster = new NewCluster(e.getValue(), e.getKey());
//            NewCluster currentCluster = new NewCluster(e.getValue(), e.getKey());
//            clusterNode(newNodes.get(e.getKey()), newNodes, core, deps, visited, topCluster, currentCluster, clusters);
//            if(topCluster.size() > 0) clusters.add(topCluster);
//            if(currentCluster.size() > 0) clusters.add(currentCluster);
//        }
//
////        System.out.println("Initial clustering produced " + clusters.size() + " clusters.");
//
//        combineRootSiblings(clusters, support);
//        combineExistingSiblings(clusters, support);
//
//        for(NewCluster cluster : clusters) {
//
//            if(cluster.size() > 0 && cluster.size() < MAX_CLUSTER) {
//
//                if(!newNodes.containsKey(cluster.existing)) {
//
//                    Collection<Resource> siblings2 = graph.getObjects(core.getResource(cluster.root), graph.getBuiltins().DependsOn);
//
//                    for(Resource sibling : siblings2) {
//
//                        if(newNodes.get(core.getId(sibling)) == null) {
//
//                            long existing = support.getCluster(sibling);
//                            long existingSize = support.getClusterSizeCache(existing);
//                            if(existingSize < MAX_CLUSTER_2) {
//                                cluster.id = existing;
//                                System.out.println("Merging to existing cluster " + existing + " with size " + existingSize);
//                            } else {
//                                System.out.println(" -sibling too large (" + existingSize + ")");
//                            }
//                        }
//                    }
//                }
//            }
//
//            if(cluster.size() > 0 && cluster.id == 0) {
//                cluster.id = support.newClusterId();
//            }
//        }
//
////        System.out.println("Clustering report:");
//
//        int total = 0;
//        int totalClusters = 0;
//        for(NewCluster c : clusters) {
//            if(c.size() > 0) {
////                System.out.println("-" + c.size() + " elements - id = " + c.id);
//                total += c.size();
//                totalClusters++;
//            }
//        }
//
//        //System.out.println("Total of " + total + " elements in " + totalClusters + " clusters.");
//    }
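//
//    // Entry point of the reclustering step. The coverage pipeline is disabled
//    // (the quadruple-commented block); the live tail simply assigns each
//    // reclustered resource the cluster chosen in createContexts(), or a shared
//    // default cluster when no assignment exists.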
//    public void recluster(ClusteringInformation info, ClusteringSupport clusteringSupport, ReadGraph graph, QuerySupport querySupport, QueryProvider queryProvider) {
//
////        Collection<Integer> resources = new ArrayList<Integer>();
////        ReclusterIterator it = info.getReclusterIterator();
////        if(it == null) return;
////
////        while(it.hasNext()) {
////            resources.add(it.getReclusterResourceId());
////            it.advance();
////        }
////
////        ArrayList<Statement> statements = new ArrayList<Statement>();
////        AddedStatmentsIterator it2 = info.getAddedStatmentsIterator();
////        while(it2.hasNext()) {
////            it2.advance();
////            statements.add(new Statement(it2.getAddedSubjectId(), it2.getAddedPredicateId(), it2.getAddedObjectId()));
////        }
////
////        //System.out.println("Clustering " + resources.size() + " new resources, " + statements.size() + " new statements.");
////
////        HashMap<Integer, CoverageNode> newNodes = new HashMap<Integer, CoverageNode>();
////
////        instanceOf = querySupport.getId(graph.getBuiltins().InstanceOf);
////        consistsOf = querySupport.getId(graph.getBuiltins().ConsistsOf);
////        dependsOn = querySupport.getId(graph.getBuiltins().DependsOn);
////
////        assignment = new HashMap<Integer, NewCluster>();
////        properties = new HashSet<Integer>();
////        depends = new HashSet<Integer>();
////        unknown = new HashSet<Integer>();
////
////        depends.add(consistsOf);
////
////        for(Integer r : resources) {
////            newNodes.put(r, new CoverageNode(r));
////        }
////
////        for(Statement s : statements) {
////
////            if(unknown.contains(s.predicate)) continue;
////            if(depends.contains(s.predicate)) continue;
////            if(properties.contains(s.predicate)) continue;
////            if(s.predicate == instanceOf) continue;
////            if(s.predicate == consistsOf) continue;
////
////            if(graph.isSubrelationOf(querySupport.getResource(s.predicate), graph.getBuiltins().HasProperty)) {
////                properties.add(s.predicate);
////            } else if(graph.isSubrelationOf(querySupport.getResource(s.predicate), graph.getBuiltins().DependsOn)) {
////                depends.add(s.predicate);
////            } else {
////                unknown.add(s.predicate);
////            }
////        }
////
////        depends.addAll(properties);
////
////        HashSet<Integer> roots = new HashSet<Integer>();
////        for(Integer r : resources) roots.add(r);
////
////        HashMap<Integer, Collection<Integer>> deps = new HashMap<Integer, Collection<Integer>>();
////
////        for(Statement s : statements) {
////            if(depends.contains(s.predicate)) {
////                if(newNodes.containsKey(s.subject)) roots.remove(s.object);
////                Collection<Integer> coll = deps.get(s.subject);
////                if(coll == null) {
////                    coll = new ArrayList<Integer>();
////                    deps.put(s.subject, coll);
////                }
////                coll.add(s.object);
////            }
////        }
////
//////        System.out.println("" + roots.size() + " roots.");
////
////        for(Statement s : statements) {
////            if(roots.contains(s.object) && s.predicate == instanceOf && newNodes.containsKey(s.subject)) {
////                roots.remove(s.object);
////                Collection<Integer> coll = deps.get(s.subject);
////                if(coll == null) {
////                    deps.put(s.subject, new SingletonCollection<Integer>(s.object));
////                } else {
////                    coll.add(s.object);
////                }
////            }
////        }
////
////        System.out.println("" + roots.size() + " roots.");
////
////        HashMap<Integer,Integer> roots2 = new HashMap<Integer,Integer>();
////        for(Statement s : statements) {
////            if(depends.contains(s.predicate)) {
////                if(roots.contains(s.object)) {
////                    roots2.put(s.object, s.subject);
////                }
////            }
////        }
////
////        for(Integer i : roots) {
////            System.out.println("root");
////            for(StatementImpl2 s2 : statements) {
////                int sub = core.getId(s2.getSubject());
////                if(sub == i) {
////                    System.out.println("-" + g.adapt(s2.getPredicate(), g.getBuiltins().HasStringRepresentation));
////                }
////            }
////        }
////
////        System.out.println("" + roots.size() + " roots after parent search.");
////
////        System.out.println("-found " + properties.size() + " property relations");
////        for(Integer i : properties) {
////            System.out.println("-" + graph.adapt(querySupport.getResource(i), graph.getBuiltins().HasStringRepresentation));
////        }
////        System.out.println("-found " + depends.size() + " depends on relations");
////        for(Integer i : depends) {
////            System.out.println("-" + graph.adapt(querySupport.getResource(i), graph.getBuiltins().HasStringRepresentation));
////        }
////        System.out.println("-found " + unknown.size() + " other relations");
////        for(Integer i : unknown) {
////            System.out.println("-" + graph.adapt(querySupport.getResource(i), graph.getBuiltins().HasStringRepresentation));
////        }
////
////        computeCoverages(newNodes, querySupport, statements);
////        cluster(roots2, newNodes, clusteringSupport, querySupport, graph, deps);
////
////        System.out.println("finished clustering");
////
////        long cid = clusteringSupport.newClusterId();
//
//        long defaultCluster = 0;
//        ReclusterIterator it3 = info.getReclusterIterator();
//        while(it3.hasNext()) {
//
//            int id = it3.getReclusterResourceId();
//
//            Long cluster = assignment2.get(id);
//            if(cluster == null) {
//                if(defaultCluster == 0) defaultCluster = clusteringSupport.newClusterId();
//                cluster = defaultCluster;
//            }
//
//            it3.setReclusterResourceCluster(cluster);
////            it3.setReclusterResourceCluster(cid);
//            it3.advance(); // advance implied by the hasNext()/advance() iterator pattern
//
////            if(newContexts.contains(id)) {
////                it3.setReclusterResourceCluster(clusteringSupport.newClusterId());
////                it3.advance();
////            } else {
////                NewCluster t = assignment.get(id);
////                it3.setReclusterResourceCluster(t.id);
////                it3.advance();
////            }
//        }
//
////        } catch(Throwable t) {
////            t.printStackTrace();
////        }
//    }
//
//    private HashMap<Resource, Pair<Resource, Integer>> contextCache = new HashMap<Resource, Pair<Resource, Integer>>();
//
//    HashSet<Integer> newContexts;
//    HashMap<Integer, Long> assignment2;
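//
//    // Groups new resources by their type and hangs them off per-type context
//    // resources; a context seemingly holds at most 10000 instances ('fill')
//    // before a new one with a fresh cluster id is created. Cluster choices
//    // are recorded in assignment2 for recluster() to apply.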
//    public void createContexts(HashMap<Resource, Resource> newResources, WriteGraph g, QuerySupport querySupport, ClusteringSupport clusteringSupport) {
//
//        newContexts = new HashSet<Integer>();
//        assignment2 = new HashMap<Integer, Long>();
//
//        HashMap<Resource, Collection<Resource>> contexts = new HashMap<Resource, Collection<Resource>>();
//        for(Map.Entry<Resource, Resource> entry : newResources.entrySet()) {
//
//            assert(entry.getKey() != null);
//            assert(entry.getValue() != null);
//
//            Collection<Resource> coll = contexts.get(entry.getValue());
//            if(coll == null) {
//                coll = new ArrayList<Resource>();
//                contexts.put(entry.getValue(), coll);
//            }
//            coll.add(entry.getKey());
//        }
//
////        long newClusterId = clusteringSupport.newClusterId();
//
//        for(Map.Entry<Resource, Collection<Resource>> entry : contexts.entrySet()) {
//
//            Resource context = g.getBuiltins().RootLibrary;
//            Resource type = entry.getKey();
//
//            Resource typeContext = null;
//            long fill = 10000;
//            long assignedClusterId = 0;
//
////            Collection<Resource> ctxs = g.getObjects(context, type);
//
//            Pair<Resource, Integer> contextPair = contextCache.get(type);
//            if(contextPair != null) {
//                typeContext = contextPair.first;
//                fill = contextPair.second;
//                assignedClusterId = clusteringSupport.getCluster(typeContext);
//            }
//
////            System.out.println("No existing context found in " + context.getResourceId() + " for type " + type.getResourceId());
////
////            for(Resource ctx : ctxs) {
////                long clusterId = clusteringSupport.getCluster(ctx);
////                long size = clusteringSupport.getClusterSizeCache(clusterId);
////                if(size < 500000) {
////                    typeContext = ctx;
////                    fill = (long)(10000.0 * ((double)size / 500000.0));
////                    System.out.println("Append to existing context " + clusteringSupport.getCluster(ctx) + "(res=" + typeContext.getResourceId() + ") with size " + clusteringSupport.getClusterSize(clusteringSupport.getCluster(ctx)));
////                    assignedClusterId = clusterId;
////                }
////                System.out.println("Context cluster size was " + clusteringSupport.getClusterSize(clusteringSupport.getCluster(ctx)));
////            }
////
////            if(ctxs.size() == 0) System.out.println("No contexts found in " + context.getResourceId() + " for type " + type.getResourceId());
//
//            for(Resource newResource : entry.getValue()) {
//
//                if(fill >= 10000) {
//                    typeContext = g.newResource();
//                    g.addStatement(context, type, type, typeContext);
//                    g.addStatement(typeContext, g.getBuiltins().Inherits, g.getBuiltins().SupertypeOf, type);
//                    newContexts.add(querySupport.getId(typeContext));
//                    assignedClusterId = clusteringSupport.newClusterId();
//                    assignment2.put(querySupport.getId(typeContext), assignedClusterId);
//                    fill = 0; // reset implied by the cached fill count
//                }
//
//                assignment2.put(querySupport.getId(newResource), assignedClusterId);
//                g.addStatement(typeContext, g.getBuiltins().HasInstance, g.getBuiltins().InstanceOf, newResource);
//                fill++; // increment implied by the fill threshold above
//            }
//
//            contextCache.put(type, new Pair<Resource, Integer>(typeContext, (int)fill));
//        }
//    }
//}