bundles/org.simantics.db.procore/src/fi/vtt/simantics/procore/internal/ClusteringAlgorithmImpl.java

   1 /*******************************************************************************
   2  * Copyright (c) 2007, 2010 Association for Decentralized Information Management
   3  * in Industry THTH ry.
   4  * All rights reserved. This program and the accompanying materials
   5  * are made available under the terms of the Eclipse Public License v1.0
   6  * which accompanies this distribution, and is available at
   7  * http://www.eclipse.org/legal/epl-v10.html
   8  *
   9  * Contributors:
  10  *     VTT Technical Research Centre of Finland - initial API and implementation
  11  *******************************************************************************/
  12 package fi.vtt.simantics.procore.internal;
  13 //package fi.vtt.simantics.procore.internal2;
  14 //
  15 //import java.util.ArrayList;
  16 //import java.util.Collection;
  17 //import java.util.HashMap;
  18 //import java.util.HashSet;
  19 //import java.util.Map;
  20 //
  21 //import org.simantics.db.ReadGraph;
  22 //import org.simantics.db.Resource;
  23 //import org.simantics.db.WriteGraph;
  24 //import org.simantics.db.common.queries.QueryProvider;
  25 //import org.simantics.db.queries.QuerySupport;
  26 //import org.simantics.utils.datastructures.Pair;
  27 //
  28 //import fi.vtt.simantics.procore.internal2.ClusteringInformation.ReclusterIterator;
  29 //
  30 //class ClusteringAlgorithmImpl implements ClusteringAlgorithm {
  31 //
  32 //    HashMap<Integer, NewCluster> assignment;
  33 //
  34 //    class NewCluster {
  35 //        int existing;
  36 //        int root;
  37 //        int size = 0;
  38 //        long id = 0;
  39 //        ArrayList<Integer> ids = new ArrayList<Integer>();
  40 //        public NewCluster(int existing, int root) {
  41 //            this.existing = existing;
  42 //            this.root = root;
  43 //        }
  44 //        public void grow(Integer id) {
  45 //            size++;
  46 //            assignment.put(id, this);
  47 //            ids.add(id);
  48 //        }
  49 //        public void merge(NewCluster other) {
  50 //            assert(other != this);
  51 //            for(int i : other.ids) grow(i);
  52 //            other.size = 0;
  53 //        }
  54 //        public int size() {
  55 //            return size;
  56 //        }
  57 //    }
  58 //
  59 //    class CoverageNode {
  60 //
  61 //        public int seed;
  62 //        public int lastCoverage = 1;
  63 //        public int coverage = 1;
  64 //
  65 //        public CoverageNode(int id) {
  66 //            this.seed = id;
  67 //        }
  68 //
  69 //    }
  70 //
  71 //    int instanceOf;
  72 //    int consistsOf;
  73 //    int dependsOn;
  74 //
  75 //    HashSet<Integer> properties;
  76 //    HashSet<Integer> depends;
  77 //    HashSet<Integer> unknown;
  78 //
  79 //    class Statement {
  80 //        final public int subject;
  81 //        final public int predicate;
  82 //        final public int object;
  83 //        public Statement(int s, int p, int o) {
  84 //            //System.out.println("new Statement(" + s + "," + p + "," + o + ")");
  85 //            subject = s;
  86 //            predicate = p;
  87 //            object = o;
  88 //        }
  89 //    };
  90 //
  91 //    private void computeCoverages(HashMap<Integer, CoverageNode> newNodes, QuerySupport core, ArrayList<Statement> statements) {
  92 //
  93 //        for(int i=0;i<5;i++) {
  94 //            for(CoverageNode n : newNodes.values()) {
  95 //                n.lastCoverage = n.coverage;
  96 //                n.coverage = 1;
  97 //            }
  98 //            for(Statement s : statements) {
  99 //                if(depends.contains(s.predicate)) {
 100 //                    CoverageNode sn = newNodes.get(s.subject);
 101 //                    CoverageNode on = newNodes.get(s.object);
 102 //                    if(sn != null && on != null) {
 103 //                        sn.coverage += on.lastCoverage;
 104 //                    }
 105 //                }
 106 //            }
 107 //        }
 108 //
 109 //    }
 110 //
 111 //    private final int MAX_COVERAGE = 5000;
 112 //    private final int MAX_CLUSTER = 5000;
 113 //    private final int MAX_CLUSTER_2 = 15000;
 114 //
 115 //    private void clusterNode(CoverageNode node, HashMap<Integer, CoverageNode> newNodes, QuerySupport core, HashMap<Integer, Collection<Integer>> deps, HashSet<CoverageNode> visited, NewCluster topCluster, NewCluster currentCluster, ArrayList<NewCluster> clusters) {
 116 //
 117 //        assert(node != null);
 118 //
 119 //        if(visited.contains(node)) return;
 120 //        visited.add(node);
 121 //
 122 //        if(node.coverage > MAX_COVERAGE) {
 123 //            topCluster.grow(node.seed);
 124 //        } else {
 125 //            currentCluster.grow(node.seed);
 126 //        }
 127 //
 128 //        Collection<Integer> dc = deps.get(node.seed);
 129 //        if(dc == null) return;
 130 //
 131 //        for(Integer i : dc) {
 132 //
 133 //            CoverageNode sn = newNodes.get(i);
 134 //            if(sn != null) {
 135 //
 136 //                //System.out.println("traverse: " + node.coverage + " " + sn.coverage + " " + currentCluster.size());
 137 //
 138 //                if(node.coverage > MAX_COVERAGE && sn.coverage < MAX_COVERAGE) {
 139 //
 140 //                    if(currentCluster.size() > MAX_CLUSTER) {
 141 ////                        System.out.println("new cluster: " + node.coverage + " " + sn.coverage + " " + currentCluster.size());
 142 //                        currentCluster = new NewCluster(0, node.seed);
 143 //                        clusters.add(currentCluster);
 144 //                    } else {
 145 ////                        System.out.println("continue cluster: " + node.coverage + " " + sn.coverage + " " + currentCluster.size());
 146 //                    }
 147 //
 148 //                }
 149 //
 150 //                clusterNode(sn, newNodes, core, deps, visited, topCluster, currentCluster, clusters);
 151 //
 152 //            }
 153 //
 154 //        }
 155 //
 156 //    }
 157 //
 158 //    private void combineExistingSiblings(ArrayList<NewCluster> clusters, ClusteringSupport support) {
 159 //
 160 //        HashMap<Integer, ArrayList<NewCluster>> siblings = new HashMap<Integer, ArrayList<NewCluster>>();
 161 //        for(NewCluster cluster : clusters) {
 162 //
 163 //            if(cluster.size() < MAX_CLUSTER && cluster.existing > 0) {
 164 //
 165 //                ArrayList<NewCluster> list = siblings.get(cluster.existing);
 166 //                if(list == null) {
 167 //                    list = new ArrayList<NewCluster>();
 168 //                    siblings.put(cluster.existing, list);
 169 //                }
 170 //                list.add(cluster);
 171 //
 172 //            }
 173 //
 174 //        }
 175 //
 176 //        for(ArrayList<NewCluster> list : siblings.values()) {
 177 //
 178 //            if(list.size() < 2) continue;
 179 //
 180 ////            System.out.println("Processing shared root with  " + list.size() + " new clusters.");
 181 //
 182 //            NewCluster current = null;
 183 //
 184 //            for(NewCluster cluster : list) {
 185 //
 186 //                if(current == null) {
 187 //                    current = cluster;
 188 //                } else {
 189 //                    //System.out.println("Merging to sibling cluster " + current.size + " <-> " + cluster.size);
 190 //                    current.merge(cluster);
 191 //                }
 192 //
 193 //                if(current.size > MAX_CLUSTER) {
 194 //                    current.id = support.newClusterId();
 195 //                    current = null;
 196 //                }
 197 //
 198 //            }
 199 //
 200 //        }
 201 //
 202 //    }
 203 //
 204 //
 205 //   private void combineRootSiblings(ArrayList<NewCluster> clusters, ClusteringSupport support) {
 206 //
 207 //       HashMap<Integer, ArrayList<NewCluster>> siblings = new HashMap<Integer, ArrayList<NewCluster>>();
 208 //       for(NewCluster cluster : clusters) {
 209 //
 210 //           if(cluster.size() < MAX_CLUSTER) {
 211 //
 212 //               ArrayList<NewCluster> list = siblings.get(cluster.root);
 213 //               if(list == null) {
 214 //                   list = new ArrayList<NewCluster>();
 215 //                   siblings.put(cluster.root, list);
 216 //               }
 217 //               list.add(cluster);
 218 //
 219 //           }
 220 //
 221 //       }
 222 //
 223 //       for(ArrayList<NewCluster> list : siblings.values()) {
 224 //
 225 //           if(list.size() < 2) continue;
 226 //
 227 ////           System.out.println("Processing shared root with  " + list.size() + " new clusters.");
 228 //
 229 //           NewCluster current = null;
 230 //
 231 //           for(NewCluster cluster : list) {
 232 //
 233 //               if(current == null) {
 234 //                   current = cluster;
 235 //               } else {
 236 ////                   System.out.println("Merging to sibling cluster " + current.size + " <-> " + cluster.size);
 237 //                   current.merge(cluster);
 238 //               }
 239 //
 240 //               if(current.size > MAX_CLUSTER) {
 241 //                   current.id = support.newClusterId();
 242 //                   current = null;
 243 //               }
 244 //
 245 //           }
 246 //
 247 //       }
 248 //
 249 //   }
 250 //
 251 //    private void cluster(HashMap<Integer, Integer> roots, HashMap<Integer, CoverageNode> newNodes, ClusteringSupport support, QuerySupport core, ReadGraph graph, HashMap<Integer, Collection<Integer>> deps) {
 252 //
 253 //        ArrayList<NewCluster> clusters = new ArrayList<NewCluster>();
 254 //        HashSet<CoverageNode> visited = new HashSet<CoverageNode>();
 255 //        for(Map.Entry<Integer, Integer> e : roots.entrySet()) {
 256 //            NewCluster topCluster = new NewCluster(e.getValue(), e.getKey());
 257 //            NewCluster currentCluster = new NewCluster(e.getValue(), e.getKey());
 258 //            clusterNode(newNodes.get(e.getKey()), newNodes, core, deps, visited, topCluster, currentCluster, clusters);
 259 //            if(topCluster.size > 0) clusters.add(topCluster);
 260 //            if(currentCluster.size > 0) clusters.add(currentCluster);
 261 //        }
 262 //
 263 ////        System.out.println("Initial clustering produced " + clusters.size() + " clusters.");
 264 //
 265 //        combineRootSiblings(clusters, support);
 266 //        combineExistingSiblings(clusters, support);
 267 //
 268 //        for(NewCluster cluster : clusters) {
 269 //
 270 //            if(cluster.size() > 0 && cluster.size() < MAX_CLUSTER) {
 271 //
 272 //                if(!newNodes.containsKey(cluster.existing)) {
 273 //
 274 //                    Collection<Resource> siblings2 = graph.getObjects(core.getResource(cluster.root), graph.getBuiltins().DependsOn);
 275 //
 276 //                    for(Resource sibling : siblings2) {
 277 //
 278 //                        if(newNodes.get(core.getId(sibling)) == null) {
 279 //
 280 //                            long existing = support.getCluster(sibling);
 281 //                            long existingSize = support.getClusterSizeCache(existing);
 282 //                            if(existingSize < MAX_CLUSTER_2) {
 283 //                                cluster.id = existing;
 284 //                                System.out.println("Merging to existing cluster " + existing + " with size " + existingSize);
 285 //                            } else {
 286 //                                System.out.println(" -sibling too large (" + existingSize + ")");
 287 //                            }
 288 //
 289 //                        }
 290 //
 291 //                    }
 292 //
 293 //                }
 294 //
 295 //            }
 296 //
 297 //            if(cluster.size > 0 && cluster.id == 0) {
 298 //                cluster.id = support.newClusterId();
 299 //            }
 300 //
 301 //        }
 302 //
 303 ////        System.out.println("Clustering report:");
 304 //
 305 //        int total = 0;
 306 //        int totalClusters = 0;
 307 //        for(NewCluster c : clusters) {
 308 //            if(c.size() > 0) {
 309 ////                System.out.println("-" + c.size() + " elements - id = " + c.id);
 310 //                total += c.size();
 311 //                totalClusters++;
 312 //            }
 313 //        }
 314 //
 315 //        //System.out.println("Total of " + total + " elements in " + totalClusters + " clusters.");
 316 //
 317 //    }
 318 //
 319 //    @Override
 320 //    public void recluster(ClusteringInformation info, ClusteringSupport clusteringSupport, ReadGraph graph, QuerySupport querySupport, QueryProvider queryProvider) {
 321 //
 322 ////        Collection<Integer> resources = new ArrayList<Integer>();
 323 ////        ReclusterIterator it = info.getReclusterIterator();
 324 ////        if(it == null) return;
 325 ////
 326 ////        while(it.hasNext()) {
 327 ////            it.advance();
 328 ////            resources.add(it.getReclusterResourceId());
 329 ////        }
 330 ////
 331 ////        ArrayList<Statement> statements = new ArrayList<Statement>();
 332 ////        AddedStatmentsIterator it2 = info.getAddedStatmentsIterator();
 333 ////        while(it2.hasNext()) {
 334 ////            it2.advance();
 335 ////            statements.add(new Statement(it2.getAddedSubjectId(), it2.getAddedPredicateId(), it2.getAddedObjectId()));
 336 ////        }
 337 ////
 338 ////        //System.out.println("Clustering " + resources.size() + " new resources,  " + statements.size() + " new statements.");
 339 ////
 340 ////        try {
 341 ////
 342 ////            HashMap<Integer, CoverageNode> newNodes = new HashMap<Integer, CoverageNode>();
 343 ////
 344 ////            instanceOf = querySupport.getId(graph.getBuiltins().InstanceOf);
 345 ////            consistsOf = querySupport.getId(graph.getBuiltins().ConsistsOf);
 346 ////            dependsOn = querySupport.getId(graph.getBuiltins().DependsOn);
 347 ////
 348 ////            assignment = new HashMap<Integer, NewCluster>();
 349 ////            properties = new HashSet<Integer>();
 350 ////            depends = new HashSet<Integer>();
 351 ////            unknown = new HashSet<Integer>();
 352 ////
 353 ////            depends.add(consistsOf);
 354 ////
 355 ////            for(Integer r : resources) {
 356 ////                newNodes.put(r, new CoverageNode(r));
 357 ////            }
 358 ////
 359 ////            for(Statement s : statements) {
 360 ////
 361 ////                if(unknown.contains(s.predicate)) continue;
 362 ////                if(depends.contains(s.predicate)) continue;
 363 ////                if(properties.contains(s.predicate)) continue;
 364 ////                if(s.predicate == instanceOf) continue;
 365 ////                if(s.predicate == consistsOf) continue;
 366 ////
 367 ////                if(graph.isSubrelationOf(querySupport.getResource(s.predicate), graph.getBuiltins().HasProperty)) {
 368 ////                    properties.add(s.predicate);
 369 ////                } else if(graph.isSubrelationOf(querySupport.getResource(s.predicate), graph.getBuiltins().DependsOn)) {
 370 ////                    depends.add(s.predicate);
 371 ////                } else {
 372 ////                    unknown.add(s.predicate);
 373 ////                }
 374 ////
 375 ////            }
 376 ////
 377 ////            depends.addAll(properties);
 378 ////
 379 ////            HashSet<Integer> roots = new HashSet<Integer>();
 380 ////            for(Integer r : resources) roots.add(r);
 381 ////
 382 ////            HashMap<Integer, Collection<Integer>> deps = new HashMap<Integer, Collection<Integer>>();
 383 ////
 384 ////            for(Statement s : statements) {
 385 ////
 386 ////                if(depends.contains(s.predicate)) {
 387 ////                    if(newNodes.containsKey(s.subject)) roots.remove(s.object);
 388 ////                    Collection<Integer> coll = deps.get(s.subject);
 389 ////                    if(coll == null) {
 390 ////                        coll = new ArrayList<Integer>();
 391 ////                        deps.put(s.subject, coll);
 392 ////                    }
 393 ////                    coll.add(s.object);
 394 ////                }
 395 ////
 396 ////            }
 397 ////
 398 //////            System.out.println("" + roots.size() + " roots.");
 399 ////
 400 ////            for(Statement s : statements) {
 401 ////
 402 ////                if(roots.contains(s.object) && s.predicate == instanceOf && newNodes.containsKey(s.subject)) {
 403 ////                    roots.remove(s.object);
 404 ////                    Collection<Integer> coll = deps.get(s.subject);
 405 ////                    if(coll == null) {
 406 ////                        deps.put(s.subject, new SingletonCollection<Integer>(s.object));
 407 ////                    } else {
 408 ////                        coll.add(s.object);
 409 ////                    }
 410 ////                }
 411 ////
 412 ////            }
 413 //
 414 ////            System.out.println("" + roots.size() + " roots.");
 415 //
 416 ////            HashMap<Integer,Integer> roots2 = new HashMap<Integer,Integer>();
 417 ////            for(Statement s : statements) {
 418 ////                if(depends.contains(s.predicate)) {
 419 ////                    if(roots.contains(s.object)) {
 420 ////                        roots2.put(s.object, s.subject);
 421 ////                    }
 422 ////                }
 423 ////            }
 424 //
 425 ////            for(Integer i : roots) {
 426 ////                System.out.println("root");
 427 ////                for(StatementImpl2 s2 : statements) {
 428 ////                    int sub = core.getId(s2.getSubject());
 429 ////                    if(sub == i) {
 430 ////                        System.out.println("-" + g.adapt(s2.getPredicate(), g.getBuiltins().HasStringRepresentation));
 431 ////                    }
 432 ////                }
 433 ////            }
 434 //
 435 ////            System.out.println("" + roots.size() + " roots after parent search.");
 436 //
 437 ////            System.out.println("-found " + properties.size() + " property relations");
 438 ////            for(Integer i : properties) {
 439 ////                System.out.println("-" + graph.adapt(querySupport.getResource(i), graph.getBuiltins().HasStringRepresentation));
 440 ////            }
 441 ////            System.out.println("-found " + depends.size() + " depends on relations");
 442 ////            for(Integer i : depends) {
 443 ////                System.out.println("-" + graph.adapt(querySupport.getResource(i), graph.getBuiltins().HasStringRepresentation));
 444 ////            }
 445 ////            System.out.println("-found " + unknown.size() + " other relations");
 446 ////            for(Integer i : unknown) {
 447 ////                System.out.println("-" + graph.adapt(querySupport.getResource(i), graph.getBuiltins().HasStringRepresentation));
 448 ////            }
 449 //
 450 ////            computeCoverages(newNodes, querySupport, statements);
 451 ////            cluster(roots2, newNodes, clusteringSupport, querySupport, graph, deps);
 452 //
 453 ////            System.out.println("finished clustering");
 454 //
 455 ////            long cid = clusteringSupport.newClusterId();
 456 //
 457 //            long defaultCluster = 0;
 458 //            ReclusterIterator it3 = info.getReclusterIterator();
 459 //            while(it3.hasNext()) {
 460 //              int id = it3.getReclusterResourceId();
 461 //
 462 //              Long cluster = assignment2.get(id);
 463 //              if(cluster == null) {
 464 //                  if(defaultCluster == 0) defaultCluster = clusteringSupport.newClusterId();
 465 //                  cluster = defaultCluster;
 466 //              }
 467 //
 468 //              it3.setReclusterResourceCluster(cluster);
 469 ////                it3.setReclusterResourceCluster(cid);
 470 //                it3.advance();
 471 //
 472 ////                    if(newContexts.contains(id)) {
 473 ////                    it3.setReclusterResourceCluster(clusteringSupport.newClusterId());
 474 ////                    it3.advance();
 475 ////                } else {
 476 ////                    NewCluster t = assignment.get(id);
 477 ////                    it3.setReclusterResourceCluster(t.id);
 478 ////                    it3.advance();
 479 ////                }
 480 //
 481 //            }
 482 //
 483 ////        } catch(Throwable t) {
 484 ////
 485 ////            t.printStackTrace();
 486 ////
 487 ////        }
 488 //
 489 //    }
 490 //
 491 //    private HashMap<Resource, Pair<Resource, Integer>> contextCache = new HashMap<Resource, Pair<Resource, Integer>>();
 492 //
 493 //    HashSet<Integer> newContexts;
 494 //
 495 //    HashMap<Integer, Long> assignment2;
 496 //
 497 //    @Override
 498 //    public void createContexts(HashMap<Resource, Resource> newResources, WriteGraph g, QuerySupport querySupport, ClusteringSupport clusteringSupport) {
 499 //
 500 //        newContexts = new HashSet<Integer>();
 501 //        assignment2 = new HashMap<Integer, Long>();
 502 //
 503 //        HashMap<Resource, Collection<Resource>> contexts = new HashMap<Resource, Collection<Resource>>();
 504 //        for(Map.Entry<Resource, Resource> entry : newResources.entrySet()) {
 505 //
 506 //            assert(entry.getKey() != null);
 507 //            assert(entry.getValue() != null);
 508 //
 509 //            Collection<Resource> coll = contexts.get(entry.getValue());
 510 //            if(coll == null) {
 511 //                coll = new ArrayList<Resource>();
 512 //                contexts.put(entry.getValue(), coll);
 513 //            }
 514 //            coll.add(entry.getKey());
 515 //
 516 //        }
 517 //
 518 ////        long newClusterId = clusteringSupport.newClusterId();
 519 ////
 520 //        for(Map.Entry<Resource, Collection<Resource>> entry : contexts.entrySet()) {
 521 //
 522 //            Resource context = g.getBuiltins().RootLibrary;
 523 //            Resource type = entry.getKey();
 524 //
 525 //            Resource typeContext = null;
 526 //            long fill = 10000;
 527 //
 528 //            long assignedClusterId = 0;
 529 //
 530 ////            Collection<Resource> ctxs = g.getObjects(context, type);
 531 //
 532 //            Pair<Resource, Integer> contextPair = contextCache.get(type);
 533 //            if(contextPair != null) {
 534 //
 535 //              typeContext = contextPair.first;
 536 //              fill = contextPair.second;
 537 //              assignedClusterId = clusteringSupport.getCluster(typeContext);
 538 //
 539 //            } else {
 540 //
 541 ////                    System.out.println("No existing context found in " + context.getResourceId() + " for type " + type.getResourceId());
 542 //
 543 //            }
 544 //
 545 ////            for(Resource ctx : ctxs) {
 546 ////                long clusterId = clusteringSupport.getCluster(ctx);
 547 ////                long size = clusteringSupport.getClusterSizeCache(clusterId);
 548 ////                if(size < 500000) {
 549 ////                    typeContext = ctx;
 550 ////                    fill = (long)(10000.0 * ((double)size / 500000.0));
 551 ////                    System.out.println("Append to existing context "  + clusteringSupport.getCluster(ctx) + "(res=" + typeContext.getResourceId() + ") with size " + clusteringSupport.getClusterSize(clusteringSupport.getCluster(ctx)));
 552 ////                    assignedClusterId = clusterId;
 553 ////                    break;
 554 ////                } else {
 555 ////                    System.out.println("Context cluster size was " + clusteringSupport.getClusterSize(clusteringSupport.getCluster(ctx)));
 556 ////                }
 557 ////            }
 558 ////
 559 ////            if(ctxs.size() == 0) System.out.println("No contexts found in " + context.getResourceId() + " for type " + type.getResourceId());
 560 //
 561 //            for(Resource newResource : entry.getValue()) {
 562 //
 563 //                if(fill >= 10000) {
 564 //                    typeContext = g.newResource();
 565 //                    g.addStatement(context, type, type, typeContext);
 566 //                    g.addStatement(typeContext, g.getBuiltins().Inherits, g.getBuiltins().SupertypeOf, type);
 567 //                    newContexts.add(querySupport.getId(typeContext));
 568 //                    fill = 0;
 569 //                    assignedClusterId = clusteringSupport.newClusterId();
 570 //                    assignment2.put(querySupport.getId(typeContext), assignedClusterId);
 571 //                }
 572 //
 573 //                assignment2.put(querySupport.getId(newResource), assignedClusterId);
 574 //
 575 //                g.addStatement(typeContext, g.getBuiltins().HasInstance, g.getBuiltins().InstanceOf, newResource);
 576 //                fill++;
 577 //
 578 //            }
 579 //
 580 //            contextCache.put(type, new Pair<Resource, Integer>(typeContext, (int)fill));
 581 //
 582 //        }
 583 //
 584 //    }
 585 //
 586 //}