Fixed DirectQuerySupportImpl small cluster String value retrieval 27/2027/1
authorTuukka Lehtonen <tuukka.lehtonen@semantum.fi>
Fri, 24 Aug 2018 10:45:16 +0000 (13:45 +0300)
committerTuukka Lehtonen <tuukka.lehtonen@semantum.fi>
Fri, 24 Aug 2018 10:45:16 +0000 (13:45 +0300)
Previous too-optimized implementation had multiple flaws:
* Index calculation did not have enough parenthesis in it which meant
  that the value table index was sometimes calculated incorrectly.
  This lead to string literal values being read incorrectly.
* Did not take into account string literals longer than 127 characters
* Did not take into account that string literals are modified-utf-8
  encoded and simply decoded them as US-ASCII bytes. Essentially all
  resources with name containing special characters were indexed
  incorrectly up until now.

All of these flaws have been in the code for ages.

gitlab #86

Change-Id: I500114d3e2cff76370433b929ec8b8124659ce33

bundles/org.simantics.db.procore/src/fi/vtt/simantics/procore/internal/DirectQuerySupportImpl.java

index 06c5ebfed22a0f3c3d51b727e1c0e1dc2338313a..fb6a1f7638e6716e283e3d654775ff665092dc36 100644 (file)
@@ -29,6 +29,13 @@ import org.simantics.db.service.DirectQuerySupport;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+/**
+ * Note that the direct value retrieval in this implementation only supports
+ * String-type literals - nothing else!
+ * 
+ * This implementation is mainly intended for optimizing database indexing
+ * performance.
+ */
 public class DirectQuerySupportImpl implements DirectQuerySupport {
 
     private static final Logger LOGGER = LoggerFactory.getLogger(DirectQuerySupportImpl.class);
@@ -179,7 +186,6 @@ public class DirectQuerySupportImpl implements DirectQuerySupport {
                assert(subject != null);
 
         final ForPossibleRelatedValueProcedure<T> proc = (ForPossibleRelatedValueProcedure<T>)procedure;
-        final RelationInfo info = proc.info;
         
         final ReadGraphImpl impl = (ReadGraphImpl)graph;
         final int subjectId = ((ResourceImpl)subject).id;
@@ -191,7 +197,7 @@ public class DirectQuerySupportImpl implements DirectQuerySupport {
         
 //        if(callerThread == suggestSchedule) {
                
-//             if(info.isFunctional) {
+//             if(proc.info.isFunctional) {
         try {
                T result = getRelatedValue4(impl, subjectId, proc);
                try {
@@ -248,7 +254,6 @@ public class DirectQuerySupportImpl implements DirectQuerySupport {
                assert(subject != null);
 
         final ForPossibleRelatedValueContextProcedure<C, T> proc = (ForPossibleRelatedValueContextProcedure<C, T>)procedure;
-        final RelationInfo info = proc.info;
         
         final ReadGraphImpl impl = (ReadGraphImpl)graph;
         final int subjectId = ((ResourceImpl)subject).id;
@@ -258,7 +263,7 @@ public class DirectQuerySupportImpl implements DirectQuerySupport {
         
 //        impl.inc();
                
-//        if(info.isFunctional) {
+//        if(proc.info.isFunctional) {
 //        } else {
 //             getRelatedValue4(impl, subjectId, context, proc);
 //        }
@@ -453,6 +458,7 @@ public class DirectQuerySupportImpl implements DirectQuerySupport {
                
        }
        
+       @SuppressWarnings("unchecked")
        private <T> T getValue4(final ReadGraphImpl graph, final ClusterImpl containerCluster, final int subject, final ForPossibleRelatedValueProcedure<T> procedure) throws DatabaseException {
                
                Object result = null;
@@ -475,6 +481,7 @@ public class DirectQuerySupportImpl implements DirectQuerySupport {
                                }
                        }
                        
+                       // FIXME: throw something here instead
                        return (T)"name";
                        
                }
@@ -526,6 +533,7 @@ public class DirectQuerySupportImpl implements DirectQuerySupport {
        
        }
        
+       @SuppressWarnings("unchecked")
        private <C, T> T getValue4(final ReadGraphImpl graph, final ClusterImpl containerCluster, final int subject, final C context, final ForPossibleRelatedValueContextProcedure<C, T> procedure) throws DatabaseException {
                
                Object result = null;
@@ -548,6 +556,7 @@ public class DirectQuerySupportImpl implements DirectQuerySupport {
                                }
                        }
 
+                       // FIXME: throw something here instead
                        return (T)"name";
                        
                }
@@ -717,14 +726,19 @@ public class DirectQuerySupportImpl implements DirectQuerySupport {
        }
        */
 
+       @SuppressWarnings("unchecked")
        private <T> T getDirectValue4(final ReadGraphImpl graph, final ClusterBig cluster, final int subject) throws DatabaseException {
 
                byte[] bytes = cluster.getValue(subject, session.clusterTranslator);
-               return (T)utf(bytes);
+               return (T) utf(bytes, 0);
 
        }
 
+       @SuppressWarnings("unchecked")
        private <T> T getDirectValue4(final ReadGraphImpl graph, final ClusterSmall cluster, final int subject) throws DatabaseException {
+               // Note: this code avoids creating an intermediate byte[]
+               // to store the encoded string bytes and reads the UTF string
+               // from the value table byte[] directly into String instead.
 
                ResourceTableSmall rt = cluster.resourceTable;
                ValueTableSmall vt = cluster.valueTable;
@@ -733,31 +747,24 @@ public class DirectQuerySupportImpl implements DirectQuerySupport {
                long[] ls = rt.table;
 
                int index = ((subject&0xFFF) << 1) - 1 + rt.offset;
+               int valueIndex = ((int)(ls[index] >>> 24) & 0x3FFFFF) + vt.offset;
 
-               int valueIndex = (int)(ls[index] >>> 24) & 0x3FFFFF + vt.offset;
-
-               int size = (int)bs[valueIndex++]-1;
-               if(size <= 0) {
+               int size = bs[valueIndex++];
+               if (size < 0) // two byte size
+                       size = (int)(((size & 0x7F) << 8) | (bs[valueIndex++] & 0xFF));
+               if (size <= 0)
                        throw new DatabaseException("No value for " + subject); 
-               }
-               
-               char[] chars = new char[size];
-               valueIndex++;
-               for(int i=0;i<size;i++) {
-                       chars[i] = (char)bs[valueIndex++];
-               }
-
-               return (T)new String(chars);
 
+               return (T) utf(bs, valueIndex);
        }
 
-       private final String utf(byte[] bytes) throws AssumptionException {
+       private static final String utf(byte[] bytes, int offset) throws AssumptionException {
 
                if(bytes == null) return null;
 
                // Read databoard int32 using Length encoding
                // https://dev.simantics.org/index.php/Databoard_Specification#Length
-               int index = 0;
+               int index = offset;
                int length = bytes[index++]&0xff; 
                if(length >= 0x80) {
                        if(length >= 0xc0) {