Merge commit '145a2884933f2ffdd48d6835729e58f1152d274e'

[simantics/platform.git] / bundles / org.simantics.databoard / src / org / simantics / databoard / util / URIStringUtils.java
diff --git a/bundles/org.simantics.databoard/src/org/simantics/databoard/util/URIStringUtils.java b/bundles/org.simantics.databoard/src/org/simantics/databoard/util/URIStringUtils.java

index dde498a2c38bdcadbe3fb025cce664e2e0575c74..a11579f07e774404394e448daf3a4d4799651512 100644 (file)
--- a/bundles/org.simantics.databoard/src/org/simantics/databoard/util/URIStringUtils.java
+++ b/bundles/org.simantics.databoard/src/org/simantics/databoard/util/URIStringUtils.java
@@ -45,8 +45,7 @@
  \r
  package org.simantics.databoard.util;\r
  \r
  \r
  package org.simantics.databoard.util;\r
  \r
-import java.nio.charset.Charset;\r
-import java.util.ArrayList;\r
+import java.util.Arrays;\r
  import java.util.List;\r
  \r
  \r
  import java.util.List;\r
  \r
  \r
@@ -194,13 +193,14 @@ public final class URIStringUtils {
              return name;\r
      }\r
  \r
              return name;\r
      }\r
  \r
-    final private static int HTTP_POSITION = "http://".length();\r
+    final private static String HTTP_PREFIX = "http://";\r
+    final private static int HTTP_POSITION = HTTP_PREFIX.length();\r
  \r
      public static String[] splitURI(String uri) {\r
          int nextPathSeparator = uri.lastIndexOf(URIStringUtils.NAMESPACE_PATH_SEPARATOR);\r
          if (nextPathSeparator == -1) return null;\r
          if (nextPathSeparator == HTTP_POSITION - 1) {\r
  \r
      public static String[] splitURI(String uri) {\r
          int nextPathSeparator = uri.lastIndexOf(URIStringUtils.NAMESPACE_PATH_SEPARATOR);\r
          if (nextPathSeparator == -1) return null;\r
          if (nextPathSeparator == HTTP_POSITION - 1) {\r
-            if(uri.startsWith("http://")) return new String[] { "http://", uri.substring(HTTP_POSITION, uri.length()) };\r
+            if(uri.startsWith(HTTP_PREFIX)) return new String[] { HTTP_PREFIX, uri.substring(HTTP_POSITION, uri.length()) };\r
              else return null;\r
          }\r
          return new String[] {\r
              else return null;\r
          }\r
          return new String[] {\r
@@ -208,12 +208,10 @@ public final class URIStringUtils {
                  uri.substring(nextPathSeparator + 1, uri.length())\r
          };\r
      }\r
                  uri.substring(nextPathSeparator + 1, uri.length())\r
          };\r
      }\r
-    \r
+\r
      public static List<String> splitURISCL(String uri) {\r
          String[] result = splitURI(uri);\r
      public static List<String> splitURISCL(String uri) {\r
          String[] result = splitURI(uri);\r
-        ArrayList<String> list = new ArrayList<String>(result.length);\r
-        for(String s : result) list.add(s);\r
-        return list;\r
+        return Arrays.asList(result);\r
      }\r
  \r
      /**\r
      }\r
  \r
      /**\r
@@ -263,8 +261,7 @@ public final class URIStringUtils {
      public static String escapeURI(String localName) {\r
          if (localName == null)\r
              throw new NullPointerException("null local name");\r
      public static String escapeURI(String localName) {\r
          if (localName == null)\r
              throw new NullPointerException("null local name");\r
-        String result = encode(localName);\r
-        return result;\r
+        return encode(localName);\r
      }\r
  \r
      /**\r
      }\r
  \r
      /**\r
@@ -276,8 +273,7 @@ public final class URIStringUtils {
       * @return the joined namespace\r
       */\r
      public static String appendURINamespace(String namespace, String suffix) {\r
       * @return the joined namespace\r
       */\r
      public static String appendURINamespace(String namespace, String suffix) {\r
-        //return namespace + NAMESPACE_PATH_SEPARATOR + suffix;\r
-        return new StringBuffer(namespace.length() + 1 + suffix.length())\r
+        return new StringBuilder(namespace.length() + 1 + suffix.length())\r
          .append(namespace)\r
          .append(NAMESPACE_PATH_SEPARATOR)\r
          .append(suffix)\r
          .append(namespace)\r
          .append(NAMESPACE_PATH_SEPARATOR)\r
          .append(suffix)\r
@@ -293,9 +289,8 @@ public final class URIStringUtils {
       * @return the joined URI\r
       */\r
      public static String makeURI(String namespace, String localName) {\r
       * @return the joined URI\r
       */\r
      public static String makeURI(String namespace, String localName) {\r
-        //return namespace + NAMESPACE_LOCAL_SEPARATOR + escapeURI(localName);\r
          String escapedLocalName = escapeURI(localName);\r
          String escapedLocalName = escapeURI(localName);\r
-        return new StringBuffer(namespace.length() + 1 + escapedLocalName.length())\r
+        return new StringBuilder(namespace.length() + 1 + escapedLocalName.length())\r
          .append(namespace)\r
          .append(NAMESPACE_LOCAL_SEPARATOR)\r
          .append(escapedLocalName)\r
          .append(namespace)\r
          .append(NAMESPACE_LOCAL_SEPARATOR)\r
          .append(escapedLocalName)\r
@@ -332,94 +327,59 @@ public final class URIStringUtils {
      }\r
  \r
  \r
      }\r
  \r
  \r
-    final private static Charset UTF8 = Charset.forName("UTF-8");\r
-    final private static Charset ASCII = Charset.forName("US-ASCII");\r
-\r
-    /* Copied and modified from Jena 2.4 com.hp.hpl.jena.util.URIref */\r
-    private static String encode(String unicode) {\r
-        boolean needsEscapes = needsEscaping(unicode);\r
-        if (!needsEscapes)\r
-            return unicode;\r
-\r
-        byte utf8[] = unicode.getBytes(UTF8);\r
-        byte rsltAscii[] = new byte[utf8.length * 6];\r
-        int in = 0;\r
-        int out = 0;\r
-        while (in < utf8.length) {\r
-            switch (utf8[in]) {\r
-                case (byte)'a': case (byte)'b': case (byte)'c': case (byte)'d': case (byte)'e': case (byte)'f': case (byte)'g': case (byte)'h': case (byte)'i': case (byte)'j': case (byte)'k': case (byte)'l': case (byte)'m': case (byte)'n': case (byte)'o': case (byte)'p': case (byte)'q': case (byte)'r': case (byte)'s': case (byte)'t': case (byte)'u': case (byte)'v': case (byte)'w': case (byte)'x': case (byte)'y': case (byte)'z':\r
-                case (byte)'A': case (byte)'B': case (byte)'C': case (byte)'D': case (byte)'E': case (byte)'F': case (byte)'G': case (byte)'H': case (byte)'I': case (byte)'J': case (byte)'K': case (byte)'L': case (byte)'M': case (byte)'N': case (byte)'O': case (byte)'P': case (byte)'Q': case (byte)'R': case (byte)'S': case (byte)'T': case (byte)'U': case (byte)'V': case (byte)'W': case (byte)'X': case (byte)'Y': case (byte)'Z':\r
-                case (byte)'0': case (byte)'1': case (byte)'2': case (byte)'3': case (byte)'4': case (byte)'5': case (byte)'6': case (byte)'7': case (byte)'8': case (byte)'9':\r
-                case (byte)';': case (byte)'?': case (byte)':': case (byte)'@': case (byte)'=': case (byte)'+': case (byte)'$': case (byte)',':\r
-                case (byte)'-': case (byte)'_': case (byte)'.': case (byte)'!': case (byte)'~': case (byte)'*': case (byte)'\'': case (byte)'(': case (byte)')':\r
-                case (byte)'[': case (byte)']':\r
-                    rsltAscii[out] = utf8[in];\r
-                    out++;\r
-                    in++;\r
-                    break;\r
-                case (byte)' ':\r
-                    rsltAscii[out++] = (byte) '%';\r
-                    rsltAscii[out++] = '2';\r
-                    rsltAscii[out++] = '0';\r
-                    in++;\r
-                    break;\r
-                case (byte) '%':\r
-                    // [lehtonen] NOTE: all input needs to be escaped, i.e. "%01" should result in "%2501", not "%01".\r
-                    // escape+unescape is a bijection, not an idempotent operation. \r
-                    // Fall through to to escape '%' as '%25'\r
-                case (byte) '#':\r
-                case (byte) '/':\r
-                    // Fall through to escape '/'\r
-                case (byte)'&':\r
-                    // Fall through to escape '&' characters to avoid them\r
-                    // being interpreted as SGML entities.\r
-                default:\r
-                    rsltAscii[out++] = (byte) '%';\r
-                    // Get rid of sign ...\r
-                    int c = (utf8[in]) & 255;\r
-                    rsltAscii[out++] = hexEncode(c / 16);\r
-                    rsltAscii[out++] = hexEncode(c % 16);\r
-                    in++;\r
-                    break;\r
-            }\r
-        }\r
-        return new String(rsltAscii, 0, out, ASCII);\r
-    }\r
-\r
      /*\r
       * RFC 3986 section 2.2 Reserved Characters (January 2005)\r
       * !*'();:@&=+$,/?#[]\r
       */\r
      /*\r
       * RFC 3986 section 2.2 Reserved Characters (January 2005)\r
       * !*'();:@&=+$,/?#[]\r
       */\r
-    private static boolean needsEscaping(String unicode) {\r
+    private static boolean[] ESCAPED_US_ASCII_CHARS = new boolean[128];\r
+\r
+    static {\r
+        ESCAPED_US_ASCII_CHARS[' '] = true;\r
+        // IMPORTANT NOTE: every time escape is invoked, all input needs to be escaped,\r
+        // i.e. escape("%01") should result in "%2501", not "%01".\r
+        // escape and unescape form a bijection, where neither\r
+        // of them is an idempotent operation. \r
+        ESCAPED_US_ASCII_CHARS['%'] = true;\r
+        // '#' and '/' are URL segment/fragment delimiters, need to be escaped in names.\r
+        ESCAPED_US_ASCII_CHARS['#'] = true;\r
+        ESCAPED_US_ASCII_CHARS['/'] = true;\r
+        // Escape '&' characters to avoid them being interpreted as SGML entities.\r
+        ESCAPED_US_ASCII_CHARS['&'] = true;\r
+    }\r
+\r
+    private static int needsEscaping(String unicode) {\r
          int len = unicode.length();\r
          int len = unicode.length();\r
+        int escapeCount = 0;\r
          for (int i = 0; i < len; ++i) {\r
          for (int i = 0; i < len; ++i) {\r
-            switch (unicode.charAt(i)) {\r
-                case (byte)'!':\r
-                case (byte)'*':\r
-                case (byte)'\'':\r
-                case (byte)'(':\r
-                case (byte)')':\r
-                case (byte)';':\r
-                case (byte)':':\r
-                case (byte)'@':\r
-                case (byte)'=': \r
-                case (byte)'+':\r
-                case (byte)'$':\r
-                case (byte)',':\r
-                case (byte)'?':\r
-                case (byte)'~':\r
-                case (byte)'[':\r
-                case (byte)']':\r
-                    break;\r
-                case (byte)' ':\r
-                case (byte) '#':\r
-                case (byte) '%':\r
-                case (byte) '/':\r
-                case (byte)'&':\r
-                    return true;\r
+            char ch = unicode.charAt(i);\r
+            if (ch < 128 && ESCAPED_US_ASCII_CHARS[ch])\r
+                ++escapeCount;\r
+        }\r
+        return escapeCount;\r
+    }\r
+\r
+    private static String encode(String unicode) {\r
+        int needsEscapes = needsEscaping(unicode);\r
+        if (needsEscapes == 0)\r
+            return unicode;\r
+\r
+        int len = unicode.length();\r
+        char result[] = new char[(len - needsEscapes) + needsEscapes * 3];\r
+        int in = 0;\r
+        int out = 0;\r
+        while (in < len) {\r
+            char inCh = unicode.charAt(in++);\r
+            if (inCh >= 128 || !ESCAPED_US_ASCII_CHARS[inCh]) {\r
+                result[out++] = inCh;\r
+            } else {\r
+                // Only selected 7-bit US-ASCII characters are escaped\r
+                int c = inCh & 255;\r
+                result[out++] = '%';\r
+                result[out++] = (char) hexEncode(c / 16);\r
+                result[out++] = (char) hexEncode(c % 16);\r
              }\r
          }\r
              }\r
          }\r
-        return false;\r
+        return new String(result, 0, out);\r
      }\r
  \r
      private static boolean needsUnescaping(String unicode) {\r
      }\r
  \r
      private static boolean needsUnescaping(String unicode) {\r
@@ -427,13 +387,12 @@ public final class URIStringUtils {
      }\r
  \r
      /**\r
      }\r
  \r
      /**\r
-     * Convert a URI, in US-ASCII, with escaped characters taken from UTF-8, to\r
-     * the corresponding Unicode string. On ill-formed input the results are\r
-     * undefined, specifically if the unescaped version is not a UTF-8 String,\r
-     * some String will be returned.\r
+     * Convert a URI, in UTF-16 with escaped characters taken from US-ASCII, to\r
+     * the corresponding unescaped Unicode string. On ill-formed input the results are\r
+     * undefined.\r
       * \r
       * @param uri the uri, in characters specified by RFC 2396 + '#'.\r
       * \r
       * @param uri the uri, in characters specified by RFC 2396 + '#'.\r
-     * @return the corresponding Unicode String.\r
+     * @return the corresponding unescaped Unicode String.\r
       * @exception IllegalArgumentException if a % hex sequence is ill-formed.\r
       */\r
      public static String unescape(String uri) {\r
       * @exception IllegalArgumentException if a % hex sequence is ill-formed.\r
       */\r
      public static String unescape(String uri) {\r
@@ -441,26 +400,29 @@ public final class URIStringUtils {
              if (!needsUnescaping(uri))\r
                  return uri;\r
  \r
              if (!needsUnescaping(uri))\r
                  return uri;\r
  \r
-            byte ascii[] = uri.getBytes("US-ASCII");\r
-            byte utf8[] = new byte[ascii.length];\r
+            int len = uri.length();\r
+            String unicode = uri;\r
+            char result[] = new char[len];\r
              int in = 0;\r
              int out = 0;\r
              int in = 0;\r
              int out = 0;\r
-            while ( in < ascii.length ) {\r
-                if (ascii[in] == (byte) '%') {\r
-                    in++;\r
-                    utf8[out++] = (byte) (hexDecode(ascii[in]) * 16 | hexDecode(ascii[in + 1]));\r
+            while (in < len) {\r
+                char inCh = unicode.charAt(in++);\r
+                if (inCh == '%') {\r
+                    char d1 = unicode.charAt(in);\r
+                    char d2 = unicode.charAt(in+1);\r
+                    if (d1 > 127 || d2 > 127)\r
+                        throw new IllegalArgumentException("Invalid hex digit escape sequence in " + uri + " at " + in);\r
+                    result[out++] = (char) (hexDecode((byte) d1) * 16 | hexDecode((byte) d2));\r
                      in += 2;\r
                  } else {\r
                      in += 2;\r
                  } else {\r
-                    utf8[out++] = ascii[in++];\r
+                    result[out++] = inCh;\r
                  }\r
              }\r
                  }\r
              }\r
-            return new String(utf8, 0, out, "UTF-8");\r
+            return new String(result, 0, out);\r
          } catch (IllegalArgumentException e) {\r
              throw new IllegalArgumentException("Problem while unescaping string: " + uri, e);\r
          } catch (IllegalArgumentException e) {\r
              throw new IllegalArgumentException("Problem while unescaping string: " + uri, e);\r
-        } catch (java.io.UnsupportedEncodingException e) {\r
-            throw new Error("The JVM is required to support UTF-8 and US-ASCII encodings.");\r
-        } catch (ArrayIndexOutOfBoundsException ee) {\r
-            throw new IllegalArgumentException("Incomplete Hex escape sequence in " + uri);\r
+        } catch (IndexOutOfBoundsException ee) {\r
+            throw new IllegalArgumentException("Incomplete hex digit escape sequence in " + uri);\r
          }\r
      }\r
  \r
          }\r
      }\r
  \r
@@ -491,38 +453,36 @@ public final class URIStringUtils {
       * @param args\r
       */\r
      public static void main(String[] args) {\r
       * @param args\r
       */\r
      public static void main(String[] args) {\r
-        String s;\r
-        s = "http://www.vtt.fi%2FSome- %25 Namespace/Jotain";\r
-        System.out.println(String.format("escape+unescape: %s -> %s -> %s", s, escape(s), unescape(escape(s))));\r
-        s = "http://www.vtt.fi%2FPSK";\r
-        System.out.println(String.format("unescape: %s -> %s", s, unescape(s)));\r
-        s = "http://www.vtt.fi%2FSome-Namespace/Jotain / Muuta";\r
-        System.out.println(String.format("escape: %s -> %s", s, escape(s)));\r
-        s = "Jotain / Muuta";\r
-        System.out.println(String.format("escape: %s -> %s", s, escape(s)));\r
-\r
-        System.out.println("escapeURI: " + escapeURI("foo/bar/org%2Fnet"));\r
-        System.out.println("escapeURI('...#...'): " + escapeURI("foo/bar#org%2Fnet"));\r
-        s = makeURI("http://foo.bar.com/foo/bar", "baz/guuk/org%2Fnet");\r
+        String s = makeURI("http://foo.bar.com/foo/bar", "baz/guuk/org%2Fnet");\r
          System.out.println("escapeURI: " + s);\r
          System.out.println("getNamespace: " + getNamespace(s));\r
          System.out.println("getLocalName: " + getLocalName(s));\r
  \r
          System.out.println("escapeURI: " + s);\r
          System.out.println("getNamespace: " + getNamespace(s));\r
          System.out.println("getLocalName: " + getLocalName(s));\r
  \r
+        System.out.println("escapeURI: " + escapeURI("foo/bar/org%2Fnet"));\r
+        System.out.println("escapeURI('...#...'): " + escapeURI("foo/bar#org%2Fnet"));\r
+\r
          testEscape("/", "%2F");\r
          testEscape("#", "%23");\r
          testEscape("%", "%25");\r
          testEscape("%01", "%2501");\r
          testEscape("%GG", "%25GG");\r
          testEscape("/", "%2F");\r
          testEscape("#", "%23");\r
          testEscape("%", "%25");\r
          testEscape("%01", "%2501");\r
          testEscape("%GG", "%25GG");\r
+        testEscape("säätö venttiili", "säätö%20venttiili");\r
+        testEscape("säätö", "säätö");\r
+        testEscape("Something / Else", "Something%20%2F%20Else");\r
+        testEscape("http://www.vtt.fi%2FSome- %25 Namespace/Something", "http:%2F%2Fwww.vtt.fi%252FSome-%20%2525%20Namespace%2FSomething");\r
+        testEscape("http://www.vtt.fi/PSK", "http:%2F%2Fwww.vtt.fi%2FPSK");\r
+        testEscape("http://www.vtt.fi%2FSome-Namespace/Something / Else", "http:%2F%2Fwww.vtt.fi%252FSome-Namespace%2FSomething%20%2F%20Else");\r
      }\r
  \r
      private static void testEscape(String unescaped, String expectedEscaped) {\r
          String esc = escape(unescaped);\r
          String unesc = unescape(esc);\r
      }\r
  \r
      private static void testEscape(String unescaped, String expectedEscaped) {\r
          String esc = escape(unescaped);\r
          String unesc = unescape(esc);\r
-        System.out.format("escape('%s')='%s', unescape('%s')='%s'\n", unescaped, esc, esc, unesc);\r
+        System.out.format("escape('%s') -> '%s', unescape('%s') -> '%s'", unescaped, esc, esc, unesc);\r
          if (!esc.equals(expectedEscaped))\r
              throw new AssertionError("escape('" + unescaped + "') was expected to return '" + expectedEscaped + "' but returned '" + esc + "'");\r
          if (!unesc.equals(unescaped))\r
              throw new AssertionError("unescape(escape('" + unescaped + "'))=unescape(" + esc + ") was expected to return '" + unescaped + "' but returned '" + unesc + "'");\r
          if (!esc.equals(expectedEscaped))\r
              throw new AssertionError("escape('" + unescaped + "') was expected to return '" + expectedEscaped + "' but returned '" + esc + "'");\r
          if (!unesc.equals(unescaped))\r
              throw new AssertionError("unescape(escape('" + unescaped + "'))=unescape(" + esc + ") was expected to return '" + unescaped + "' but returned '" + unesc + "'");\r
+        System.out.println(" OK");\r
      }\r
  \r
  }\r
      }\r
  \r
  }\r