\r
package org.simantics.databoard.util;\r
\r
-import java.nio.charset.Charset;\r
-import java.util.ArrayList;\r
+import java.util.Arrays;\r
import java.util.List;\r
\r
\r
return name;\r
}\r
\r
- final private static int HTTP_POSITION = "http://".length();\r
+ // Scheme prefix that splitURI returns as the first element for URIs whose\r
+ // last separator is the one terminating this prefix.\r
+ final private static String HTTP_PREFIX = "http://";\r
+ // Length of HTTP_PREFIX, i.e. the index of the first character following it.\r
+ final private static int HTTP_POSITION = HTTP_PREFIX.length();\r
\r
public static String[] splitURI(String uri) {\r
int nextPathSeparator = uri.lastIndexOf(URIStringUtils.NAMESPACE_PATH_SEPARATOR);\r
if (nextPathSeparator == -1) return null;\r
if (nextPathSeparator == HTTP_POSITION - 1) {\r
- if(uri.startsWith("http://")) return new String[] { "http://", uri.substring(HTTP_POSITION, uri.length()) };\r
+ // The last separator sits at the index where HTTP_PREFIX would end, so the URI is\r
+ // split into { prefix, remainder } only if it really starts with that prefix;\r
+ // otherwise null is returned.\r
+ if(uri.startsWith(HTTP_PREFIX)) return new String[] { HTTP_PREFIX, uri.substring(HTTP_POSITION, uri.length()) };\r
else return null;\r
}\r
return new String[] {\r
uri.substring(nextPathSeparator + 1, uri.length())\r
};\r
}\r
- \r
+\r
+ /**\r
+ * List-returning variant of {@link #splitURI(String)}.\r
+ *\r
+ * @param uri the URI to split\r
+ * @return a new, growable list holding the elements returned by splitURI\r
+ * @throws NullPointerException if splitURI returns null for the given URI\r
+ */\r
public static List<String> splitURISCL(String uri) {\r
String[] result = splitURI(uri);\r
- ArrayList<String> list = new ArrayList<String>(result.length);\r
- for(String s : result) list.add(s);\r
- return list;\r
+ // Arrays.asList alone returns a fixed-size view backed by the array, whereas the\r
+ // replaced code returned a growable list. Copy the view so that callers which\r
+ // add or remove elements keep working. The type is fully qualified because the\r
+ // java.util.ArrayList import is no longer present.\r
+ return new java.util.ArrayList<String>(Arrays.asList(result));\r
}\r
\r
/**\r
public static String escapeURI(String localName) {\r
if (localName == null)\r
throw new NullPointerException("null local name");\r
- String result = encode(localName);\r
- return result;\r
+ // The explicit null check above exists only to provide a descriptive message;\r
+ // the escaping itself is delegated to encode().\r
+ return encode(localName);\r
}\r
\r
/**\r
* @return the joined namespace\r
*/\r
public static String appendURINamespace(String namespace, String suffix) {\r
- //return namespace + NAMESPACE_PATH_SEPARATOR + suffix;\r
- return new StringBuffer(namespace.length() + 1 + suffix.length())\r
+ return new StringBuilder(namespace.length() + 1 + suffix.length())\r
.append(namespace)\r
.append(NAMESPACE_PATH_SEPARATOR)\r
.append(suffix)\r
* @return the joined URI\r
*/\r
public static String makeURI(String namespace, String localName) {\r
- //return namespace + NAMESPACE_LOCAL_SEPARATOR + escapeURI(localName);\r
String escapedLocalName = escapeURI(localName);\r
- return new StringBuffer(namespace.length() + 1 + escapedLocalName.length())\r
+ return new StringBuilder(namespace.length() + 1 + escapedLocalName.length())\r
.append(namespace)\r
.append(NAMESPACE_LOCAL_SEPARATOR)\r
.append(escapedLocalName)\r
}\r
\r
\r
- final private static Charset UTF8 = Charset.forName("UTF-8");\r
- final private static Charset ASCII = Charset.forName("US-ASCII");\r
-\r
/*\r
* RFC 3986 section 2.2 Reserved Characters (January 2005)\r
* !*'();:@&=+$,/?#[]\r
*/\r
- private static boolean[] UNESCAPED_US_ASCII_CHARS = new boolean[128];\r
+ // Lookup table indexed by 7-bit US-ASCII code: true marks a character that\r
+ // encode() replaces with a %XX escape sequence. All other characters, including\r
+ // everything at or above code 128, are left as they are.\r
+ // The array reference is never reassigned, hence final.\r
+ final private static boolean[] ESCAPED_US_ASCII_CHARS = new boolean[128];\r
\r
static {\r
- for(char ch='A';ch <= 'Z';++ch)\r
- UNESCAPED_US_ASCII_CHARS[ch] = true;\r
- for(char ch='a';ch <= 'z';++ch)\r
- UNESCAPED_US_ASCII_CHARS[ch] = true;\r
- for(char ch='0';ch <= '9';++ch)\r
- UNESCAPED_US_ASCII_CHARS[ch] = true;\r
- UNESCAPED_US_ASCII_CHARS[';'] = true;\r
- UNESCAPED_US_ASCII_CHARS['?'] = true;\r
- UNESCAPED_US_ASCII_CHARS[':'] = true;\r
- UNESCAPED_US_ASCII_CHARS['@'] = true;\r
- UNESCAPED_US_ASCII_CHARS['='] = true;\r
- UNESCAPED_US_ASCII_CHARS['+'] = true;\r
- UNESCAPED_US_ASCII_CHARS['$'] = true;\r
- UNESCAPED_US_ASCII_CHARS['.'] = true;\r
- UNESCAPED_US_ASCII_CHARS[','] = true;\r
- UNESCAPED_US_ASCII_CHARS['-'] = true;\r
- UNESCAPED_US_ASCII_CHARS['_'] = true;\r
- UNESCAPED_US_ASCII_CHARS['!'] = true;\r
- UNESCAPED_US_ASCII_CHARS['~'] = true;\r
- UNESCAPED_US_ASCII_CHARS['*'] = true;\r
- UNESCAPED_US_ASCII_CHARS['\''] = true;\r
- UNESCAPED_US_ASCII_CHARS['('] = true;\r
- UNESCAPED_US_ASCII_CHARS[')'] = true;\r
- UNESCAPED_US_ASCII_CHARS['['] = true;\r
- UNESCAPED_US_ASCII_CHARS[']'] = true;\r
+ // The space character is escaped as %20.\r
+ ESCAPED_US_ASCII_CHARS[' '] = true;\r
+ // IMPORTANT NOTE: every time escape is invoked, all input needs to be escaped,\r
+ // i.e. escape("%01") should result in "%2501", not "%01".\r
+ // escape and unescape form a bijection, where neither\r
+ // of them is an idempotent operation. \r
+ ESCAPED_US_ASCII_CHARS['%'] = true;\r
+ // '#' and '/' are URL segment/fragment delimiters, need to be escaped in names.\r
+ ESCAPED_US_ASCII_CHARS['#'] = true;\r
+ ESCAPED_US_ASCII_CHARS['/'] = true;\r
+ // Escape '&' characters to avoid them being interpreted as SGML entities.\r
+ ESCAPED_US_ASCII_CHARS['&'] = true;\r
}\r
\r
- private static boolean needsEscaping(String unicode) {\r
+ /**\r
+ * Counts the characters of the given string that encode() has to escape.\r
+ * Despite its name this method returns a count, not a flag.\r
+ *\r
+ * @param unicode the string to inspect\r
+ * @return the number of characters below code 128 that are flagged in\r
+ *         ESCAPED_US_ASCII_CHARS; 0 means that no escaping is needed\r
+ */\r
+ private static int needsEscaping(String unicode) {\r
int len = unicode.length();\r
+ int escapeCount = 0;\r
for (int i = 0; i < len; ++i) {\r
char ch = unicode.charAt(i);\r
- if (ch >= 128 || !UNESCAPED_US_ASCII_CHARS[ch])\r
- return true;\r
+ // The range check comes first so that the 128-element table is never indexed out of bounds.\r
+ if (ch < 128 && ESCAPED_US_ASCII_CHARS[ch])\r
+ ++escapeCount;\r
}\r
- return false;\r
+ return escapeCount;\r
}\r
\r
- /* Copied and modified from Jena 2.4 com.hp.hpl.jena.util.URIref */\r
+ /**\r
+ * Replaces every character flagged in ESCAPED_US_ASCII_CHARS with a '%' followed\r
+ * by two characters produced by hexEncode. All other characters, including those\r
+ * at or above code 128, are copied through unchanged.\r
+ *\r
+ * @param unicode the string to escape\r
+ * @return the escaped string, or the argument itself if nothing needs escaping\r
+ */\r
private static String encode(String unicode) {\r
- boolean needsEscapes = needsEscaping(unicode);\r
- if (!needsEscapes)\r
+ int needsEscapes = needsEscaping(unicode);\r
+ if (needsEscapes == 0)\r
return unicode;\r
\r
- byte utf8[] = unicode.getBytes(UTF8);\r
- byte rsltAscii[] = new byte[utf8.length * 6];\r
+ int len = unicode.length();\r
+ // Exact output size: each escaped character grows from one char to three,\r
+ // every other character occupies exactly one char.\r
+ char result[] = new char[(len - needsEscapes) + needsEscapes * 3];\r
int in = 0;\r
int out = 0;\r
- while (in < utf8.length) {\r
- byte inCh = utf8[in];\r
- if (inCh >= 0 && inCh < 128 && UNESCAPED_US_ASCII_CHARS[inCh]) {\r
- rsltAscii[out] = inCh;\r
- out++;\r
- in++;\r
+ while (in < len) {\r
+ char inCh = unicode.charAt(in++);\r
+ if (inCh >= 128 || !ESCAPED_US_ASCII_CHARS[inCh]) {\r
+ result[out++] = inCh;\r
} else {\r
- switch (inCh) {\r
- case (byte)' ':\r
- rsltAscii[out++] = (byte) '%';\r
- rsltAscii[out++] = '2';\r
- rsltAscii[out++] = '0';\r
- in++;\r
- break;\r
- case (byte) '%':\r
- // [lehtonen] NOTE: all input needs to be escaped, i.e. "%01" should result in "%2501", not "%01".\r
- // escape+unescape is a bijection, not an idempotent operation. \r
- // Fall through to escape '%' as '%25'\r
- case (byte) '#':\r
- case (byte) '/':\r
- // Fall through to escape '/'\r
- case (byte)'&':\r
- // Fall through to escape '&' characters to avoid them\r
- // being interpreted as SGML entities.\r
- default:\r
- rsltAscii[out++] = (byte) '%';\r
- // Get rid of sign ...\r
- int c = (inCh) & 255;\r
- rsltAscii[out++] = hexEncode(c / 16);\r
- rsltAscii[out++] = hexEncode(c % 16);\r
- in++;\r
- break;\r
- }\r
+ // Only selected 7-bit US-ASCII characters are escaped\r
+ // (inCh is below 128 in this branch, so the mask below does not change its value).\r
+ int c = inCh & 255;\r
+ result[out++] = '%';\r
+ result[out++] = (char) hexEncode(c / 16);\r
+ result[out++] = (char) hexEncode(c % 16);\r
}\r
}\r
- return new String(rsltAscii, 0, out, ASCII);\r
+ return new String(result, 0, out);\r
}\r
\r
private static boolean needsUnescaping(String unicode) {\r
}\r
\r
/**\r
- * Convert a URI, in US-ASCII, with escaped characters taken from UTF-8, to\r
- * the corresponding Unicode string. On ill-formed input the results are\r
- * undefined, specifically if the unescaped version is not a UTF-8 String,\r
- * some String will be returned.\r
+ * Convert a URI, in UTF-16 with escaped characters taken from US-ASCII, to\r
+ * the corresponding unescaped Unicode string. On ill-formed input the results are\r
+ * undefined.\r
* \r
* @param uri the uri, in characters specified by RFC 2396 + '#'.\r
- * @return the corresponding Unicode String.\r
+ * @return the corresponding unescaped Unicode String.\r
* @exception IllegalArgumentException if a % hex sequence is ill-formed.\r
*/\r
public static String unescape(String uri) {\r
if (!needsUnescaping(uri))\r
return uri;\r
\r
- byte ascii[] = uri.getBytes("US-ASCII");\r
- byte utf8[] = new byte[ascii.length];\r
+ int len = uri.length();\r
+ String unicode = uri;\r
+ char result[] = new char[len];\r
int in = 0;\r
int out = 0;\r
- while ( in < ascii.length ) {\r
- if (ascii[in] == (byte) '%') {\r
- in++;\r
- utf8[out++] = (byte) (hexDecode(ascii[in]) * 16 | hexDecode(ascii[in + 1]));\r
+ while (in < len) {\r
+ char inCh = unicode.charAt(in++);\r
+ if (inCh == '%') {\r
+ char d1 = unicode.charAt(in);\r
+ char d2 = unicode.charAt(in+1);\r
+ if (d1 > 127 || d2 > 127)\r
+ throw new IllegalArgumentException("Invalid hex digit escape sequence in " + uri + " at " + in);\r
+ result[out++] = (char) (hexDecode((byte) d1) * 16 | hexDecode((byte) d2));\r
in += 2;\r
} else {\r
- utf8[out++] = ascii[in++];\r
+ result[out++] = inCh;\r
}\r
}\r
- return new String(utf8, 0, out, "UTF-8");\r
+ return new String(result, 0, out);\r
} catch (IllegalArgumentException e) {\r
throw new IllegalArgumentException("Problem while unescaping string: " + uri, e);\r
- } catch (java.io.UnsupportedEncodingException e) {\r
- throw new Error("The JVM is required to support UTF-8 and US-ASCII encodings.");\r
- } catch (ArrayIndexOutOfBoundsException ee) {\r
- throw new IllegalArgumentException("Incomplete Hex escape sequence in " + uri);\r
+ } catch (IndexOutOfBoundsException ee) {\r
+ throw new IllegalArgumentException("Incomplete hex digit escape sequence in " + uri);\r
}\r
}\r
\r
* @param args\r
*/\r
public static void main(String[] args) {\r
- String s;\r
- s = "http://www.vtt.fi%2FSome- %25 Namespace/Jotain";\r
- System.out.println(String.format("escape+unescape: %s -> %s -> %s", s, escape(s), unescape(escape(s))));\r
- s = "http://www.vtt.fi%2FPSK";\r
- System.out.println(String.format("unescape: %s -> %s", s, unescape(s)));\r
- s = "http://www.vtt.fi%2FSome-Namespace/Jotain / Muuta";\r
- System.out.println(String.format("escape: %s -> %s", s, escape(s)));\r
- s = "Jotain / Muuta";\r
- System.out.println(String.format("escape: %s -> %s", s, escape(s)));\r
-\r
- System.out.println("escapeURI: " + escapeURI("foo/bar/org%2Fnet"));\r
- System.out.println("escapeURI('...#...'): " + escapeURI("foo/bar#org%2Fnet"));\r
- s = makeURI("http://foo.bar.com/foo/bar", "baz/guuk/org%2Fnet");\r
+ // Print-only smoke checks of makeURI, getNamespace, getLocalName and escapeURI.\r
+ String s = makeURI("http://foo.bar.com/foo/bar", "baz/guuk/org%2Fnet");\r
System.out.println("escapeURI: " + s);\r
System.out.println("getNamespace: " + getNamespace(s));\r
System.out.println("getLocalName: " + getLocalName(s));\r
\r
+ System.out.println("escapeURI: " + escapeURI("foo/bar/org%2Fnet"));\r
+ System.out.println("escapeURI('...#...'): " + escapeURI("foo/bar#org%2Fnet"));\r
+\r
testEscape("/", "%2F");\r
testEscape("#", "%23");\r
testEscape("%", "%25");\r
testEscape("%01", "%2501");\r
testEscape("%GG", "%25GG");\r
- testEscape("säätö venttiili", "s%C3%A4%C3%A4t%C3%B6%20venttiili");\r
- testEscape("säätö", "s%C3%A4%C3%A4t%C3%B6");\r
+ // Non-ASCII characters are expected to pass through unescaped; only the\r
+ // selected US-ASCII characters (here the space) become %XX sequences.\r
+ testEscape("säätö venttiili", "säätö%20venttiili");\r
+ testEscape("säätö", "säätö");\r
+ testEscape("Something / Else", "Something%20%2F%20Else");\r
+ // Input that already contains %XX sequences must have its '%' escaped again as %25.\r
+ testEscape("http://www.vtt.fi%2FSome- %25 Namespace/Something", "http:%2F%2Fwww.vtt.fi%252FSome-%20%2525%20Namespace%2FSomething");\r
+ testEscape("http://www.vtt.fi/PSK", "http:%2F%2Fwww.vtt.fi%2FPSK");\r
+ testEscape("http://www.vtt.fi%2FSome-Namespace/Something / Else", "http:%2F%2Fwww.vtt.fi%252FSome-Namespace%2FSomething%20%2F%20Else");\r
}\r
\r
private static void testEscape(String unescaped, String expectedEscaped) {\r
String esc = escape(unescaped);\r
String unesc = unescape(esc);\r
- System.out.format("escape('%s')='%s', unescape('%s')='%s'\n", unescaped, esc, esc, unesc);\r
+ // No trailing newline here: the line is completed with " OK" below once both\r
+ // checks have passed. On failure an AssertionError is thrown instead.\r
+ System.out.format("escape('%s') -> '%s', unescape('%s') -> '%s'", unescaped, esc, esc, unesc);\r
if (!esc.equals(expectedEscaped))\r
throw new AssertionError("escape('" + unescaped + "') was expected to return '" + expectedEscaped + "' but returned '" + esc + "'");\r
if (!unesc.equals(unescaped))\r
throw new AssertionError("unescape(escape('" + unescaped + "'))=unescape(" + esc + ") was expected to return '" + unescaped + "' but returned '" + unesc + "'");\r
+ // Reached only when the escaped form matched and the round trip restored the input.\r
+ System.out.println(" OK");\r
}\r
\r
}\r