From: Tuukka Lehtonen Date: Fri, 9 Sep 2016 13:05:42 +0000 (+0300) Subject: Merge branch 'master' of ssh://www.simantics.org:29418/simantics/platform X-Git-Tag: v1.25.0~130 X-Git-Url: https://gerrit.simantics.org/r/gitweb?p=simantics%2Fplatform.git;a=commitdiff_plain;h=b5c834f08accf6d283d88489e89c23da2a485fe2;hp=23ea9f9235f428865dc4cd3bff6d3dd9e3174653 Merge branch 'master' of ssh://www.simantics.org:29418/simantics/platform --- diff --git a/bundles/org.simantics.databoard/src/org/simantics/databoard/util/URIStringUtils.java b/bundles/org.simantics.databoard/src/org/simantics/databoard/util/URIStringUtils.java index 1cd658ba9..a11579f07 100644 --- a/bundles/org.simantics.databoard/src/org/simantics/databoard/util/URIStringUtils.java +++ b/bundles/org.simantics.databoard/src/org/simantics/databoard/util/URIStringUtils.java @@ -45,8 +45,7 @@ package org.simantics.databoard.util; -import java.nio.charset.Charset; -import java.util.ArrayList; +import java.util.Arrays; import java.util.List; @@ -194,13 +193,14 @@ public final class URIStringUtils { return name; } - final private static int HTTP_POSITION = "http://".length(); + final private static String HTTP_PREFIX = "http://"; + final private static int HTTP_POSITION = HTTP_PREFIX.length(); public static String[] splitURI(String uri) { int nextPathSeparator = uri.lastIndexOf(URIStringUtils.NAMESPACE_PATH_SEPARATOR); if (nextPathSeparator == -1) return null; if (nextPathSeparator == HTTP_POSITION - 1) { - if(uri.startsWith("http://")) return new String[] { "http://", uri.substring(HTTP_POSITION, uri.length()) }; + if(uri.startsWith(HTTP_PREFIX)) return new String[] { HTTP_PREFIX, uri.substring(HTTP_POSITION, uri.length()) }; else return null; } return new String[] { @@ -208,12 +208,10 @@ public final class URIStringUtils { uri.substring(nextPathSeparator + 1, uri.length()) }; } - + public static List splitURISCL(String uri) { String[] result = splitURI(uri); - ArrayList list = new ArrayList(result.length); - for(String s : result) list.add(s); - return list; + return Arrays.asList(result); } /** @@ -263,8 +261,7 @@ public final class URIStringUtils { public static String escapeURI(String localName) { if (localName == null) throw new NullPointerException("null local name"); - String result = encode(localName); - return result; + return encode(localName); } /** @@ -276,8 +273,7 @@ public final class URIStringUtils { * @return the joined namespace */ public static String appendURINamespace(String namespace, String suffix) { - //return namespace + NAMESPACE_PATH_SEPARATOR + suffix; - return new StringBuffer(namespace.length() + 1 + suffix.length()) + return new StringBuilder(namespace.length() + 1 + suffix.length()) .append(namespace) .append(NAMESPACE_PATH_SEPARATOR) .append(suffix) @@ -293,9 +289,8 @@ public final class URIStringUtils { * @return the joined URI */ public static String makeURI(String namespace, String localName) { - //return namespace + NAMESPACE_LOCAL_SEPARATOR + escapeURI(localName); String escapedLocalName = escapeURI(localName); - return new StringBuffer(namespace.length() + 1 + escapedLocalName.length()) + return new StringBuilder(namespace.length() + 1 + escapedLocalName.length()) .append(namespace) .append(NAMESPACE_LOCAL_SEPARATOR) .append(escapedLocalName) @@ -332,99 +327,59 @@ public final class URIStringUtils { } - final private static Charset UTF8 = Charset.forName("UTF-8"); - final private static Charset ASCII = Charset.forName("US-ASCII"); - /* * RFC 3986 section 2.2 Reserved Characters (January 2005) * !*'();:@&=+$,/?#[] */ - private static boolean[] UNESCAPED_US_ASCII_CHARS = new boolean[128]; + private static boolean[] ESCAPED_US_ASCII_CHARS = new boolean[128]; static { - for(char ch='A';ch <= 'Z';++ch) - UNESCAPED_US_ASCII_CHARS[ch] = true; - for(char ch='a';ch <= 'z';++ch) - UNESCAPED_US_ASCII_CHARS[ch] = true; - for(char ch='0';ch <= '9';++ch) - UNESCAPED_US_ASCII_CHARS[ch] = true; - UNESCAPED_US_ASCII_CHARS[';'] = true; - UNESCAPED_US_ASCII_CHARS['?'] = true; - UNESCAPED_US_ASCII_CHARS[':'] = true; - UNESCAPED_US_ASCII_CHARS['@'] = true; - UNESCAPED_US_ASCII_CHARS['='] = true; - UNESCAPED_US_ASCII_CHARS['+'] = true; - UNESCAPED_US_ASCII_CHARS['$'] = true; - UNESCAPED_US_ASCII_CHARS['.'] = true; - UNESCAPED_US_ASCII_CHARS[','] = true; - UNESCAPED_US_ASCII_CHARS['-'] = true; - UNESCAPED_US_ASCII_CHARS['_'] = true; - UNESCAPED_US_ASCII_CHARS['!'] = true; - UNESCAPED_US_ASCII_CHARS['~'] = true; - UNESCAPED_US_ASCII_CHARS['*'] = true; - UNESCAPED_US_ASCII_CHARS['\''] = true; - UNESCAPED_US_ASCII_CHARS['('] = true; - UNESCAPED_US_ASCII_CHARS[')'] = true; - UNESCAPED_US_ASCII_CHARS['['] = true; - UNESCAPED_US_ASCII_CHARS[']'] = true; + ESCAPED_US_ASCII_CHARS[' '] = true; + // IMPORTANT NOTE: every time escape is invoked, all input needs to be escaped, + // i.e. escape("%01") should result in "%2501", not "%01". + // escape and unescape form a bijection, where neither + // of them is an idempotent operation. + ESCAPED_US_ASCII_CHARS['%'] = true; + // '#' and '/' are URL segment/fragment delimiters, need to be escaped in names. + ESCAPED_US_ASCII_CHARS['#'] = true; + ESCAPED_US_ASCII_CHARS['/'] = true; + // Escape '&' characters to avoid them being interpreted as SGML entities. + ESCAPED_US_ASCII_CHARS['&'] = true; } - private static boolean needsEscaping(String unicode) { + private static int needsEscaping(String unicode) { int len = unicode.length(); + int escapeCount = 0; for (int i = 0; i < len; ++i) { char ch = unicode.charAt(i); - if (ch >= 128 || !UNESCAPED_US_ASCII_CHARS[ch]) - return true; + if (ch < 128 && ESCAPED_US_ASCII_CHARS[ch]) + ++escapeCount; } - return false; + return escapeCount; } - /* Copied and modified from Jena 2.4 com.hp.hpl.jena.util.URIref */ private static String encode(String unicode) { - boolean needsEscapes = needsEscaping(unicode); - if (!needsEscapes) + int needsEscapes = needsEscaping(unicode); + if (needsEscapes == 0) return unicode; - byte utf8[] = unicode.getBytes(UTF8); - byte rsltAscii[] = new byte[utf8.length * 6]; + int len = unicode.length(); + char result[] = new char[(len - needsEscapes) + needsEscapes * 3]; int in = 0; int out = 0; - while (in < utf8.length) { - byte inCh = utf8[in]; - if (inCh >= 0 && inCh < 128 && UNESCAPED_US_ASCII_CHARS[inCh]) { - rsltAscii[out] = inCh; - out++; - in++; + while (in < len) { + char inCh = unicode.charAt(in++); + if (inCh >= 128 || !ESCAPED_US_ASCII_CHARS[inCh]) { + result[out++] = inCh; } else { - switch (inCh) { - case (byte)' ': - rsltAscii[out++] = (byte) '%'; - rsltAscii[out++] = '2'; - rsltAscii[out++] = '0'; - in++; - break; - case (byte) '%': - // [lehtonen] NOTE: all input needs to be escaped, i.e. "%01" should result in "%2501", not "%01". - // escape+unescape is a bijection, not an idempotent operation. - // Fall through to escape '%' as '%25' - case (byte) '#': - case (byte) '/': - // Fall through to escape '/' - case (byte)'&': - // Fall through to escape '&' characters to avoid them - // being interpreted as SGML entities. - default: - rsltAscii[out++] = (byte) '%'; - // Get rid of sign ... - int c = (inCh) & 255; - rsltAscii[out++] = hexEncode(c / 16); - rsltAscii[out++] = hexEncode(c % 16); - in++; - break; - } + // Only selected 7-bit US-ASCII characters are escaped + int c = inCh & 255; + result[out++] = '%'; + result[out++] = (char) hexEncode(c / 16); + result[out++] = (char) hexEncode(c % 16); } } - return new String(rsltAscii, 0, out, ASCII); + return new String(result, 0, out); } private static boolean needsUnescaping(String unicode) { @@ -432,13 +387,12 @@ public final class URIStringUtils { } /** - * Convert a URI, in US-ASCII, with escaped characters taken from UTF-8, to - * the corresponding Unicode string. On ill-formed input the results are - * undefined, specifically if the unescaped version is not a UTF-8 String, - * some String will be returned. + * Convert a URI, in UTF-16 with escaped characters taken from US-ASCII, to + * the corresponding unescaped Unicode string. On ill-formed input the results are + * undefined. * * @param uri the uri, in characters specified by RFC 2396 + '#'. - * @return the corresponding Unicode String. + * @return the corresponding unescaped Unicode String. * @exception IllegalArgumentException if a % hex sequence is ill-formed. */ public static String unescape(String uri) { @@ -446,26 +400,29 @@ public final class URIStringUtils { if (!needsUnescaping(uri)) return uri; - byte ascii[] = uri.getBytes("US-ASCII"); - byte utf8[] = new byte[ascii.length]; + int len = uri.length(); + String unicode = uri; + char result[] = new char[len]; int in = 0; int out = 0; - while ( in < ascii.length ) { - if (ascii[in] == (byte) '%') { - in++; - utf8[out++] = (byte) (hexDecode(ascii[in]) * 16 | hexDecode(ascii[in + 1])); + while (in < len) { + char inCh = unicode.charAt(in++); + if (inCh == '%') { + char d1 = unicode.charAt(in); + char d2 = unicode.charAt(in+1); + if (d1 > 127 || d2 > 127) + throw new IllegalArgumentException("Invalid hex digit escape sequence in " + uri + " at " + in); + result[out++] = (char) (hexDecode((byte) d1) * 16 | hexDecode((byte) d2)); in += 2; } else { - utf8[out++] = ascii[in++]; + result[out++] = inCh; } } - return new String(utf8, 0, out, "UTF-8"); + return new String(result, 0, out); } catch (IllegalArgumentException e) { throw new IllegalArgumentException("Problem while unescaping string: " + uri, e); - } catch (java.io.UnsupportedEncodingException e) { - throw new Error("The JVM is required to support UTF-8 and US-ASCII encodings."); - } catch (ArrayIndexOutOfBoundsException ee) { - throw new IllegalArgumentException("Incomplete Hex escape sequence in " + uri); + } catch (IndexOutOfBoundsException ee) { + throw new IllegalArgumentException("Incomplete hex digit escape sequence in " + uri); } } @@ -496,40 +453,36 @@ public final class URIStringUtils { * @param args */ public static void main(String[] args) { - String s; - s = "http://www.vtt.fi%2FSome- %25 Namespace/Jotain"; - System.out.println(String.format("escape+unescape: %s -> %s -> %s", s, escape(s), unescape(escape(s)))); - s = "http://www.vtt.fi%2FPSK"; - System.out.println(String.format("unescape: %s -> %s", s, unescape(s))); - s = "http://www.vtt.fi%2FSome-Namespace/Jotain / Muuta"; - System.out.println(String.format("escape: %s -> %s", s, escape(s))); - s = "Jotain / Muuta"; - System.out.println(String.format("escape: %s -> %s", s, escape(s))); - - System.out.println("escapeURI: " + escapeURI("foo/bar/org%2Fnet")); - System.out.println("escapeURI('...#...'): " + escapeURI("foo/bar#org%2Fnet")); - s = makeURI("http://foo.bar.com/foo/bar", "baz/guuk/org%2Fnet"); + String s = makeURI("http://foo.bar.com/foo/bar", "baz/guuk/org%2Fnet"); System.out.println("escapeURI: " + s); System.out.println("getNamespace: " + getNamespace(s)); System.out.println("getLocalName: " + getLocalName(s)); + System.out.println("escapeURI: " + escapeURI("foo/bar/org%2Fnet")); + System.out.println("escapeURI('...#...'): " + escapeURI("foo/bar#org%2Fnet")); + testEscape("/", "%2F"); testEscape("#", "%23"); testEscape("%", "%25"); testEscape("%01", "%2501"); testEscape("%GG", "%25GG"); - testEscape("säätö venttiili", "s%C3%A4%C3%A4t%C3%B6%20venttiili"); - testEscape("säätö", "s%C3%A4%C3%A4t%C3%B6"); + testEscape("säätö venttiili", "säätö%20venttiili"); + testEscape("säätö", "säätö"); + testEscape("Something / Else", "Something%20%2F%20Else"); + testEscape("http://www.vtt.fi%2FSome- %25 Namespace/Something", "http:%2F%2Fwww.vtt.fi%252FSome-%20%2525%20Namespace%2FSomething"); + testEscape("http://www.vtt.fi/PSK", "http:%2F%2Fwww.vtt.fi%2FPSK"); + testEscape("http://www.vtt.fi%2FSome-Namespace/Something / Else", "http:%2F%2Fwww.vtt.fi%252FSome-Namespace%2FSomething%20%2F%20Else"); } private static void testEscape(String unescaped, String expectedEscaped) { String esc = escape(unescaped); String unesc = unescape(esc); - System.out.format("escape('%s')='%s', unescape('%s')='%s'\n", unescaped, esc, esc, unesc); + System.out.format("escape('%s') -> '%s', unescape('%s') -> '%s'", unescaped, esc, esc, unesc); if (!esc.equals(expectedEscaped)) throw new AssertionError("escape('" + unescaped + "') was expected to return '" + expectedEscaped + "' but returned '" + esc + "'"); if (!unesc.equals(unescaped)) throw new AssertionError("unescape(escape('" + unescaped + "'))=unescape(" + esc + ") was expected to return '" + unescaped + "' but returned '" + unesc + "'"); + System.out.println(" OK"); } }