X-Git-Url: https://gerrit.simantics.org/r/gitweb?p=simantics%2Fplatform.git;a=blobdiff_plain;f=bundles%2Forg.simantics.databoard%2Fsrc%2Forg%2Fsimantics%2Fdataboard%2Futil%2FURIStringUtils.java;h=a11579f07e774404394e448daf3a4d4799651512;hp=dde498a2c38bdcadbe3fb025cce664e2e0575c74;hb=e67e00d01d30f362e7a4f599cbcf24770ce13e22;hpb=969bd23cab98a79ca9101af33334000879fb60c5 diff --git a/bundles/org.simantics.databoard/src/org/simantics/databoard/util/URIStringUtils.java b/bundles/org.simantics.databoard/src/org/simantics/databoard/util/URIStringUtils.java index dde498a2c..a11579f07 100644 --- a/bundles/org.simantics.databoard/src/org/simantics/databoard/util/URIStringUtils.java +++ b/bundles/org.simantics.databoard/src/org/simantics/databoard/util/URIStringUtils.java @@ -45,8 +45,7 @@ package org.simantics.databoard.util; -import java.nio.charset.Charset; -import java.util.ArrayList; +import java.util.Arrays; import java.util.List; @@ -194,13 +193,14 @@ public final class URIStringUtils { return name; } - final private static int HTTP_POSITION = "http://".length(); + final private static String HTTP_PREFIX = "http://"; + final private static int HTTP_POSITION = HTTP_PREFIX.length(); public static String[] splitURI(String uri) { int nextPathSeparator = uri.lastIndexOf(URIStringUtils.NAMESPACE_PATH_SEPARATOR); if (nextPathSeparator == -1) return null; if (nextPathSeparator == HTTP_POSITION - 1) { - if(uri.startsWith("http://")) return new String[] { "http://", uri.substring(HTTP_POSITION, uri.length()) }; + if(uri.startsWith(HTTP_PREFIX)) return new String[] { HTTP_PREFIX, uri.substring(HTTP_POSITION, uri.length()) }; else return null; } return new String[] { @@ -208,12 +208,10 @@ public final class URIStringUtils { uri.substring(nextPathSeparator + 1, uri.length()) }; } - + public static List splitURISCL(String uri) { String[] result = splitURI(uri); - ArrayList list = new ArrayList(result.length); - for(String s : result) list.add(s); - return list; + return Arrays.asList(result); } /** @@ -263,8 +261,7 @@ public final class URIStringUtils { public static String escapeURI(String localName) { if (localName == null) throw new NullPointerException("null local name"); - String result = encode(localName); - return result; + return encode(localName); } /** @@ -276,8 +273,7 @@ public final class URIStringUtils { * @return the joined namespace */ public static String appendURINamespace(String namespace, String suffix) { - //return namespace + NAMESPACE_PATH_SEPARATOR + suffix; - return new StringBuffer(namespace.length() + 1 + suffix.length()) + return new StringBuilder(namespace.length() + 1 + suffix.length()) .append(namespace) .append(NAMESPACE_PATH_SEPARATOR) .append(suffix) @@ -293,9 +289,8 @@ public final class URIStringUtils { * @return the joined URI */ public static String makeURI(String namespace, String localName) { - //return namespace + NAMESPACE_LOCAL_SEPARATOR + escapeURI(localName); String escapedLocalName = escapeURI(localName); - return new StringBuffer(namespace.length() + 1 + escapedLocalName.length()) + return new StringBuilder(namespace.length() + 1 + escapedLocalName.length()) .append(namespace) .append(NAMESPACE_LOCAL_SEPARATOR) .append(escapedLocalName) @@ -332,94 +327,59 @@ public final class URIStringUtils { } - final private static Charset UTF8 = Charset.forName("UTF-8"); - final private static Charset ASCII = Charset.forName("US-ASCII"); - - /* Copied and modified from Jena 2.4 com.hp.hpl.jena.util.URIref */ - private static String encode(String unicode) { - boolean needsEscapes = needsEscaping(unicode); - if (!needsEscapes) - return unicode; - - byte utf8[] = unicode.getBytes(UTF8); - byte rsltAscii[] = new byte[utf8.length * 6]; - int in = 0; - int out = 0; - while (in < utf8.length) { - switch (utf8[in]) { - case (byte)'a': case (byte)'b': case (byte)'c': case (byte)'d': case (byte)'e': case (byte)'f': case (byte)'g': case (byte)'h': case (byte)'i': case (byte)'j': case (byte)'k': case (byte)'l': case (byte)'m': case (byte)'n': case (byte)'o': case (byte)'p': case (byte)'q': case (byte)'r': case (byte)'s': case (byte)'t': case (byte)'u': case (byte)'v': case (byte)'w': case (byte)'x': case (byte)'y': case (byte)'z': - case (byte)'A': case (byte)'B': case (byte)'C': case (byte)'D': case (byte)'E': case (byte)'F': case (byte)'G': case (byte)'H': case (byte)'I': case (byte)'J': case (byte)'K': case (byte)'L': case (byte)'M': case (byte)'N': case (byte)'O': case (byte)'P': case (byte)'Q': case (byte)'R': case (byte)'S': case (byte)'T': case (byte)'U': case (byte)'V': case (byte)'W': case (byte)'X': case (byte)'Y': case (byte)'Z': - case (byte)'0': case (byte)'1': case (byte)'2': case (byte)'3': case (byte)'4': case (byte)'5': case (byte)'6': case (byte)'7': case (byte)'8': case (byte)'9': - case (byte)';': case (byte)'?': case (byte)':': case (byte)'@': case (byte)'=': case (byte)'+': case (byte)'$': case (byte)',': - case (byte)'-': case (byte)'_': case (byte)'.': case (byte)'!': case (byte)'~': case (byte)'*': case (byte)'\'': case (byte)'(': case (byte)')': - case (byte)'[': case (byte)']': - rsltAscii[out] = utf8[in]; - out++; - in++; - break; - case (byte)' ': - rsltAscii[out++] = (byte) '%'; - rsltAscii[out++] = '2'; - rsltAscii[out++] = '0'; - in++; - break; - case (byte) '%': - // [lehtonen] NOTE: all input needs to be escaped, i.e. "%01" should result in "%2501", not "%01". - // escape+unescape is a bijection, not an idempotent operation. - // Fall through to to escape '%' as '%25' - case (byte) '#': - case (byte) '/': - // Fall through to escape '/' - case (byte)'&': - // Fall through to escape '&' characters to avoid them - // being interpreted as SGML entities. - default: - rsltAscii[out++] = (byte) '%'; - // Get rid of sign ... - int c = (utf8[in]) & 255; - rsltAscii[out++] = hexEncode(c / 16); - rsltAscii[out++] = hexEncode(c % 16); - in++; - break; - } - } - return new String(rsltAscii, 0, out, ASCII); - } - /* * RFC 3986 section 2.2 Reserved Characters (January 2005) * !*'();:@&=+$,/?#[] */ - private static boolean needsEscaping(String unicode) { + private static boolean[] ESCAPED_US_ASCII_CHARS = new boolean[128]; + + static { + ESCAPED_US_ASCII_CHARS[' '] = true; + // IMPORTANT NOTE: every time escape is invoked, all input needs to be escaped, + // i.e. escape("%01") should result in "%2501", not "%01". + // escape and unescape form a bijection, where neither + // of them is an idempotent operation. + ESCAPED_US_ASCII_CHARS['%'] = true; + // '#' and '/' are URL segment/fragment delimiters, need to be escaped in names. + ESCAPED_US_ASCII_CHARS['#'] = true; + ESCAPED_US_ASCII_CHARS['/'] = true; + // Escape '&' characters to avoid them being interpreted as SGML entities. + ESCAPED_US_ASCII_CHARS['&'] = true; + } + + private static int needsEscaping(String unicode) { int len = unicode.length(); + int escapeCount = 0; for (int i = 0; i < len; ++i) { - switch (unicode.charAt(i)) { - case (byte)'!': - case (byte)'*': - case (byte)'\'': - case (byte)'(': - case (byte)')': - case (byte)';': - case (byte)':': - case (byte)'@': - case (byte)'=': - case (byte)'+': - case (byte)'$': - case (byte)',': - case (byte)'?': - case (byte)'~': - case (byte)'[': - case (byte)']': - break; - case (byte)' ': - case (byte) '#': - case (byte) '%': - case (byte) '/': - case (byte)'&': - return true; + char ch = unicode.charAt(i); + if (ch < 128 && ESCAPED_US_ASCII_CHARS[ch]) + ++escapeCount; + } + return escapeCount; + } + + private static String encode(String unicode) { + int needsEscapes = needsEscaping(unicode); + if (needsEscapes == 0) + return unicode; + + int len = unicode.length(); + char result[] = new char[(len - needsEscapes) + needsEscapes * 3]; + int in = 0; + int out = 0; + while (in < len) { + char inCh = unicode.charAt(in++); + if (inCh >= 128 || !ESCAPED_US_ASCII_CHARS[inCh]) { + result[out++] = inCh; + } else { + // Only selected 7-bit US-ASCII characters are escaped + int c = inCh & 255; + result[out++] = '%'; + result[out++] = (char) hexEncode(c / 16); + result[out++] = (char) hexEncode(c % 16); } } - return false; + return new String(result, 0, out); } private static boolean needsUnescaping(String unicode) { @@ -427,13 +387,12 @@ public final class URIStringUtils { } /** - * Convert a URI, in US-ASCII, with escaped characters taken from UTF-8, to - * the corresponding Unicode string. On ill-formed input the results are - * undefined, specifically if the unescaped version is not a UTF-8 String, - * some String will be returned. + * Convert a URI, in UTF-16 with escaped characters taken from US-ASCII, to + * the corresponding unescaped Unicode string. On ill-formed input the results are + * undefined. * * @param uri the uri, in characters specified by RFC 2396 + '#'. - * @return the corresponding Unicode String. + * @return the corresponding unescaped Unicode String. * @exception IllegalArgumentException if a % hex sequence is ill-formed. */ public static String unescape(String uri) { @@ -441,26 +400,29 @@ public final class URIStringUtils { if (!needsUnescaping(uri)) return uri; - byte ascii[] = uri.getBytes("US-ASCII"); - byte utf8[] = new byte[ascii.length]; + int len = uri.length(); + String unicode = uri; + char result[] = new char[len]; int in = 0; int out = 0; - while ( in < ascii.length ) { - if (ascii[in] == (byte) '%') { - in++; - utf8[out++] = (byte) (hexDecode(ascii[in]) * 16 | hexDecode(ascii[in + 1])); + while (in < len) { + char inCh = unicode.charAt(in++); + if (inCh == '%') { + char d1 = unicode.charAt(in); + char d2 = unicode.charAt(in+1); + if (d1 > 127 || d2 > 127) + throw new IllegalArgumentException("Invalid hex digit escape sequence in " + uri + " at " + in); + result[out++] = (char) (hexDecode((byte) d1) * 16 | hexDecode((byte) d2)); in += 2; } else { - utf8[out++] = ascii[in++]; + result[out++] = inCh; } } - return new String(utf8, 0, out, "UTF-8"); + return new String(result, 0, out); } catch (IllegalArgumentException e) { throw new IllegalArgumentException("Problem while unescaping string: " + uri, e); - } catch (java.io.UnsupportedEncodingException e) { - throw new Error("The JVM is required to support UTF-8 and US-ASCII encodings."); - } catch (ArrayIndexOutOfBoundsException ee) { - throw new IllegalArgumentException("Incomplete Hex escape sequence in " + uri); + } catch (IndexOutOfBoundsException ee) { + throw new IllegalArgumentException("Incomplete hex digit escape sequence in " + uri); } } @@ -491,38 +453,36 @@ public final class URIStringUtils { * @param args */ public static void main(String[] args) { - String s; - s = "http://www.vtt.fi%2FSome- %25 Namespace/Jotain"; - System.out.println(String.format("escape+unescape: %s -> %s -> %s", s, escape(s), unescape(escape(s)))); - s = "http://www.vtt.fi%2FPSK"; - System.out.println(String.format("unescape: %s -> %s", s, unescape(s))); - s = "http://www.vtt.fi%2FSome-Namespace/Jotain / Muuta"; - System.out.println(String.format("escape: %s -> %s", s, escape(s))); - s = "Jotain / Muuta"; - System.out.println(String.format("escape: %s -> %s", s, escape(s))); - - System.out.println("escapeURI: " + escapeURI("foo/bar/org%2Fnet")); - System.out.println("escapeURI('...#...'): " + escapeURI("foo/bar#org%2Fnet")); - s = makeURI("http://foo.bar.com/foo/bar", "baz/guuk/org%2Fnet"); + String s = makeURI("http://foo.bar.com/foo/bar", "baz/guuk/org%2Fnet"); System.out.println("escapeURI: " + s); System.out.println("getNamespace: " + getNamespace(s)); System.out.println("getLocalName: " + getLocalName(s)); + System.out.println("escapeURI: " + escapeURI("foo/bar/org%2Fnet")); + System.out.println("escapeURI('...#...'): " + escapeURI("foo/bar#org%2Fnet")); + testEscape("/", "%2F"); testEscape("#", "%23"); testEscape("%", "%25"); testEscape("%01", "%2501"); testEscape("%GG", "%25GG"); + testEscape("säätö venttiili", "säätö%20venttiili"); + testEscape("säätö", "säätö"); + testEscape("Something / Else", "Something%20%2F%20Else"); + testEscape("http://www.vtt.fi%2FSome- %25 Namespace/Something", "http:%2F%2Fwww.vtt.fi%252FSome-%20%2525%20Namespace%2FSomething"); + testEscape("http://www.vtt.fi/PSK", "http:%2F%2Fwww.vtt.fi%2FPSK"); + testEscape("http://www.vtt.fi%2FSome-Namespace/Something / Else", "http:%2F%2Fwww.vtt.fi%252FSome-Namespace%2FSomething%20%2F%20Else"); } private static void testEscape(String unescaped, String expectedEscaped) { String esc = escape(unescaped); String unesc = unescape(esc); - System.out.format("escape('%s')='%s', unescape('%s')='%s'\n", unescaped, esc, esc, unesc); + System.out.format("escape('%s') -> '%s', unescape('%s') -> '%s'", unescaped, esc, esc, unesc); if (!esc.equals(expectedEscaped)) throw new AssertionError("escape('" + unescaped + "') was expected to return '" + expectedEscaped + "' but returned '" + esc + "'"); if (!unesc.equals(unescaped)) throw new AssertionError("unescape(escape('" + unescaped + "'))=unescape(" + esc + ") was expected to return '" + unescaped + "' but returned '" + unesc + "'"); + System.out.println(" OK"); } }