X-Git-Url: https://gerrit.simantics.org/r/gitweb?p=simantics%2Fplatform.git;a=blobdiff_plain;f=bundles%2Forg.simantics.databoard%2Fsrc%2Forg%2Fsimantics%2Fdataboard%2Futil%2FURIStringUtils.java;fp=bundles%2Forg.simantics.databoard%2Fsrc%2Forg%2Fsimantics%2Fdataboard%2Futil%2FURIStringUtils.java;h=a8b8627e45840a9398429c615806c6dec25e46a0;hp=a11579f07e774404394e448daf3a4d4799651512;hb=0ae2b770234dfc3cbb18bd38f324125cf0faca07;hpb=24e2b34260f219f0d1644ca7a138894980e25b14 diff --git a/bundles/org.simantics.databoard/src/org/simantics/databoard/util/URIStringUtils.java b/bundles/org.simantics.databoard/src/org/simantics/databoard/util/URIStringUtils.java index a11579f07..a8b8627e4 100644 --- a/bundles/org.simantics.databoard/src/org/simantics/databoard/util/URIStringUtils.java +++ b/bundles/org.simantics.databoard/src/org/simantics/databoard/util/URIStringUtils.java @@ -1,488 +1,488 @@ -/******************************************************************************* - * Copyright (c) 2007, 2010 Association for Decentralized Information Management - * in Industry THTH ry. - * All rights reserved. This program and the accompanying materials - * are made available under the terms of the Eclipse Public License v1.0 - * which accompanies this distribution, and is available at - * http://www.eclipse.org/legal/epl-v10.html - * - * Contributors: - * VTT Technical Research Centre of Finland - initial API and implementation - *******************************************************************************/ -/* The following copyright is attached because marked parts of the following code are - * copied and modified from Jena 2.4. - */ -/* - * (c) Copyright 2001, 2002, 2003, 2004, 2005, 2006 Hewlett-Packard Development Company, LP - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - * * Id: URIref.java,v 1.5 2006/03/22 13:52:49 andy_seaborne Exp - - AUTHOR: Jeremy J. Carroll - */ - -package org.simantics.databoard.util; - -import java.util.Arrays; -import java.util.List; - - -/** - * Contains utility methods for handling URI Strings in the context of ProCore - * and the Simantics platform. This includes URI escaping and unescaping and - * namespace/local name separation and joining. - * - *

- * URI's in this context are assumed to be formed as follows: - * - *

- * <namespace part>#<local name part>
- * 
- * - *

- * The implementation of {@link #escape(String)} and {@link #unescape(String)} - * is copied and modified from Jena's com.hp.hpl.jena.util.URIref. - *

- * - * @see Percent-encoding - * - * @author Tuukka Lehtonen - */ -public final class URIStringUtils { - - /** - * The character '/' is used as a path separator in URI namespace parts in ProCore. - */ - public static final char NAMESPACE_PATH_SEPARATOR = '/'; - - /** - * The '#' character is used to separate the local name and namespace parts - * of an URI, for example http://www.example.org#localName. - */ - public static final char NAMESPACE_LOCAL_SEPARATOR = '#'; - - /** - * Checks that only one separator character ({@link #NAMESPACE_LOCAL_SEPARATOR}) - * between namespace and localname exists in the specified URI and returns - * its index. - * - * @param uri the URI to search from - * @return the character index of the separator ranging from 0 to uri.length()-1 - * @throws IllegalArgumentException if no {@link #NAMESPACE_LOCAL_SEPARATOR} - * is found in the specified URI - */ - private static int assertSingleSeparatorPosition(String uri) { - int sharpIndex = uri.indexOf(NAMESPACE_LOCAL_SEPARATOR); - if (sharpIndex == -1) { - throw new IllegalArgumentException("URI '" + uri + "' does not contain any '" + NAMESPACE_LOCAL_SEPARATOR + "' separator characters"); - } - int nextSharpIndex = uri.indexOf(NAMESPACE_LOCAL_SEPARATOR, sharpIndex + 1); - if (nextSharpIndex != -1) { - throw new IllegalArgumentException("URI '" + uri + "' contains multiple '" + NAMESPACE_LOCAL_SEPARATOR + "' separator characters"); - } - return sharpIndex; - } - - /** - * Checks that only one separator character ( - * {@link #NAMESPACE_LOCAL_SEPARATOR}) between namespace and localname - * exists in the specified URI and returns its index. This version does not - * throw an exception when the separator is not found. - * - * @param uri the URI to search from - * @return the character index of the separator ranging from 0 to - * uri.length()-1 or -1 if no separator was found. - */ - private static int singleSeparatorPosition(String uri) { - int sharpIndex = uri.indexOf(NAMESPACE_LOCAL_SEPARATOR); - if (sharpIndex == -1) { - return -1; - } - int nextSharpIndex = uri.indexOf(NAMESPACE_LOCAL_SEPARATOR, sharpIndex + 1); - if (nextSharpIndex != -1) { - return -1; - } - return sharpIndex; - } - - /** - * Splits the specified URI into a namespace and a local name and returns - * the namespace. - * - *

- * Assumes that namespaces are always separated by - * {@link #NAMESPACE_LOCAL_SEPARATOR} characters. - *

- * - * @param uri the URI to split, must be non-null - * @return the namespace part of the specified URI - * @throws IllegalArgumentException for URIs without a - * {@link #NAMESPACE_LOCAL_SEPARATOR} - * @throws NullPointerException for null URIs - */ - public static String getNamespace(String uri) { - if (uri == null) - throw new NullPointerException("null uri"); - int separatorIndex = assertSingleSeparatorPosition(uri); - return uri.substring(0, separatorIndex); - } - - public static String getRVIParent(String uri) { - int childSeparator = uri.lastIndexOf(URIStringUtils.NAMESPACE_PATH_SEPARATOR); - int propertySeparator = uri.lastIndexOf(URIStringUtils.NAMESPACE_LOCAL_SEPARATOR); - int separator = Math.max(childSeparator, propertySeparator); - return uri.substring(0, separator); - } - - - /** - * Splits the specified URI into a namespace and a local name and returns - * the local name. - * - *

- * Assumes that namespaces are always separated by - * {@link #NAMESPACE_LOCAL_SEPARATOR} characters. - *

- * - * @param uri the URI to split, must be non-null - * @return the local name part of the specified URI - * @throws IllegalArgumentException for URIs without a - * {@link #NAMESPACE_LOCAL_SEPARATOR} - * @throws NullPointerException for null URIs - */ - public static String getLocalName(String uri) { - if (uri == null) - throw new NullPointerException("null uri"); - int separatorIndex = assertSingleSeparatorPosition(uri); - return uri.substring(separatorIndex + 1); - } - - public static String escapeName(String name) { - char[] chars = name.toCharArray(); - boolean modified = false; - for(int i=0;i splitURISCL(String uri) { - String[] result = splitURI(uri); - return Arrays.asList(result); - } - - /** - * Splits the specified URI into a namespace and a local name and returns - * them both separately as an array. - * - * @param uri the URI to split, must be non-null - * @return [0] = namespace, [1] = local name or null if the URI - * cannot be split. - * @throws NullPointerException for null URIs - */ - public static String[] trySplitNamespaceAndLocalName(String uri) { - if (uri == null) - throw new NullPointerException("null uri"); - int separatorIndex = singleSeparatorPosition(uri); - return separatorIndex == -1 ? - null - : new String[] { uri.substring(0, separatorIndex), uri.substring(separatorIndex + 1) }; - } - - /** - * Splits the specified URI into a namespace and a local name and returns - * them both separately as an array. - * - * @param uri the URI to split, must be non-null - * @return [0] = namespace, [1] = local name - * @throws IllegalArgumentException for URIs without a - * {@link #NAMESPACE_LOCAL_SEPARATOR} - * @throws NullPointerException for null URIs - */ - public static String[] splitNamespaceAndLocalName(String uri) { - if (uri == null) - throw new NullPointerException("null uri"); - int separatorIndex = assertSingleSeparatorPosition(uri); - return new String[] { uri.substring(0, separatorIndex), uri.substring(separatorIndex + 1) }; - } - - /** - * Converts a unicode string into an RFC 2396 compliant URI, using %NN - * escapes where appropriate, including the - * {@link #NAMESPACE_PATH_SEPARATOR} character. - * - * @param localName the string to escape - * @return the escaped string - * @throws NullPointerException for null URIs - */ - public static String escapeURI(String localName) { - if (localName == null) - throw new NullPointerException("null local name"); - return encode(localName); - } - - /** - * Add a suffix path to a namespace string, i.e. join the strings to - * together with the {@link #NAMESPACE_PATH_SEPARATOR} character in between. - * - * @param namespace the namespace to append to - * @param suffix the suffix to append - * @return the joined namespace - */ - public static String appendURINamespace(String namespace, String suffix) { - return new StringBuilder(namespace.length() + 1 + suffix.length()) - .append(namespace) - .append(NAMESPACE_PATH_SEPARATOR) - .append(suffix) - .toString(); - } - - /** - * Join a namespace and a localname to form an URI with - * {@link #NAMESPACE_LOCAL_SEPARATOR}. - * - * @param namespace the namespace part to join - * @param localName the localname part to join - * @return the joined URI - */ - public static String makeURI(String namespace, String localName) { - String escapedLocalName = escapeURI(localName); - return new StringBuilder(namespace.length() + 1 + escapedLocalName.length()) - .append(namespace) - .append(NAMESPACE_LOCAL_SEPARATOR) - .append(escapedLocalName) - .toString(); - } - - /** - * Convert a Unicode string, first to UTF-8 and then to an RFC 2396 - * compliant URI with optional fragment identifier using %NN escape - * mechanism as appropriate. The '%' character is assumed to already - * indicated an escape byte. The '%' character must be followed by two - * hexadecimal digits. - * - *

- * Meant to be used for encoding URI local name parts if it is desired to - * have '/' characters in the local name without creating a new namespace. - * For example these two URI's:
- * - * - * http://foo.bar.com/foo/bar/org%2Fcom
- * http://foo.bar.com/foo/bar/net%2Fcom
- *
- * - * have the same namespace http://foo.bar.com/foo/bar/ and - * different local names org%2Fcom and net%2Fcom - * or org/com and net/com in unescaped form. - *

- * - * @param unicode The uri, in characters specified by RFC 2396 + '#' - * @return The corresponding Unicode String - */ - public static String escape(String unicode) { - return encode(unicode); - } - - - /* - * RFC 3986 section 2.2 Reserved Characters (January 2005) - * !*'();:@&=+$,/?#[] - */ - private static boolean[] ESCAPED_US_ASCII_CHARS = new boolean[128]; - - static { - ESCAPED_US_ASCII_CHARS[' '] = true; - // IMPORTANT NOTE: every time escape is invoked, all input needs to be escaped, - // i.e. escape("%01") should result in "%2501", not "%01". - // escape and unescape form a bijection, where neither - // of them is an idempotent operation. - ESCAPED_US_ASCII_CHARS['%'] = true; - // '#' and '/' are URL segment/fragment delimiters, need to be escaped in names. - ESCAPED_US_ASCII_CHARS['#'] = true; - ESCAPED_US_ASCII_CHARS['/'] = true; - // Escape '&' characters to avoid them being interpreted as SGML entities. - ESCAPED_US_ASCII_CHARS['&'] = true; - } - - private static int needsEscaping(String unicode) { - int len = unicode.length(); - int escapeCount = 0; - for (int i = 0; i < len; ++i) { - char ch = unicode.charAt(i); - if (ch < 128 && ESCAPED_US_ASCII_CHARS[ch]) - ++escapeCount; - } - return escapeCount; - } - - private static String encode(String unicode) { - int needsEscapes = needsEscaping(unicode); - if (needsEscapes == 0) - return unicode; - - int len = unicode.length(); - char result[] = new char[(len - needsEscapes) + needsEscapes * 3]; - int in = 0; - int out = 0; - while (in < len) { - char inCh = unicode.charAt(in++); - if (inCh >= 128 || !ESCAPED_US_ASCII_CHARS[inCh]) { - result[out++] = inCh; - } else { - // Only selected 7-bit US-ASCII characters are escaped - int c = inCh & 255; - result[out++] = '%'; - result[out++] = (char) hexEncode(c / 16); - result[out++] = (char) hexEncode(c % 16); - } - } - return new String(result, 0, out); - } - - private static boolean needsUnescaping(String unicode) { - return unicode.indexOf('%') > -1; - } - - /** - * Convert a URI, in UTF-16 with escaped characters taken from US-ASCII, to - * the corresponding unescaped Unicode string. On ill-formed input the results are - * undefined. - * - * @param uri the uri, in characters specified by RFC 2396 + '#'. - * @return the corresponding unescaped Unicode String. - * @exception IllegalArgumentException if a % hex sequence is ill-formed. - */ - public static String unescape(String uri) { - try { - if (!needsUnescaping(uri)) - return uri; - - int len = uri.length(); - String unicode = uri; - char result[] = new char[len]; - int in = 0; - int out = 0; - while (in < len) { - char inCh = unicode.charAt(in++); - if (inCh == '%') { - char d1 = unicode.charAt(in); - char d2 = unicode.charAt(in+1); - if (d1 > 127 || d2 > 127) - throw new IllegalArgumentException("Invalid hex digit escape sequence in " + uri + " at " + in); - result[out++] = (char) (hexDecode((byte) d1) * 16 | hexDecode((byte) d2)); - in += 2; - } else { - result[out++] = inCh; - } - } - return new String(result, 0, out); - } catch (IllegalArgumentException e) { - throw new IllegalArgumentException("Problem while unescaping string: " + uri, e); - } catch (IndexOutOfBoundsException ee) { - throw new IllegalArgumentException("Incomplete hex digit escape sequence in " + uri); - } - } - - /* Copied from Jena 2.4 com.hp.hpl.jena.util.URIref */ - private static byte hexEncode(int i) { - if (i < 10) - return (byte) ('0' + i); - else - return (byte)('A' + i - 10); - } - - /* Copied from Jena 2.4 com.hp.hpl.jena.util.URIref */ - private static int hexDecode(byte b) { - switch (b) { - case (byte)'a': case (byte)'b': case (byte)'c': case (byte)'d': case (byte)'e': case (byte)'f': - return ((b) & 255) - 'a' + 10; - case (byte)'A': case (byte)'B': case (byte)'C': case (byte)'D': case (byte)'E': case (byte)'F': - return b - (byte) 'A' + 10; - case (byte)'0': case (byte)'1': case (byte)'2': case (byte)'3': case (byte)'4': case (byte)'5': case (byte)'6': case (byte)'7': case (byte)'8': case (byte)'9': - return b - (byte) '0'; - default: - throw new IllegalArgumentException("Bad Hex escape character: " + ((b)&255) ); - } - } - - /** - * Some simple tests. - * @param args - */ - public static void main(String[] args) { - String s = makeURI("http://foo.bar.com/foo/bar", "baz/guuk/org%2Fnet"); - System.out.println("escapeURI: " + s); - System.out.println("getNamespace: " + getNamespace(s)); - System.out.println("getLocalName: " + getLocalName(s)); - - System.out.println("escapeURI: " + escapeURI("foo/bar/org%2Fnet")); - System.out.println("escapeURI('...#...'): " + escapeURI("foo/bar#org%2Fnet")); - - testEscape("/", "%2F"); - testEscape("#", "%23"); - testEscape("%", "%25"); - testEscape("%01", "%2501"); - testEscape("%GG", "%25GG"); - testEscape("säätö venttiili", "säätö%20venttiili"); - testEscape("säätö", "säätö"); - testEscape("Something / Else", "Something%20%2F%20Else"); - testEscape("http://www.vtt.fi%2FSome- %25 Namespace/Something", "http:%2F%2Fwww.vtt.fi%252FSome-%20%2525%20Namespace%2FSomething"); - testEscape("http://www.vtt.fi/PSK", "http:%2F%2Fwww.vtt.fi%2FPSK"); - testEscape("http://www.vtt.fi%2FSome-Namespace/Something / Else", "http:%2F%2Fwww.vtt.fi%252FSome-Namespace%2FSomething%20%2F%20Else"); - } - - private static void testEscape(String unescaped, String expectedEscaped) { - String esc = escape(unescaped); - String unesc = unescape(esc); - System.out.format("escape('%s') -> '%s', unescape('%s') -> '%s'", unescaped, esc, esc, unesc); - if (!esc.equals(expectedEscaped)) - throw new AssertionError("escape('" + unescaped + "') was expected to return '" + expectedEscaped + "' but returned '" + esc + "'"); - if (!unesc.equals(unescaped)) - throw new AssertionError("unescape(escape('" + unescaped + "'))=unescape(" + esc + ") was expected to return '" + unescaped + "' but returned '" + unesc + "'"); - System.out.println(" OK"); - } - -} +/******************************************************************************* + * Copyright (c) 2007, 2010 Association for Decentralized Information Management + * in Industry THTH ry. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * VTT Technical Research Centre of Finland - initial API and implementation + *******************************************************************************/ +/* The following copyright is attached because marked parts of the following code are + * copied and modified from Jena 2.4. + */ +/* + * (c) Copyright 2001, 2002, 2003, 2004, 2005, 2006 Hewlett-Packard Development Company, LP + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + * * Id: URIref.java,v 1.5 2006/03/22 13:52:49 andy_seaborne Exp + + AUTHOR: Jeremy J. Carroll + */ + +package org.simantics.databoard.util; + +import java.util.Arrays; +import java.util.List; + + +/** + * Contains utility methods for handling URI Strings in the context of ProCore + * and the Simantics platform. This includes URI escaping and unescaping and + * namespace/local name separation and joining. + * + *

+ * URI's in this context are assumed to be formed as follows: + * + *

+ * <namespace part>#<local name part>
+ * 
+ * + *

+ * The implementation of {@link #escape(String)} and {@link #unescape(String)} + * is copied and modified from Jena's com.hp.hpl.jena.util.URIref. + *

+ * + * @see Percent-encoding + * + * @author Tuukka Lehtonen + */ +public final class URIStringUtils { + + /** + * The character '/' is used as a path separator in URI namespace parts in ProCore. + */ + public static final char NAMESPACE_PATH_SEPARATOR = '/'; + + /** + * The '#' character is used to separate the local name and namespace parts + * of an URI, for example http://www.example.org#localName. + */ + public static final char NAMESPACE_LOCAL_SEPARATOR = '#'; + + /** + * Checks that only one separator character ({@link #NAMESPACE_LOCAL_SEPARATOR}) + * between namespace and localname exists in the specified URI and returns + * its index. + * + * @param uri the URI to search from + * @return the character index of the separator ranging from 0 to uri.length()-1 + * @throws IllegalArgumentException if no {@link #NAMESPACE_LOCAL_SEPARATOR} + * is found in the specified URI + */ + private static int assertSingleSeparatorPosition(String uri) { + int sharpIndex = uri.indexOf(NAMESPACE_LOCAL_SEPARATOR); + if (sharpIndex == -1) { + throw new IllegalArgumentException("URI '" + uri + "' does not contain any '" + NAMESPACE_LOCAL_SEPARATOR + "' separator characters"); + } + int nextSharpIndex = uri.indexOf(NAMESPACE_LOCAL_SEPARATOR, sharpIndex + 1); + if (nextSharpIndex != -1) { + throw new IllegalArgumentException("URI '" + uri + "' contains multiple '" + NAMESPACE_LOCAL_SEPARATOR + "' separator characters"); + } + return sharpIndex; + } + + /** + * Checks that only one separator character ( + * {@link #NAMESPACE_LOCAL_SEPARATOR}) between namespace and localname + * exists in the specified URI and returns its index. This version does not + * throw an exception when the separator is not found. + * + * @param uri the URI to search from + * @return the character index of the separator ranging from 0 to + * uri.length()-1 or -1 if no separator was found. + */ + private static int singleSeparatorPosition(String uri) { + int sharpIndex = uri.indexOf(NAMESPACE_LOCAL_SEPARATOR); + if (sharpIndex == -1) { + return -1; + } + int nextSharpIndex = uri.indexOf(NAMESPACE_LOCAL_SEPARATOR, sharpIndex + 1); + if (nextSharpIndex != -1) { + return -1; + } + return sharpIndex; + } + + /** + * Splits the specified URI into a namespace and a local name and returns + * the namespace. + * + *

+ * Assumes that namespaces are always separated by + * {@link #NAMESPACE_LOCAL_SEPARATOR} characters. + *

+ * + * @param uri the URI to split, must be non-null + * @return the namespace part of the specified URI + * @throws IllegalArgumentException for URIs without a + * {@link #NAMESPACE_LOCAL_SEPARATOR} + * @throws NullPointerException for null URIs + */ + public static String getNamespace(String uri) { + if (uri == null) + throw new NullPointerException("null uri"); + int separatorIndex = assertSingleSeparatorPosition(uri); + return uri.substring(0, separatorIndex); + } + + public static String getRVIParent(String uri) { + int childSeparator = uri.lastIndexOf(URIStringUtils.NAMESPACE_PATH_SEPARATOR); + int propertySeparator = uri.lastIndexOf(URIStringUtils.NAMESPACE_LOCAL_SEPARATOR); + int separator = Math.max(childSeparator, propertySeparator); + return uri.substring(0, separator); + } + + + /** + * Splits the specified URI into a namespace and a local name and returns + * the local name. + * + *

+ * Assumes that namespaces are always separated by + * {@link #NAMESPACE_LOCAL_SEPARATOR} characters. + *

+ * + * @param uri the URI to split, must be non-null + * @return the local name part of the specified URI + * @throws IllegalArgumentException for URIs without a + * {@link #NAMESPACE_LOCAL_SEPARATOR} + * @throws NullPointerException for null URIs + */ + public static String getLocalName(String uri) { + if (uri == null) + throw new NullPointerException("null uri"); + int separatorIndex = assertSingleSeparatorPosition(uri); + return uri.substring(separatorIndex + 1); + } + + public static String escapeName(String name) { + char[] chars = name.toCharArray(); + boolean modified = false; + for(int i=0;i splitURISCL(String uri) { + String[] result = splitURI(uri); + return Arrays.asList(result); + } + + /** + * Splits the specified URI into a namespace and a local name and returns + * them both separately as an array. + * + * @param uri the URI to split, must be non-null + * @return [0] = namespace, [1] = local name or null if the URI + * cannot be split. + * @throws NullPointerException for null URIs + */ + public static String[] trySplitNamespaceAndLocalName(String uri) { + if (uri == null) + throw new NullPointerException("null uri"); + int separatorIndex = singleSeparatorPosition(uri); + return separatorIndex == -1 ? + null + : new String[] { uri.substring(0, separatorIndex), uri.substring(separatorIndex + 1) }; + } + + /** + * Splits the specified URI into a namespace and a local name and returns + * them both separately as an array. + * + * @param uri the URI to split, must be non-null + * @return [0] = namespace, [1] = local name + * @throws IllegalArgumentException for URIs without a + * {@link #NAMESPACE_LOCAL_SEPARATOR} + * @throws NullPointerException for null URIs + */ + public static String[] splitNamespaceAndLocalName(String uri) { + if (uri == null) + throw new NullPointerException("null uri"); + int separatorIndex = assertSingleSeparatorPosition(uri); + return new String[] { uri.substring(0, separatorIndex), uri.substring(separatorIndex + 1) }; + } + + /** + * Converts a unicode string into an RFC 2396 compliant URI, using %NN + * escapes where appropriate, including the + * {@link #NAMESPACE_PATH_SEPARATOR} character. + * + * @param localName the string to escape + * @return the escaped string + * @throws NullPointerException for null URIs + */ + public static String escapeURI(String localName) { + if (localName == null) + throw new NullPointerException("null local name"); + return encode(localName); + } + + /** + * Add a suffix path to a namespace string, i.e. join the strings to + * together with the {@link #NAMESPACE_PATH_SEPARATOR} character in between. + * + * @param namespace the namespace to append to + * @param suffix the suffix to append + * @return the joined namespace + */ + public static String appendURINamespace(String namespace, String suffix) { + return new StringBuilder(namespace.length() + 1 + suffix.length()) + .append(namespace) + .append(NAMESPACE_PATH_SEPARATOR) + .append(suffix) + .toString(); + } + + /** + * Join a namespace and a localname to form an URI with + * {@link #NAMESPACE_LOCAL_SEPARATOR}. + * + * @param namespace the namespace part to join + * @param localName the localname part to join + * @return the joined URI + */ + public static String makeURI(String namespace, String localName) { + String escapedLocalName = escapeURI(localName); + return new StringBuilder(namespace.length() + 1 + escapedLocalName.length()) + .append(namespace) + .append(NAMESPACE_LOCAL_SEPARATOR) + .append(escapedLocalName) + .toString(); + } + + /** + * Convert a Unicode string, first to UTF-8 and then to an RFC 2396 + * compliant URI with optional fragment identifier using %NN escape + * mechanism as appropriate. The '%' character is assumed to already + * indicated an escape byte. The '%' character must be followed by two + * hexadecimal digits. + * + *

+ * Meant to be used for encoding URI local name parts if it is desired to + * have '/' characters in the local name without creating a new namespace. + * For example these two URI's:
+ * + * + * http://foo.bar.com/foo/bar/org%2Fcom
+ * http://foo.bar.com/foo/bar/net%2Fcom
+ *
+ * + * have the same namespace http://foo.bar.com/foo/bar/ and + * different local names org%2Fcom and net%2Fcom + * or org/com and net/com in unescaped form. + *

+ * + * @param unicode The uri, in characters specified by RFC 2396 + '#' + * @return The corresponding Unicode String + */ + public static String escape(String unicode) { + return encode(unicode); + } + + + /* + * RFC 3986 section 2.2 Reserved Characters (January 2005) + * !*'();:@&=+$,/?#[] + */ + private static boolean[] ESCAPED_US_ASCII_CHARS = new boolean[128]; + + static { + ESCAPED_US_ASCII_CHARS[' '] = true; + // IMPORTANT NOTE: every time escape is invoked, all input needs to be escaped, + // i.e. escape("%01") should result in "%2501", not "%01". + // escape and unescape form a bijection, where neither + // of them is an idempotent operation. + ESCAPED_US_ASCII_CHARS['%'] = true; + // '#' and '/' are URL segment/fragment delimiters, need to be escaped in names. + ESCAPED_US_ASCII_CHARS['#'] = true; + ESCAPED_US_ASCII_CHARS['/'] = true; + // Escape '&' characters to avoid them being interpreted as SGML entities. + ESCAPED_US_ASCII_CHARS['&'] = true; + } + + private static int needsEscaping(String unicode) { + int len = unicode.length(); + int escapeCount = 0; + for (int i = 0; i < len; ++i) { + char ch = unicode.charAt(i); + if (ch < 128 && ESCAPED_US_ASCII_CHARS[ch]) + ++escapeCount; + } + return escapeCount; + } + + private static String encode(String unicode) { + int needsEscapes = needsEscaping(unicode); + if (needsEscapes == 0) + return unicode; + + int len = unicode.length(); + char result[] = new char[(len - needsEscapes) + needsEscapes * 3]; + int in = 0; + int out = 0; + while (in < len) { + char inCh = unicode.charAt(in++); + if (inCh >= 128 || !ESCAPED_US_ASCII_CHARS[inCh]) { + result[out++] = inCh; + } else { + // Only selected 7-bit US-ASCII characters are escaped + int c = inCh & 255; + result[out++] = '%'; + result[out++] = (char) hexEncode(c / 16); + result[out++] = (char) hexEncode(c % 16); + } + } + return new String(result, 0, out); + } + + private static boolean needsUnescaping(String unicode) { + return unicode.indexOf('%') > -1; + } + + /** + * Convert a URI, in UTF-16 with escaped characters taken from US-ASCII, to + * the corresponding unescaped Unicode string. On ill-formed input the results are + * undefined. + * + * @param uri the uri, in characters specified by RFC 2396 + '#'. + * @return the corresponding unescaped Unicode String. + * @exception IllegalArgumentException if a % hex sequence is ill-formed. + */ + public static String unescape(String uri) { + try { + if (!needsUnescaping(uri)) + return uri; + + int len = uri.length(); + String unicode = uri; + char result[] = new char[len]; + int in = 0; + int out = 0; + while (in < len) { + char inCh = unicode.charAt(in++); + if (inCh == '%') { + char d1 = unicode.charAt(in); + char d2 = unicode.charAt(in+1); + if (d1 > 127 || d2 > 127) + throw new IllegalArgumentException("Invalid hex digit escape sequence in " + uri + " at " + in); + result[out++] = (char) (hexDecode((byte) d1) * 16 | hexDecode((byte) d2)); + in += 2; + } else { + result[out++] = inCh; + } + } + return new String(result, 0, out); + } catch (IllegalArgumentException e) { + throw new IllegalArgumentException("Problem while unescaping string: " + uri, e); + } catch (IndexOutOfBoundsException ee) { + throw new IllegalArgumentException("Incomplete hex digit escape sequence in " + uri); + } + } + + /* Copied from Jena 2.4 com.hp.hpl.jena.util.URIref */ + private static byte hexEncode(int i) { + if (i < 10) + return (byte) ('0' + i); + else + return (byte)('A' + i - 10); + } + + /* Copied from Jena 2.4 com.hp.hpl.jena.util.URIref */ + private static int hexDecode(byte b) { + switch (b) { + case (byte)'a': case (byte)'b': case (byte)'c': case (byte)'d': case (byte)'e': case (byte)'f': + return ((b) & 255) - 'a' + 10; + case (byte)'A': case (byte)'B': case (byte)'C': case (byte)'D': case (byte)'E': case (byte)'F': + return b - (byte) 'A' + 10; + case (byte)'0': case (byte)'1': case (byte)'2': case (byte)'3': case (byte)'4': case (byte)'5': case (byte)'6': case (byte)'7': case (byte)'8': case (byte)'9': + return b - (byte) '0'; + default: + throw new IllegalArgumentException("Bad Hex escape character: " + ((b)&255) ); + } + } + + /** + * Some simple tests. + * @param args + */ + public static void main(String[] args) { + String s = makeURI("http://foo.bar.com/foo/bar", "baz/guuk/org%2Fnet"); + System.out.println("escapeURI: " + s); + System.out.println("getNamespace: " + getNamespace(s)); + System.out.println("getLocalName: " + getLocalName(s)); + + System.out.println("escapeURI: " + escapeURI("foo/bar/org%2Fnet")); + System.out.println("escapeURI('...#...'): " + escapeURI("foo/bar#org%2Fnet")); + + testEscape("/", "%2F"); + testEscape("#", "%23"); + testEscape("%", "%25"); + testEscape("%01", "%2501"); + testEscape("%GG", "%25GG"); + testEscape("säätö venttiili", "säätö%20venttiili"); + testEscape("säätö", "säätö"); + testEscape("Something / Else", "Something%20%2F%20Else"); + testEscape("http://www.vtt.fi%2FSome- %25 Namespace/Something", "http:%2F%2Fwww.vtt.fi%252FSome-%20%2525%20Namespace%2FSomething"); + testEscape("http://www.vtt.fi/PSK", "http:%2F%2Fwww.vtt.fi%2FPSK"); + testEscape("http://www.vtt.fi%2FSome-Namespace/Something / Else", "http:%2F%2Fwww.vtt.fi%252FSome-Namespace%2FSomething%20%2F%20Else"); + } + + private static void testEscape(String unescaped, String expectedEscaped) { + String esc = escape(unescaped); + String unesc = unescape(esc); + System.out.format("escape('%s') -> '%s', unescape('%s') -> '%s'", unescaped, esc, esc, unesc); + if (!esc.equals(expectedEscaped)) + throw new AssertionError("escape('" + unescaped + "') was expected to return '" + expectedEscaped + "' but returned '" + esc + "'"); + if (!unesc.equals(unescaped)) + throw new AssertionError("unescape(escape('" + unescaped + "'))=unescape(" + esc + ") was expected to return '" + unescaped + "' but returned '" + unesc + "'"); + System.out.println(" OK"); + } + +}