-/*******************************************************************************\r
- * Copyright (c) 2007, 2010 Association for Decentralized Information Management\r
- * in Industry THTH ry.\r
- * All rights reserved. This program and the accompanying materials\r
- * are made available under the terms of the Eclipse Public License v1.0\r
- * which accompanies this distribution, and is available at\r
- * http://www.eclipse.org/legal/epl-v10.html\r
- *\r
- * Contributors:\r
- * VTT Technical Research Centre of Finland - initial API and implementation\r
- *******************************************************************************/\r
-/* The following copyright is attached because marked parts of the following code are\r
- * copied and modified from Jena 2.4.\r
- */\r
-/*\r
- * (c) Copyright 2001, 2002, 2003, 2004, 2005, 2006 Hewlett-Packard Development Company, LP\r
- * All rights reserved.\r
- *\r
- * Redistribution and use in source and binary forms, with or without\r
- * modification, are permitted provided that the following conditions\r
- * are met:\r
- * 1. Redistributions of source code must retain the above copyright\r
- * notice, this list of conditions and the following disclaimer.\r
- * 2. Redistributions in binary form must reproduce the above copyright\r
- * notice, this list of conditions and the following disclaimer in the\r
- * documentation and/or other materials provided with the distribution.\r
- * 3. The name of the author may not be used to endorse or promote products\r
- * derived from this software without specific prior written permission.\r
-\r
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR\r
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES\r
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.\r
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,\r
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT\r
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\r
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\r
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\r
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF\r
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
-\r
- * * Id: URIref.java,v 1.5 2006/03/22 13:52:49 andy_seaborne Exp\r
-\r
- AUTHOR: Jeremy J. Carroll\r
- */\r
-\r
-package org.simantics.databoard.util;\r
-\r
-import java.util.Arrays;\r
-import java.util.List;\r
-\r
-\r
-/**\r
- * Contains utility methods for handling URI Strings in the context of ProCore\r
- * and the Simantics platform. This includes URI escaping and unescaping and\r
- * namespace/local name separation and joining.\r
- * \r
- * <p>\r
- * URI's in this context are assumed to be formed as follows:\r
- * \r
- * <pre>\r
- * <namespace part>#<local name part>\r
- * </pre>\r
- * \r
- * <p>\r
- * The implementation of {@link #escape(String)} and {@link #unescape(String)}\r
- * is copied and modified from Jena's com.hp.hpl.jena.util.URIref.\r
- * </p>\r
- * \r
- * @see <a href="http://en.wikipedia.org/wiki/Percent-encoding">Percent-encoding</a>\r
- * \r
- * @author Tuukka Lehtonen\r
- */\r
-public final class URIStringUtils {\r
-\r
- /**\r
- * The character '/' is used as a path separator in URI namespace parts in ProCore.\r
- */\r
- public static final char NAMESPACE_PATH_SEPARATOR = '/';\r
-\r
- /**\r
- * The '#' character is used to separate the local name and namespace parts\r
- * of an URI, for example <code>http://www.example.org#localName</code>.\r
- */\r
- public static final char NAMESPACE_LOCAL_SEPARATOR = '#';\r
-\r
- /**\r
- * Checks that only one separator character ({@link #NAMESPACE_LOCAL_SEPARATOR})\r
- * between namespace and localname exists in the specified URI and returns\r
- * its index.\r
- * \r
- * @param uri the URI to search from\r
- * @return the character index of the separator ranging from 0 to uri.length()-1\r
- * @throws IllegalArgumentException if no {@link #NAMESPACE_LOCAL_SEPARATOR}\r
- * is found in the specified URI\r
- */\r
- private static int assertSingleSeparatorPosition(String uri) {\r
- int sharpIndex = uri.indexOf(NAMESPACE_LOCAL_SEPARATOR);\r
- if (sharpIndex == -1) {\r
- throw new IllegalArgumentException("URI '" + uri + "' does not contain any '" + NAMESPACE_LOCAL_SEPARATOR + "' separator characters");\r
- }\r
- int nextSharpIndex = uri.indexOf(NAMESPACE_LOCAL_SEPARATOR, sharpIndex + 1);\r
- if (nextSharpIndex != -1) {\r
- throw new IllegalArgumentException("URI '" + uri + "' contains multiple '" + NAMESPACE_LOCAL_SEPARATOR + "' separator characters");\r
- }\r
- return sharpIndex;\r
- }\r
-\r
- /**\r
- * Checks that only one separator character (\r
- * {@link #NAMESPACE_LOCAL_SEPARATOR}) between namespace and localname\r
- * exists in the specified URI and returns its index. This version does not\r
- * throw an exception when the separator is not found.\r
- * \r
- * @param uri the URI to search from\r
- * @return the character index of the separator ranging from 0 to\r
- * uri.length()-1 or -1 if no separator was found.\r
- */\r
- private static int singleSeparatorPosition(String uri) {\r
- int sharpIndex = uri.indexOf(NAMESPACE_LOCAL_SEPARATOR);\r
- if (sharpIndex == -1) {\r
- return -1;\r
- }\r
- int nextSharpIndex = uri.indexOf(NAMESPACE_LOCAL_SEPARATOR, sharpIndex + 1);\r
- if (nextSharpIndex != -1) {\r
- return -1;\r
- }\r
- return sharpIndex;\r
- }\r
-\r
- /**\r
- * Splits the specified URI into a namespace and a local name and returns\r
- * the namespace.\r
- * \r
- * <p>\r
- * Assumes that namespaces are always separated by\r
- * {@link #NAMESPACE_LOCAL_SEPARATOR} characters.\r
- * </p>\r
- * \r
- * @param uri the URI to split, must be non-null\r
- * @return the namespace part of the specified URI\r
- * @throws IllegalArgumentException for URIs without a\r
- * {@link #NAMESPACE_LOCAL_SEPARATOR}\r
- * @throws NullPointerException for <code>null</code> URIs\r
- */\r
- public static String getNamespace(String uri) {\r
- if (uri == null)\r
- throw new NullPointerException("null uri");\r
- int separatorIndex = assertSingleSeparatorPosition(uri);\r
- return uri.substring(0, separatorIndex);\r
- }\r
- \r
- public static String getRVIParent(String uri) {\r
- int childSeparator = uri.lastIndexOf(URIStringUtils.NAMESPACE_PATH_SEPARATOR);\r
- int propertySeparator = uri.lastIndexOf(URIStringUtils.NAMESPACE_LOCAL_SEPARATOR);\r
- int separator = Math.max(childSeparator, propertySeparator);\r
- return uri.substring(0, separator);\r
- }\r
- \r
-\r
- /**\r
- * Splits the specified URI into a namespace and a local name and returns\r
- * the local name.\r
- * \r
- * <p>\r
- * Assumes that namespaces are always separated by\r
- * {@link #NAMESPACE_LOCAL_SEPARATOR} characters.\r
- * </p>\r
- * \r
- * @param uri the URI to split, must be non-null\r
- * @return the local name part of the specified URI\r
- * @throws IllegalArgumentException for URIs without a\r
- * {@link #NAMESPACE_LOCAL_SEPARATOR}\r
- * @throws NullPointerException for <code>null</code> URIs\r
- */\r
- public static String getLocalName(String uri) {\r
- if (uri == null)\r
- throw new NullPointerException("null uri");\r
- int separatorIndex = assertSingleSeparatorPosition(uri);\r
- return uri.substring(separatorIndex + 1);\r
- }\r
-\r
- public static String escapeName(String name) {\r
- char[] chars = name.toCharArray();\r
- boolean modified = false;\r
- for(int i=0;i<chars.length;++i)\r
- if(!Character.isJavaIdentifierPart(chars[i])) {\r
- chars[i] = '_';\r
- modified = true;\r
- }\r
- if(modified)\r
- return new String(chars);\r
- else\r
- return name;\r
- }\r
-\r
- final private static String HTTP_PREFIX = "http://";\r
- final private static int HTTP_POSITION = HTTP_PREFIX.length();\r
-\r
- public static String[] splitURI(String uri) {\r
- int nextPathSeparator = uri.lastIndexOf(URIStringUtils.NAMESPACE_PATH_SEPARATOR);\r
- if (nextPathSeparator == -1) return null;\r
- if (nextPathSeparator == HTTP_POSITION - 1) {\r
- if(uri.startsWith(HTTP_PREFIX)) return new String[] { HTTP_PREFIX, uri.substring(HTTP_POSITION, uri.length()) };\r
- else return null;\r
- }\r
- return new String[] {\r
- uri.substring(0, nextPathSeparator),\r
- uri.substring(nextPathSeparator + 1, uri.length())\r
- };\r
- }\r
-\r
- public static List<String> splitURISCL(String uri) {\r
- String[] result = splitURI(uri);\r
- return Arrays.asList(result);\r
- }\r
-\r
- /**\r
- * Splits the specified URI into a namespace and a local name and returns\r
- * them both separately as an array.\r
- * \r
- * @param uri the URI to split, must be non-null\r
- * @return [0] = namespace, [1] = local name or <code>null</code> if the URI\r
- * cannot be split.\r
- * @throws NullPointerException for <code>null</code> URIs\r
- */\r
- public static String[] trySplitNamespaceAndLocalName(String uri) {\r
- if (uri == null)\r
- throw new NullPointerException("null uri");\r
- int separatorIndex = singleSeparatorPosition(uri);\r
- return separatorIndex == -1 ?\r
- null\r
- : new String[] { uri.substring(0, separatorIndex), uri.substring(separatorIndex + 1) };\r
- }\r
-\r
- /**\r
- * Splits the specified URI into a namespace and a local name and returns\r
- * them both separately as an array.\r
- * \r
- * @param uri the URI to split, must be non-null\r
- * @return [0] = namespace, [1] = local name\r
- * @throws IllegalArgumentException for URIs without a\r
- * {@link #NAMESPACE_LOCAL_SEPARATOR}\r
- * @throws NullPointerException for <code>null</code> URIs\r
- */\r
- public static String[] splitNamespaceAndLocalName(String uri) {\r
- if (uri == null)\r
- throw new NullPointerException("null uri");\r
- int separatorIndex = assertSingleSeparatorPosition(uri);\r
- return new String[] { uri.substring(0, separatorIndex), uri.substring(separatorIndex + 1) };\r
- }\r
-\r
- /**\r
- * Converts a unicode string into an RFC 2396 compliant URI, using %NN\r
- * escapes where appropriate, including the\r
- * {@link #NAMESPACE_PATH_SEPARATOR} character.\r
- * \r
- * @param localName the string to escape\r
- * @return the escaped string\r
- * @throws NullPointerException for <code>null</code> URIs\r
- */\r
- public static String escapeURI(String localName) {\r
- if (localName == null)\r
- throw new NullPointerException("null local name");\r
- return encode(localName);\r
- }\r
-\r
- /**\r
- * Add a suffix path to a namespace string, i.e. join the strings to\r
- * together with the {@link #NAMESPACE_PATH_SEPARATOR} character in between.\r
- * \r
- * @param namespace the namespace to append to\r
- * @param suffix the suffix to append\r
- * @return the joined namespace\r
- */\r
- public static String appendURINamespace(String namespace, String suffix) {\r
- return new StringBuilder(namespace.length() + 1 + suffix.length())\r
- .append(namespace)\r
- .append(NAMESPACE_PATH_SEPARATOR)\r
- .append(suffix)\r
- .toString();\r
- }\r
-\r
- /**\r
- * Join a namespace and a localname to form an URI with\r
- * {@link #NAMESPACE_LOCAL_SEPARATOR}.\r
- * \r
- * @param namespace the namespace part to join\r
- * @param localName the localname part to join\r
- * @return the joined URI\r
- */\r
- public static String makeURI(String namespace, String localName) {\r
- String escapedLocalName = escapeURI(localName);\r
- return new StringBuilder(namespace.length() + 1 + escapedLocalName.length())\r
- .append(namespace)\r
- .append(NAMESPACE_LOCAL_SEPARATOR)\r
- .append(escapedLocalName)\r
- .toString();\r
- }\r
-\r
- /**\r
- * Convert a Unicode string, first to UTF-8 and then to an RFC 2396\r
- * compliant URI with optional fragment identifier using %NN escape\r
- * mechanism as appropriate. The '%' character is assumed to already\r
- * indicated an escape byte. The '%' character must be followed by two\r
- * hexadecimal digits.\r
- * \r
- * <p>\r
- * Meant to be used for encoding URI local name parts if it is desired to\r
- * have '/' characters in the local name without creating a new namespace.\r
- * For example these two URI's:<br/>\r
- * \r
- * <code>\r
- * http://foo.bar.com/foo/bar/org%2Fcom<br/>\r
- * http://foo.bar.com/foo/bar/net%2Fcom<br/>\r
- * </code>\r
- * \r
- * have the same namespace <code>http://foo.bar.com/foo/bar/</code> and\r
- * different local names <code>org%2Fcom</code> and <code>net%2Fcom</code>\r
- * or <code>org/com</code> and <code>net/com</code> in unescaped form.\r
- * </p>\r
- * \r
- * @param unicode The uri, in characters specified by RFC 2396 + '#'\r
- * @return The corresponding Unicode String\r
- */\r
- public static String escape(String unicode) {\r
- return encode(unicode);\r
- }\r
-\r
-\r
- /*\r
- * RFC 3986 section 2.2 Reserved Characters (January 2005)\r
- * !*'();:@&=+$,/?#[]\r
- */\r
- private static boolean[] ESCAPED_US_ASCII_CHARS = new boolean[128];\r
-\r
- static {\r
- ESCAPED_US_ASCII_CHARS[' '] = true;\r
- // IMPORTANT NOTE: every time escape is invoked, all input needs to be escaped,\r
- // i.e. escape("%01") should result in "%2501", not "%01".\r
- // escape and unescape form a bijection, where neither\r
- // of them is an idempotent operation. \r
- ESCAPED_US_ASCII_CHARS['%'] = true;\r
- // '#' and '/' are URL segment/fragment delimiters, need to be escaped in names.\r
- ESCAPED_US_ASCII_CHARS['#'] = true;\r
- ESCAPED_US_ASCII_CHARS['/'] = true;\r
- // Escape '&' characters to avoid them being interpreted as SGML entities.\r
- ESCAPED_US_ASCII_CHARS['&'] = true;\r
- }\r
-\r
- private static int needsEscaping(String unicode) {\r
- int len = unicode.length();\r
- int escapeCount = 0;\r
- for (int i = 0; i < len; ++i) {\r
- char ch = unicode.charAt(i);\r
- if (ch < 128 && ESCAPED_US_ASCII_CHARS[ch])\r
- ++escapeCount;\r
- }\r
- return escapeCount;\r
- }\r
-\r
- private static String encode(String unicode) {\r
- int needsEscapes = needsEscaping(unicode);\r
- if (needsEscapes == 0)\r
- return unicode;\r
-\r
- int len = unicode.length();\r
- char result[] = new char[(len - needsEscapes) + needsEscapes * 3];\r
- int in = 0;\r
- int out = 0;\r
- while (in < len) {\r
- char inCh = unicode.charAt(in++);\r
- if (inCh >= 128 || !ESCAPED_US_ASCII_CHARS[inCh]) {\r
- result[out++] = inCh;\r
- } else {\r
- // Only selected 7-bit US-ASCII characters are escaped\r
- int c = inCh & 255;\r
- result[out++] = '%';\r
- result[out++] = (char) hexEncode(c / 16);\r
- result[out++] = (char) hexEncode(c % 16);\r
- }\r
- }\r
- return new String(result, 0, out);\r
- }\r
-\r
- private static boolean needsUnescaping(String unicode) {\r
- return unicode.indexOf('%') > -1;\r
- }\r
-\r
- /**\r
- * Convert a URI, in UTF-16 with escaped characters taken from US-ASCII, to\r
- * the corresponding unescaped Unicode string. On ill-formed input the results are\r
- * undefined.\r
- * \r
- * @param uri the uri, in characters specified by RFC 2396 + '#'.\r
- * @return the corresponding unescaped Unicode String.\r
- * @exception IllegalArgumentException if a % hex sequence is ill-formed.\r
- */\r
- public static String unescape(String uri) {\r
- try {\r
- if (!needsUnescaping(uri))\r
- return uri;\r
-\r
- int len = uri.length();\r
- String unicode = uri;\r
- char result[] = new char[len];\r
- int in = 0;\r
- int out = 0;\r
- while (in < len) {\r
- char inCh = unicode.charAt(in++);\r
- if (inCh == '%') {\r
- char d1 = unicode.charAt(in);\r
- char d2 = unicode.charAt(in+1);\r
- if (d1 > 127 || d2 > 127)\r
- throw new IllegalArgumentException("Invalid hex digit escape sequence in " + uri + " at " + in);\r
- result[out++] = (char) (hexDecode((byte) d1) * 16 | hexDecode((byte) d2));\r
- in += 2;\r
- } else {\r
- result[out++] = inCh;\r
- }\r
- }\r
- return new String(result, 0, out);\r
- } catch (IllegalArgumentException e) {\r
- throw new IllegalArgumentException("Problem while unescaping string: " + uri, e);\r
- } catch (IndexOutOfBoundsException ee) {\r
- throw new IllegalArgumentException("Incomplete hex digit escape sequence in " + uri);\r
- }\r
- }\r
-\r
- /* Copied from Jena 2.4 com.hp.hpl.jena.util.URIref */\r
- private static byte hexEncode(int i) {\r
- if (i < 10)\r
- return (byte) ('0' + i);\r
- else\r
- return (byte)('A' + i - 10);\r
- }\r
-\r
- /* Copied from Jena 2.4 com.hp.hpl.jena.util.URIref */\r
- private static int hexDecode(byte b) {\r
- switch (b) {\r
- case (byte)'a': case (byte)'b': case (byte)'c': case (byte)'d': case (byte)'e': case (byte)'f':\r
- return ((b) & 255) - 'a' + 10;\r
- case (byte)'A': case (byte)'B': case (byte)'C': case (byte)'D': case (byte)'E': case (byte)'F':\r
- return b - (byte) 'A' + 10;\r
- case (byte)'0': case (byte)'1': case (byte)'2': case (byte)'3': case (byte)'4': case (byte)'5': case (byte)'6': case (byte)'7': case (byte)'8': case (byte)'9':\r
- return b - (byte) '0';\r
- default:\r
- throw new IllegalArgumentException("Bad Hex escape character: " + ((b)&255) );\r
- }\r
- }\r
-\r
- /**\r
- * Some simple tests.\r
- * @param args\r
- */\r
- public static void main(String[] args) {\r
- String s = makeURI("http://foo.bar.com/foo/bar", "baz/guuk/org%2Fnet");\r
- System.out.println("escapeURI: " + s);\r
- System.out.println("getNamespace: " + getNamespace(s));\r
- System.out.println("getLocalName: " + getLocalName(s));\r
-\r
- System.out.println("escapeURI: " + escapeURI("foo/bar/org%2Fnet"));\r
- System.out.println("escapeURI('...#...'): " + escapeURI("foo/bar#org%2Fnet"));\r
-\r
- testEscape("/", "%2F");\r
- testEscape("#", "%23");\r
- testEscape("%", "%25");\r
- testEscape("%01", "%2501");\r
- testEscape("%GG", "%25GG");\r
- testEscape("säätö venttiili", "säätö%20venttiili");\r
- testEscape("säätö", "säätö");\r
- testEscape("Something / Else", "Something%20%2F%20Else");\r
- testEscape("http://www.vtt.fi%2FSome- %25 Namespace/Something", "http:%2F%2Fwww.vtt.fi%252FSome-%20%2525%20Namespace%2FSomething");\r
- testEscape("http://www.vtt.fi/PSK", "http:%2F%2Fwww.vtt.fi%2FPSK");\r
- testEscape("http://www.vtt.fi%2FSome-Namespace/Something / Else", "http:%2F%2Fwww.vtt.fi%252FSome-Namespace%2FSomething%20%2F%20Else");\r
- }\r
-\r
- private static void testEscape(String unescaped, String expectedEscaped) {\r
- String esc = escape(unescaped);\r
- String unesc = unescape(esc);\r
- System.out.format("escape('%s') -> '%s', unescape('%s') -> '%s'", unescaped, esc, esc, unesc);\r
- if (!esc.equals(expectedEscaped))\r
- throw new AssertionError("escape('" + unescaped + "') was expected to return '" + expectedEscaped + "' but returned '" + esc + "'");\r
- if (!unesc.equals(unescaped))\r
- throw new AssertionError("unescape(escape('" + unescaped + "'))=unescape(" + esc + ") was expected to return '" + unescaped + "' but returned '" + unesc + "'");\r
- System.out.println(" OK");\r
- }\r
-\r
-}\r
+/*******************************************************************************
+ * Copyright (c) 2007, 2010 Association for Decentralized Information Management
+ * in Industry THTH ry.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * VTT Technical Research Centre of Finland - initial API and implementation
+ *******************************************************************************/
+/* The following copyright is attached because marked parts of the following code are
+ * copied and modified from Jena 2.4.
+ */
+/*
+ * (c) Copyright 2001, 2002, 2003, 2004, 2005, 2006 Hewlett-Packard Development Company, LP
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ * * Id: URIref.java,v 1.5 2006/03/22 13:52:49 andy_seaborne Exp
+
+ AUTHOR: Jeremy J. Carroll
+ */
+
+package org.simantics.databoard.util;
+
+import java.util.Arrays;
+import java.util.List;
+
+
+/**
+ * Contains utility methods for handling URI Strings in the context of ProCore
+ * and the Simantics platform. This includes URI escaping and unescaping and
+ * namespace/local name separation and joining.
+ *
+ * <p>
+ * URI's in this context are assumed to be formed as follows:
+ *
+ * <pre>
+ * <namespace part>#<local name part>
+ * </pre>
+ *
+ * <p>
+ * The implementation of {@link #escape(String)} and {@link #unescape(String)}
+ * is copied and modified from Jena's com.hp.hpl.jena.util.URIref.
+ * </p>
+ *
+ * @see <a href="http://en.wikipedia.org/wiki/Percent-encoding">Percent-encoding</a>
+ *
+ * @author Tuukka Lehtonen
+ */
+public final class URIStringUtils {
+
+ /**
+ * The character '/' is used as a path separator in URI namespace parts in ProCore.
+ */
+ public static final char NAMESPACE_PATH_SEPARATOR = '/';
+
+ /**
+ * The '#' character is used to separate the local name and namespace parts
+ * of an URI, for example <code>http://www.example.org#localName</code>.
+ */
+ public static final char NAMESPACE_LOCAL_SEPARATOR = '#';
+
+ /**
+ * Locates the {@link #NAMESPACE_LOCAL_SEPARATOR} that divides the namespace
+ * from the local name in the specified URI and returns its index. The URI
+ * must contain exactly one separator: both a missing separator and more
+ * than one separator are rejected with an exception.
+ *
+ * @param uri the URI to search from, must be non-null
+ * @return the character index of the separator ranging from 0 to uri.length()-1
+ * @throws IllegalArgumentException if the specified URI contains no
+ * {@link #NAMESPACE_LOCAL_SEPARATOR} or contains more than one
+ */
+ private static int assertSingleSeparatorPosition(String uri) {
+ int sharpIndex = uri.indexOf(NAMESPACE_LOCAL_SEPARATOR);
+ if (sharpIndex == -1) {
+ throw new IllegalArgumentException("URI '" + uri + "' does not contain any '" + NAMESPACE_LOCAL_SEPARATOR + "' separator characters");
+ }
+ int nextSharpIndex = uri.indexOf(NAMESPACE_LOCAL_SEPARATOR, sharpIndex + 1);
+ if (nextSharpIndex != -1) {
+ throw new IllegalArgumentException("URI '" + uri + "' contains multiple '" + NAMESPACE_LOCAL_SEPARATOR + "' separator characters");
+ }
+ return sharpIndex;
+ }
+
+ /**
+ * Locates the {@link #NAMESPACE_LOCAL_SEPARATOR} that divides the namespace
+ * from the local name in the specified URI and returns its index. This
+ * version never throws for a malformed URI; it reports -1 instead, both
+ * when no separator exists and when more than one separator exists.
+ *
+ * @param uri the URI to search from, must be non-null
+ * @return the character index of the separator ranging from 0 to
+ * uri.length()-1, or -1 if the URI does not contain exactly one
+ * separator
+ */
+ private static int singleSeparatorPosition(String uri) {
+ int sharpIndex = uri.indexOf(NAMESPACE_LOCAL_SEPARATOR);
+ if (sharpIndex == -1) {
+ return -1;
+ }
+ int nextSharpIndex = uri.indexOf(NAMESPACE_LOCAL_SEPARATOR, sharpIndex + 1);
+ if (nextSharpIndex != -1) {
+ return -1;
+ }
+ return sharpIndex;
+ }
+
+ /**
+ * Splits the specified URI into a namespace and a local name and returns
+ * the namespace, i.e. everything before the separator. The separator
+ * itself is not part of the result.
+ *
+ * <p>
+ * Assumes that the namespace and the local name are separated by exactly
+ * one {@link #NAMESPACE_LOCAL_SEPARATOR} character.
+ * </p>
+ *
+ * @param uri the URI to split, must be non-null
+ * @return the namespace part of the specified URI
+ * @throws IllegalArgumentException for URIs that contain no
+ * {@link #NAMESPACE_LOCAL_SEPARATOR} or more than one
+ * @throws NullPointerException for <code>null</code> URIs
+ */
+ public static String getNamespace(String uri) {
+ if (uri == null)
+ throw new NullPointerException("null uri");
+ int separatorIndex = assertSingleSeparatorPosition(uri);
+ return uri.substring(0, separatorIndex);
+ }
+ /**
+ * Returns the part of the specified URI that precedes its last
+ * {@link #NAMESPACE_PATH_SEPARATOR} or {@link #NAMESPACE_LOCAL_SEPARATOR},
+ * whichever of the two occurs later in the string. The separator itself is
+ * not included in the result. Unlike the namespace/local name methods of
+ * this class, any number of separators is accepted.
+ *
+ * @param uri the URI to cut, must be non-null
+ * @return the prefix of <code>uri</code> up to the last separator
+ * @throws StringIndexOutOfBoundsException if <code>uri</code> contains
+ * neither separator, because the computed end index is then -1
+ * @throws NullPointerException for <code>null</code> URIs
+ */
+ public static String getRVIParent(String uri) {
+ int childSeparator = uri.lastIndexOf(URIStringUtils.NAMESPACE_PATH_SEPARATOR);
+ int propertySeparator = uri.lastIndexOf(URIStringUtils.NAMESPACE_LOCAL_SEPARATOR);
+ int separator = Math.max(childSeparator, propertySeparator);
+ return uri.substring(0, separator);
+ }
+
+
+ /**
+ * Splits the specified URI into a namespace and a local name and returns
+ * the local name, i.e. everything after the separator. The local name is
+ * returned as it appears in the URI; it is not unescaped.
+ *
+ * <p>
+ * Assumes that the namespace and the local name are separated by exactly
+ * one {@link #NAMESPACE_LOCAL_SEPARATOR} character.
+ * </p>
+ *
+ * @param uri the URI to split, must be non-null
+ * @return the local name part of the specified URI
+ * @throws IllegalArgumentException for URIs that contain no
+ * {@link #NAMESPACE_LOCAL_SEPARATOR} or more than one
+ * @throws NullPointerException for <code>null</code> URIs
+ */
+ public static String getLocalName(String uri) {
+ if (uri == null)
+ throw new NullPointerException("null uri");
+ int separatorIndex = assertSingleSeparatorPosition(uri);
+ return uri.substring(separatorIndex + 1);
+ }
+ /**
+ * Replaces every character of the specified name that is not accepted by
+ * {@link Character#isJavaIdentifierPart(char)} with an underscore. The
+ * mapping is lossy: distinct names can produce the same result.
+ *
+ * <p>
+ * Only the "identifier part" rule is applied, to every position including
+ * the first one, so the result may still begin with a character (such as a
+ * digit) that cannot start a Java identifier.
+ * </p>
+ *
+ * @param name the name to sanitize, must be non-null
+ * @return the sanitized name, or the same instance as <code>name</code>
+ * when no character had to be replaced
+ * @throws NullPointerException for <code>null</code> names
+ */
+ public static String escapeName(String name) {
+ char[] chars = name.toCharArray();
+ boolean modified = false;
+ for(int i=0;i<chars.length;++i)
+ if(!Character.isJavaIdentifierPart(chars[i])) {
+ chars[i] = '_';
+ modified = true;
+ }
+ if(modified)
+ return new String(chars);
+ else
+ return name;
+ }
+ /*
+ * HTTP_PREFIX is the scheme prefix that splitURI returns as the parent part
+ * of a URI whose last '/' belongs to the prefix itself; HTTP_POSITION is the
+ * index of the first character after that prefix.
+ */
+ final private static String HTTP_PREFIX = "http://";
+ final private static int HTTP_POSITION = HTTP_PREFIX.length();
+ /**
+ * Splits the specified URI at its last {@link #NAMESPACE_PATH_SEPARATOR}
+ * into a parent part and a last path segment.
+ *
+ * <ul>
+ * <li>If the URI contains no separator at all, <code>null</code> is
+ * returned.</li>
+ * <li>If the last separator sits at the index of the second slash of
+ * <code>http://</code>, the URI is split right after that prefix: the
+ * result is <code>{ "http://", rest }</code> when the URI really starts
+ * with <code>http://</code> and <code>null</code> otherwise.</li>
+ * <li>In every other case the result is the text before the last separator
+ * and the text after it; the separator itself is dropped. A trailing
+ * separator therefore yields an empty second element.</li>
+ * </ul>
+ *
+ * @param uri the URI to split, must be non-null
+ * @return [0] = parent part, [1] = last segment, or <code>null</code> if
+ * the URI cannot be split
+ * @throws NullPointerException for <code>null</code> URIs
+ */
+ public static String[] splitURI(String uri) {
+ int nextPathSeparator = uri.lastIndexOf(URIStringUtils.NAMESPACE_PATH_SEPARATOR);
+ if (nextPathSeparator == -1) return null;
+ if (nextPathSeparator == HTTP_POSITION - 1) {
+ if(uri.startsWith(HTTP_PREFIX)) return new String[] { HTTP_PREFIX, uri.substring(HTTP_POSITION, uri.length()) };
+ else return null;
+ }
+ return new String[] {
+ uri.substring(0, nextPathSeparator),
+ uri.substring(nextPathSeparator + 1, uri.length())
+ };
+ }
+ /**
+ * Same as {@link #splitURI(String)} but returns the two parts as a
+ * fixed-size list backed by the result array.
+ *
+ * <p>
+ * NOTE(review): when {@link #splitURI(String)} returns <code>null</code>
+ * (URI cannot be split) the <code>null</code> array is handed straight to
+ * {@link Arrays#asList(Object...)}, which throws a NullPointerException.
+ * Callers get an exception here, not a <code>null</code> or empty list.
+ * </p>
+ *
+ * @param uri the URI to split, must be non-null
+ * @return a two-element list: [0] = parent part, [1] = last segment
+ * @throws NullPointerException for <code>null</code> URIs and for URIs
+ * that {@link #splitURI(String)} cannot split
+ */
+ public static List<String> splitURISCL(String uri) {
+ String[] result = splitURI(uri);
+ return Arrays.asList(result);
+ }
+
+ /**
+ * Splits the specified URI into a namespace and a local name and returns
+ * them both separately as an array. This is the non-throwing counterpart
+ * of {@link #splitNamespaceAndLocalName(String)}: a URI that does not
+ * contain exactly one {@link #NAMESPACE_LOCAL_SEPARATOR} produces
+ * <code>null</code> instead of an exception.
+ *
+ * @param uri the URI to split, must be non-null
+ * @return [0] = namespace, [1] = local name or <code>null</code> if the URI
+ * cannot be split, i.e. it has no separator or more than one.
+ * @throws NullPointerException for <code>null</code> URIs
+ */
+ public static String[] trySplitNamespaceAndLocalName(String uri) {
+ if (uri == null)
+ throw new NullPointerException("null uri");
+ int separatorIndex = singleSeparatorPosition(uri);
+ return separatorIndex == -1 ?
+ null
+ : new String[] { uri.substring(0, separatorIndex), uri.substring(separatorIndex + 1) };
+ }
+
+ /**
+ * Splits the specified URI into a namespace and a local name and returns
+ * them both separately as an array. The separator itself is dropped and
+ * neither part is unescaped.
+ *
+ * @param uri the URI to split, must be non-null
+ * @return [0] = namespace, [1] = local name
+ * @throws IllegalArgumentException for URIs that contain no
+ * {@link #NAMESPACE_LOCAL_SEPARATOR} or more than one
+ * @throws NullPointerException for <code>null</code> URIs
+ */
+ public static String[] splitNamespaceAndLocalName(String uri) {
+ if (uri == null)
+ throw new NullPointerException("null uri");
+ int separatorIndex = assertSingleSeparatorPosition(uri);
+ return new String[] { uri.substring(0, separatorIndex), uri.substring(separatorIndex + 1) };
+ }
+
+ /**
+ * Escapes a local name so that it can be embedded in a URI: the space,
+ * '%', '#' and '&amp;' characters and the {@link #NAMESPACE_PATH_SEPARATOR}
+ * character are replaced with %NN escapes. Every other character is copied
+ * unchanged. Behaves like {@link #escape(String)} except that a
+ * <code>null</code> argument is rejected with an explicit message.
+ *
+ * @param localName the string to escape
+ * @return the escaped string
+ * @throws NullPointerException for <code>null</code> local names
+ */
+ public static String escapeURI(String localName) {
+ if (localName == null)
+ throw new NullPointerException("null local name");
+ return encode(localName);
+ }
+
+ /**
+ * Add a suffix path to a namespace string, i.e. join the two strings
+ * together with the {@link #NAMESPACE_PATH_SEPARATOR} character in between.
+ * Both arguments are used as they are: nothing is escaped and a separator
+ * is always inserted, even if the namespace already ends with one.
+ *
+ * @param namespace the namespace to append to, must be non-null
+ * @param suffix the suffix to append, must be non-null
+ * @return the joined namespace
+ * @throws NullPointerException if either argument is <code>null</code>
+ */
+ public static String appendURINamespace(String namespace, String suffix) {
+ return new StringBuilder(namespace.length() + 1 + suffix.length())
+ .append(namespace)
+ .append(NAMESPACE_PATH_SEPARATOR)
+ .append(suffix)
+ .toString();
+ }
+
+ /**
+ * Join a namespace and a localname to form an URI with
+ * {@link #NAMESPACE_LOCAL_SEPARATOR}. The local name is escaped with
+ * {@link #escapeURI(String)} before it is appended; the namespace is
+ * appended unchanged.
+ *
+ * @param namespace the namespace part to join, must be non-null
+ * @param localName the unescaped localname part to join, must be non-null
+ * @return the joined URI
+ * @throws NullPointerException if either argument is <code>null</code>
+ */
+ public static String makeURI(String namespace, String localName) {
+ String escapedLocalName = escapeURI(localName);
+ return new StringBuilder(namespace.length() + 1 + escapedLocalName.length())
+ .append(namespace)
+ .append(NAMESPACE_LOCAL_SEPARATOR)
+ .append(escapedLocalName)
+ .toString();
+ }
+
+ /**
+ * Percent-escapes the characters of the specified string that have a
+ * special meaning in the URIs handled by this class: the space, '%', '#',
+ * '/' and '&amp;' characters are each replaced with a %NN escape made of
+ * two upper-case hexadecimal digits. All other characters, including
+ * non-ASCII ones, are copied unchanged; no UTF-8 conversion is performed.
+ *
+ * <p>
+ * The '%' character is always escaped, even when it already starts a
+ * well-formed %NN sequence, so the operation is not idempotent:
+ * <code>escape("%01")</code> returns <code>"%2501"</code>. As a result
+ * <code>unescape(escape(s))</code> returns <code>s</code> for any input.
+ * </p>
+ *
+ * <p>
+ * Meant to be used for encoding URI local name parts if it is desired to
+ * have '/' characters in the local name without creating a new namespace.
+ * For example these two URI's:<br/>
+ *
+ * <code>
+ * http://foo.bar.com/foo/bar/org%2Fcom<br/>
+ * http://foo.bar.com/foo/bar/net%2Fcom<br/>
+ * </code>
+ *
+ * have the same namespace <code>http://foo.bar.com/foo/bar/</code> and
+ * different local names <code>org%2Fcom</code> and <code>net%2Fcom</code>
+ * or <code>org/com</code> and <code>net/com</code> in unescaped form.
+ * </p>
+ *
+ * @param unicode the unescaped string, must be non-null
+ * @return the escaped string, or the same instance as <code>unicode</code>
+ * when nothing needed escaping
+ * @throws NullPointerException for <code>null</code> input
+ */
+ public static String escape(String unicode) {
+ return encode(unicode);
+ }
+
+
+ /*
+ * Lookup table indexed by US-ASCII code (0-127): an entry is true when the
+ * character with that code is percent-escaped by encode(). Only the five
+ * characters flagged in the static initializer below are escaped. Of the
+ * RFC 3986 reserved characters listed here for reference, that is just
+ * '#', '/' and '&'; the space and '%' characters are flagged in addition.
+ *
+ * RFC 3986 section 2.2 Reserved Characters (January 2005)
+ * !*'();:@&=+$,/?#[]
+ */
+ private static boolean[] ESCAPED_US_ASCII_CHARS = new boolean[128];
+
+ static {
+ ESCAPED_US_ASCII_CHARS[' '] = true;
+ // IMPORTANT NOTE: every time escape is invoked, all input needs to be escaped,
+ // i.e. escape("%01") should result in "%2501", not "%01".
+ // This makes unescape the exact inverse of escape; as a consequence
+ // neither of them is an idempotent operation.
+ ESCAPED_US_ASCII_CHARS['%'] = true;
+ // '#' and '/' are URL segment/fragment delimiters, need to be escaped in names.
+ ESCAPED_US_ASCII_CHARS['#'] = true;
+ ESCAPED_US_ASCII_CHARS['/'] = true;
+ // Escape '&' characters to avoid them being interpreted as SGML entities.
+ ESCAPED_US_ASCII_CHARS['&'] = true;
+ }
+ /**
+ * Counts the characters of the specified string that are flagged in
+ * ESCAPED_US_ASCII_CHARS. Despite the name, the result is a count and not
+ * a boolean; zero means that the string contains nothing to escape.
+ * Characters with a code of 128 or above are never counted.
+ *
+ * @param unicode the string to inspect, must be non-null
+ * @return the number of characters that need a %NN escape
+ */
+ private static int needsEscaping(String unicode) {
+ int len = unicode.length();
+ int escapeCount = 0;
+ for (int i = 0; i < len; ++i) {
+ char ch = unicode.charAt(i);
+ if (ch < 128 && ESCAPED_US_ASCII_CHARS[ch])
+ ++escapeCount;
+ }
+ return escapeCount;
+ }
+ /**
+ * Shared implementation of the public escaping methods. Replaces every
+ * character flagged in ESCAPED_US_ASCII_CHARS with '%' followed by the two
+ * upper-case hexadecimal digits of its code and copies all other
+ * characters, including those with a code of 128 or above, unchanged.
+ *
+ * <p>
+ * The string is scanned twice: first to count the characters to escape, so
+ * that the output buffer can be sized exactly (each escaped character
+ * grows from one to three characters), then to produce the output.
+ * </p>
+ *
+ * @param unicode the string to escape, must be non-null
+ * @return the escaped string, or the same instance as <code>unicode</code>
+ * when it contains nothing to escape
+ */
+ private static String encode(String unicode) {
+ int needsEscapes = needsEscaping(unicode);
+ if (needsEscapes == 0)
+ return unicode;
+
+ int len = unicode.length();
+ char result[] = new char[(len - needsEscapes) + needsEscapes * 3];
+ int in = 0;
+ int out = 0;
+ while (in < len) {
+ char inCh = unicode.charAt(in++);
+ if (inCh >= 128 || !ESCAPED_US_ASCII_CHARS[inCh]) {
+ result[out++] = inCh;
+ } else {
+ // Only selected 7-bit US-ASCII characters are escaped
+ int c = inCh & 255;
+ result[out++] = '%';
+ result[out++] = (char) hexEncode(c / 16);
+ result[out++] = (char) hexEncode(c % 16);
+ }
+ }
+ return new String(result, 0, out);
+ }
+
+ private static boolean needsUnescaping(String unicode) {
+ return unicode.indexOf('%') > -1;
+ }
+
+ /**
+  * Convert a URI, in UTF-16 with escaped characters taken from US-ASCII, to
+  * the corresponding unescaped Unicode string. Every '%' has to be followed
+  * by two hexadecimal digits; ill-formed input is rejected with an
+  * IllegalArgumentException.
+  *
+  * @param uri the uri, in characters specified by RFC 2396 + '#'.
+  * @return the corresponding unescaped Unicode String; the argument itself
+  *         is returned when it contains no '%' character.
+  * @exception IllegalArgumentException if a % hex sequence is ill-formed
+  *            or incomplete.
+  */
+ public static String unescape(String uri) {
+     try {
+         if (!needsUnescaping(uri))
+             return uri;
+
+         int len = uri.length();
+         // Unescaping never lengthens the string, so len chars always suffice.
+         char result[] = new char[len];
+         int in = 0;
+         int out = 0;
+         while (in < len) {
+             char inCh = uri.charAt(in++);
+             if (inCh == '%') {
+                 // Reading past the end of the string here raises an
+                 // IndexOutOfBoundsException, which is translated below into
+                 // the "Incomplete hex digit escape sequence" error.
+                 char d1 = uri.charAt(in);
+                 char d2 = uri.charAt(in+1);
+                 if (d1 > 127 || d2 > 127)
+                     throw new IllegalArgumentException("Invalid hex digit escape sequence in " + uri + " at " + in);
+                 result[out++] = (char) (hexDecode((byte) d1) * 16 | hexDecode((byte) d2));
+                 in += 2;
+             } else {
+                 result[out++] = inCh;
+             }
+         }
+         return new String(result, 0, out);
+     } catch (IllegalArgumentException e) {
+         throw new IllegalArgumentException("Problem while unescaping string: " + uri, e);
+     } catch (IndexOutOfBoundsException ee) {
+         // Keep the original exception as the cause so its stack trace is not lost.
+         throw new IllegalArgumentException("Incomplete hex digit escape sequence in " + uri, ee);
+     }
+ }
+
+ /* Copied from Jena 2.4 com.hp.hpl.jena.util.URIref */
+ private static byte hexEncode(int i) {
+ if (i < 10)
+ return (byte) ('0' + i);
+ else
+ return (byte)('A' + i - 10);
+ }
+
+ /*
+  * Copied from Jena 2.4 com.hp.hpl.jena.util.URIref
+  *
+  * Converts one ASCII hexadecimal digit (either letter case) to its value
+  * 0..15. Any other byte is rejected with an IllegalArgumentException.
+  */
+ private static int hexDecode(byte b) {
+     if (b >= '0' && b <= '9') {
+         return b - '0';
+     }
+     if (b >= 'A' && b <= 'F') {
+         return b - 'A' + 10;
+     }
+     if (b >= 'a' && b <= 'f') {
+         return b - 'a' + 10;
+     }
+     throw new IllegalArgumentException("Bad Hex escape character: " + ((b)&255) );
+ }
+
+ /**
+  * Some simple tests: prints the results of a few URI helper calls and then
+  * runs a list of escape/unescape round trips through testEscape.
+  *
+  * @param args not used
+  */
+ public static void main(String[] args) {
+     final String uri = makeURI("http://foo.bar.com/foo/bar", "baz/guuk/org%2Fnet");
+     System.out.println("escapeURI: " + uri);
+     System.out.println("getNamespace: " + getNamespace(uri));
+     System.out.println("getLocalName: " + getLocalName(uri));
+
+     System.out.println("escapeURI: " + escapeURI("foo/bar/org%2Fnet"));
+     System.out.println("escapeURI('...#...'): " + escapeURI("foo/bar#org%2Fnet"));
+
+     // Pairs of { unescaped input, expected escaped form }, checked in order.
+     final String[][] cases = {
+         { "/", "%2F" },
+         { "#", "%23" },
+         { "%", "%25" },
+         { "%01", "%2501" },
+         { "%GG", "%25GG" },
+         { "säätö venttiili", "säätö%20venttiili" },
+         { "säätö", "säätö" },
+         { "Something / Else", "Something%20%2F%20Else" },
+         { "http://www.vtt.fi%2FSome- %25 Namespace/Something", "http:%2F%2Fwww.vtt.fi%252FSome-%20%2525%20Namespace%2FSomething" },
+         { "http://www.vtt.fi/PSK", "http:%2F%2Fwww.vtt.fi%2FPSK" },
+         { "http://www.vtt.fi%2FSome-Namespace/Something / Else", "http:%2F%2Fwww.vtt.fi%252FSome-Namespace%2FSomething%20%2F%20Else" },
+     };
+     for (String[] pair : cases) {
+         testEscape(pair[0], pair[1]);
+     }
+ }
+
+ private static void testEscape(String unescaped, String expectedEscaped) {
+ String esc = escape(unescaped);
+ String unesc = unescape(esc);
+ System.out.format("escape('%s') -> '%s', unescape('%s') -> '%s'", unescaped, esc, esc, unesc);
+ if (!esc.equals(expectedEscaped))
+ throw new AssertionError("escape('" + unescaped + "') was expected to return '" + expectedEscaped + "' but returned '" + esc + "'");
+ if (!unesc.equals(unescaped))
+ throw new AssertionError("unescape(escape('" + unescaped + "'))=unescape(" + esc + ") was expected to return '" + unescaped + "' but returned '" + unesc + "'");
+ System.out.println(" OK");
+ }
+
+}