/******************************************************************************* * Copyright (c) 2007, 2010 Association for Decentralized Information Management * in Industry THTH ry. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: * VTT Technical Research Centre of Finland - initial API and implementation *******************************************************************************/ /* The following copyright is attached because marked parts of the following code are * copied and modified from Jena 2.4. */ /* * (c) Copyright 2001, 2002, 2003, 2004, 2005, 2006 Hewlett-Packard Development Company, LP * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Id: URIref.java,v 1.5 2006/03/22 13:52:49 andy_seaborne Exp AUTHOR: Jeremy J. Carroll */ package org.simantics.databoard.util; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.List; /** * Contains utility methods for handling URI Strings in the context of ProCore * and the Simantics platform. This includes URI escaping and unescaping and * namespace/local name separation and joining. * *

 * <p>
 * URIs in this context are assumed to be formed as follows:
 * </p>
 *
 * <pre>
 * &lt;namespace part&gt;#&lt;local name part&gt;
 * </pre>
 *
 * <p>
 * The implementation of {@link #escape(String)} and {@link #unescape(String)}
 * is copied and modified from Jena's <code>com.hp.hpl.jena.util.URIref</code>.
 * </p>

* * @see Percent-encoding * * @author Tuukka Lehtonen */ public final class URIStringUtils { /** * The character '/' is used as a path separator in URI namespace parts in ProCore. */ public static final char NAMESPACE_PATH_SEPARATOR = '/'; /** * The '#' character is used to separate the local name and namespace parts * of an URI, for example http://www.example.org#localName. */ public static final char NAMESPACE_LOCAL_SEPARATOR = '#'; /** * Checks that only one separator character ({@link #NAMESPACE_LOCAL_SEPARATOR}) * between namespace and localname exists in the specified URI and returns * its index. * * @param uri the URI to search from * @return the character index of the separator ranging from 0 to uri.length()-1 * @throws IllegalArgumentException if no {@link #NAMESPACE_LOCAL_SEPARATOR} * is found in the specified URI */ private static int assertSingleSeparatorPosition(String uri) { int sharpIndex = uri.indexOf(NAMESPACE_LOCAL_SEPARATOR); if (sharpIndex == -1) { throw new IllegalArgumentException("URI '" + uri + "' does not contain any '" + NAMESPACE_LOCAL_SEPARATOR + "' separator characters"); } int nextSharpIndex = uri.indexOf(NAMESPACE_LOCAL_SEPARATOR, sharpIndex + 1); if (nextSharpIndex != -1) { throw new IllegalArgumentException("URI '" + uri + "' contains multiple '" + NAMESPACE_LOCAL_SEPARATOR + "' separator characters"); } return sharpIndex; } /** * Checks that only one separator character ( * {@link #NAMESPACE_LOCAL_SEPARATOR}) between namespace and localname * exists in the specified URI and returns its index. This version does not * throw an exception when the separator is not found. * * @param uri the URI to search from * @return the character index of the separator ranging from 0 to * uri.length()-1 or -1 if no separator was found. 
*/ private static int singleSeparatorPosition(String uri) { int sharpIndex = uri.indexOf(NAMESPACE_LOCAL_SEPARATOR); if (sharpIndex == -1) { return -1; } int nextSharpIndex = uri.indexOf(NAMESPACE_LOCAL_SEPARATOR, sharpIndex + 1); if (nextSharpIndex != -1) { return -1; } return sharpIndex; } /** * Splits the specified URI into a namespace and a local name and returns * the namespace. * *

     * <p>
     * Assumes that namespaces are always separated by
     * {@link #NAMESPACE_LOCAL_SEPARATOR} characters.
     * </p>

* * @param uri the URI to split, must be non-null * @return the namespace part of the specified URI * @throws IllegalArgumentException for URIs without a * {@link #NAMESPACE_LOCAL_SEPARATOR} * @throws NullPointerException for null URIs */ public static String getNamespace(String uri) { if (uri == null) throw new NullPointerException("null uri"); int separatorIndex = assertSingleSeparatorPosition(uri); return uri.substring(0, separatorIndex); } public static String getRVIParent(String uri) { int childSeparator = uri.lastIndexOf(URIStringUtils.NAMESPACE_PATH_SEPARATOR); int propertySeparator = uri.lastIndexOf(URIStringUtils.NAMESPACE_LOCAL_SEPARATOR); int separator = Math.max(childSeparator, propertySeparator); return uri.substring(0, separator); } /** * Splits the specified URI into a namespace and a local name and returns * the local name. * *

     * <p>
     * Assumes that namespaces are always separated by
     * {@link #NAMESPACE_LOCAL_SEPARATOR} characters.
     * </p>

* * @param uri the URI to split, must be non-null * @return the local name part of the specified URI * @throws IllegalArgumentException for URIs without a * {@link #NAMESPACE_LOCAL_SEPARATOR} * @throws NullPointerException for null URIs */ public static String getLocalName(String uri) { if (uri == null) throw new NullPointerException("null uri"); int separatorIndex = assertSingleSeparatorPosition(uri); return uri.substring(separatorIndex + 1); } /* NOTE(review): source text appears to be missing between "for(int i=0;i" and "splitURISCL(" below - presumably everything from a '<' up to the next '>' was stripped, taking with it the rest of escapeName, the header of splitURISCL and whatever declares the splitURI method that splitURISCL calls; the raw ArrayList in splitURISCL has probably lost its type arguments the same way. Restore this span from the original file; do not reconstruct it by hand. */ public static String escapeName(String name) { char[] chars = name.toCharArray(); boolean modified = false; for(int i=0;i splitURISCL(String uri) { String[] result = splitURI(uri); ArrayList list = new ArrayList(result.length); for(String s : result) list.add(s); return list; } /** * Splits the specified URI into a namespace and a local name and returns * them both separately as an array. * * @param uri the URI to split, must be non-null * @return [0] = namespace, [1] = local name or null if the URI * cannot be split. * @throws NullPointerException for null URIs */ public static String[] trySplitNamespaceAndLocalName(String uri) { if (uri == null) throw new NullPointerException("null uri"); int separatorIndex = singleSeparatorPosition(uri); return separatorIndex == -1 ? null : new String[] { uri.substring(0, separatorIndex), uri.substring(separatorIndex + 1) }; } /** * Splits the specified URI into a namespace and a local name and returns * them both separately as an array. 
* * @param uri the URI to split, must be non-null * @return [0] = namespace, [1] = local name * @throws IllegalArgumentException for URIs without a * {@link #NAMESPACE_LOCAL_SEPARATOR} * @throws NullPointerException for null URIs */ public static String[] splitNamespaceAndLocalName(String uri) { if (uri == null) throw new NullPointerException("null uri"); int separatorIndex = assertSingleSeparatorPosition(uri); return new String[] { uri.substring(0, separatorIndex), uri.substring(separatorIndex + 1) }; } /** * Converts a unicode string into an RFC 2396 compliant URI, using %NN * escapes where appropriate, including the * {@link #NAMESPACE_PATH_SEPARATOR} character. * * @param localName the string to escape * @return the escaped string * @throws NullPointerException for null URIs */ public static String escapeURI(String localName) { if (localName == null) throw new NullPointerException("null local name"); String result = encode(localName); return result; } /** * Add a suffix path to a namespace string, i.e. join the strings to * together with the {@link #NAMESPACE_PATH_SEPARATOR} character in between. * * @param namespace the namespace to append to * @param suffix the suffix to append * @return the joined namespace */ public static String appendURINamespace(String namespace, String suffix) { //return namespace + NAMESPACE_PATH_SEPARATOR + suffix; return new StringBuffer(namespace.length() + 1 + suffix.length()) .append(namespace) .append(NAMESPACE_PATH_SEPARATOR) .append(suffix) .toString(); } /** * Join a namespace and a localname to form an URI with * {@link #NAMESPACE_LOCAL_SEPARATOR}. 
* * @param namespace the namespace part to join * @param localName the localname part to join * @return the joined URI */ public static String makeURI(String namespace, String localName) { //return namespace + NAMESPACE_LOCAL_SEPARATOR + escapeURI(localName); String escapedLocalName = escapeURI(localName); return new StringBuffer(namespace.length() + 1 + escapedLocalName.length()) .append(namespace) .append(NAMESPACE_LOCAL_SEPARATOR) .append(escapedLocalName) .toString(); } /** * Convert a Unicode string, first to UTF-8 and then to an RFC 2396 * compliant URI with optional fragment identifier using %NN escape * mechanism as appropriate. The '%' character is assumed to already * indicated an escape byte. The '%' character must be followed by two * hexadecimal digits. * *

     * <p>
     * Meant to be used for encoding URI local name parts if it is desired to
     * have '/' characters in the local name without creating a new namespace.
     * For example, these two URIs:
     * </p>
     *
     * <pre>
     * http://foo.bar.com/foo/bar/org%2Fcom
     * http://foo.bar.com/foo/bar/net%2Fcom
     * </pre>
     *
     * <p>
     * have the same namespace <code>http://foo.bar.com/foo/bar/</code> and
     * different local names <code>org%2Fcom</code> and <code>net%2Fcom</code>,
     * or <code>org/com</code> and <code>net/com</code> in unescaped form.
     * </p>

* * @param unicode The uri, in characters specified by RFC 2396 + '#' * @return The corresponding Unicode String */ public static String escape(String unicode) { return encode(unicode); } final private static Charset UTF8 = Charset.forName("UTF-8"); final private static Charset ASCII = Charset.forName("US-ASCII"); /* Copied and modified from Jena 2.4 com.hp.hpl.jena.util.URIref */ private static String encode(String unicode) { boolean needsEscapes = needsEscaping(unicode); if (!needsEscapes) return unicode; byte utf8[] = unicode.getBytes(UTF8); byte rsltAscii[] = new byte[utf8.length * 6]; int in = 0; int out = 0; while (in < utf8.length) { switch (utf8[in]) { case (byte)'a': case (byte)'b': case (byte)'c': case (byte)'d': case (byte)'e': case (byte)'f': case (byte)'g': case (byte)'h': case (byte)'i': case (byte)'j': case (byte)'k': case (byte)'l': case (byte)'m': case (byte)'n': case (byte)'o': case (byte)'p': case (byte)'q': case (byte)'r': case (byte)'s': case (byte)'t': case (byte)'u': case (byte)'v': case (byte)'w': case (byte)'x': case (byte)'y': case (byte)'z': case (byte)'A': case (byte)'B': case (byte)'C': case (byte)'D': case (byte)'E': case (byte)'F': case (byte)'G': case (byte)'H': case (byte)'I': case (byte)'J': case (byte)'K': case (byte)'L': case (byte)'M': case (byte)'N': case (byte)'O': case (byte)'P': case (byte)'Q': case (byte)'R': case (byte)'S': case (byte)'T': case (byte)'U': case (byte)'V': case (byte)'W': case (byte)'X': case (byte)'Y': case (byte)'Z': case (byte)'0': case (byte)'1': case (byte)'2': case (byte)'3': case (byte)'4': case (byte)'5': case (byte)'6': case (byte)'7': case (byte)'8': case (byte)'9': case (byte)';': case (byte)'?': case (byte)':': case (byte)'@': case (byte)'=': case (byte)'+': case (byte)'$': case (byte)',': case (byte)'-': case (byte)'_': case (byte)'.': case (byte)'!': case (byte)'~': case (byte)'*': case (byte)'\'': case (byte)'(': case (byte)')': case (byte)'[': case (byte)']': rsltAscii[out] = utf8[in]; out++; 
in++; break; case (byte)' ': rsltAscii[out++] = (byte) '%'; rsltAscii[out++] = '2'; rsltAscii[out++] = '0'; in++; break; case (byte) '%': // [lehtonen] NOTE: all input needs to be escaped, i.e. "%01" should result in "%2501", not "%01". // escape+unescape is a bijection, not an idempotent operation. // Fall through to to escape '%' as '%25' case (byte) '#': case (byte) '/': // Fall through to escape '/' case (byte)'&': // Fall through to escape '&' characters to avoid them // being interpreted as SGML entities. default: rsltAscii[out++] = (byte) '%'; // Get rid of sign ... int c = (utf8[in]) & 255; rsltAscii[out++] = hexEncode(c / 16); rsltAscii[out++] = hexEncode(c % 16); in++; break; } } return new String(rsltAscii, 0, out, ASCII); } /* * RFC 3986 section 2.2 Reserved Characters (January 2005) * !*'();:@&=+$,/?#[] */ private static boolean needsEscaping(String unicode) { int len = unicode.length(); for (int i = 0; i < len; ++i) { switch (unicode.charAt(i)) { case (byte)'!': case (byte)'*': case (byte)'\'': case (byte)'(': case (byte)')': case (byte)';': case (byte)':': case (byte)'@': case (byte)'=': case (byte)'+': case (byte)'$': case (byte)',': case (byte)'?': case (byte)'~': case (byte)'[': case (byte)']': break; case (byte)' ': case (byte) '#': case (byte) '%': case (byte) '/': case (byte)'&': return true; } } return false; } private static boolean needsUnescaping(String unicode) { return unicode.indexOf('%') > -1; } /** * Convert a URI, in US-ASCII, with escaped characters taken from UTF-8, to * the corresponding Unicode string. On ill-formed input the results are * undefined, specifically if the unescaped version is not a UTF-8 String, * some String will be returned. * * @param uri the uri, in characters specified by RFC 2396 + '#'. * @return the corresponding Unicode String. * @exception IllegalArgumentException if a % hex sequence is ill-formed. 
*/
    public static String unescape(String uri) {
        if (!needsUnescaping(uri)) {
            return uri;
        }

        // Work on the UTF-8 form of the input: for pure ASCII input this is
        // the same byte sequence as before, and any non-ASCII character that
        // sits next to an escape sequence survives instead of being replaced
        // by '?'.
        byte[] escaped = uri.getBytes(UTF8);
        // Unescaping never grows the data: "%XX" (3 bytes) becomes 1 byte.
        byte[] utf8 = new byte[escaped.length];
        int in = 0;
        int out = 0;
        while (in < escaped.length) {
            byte b = escaped[in++];
            if (b == (byte) '%') {
                int high = decodeEscapeDigit(escaped, in, uri);
                int low = decodeEscapeDigit(escaped, in + 1, uri);
                utf8[out++] = (byte) (high * 16 | low);
                in += 2;
            } else {
                utf8[out++] = b;
            }
        }
        return new String(utf8, 0, out, UTF8);
    }

    /**
     * Decodes the hex digit of an escape sequence found at the given index.
     *
     * @throws IllegalArgumentException if the input ends before the index or
     *         the byte at the index is not a hex digit
     */
    private static int decodeEscapeDigit(byte[] escaped, int index, String uri) {
        if (index >= escaped.length) {
            throw new IllegalArgumentException("Incomplete Hex escape sequence in " + uri);
        }
        try {
            return hexDecode(escaped[index]);
        } catch (IllegalArgumentException e) {
            throw new IllegalArgumentException("Problem while unescaping string: " + uri, e);
        }
    }

    /* Copied from Jena 2.4 com.hp.hpl.jena.util.URIref */
    /* Maps 0..15 to the ASCII code of the matching upper case hex digit. */
    private static byte hexEncode(int i) {
        return (byte) (i < 10 ? '0' + i : 'A' + i - 10);
    }

    /* Copied from Jena 2.4 com.hp.hpl.jena.util.URIref */
    /* Maps the ASCII code of a hex digit (either case) to its value 0..15. */
    private static int hexDecode(byte b) {
        if (b >= '0' && b <= '9') {
            return b - '0';
        }
        if (b >= 'A' && b <= 'F') {
            return b - 'A' + 10;
        }
        if (b >= 'a' && b <= 'f') {
            return b - 'a' + 10;
        }
        throw new IllegalArgumentException("Bad Hex escape character: " + ((b) & 255));
    }

    /**
     * Some simple tests.
* @param args not used
     */
    public static void main(String[] args) {
        // Round trip of a string that already contains '%' sequences.
        String input = "http://www.vtt.fi%2FSome- %25 Namespace/Jotain";
        String escaped = escape(input);
        System.out.println(String.format("escape+unescape: %s -> %s -> %s", input, escaped, unescape(escaped)));

        input = "http://www.vtt.fi%2FPSK";
        System.out.println(String.format("unescape: %s -> %s", input, unescape(input)));

        input = "http://www.vtt.fi%2FSome-Namespace/Jotain / Muuta";
        System.out.println(String.format("escape: %s -> %s", input, escape(input)));

        input = "Jotain / Muuta";
        System.out.println(String.format("escape: %s -> %s", input, escape(input)));

        System.out.println("escapeURI: " + escapeURI("foo/bar/org%2Fnet"));
        System.out.println("escapeURI('...#...'): " + escapeURI("foo/bar#org%2Fnet"));

        // Build a URI and take it apart again.
        String uri = makeURI("http://foo.bar.com/foo/bar", "baz/guuk/org%2Fnet");
        System.out.println("escapeURI: " + uri);
        System.out.println("getNamespace: " + getNamespace(uri));
        System.out.println("getLocalName: " + getLocalName(uri));

        testEscape("/", "%2F");
        testEscape("#", "%23");
        testEscape("%", "%25");
        testEscape("%01", "%2501");
        testEscape("%GG", "%25GG");
    }

    /**
     * Escapes the given string, unescapes the result again, prints both steps
     * and verifies them.
     *
     * @param unescaped the string to escape
     * @param expectedEscaped the escaped form that is expected
     * @throws AssertionError if the escaped form differs from the expected one
     *         or if unescaping does not restore the input
     */
    private static void testEscape(String unescaped, String expectedEscaped) {
        final String esc = escape(unescaped);
        final String unesc = unescape(esc);
        System.out.format("escape('%s')='%s', unescape('%s')='%s'\n", unescaped, esc, esc, unesc);

        final boolean escapedAsExpected = esc.equals(expectedEscaped);
        if (!escapedAsExpected) {
            throw new AssertionError("escape('" + unescaped + "') was expected to return '" + expectedEscaped + "' but returned '" + esc + "'");
        }
        final boolean roundTripped = unesc.equals(unescaped);
        if (!roundTripped) {
            throw new AssertionError("unescape(escape('" + unescaped + "'))=unescape(" + esc + ") was expected to return '" + unescaped + "' but returned '" + unesc + "'");
        }
    }

}