X-Git-Url: https://gerrit.simantics.org/r/gitweb?p=simantics%2Fplatform.git;a=blobdiff_plain;f=bundles%2Forg.simantics.databoard%2Fsrc%2Forg%2Fsimantics%2Fdataboard%2Futil%2FURIStringUtils.java;fp=bundles%2Forg.simantics.databoard%2Fsrc%2Forg%2Fsimantics%2Fdataboard%2Futil%2FURIStringUtils.java;h=dde498a2c38bdcadbe3fb025cce664e2e0575c74;hp=0000000000000000000000000000000000000000;hb=969bd23cab98a79ca9101af33334000879fb60c5;hpb=866dba5cd5a3929bbeae85991796acb212338a08 diff --git a/bundles/org.simantics.databoard/src/org/simantics/databoard/util/URIStringUtils.java b/bundles/org.simantics.databoard/src/org/simantics/databoard/util/URIStringUtils.java new file mode 100644 index 000000000..dde498a2c --- /dev/null +++ b/bundles/org.simantics.databoard/src/org/simantics/databoard/util/URIStringUtils.java @@ -0,0 +1,528 @@ +/******************************************************************************* + * Copyright (c) 2007, 2010 Association for Decentralized Information Management + * in Industry THTH ry. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * VTT Technical Research Centre of Finland - initial API and implementation + *******************************************************************************/ +/* The following copyright is attached because marked parts of the following code are + * copied and modified from Jena 2.4. + */ +/* + * (c) Copyright 2001, 2002, 2003, 2004, 2005, 2006 Hewlett-Packard Development Company, LP + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + * * Id: URIref.java,v 1.5 2006/03/22 13:52:49 andy_seaborne Exp + + AUTHOR: Jeremy J. Carroll + */ + +package org.simantics.databoard.util; + +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.List; + + +/** + * Contains utility methods for handling URI Strings in the context of ProCore + * and the Simantics platform. This includes URI escaping and unescaping and + * namespace/local name separation and joining. + * + *

+ * URI's in this context are assumed to be formed as follows: + * + *

+ * <namespace part>#<local name part>
+ * 
+ * + *

+ * The implementation of {@link #escape(String)} and {@link #unescape(String)} + * is copied and modified from Jena's com.hp.hpl.jena.util.URIref. + *

/**
 * Utility methods for handling URI strings in the context of ProCore and the
 * Simantics platform: percent-escaping and unescaping, and separating/joining
 * the namespace and local name parts of a URI.
 *
 * <p>
 * URIs in this context are assumed to be formed as follows:
 *
 * <pre>
 * &lt;namespace part&gt;#&lt;local name part&gt;
 * </pre>
 *
 * <p>
 * The implementation of {@link #escape(String)} and {@link #unescape(String)}
 * is copied and modified from Jena's com.hp.hpl.jena.util.URIref.
 *
 * @see <a href="http://en.wikipedia.org/wiki/Percent-encoding">Percent-encoding</a>
 *
 * @author Tuukka Lehtonen
 */
public final class URIStringUtils {

    /**
     * The character '/' is used as a path separator in URI namespace parts in ProCore.
     */
    public static final char NAMESPACE_PATH_SEPARATOR = '/';

    /**
     * The '#' character is used to separate the local name and namespace parts
     * of an URI, for example <code>http://www.example.org#localName</code>.
     */
    public static final char NAMESPACE_LOCAL_SEPARATOR = '#';

    /** Charset used for the byte-level form of escaped and unescaped strings. */
    private static final Charset UTF8 = Charset.forName("UTF-8");

    /** Upper-case hexadecimal digits used when writing %NN escapes. */
    private static final String HEX_DIGITS = "0123456789ABCDEF";

    /** ASCII punctuation that {@link #encode(String)} copies through unescaped. */
    private static final String UNESCAPED_PUNCTUATION = ";?:@=+$,-_.!~*'()[]";

    /** Length of the <code>http://</code> scheme prefix, used by {@link #splitURI(String)}. */
    private static final int HTTP_POSITION = "http://".length();

    /**
     * Checks that exactly one separator character
     * ({@link #NAMESPACE_LOCAL_SEPARATOR}) exists in the specified URI and
     * returns its index.
     *
     * @param uri the URI to search from
     * @return the character index of the separator ranging from 0 to uri.length()-1
     * @throws IllegalArgumentException if no {@link #NAMESPACE_LOCAL_SEPARATOR}
     *         is found in the specified URI, or if more than one is found
     */
    private static int assertSingleSeparatorPosition(String uri) {
        int sharpIndex = uri.indexOf(NAMESPACE_LOCAL_SEPARATOR);
        if (sharpIndex == -1) {
            throw new IllegalArgumentException("URI '" + uri + "' does not contain any '"
                    + NAMESPACE_LOCAL_SEPARATOR + "' separator characters");
        }
        if (uri.indexOf(NAMESPACE_LOCAL_SEPARATOR, sharpIndex + 1) != -1) {
            throw new IllegalArgumentException("URI '" + uri + "' contains multiple '"
                    + NAMESPACE_LOCAL_SEPARATOR + "' separator characters");
        }
        return sharpIndex;
    }

    /**
     * Non-throwing variant of {@link #assertSingleSeparatorPosition(String)}.
     *
     * @param uri the URI to search from
     * @return the character index of the single separator ranging from 0 to
     *         uri.length()-1, or -1 if the URI contains no separator or more
     *         than one separator
     */
    private static int singleSeparatorPosition(String uri) {
        int sharpIndex = uri.indexOf(NAMESPACE_LOCAL_SEPARATOR);
        if (sharpIndex == -1) {
            return -1;
        }
        boolean hasSecondSeparator = uri.indexOf(NAMESPACE_LOCAL_SEPARATOR, sharpIndex + 1) != -1;
        return hasSecondSeparator ? -1 : sharpIndex;
    }

    /**
     * Splits the specified URI into a namespace and a local name and returns
     * the namespace.
     *
     * <p>
     * Assumes that namespaces are always separated by
     * {@link #NAMESPACE_LOCAL_SEPARATOR} characters.
     *
     * @param uri the URI to split, must be non-null
     * @return the namespace part of the specified URI
     * @throws IllegalArgumentException for URIs that do not contain exactly one
     *         {@link #NAMESPACE_LOCAL_SEPARATOR}
     * @throws NullPointerException for <code>null</code> URIs
     */
    public static String getNamespace(String uri) {
        if (uri == null)
            throw new NullPointerException("null uri");
        return uri.substring(0, assertSingleSeparatorPosition(uri));
    }

    /**
     * Returns the part of the specified string that precedes its last
     * {@link #NAMESPACE_PATH_SEPARATOR} or {@link #NAMESPACE_LOCAL_SEPARATOR},
     * whichever occurs later.
     *
     * @param uri the string to cut, must be non-null
     * @return the prefix preceding the last separator
     * @throws StringIndexOutOfBoundsException if the string contains neither
     *         separator character
     */
    public static String getRVIParent(String uri) {
        int childSeparator = uri.lastIndexOf(NAMESPACE_PATH_SEPARATOR);
        int propertySeparator = uri.lastIndexOf(NAMESPACE_LOCAL_SEPARATOR);
        return uri.substring(0, Math.max(childSeparator, propertySeparator));
    }

    /**
     * Splits the specified URI into a namespace and a local name and returns
     * the local name.
     *
     * <p>
     * Assumes that namespaces are always separated by
     * {@link #NAMESPACE_LOCAL_SEPARATOR} characters.
     *
     * @param uri the URI to split, must be non-null
     * @return the local name part of the specified URI
     * @throws IllegalArgumentException for URIs that do not contain exactly one
     *         {@link #NAMESPACE_LOCAL_SEPARATOR}
     * @throws NullPointerException for <code>null</code> URIs
     */
    public static String getLocalName(String uri) {
        if (uri == null)
            throw new NullPointerException("null uri");
        return uri.substring(assertSingleSeparatorPosition(uri) + 1);
    }

    /**
     * Replaces every character of the name that is not a valid Java identifier
     * part with an underscore.
     *
     * <p>
     * NOTE(review): everything after the loop header of this method was lost
     * when this file was extracted (text between angle brackets was stripped).
     * The loop body below is a reconstruction and must be verified against
     * the upstream source.
     *
     * @param name the name to escape, must be non-null
     * @return the escaped name, or the argument itself if nothing was replaced
     */
    public static String escapeName(String name) {
        char[] chars = name.toCharArray();
        boolean modified = false;
        for (int i = 0; i < chars.length; ++i) {
            if (!Character.isJavaIdentifierPart(chars[i])) {
                chars[i] = '_';
                modified = true;
            }
        }
        return modified ? new String(chars) : name;
    }

    /**
     * Splits the specified URI at its last {@link #NAMESPACE_PATH_SEPARATOR}.
     *
     * <p>
     * NOTE(review): this method is called by {@link #splitURISCL(String)} but
     * its definition was lost when this file was extracted. Only the name, the
     * single String argument and the <code>String[]</code> result are
     * established by that call site; the body and visibility are a
     * reconstruction and must be verified against the upstream source.
     *
     * @param uri the URI to split, must be non-null
     * @return [0] = parent part, [1] = last path segment, or <code>null</code>
     *         if the URI cannot be split
     */
    public static String[] splitURI(String uri) {
        int lastPathSeparator = uri.lastIndexOf(NAMESPACE_PATH_SEPARATOR);
        if (lastPathSeparator == -1)
            return null;
        if (lastPathSeparator == HTTP_POSITION - 1) {
            // The last '/' is the one ending the "http://" style scheme prefix.
            if (uri.length() == HTTP_POSITION)
                return null;
            return new String[] { "http://", uri.substring(HTTP_POSITION) };
        }
        return new String[] {
                uri.substring(0, lastPathSeparator),
                uri.substring(lastPathSeparator + 1)
        };
    }

    /**
     * Same as {@link #splitURI(String)} but returns the parts as a list.
     *
     * @param uri the URI to split, must be non-null
     * @return a new mutable list holding the elements returned by
     *         {@link #splitURI(String)}, in the same order
     * @throws NullPointerException if {@link #splitURI(String)} returns
     *         <code>null</code> for the specified URI
     */
    public static List<String> splitURISCL(String uri) {
        String[] result = splitURI(uri);
        ArrayList<String> list = new ArrayList<String>(result.length);
        for (String s : result)
            list.add(s);
        return list;
    }

    /**
     * Splits the specified URI into a namespace and a local name and returns
     * them both separately as an array.
     *
     * @param uri the URI to split, must be non-null
     * @return [0] = namespace, [1] = local name or <code>null</code> if the URI
     *         cannot be split, i.e. it does not contain exactly one
     *         {@link #NAMESPACE_LOCAL_SEPARATOR}
     * @throws NullPointerException for <code>null</code> URIs
     */
    public static String[] trySplitNamespaceAndLocalName(String uri) {
        if (uri == null)
            throw new NullPointerException("null uri");
        int separatorIndex = singleSeparatorPosition(uri);
        if (separatorIndex == -1)
            return null;
        return new String[] { uri.substring(0, separatorIndex), uri.substring(separatorIndex + 1) };
    }

    /**
     * Splits the specified URI into a namespace and a local name and returns
     * them both separately as an array.
     *
     * @param uri the URI to split, must be non-null
     * @return [0] = namespace, [1] = local name
     * @throws IllegalArgumentException for URIs that do not contain exactly one
     *         {@link #NAMESPACE_LOCAL_SEPARATOR}
     * @throws NullPointerException for <code>null</code> URIs
     */
    public static String[] splitNamespaceAndLocalName(String uri) {
        if (uri == null)
            throw new NullPointerException("null uri");
        int separatorIndex = assertSingleSeparatorPosition(uri);
        return new String[] { uri.substring(0, separatorIndex), uri.substring(separatorIndex + 1) };
    }

    /**
     * Converts a unicode string into an RFC 2396 compliant URI, using %NN
     * escapes where appropriate, including the
     * {@link #NAMESPACE_PATH_SEPARATOR} character.
     *
     * @param localName the string to escape
     * @return the escaped string
     * @throws NullPointerException for <code>null</code> local names
     */
    public static String escapeURI(String localName) {
        if (localName == null)
            throw new NullPointerException("null local name");
        return encode(localName);
    }

    /**
     * Add a suffix path to a namespace string, i.e. join the strings
     * together with the {@link #NAMESPACE_PATH_SEPARATOR} character in between.
     * Neither argument is escaped.
     *
     * @param namespace the namespace to append to
     * @param suffix the suffix to append
     * @return the joined namespace
     */
    public static String appendURINamespace(String namespace, String suffix) {
        return new StringBuilder(namespace.length() + 1 + suffix.length())
                .append(namespace)
                .append(NAMESPACE_PATH_SEPARATOR)
                .append(suffix)
                .toString();
    }

    /**
     * Join a namespace and a localname to form an URI with
     * {@link #NAMESPACE_LOCAL_SEPARATOR}. The local name is escaped with
     * {@link #escapeURI(String)}; the namespace is used as given.
     *
     * @param namespace the namespace part to join
     * @param localName the localname part to join, in unescaped form
     * @return the joined URI
     */
    public static String makeURI(String namespace, String localName) {
        String escapedLocalName = escapeURI(localName);
        return new StringBuilder(namespace.length() + 1 + escapedLocalName.length())
                .append(namespace)
                .append(NAMESPACE_LOCAL_SEPARATOR)
                .append(escapedLocalName)
                .toString();
    }

    /**
     * Convert a Unicode string, first to UTF-8 and then to its escaped form
     * using the %NN escape mechanism as appropriate. A '%' character in the
     * input is itself escaped as <code>%25</code>, so that
     * {@link #unescape(String)} restores the original input.
     *
     * <p>
     * Meant to be used for encoding URI local name parts if it is desired to
     * have '/' characters in the local name without creating a new namespace.
     * For example these two URI's:<br>
     *
     * <code>
     * http://foo.bar.com/foo/bar/org%2Fcom<br>
     * http://foo.bar.com/foo/bar/net%2Fcom<br>
     * </code>
     *
     * have the same namespace <code>http://foo.bar.com/foo/bar/</code> and
     * different local names <code>org%2Fcom</code> and <code>net%2Fcom</code>
     * or <code>org/com</code> and <code>net/com</code> in unescaped form.
     *
     * <p>
     * A string containing none of the characters ' ', '#', '%', '/' and '&amp;'
     * is returned unchanged, even if it contains other characters that would
     * be escaped had one of those five been present.
     *
     * @param unicode the Unicode string to escape
     * @return the escaped string
     */
    public static String escape(String unicode) {
        return encode(unicode);
    }

    /* Copied and modified from Jena 2.4 com.hp.hpl.jena.util.URIref */
    private static String encode(String unicode) {
        if (!needsEscaping(unicode))
            return unicode;

        byte[] utf8 = unicode.getBytes(UTF8);
        // Every input byte becomes either itself or a three character %NN escape.
        StringBuilder escaped = new StringBuilder(utf8.length * 3);
        for (byte b : utf8) {
            if (isCopiedUnescaped(b)) {
                escaped.append((char) b);
            } else {
                // [lehtonen] NOTE: all input needs to be escaped, i.e. "%01" should
                // result in "%2501", not "%01". escape+unescape is a bijection, not
                // an idempotent operation. '#', '/' and '&' are escaped here as well,
                // '&' to avoid it being interpreted as an SGML entity.
                int c = b & 255; // get rid of sign
                escaped.append('%')
                        .append((char) hexEncode(c / 16))
                        .append((char) hexEncode(c % 16));
            }
        }
        return escaped.toString();
    }

    /**
     * Tells whether {@link #encode(String)} copies the byte through as is:
     * ASCII letters, ASCII digits and {@link #UNESCAPED_PUNCTUATION}.
     */
    private static boolean isCopiedUnescaped(byte b) {
        if (b >= 'a' && b <= 'z')
            return true;
        if (b >= 'A' && b <= 'Z')
            return true;
        if (b >= '0' && b <= '9')
            return true;
        // Bytes of multi-byte UTF-8 sequences are negative and never match.
        return b > 0 && UNESCAPED_PUNCTUATION.indexOf(b) != -1;
    }

    /**
     * Tells whether the string contains at least one of the characters that
     * trigger escaping: ' ', '#', '%', '/' or '&amp;'.
     */
    private static boolean needsEscaping(String unicode) {
        int len = unicode.length();
        for (int i = 0; i < len; ++i) {
            switch (unicode.charAt(i)) {
                case ' ':
                case '#':
                case '%':
                case '/':
                case '&':
                    return true;
                default:
                    break;
            }
        }
        return false;
    }

    private static boolean needsUnescaping(String unicode) {
        return unicode.indexOf('%') > -1;
    }

    /**
     * Convert an escaped URI, with %NN escapes denoting UTF-8 bytes, to the
     * corresponding Unicode string. Characters that are not part of an escape
     * sequence are kept as they are, including non-ASCII characters. If the
     * unescaped bytes are not valid UTF-8, some String will be returned.
     *
     * @param uri the escaped URI
     * @return the corresponding Unicode String
     * @exception IllegalArgumentException if a % hex sequence is ill-formed
     * @throws NullPointerException for <code>null</code> URIs
     */
    public static String unescape(String uri) {
        if (!needsUnescaping(uri))
            return uri;

        // Work on UTF-8 bytes: '%' and hex digits are single bytes that never
        // occur inside a multi-byte sequence, so unescaped non-ASCII characters
        // pass through untouched instead of degrading to '?'.
        byte[] escaped = uri.getBytes(UTF8);
        byte[] utf8 = new byte[escaped.length];
        int in = 0;
        int out = 0;
        while (in < escaped.length) {
            byte b = escaped[in++];
            if (b == (byte) '%') {
                int high = decodeEscapeDigit(escaped, in, uri);
                int low = decodeEscapeDigit(escaped, in + 1, uri);
                utf8[out++] = (byte) (high * 16 | low);
                in += 2;
            } else {
                utf8[out++] = b;
            }
        }
        return new String(utf8, 0, out, UTF8);
    }

    /**
     * Decodes the hex digit at the given index of an escape sequence.
     *
     * @throws IllegalArgumentException if the index is past the end of the
     *         input or the byte at the index is not a hex digit
     */
    private static int decodeEscapeDigit(byte[] escaped, int index, String uri) {
        if (index >= escaped.length)
            throw new IllegalArgumentException("Incomplete Hex escape sequence in " + uri);
        try {
            return hexDecode(escaped[index]);
        } catch (IllegalArgumentException e) {
            throw new IllegalArgumentException("Problem while unescaping string: " + uri, e);
        }
    }

    /* Copied from Jena 2.4 com.hp.hpl.jena.util.URIref */
    private static byte hexEncode(int i) {
        return (byte) HEX_DIGITS.charAt(i);
    }

    /* Copied from Jena 2.4 com.hp.hpl.jena.util.URIref */
    private static int hexDecode(byte b) {
        if (b >= '0' && b <= '9')
            return b - '0';
        if (b >= 'a' && b <= 'f')
            return b - 'a' + 10;
        if (b >= 'A' && b <= 'F')
            return b - 'A' + 10;
        throw new IllegalArgumentException("Bad Hex escape character: " + (b & 255));
    }

    /**
     * Some simple tests.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String s;
        s = "http://www.vtt.fi%2FSome- %25 Namespace/Jotain";
        System.out.println(String.format("escape+unescape: %s -> %s -> %s", s, escape(s), unescape(escape(s))));
        s = "http://www.vtt.fi%2FPSK";
        System.out.println(String.format("unescape: %s -> %s", s, unescape(s)));
        s = "http://www.vtt.fi%2FSome-Namespace/Jotain / Muuta";
        System.out.println(String.format("escape: %s -> %s", s, escape(s)));
        s = "Jotain / Muuta";
        System.out.println(String.format("escape: %s -> %s", s, escape(s)));

        System.out.println("escapeURI: " + escapeURI("foo/bar/org%2Fnet"));
        System.out.println("escapeURI('...#...'): " + escapeURI("foo/bar#org%2Fnet"));
        s = makeURI("http://foo.bar.com/foo/bar", "baz/guuk/org%2Fnet");
        System.out.println("escapeURI: " + s);
        System.out.println("getNamespace: " + getNamespace(s));
        System.out.println("getLocalName: " + getLocalName(s));

        testEscape("/", "%2F");
        testEscape("#", "%23");
        testEscape("%", "%25");
        testEscape("%01", "%2501");
        testEscape("%GG", "%25GG");
    }

    /**
     * Checks that escaping the given string produces the expected result and
     * that unescaping that result restores the input.
     *
     * @throws AssertionError if either check fails
     */
    private static void testEscape(String unescaped, String expectedEscaped) {
        String esc = escape(unescaped);
        String unesc = unescape(esc);
        System.out.format("escape('%s')='%s', unescape('%s')='%s'\n", unescaped, esc, esc, unesc);
        if (!esc.equals(expectedEscaped))
            throw new AssertionError("escape('" + unescaped + "') was expected to return '" + expectedEscaped + "' but returned '" + esc + "'");
        if (!unesc.equals(unescaped))
            throw new AssertionError("unescape(escape('" + unescaped + "'))=unescape(" + esc + ") was expected to return '" + unescaped + "' but returned '" + unesc + "'");
    }

}