1 /*******************************************************************************
\r
2 * Copyright (c) 2007, 2010 Association for Decentralized Information Management
\r
3 * in Industry THTH ry.
\r
4 * All rights reserved. This program and the accompanying materials
\r
5 * are made available under the terms of the Eclipse Public License v1.0
\r
6 * which accompanies this distribution, and is available at
\r
7 * http://www.eclipse.org/legal/epl-v10.html
\r
10 * VTT Technical Research Centre of Finland - initial API and implementation
\r
11 *******************************************************************************/
\r
12 /* The following copyright is attached because marked parts of the following code are
\r
13 * copied and modified from Jena 2.4.
\r
16 * (c) Copyright 2001, 2002, 2003, 2004, 2005, 2006 Hewlett-Packard Development Company, LP
\r
17 * All rights reserved.
\r
19 * Redistribution and use in source and binary forms, with or without
\r
20 * modification, are permitted provided that the following conditions
\r
22 * 1. Redistributions of source code must retain the above copyright
\r
23 * notice, this list of conditions and the following disclaimer.
\r
24 * 2. Redistributions in binary form must reproduce the above copyright
\r
25 * notice, this list of conditions and the following disclaimer in the
\r
26 * documentation and/or other materials provided with the distribution.
\r
27 * 3. The name of the author may not be used to endorse or promote products
\r
28 * derived from this software without specific prior written permission.
\r
30 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
\r
31 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
\r
32 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
\r
33 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
\r
34 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
\r
35 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
\r
36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
\r
37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
\r
38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
\r
39 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\r
41 * * Id: URIref.java,v 1.5 2006/03/22 13:52:49 andy_seaborne Exp
\r
43 AUTHOR: Jeremy J. Carroll
\r
46 package org.simantics.databoard.util;
\r
48 import java.nio.charset.Charset;
\r
49 import java.util.ArrayList;
\r
50 import java.util.List;
\r
/**
 * Contains utility methods for handling URI Strings in the context of ProCore
 * and the Simantics platform. This includes URI escaping and unescaping and
 * namespace/local name separation and joining.
 *
 * <p>
 * URI's in this context are assumed to be formed as follows:
 *
 * <pre>
 * &lt;namespace part&gt;#&lt;local name part&gt;
 * </pre>
 *
 * <p>
 * The implementation of {@link #escape(String)} and {@link #unescape(String)}
 * is copied and modified from Jena's com.hp.hpl.jena.util.URIref.
 *
 * @see <a href="http://en.wikipedia.org/wiki/Percent-encoding">Percent-encoding</a>
 *
 * @author Tuukka Lehtonen
 */
public final class URIStringUtils {

    /**
     * The character '/' is used as a path separator in URI namespace parts in ProCore.
     */
    public static final char NAMESPACE_PATH_SEPARATOR = '/';

    /**
     * The '#' character is used to separate the local name and namespace parts
     * of an URI, for example <code>http://www.example.org#localName</code>.
     */
    public static final char NAMESPACE_LOCAL_SEPARATOR = '#';

    /** Length of the <code>"http://"</code> prefix, used by {@link #splitURI(String)}. */
    private static final int HTTP_POSITION = "http://".length();

    private static final Charset UTF8 = Charset.forName("UTF-8");
    private static final Charset ASCII = Charset.forName("US-ASCII");

    /**
     * Punctuation that {@link #encode(String)} copies to its output verbatim,
     * in addition to ASCII letters and digits.
     */
    private static final String UNESCAPED_PUNCTUATION = ";?:@=+$,-_.!~*'()[]";

    /**
     * Characters whose presence makes {@link #encode(String)} take the escaping
     * path: space, '%' and the RFC 3986 section 2.2 reserved characters
     * <code>!*'();:@&amp;=+$,/?#[]</code>.
     */
    // NOTE(review): the original case list of needsEscaping was not available;
    // this set follows the RFC 3986 comment that preceded it plus ' ' and '%',
    // which encode() demonstrably escapes. Confirm against project history.
    private static final String ESCAPE_TRIGGERS = " %!*'();:@&=+$,/?#[]";

    /**
     * Checks that only one separator character ({@link #NAMESPACE_LOCAL_SEPARATOR})
     * between namespace and localname exists in the specified URI and returns
     * its index.
     *
     * @param uri the URI to search from
     * @return the character index of the separator ranging from 0 to uri.length()-1
     * @throws IllegalArgumentException if no {@link #NAMESPACE_LOCAL_SEPARATOR}
     *         is found in the specified URI, or if more than one is found
     */
    private static int assertSingleSeparatorPosition(String uri) {
        int sharpIndex = uri.indexOf(NAMESPACE_LOCAL_SEPARATOR);
        if (sharpIndex == -1) {
            throw new IllegalArgumentException("URI '" + uri + "' does not contain any '" + NAMESPACE_LOCAL_SEPARATOR + "' separator characters");
        }
        int nextSharpIndex = uri.indexOf(NAMESPACE_LOCAL_SEPARATOR, sharpIndex + 1);
        if (nextSharpIndex != -1) {
            throw new IllegalArgumentException("URI '" + uri + "' contains multiple '" + NAMESPACE_LOCAL_SEPARATOR + "' separator characters");
        }
        return sharpIndex;
    }

    /**
     * Checks that only one separator character (
     * {@link #NAMESPACE_LOCAL_SEPARATOR}) between namespace and localname
     * exists in the specified URI and returns its index. This version does not
     * throw an exception when the separator is not found.
     *
     * @param uri the URI to search from
     * @return the character index of the separator ranging from 0 to
     *         uri.length()-1 or -1 if no separator was found.
     */
    private static int singleSeparatorPosition(String uri) {
        int sharpIndex = uri.indexOf(NAMESPACE_LOCAL_SEPARATOR);
        if (sharpIndex == -1) {
            return -1;
        }
        int nextSharpIndex = uri.indexOf(NAMESPACE_LOCAL_SEPARATOR, sharpIndex + 1);
        if (nextSharpIndex != -1) {
            // NOTE(review): result for multiple separators is presumed to be -1
            // (treated the same as "not found") - confirm.
            return -1;
        }
        return sharpIndex;
    }

    /**
     * Splits the specified URI into a namespace and a local name and returns
     * the namespace.
     *
     * <p>
     * Assumes that namespaces are always separated by
     * {@link #NAMESPACE_LOCAL_SEPARATOR} characters.
     *
     * @param uri the URI to split, must be non-null
     * @return the namespace part of the specified URI
     * @throws IllegalArgumentException for URIs without a
     *         {@link #NAMESPACE_LOCAL_SEPARATOR}
     * @throws NullPointerException for <code>null</code> URIs
     */
    public static String getNamespace(String uri) {
        if (uri == null)
            throw new NullPointerException("null uri");
        int separatorIndex = assertSingleSeparatorPosition(uri);
        return uri.substring(0, separatorIndex);
    }

    /**
     * Returns the part of the specified URI that precedes its last
     * {@link #NAMESPACE_PATH_SEPARATOR} or {@link #NAMESPACE_LOCAL_SEPARATOR},
     * whichever occurs later.
     *
     * @param uri the URI to cut, must be non-null
     * @return the prefix of <code>uri</code> up to, but excluding, the last separator
     * @throws StringIndexOutOfBoundsException if <code>uri</code> contains
     *         neither separator character
     */
    public static String getRVIParent(String uri) {
        int childSeparator = uri.lastIndexOf(NAMESPACE_PATH_SEPARATOR);
        int propertySeparator = uri.lastIndexOf(NAMESPACE_LOCAL_SEPARATOR);
        int separator = Math.max(childSeparator, propertySeparator);
        return uri.substring(0, separator);
    }

    /**
     * Splits the specified URI into a namespace and a local name and returns
     * the local name.
     *
     * <p>
     * Assumes that namespaces are always separated by
     * {@link #NAMESPACE_LOCAL_SEPARATOR} characters.
     *
     * @param uri the URI to split, must be non-null
     * @return the local name part of the specified URI
     * @throws IllegalArgumentException for URIs without a
     *         {@link #NAMESPACE_LOCAL_SEPARATOR}
     * @throws NullPointerException for <code>null</code> URIs
     */
    public static String getLocalName(String uri) {
        if (uri == null)
            throw new NullPointerException("null uri");
        int separatorIndex = assertSingleSeparatorPosition(uri);
        return uri.substring(separatorIndex + 1);
    }

    /**
     * Replaces every character of the specified name that is not a valid Java
     * identifier part (see {@link Character#isJavaIdentifierPart(char)}).
     *
     * @param name the name to process, must be non-null
     * @return the processed name, or <code>name</code> itself if nothing was replaced
     */
    public static String escapeName(String name) {
        char[] chars = name.toCharArray();
        boolean modified = false;
        for (int i = 0; i < chars.length; ++i) {
            if (!Character.isJavaIdentifierPart(chars[i])) {
                // NOTE(review): replacement character presumed to be '_' - confirm.
                chars[i] = '_';
                modified = true;
            }
        }
        return modified ? new String(chars) : name;
    }

    /**
     * Splits the specified URI at its last {@link #NAMESPACE_PATH_SEPARATOR}.
     * A URI whose last path separator is the one ending the
     * <code>"http://"</code> prefix is split into that prefix and the rest.
     *
     * @param uri the URI to split, must be non-null
     * @return [0] = part before the last path separator, [1] = part after it,
     *         or <code>null</code> if the URI cannot be split
     */
    public static String[] splitURI(String uri) {
        int nextPathSeparator = uri.lastIndexOf(NAMESPACE_PATH_SEPARATOR);
        if (nextPathSeparator == -1) return null;
        if (nextPathSeparator == HTTP_POSITION - 1) {
            if (uri.startsWith("http://")) return new String[] { "http://", uri.substring(HTTP_POSITION, uri.length()) };
            // NOTE(review): non-http URIs with a separator at this exact index
            // are presumed to be rejected with null - confirm.
            return null;
        }
        return new String[] {
                uri.substring(0, nextPathSeparator),
                uri.substring(nextPathSeparator + 1, uri.length())
        };
    }

    /**
     * Same as {@link #splitURI(String)} but returns the parts as a new list.
     *
     * @param uri the URI to split, must be non-null
     * @return a new mutable list containing the parts returned by
     *         {@link #splitURI(String)}
     * @throws NullPointerException if {@link #splitURI(String)} returns
     *         <code>null</code> for the specified URI
     */
    public static List<String> splitURISCL(String uri) {
        String[] result = splitURI(uri);
        ArrayList<String> list = new ArrayList<String>(result.length);
        for (String s : result) list.add(s);
        return list;
    }

    /**
     * Splits the specified URI into a namespace and a local name and returns
     * them both separately as an array.
     *
     * @param uri the URI to split, must be non-null
     * @return [0] = namespace, [1] = local name or <code>null</code> if the URI
     *         cannot be split
     * @throws NullPointerException for <code>null</code> URIs
     */
    public static String[] trySplitNamespaceAndLocalName(String uri) {
        if (uri == null)
            throw new NullPointerException("null uri");
        int separatorIndex = singleSeparatorPosition(uri);
        return separatorIndex == -1 ?
                null
                : new String[] { uri.substring(0, separatorIndex), uri.substring(separatorIndex + 1) };
    }

    /**
     * Splits the specified URI into a namespace and a local name and returns
     * them both separately as an array.
     *
     * @param uri the URI to split, must be non-null
     * @return [0] = namespace, [1] = local name
     * @throws IllegalArgumentException for URIs without a
     *         {@link #NAMESPACE_LOCAL_SEPARATOR}
     * @throws NullPointerException for <code>null</code> URIs
     */
    public static String[] splitNamespaceAndLocalName(String uri) {
        if (uri == null)
            throw new NullPointerException("null uri");
        int separatorIndex = assertSingleSeparatorPosition(uri);
        return new String[] { uri.substring(0, separatorIndex), uri.substring(separatorIndex + 1) };
    }

    /**
     * Converts a unicode string into an RFC 2396 compliant URI, using %NN
     * escapes where appropriate, including the
     * {@link #NAMESPACE_PATH_SEPARATOR} character.
     *
     * @param localName the string to escape
     * @return the escaped string
     * @throws NullPointerException for <code>null</code> URIs
     */
    public static String escapeURI(String localName) {
        if (localName == null)
            throw new NullPointerException("null local name");
        return encode(localName);
    }

    /**
     * Add a suffix path to a namespace string, i.e. join the strings to
     * together with the {@link #NAMESPACE_PATH_SEPARATOR} character in between.
     *
     * @param namespace the namespace to append to
     * @param suffix the suffix to append
     * @return the joined namespace
     */
    public static String appendURINamespace(String namespace, String suffix) {
        return new StringBuilder(namespace.length() + 1 + suffix.length())
                .append(namespace)
                .append(NAMESPACE_PATH_SEPARATOR)
                .append(suffix)
                .toString();
    }

    /**
     * Join a namespace and a localname to form an URI with
     * {@link #NAMESPACE_LOCAL_SEPARATOR}. The local name is escaped with
     * {@link #escapeURI(String)} before joining; the namespace is used as is.
     *
     * @param namespace the namespace part to join
     * @param localName the localname part to join
     * @return the joined URI
     */
    public static String makeURI(String namespace, String localName) {
        String escapedLocalName = escapeURI(localName);
        return new StringBuilder(namespace.length() + 1 + escapedLocalName.length())
                .append(namespace)
                .append(NAMESPACE_LOCAL_SEPARATOR)
                .append(escapedLocalName)
                .toString();
    }

    /**
     * Convert a Unicode string, first to UTF-8 and then to an RFC 2396
     * compliant URI using the %NN escape mechanism as appropriate. A '%'
     * character in the input is never taken to start an existing escape: it
     * is itself escaped as <code>%25</code>, so that
     * <code>unescape(escape(s))</code> yields <code>s</code>.
     *
     * <p>
     * Meant to be used for encoding URI local name parts if it is desired to
     * have '/' characters in the local name without creating a new namespace.
     * For example these two URI's:<br/>
     *
     * <pre>
     * http://foo.bar.com/foo/bar/org%2Fcom<br/>
     * http://foo.bar.com/foo/bar/net%2Fcom<br/>
     * </pre>
     *
     * have the same namespace <code>http://foo.bar.com/foo/bar/</code> and
     * different local names <code>org%2Fcom</code> and <code>net%2Fcom</code>
     * or <code>org/com</code> and <code>net/com</code> in unescaped form.
     *
     * @param unicode the Unicode string to escape
     * @return the escaped string
     */
    public static String escape(String unicode) {
        return encode(unicode);
    }

    /* Copied and modified from Jena 2.4 com.hp.hpl.jena.util.URIref */
    private static String encode(String unicode) {
        if (!needsEscaping(unicode))
            return unicode;

        byte[] utf8 = unicode.getBytes(UTF8);
        // Each input byte produces at most three output bytes ("%XX").
        byte[] rsltAscii = new byte[utf8.length * 3];
        int out = 0;
        for (int in = 0; in < utf8.length; ++in) {
            byte b = utf8[in];
            if (isUnescaped(b)) {
                rsltAscii[out++] = b;
            } else {
                // [lehtonen] NOTE: all input needs to be escaped, i.e. "%01" should result in "%2501", not "%01".
                // escape+unescape is a bijection, not an idempotent operation.
                // Hence '%' is escaped as '%25'. ' ', '#' and '/' are escaped as well,
                // and so is '&' to avoid it being interpreted as an SGML entity.
                int c = b & 255; // Get rid of sign ...
                rsltAscii[out++] = (byte) '%';
                rsltAscii[out++] = hexEncode(c / 16);
                rsltAscii[out++] = hexEncode(c % 16);
            }
        }
        return new String(rsltAscii, 0, out, ASCII);
    }

    /**
     * @param b a single UTF-8 encoded byte
     * @return <code>true</code> if {@link #encode(String)} copies the byte
     *         verbatim: ASCII letters, digits and {@link #UNESCAPED_PUNCTUATION}
     */
    private static boolean isUnescaped(byte b) {
        return (b >= 'a' && b <= 'z')
                || (b >= 'A' && b <= 'Z')
                || (b >= '0' && b <= '9')
                || UNESCAPED_PUNCTUATION.indexOf(b) >= 0;
    }

    /*
     * RFC 3986 section 2.2 Reserved Characters (January 2005)
     * !*'();:@&=+$,/?#[]
     */
    private static boolean needsEscaping(String unicode) {
        int len = unicode.length();
        for (int i = 0; i < len; ++i) {
            if (ESCAPE_TRIGGERS.indexOf(unicode.charAt(i)) >= 0)
                return true;
        }
        return false;
    }

    private static boolean needsUnescaping(String unicode) {
        return unicode.indexOf('%') > -1;
    }

    /**
     * Convert a URI, in US-ASCII, with escaped characters taken from UTF-8, to
     * the corresponding Unicode string. On ill-formed input the results are
     * undefined, specifically if the unescaped version is not a UTF-8 String,
     * some String will be returned.
     *
     * @param uri the uri, in characters specified by RFC 2396 + '#'.
     * @return the corresponding Unicode String.
     * @exception IllegalArgumentException if a % hex sequence is ill-formed.
     */
    public static String unescape(String uri) {
        if (!needsUnescaping(uri))
            return uri;

        byte[] ascii = uri.getBytes(ASCII);
        // Unescaping never grows the data: "%XX" (3 bytes) becomes 1 byte.
        byte[] utf8 = new byte[ascii.length];
        int in = 0;
        int out = 0;
        while (in < ascii.length) {
            if (ascii[in] == (byte) '%') {
                int high = hexDigitAt(ascii, in + 1, uri);
                int low = hexDigitAt(ascii, in + 2, uri);
                utf8[out++] = (byte) (high * 16 | low);
                in += 3;
            } else {
                utf8[out++] = ascii[in++];
            }
        }
        return new String(utf8, 0, out, UTF8);
    }

    /**
     * Decodes the hexadecimal digit at the specified index of an escape sequence.
     *
     * @param ascii the US-ASCII bytes of the URI being unescaped
     * @param index the index of the digit to decode
     * @param uri the URI being unescaped, used in error messages only
     * @return the value of the digit, 0..15
     * @throws IllegalArgumentException if the index is past the end of the
     *         data or the byte at the index is not a hexadecimal digit
     */
    private static int hexDigitAt(byte[] ascii, int index, String uri) {
        if (index >= ascii.length)
            throw new IllegalArgumentException("Incomplete Hex escape sequence in " + uri);
        try {
            return hexDecode(ascii[index]);
        } catch (IllegalArgumentException e) {
            throw new IllegalArgumentException("Problem while unescaping string: " + uri, e);
        }
    }

    /* Copied from Jena 2.4 com.hp.hpl.jena.util.URIref */
    private static byte hexEncode(int i) {
        if (i < 10)
            return (byte) ('0' + i);
        else
            return (byte) ('A' + i - 10);
    }

    /* Copied from Jena 2.4 com.hp.hpl.jena.util.URIref */
    private static int hexDecode(byte b) {
        if (b >= '0' && b <= '9')
            return b - '0';
        if (b >= 'a' && b <= 'f')
            return b - 'a' + 10;
        if (b >= 'A' && b <= 'F')
            return b - 'A' + 10;
        throw new IllegalArgumentException("Bad Hex escape character: " + ((b) & 255));
    }

    /**
     * Some simple tests.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String s;
        s = "http://www.vtt.fi%2FSome- %25 Namespace/Jotain";
        System.out.println(String.format("escape+unescape: %s -> %s -> %s", s, escape(s), unescape(escape(s))));
        s = "http://www.vtt.fi%2FPSK";
        System.out.println(String.format("unescape: %s -> %s", s, unescape(s)));
        s = "http://www.vtt.fi%2FSome-Namespace/Jotain / Muuta";
        System.out.println(String.format("escape: %s -> %s", s, escape(s)));
        s = "Jotain / Muuta";
        System.out.println(String.format("escape: %s -> %s", s, escape(s)));

        System.out.println("escapeURI: " + escapeURI("foo/bar/org%2Fnet"));
        System.out.println("escapeURI('...#...'): " + escapeURI("foo/bar#org%2Fnet"));
        s = makeURI("http://foo.bar.com/foo/bar", "baz/guuk/org%2Fnet");
        System.out.println("escapeURI: " + s);
        System.out.println("getNamespace: " + getNamespace(s));
        System.out.println("getLocalName: " + getLocalName(s));

        testEscape("/", "%2F");
        testEscape("#", "%23");
        testEscape("%", "%25");
        testEscape("%01", "%2501");
        testEscape("%GG", "%25GG");
    }

    /**
     * Verifies that escaping the given string yields the expected result and
     * that unescaping that result yields the original string again.
     */
    private static void testEscape(String unescaped, String expectedEscaped) {
        String esc = escape(unescaped);
        String unesc = unescape(esc);
        System.out.format("escape('%s')='%s', unescape('%s')='%s'\n", unescaped, esc, esc, unesc);
        if (!esc.equals(expectedEscaped))
            throw new AssertionError("escape('" + unescaped + "') was expected to return '" + expectedEscaped + "' but returned '" + esc + "'");
        if (!unesc.equals(unescaped))
            throw new AssertionError("unescape(escape('" + unescaped + "'))=unescape(" + esc + ") was expected to return '" + unescaped + "' but returned '" + unesc + "'");
    }

}
\r