1 /*******************************************************************************
\r
2 * Copyright (c) 2007, 2010 Association for Decentralized Information Management
\r
3 * in Industry THTH ry.
\r
4 * All rights reserved. This program and the accompanying materials
\r
5 * are made available under the terms of the Eclipse Public License v1.0
\r
6 * which accompanies this distribution, and is available at
\r
7 * http://www.eclipse.org/legal/epl-v10.html
\r
10 * VTT Technical Research Centre of Finland - initial API and implementation
\r
11 *******************************************************************************/
\r
12 package org.simantics.databoard.util;
\r
14 import java.io.UnsupportedEncodingException;
\r
15 import java.nio.charset.Charset;
\r
18 * <a href="http://www.simantics.org/wiki/index.php/URI">Simantics URI and identifier escape specification</a>.
\r
20 * @author Hannu Niemistö
\r
22 public final class URIUtil {
\r
// Charset used by this class when converting between String and byte[]
// (see the getBytes(UTF8) and new String(..., UTF8) calls in encode, encodeFilename and decode).
24 static final Charset UTF8 = Charset.forName("UTF-8");
\r
// Lookup table indexed by a 7-bit character code and read by encode(String, byte, boolean).
// An entry of -1 is what encode() tests for when sizing its output; any other entry is the
// byte written in place of the character.
26 static final byte[] encodeTable = new byte[128];
\r
// Same layout as encodeTable, but read by encodeFilename(String, byte, boolean).
27 static final byte[] encodeTable2 = new byte[128]; // for non-bijection filenames
\r
// NOTE(review): the lines that open the enclosing initializer and declare `c` are not visible
// in this view; `c` is presumably the character for index i -- TODO confirm against the full file.
// First loop: fill encodeTable for every 7-bit code.
30 for (int i = 0; i < 128; ++i) {
\r
// Maps some character to '_'; the condition guarding this assignment is not visible here.
33 encodeTable[i] = '_';
\r
// Characters that may appear inside a Java identifier, except '_' and '$', map to themselves.
35 else if (Character.isJavaIdentifierPart(c) && c != '_' && c != '$') {
\r
36 encodeTable[i] = (byte) i;
\r
// -1 entry; presumably the fall-through case for every remaining character (guard not visible).
38 encodeTable[i] = -1;
\r
// Second loop: fill encodeTable2 for every 7-bit code.
41 for (int i = 0; i < 128; ++i) {
\r
// Space, underscore and parentheses map to themselves in the filename table.
43 if (c == ' ' || c == '_' || c == '(' || c== ')')
\r
44 encodeTable2[i] = (byte) i;
\r
// Maps some character to '-'; the condition guarding this assignment is not visible here.
46 encodeTable2[i] = (byte) '-';
\r
// Maps some character to '_'; the condition guarding this assignment is not visible here.
48 encodeTable2[i] = (byte) '_';
\r
// Hyphen and dot map to themselves.
49 else if (c == '-' || c == '.')
\r
50 encodeTable2[i] = (byte) i;
\r
// Java identifier characters other than '_' and '$' map to themselves.
51 else if (Character.isJavaIdentifierPart(c) && c != '_' && c != '$') {
\r
52 encodeTable2[i] = (byte) i;
\r
// -1 entry; presumably the fall-through case for every remaining character (guard not visible).
54 encodeTable2[i] = -1;
\r
/**
 * Encodes a string into bytes using the {@code encodeTable} lookup table.
 * <p>
 * The string is converted to UTF-8 bytes. A first pass inspects every byte that is
 * negative or whose table entry is -1 in order to compute the output length; when the
 * length is unchanged the bytes are translated through the table in place, otherwise a
 * new array of the computed length is filled. The visible escape sequence consists of
 * {@code escapeChar} followed by two hexadecimal digits (high nibble first) produced by
 * {@link Character#forDigit(int, int)}.
 * <p>
 * NOTE(review): several statements of this method (the length increments, the
 * declarations of {@code pos} and {@code ib}, the branch that decides between table
 * output and escape output, and the return statements) are not visible in this view --
 * confirm the details against the full file.
 *
 * @param str the text to encode
 * @param escapeChar byte written in front of each two-digit hexadecimal escape
 * @param identifier when true, a leading '_' is emitted if the first byte is '_' or a digit
 * @return the encoded bytes (return statements not visible in this view)
 * @throws UnsupportedEncodingException declared by the signature; no visible statement throws it
 */
59 public static byte[] encode(String str, byte escapeChar, boolean identifier) throws UnsupportedEncodingException {
\r
60 byte[] bytes = str.getBytes(UTF8);
\r
// A '_' prefix is requested only in identifier mode, for non-empty input whose first
// byte is '_' or a digit.
62 boolean prefixWithUnderscore = identifier && bytes.length > 0 && (bytes[0] == '_' || Character.isDigit(bytes[0]));
\r
64 // First calculate the length
\r
65 int length = bytes.length;
\r
66 for (byte b : bytes) {
\r
// Negative bytes belong to multi-byte UTF-8 sequences and cannot index the 128-entry table,
// so they are checked first. The statement executed for such bytes is not visible here;
// presumably it grows `length` to make room for an escape.
67 if (b < 0 || encodeTable[b] == -1)
\r
// The statement guarded by this test is not visible; presumably it reserves room for the prefix.
70 if (prefixWithUnderscore)
\r
// Unchanged length: no byte needs escaping and no prefix is needed, so the input array
// is translated through the table in place.
74 if (length == bytes.length) {
\r
75 for (int i = 0; i < length; ++i)
\r
76 bytes[i] = encodeTable[bytes[i]];
\r
// Otherwise build the output in a fresh array of the computed length.
79 byte[] result = new byte[length];
\r
81 if (prefixWithUnderscore) {
\r
82 result[pos++] = '_';
\r
84 for (byte b : bytes) {
\r
// `ib` is presumably the byte value as an int (declaration not visible).
87 byte eb = encodeTable[ib];
\r
// Escape sequence: escapeChar, then the high and low nibble of ib as hexadecimal digits.
95 result[pos++] = escapeChar;
\r
96 result[pos++] = (byte) Character.forDigit(ib >> 4, 16);
\r
97 result[pos++] = (byte) Character.forDigit(ib & 15, 16);
\r
/**
 * Encodes a string into bytes using the {@code encodeTable2} lookup table (the table
 * annotated "for non-bijection filenames" at its declaration).
 * <p>
 * The visible structure mirrors {@code encode(String, byte, boolean)}: the string is
 * converted to UTF-8, a first pass inspects bytes that are negative or whose table entry
 * is -1 to compute the output length, an unchanged length leads to an in-place table
 * translation, and otherwise a new array is filled with table bytes and escape sequences
 * of {@code escapeChar} plus two hexadecimal digits (high nibble first).
 * <p>
 * NOTE(review): the length increments, the declarations of {@code pos} and {@code ib},
 * the guard around the table-output line and the return statements are not visible in
 * this view -- confirm the details against the full file.
 *
 * @param str the text to encode
 * @param escapeChar byte written in front of each two-digit hexadecimal escape
 * @param identifier when true, a leading '_' is emitted if the first byte is '_' or a digit
 * @return the encoded bytes (return statements not visible in this view)
 * @throws UnsupportedEncodingException declared by the signature; no visible statement throws it
 */
103 public static byte[] encodeFilename(String str, byte escapeChar, boolean identifier) throws UnsupportedEncodingException {
\r
104 byte[] bytes = str.getBytes(UTF8);
\r
// A '_' prefix is requested only in identifier mode, for non-empty input whose first
// byte is '_' or a digit.
106 boolean prefixWithUnderscore = identifier && bytes.length > 0 && (bytes[0] == '_' || Character.isDigit(bytes[0]));
\r
108 // First calculate the length
\r
109 int length = bytes.length;
\r
110 for (byte b : bytes) {
\r
// Negative bytes (multi-byte UTF-8) are checked before indexing the 128-entry table.
// The statement executed for such bytes is not visible; presumably it grows `length`.
111 if (b < 0 || encodeTable2[b] == -1)
\r
// The statement guarded by this test is not visible; presumably it reserves room for the prefix.
114 if (prefixWithUnderscore)
\r
// Unchanged length: translate the input array through the table in place.
118 if (length == bytes.length) {
\r
119 for (int i = 0; i < length; ++i)
\r
120 bytes[i] = encodeTable2[bytes[i]];
\r
// Otherwise build the output in a fresh array of the computed length.
123 byte[] result = new byte[length];
\r
125 if (prefixWithUnderscore) {
\r
126 result[pos++] = '_';
\r
128 for (byte b : bytes) {
\r
// `ib` is presumably the byte value as an int (declaration not visible).
131 byte eb = encodeTable2[ib];
\r
// Table output for this byte; the condition guarding this line is not visible here.
133 result[pos++] = eb;
\r
// Escape sequence: escapeChar, then the high and low nibble of ib as hexadecimal digits.
139 result[pos++] = escapeChar;
\r
140 result[pos++] = (byte) Character.forDigit(ib >> 4, 16);
\r
141 result[pos++] = (byte) Character.forDigit(ib & 15, 16);
\r
147 public static String encodeFilename(String str) {
\r
149 byte[] result = encodeFilename(str, (byte) '%', false);
\r
150 return new String(result, 0, result.length);
\r
151 } catch (UnsupportedEncodingException e) {
\r
152 // Should never happen when using UTF-8
\r
153 throw new Error(e);
\r
158 public static String encodeURI(String str) {
\r
160 byte[] result = encode(str, (byte) '%', false);
\r
161 return new String(result, 0, result.length);
\r
162 } catch (UnsupportedEncodingException e) {
\r
163 // Should never happen when using UTF-8
\r
164 throw new Error(e);
\r
169 public static String encodeIdentifier(String str) {
\r
171 byte[] result = encode(str, (byte) '$', true);
\r
172 return new String(result, 0, result.length);
\r
173 } catch (UnsupportedEncodingException e) {
\r
174 // Should never happen when using UTF-8
\r
175 throw new Error(e);
\r
/**
 * Decodes escaped bytes back into a String.
 * <p>
 * A first pass over the input determines the output length; a second pass fills the
 * output array, turning each {@code escapeChar} followed by two hexadecimal digits into
 * one byte. The output bytes are finally interpreted as UTF-8.
 * <p>
 * NOTE(review): the declarations of {@code length}, {@code startPos}, {@code pos} and
 * {@code b}, the bodies of the prefix test and of the counting loop, the step between
 * reading the two hexadecimal digits, and the handling of non-escaped bytes are not
 * visible in this view -- confirm the details against the full file.
 * <p>
 * NOTE(review): the visible lines read {@code bytes[++i]} twice after an escape
 * character without a visible bounds check, and do not check the result of
 * {@link Character#digit(char, int)} (which is -1 for a non-hexadecimal character);
 * a truncated or malformed escape may need explicit handling -- verify.
 *
 * @param bytes the encoded bytes
 * @param escapeChar byte that introduces each two-digit hexadecimal escape
 * @param identifier when true, a leading '_' is treated as a prefix to be skipped
 * @return the decoded text
 */
180 public static String decode(byte[] bytes, byte escapeChar, boolean identifier) {
\r
185 // Skip '_' prefix if necessary
\r
186 if (identifier && bytes.length > 0 && bytes[0] == '_') {
\r
// Counting pass; starts from the current value of i (its initialisation is not visible).
190 for (; i < bytes.length; ++i) {
\r
// The statement executed for an escape character is not visible; presumably it skips
// the two hexadecimal digits so that the escape counts as a single output byte.
192 if (b == escapeChar)
\r
198 byte[] result = new byte[length];
\r
// Decoding pass, starting after any skipped prefix.
199 for (int i = startPos; i < bytes.length; ++i) {
\r
201 if (b == escapeChar) {
\r
// First hexadecimal digit of the escape.
202 int c = Character.digit((char) bytes[++i], 16);
\r
// Second hexadecimal digit is added; the statement between the two reads is not visible
// (presumably it scales the first digit by 16).
204 c += Character.digit((char) bytes[++i], 16);
\r
205 result[pos] = (byte) c;
\r
// Interpret the decoded bytes as UTF-8.
214 return new String(result, UTF8);
\r
217 public static String decodeURI(String str) {
\r
218 return decode(str.getBytes(), (byte) '%', false);
\r
221 public static String decodeIdentifier(String str) {
\r
222 return decode(str.getBytes(), (byte) '$', true);
\r