1 /*******************************************************************************
2 * Copyright (c) 2007, 2010 Association for Decentralized Information Management
4 * All rights reserved. This program and the accompanying materials
5 * are made available under the terms of the Eclipse Public License v1.0
6 * which accompanies this distribution, and is available at
7 * http://www.eclipse.org/legal/epl-v10.html
10 * VTT Technical Research Centre of Finland - initial API and implementation
11 *******************************************************************************/
12 package org.simantics.databoard.util;
14 import java.io.UnsupportedEncodingException;
15 import java.nio.charset.Charset;
/**
 * Escaping and unescaping of strings for use in URIs, identifiers and file
 * names. See the
 * <a href="http://www.simantics.org/wiki/index.php/URI">Simantics URI and identifier escape specification</a>.
 *
 * <p>
 * All conversions between strings and bytes use UTF-8. Bytes that cannot be
 * written as such are replaced by an escape character followed by two
 * lower-case hexadecimal digits.
 *
 * @author Hannu Niemist&ouml;
 */
public final class URIUtil {

    static final Charset UTF8 = Charset.forName("UTF-8");

    /**
     * Encoding table for URIs and identifiers, indexed by ASCII code. A value
     * of -1 means that the byte must be escaped, any other value is the byte
     * to write.
     */
    static final byte[] encodeTable = new byte[128];

    /** Same as {@link #encodeTable} but for non-bijection filenames. */
    static final byte[] encodeTable2 = new byte[128]; // for non-bijection filenames

    static {
        for (int i = 0; i < 128; ++i) {
            char c = (char) i;
            // '_' and '$' are reserved: '_' stands for a space (and is the
            // identifier prefix), '$' is the identifier escape character.
            // NOTE(review): Character.isJavaIdentifierPart also accepts
            // "identifier ignorable" control characters, so those are not
            // escaped by either table - confirm that this is intended.
            boolean plain = Character.isJavaIdentifierPart(c) && c != '_' && c != '$';

            if (c == ' ') {
                encodeTable[i] = '_';
            } else if (plain) {
                encodeTable[i] = (byte) i;
            } else {
                encodeTable[i] = -1;
            }

            if (c == ' ' || c == '_' || c == '(' || c == ')' || c == '-' || c == '.') {
                encodeTable2[i] = (byte) i;
            } else if (c == '/') {
                // Not reversible: '-' itself is also written as '-'
                encodeTable2[i] = (byte) '-';
            } else if (plain) {
                encodeTable2[i] = (byte) i;
            } else {
                encodeTable2[i] = -1;
            }
        }
    }

    /**
     * Encodes a string using the URI/identifier table: spaces become
     * <code>_</code>, ASCII letters and digits are kept, and every other byte
     * of the UTF-8 form is written as <code>escapeChar</code> followed by two
     * hexadecimal digits.
     *
     * <p>
     * NOTE(review): in identifier mode a leading space is encoded as a single
     * <code>_</code>, which {@link #decode(byte[], byte, boolean)} then strips
     * as the prefix. Such strings do not survive a round trip. The behaviour
     * is kept as is because changing it would change the encoded form.
     *
     * @param str the string to encode, not <code>null</code>
     * @param escapeChar the byte that starts an escape sequence
     * @param identifier if <code>true</code>, the result is prefixed with
     *        <code>_</code> when <code>str</code> starts with <code>_</code>
     *        or a digit
     * @return the encoded bytes (ASCII only for ASCII escape characters)
     * @throws UnsupportedEncodingException never thrown; declared for
     *         compatibility with existing callers
     */
    public static byte[] encode(String str, byte escapeChar, boolean identifier) throws UnsupportedEncodingException {
        return encodeWithTable(str, encodeTable, escapeChar, identifier);
    }

    /**
     * Encodes a string using the file name table. This works like
     * {@link #encode(String, byte, boolean)} except that space,
     * <code>_</code>, <code>(</code>, <code>)</code>, <code>-</code> and
     * <code>.</code> are kept as such and <code>/</code> is replaced by
     * <code>-</code>. The encoding is therefore not reversible.
     *
     * @param str the string to encode, not <code>null</code>
     * @param escapeChar the byte that starts an escape sequence
     * @param identifier if <code>true</code>, the result is prefixed with
     *        <code>_</code> when <code>str</code> starts with <code>_</code>
     *        or a digit
     * @return the encoded bytes
     * @throws UnsupportedEncodingException never thrown; declared for
     *         compatibility with existing callers
     */
    public static byte[] encodeFilename(String str, byte escapeChar, boolean identifier) throws UnsupportedEncodingException {
        return encodeWithTable(str, encodeTable2, escapeChar, identifier);
    }

    /** Shared implementation of the byte level encoders; <code>table</code> selects the character mapping. */
    private static byte[] encodeWithTable(String str, byte[] table, byte escapeChar, boolean identifier) {
        byte[] bytes = str.getBytes(UTF8);

        boolean prefixWithUnderscore = identifier && bytes.length > 0
                && (bytes[0] == '_' || Character.isDigit(bytes[0]));

        // First calculate the length: negative bytes belong to multi-byte
        // UTF-8 sequences and are always escaped
        int length = bytes.length;
        for (byte b : bytes) {
            if (b < 0 || table[b] == -1) {
                length += 2;
            }
        }
        if (prefixWithUnderscore) {
            length += 1;
        }

        // Then encode
        if (length == bytes.length) {
            // Nothing to escape or prefix: map in place, bytes is our own copy
            for (int i = 0; i < length; ++i) {
                bytes[i] = table[bytes[i]];
            }
            return bytes;
        }

        byte[] result = new byte[length];
        int pos = 0;
        if (prefixWithUnderscore) {
            result[pos++] = '_';
        }
        for (byte b : bytes) {
            if (b >= 0 && table[b] >= 0) {
                result[pos++] = table[b];
            } else {
                int ib = b & 0xff;
                result[pos++] = escapeChar;
                result[pos++] = (byte) Character.forDigit(ib >> 4, 16);
                result[pos++] = (byte) Character.forDigit(ib & 15, 16);
            }
        }
        return result;
    }

    /**
     * Encodes a string into a file name using <code>%</code> as the escape
     * character. The encoding is not reversible, see
     * {@link #encodeFilename(String, byte, boolean)}.
     *
     * @param str the string to encode, not <code>null</code>
     * @return the encoded file name
     */
    public static String encodeFilename(String str) {
        return new String(encodeWithTable(str, encodeTable2, (byte) '%', false), UTF8);
    }

    /**
     * Encodes a string for use in a URI using <code>%</code> as the escape
     * character.
     *
     * @param str the string to encode, not <code>null</code>
     * @return the encoded string
     */
    public static String encodeURI(String str) {
        return new String(encodeWithTable(str, encodeTable, (byte) '%', false), UTF8);
    }

    /**
     * Encodes a string into an identifier using <code>$</code> as the escape
     * character. The result is prefixed with <code>_</code> if the string
     * starts with <code>_</code> or a digit.
     *
     * @param str the string to encode, not <code>null</code>
     * @return the encoded identifier
     */
    public static String encodeIdentifier(String str) {
        return new String(encodeWithTable(str, encodeTable, (byte) '$', true), UTF8);
    }

    /**
     * Decodes bytes produced by {@link #encode(String, byte, boolean)}:
     * escape sequences are turned back into the bytes they stand for,
     * <code>_</code> becomes a space and the result is read as UTF-8.
     *
     * <p>
     * The input is not validated. An escape character that is not followed by
     * two more bytes causes an {@link ArrayIndexOutOfBoundsException}, and
     * escape sequences with non-hexadecimal digits produce unspecified bytes.
     *
     * @param bytes the encoded bytes, not <code>null</code>
     * @param escapeChar the byte that starts an escape sequence
     * @param identifier if <code>true</code>, a leading <code>_</code> is
     *        treated as the identifier prefix and dropped
     * @return the decoded string
     */
    public static String decode(byte[] bytes, byte escapeChar, boolean identifier) {
        // Skip '_' prefix if necessary
        int startPos = (identifier && bytes.length > 0 && bytes[0] == '_') ? 1 : 0;

        // First calculate the length: an escape sequence of three bytes
        // produces a single byte
        int length = 0;
        for (int i = startPos; i < bytes.length; ++i) {
            if (bytes[i] == escapeChar) {
                i += 2;
            }
            ++length;
        }

        // Then decode
        byte[] result = new byte[length];
        int pos = 0;
        for (int i = startPos; i < bytes.length; ++i) {
            byte b = bytes[i];
            if (b == escapeChar) {
                int c = Character.digit((char) bytes[++i], 16);
                c *= 16;
                c += Character.digit((char) bytes[++i], 16);
                result[pos] = (byte) c;
            } else if (b == '_') {
                result[pos] = ' ';
            } else {
                result[pos] = b;
            }
            ++pos;
        }
        return new String(result, UTF8);
    }

    /**
     * Decodes a string produced by {@link #encodeURI(String)}.
     *
     * @param str the encoded string, not <code>null</code>
     * @return the decoded string
     */
    public static String decodeURI(String str) {
        // The decoder reads its input as UTF-8, so the bytes must be UTF-8 too
        return decode(str.getBytes(UTF8), (byte) '%', false);
    }

    /**
     * Decodes a string produced by {@link #encodeIdentifier(String)}.
     *
     * @param str the encoded identifier, not <code>null</code>
     * @return the decoded string
     */
    public static String decodeIdentifier(String str) {
        // The decoder reads its input as UTF-8, so the bytes must be UTF-8 too
        return decode(str.getBytes(UTF8), (byte) '$', true);
    }

    /**
     * Escape ASCII control characters (below 32) and any of the following
     * characters: {@code < > : " / \ | ? *} with %nn. All other characters,
     * including non-ASCII ones, are kept as such.
     *
     * @param str a file name, not a full file path
     * @return original string or escaped file name if encoding is needed
     */
    public static String encodeFilename2(String str) {
        return encodeFilename2(str, '%');
    }

    /** Implementation of {@link #encodeFilename2(String)} for the given escape character. */
    private static String encodeFilename2(String str, char escapeChar) {
        // First calculate the length
        int originalLength = str.length();
        int length = originalLength;
        for (int i = 0; i < originalLength; ++i) {
            char c = str.charAt(i);
            if (c < 128 && fileNameEncodeTable[(int) c] == -1) {
                length += 2;
            }
        }

        if (length == originalLength) {
            return str;
        }

        // Then encode
        char[] result = new char[length];
        int pos = 0;
        for (int i = 0; i < originalLength; ++i) {
            char c = str.charAt(i);
            int ic = c;
            if (c >= 128) {
                // Never escape any non-ASCII characters. Those should work.
                result[pos++] = c;
            } else {
                int ec = fileNameEncodeTable[ic];
                if (ec >= 0) {
                    result[pos++] = (char) ec;
                } else {
                    result[pos++] = escapeChar;
                    result[pos++] = Character.forDigit(ic >> 4, 16);
                    result[pos++] = Character.forDigit(ic & 15, 16);
                }
            }
        }
        return new String(result);
    }

    /**
     * Encoding table used by {@link #encodeFilename2(String)}, indexed by
     * ASCII code. A value of -1 means that the character must be escaped, any
     * other value is the character to write.
     */
    static final int[] fileNameEncodeTable = new int[128]; // for UTF-16 non-bijection filenames

    static {
        for (int i = 0; i < fileNameEncodeTable.length; ++i) {
            if (i < 32) {
                // Control characters are all in need of escapes
                fileNameEncodeTable[i] = -1;
            } else {
                switch ((char) i) {
                // Denied characters in windows file names
                // https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx
                case '<': case '>': case ':': case '"': case '/': case '\\': case '|': case '?': case '*':
                    fileNameEncodeTable[i] = -1;
                    break;
                default:
                    fileNameEncodeTable[i] = i;
                    break;
                }
            }
        }
    }

}