/******************************************************************************* * Copyright (c) 2007, 2010 Association for Decentralized Information Management * in Industry THTH ry. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: * VTT Technical Research Centre of Finland - initial API and implementation *******************************************************************************/ package org.simantics.databoard.util; import java.io.UnsupportedEncodingException; import java.nio.charset.Charset; /** * Simantics URI and identifier escape specification. * * @author Hannu Niemistö */ public final class URIUtil { static final Charset UTF8 = Charset.forName("UTF-8"); static final byte[] encodeTable = new byte[128]; static final byte[] encodeTable2 = new byte[128]; // for non-bijection filenames static { for (int i = 0; i < 128; ++i) { char c = (char) i; if (c == ' ') encodeTable[i] = '_'; else if (Character.isJavaIdentifierPart(c) && c != '_' && c != '$') { encodeTable[i] = (byte) i; } else encodeTable[i] = -1; } for (int i = 0; i < 128; ++i) { char c = (char) i; if (c == ' ' || c == '_' || c == '(' || c== ')') encodeTable2[i] = (byte) i; else if (c == '/') encodeTable2[i] = (byte) '-'; else if (c == ' ') encodeTable2[i] = (byte) '_'; else if (c == '-' || c == '.') encodeTable2[i] = (byte) i; else if (Character.isJavaIdentifierPart(c) && c != '_' && c != '$') { encodeTable2[i] = (byte) i; } else encodeTable2[i] = -1; } } public static byte[] encode(String str, byte escapeChar, boolean identifier) throws UnsupportedEncodingException { byte[] bytes = str.getBytes(UTF8); boolean prefixWithUnderscore = identifier && bytes.length > 0 && (bytes[0] == '_' || Character.isDigit(bytes[0])); // First calculate the length int length = bytes.length; for (byte b : bytes) { if (b < 0 || encodeTable[b] == -1) length += 2; } if (prefixWithUnderscore) length += 1; // Then encode if (length == bytes.length) { for (int i = 0; i < length; ++i) bytes[i] = encodeTable[bytes[i]]; return bytes; } else { byte[] result = new byte[length]; int pos = 0; if (prefixWithUnderscore) { result[pos++] = '_'; } for (byte b : bytes) { int ib = (int) b; if (ib >= 0) { byte eb = encodeTable[ib]; if (eb >= 0) { result[pos++] = eb; continue; } } else ib += 256; result[pos++] = escapeChar; result[pos++] = (byte) Character.forDigit(ib >> 4, 16); result[pos++] = (byte) Character.forDigit(ib & 15, 16); } return result; } } public static byte[] encodeFilename(String str, byte escapeChar, boolean identifier) throws UnsupportedEncodingException { byte[] bytes = str.getBytes(UTF8); boolean prefixWithUnderscore = identifier && bytes.length > 0 && (bytes[0] == '_' || Character.isDigit(bytes[0])); // First calculate the length int length = bytes.length; for (byte b : bytes) { if (b < 0 || encodeTable2[b] == -1) length += 2; } if (prefixWithUnderscore) length += 1; // Then encode if (length == bytes.length) { for (int i = 0; i < length; ++i) bytes[i] = encodeTable2[bytes[i]]; return bytes; } else { byte[] result = new byte[length]; int pos = 0; if (prefixWithUnderscore) { result[pos++] = '_'; } for (byte b : bytes) { int ib = (int) b; if (ib >= 0) { byte eb = encodeTable2[ib]; if (eb >= 0) { result[pos++] = eb; continue; } } else ib += 256; result[pos++] = escapeChar; result[pos++] = (byte) Character.forDigit(ib >> 4, 16); result[pos++] = (byte) Character.forDigit(ib & 15, 16); } return result; } } public static String encodeFilename(String str) { try { byte[] result = encodeFilename(str, (byte) '%', false); return new String(result, 0, result.length); } catch (UnsupportedEncodingException e) { // Should never happen when using UTF-8 throw new Error(e); } } public static String encodeURI(String str) { try { byte[] result = encode(str, (byte) '%', false); return new String(result, 0, result.length); } catch (UnsupportedEncodingException e) { // Should never happen when using UTF-8 throw new Error(e); } } public static String encodeIdentifier(String str) { try { byte[] result = encode(str, (byte) '$', true); return new String(result, 0, result.length); } catch (UnsupportedEncodingException e) { // Should never happen when using UTF-8 throw new Error(e); } } public static String decode(byte[] bytes, byte escapeChar, boolean identifier) { int length = 0; int startPos = 0; { int i = 0; // Skip '_' prefix if necessary if (identifier && bytes.length > 0 && bytes[0] == '_') { startPos = 1; i = 1; } for (; i < bytes.length; ++i) { byte b = bytes[i]; if (b == escapeChar) i += 2; ++length; } } int pos = 0; byte[] result = new byte[length]; for (int i = startPos; i < bytes.length; ++i) { byte b = bytes[i]; if (b == escapeChar) { int c = Character.digit((char) bytes[++i], 16); c *= 16; c += Character.digit((char) bytes[++i], 16); result[pos] = (byte) c; } else { if (b == '_') result[pos] = ' '; else result[pos] = b; } ++pos; } return new String(result, UTF8); } public static String decodeURI(String str) { return decode(str.getBytes(), (byte) '%', false); } public static String decodeIdentifier(String str) { return decode(str.getBytes(), (byte) '$', true); } /** * Escape any of the following characters: <>:"/\|?* with %nn. * * @param str a file name, not a full file path * @return original string or escaped file name if encoding is needed */ public static String encodeFilename2(String str) { return encodeFilename2(str, '%'); } private static String encodeFilename2(String str, char escapeChar) { // First calculate the length int originalLength = str.length(); int length = originalLength; for (int i = 0; i < originalLength; ++i) { char c = str.charAt(i); if (c < 128 && fileNameEncodeTable[(int) c] == -1) length += 2; } if (length == originalLength) return str; char[] result = new char[length]; int pos = 0; for (int i = 0; i < originalLength; ++i) { char c = str.charAt(i); int ic = c; if (c >= 128) { // Never escape any non-ASCII characters. Those should work. result[pos++] = c; } else { int ec = fileNameEncodeTable[ic]; if (ec >= 0) { result[pos++] = (char) ec; } else { result[pos++] = escapeChar; result[pos++] = Character.forDigit(ic >> 4, 16); result[pos++] = Character.forDigit(ic & 15, 16); } } } return new String(result); } static final int[] fileNameEncodeTable = new int[128]; // for UTF-16 non-bijection filenames static { for (int i = 0; i < fileNameEncodeTable.length; ++i) { if (i < 32) { // Control characters are all in need of escapes fileNameEncodeTable[i] = -1; } else { switch ((char) i) { // Denied characters in windows file names // https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx case '<': case '>': case ':': case '"': case '/': case '\\': case '|': case '?': case '*': fileNameEncodeTable[i] = -1; break; default: fileNameEncodeTable[i] = i; break; } } } } }