X-Git-Url: https://gerrit.simantics.org/r/gitweb?a=blobdiff_plain;f=bundles%2Forg.simantics.databoard%2Fsrc%2Forg%2Fsimantics%2Fdataboard%2Futil%2FURIUtil.java;h=0bfc3b1b12c52e543001fcbcf74d0d96bc7c4e7e;hb=c69064f9e740eb4abe8448a63d306a085e036e1c;hp=b67e29721b8590378719052a0d82b6f45f527046;hpb=969bd23cab98a79ca9101af33334000879fb60c5;p=simantics%2Fplatform.git diff --git a/bundles/org.simantics.databoard/src/org/simantics/databoard/util/URIUtil.java b/bundles/org.simantics.databoard/src/org/simantics/databoard/util/URIUtil.java index b67e29721..0bfc3b1b1 100644 --- a/bundles/org.simantics.databoard/src/org/simantics/databoard/util/URIUtil.java +++ b/bundles/org.simantics.databoard/src/org/simantics/databoard/util/URIUtil.java @@ -1,225 +1,292 @@ -/******************************************************************************* - * Copyright (c) 2007, 2010 Association for Decentralized Information Management - * in Industry THTH ry. - * All rights reserved. This program and the accompanying materials - * are made available under the terms of the Eclipse Public License v1.0 - * which accompanies this distribution, and is available at - * http://www.eclipse.org/legal/epl-v10.html - * - * Contributors: - * VTT Technical Research Centre of Finland - initial API and implementation - *******************************************************************************/ -package org.simantics.databoard.util; - -import java.io.UnsupportedEncodingException; -import java.nio.charset.Charset; - -/** - * Simantics URI and identifier escape specification. 
- * - * @author Hannu Niemistö - */ -public final class URIUtil { - - static final Charset UTF8 = Charset.forName("UTF-8"); - - static final byte[] encodeTable = new byte[128]; - static final byte[] encodeTable2 = new byte[128]; // for non-bijection filenames - - static { - for (int i = 0; i < 128; ++i) { - char c = (char) i; - if (c == ' ') - encodeTable[i] = '_'; - - else if (Character.isJavaIdentifierPart(c) && c != '_' && c != '$') { - encodeTable[i] = (byte) i; - } else - encodeTable[i] = -1; - } - - for (int i = 0; i < 128; ++i) { - char c = (char) i; - if (c == ' ' || c == '_' || c == '(' || c== ')') - encodeTable2[i] = (byte) i; - else if (c == '/') - encodeTable2[i] = (byte) '-'; - else if (c == ' ') - encodeTable2[i] = (byte) '_'; - else if (c == '-' || c == '.') - encodeTable2[i] = (byte) i; - else if (Character.isJavaIdentifierPart(c) && c != '_' && c != '$') { - encodeTable2[i] = (byte) i; - } else - encodeTable2[i] = -1; - } - - } - - public static byte[] encode(String str, byte escapeChar, boolean identifier) throws UnsupportedEncodingException { - byte[] bytes = str.getBytes(UTF8); - - boolean prefixWithUnderscore = identifier && bytes.length > 0 && (bytes[0] == '_' || Character.isDigit(bytes[0])); - - // First calculate the length - int length = bytes.length; - for (byte b : bytes) { - if (b < 0 || encodeTable[b] == -1) - length += 2; - } - if (prefixWithUnderscore) - length += 1; - - // Then encode - if (length == bytes.length) { - for (int i = 0; i < length; ++i) - bytes[i] = encodeTable[bytes[i]]; - return bytes; - } else { - byte[] result = new byte[length]; - int pos = 0; - if (prefixWithUnderscore) { - result[pos++] = '_'; - } - for (byte b : bytes) { - int ib = (int) b; - if (ib >= 0) { - byte eb = encodeTable[ib]; - if (eb >= 0) { - result[pos++] = eb; - continue; - } - } else - ib += 256; - - result[pos++] = escapeChar; - result[pos++] = (byte) Character.forDigit(ib >> 4, 16); - result[pos++] = (byte) Character.forDigit(ib & 15, 16); - } - 
return result; - } - } - - public static byte[] encodeFilename(String str, byte escapeChar, boolean identifier) throws UnsupportedEncodingException { - byte[] bytes = str.getBytes(UTF8); - - boolean prefixWithUnderscore = identifier && bytes.length > 0 && (bytes[0] == '_' || Character.isDigit(bytes[0])); - - // First calculate the length - int length = bytes.length; - for (byte b : bytes) { - if (b < 0 || encodeTable2[b] == -1) - length += 2; - } - if (prefixWithUnderscore) - length += 1; - - // Then encode - if (length == bytes.length) { - for (int i = 0; i < length; ++i) - bytes[i] = encodeTable2[bytes[i]]; - return bytes; - } else { - byte[] result = new byte[length]; - int pos = 0; - if (prefixWithUnderscore) { - result[pos++] = '_'; - } - for (byte b : bytes) { - int ib = (int) b; - if (ib >= 0) { - byte eb = encodeTable2[ib]; - if (eb >= 0) { - result[pos++] = eb; - continue; - } - } else - ib += 256; - - result[pos++] = escapeChar; - result[pos++] = (byte) Character.forDigit(ib >> 4, 16); - result[pos++] = (byte) Character.forDigit(ib & 15, 16); - } - return result; - } - } - - public static String encodeFilename(String str) { - try { - byte[] result = encodeFilename(str, (byte) '%', false); - return new String(result, 0, result.length); - } catch (UnsupportedEncodingException e) { - // Should never happen when using UTF-8 - throw new Error(e); - } - - } - - public static String encodeURI(String str) { - try { - byte[] result = encode(str, (byte) '%', false); - return new String(result, 0, result.length); - } catch (UnsupportedEncodingException e) { - // Should never happen when using UTF-8 - throw new Error(e); - } - - } - - public static String encodeIdentifier(String str) { - try { - byte[] result = encode(str, (byte) '$', true); - return new String(result, 0, result.length); - } catch (UnsupportedEncodingException e) { - // Should never happen when using UTF-8 - throw new Error(e); - } - - } - - public static String decode(byte[] bytes, byte escapeChar, 
boolean identifier) { - int length = 0; - int startPos = 0; - { - int i = 0; - // Skip '_' prefix if necessary - if (identifier && bytes.length > 0 && bytes[0] == '_') { - startPos = 1; - i = 1; - } - for (; i < bytes.length; ++i) { - byte b = bytes[i]; - if (b == escapeChar) - i += 2; - ++length; - } - } - int pos = 0; - byte[] result = new byte[length]; - for (int i = startPos; i < bytes.length; ++i) { - byte b = bytes[i]; - if (b == escapeChar) { - int c = Character.digit((char) bytes[++i], 16); - c *= 16; - c += Character.digit((char) bytes[++i], 16); - result[pos] = (byte) c; - } else { - if (b == '_') - result[pos] = ' '; - else - result[pos] = b; - } - ++pos; - } - return new String(result, UTF8); - } - - public static String decodeURI(String str) { - return decode(str.getBytes(), (byte) '%', false); - } - - public static String decodeIdentifier(String str) { - return decode(str.getBytes(), (byte) '$', true); - } - -} +/******************************************************************************* + * Copyright (c) 2007, 2010 Association for Decentralized Information Management + * in Industry THTH ry. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * VTT Technical Research Centre of Finland - initial API and implementation + *******************************************************************************/ +package org.simantics.databoard.util; + +import java.io.UnsupportedEncodingException; +import java.nio.charset.Charset; + +/** + * Simantics URI and identifier escape specification. 
/**
 * Simantics URI and identifier escape specification.
 *
 * <p>
 * Strings are converted to UTF-8 and every byte that is not allowed by the
 * selected table is replaced by an escape character followed by two lower-case
 * hexadecimal digits. In the URI/identifier table a space is written as
 * {@code '_'} and a literal {@code '_'} is always escaped, which is what makes
 * {@link #decode(byte[], byte, boolean)} able to map {@code '_'} back to a
 * space.
 *
 * @author Hannu Niemistö
 */
public final class URIUtil {

    static final Charset UTF8 = Charset.forName("UTF-8");

    /** Byte-to-byte table for URIs and identifiers; -1 marks a byte that must be escaped. */
    static final byte[] encodeTable = new byte[128];

    /** Byte-to-byte table for non-bijection filenames; -1 marks a byte that must be escaped. */
    static final byte[] encodeTable2 = new byte[128];

    /** Char table for UTF-16 non-bijection filenames; -1 marks a char that must be escaped. */
    static final int[] fileNameEncodeTable = new int[128];

    static {
        for (int i = 0; i < 128; ++i) {
            char c = (char) i;
            if (c == ' ') {
                encodeTable[i] = '_';
            } else if (Character.isJavaIdentifierPart(c) && c != '_' && c != '$') {
                encodeTable[i] = (byte) i;
            } else {
                encodeTable[i] = -1;
            }
        }

        for (int i = 0; i < 128; ++i) {
            char c = (char) i;
            switch (c) {
            case ' ': case '_': case '(': case ')': case '-': case '.':
                // Kept verbatim in file names.
                encodeTable2[i] = (byte) i;
                break;
            case '/':
                // Path separator is flattened to a dash (this is why the mapping is not a bijection).
                encodeTable2[i] = (byte) '-';
                break;
            default:
                // '_' is handled above, so only '$' has to be excluded here.
                if (Character.isJavaIdentifierPart(c) && c != '$') {
                    encodeTable2[i] = (byte) i;
                } else {
                    encodeTable2[i] = (byte) -1;
                }
                break;
            }
        }

        for (int i = 0; i < fileNameEncodeTable.length; ++i) {
            if (i < 32) {
                // Control characters are all in need of escapes
                fileNameEncodeTable[i] = -1;
            } else {
                switch ((char) i) {
                // Denied characters in windows file names
                // https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx
                case '<': case '>': case ':': case '"': case '/': case '\\': case '|': case '?': case '*':
                    fileNameEncodeTable[i] = -1;
                    break;
                default:
                    fileNameEncodeTable[i] = i;
                    break;
                }
            }
        }
    }

    /**
     * Encodes a string with the URI/identifier table.
     *
     * @param str the string to encode
     * @param escapeChar the byte written in front of each two-digit hex escape
     * @param identifier if true, a {@code '_'} prefix is added when the input
     *        starts with a digit, a {@code '_'} or a space. The space case is
     *        needed because a leading space is written as {@code '_'}, which the
     *        decoder would otherwise strip as a prefix.
     * @return the encoded ASCII bytes
     * @throws UnsupportedEncodingException never thrown by this implementation;
     *         the clause is kept so that existing callers keep compiling
     */
    public static byte[] encode(String str, byte escapeChar, boolean identifier) throws UnsupportedEncodingException {
        byte[] bytes = str.getBytes(UTF8);
        boolean prefixWithUnderscore = identifier && bytes.length > 0
                && (bytes[0] == ' ' || bytes[0] == '_' || Character.isDigit(bytes[0]));
        return escape(bytes, escapeChar, prefixWithUnderscore, encodeTable);
    }

    /**
     * Encodes a string with the (non-bijection) file name table.
     *
     * @param str the string to encode
     * @param escapeChar the byte written in front of each two-digit hex escape
     * @param identifier if true, a {@code '_'} prefix is added when the input
     *        starts with a digit or a {@code '_'}
     * @return the encoded ASCII bytes
     * @throws UnsupportedEncodingException never thrown by this implementation;
     *         the clause is kept so that existing callers keep compiling
     */
    public static byte[] encodeFilename(String str, byte escapeChar, boolean identifier) throws UnsupportedEncodingException {
        byte[] bytes = str.getBytes(UTF8);
        boolean prefixWithUnderscore = identifier && bytes.length > 0
                && (bytes[0] == '_' || Character.isDigit(bytes[0]));
        return escape(bytes, escapeChar, prefixWithUnderscore, encodeTable2);
    }

    /**
     * Shared implementation of the byte-table encoders.
     *
     * @param bytes UTF-8 bytes of the input; may be modified and returned when no
     *        escaping or prefixing is needed
     * @param escapeChar the byte written in front of each two-digit hex escape
     * @param prefixWithUnderscore whether to emit a leading {@code '_'}
     * @param table 128-entry translation table, -1 meaning "escape this byte"
     * @return the encoded bytes
     */
    private static byte[] escape(byte[] bytes, byte escapeChar, boolean prefixWithUnderscore, byte[] table) {
        // First calculate the length
        int length = bytes.length;
        for (byte b : bytes) {
            if (b < 0 || table[b] == -1)
                length += 2;
        }
        if (prefixWithUnderscore)
            length += 1;

        // Nothing to escape and no prefix: translate in place.
        if (length == bytes.length) {
            for (int i = 0; i < length; ++i)
                bytes[i] = table[bytes[i]];
            return bytes;
        }

        byte[] result = new byte[length];
        int pos = 0;
        if (prefixWithUnderscore)
            result[pos++] = '_';
        for (byte b : bytes) {
            int ib = b & 0xFF;
            if (ib < 128) {
                byte eb = table[ib];
                if (eb >= 0) {
                    result[pos++] = eb;
                    continue;
                }
            }
            result[pos++] = escapeChar;
            result[pos++] = (byte) Character.forDigit(ib >> 4, 16);
            result[pos++] = (byte) Character.forDigit(ib & 15, 16);
        }
        return result;
    }

    /**
     * Encodes a file name with the non-bijection file name table, using
     * {@code '%'} as the escape character.
     *
     * @param str the string to encode
     * @return the encoded string
     */
    public static String encodeFilename(String str) {
        try {
            byte[] result = encodeFilename(str, (byte) '%', false);
            // The encoded form is pure ASCII; name the charset instead of using the platform default.
            return new String(result, UTF8);
        } catch (UnsupportedEncodingException e) {
            // Should never happen when using UTF-8
            throw new AssertionError(e);
        }
    }

    /**
     * Encodes a string for use in a URI, using {@code '%'} as the escape character.
     *
     * @param str the string to encode
     * @return the encoded string
     */
    public static String encodeURI(String str) {
        try {
            byte[] result = encode(str, (byte) '%', false);
            return new String(result, UTF8);
        } catch (UnsupportedEncodingException e) {
            // Should never happen when using UTF-8
            throw new AssertionError(e);
        }
    }

    /**
     * Encodes a string as an identifier, using {@code '$'} as the escape character.
     *
     * @param str the string to encode
     * @return the encoded identifier
     */
    public static String encodeIdentifier(String str) {
        try {
            byte[] result = encode(str, (byte) '$', true);
            return new String(result, UTF8);
        } catch (UnsupportedEncodingException e) {
            // Should never happen when using UTF-8
            throw new AssertionError(e);
        }
    }

    /**
     * Reverses {@link #encode(String, byte, boolean)}.
     *
     * <p>
     * An escape character that is not followed by two hexadecimal digits is not
     * a valid escape; it is copied to the output unchanged instead of failing
     * or producing an arbitrary byte.
     *
     * @param bytes the encoded bytes
     * @param escapeChar the escape byte that was used for encoding
     * @param identifier if true, one leading {@code '_'} is treated as the
     *        identifier prefix and dropped
     * @return the decoded string, with the decoded bytes interpreted as UTF-8
     */
    public static String decode(byte[] bytes, byte escapeChar, boolean identifier) {
        // Skip '_' prefix if necessary
        int startPos = (identifier && bytes.length > 0 && bytes[0] == '_') ? 1 : 0;

        // Decoding never grows the data, so the remaining input length is an upper bound.
        byte[] result = new byte[bytes.length - startPos];
        int pos = 0;
        for (int i = startPos; i < bytes.length; ++i) {
            byte b = bytes[i];
            if (b == escapeChar && i + 2 < bytes.length) {
                int hi = hexValue(bytes[i + 1]);
                int lo = hexValue(bytes[i + 2]);
                if (hi >= 0 && lo >= 0) {
                    result[pos++] = (byte) ((hi << 4) | lo);
                    i += 2;
                    continue;
                }
            }
            result[pos++] = (b == '_') ? (byte) ' ' : b;
        }
        return new String(result, 0, pos, UTF8);
    }

    /** Returns the value of an ASCII hexadecimal digit, or -1 if the byte is not one. */
    private static int hexValue(byte b) {
        return b < 0 ? -1 : Character.digit((char) b, 16);
    }

    /**
     * Decodes a string produced by {@link #encodeURI(String)}.
     *
     * @param str the encoded string
     * @return the decoded string
     */
    public static String decodeURI(String str) {
        // decode() interprets the bytes as UTF-8, so they must be produced as UTF-8 too.
        return decode(str.getBytes(UTF8), (byte) '%', false);
    }

    /**
     * Decodes a string produced by {@link #encodeIdentifier(String)}.
     *
     * @param str the encoded identifier
     * @return the decoded string
     */
    public static String decodeIdentifier(String str) {
        return decode(str.getBytes(UTF8), (byte) '$', true);
    }

    /**
     * Escape ASCII control characters (below 32) and any of the following
     * characters: {@code <>:"/\|?*} with %nn, where nn is the lower-case
     * hexadecimal character code. All other characters, including
     * {@code '%'} itself and every non-ASCII character, are kept as they are,
     * so the result cannot in general be decoded back unambiguously.
     *
     * @param str a file name, not a full file path
     * @return original string or escaped file name if encoding is needed
     */
    public static String encodeFilename2(String str) {
        return encodeFilename2(str, '%');
    }

    private static String encodeFilename2(String str, char escapeChar) {
        // First calculate the length
        int originalLength = str.length();
        int length = originalLength;
        for (int i = 0; i < originalLength; ++i) {
            char c = str.charAt(i);
            if (c < 128 && fileNameEncodeTable[c] == -1)
                length += 2;
        }

        if (length == originalLength)
            return str;

        char[] result = new char[length];
        int pos = 0;
        for (int i = 0; i < originalLength; ++i) {
            char c = str.charAt(i);
            if (c >= 128) {
                // Never escape any non-ASCII characters. Those should work.
                result[pos++] = c;
                continue;
            }
            int ec = fileNameEncodeTable[c];
            if (ec >= 0) {
                result[pos++] = (char) ec;
            } else {
                result[pos++] = escapeChar;
                result[pos++] = Character.forDigit(c >> 4, 16);
                result[pos++] = Character.forDigit(c & 15, 16);
            }
        }
        return new String(result);
    }

}