/*******************************************************************************
 * Copyright (c) 2007, 2010 Association for Decentralized Information Management
 * in Industry THTH ry.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     VTT Technical Research Centre of Finland - initial API and implementation
 *******************************************************************************/
package org.simantics.databoard.util;

import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;

/**
 * Simantics URI and identifier escape specification.
 *
 * <p>Strings are converted to UTF-8 and every byte is either copied, replaced
 * by a substitute character, or written as an escape character followed by two
 * lower-case hexadecimal digits. Two byte tables drive the process:
 * <ul>
 * <li>{@link #encodeTable}: space becomes {@code '_'}; Java identifier part
 * characters other than {@code '_'} and {@code '$'} are copied; everything else
 * is escaped. This mapping is reversed by
 * {@link #decode(byte[], byte, boolean)}.</li>
 * <li>{@link #encodeTable2}: a mapping for file names that is not reversible
 * ({@code '/'} becomes {@code '-'}, which is also copied as such).</li>
 * </ul>
 *
 * @author Hannu Niemistö
 */
public final class URIUtil {

    static final Charset UTF8 = Charset.forName("UTF-8");

    /** Per-ASCII-byte substitution for URIs and identifiers; -1 means "escape this byte". */
    static final byte[] encodeTable = new byte[128];

    /** Per-ASCII-byte substitution for file names; -1 means "escape this byte". */
    static final byte[] encodeTable2 = new byte[128]; // for non-bijection filenames

    static {
        for (int i = 0; i < 128; ++i) {
            char c = (char) i;
            if (c == ' ') {
                encodeTable[i] = '_';
            } else if (Character.isJavaIdentifierPart(c) && c != '_' && c != '$') {
                encodeTable[i] = (byte) i;
            } else {
                encodeTable[i] = -1;
            }
        }

        for (int i = 0; i < 128; ++i) {
            char c = (char) i;
            if (c == ' ' || c == '_' || c == '(' || c == ')' || c == '-' || c == '.') {
                // Punctuation that is kept verbatim in file names.
                encodeTable2[i] = (byte) i;
            } else if (c == '/') {
                encodeTable2[i] = (byte) '-';
            } else if (Character.isJavaIdentifierPart(c) && c != '$') {
                encodeTable2[i] = (byte) i;
            } else {
                encodeTable2[i] = -1;
            }
        }
    }

    /**
     * Encodes a string with the URI/identifier table ({@link #encodeTable}).
     *
     * @param str        the text to encode
     * @param escapeChar the byte that introduces a two-digit hexadecimal escape
     * @param identifier if {@code true}, a {@code '_'} is prepended when the
     *                   input starts with {@code '_'}, a space or a digit, so
     *                   that the result does not start with a digit and the
     *                   decoder can tell the prefix from an encoded space
     * @return the encoded bytes (a newly allocated array)
     * @throws UnsupportedEncodingException never thrown; declared only for
     *                   compatibility with existing callers
     */
    public static byte[] encode(String str, byte escapeChar, boolean identifier) throws UnsupportedEncodingException {
        return escape(str, encodeTable, escapeChar, identifier);
    }

    /**
     * Encodes a string with the file name table ({@link #encodeTable2}).
     * The mapping is not reversible.
     *
     * @param str        the text to encode
     * @param escapeChar the byte that introduces a two-digit hexadecimal escape
     * @param identifier if {@code true}, a {@code '_'} is prepended when the
     *                   input starts with {@code '_'} or a digit
     * @return the encoded bytes (a newly allocated array)
     * @throws UnsupportedEncodingException never thrown; declared only for
     *                   compatibility with existing callers
     */
    public static byte[] encodeFilename(String str, byte escapeChar, boolean identifier) throws UnsupportedEncodingException {
        return escape(str, encodeTable2, escapeChar, identifier);
    }

    /**
     * Encodes a string as a file name using {@code '%'} as the escape character.
     *
     * @param str the text to encode
     * @return the encoded file name
     */
    public static String encodeFilename(String str) {
        return new String(escape(str, encodeTable2, (byte) '%', false), UTF8);
    }

    /**
     * Encodes a string as a URI segment using {@code '%'} as the escape character.
     *
     * @param str the text to encode
     * @return the encoded text
     */
    public static String encodeURI(String str) {
        return new String(escape(str, encodeTable, (byte) '%', false), UTF8);
    }

    /**
     * Encodes a string as an identifier using {@code '$'} as the escape
     * character and the {@code '_'} prefix rule of
     * {@link #encode(String, byte, boolean)}.
     *
     * @param str the text to encode
     * @return the encoded identifier
     */
    public static String encodeIdentifier(String str) {
        return new String(escape(str, encodeTable, (byte) '$', true), UTF8);
    }

    /**
     * Reverses {@link #encode(String, byte, boolean)}.
     *
     * <p>An escape character that is not followed by two hexadecimal digits is
     * copied to the output unchanged instead of failing or producing an
     * arbitrary byte.
     *
     * @param bytes      the encoded bytes; the array is not modified
     * @param escapeChar the byte that introduces a two-digit hexadecimal escape
     * @param identifier if {@code true}, a single leading {@code '_'} is treated
     *                   as the identifier prefix and dropped
     * @return the decoded text, with the decoded bytes interpreted as UTF-8
     */
    public static String decode(byte[] bytes, byte escapeChar, boolean identifier) {
        // Skip '_' prefix if necessary
        int start = (identifier && bytes.length > 0 && bytes[0] == '_') ? 1 : 0;

        // Decoding never produces more bytes than it consumes.
        byte[] result = new byte[bytes.length - start];
        int pos = 0;
        for (int i = start; i < bytes.length; ++i) {
            byte b = bytes[i];
            if (b == escapeChar) {
                int hi = -1;
                int lo = -1;
                if (i + 2 < bytes.length) {
                    hi = hexValue(bytes[i + 1]);
                    lo = hexValue(bytes[i + 2]);
                }
                if (hi >= 0 && lo >= 0) {
                    result[pos++] = (byte) ((hi << 4) | lo);
                    i += 2;
                } else {
                    // Malformed escape: keep the escape character literally.
                    result[pos++] = b;
                }
            } else if (b == '_') {
                result[pos++] = ' ';
            } else {
                result[pos++] = b;
            }
        }
        return new String(result, 0, pos, UTF8);
    }

    /**
     * Decodes text produced by {@link #encodeURI(String)}.
     *
     * @param str the encoded text
     * @return the decoded text
     */
    public static String decodeURI(String str) {
        return decode(str.getBytes(UTF8), (byte) '%', false);
    }

    /**
     * Decodes text produced by {@link #encodeIdentifier(String)}.
     *
     * @param str the encoded identifier
     * @return the decoded text
     */
    public static String decodeIdentifier(String str) {
        return decode(str.getBytes(UTF8), (byte) '$', true);
    }

    /** Shared encoder: maps the UTF-8 bytes of {@code str} through {@code table}, escaping where the table says so. */
    private static byte[] escape(String str, byte[] table, byte escapeChar, boolean identifier) {
        byte[] bytes = str.getBytes(UTF8);

        boolean prefixWithUnderscore = false;
        if (identifier && bytes.length > 0) {
            byte first = bytes[0];
            // The table lookup covers a first byte that is encoded as '_'
            // (space in encodeTable); without the prefix the decoder would
            // mistake that '_' for the prefix and drop it.
            prefixWithUnderscore = first == '_' || Character.isDigit(first)
                    || (first >= 0 && table[first] == '_');
        }

        // First calculate the length
        int length = bytes.length + (prefixWithUnderscore ? 1 : 0);
        for (byte b : bytes) {
            if (b < 0 || table[b] < 0) {
                length += 2;
            }
        }

        // Then encode
        byte[] result = new byte[length];
        int pos = 0;
        if (prefixWithUnderscore) {
            result[pos++] = '_';
        }
        for (byte b : bytes) {
            if (b >= 0 && table[b] >= 0) {
                result[pos++] = table[b];
            } else {
                int unsigned = b & 0xFF;
                result[pos++] = escapeChar;
                result[pos++] = (byte) Character.forDigit(unsigned >> 4, 16);
                result[pos++] = (byte) Character.forDigit(unsigned & 15, 16);
            }
        }
        return result;
    }

    /** Returns the value of an ASCII hexadecimal digit, or -1 if {@code b} is not one. */
    private static int hexValue(byte b) {
        return b < 0 ? -1 : Character.digit((char) b, 16);
    }
}