gerrit.simantics Code Review - simantics/platform.git/blobdiff - bundles/org.simantics.history/src/org/simantics/history/csv/URIs.java
Safer URI unescaping for CSVFormatter subscription CSV exporter
[simantics/platform.git] / bundles / org.simantics.history / src / org / simantics / history / csv / URIs.java
diff --git a/bundles/org.simantics.history/src/org/simantics/history/csv/URIs.java b/bundles/org.simantics.history/src/org/simantics/history/csv/URIs.java
new file mode 100644 (file)
index 0000000..57a819a
--- /dev/null
@@ -0,0 +1,103 @@
+/*******************************************************************************
+ * Copyright (c) 2017 Association for Decentralized Information Management in
+ * Industry THTH ry.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ *     Semantum Oy - initial API and implementation
+ *******************************************************************************/
+package org.simantics.history.csv;
+
+/**
+ * Percent-escape ({@code %XX}) decoding helpers for URI strings.
+ * <p>
+ * Each escape sequence is decoded to the single character whose code is the
+ * escaped byte value; no multi-byte (e.g. UTF-8) decoding is performed.
+ *
+ * @author Tuukka Lehtonen
+ * @since 1.30.0, 1.28.1
+ */
+final class URIs {
+
+    private URIs() {
+        // Static utility class, never instantiated.
+    }
+
+    /**
+     * Unescapes {@code uri} like {@link #unescape(String)}, but falls back to
+     * the input instead of throwing when the input cannot be decoded.
+     *
+     * @param uri the string to unescape, may be {@code null}
+     * @return the unescaped string; {@code uri} itself when it contains a
+     *         malformed escape sequence; {@code null} when {@code uri} is
+     *         {@code null}
+     */
+    public static String safeUnescape(String uri) {
+        if (uri == null)
+            return null;
+        try {
+            return unescape(uri);
+        } catch (IllegalArgumentException e) {
+            // Best effort: undecodable input is handed back untouched.
+            return uri;
+        }
+    }
+
+    /**
+     * Decodes the {@code %XX} escape sequences of {@code uri}, e.g.
+     * {@code "%6B"} becomes {@code "k"} and {@code "%A1"} becomes U+00A1.
+     * <p>
+     * A string that contains no {@code %} directly followed by two hex digits
+     * is returned as is; this covers {@code "%"}, {@code "%A"}, {@code "%A."},
+     * {@code "%AI"}, {@code "%.AI"}, {@code "%6g"} and {@code "%g5"}. When
+     * decoding does take place, a {@code %} with fewer than two characters
+     * after it is copied literally.
+     *
+     * @param uri the string to unescape, must not be {@code null}
+     * @return the unescaped string, or {@code uri} itself if nothing needs
+     *         decoding
+     * @throws IllegalArgumentException if {@code uri} contains at least one
+     *         valid escape sequence and also a {@code %} followed by two
+     *         characters that are not both hex digits, e.g. {@code "%41%6g"}
+     * @throws NullPointerException if {@code uri} is {@code null}
+     */
+    public static String unescape(String uri) {
+        if (!needsUnescaping(uri))
+            return uri;
+
+        try {
+            int len = uri.length();
+            // Decoding only ever shrinks the text, so the input length suffices.
+            char[] result = new char[len];
+            int in = 0;
+            int out = 0;
+            while (in < len) {
+                char inCh = uri.charAt(in++);
+                // Only treat '%' as an escape when two more characters follow it.
+                if (inCh == '%' && in + 1 < len) {
+                    char d1 = uri.charAt(in);
+                    char d2 = uri.charAt(in + 1);
+                    if (d1 > 127 || d2 > 127)
+                        throw new IllegalArgumentException("Invalid hex digit escape sequence in " + uri + " at " + in);
+                    result[out++] = (char) (hexDecode(d1) * 16 | hexDecode(d2));
+                    in += 2;
+                } else {
+                    result[out++] = inCh;
+                }
+            }
+            return new String(result, 0, out);
+        } catch (IllegalArgumentException e) {
+            throw new IllegalArgumentException("Problem while unescaping string: " + uri, e);
+        } catch (IndexOutOfBoundsException e) {
+            // Defensive only: the bounds check above should make this unreachable.
+            throw new IllegalArgumentException("Incomplete hex digit escape sequence in " + uri, e);
+        }
+    }
+
+    /**
+     * Tells whether {@code s} contains at least one {@code %} that is directly
+     * followed by two hex digits.
+     */
+    private static boolean needsUnescaping(String s) {
+        int l = s.length();
+        // A '%' closer than two characters to the end cannot start an escape,
+        // and neither can any '%' after it, so the scan may stop there.
+        for (int i = s.indexOf('%'); i >= 0 && i + 2 < l; i = s.indexOf('%', i + 1)) {
+            if (isHexDigit(s.charAt(i + 1)) && isHexDigit(s.charAt(i + 2)))
+                return true;
+        }
+        return false;
+    }
+
+    /**
+     * Returns the numeric value (0-15) of the hex digit {@code c}.
+     *
+     * @throws IllegalArgumentException if {@code c} is not an ASCII hex digit
+     */
+    private static int hexDecode(char c) {
+        if (c >= '0' && c <= '9')
+            return c - '0';
+        if (c >= 'A' && c <= 'F')
+            return c - 'A' + 10;
+        if (c >= 'a' && c <= 'f')
+            return c - 'a' + 10;
+        throw new IllegalArgumentException("Bad Hex escape character: " + (c & 255));
+    }
+
+    /** Tells whether {@code c} is an ASCII hex digit in either letter case. */
+    private static boolean isHexDigit(char c) {
+        return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
+    }
+
+}