X-Git-Url: https://gerrit.simantics.org/r/gitweb?a=blobdiff_plain;ds=sidebyside;f=bundles%2Forg.simantics.databoard%2Fsrc%2Forg%2Fsimantics%2Fdataboard%2Futil%2Fbinary%2FUTF8.java;h=aba0446aec647957a006aaf2360571be730cc8be;hb=refs%2Fchanges%2F38%2F238%2F2;hp=6f16677158fbeeb5086bd08c0e338b637944fdfd;hpb=24e2b34260f219f0d1644ca7a138894980e25b14;p=simantics%2Fplatform.git

diff --git a/bundles/org.simantics.databoard/src/org/simantics/databoard/util/binary/UTF8.java b/bundles/org.simantics.databoard/src/org/simantics/databoard/util/binary/UTF8.java
index 6f1667715..aba0446ae 100644
--- a/bundles/org.simantics.databoard/src/org/simantics/databoard/util/binary/UTF8.java
+++ b/bundles/org.simantics.databoard/src/org/simantics/databoard/util/binary/UTF8.java
@@ -1,236 +1,236 @@
-package org.simantics.databoard.util.binary;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.io.UTFDataFormatException;
-import java.nio.charset.Charset;
-
-/**
- * Utils for handling Standard-UTF8 and <a href="http://download.oracle.com/javase/6/docs/api/java/io/DataInput.html">Modified-UTF8</a> Strings.<p>
- * 
- * The differences between standard UTF8 and Modified are the following:
- * <ul>
- * <li>The null byte <code>'&#92;u0000'</code> is encoded in 2-byte format
- *     rather than 1-byte, so that the encoded strings never have
- *     embedded nulls.
- * <li>Only the 1-byte, 2-byte, and 3-byte formats are used.
- * <li><a href="../lang/Character.html#unicode">Supplementary characters</a>
- *     are represented in the form of surrogate pairs.
- * </ul>
- * 
- */
-public class UTF8 {
-
-	public static final Charset CHARSET = Charset.forName("utf-8");
-	
-	/**
-	 * Get the number of bytes in an UTF-8 encoding of a string 
-	 * 
-	 * @param string
-	 * @return byte length
-	 */
-	public static int getUTF8EncodingByteLength(String string)
-	{
-		// TODO ?May not work properly properly on some characters that are encoded with 2 chars in UTF-16? 
-		
-		// Correct
-		//return string.getBytes(UTF8).length;
-		
-		// http://en.wikipedia.org/wiki/UTF-8
-		int result = 0;
-		int length = string.length();
-		for (int i=0; i<length; i++)
-		{
-			char c = string.charAt(i);
-			if (c>=0 && c<=0x7f) {
-				result += 1;
-			} else if (c>=0x80 && c<=0x07ff) {
-				result += 2;
-			} else if (c>=0xD800 && c<=0xDFFF) {
-				result += 1;
-			} else if (c>=0x800 && c<=0xffff) {
-				result += 3;
-			}
-			// Not really used as char is 16-bit
-			else if (c>=0x10000 && c<=0x10ffff) {
-				result += 4;
-			} else if (c>=0x110000 && c<=0x1FFFFF) {
-				result += 4;
-			} else {
-				// NOT IN RFC 3629
-				result += 5;
-			}
-		}
-		return result;				
-	}
-	
-
-	/**
-	 * Get the number of bytes in an Modified-UTF-8 encoding of a string 
-	 * 
-	 * @param str
-	 * @return byte length
-	 */
-	public static int getModifiedUTF8EncodingByteLength(String str)
-	{
-        int strlen = str.length();
-    	int utflen = 0;
-    	int c = 0;
-     
-        /* use charAt instead of copying String to char array */
-    	for (int i = 0; i < strlen; i++) {
-                c = str.charAt(i);
-    	    if ((c >= 0x0001) && (c <= 0x007F)) {
-    		utflen++;
-    	    } else if (c > 0x07FF) {
-    		utflen += 3;
-    	    } else {
-    		utflen += 2;
-    	    }
-    	}
-    	return utflen;
-	}
-    
-	/**
-	 * Write Modified-UTF8 to a stream.
-	 * 
-	 * @param out output stream 
-	 * @param str string
-	 * @throws IOException
-	 */
-    public static void writeModifiedUTF(DataOutput out, String str) throws IOException {
-    	// Copied from DataOutput
-        int strlen = str.length();
-        int c = 0;
-        
-        int i=0;
-        for (i=0; i<strlen; i++) {
-           c = str.charAt(i);
-           if (!((c >= 0x0001) && (c <= 0x007F))) break;
-           out.write(c);
-        }
-        
-        for (;i < strlen; i++){
-            c = str.charAt(i);
-            if ((c >= 0x0001) && (c <= 0x007F)) {
-            	out.write( c );
-            } else if (c > 0x07FF) {
-            	out.write(0xE0 | ((c >> 12) & 0x0F));
-            	out.write(0x80 | ((c >>  6) & 0x3F));
-            	out.write(0x80 | ((c >>  0) & 0x3F));
-            } else {
-            	out.write(0xC0 | ((c >>  6) & 0x1F));
-            	out.write(0x80 | ((c >>  0) & 0x3F));
-            }
-        }
-    }
-    
-    /**
-     * Read Modified-UTF8 from a stream
-     * @param in input
-     * @param utflen number of bytes
-     * @return string
-     * @throws IOException
-     */
-    public static String readModifiedUTF(DataInput in, int utflen)
-    throws IOException, UTFDataFormatException
-    {
-    	// Copied from DataInput
-        byte[] bytearr = null;
-        char[] chararr = null;
-
-        {
-            bytearr = new byte[utflen];
-            chararr = new char[utflen];
-        }
-
-        int c, char2, char3;
-        int count = 0;
-        int chararr_count=0;
-
-        in.readFully(bytearr, 0, utflen);
-
-        while (count < utflen) {
-            c = (int) bytearr[count] & 0xff;      
-            if (c > 127) break;
-            count++;
-            chararr[chararr_count++]=(char)c;
-        }
-
-        while (count < utflen) {
-            c = (int) bytearr[count] & 0xff;
-            switch (c >> 4) {
-                case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
-                    /* 0xxxxxxx*/
-                    count++;
-                    chararr[chararr_count++]=(char)c;
-                    break;
-                case 12: case 13:
-                    /* 110x xxxx   10xx xxxx*/
-                    count += 2;
-                    if (count > utflen)
-                        throw new UTFDataFormatException(
-                            "malformed input: partial character at end");
-                    char2 = (int) bytearr[count-1];
-                    if ((char2 & 0xC0) != 0x80)
-                        throw new UTFDataFormatException(
-                            "malformed input around byte " + count); 
-                    chararr[chararr_count++]=(char)(((c & 0x1F) << 6) | 
-                                                    (char2 & 0x3F));  
-                    break;
-                case 14:
-                    /* 1110 xxxx  10xx xxxx  10xx xxxx */
-                    count += 3;
-                    if (count > utflen)
-                        throw new UTFDataFormatException(
-                            "malformed input: partial character at end");
-                    char2 = (int) bytearr[count-2];
-                    char3 = (int) bytearr[count-1];
-                    if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80))
-                        throw new UTFDataFormatException(
-                            "malformed input around byte " + (count-1));
-                    chararr[chararr_count++]=(char)(((c     & 0x0F) << 12) |
-                                                    ((char2 & 0x3F) << 6)  |
-                                                    ((char3 & 0x3F) << 0));
-                    break;
-                default:
-                    /* 10xx xxxx,  1111 xxxx */
-                    throw new UTFDataFormatException(
-                        "malformed input around byte " + count);
-            }
-        }
-        // The number of chars produced may be less than utflen
-        return new String(chararr, 0, chararr_count);
-    }    
-    
-    /**
-     * Write Standard-UTF8 to a stream.
-     * 
-     * @param str
-     * @param out
-     * @throws IOException
-     */
-    public static void writeUTF(DataOutput out, String str)
-    throws IOException
-    {
-		byte[] bytes = str.getBytes(CHARSET);
-		out.write(bytes);
-    }
-    
-    /**
-     * Read Standard-UTF8 from a stream
-     * @param in input
-     * @param len number of bytes
-     * @return string
-     * @throws IOException
-     */
-    public static String readUTF(DataInput in, int len)
-    throws IOException
-    {
-		byte[] bytes = new byte[len];
-		in.readFully(bytes);
-		return new String(bytes, UTF8.CHARSET);
-    }
-	
-}
+package org.simantics.databoard.util.binary;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.UTFDataFormatException;
+import java.nio.charset.Charset;
+
+/**
+ * Utils for handling Standard-UTF8 and <a href="http://download.oracle.com/javase/6/docs/api/java/io/DataInput.html">Modified-UTF8</a> Strings.<p>
+ * 
+ * The differences between standard UTF8 and Modified are the following:
+ * <ul>
+ * <li>The null byte <code>'&#92;u0000'</code> is encoded in 2-byte format
+ *     rather than 1-byte, so that the encoded strings never have
+ *     embedded nulls.
+ * <li>Only the 1-byte, 2-byte, and 3-byte formats are used.
+ * <li><a href="https://docs.oracle.com/javase/8/docs/api/java/lang/Character.html#unicode">Supplementary characters</a>
+ *     are represented in the form of surrogate pairs.
+ * </ul>
+ * 
+ */
+public class UTF8 {
+
+	public static final Charset CHARSET = Charset.forName("utf-8"); // standard UTF-8; used by writeUTF and readUTF
+	
+	/**
+	 * Get the number of bytes in the standard UTF-8 encoding of a string, i.e. the
+	 * value of {@code string.getBytes(CHARSET).length}, without allocating the bytes.
+	 * 
+	 * @param string string to measure
+	 * @return byte length
+	 */
+	public static int getUTF8EncodingByteLength(String string)
+	{
+		// Byte counts (RFC 3629), matching what String.getBytes(CHARSET) produces:
+		//   U+0000..U+007F        -> 1 byte
+		//   U+0080..U+07FF        -> 2 bytes
+		//   U+0800..U+FFFF (BMP)  -> 3 bytes
+		//   valid surrogate pair  -> 4 bytes for the two chars together
+		//   unpaired surrogate    -> 1 byte (the encoder substitutes '?')
+		// A pair is counted as a unit because its two chars form one code point;
+		// sizing each half on its own would report 2 bytes instead of 4.
+		int result = 0;
+		int length = string.length();
+		for (int i=0; i<length; i++)
+		{
+			char c = string.charAt(i);
+			if (c<=0x7f) {
+				result += 1;
+			} else if (c<=0x07ff) {
+				result += 2;
+			} else if (Character.isHighSurrogate(c) && i+1<length
+					&& Character.isLowSurrogate(string.charAt(i+1))) {
+				// Supplementary code point: count the pair once and skip its low half
+				result += 4;
+				i++;
+			} else if (c>=0xD800 && c<=0xDFFF) {
+				// Lone surrogate cannot be encoded; a single replacement byte is emitted
+				result += 1;
+			} else {
+				result += 3;
+			}
+		}
+		return result;
+	}
+	
+
+	/**
+	 * Compute how many bytes the Modified-UTF-8 encoding of a string occupies.
+	 * Every UTF-16 code unit is sized on its own: 1 byte for U+0001..U+007F,
+	 * 2 bytes for U+0000 and U+0080..U+07FF, and 3 bytes for anything above.
+	 * 
+	 * @param str string to measure
+	 * @return byte length
+	 */
+	public static int getModifiedUTF8EncodingByteLength(String str)
+	{
+		int total = 0;
+		final int n = str.length();
+		for (int pos = 0; pos < n; pos++) {
+			final char unit = str.charAt(pos);
+			if (unit > 0x07FF) {
+				total += 3;
+			} else if (unit == 0x0000 || unit > 0x007F) {
+				// NUL takes the 2-byte form so the encoding never contains a zero byte
+				total += 2;
+			} else {
+				total += 1;
+			}
+		}
+		return total;
+	}
+    
+	/**
+	 * Encode a string as Modified-UTF8 and write the resulting bytes to a stream.
+	 * Each UTF-16 code unit is encoded separately, so a supplementary character
+	 * is emitted as two 3-byte sequences (one per surrogate). No length prefix
+	 * is written.
+	 * 
+	 * @param out output stream 
+	 * @param str string
+	 * @throws IOException if writing to the stream fails
+	 */
+    public static void writeModifiedUTF(DataOutput out, String str) throws IOException {
+        final int n = str.length();
+
+        // Classify every code unit by the number of bytes its encoding needs
+        for (int pos = 0; pos < n; pos++) {
+            final int unit = str.charAt(pos);
+            if (unit == 0x0000 || unit > 0x007F) {
+                if (unit <= 0x07FF) {
+                    // 2-byte form 110xxxxx 10xxxxxx, also used for U+0000
+                    out.write(0xC0 | ((unit >> 6) & 0x1F));
+                } else {
+                    // 3-byte form 1110xxxx 10xxxxxx 10xxxxxx
+                    out.write(0xE0 | ((unit >> 12) & 0x0F));
+                    out.write(0x80 | ((unit >> 6) & 0x3F));
+                }
+                // Both multi-byte forms finish with the low six bits
+                out.write(0x80 | (unit & 0x3F));
+            } else {
+                // U+0001..U+007F is written as a single byte
+                out.write(unit);
+            }
+        }
+    }
+    
+    /**
+     * Read exactly utflen bytes from a stream and decode them as Modified-UTF8 (1-, 2- and 3-byte sequences only).
+     * @param in input
+     * @param utflen number of encoded bytes to read (not the number of characters)
+     * @return decoded string; it may contain fewer chars than utflen
+     * @throws IOException if reading fails; UTFDataFormatException if the bytes are not valid Modified-UTF8
+     */
+    public static String readModifiedUTF(DataInput in, int utflen)
+    throws IOException, UTFDataFormatException
+    {
+    	// Copied from DataInput; here the byte count is supplied by the caller
+        byte[] bytearr = null;
+        char[] chararr = null;
+
+        {
+            bytearr = new byte[utflen];
+            chararr = new char[utflen]; // every decoded char consumes at least one input byte
+        }
+        // count is the read position in bytearr, chararr_count the write position in chararr
+        int c, char2, char3;
+        int count = 0;
+        int chararr_count=0;
+        // Fetch the complete encoded payload before decoding
+        in.readFully(bytearr, 0, utflen);
+        // First loop: copy the leading run of bytes <= 127 straight through
+        while (count < utflen) {
+            c = (int) bytearr[count] & 0xff;      
+            if (c > 127) break;
+            count++;
+            chararr[chararr_count++]=(char)c;
+        }
+        // Second loop: decode the rest, dispatching on the high nibble of the lead byte
+        while (count < utflen) {
+            c = (int) bytearr[count] & 0xff;
+            switch (c >> 4) {
+                case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
+                    /* 0xxxxxxx*/
+                    count++;
+                    chararr[chararr_count++]=(char)c;
+                    break;
+                case 12: case 13:
+                    /* 110x xxxx   10xx xxxx*/
+                    count += 2;
+                    if (count > utflen)
+                        throw new UTFDataFormatException(
+                            "malformed input: partial character at end");
+                    char2 = (int) bytearr[count-1]; // sign-extended; only masked bits are used below
+                    if ((char2 & 0xC0) != 0x80)
+                        throw new UTFDataFormatException(
+                            "malformed input around byte " + count); 
+                    chararr[chararr_count++]=(char)(((c & 0x1F) << 6) | 
+                                                    (char2 & 0x3F));  
+                    break;
+                case 14:
+                    /* 1110 xxxx  10xx xxxx  10xx xxxx */
+                    count += 3;
+                    if (count > utflen)
+                        throw new UTFDataFormatException(
+                            "malformed input: partial character at end");
+                    char2 = (int) bytearr[count-2];
+                    char3 = (int) bytearr[count-1];
+                    if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80))
+                        throw new UTFDataFormatException(
+                            "malformed input around byte " + (count-1));
+                    chararr[chararr_count++]=(char)(((c     & 0x0F) << 12) |
+                                                    ((char2 & 0x3F) << 6)  |
+                                                    ((char3 & 0x3F) << 0));
+                    break;
+                default:
+                    /* 10xx xxxx (continuation byte in lead position), 1111 xxxx (no such lead byte in this format) */
+                    throw new UTFDataFormatException(
+                        "malformed input around byte " + count);
+            }
+        }
+        // The number of chars produced may be less than utflen
+        return new String(chararr, 0, chararr_count);
+    }    
+    
+    /**
+     * Write the standard UTF-8 encoding of a string to a stream.
+     * The encoded bytes are written as-is; no length prefix is added.
+     * 
+     * @param out output stream
+     * @param str string to encode
+     * @throws IOException if writing to the stream fails
+     */
+    public static void writeUTF(DataOutput out, String str)
+    throws IOException
+    {
+        out.write(str.getBytes(CHARSET));
+    }
+    
+    /**
+     * Read a fixed number of bytes from a stream and decode them as standard UTF-8.
+     * @param in input
+     * @param len number of encoded bytes to read
+     * @return decoded string
+     * @throws IOException if the bytes cannot be read in full
+     */
+    public static String readUTF(DataInput in, int len)
+    throws IOException
+    {
+        final byte[] encoded = new byte[len];
+        in.readFully(encoded);
+        return new String(encoded, CHARSET);
+    }
+	
+}