package org.simantics.scl.compiler.internal.parsing.utils;

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;

/**
 * A reader that decodes a byte stream as UTF-8 without ever failing on bad
 * input: malformed or unmappable byte sequences are replaced with U+FFFD
 * instead of raising an exception. A UTF-8 byte order mark (ef bb bf) at the
 * very beginning of the stream is skipped.
 */
public class LaxUTF8Reader extends InputStreamReader {

    private static final Charset UTF8 = Charset.forName("UTF-8");

    /**
     * Creates a new lenient decoder. A decoder carries mutable decoding state
     * and is not safe for concurrent use, so every reader gets its own
     * instance instead of sharing a single static one.
     */
    private static CharsetDecoder newLaxDecoder() {
        return UTF8.newDecoder()
                .onMalformedInput(CodingErrorAction.REPLACE)
                .onUnmappableCharacter(CodingErrorAction.REPLACE)
                .replaceWith("\ufffd");
    }

    /**
     * Skips possible BOM (ef bb bf) in the beginning of the stream.
     *
     * @param stream the stream positioned at its first byte
     * @return the same stream, positioned after the BOM if one was present
     *         and at its original position otherwise
     * @throws IOException if reading from or resetting the stream fails
     */
    private static BufferedInputStream skipBOM(BufferedInputStream stream) throws IOException {
        stream.mark(4);
        // Short-circuit evaluation stops reading at the first mismatching byte.
        if (stream.read() == 0xef && stream.read() == 0xbb && stream.read() == 0xbf) {
            return stream;
        }
        // Not a (complete) BOM: rewind so that no content byte is lost.
        stream.reset();
        return stream;
    }

    /**
     * Opens the given file and skips a leading BOM. If skipping fails, the
     * file is closed before the exception propagates so that the file handle
     * does not leak.
     */
    private static BufferedInputStream openSkippingBOM(String fileName) throws IOException {
        BufferedInputStream stream = new BufferedInputStream(new FileInputStream(fileName));
        boolean succeeded = false;
        try {
            skipBOM(stream);
            succeeded = true;
            return stream;
        } finally {
            if (!succeeded) {
                try {
                    stream.close();
                } catch (IOException ignored) {
                    // The failure from skipBOM is the one worth reporting.
                }
            }
        }
    }

    /**
     * Wraps a stream whose BOM (if any) has already been consumed.
     */
    private LaxUTF8Reader(BufferedInputStream bomSkippedStream, CharsetDecoder decoder) {
        super(bomSkippedStream, decoder);
    }

    /**
     * Creates a reader over the given stream, skipping a leading BOM.
     *
     * @param stream the byte stream to decode
     * @throws IOException if probing the stream for a BOM fails
     */
    public LaxUTF8Reader(BufferedInputStream stream) throws IOException {
        this(skipBOM(stream), newLaxDecoder());
    }

    /**
     * Creates a reader over the contents of the named file, skipping a
     * leading BOM.
     *
     * @param fileName the name of the file to read
     * @throws IOException if the file cannot be opened or probing it for a
     *         BOM fails
     */
    public LaxUTF8Reader(String fileName) throws IOException {
        this(openSkippingBOM(fileName), newLaxDecoder());
    }
}