]> gerrit.simantics Code Review - simantics/platform.git/blobdiff - bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/markdown/internal/Scanner.java
Migrated source code from Simantics SVN
[simantics/platform.git] / bundles / org.simantics.scl.compiler / src / org / simantics / scl / compiler / markdown / internal / Scanner.java
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/markdown/internal/Scanner.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/markdown/internal/Scanner.java
new file mode 100644 (file)
index 0000000..437d80c
--- /dev/null
@@ -0,0 +1,594 @@
+package org.simantics.scl.compiler.markdown.internal;
+
+import gnu.trove.set.hash.THashSet;
+
+import org.simantics.scl.compiler.markdown.inlines.Subject;
+
+public class Scanner {
+
+    // Output slots written by the instance scanners (isAtxHeaderStart, isOpenCodeFence, isListMarker).
+    // They are overwritten by every call, so read them right after the scan that returned true.
+
+    // Number of '#' characters (isAtxHeaderStart), number of fence characters (isOpenCodeFence)
+    // or the parsed item number (isListMarker).
+    public int level;
+    // Number of characters consumed by the scan: marker characters plus trailing spaces for
+    // headers and fences, digits plus the delimiter for list markers.
+    public int matched;
+    // Delimiter that followed the list number, '.' or ')'; only isListMarker writes it.
+    public char bulletChar;
+    
+    public static boolean isCloseCodeFence(StringBuilder line, int offset, char fenceChar, int fenceLength) {
+        int matched = 0;
+        while(line.charAt(offset) == fenceChar) {
+            ++offset;
+            ++matched;
+        }
+        if(matched < fenceLength)
+            return false;
+        while(true) {
+            char c = line.charAt(offset++);
+            if(c == '\n')
+                return true;
+            else if(c != ' ')
+                return false;
+        }
+    }
+    
+    public static boolean isSetextHeaderLine(StringBuilder line, int offset, char headerLineChar) {
+        char c;
+        while((c = line.charAt(offset)) == headerLineChar)
+            ++offset;
+        while(c == ' ') {
+            ++offset;
+            c = line.charAt(offset);
+        }
+        return c == '\n';
+    }
+
+    public static boolean isHRule(StringBuilder line, int offset, char hrChar) {
+        char c;
+        int count = 0;
+        while((c = line.charAt(offset)) != '\n') {
+            if(c == hrChar)
+                ++count;
+            else if(c != ' ')
+                return false;
+            ++offset;
+        }
+        return count >= 3;
+    }
+
+    public static boolean isHtmlBlockTag(StringBuilder line, int offset) {
+        if(line.charAt(offset) != '<')
+            return false;
+        ++offset;
+        char c = line.charAt(offset);
+        
+        // HTML comment, processing instruction, CDATA or entity definition
+        if(c == '!' || c == '?')
+            return true;
+
+        // Ending tag
+        if(c == '/') {
+            ++offset;
+            offset = scanTag(line, offset);
+            if(offset == -1)
+                return false;
+            c = line.charAt(offset);
+            return c == ' ' || c == '>';
+        }
+        
+        // Beginning tag
+        offset = scanTag(line, offset);
+        if(offset == -1)
+            return false;
+        c = line.charAt(offset);
+        return c == ' ' || c == '/' || c == '>';
+    }
+    
+    public static int scanTag(StringBuilder line, int offset) {
+        StringBuilder b = new StringBuilder();
+        while(offset < line.length()) {
+            char c = line.charAt(offset);
+            if( (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') )
+                b.append(c);
+            else if( c >= 'A' && c <= 'Z' )
+                b.append(Character.toLowerCase(c));
+            else
+                break;
+            ++offset;
+        }
+        if(HTML_BLOCK_TAG_SET.contains(b.toString()))
+            return offset;
+        else
+            return -1;
+    }
+    
+    private static final String[] HTML_BLOCK_TAGS = new String[] {
+        "article", "header", "aside", "hgroup", "blockquote", "hr", "iframe", "body", "li", "map", "button", "object", "canvas", "ol",
+        "caption", "output", "col", "p", "colgroup", "pre", "dd", "progress", "div", "section", "dl", "table", "td", "dt", "tbody",
+        "embed", "textarea", "fieldset", "tfoot", "figcaption", "th", "figure", "thead", "footer", "tr", "form", "ul", "h1", "h2", "h3",
+        "h4", "h5", "h6", "video", "script", "style"
+    };
+    private static final THashSet<String> HTML_BLOCK_TAG_SET = new THashSet<String>();
+    static {
+        for(String tag : HTML_BLOCK_TAGS)
+            HTML_BLOCK_TAG_SET.add(tag);
+    }
+    
+    public boolean isAtxHeaderStart(StringBuilder line, int offset) {
+        int matched = 0;
+        char c;
+        while((c = line.charAt(offset)) == '#') {
+            ++offset;
+            ++matched;
+        }
+        if(matched == 0 || matched > 6)
+            return false;
+        this.level = matched;
+        if(c != '\n') {
+            if(c != ' ') 
+                return false;
+            while(c == ' ') {
+                ++offset;
+                ++matched;
+                c = line.charAt(offset);
+            }
+        }
+        this.matched = matched;
+        return true;
+    }
+
+    public boolean isOpenCodeFence(StringBuilder line, int offset, char fenceChar) {
+        int matched = 0;
+        char c;
+        while((c = line.charAt(offset)) == fenceChar) {
+            ++offset;
+            ++matched;
+        }
+        if(matched < 3)
+            return false;
+        this.level = matched;
+        while(line.charAt(offset) == ' ') {
+            ++offset;
+            ++matched;
+        }
+        this.matched = matched;
+        while((c = line.charAt(offset)) != '\n') {
+            if(c == fenceChar)
+                return false;
+            ++offset;
+        }
+        return true;
+    }
+
+    public boolean isListMarker(StringBuilder line, int offset) {
+        int pos = offset;
+        char c;
+        while(Character.isDigit(c = line.charAt(pos)))
+            ++pos;
+        if(c != '.' && c != ')')
+            return false;
+        ++pos;
+        char c2;
+        if((c2=line.charAt(pos)) != ' ' && c2 != '\n')
+            return false;
+        this.matched = pos-offset;
+        this.level = Integer.parseInt(line.substring(offset, pos-1));
+        this.bulletChar = c;
+        return true;
+    }
+    
+    private static final String CDATA = "CDATA[";
+    
+    public static int scanHtmlTag(StringBuilder input, int offset) {
+        char c;
+        c = input.charAt(offset++);        
+        
+        // Comment, declaration or cdata 
+        if(c == '!') {
+            if(offset == input.length())
+                return -1;
+            c = input.charAt(offset++);
+            
+            // Comment
+            if(c == '-') {
+                if(offset+4 > input.length())
+                    return -1;
+                if(input.charAt(offset++) != '-')
+                    return -1;
+                c = input.charAt(offset++);
+                if(c == '-') {
+                    c = input.charAt(offset++);
+                    if(c == '-')
+                        return -1;
+                }
+                if(c == '>')
+                    return -1;
+                
+                while(offset+3 <= input.length()) {
+                    c = input.charAt(offset++);
+                    if(c == '-') {
+                        c = input.charAt(offset++);
+                        if(c == '-') {
+                            c = input.charAt(offset++);
+                            if(c == '>')
+                                return offset;
+                            else
+                                return -1;
+                        }
+                    }
+                }
+                return -1;
+            }
+            
+            // Cdata
+            else if(c == '[') {
+                for(int i=0;i<CDATA.length();++i) {
+                    c = input.charAt(offset++);
+                    if(CDATA.charAt(i) != c)
+                        return -1;
+                }
+                while(offset+3 <= input.length()) {
+                    c = input.charAt(offset++);
+                    if(c == ']') {
+                        c = input.charAt(offset++);
+                        while(c == ']') {
+                            c = input.charAt(offset++);
+                            if(c == '>')
+                                return offset;
+                        }
+                    }
+                }
+                return -1;
+            }
+            
+            // Declaration
+            else if(c >= 'A' && c <= 'Z') {
+                while( offset < input.length() && (c=input.charAt(offset++)) >= 'A' && c <= 'Z' );
+                if(c != ' ' && c != '\n')
+                    return -1;
+                while( offset < input.length() && (c=input.charAt(offset++)) != '>' );
+                if(c != '>')
+                    return -1;
+                else
+                    return offset;
+            }
+            else
+                return -1;
+        }
+        
+        // Processing instruction
+        else if(c == '?') {
+            while(offset < input.length()) {
+                c = input.charAt(offset++);
+                if(c == '?') {
+                    c = input.charAt(offset++);
+                    if(c == '>')
+                        return offset;
+                }
+            }
+            return -1;
+        }
+        
+        // Close tag
+        else if(c == '/') {
+            offset = scanTagName(input, offset);
+            if(offset == -1)
+                return -1;
+            offset = scanWhitespace(input, offset);
+            if(offset == -1)
+                return -1;
+            if(input.charAt(offset) == '>')
+                return offset+1;
+            else
+                return -1;
+        }
+        
+        // Open tag
+        else {
+            --offset;
+            offset = scanTagName(input, offset);
+            if(offset == -1)
+                return -1;
+            while(true) {
+                if((c=input.charAt(offset)) != ' ' && c != '\n') {
+                    if(c == '>')
+                        return offset+1;
+                    if(c == '/' && input.charAt(offset+1)=='>')
+                        return offset+2;
+                    return -1;
+                }
+                offset = scanWhitespace(input, offset);
+                if(offset == -1)
+                    return -1;
+                c = input.charAt(offset);
+                if(c == '>')
+                    return offset+1;
+                if(c == '/' && input.charAt(offset+1)=='>')
+                    return offset+2;
+                offset = scanAttributeName(input, offset);
+                if(offset == -1)
+                    return -1;
+                offset = scanWhitespace(input, offset);
+                if(offset == -1)
+                    return -1;
+                if((c=input.charAt(offset)) == '=') {
+                    ++offset;
+                    offset = scanWhitespace(input, offset);
+                    if(offset == -1)
+                        return -1;
+                    
+                    c = input.charAt(offset);
+                    if(c == '"') {
+                        ++offset;
+                        while(true) {
+                            if(offset == input.length())
+                                return -1;
+                            c=input.charAt(offset++);
+                            if(c == '"')
+                                break;
+                        }
+                    }
+                    else if(c == '\'') {
+                        ++offset;
+                        while(true) {
+                            if(offset == input.length())
+                                return -1;
+                            c=input.charAt(offset++);
+                            if(c == '\'')
+                                break;
+                        }
+                    }
+                    else {
+                        while(true) {
+                            if(offset == input.length())
+                                return -1;
+                            c=input.charAt(offset++);
+                            if(c==' ' || c=='\n' || c=='"' || c=='\'' || c=='=' || c=='<' || c=='>' || c=='`') {
+                                --offset;
+                                break;
+                            }
+                        }
+                    }
+                }
+                else {
+                    if(c == '>')
+                        return offset+1;
+                    --offset;
+                    c = input.charAt(offset);
+                    if(c != ' ' && c != '\n' && c != '>')
+                        return -1;
+                }
+            }
+        }
+    }
+    
+    /**
+     * Scans a tag name: an ASCII letter followed by any number of ASCII letters and digits.
+     *
+     * @param input  buffer to scan
+     * @param offset index of the first character of the name
+     * @return the index of the first character after the name, or {@code -1} if no name
+     *         starts at {@code offset} or the name runs to the end of the buffer
+     */
+    private static int scanTagName(StringBuilder input, int offset) {
+        int length = input.length();
+        if(offset >= length)
+            return -1;
+        char first = input.charAt(offset);
+        boolean startsWithLetter = ('a' <= first && first <= 'z') || ('A' <= first && first <= 'Z');
+        if(!startsWithLetter)
+            return -1;
+        for(int pos = offset+1; pos < length; ++pos) {
+            char ch = input.charAt(pos);
+            boolean nameChar = ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || ('0' <= ch && ch <= '9');
+            if(!nameChar)
+                return pos;
+        }
+        // The name was never terminated inside the buffer.
+        return -1;
+    }
+    
+    /**
+     * Scans an attribute name: an ASCII letter, {@code '_'} or {@code ':'}, followed by any
+     * number of ASCII letters, digits, {@code '_'}, {@code ':'}, {@code '.'} and {@code '-'}.
+     *
+     * @param input  buffer to scan
+     * @param offset index of the first character of the name
+     * @return the index of the first character after the name, or {@code -1} if no name
+     *         starts at {@code offset} or the name runs to the end of the buffer
+     */
+    private static int scanAttributeName(StringBuilder input, int offset) {
+        int length = input.length();
+        if(offset >= length)
+            return -1;
+        char first = input.charAt(offset);
+        boolean validStart = ('a' <= first && first <= 'z') || ('A' <= first && first <= 'Z')
+                || first == '_' || first == ':';
+        if(!validStart)
+            return -1;
+        for(int pos = offset+1; pos < length; ++pos) {
+            char ch = input.charAt(pos);
+            boolean alphanumeric = ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || ('0' <= ch && ch <= '9');
+            boolean punctuation = ch == '_' || ch == ':' || ch == '.' || ch == '-';
+            if(!alphanumeric && !punctuation)
+                return pos;
+        }
+        // The name was never terminated inside the buffer.
+        return -1;
+    }
+    
+    public static int scanWhitespace(StringBuilder input, int offset) {
+        while(offset < input.length()) {
+            char c = input.charAt(offset);
+            if(c != ' ' && c != '\n')
+                return offset;
+            ++offset;
+        }
+        return -1;
+    }
+
+    public static int scanUri(StringBuilder input, int pos) {
+        int startPos = pos;
+        char c;
+        while(true) {
+            if(pos == input.length())
+                return -1;
+            c = input.charAt(pos++);
+            if(c < 0 || c >= 128 || !IS_SCHEME_CHAR[(int)c])
+                break;
+        }
+        if(c != ':' || !SCHEME_SET.contains(input.substring(startPos, pos-1).toLowerCase()))
+            return -1;
+        while(pos < input.length() && (c = input.charAt(pos)) != '>' && c != '<' && !(c <= 0x20 && c >= 0))
+            ++pos;
+        if(c == '>')
+            return pos+1;
+        else
+            return -1;
+    }
+    
+    private static final String[] SCHEMES = new String[] {
+        "coap", "doi", "javascript", "aaa", "aaas", "about", "acap", "cap", "cid", "crid", "data", "dav", "dict", "dns", "file", "ftp", "geo", "go",
+        "gopher", "h323", "http", "https", "iax", "icap", "im", "imap", "info", "ipp", "iris", "iris.beep", "iris.xpc", "iris.xpcs", "iris.lwz",
+        "ldap", "mailto", "mid", "msrp", "msrps", "mtqp", "mupdate", "news", "nfs", "ni", "nih", "nntp", "opaquelocktoken", "pop", "pres", "rtsp",
+        "service", "session", "shttp", "sieve", "sip", "sips", "sms", "snmp", "soap.beep", "soap.beeps", "tag", "tel", "telnet", "tftp", "thismessage",
+        "tn3270", "tip", "tv", "urn", "vemmi", "ws", "wss", "xcon", "xcon-userid", "xmlrpc.beep", "xmlrpc.beeps", "xmpp", "z39.50r", "z39.50s", "adiumxtra",
+        "afp", "afs", "aim", "apt", "attachment", "aw", "beshare", "bitcoin", "bolo", "callto", "chrome", "chrome-extension", "com-eventbrite-attendee",
+        "content", "cvs", "dlna-playsingle", "dlna-playcontainer", "dtn", "dvb", "ed2k", "facetime", "feed", "finger", "fish", "gg", "git", "gizmoproject",
+        "gtalk", "hcp", "icon", "ipn", "irc", "irc6", "ircs", "itms", "jar", "jms", "keyparc", "lastfm", "ldaps", "magnet", "maps", "market", "message", "mms",
+        "ms-help", "msnim", "mumble", "mvn", "notes", "oid", "palm", "paparazzi", "platform", "proxy", "psyc", "query", "res", "resource", "rmi", "rsync",
+        "rtmp", "secondlife", "sftp", "sgn", "skype", "smb", "soldat", "spotify", "ssh", "steam", "svn", "teamspeak", "things", "udp", "unreal", "ut2004",
+        "ventrilo", "view-source", "webcal", "wtai", "wyciwyg", "xfire", "xri", "ymsgr"  
+    };
+    private static final THashSet<String> SCHEME_SET = new THashSet<String>();
+    private static final boolean[] IS_SCHEME_CHAR = new boolean[128];
+    static {
+        for(String scheme : SCHEMES) {
+            SCHEME_SET.add(scheme);
+            for(int i=0;i<scheme.length();++i) {
+                char c = scheme.charAt(i);
+                IS_SCHEME_CHAR[(int)c] = true;
+                IS_SCHEME_CHAR[(int)Character.toUpperCase(c)] = true;
+            }
+        }
+    }
+    
+    public static int scanLinkLabel(StringBuilder input, int offset) {
+        if(offset == input.length() || input.charAt(offset++) != '[')
+            return -1;
+        int maxPos = Math.min(input.length(), offset+1000);
+        while(offset < maxPos) {
+            char c = input.charAt(offset++);
+            if(c == ']')
+                return offset;
+            if(c == '[')
+                return -1;
+            if(c == '\\' && offset < maxPos) {
+                c = input.charAt(offset);
+                if(Subject.getCharType(c) == 2)
+                    ++offset;
+            }
+        }
+        return -1;
+    }
+    
+    public static int scanLinkUrl(StringBuilder input, int offset) {        
+        if(offset == input.length())
+            return offset;
+        if(input.charAt(offset) == '<') {
+            ++offset;
+            while(offset < input.length()) {
+                char c = input.charAt(offset++);
+                if(c == '>') {
+                    return offset;
+                }
+                else if(c == '\\') {
+                    if(Subject.getCharType(c) == 2)
+                        ++offset;
+                }
+                else if(c == '<' || c == '\n')
+                    return -1;
+            }
+            return -1;
+        }
+        else {
+            while(offset < input.length()) {
+                char c = input.charAt(offset++);
+                if(c == '\\') {
+                    if(Subject.getCharType(input.charAt(offset)) == 2)
+                        ++offset;
+                    else
+                        return offset - 1;
+                }
+                else if( c == '(' ) {
+                    int orgPos = offset - 1;
+                    while(true) {
+                        if(offset >= input.length())
+                            return orgPos;
+                        c = input.charAt(offset++);
+                        if(c == '\\') {
+                            if(Subject.getCharType(input.charAt(offset)) == 2)
+                                ++offset;
+                            else
+                                return orgPos;
+                        }
+                        else if(c == ')')
+                            break;
+                        else if( (c <= 0x20 && c >= 0) || c == '(' )
+                            return orgPos;
+                    }
+                }
+                else if( (c <= 0x20 && c >= 0) || c == ')' )
+                    return offset-1;
+            }
+            return offset;
+        }
+    }
+
+    public static int scanLinkTitle(StringBuilder input, int offset) {
+        if(offset == input.length())
+            return -1;
+        char c = input.charAt(offset++);
+        if(c == '(') {
+            while(offset < input.length()) {
+                c = input.charAt(offset++);
+                if(c == ')')
+                    return offset;      
+                if(c == '\\') {
+                    c = input.charAt(offset);
+                    if(c == ')' || c == '\\')
+                        ++offset;
+                }
+            }
+            return -1;
+        }
+        else if(c == '"') {
+            while(offset < input.length()) {
+                c = input.charAt(offset++);
+                if(c == '"')
+                    return offset;      
+                if(c == '\\') {
+                    c = input.charAt(offset);
+                    if(c == '"' || c == '\\')
+                        ++offset;
+                }
+            }
+            return -1;
+        }
+        if(c == '\'') {
+            while(offset < input.length()) {
+                c = input.charAt(offset++);
+                if(c == '\'')
+                    return offset;      
+                if(c == '\\') {
+                    c = input.charAt(offset);
+                    if(c == '\'' || c == '\\')
+                        ++offset;
+                }
+            }
+            return -1;
+        }
+        else
+            return -1;
+    }
+    
+    private static final CharacterSet EMAIL_START = new CharacterSet("a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-");
+    private static final CharacterSet EMAIL_END_A = new CharacterSet("a-zA-Z0-9");
+    private static final CharacterSet EMAIL_END_B = new CharacterSet("a-zA-Z0-9-");
+
+    public static int scanEmail(StringBuilder input, int offset) {
+        int initialPos = offset;
+        char c = 0;
+        while(offset < input.length() && EMAIL_START.contains(c=input.charAt(offset++)) );
+        if( c != '@' || offset == initialPos )
+            return -1;
+        ++offset;
+        while(true) {
+            if(offset == input.length() || !EMAIL_END_A.contains(c=input.charAt(offset++)))
+                return -1;
+            int count = 1;
+            int oldC = c;
+            while(offset < input.length() && EMAIL_END_B.contains(c=input.charAt(offset++))) {
+                ++count;
+                if(count > 62)
+                    return -1;
+                oldC = c;
+            }
+            if(oldC=='-')
+                return -1;
+            if(c == '>')
+                return offset;
+            if(c != '.')
+                return -1;
+        }
+    }
+}