package org.simantics.scl.compiler.markdown.internal;

import java.util.Locale;

import org.simantics.scl.compiler.markdown.inlines.Subject;

import gnu.trove.set.hash.THashSet;

/**
 * Character-level scanners for Markdown block and inline constructs.
 *
 * <p>The {@code is...} methods inspect a single line. They contain no length
 * checks and stop only at {@code '\n'}, so the {@code line} argument must be
 * newline-terminated. The {@code scan...} methods work on arbitrary input,
 * return the offset just past the recognized construct, and return {@code -1}
 * when nothing is recognized (unless documented otherwise).
 *
 * <p>The instance methods report extra results through the public fields
 * {@link #level}, {@link #matched} and {@link #bulletChar}.
 */
public class Scanner {
    /**
     * Set by a successful instance scan: number of {@code '#'} characters
     * (ATX header), fence length (code fence) or the parsed list number
     * (list marker).
     */
    public int level;
    /** Set by a successful instance scan: number of characters the marker occupies. */
    public int matched;
    /** Set by {@link #isListMarker}: the delimiter after the number, {@code '.'} or {@code ')'}. */
    public char bulletChar;

    /**
     * Tests whether the line, starting at {@code offset}, closes a fenced code block.
     *
     * @param line newline-terminated line
     * @param offset index of the first candidate fence character
     * @param fenceChar fence character of the opening fence
     * @param fenceLength length of the opening fence; the closing fence must be at least this long
     * @return {@code true} if at least {@code fenceLength} fence characters are
     *         followed only by spaces up to the newline
     */
    public static boolean isCloseCodeFence(StringBuilder line, int offset, char fenceChar, int fenceLength) {
        int matched = 0;
        while(line.charAt(offset) == fenceChar) {
            ++offset;
            ++matched;
        }
        if(matched < fenceLength)
            return false;
        while(true) {
            char c = line.charAt(offset++);
            if(c == '\n')
                return true;
            else if(c != ' ')
                return false;
        }
    }

    /**
     * Tests whether the rest of the line consists of a run of {@code headerLineChar}
     * followed by optional spaces and the newline. The run may be empty; this method
     * does not itself require a first header character.
     *
     * @param line newline-terminated line
     * @param offset index where the underline is expected to start
     * @param headerLineChar underline character
     */
    public static boolean isSetextHeaderLine(StringBuilder line, int offset, char headerLineChar) {
        char c;
        while((c = line.charAt(offset)) == headerLineChar)
            ++offset;
        while(c == ' ') {
            ++offset;
            c = line.charAt(offset);
        }
        return c == '\n';
    }

    /**
     * Tests whether the rest of the line is a horizontal rule: only {@code hrChar}
     * and spaces up to the newline, with at least three occurrences of {@code hrChar}.
     *
     * @param line newline-terminated line
     * @param offset index where scanning starts
     * @param hrChar rule character
     */
    public static boolean isHRule(StringBuilder line, int offset, char hrChar) {
        char c;
        int count = 0;
        while((c = line.charAt(offset)) != '\n') {
            if(c == hrChar)
                ++count;
            else if(c != ' ')
                return false;
            ++offset;
        }
        return count >= 3;
    }

    /**
     * Tests whether the line starts, at {@code offset}, something that opens an HTML block:
     * {@code <!}, {@code <?}, or an opening/closing tag whose name is one of the known
     * block-level tag names.
     *
     * @param line newline-terminated line
     * @param offset index of the expected {@code '<'}
     */
    public static boolean isHtmlBlockTag(StringBuilder line, int offset) {
        if(line.charAt(offset) != '<')
            return false;
        ++offset;
        char c = line.charAt(offset);

        // HTML comment, processing instruction, CDATA or entity definition
        if(c == '!' || c == '?')
            return true;

        // Ending tag
        if(c == '/') {
            ++offset;
            offset = scanTag(line, offset);
            if(offset == -1)
                return false;
            c = line.charAt(offset);
            return c == ' ' || c == '>';
        }

        // Beginning tag
        offset = scanTag(line, offset);
        if(offset == -1)
            return false;
        c = line.charAt(offset);
        return c == ' ' || c == '/' || c == '>';
    }

    /**
     * Reads a run of ASCII letters and digits starting at {@code offset} and checks it,
     * case-insensitively, against the block-level tag names.
     *
     * @return the offset just past the name if it is a block-level tag name, otherwise {@code -1}
     */
    public static int scanTag(StringBuilder line, int offset) {
        StringBuilder b = new StringBuilder();
        while(offset < line.length()) {
            char c = line.charAt(offset);
            if( (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') )
                b.append(c);
            else if( c >= 'A' && c <= 'Z' )
                b.append(Character.toLowerCase(c));
            else
                break;
            ++offset;
        }
        if(HTML_BLOCK_TAG_SET.contains(b.toString()))
            return offset;
        else
            return -1;
    }

    private static final String[] HTML_BLOCK_TAGS = new String[] {
        "article", "header", "aside", "hgroup", "blockquote", "hr", "iframe", "body",
        "li", "map", "button", "object", "canvas", "ol", "caption", "output", "col",
        "p", "colgroup", "pre", "dd", "progress", "div", "section", "dl", "table",
        "td", "dt", "tbody", "embed", "textarea", "fieldset", "tfoot", "figcaption",
        "th", "figure", "thead", "footer", "tr", "form", "ul", "h1", "h2", "h3", "h4",
        "h5", "h6", "video", "script", "style"
    };

    private static final THashSet<String> HTML_BLOCK_TAG_SET = new THashSet<String>();

    static {
        for(String tag : HTML_BLOCK_TAGS)
            HTML_BLOCK_TAG_SET.add(tag);
    }

    /**
     * Tests whether the line starts an ATX header at {@code offset}: one to six
     * {@code '#'} characters followed by a space or the newline.
     *
     * <p>On success {@link #level} is the number of {@code '#'} characters and
     * {@link #matched} is that number plus the number of spaces that follow.
     *
     * @param line newline-terminated line
     * @param offset index of the first candidate {@code '#'}
     */
    public boolean isAtxHeaderStart(StringBuilder line, int offset) {
        int matched = 0;
        char c;
        while((c = line.charAt(offset)) == '#') {
            ++offset;
            ++matched;
        }
        if(matched == 0 || matched > 6)
            return false;
        this.level = matched;
        if(c != '\n') {
            if(c != ' ')
                return false;
            while(c == ' ') {
                ++offset;
                ++matched;
                c = line.charAt(offset);
            }
        }
        this.matched = matched;
        return true;
    }

    /**
     * Tests whether the line opens a fenced code block at {@code offset}: at least three
     * {@code fenceChar} characters, with no further {@code fenceChar} on the rest of the line.
     *
     * <p>{@link #level} is set to the fence length as soon as three or more fence characters
     * are seen, and {@link #matched} to the fence length plus the following spaces; both are
     * written even when the method then returns {@code false} because of a later fence character.
     *
     * @param line newline-terminated line
     * @param offset index of the first candidate fence character
     * @param fenceChar fence character to look for
     */
    public boolean isOpenCodeFence(StringBuilder line, int offset, char fenceChar) {
        int matched = 0;
        char c;
        while((c = line.charAt(offset)) == fenceChar) {
            ++offset;
            ++matched;
        }
        if(matched < 3)
            return false;
        this.level = matched;
        while(line.charAt(offset) == ' ') {
            ++offset;
            ++matched;
        }
        this.matched = matched;
        while((c = line.charAt(offset)) != '\n') {
            if(c == fenceChar)
                return false;
            ++offset;
        }
        return true;
    }

    /** Longest digit run accepted in a list marker; keeps the number within {@code int} range. */
    private static final int MAX_LIST_NUMBER_DIGITS = 9;

    /**
     * Tests whether the line has an ordered list marker at {@code offset}: one to nine
     * digits, then {@code '.'} or {@code ')'}, then a space or the newline.
     *
     * <p>On success {@link #matched} is the length of digits plus delimiter,
     * {@link #level} is the parsed number and {@link #bulletChar} is the delimiter.
     * A marker without digits or with more than nine digits is rejected (it previously
     * caused a {@link NumberFormatException}).
     *
     * @param line newline-terminated line
     * @param offset index of the first candidate digit
     */
    public boolean isListMarker(StringBuilder line, int offset) {
        int pos = offset;
        char c;
        while(Character.isDigit(c = line.charAt(pos)))
            ++pos;
        int digitCount = pos - offset;
        if(digitCount == 0 || digitCount > MAX_LIST_NUMBER_DIGITS)
            return false;
        if(c != '.' && c != ')')
            return false;
        ++pos;
        char c2 = line.charAt(pos);
        if(c2 != ' ' && c2 != '\n')
            return false;
        this.matched = pos-offset;
        this.level = Integer.parseInt(line.substring(offset, pos-1));
        this.bulletChar = c;
        return true;
    }

    private static final String CDATA = "CDATA[";

    /**
     * Scans an inline HTML construct whose opening {@code '<'} has already been consumed.
     * Handles comments, CDATA sections, declarations, processing instructions,
     * closing tags and opening tags with attributes.
     *
     * @param input text being scanned
     * @param offset index of the first character after {@code '<'}; must be a valid index
     * @return the offset just past the closing {@code '>'}, or {@code -1} if no construct matches
     */
    public static int scanHtmlTag(StringBuilder input, int offset) {
        char c;
        c = input.charAt(offset++);

        // Comment, declaration or cdata
        if(c == '!') {
            if(offset == input.length())
                return -1;
            c = input.charAt(offset++);

            // Comment
            if(c == '-') {
                if(offset+4 > input.length())
                    return -1;
                if(input.charAt(offset++) != '-')
                    return -1;
                // The comment text may not start with "--", ">" or "->"
                c = input.charAt(offset++);
                if(c == '-') {
                    c = input.charAt(offset++);
                    if(c == '-')
                        return -1;
                }
                if(c == '>')
                    return -1;
                // The first "--" inside the text must be the start of the terminating "-->"
                while(offset+3 <= input.length()) {
                    c = input.charAt(offset++);
                    if(c == '-') {
                        c = input.charAt(offset++);
                        if(c == '-') {
                            c = input.charAt(offset++);
                            if(c == '>')
                                return offset;
                            else
                                return -1;
                        }
                    }
                }
                return -1;
            }

            // Cdata
            else if(c == '[') {
                // NOTE(review): this branch was partly lost in the reviewed copy of the file
                // and is rewritten here from the CommonMark rule: "<![CDATA[", then any text
                // up to the first "]]>". Confirm against version control.
                for(int i=0;i<CDATA.length();++i) {
                    if(offset == input.length() || input.charAt(offset++) != CDATA.charAt(i))
                        return -1;
                }
                int end = input.indexOf("]]>", offset);
                return end == -1 ? -1 : end + 3;
            }

            // Declaration
            else if(c >= 'A' && c <= 'Z') {
                while( offset < input.length() && (c=input.charAt(offset++)) >= 'A' && c <= 'Z' );
                if(c != ' ' && c != '\n')
                    return -1;
                while( offset < input.length() && (c=input.charAt(offset++)) != '>' );
                if(c != '>')
                    return -1;
                else
                    return offset;
            }
            else
                return -1;
        }

        // Processing instruction
        else if(c == '?') {
            while(offset < input.length()) {
                c = input.charAt(offset++);
                // Bounds check: a '?' that is the last character cannot start "?>"
                if(c == '?' && offset < input.length()) {
                    c = input.charAt(offset++);
                    if(c == '>')
                        return offset;
                }
            }
            return -1;
        }

        // Close tag
        else if(c == '/') {
            offset = scanTagName(input, offset);
            if(offset == -1)
                return -1;
            offset = scanWhitespace(input, offset);
            if(offset == -1)
                return -1;
            if(input.charAt(offset) == '>')
                return offset+1;
            else
                return -1;
        }

        // Open tag
        else {
            --offset;
            offset = scanTagName(input, offset);
            if(offset == -1)
                return -1;
            while(true) {
                // A quoted attribute value may have consumed the last character of the input
                if(offset >= input.length())
                    return -1;
                if((c=input.charAt(offset)) != ' ' && c != '\n') {
                    if(c == '>')
                        return offset+1;
                    if(c == '/' && isCharAt(input, offset+1, '>'))
                        return offset+2;
                    return -1;
                }
                offset = scanWhitespace(input, offset);
                if(offset == -1)
                    return -1;
                c = input.charAt(offset);
                if(c == '>')
                    return offset+1;
                if(c == '/' && isCharAt(input, offset+1, '>'))
                    return offset+2;
                offset = scanAttributeName(input, offset);
                if(offset == -1)
                    return -1;
                offset = scanWhitespace(input, offset);
                if(offset == -1)
                    return -1;
                if((c=input.charAt(offset)) == '=') {
                    ++offset;
                    offset = scanWhitespace(input, offset);
                    if(offset == -1)
                        return -1;
                    c = input.charAt(offset);
                    if(c == '"') {
                        // Double-quoted value: everything up to the next '"'
                        ++offset;
                        while(true) {
                            if(offset == input.length())
                                return -1;
                            c=input.charAt(offset++);
                            if(c == '"')
                                break;
                        }
                    }
                    else if(c == '\'') {
                        // Single-quoted value: everything up to the next '\''
                        ++offset;
                        while(true) {
                            if(offset == input.length())
                                return -1;
                            c=input.charAt(offset++);
                            if(c == '\'')
                                break;
                        }
                    }
                    else {
                        // Unquoted value: runs up to (not including) the first delimiter
                        while(true) {
                            if(offset == input.length())
                                return -1;
                            c=input.charAt(offset++);
                            if(c==' ' || c=='\n' || c=='"' || c=='\'' || c=='=' || c=='<' || c=='>' || c=='`') {
                                --offset;
                                break;
                            }
                        }
                    }
                }
                else {
                    if(c == '>')
                        return offset+1;
                    // Attribute without a value: step back onto the whitespace that separated it
                    // from what follows, so the next iteration starts at that whitespace.
                    --offset;
                    c = input.charAt(offset);
                    if(c != ' ' && c != '\n' && c != '>')
                        return -1;
                }
            }
        }
    }

    /** Bounds-safe test that {@code input} has character {@code expected} at {@code index}. */
    private static boolean isCharAt(StringBuilder input, int index, char expected) {
        return index < input.length() && input.charAt(index) == expected;
    }

    /**
     * Scans a tag name: an ASCII letter followed by ASCII letters and digits.
     *
     * @return the offset of the first character after the name, or {@code -1} if there is
     *         no name at {@code offset} or the name runs to the end of the input
     */
    private static int scanTagName(StringBuilder input, int offset) {
        if(offset >= input.length())
            return -1;
        char c = input.charAt(offset++);
        if( !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) )
            return -1;
        while(offset < input.length()) {
            c = input.charAt(offset++);
            if( !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')) )
                return offset-1;
        }
        return -1;
    }

    /**
     * Scans an attribute name: an ASCII letter, {@code '_'} or {@code ':'}, followed by
     * ASCII letters, digits, {@code '_'}, {@code ':'}, {@code '.'} or {@code '-'}.
     *
     * @return the offset of the first character after the name, or {@code -1} if there is
     *         no name at {@code offset} or the name runs to the end of the input
     */
    private static int scanAttributeName(StringBuilder input, int offset) {
        if(offset >= input.length())
            return -1;
        char c = input.charAt(offset++);
        if( !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == ':') )
            return -1;
        while(offset < input.length()) {
            c = input.charAt(offset++);
            if( !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')
                    || c == '_' || c == ':' || c == '.' || c == '-') )
                return offset-1;
        }
        return -1;
    }

    /**
     * Skips spaces and newlines.
     *
     * @return the offset of the first character that is neither a space nor a newline
     *         (possibly {@code offset} itself), or {@code -1} if the input ends first
     */
    public static int scanWhitespace(StringBuilder input, int offset) {
        while(offset < input.length()) {
            char c = input.charAt(offset);
            if(c != ' ' && c != '\n')
                return offset;
            ++offset;
        }
        return -1;
    }

    /**
     * Scans the body of a URI autolink: a known scheme (matched case-insensitively),
     * {@code ':'}, then characters up to a closing {@code '>'}. Characters at or below
     * {@code 0x20} and {@code '<'} are not allowed before the {@code '>'}.
     *
     * @param input text being scanned
     * @param pos index of the first scheme character
     * @return the offset just past the closing {@code '>'}, or {@code -1}
     */
    public static int scanUri(StringBuilder input, int pos) {
        int startPos = pos;
        char c;
        while(true) {
            if(pos == input.length())
                return -1;
            c = input.charAt(pos++);
            if(c >= 128 || !IS_SCHEME_CHAR[c])
                break;
        }
        // Locale.ROOT keeps the lookup independent of the default locale
        // (e.g. Turkish lower-casing of 'I').
        if(c != ':' || !SCHEME_SET.contains(input.substring(startPos, pos-1).toLowerCase(Locale.ROOT)))
            return -1;
        while(pos < input.length() && (c = input.charAt(pos)) != '>' && c != '<' && c > 0x20)
            ++pos;
        if(c == '>')
            return pos+1;
        else
            return -1;
    }

    private static final String[] SCHEMES = new String[] {
        "coap", "doi", "javascript", "aaa", "aaas", "about", "acap", "cap", "cid", "crid",
        "data", "dav", "dict", "dns", "file", "ftp", "geo", "go", "gopher", "h323", "http",
        "https", "iax", "icap", "im", "imap", "info", "ipp", "iris", "iris.beep", "iris.xpc",
        "iris.xpcs", "iris.lwz", "ldap", "mailto", "mid", "msrp", "msrps", "mtqp", "mupdate",
        "news", "nfs", "ni", "nih", "nntp", "opaquelocktoken", "pop", "pres", "rtsp",
        "service", "session", "shttp", "sieve", "sip", "sips", "sms", "snmp", "soap.beep",
        "soap.beeps", "tag", "tel", "telnet", "tftp", "thismessage", "tn3270", "tip", "tv",
        "urn", "vemmi", "ws", "wss", "xcon", "xcon-userid", "xmlrpc.beep", "xmlrpc.beeps",
        "xmpp", "z39.50r", "z39.50s", "adiumxtra", "afp", "afs", "aim", "apt", "attachment",
        "aw", "beshare", "bitcoin", "bolo", "callto", "chrome", "chrome-extension",
        "com-eventbrite-attendee", "content", "cvs", "dlna-playsingle", "dlna-playcontainer",
        "dtn", "dvb", "ed2k", "facetime", "feed", "finger", "fish", "gg", "git",
        "gizmoproject", "gtalk", "hcp", "icon", "ipn", "irc", "irc6", "ircs", "itms", "jar",
        "jms", "keyparc", "lastfm", "ldaps", "magnet", "maps", "market", "message", "mms",
        "ms-help", "msnim", "mumble", "mvn", "notes", "oid", "palm", "paparazzi", "platform",
        "proxy", "psyc", "query", "res", "resource", "rmi", "rsync", "rtmp", "secondlife",
        "sftp", "sgn", "skype", "smb", "soldat", "spotify", "ssh", "steam", "svn",
        "teamspeak", "things", "udp", "unreal", "ut2004", "ventrilo", "view-source",
        "webcal", "wtai", "wyciwyg", "xfire", "xri", "ymsgr"
    };

    private static final THashSet<String> SCHEME_SET = new THashSet<String>();
    private static final boolean[] IS_SCHEME_CHAR = new boolean[128];

    // NOTE(review): the body of this initializer after "for(int i=0;i" was lost in the
    // reviewed copy of the file. It is rebuilt so that every character occurring in a known
    // scheme, in either case, is accepted by scanUri (which lower-cases the candidate before
    // the set lookup). Confirm against version control.
    static {
        for(String scheme : SCHEMES) {
            SCHEME_SET.add(scheme);
            for(int i=0;i<scheme.length();++i) {
                char ch = scheme.charAt(i);
                IS_SCHEME_CHAR[ch] = true;
                IS_SCHEME_CHAR[Character.toUpperCase(ch)] = true;
            }
        }
    }

    /**
     * Scans a link destination, either {@code <...>} or a bare URL.
     *
     * <p>NOTE(review): the signature and the first statements of this method were lost in
     * the reviewed copy of the file; the name, parameters and the {@code '<'} test are
     * reconstructed from the surviving body. Confirm against version control.
     *
     * <p>In the {@code <...>} form the result is the offset just past the closing
     * {@code '>'}, or {@code -1} if a {@code '<'} or newline occurs first or the input ends.
     * In the bare form the result is the offset where the destination ends (never
     * {@code -1}): scanning stops before whitespace/control characters, an unbalanced
     * parenthesis, or a backslash that does not escape a character for which
     * {@code Subject.getCharType} returns 2. One level of balanced parentheses is allowed.
     *
     * @param input text being scanned
     * @param offset index where the destination starts
     */
    public static int scanLinkUrl(StringBuilder input, int offset) {
        if(offset < input.length() && input.charAt(offset) == '<') {
            ++offset;
            while(offset < input.length()) {
                char c = input.charAt(offset++);
                if(c == '>') {
                    return offset;
                }
                else if(c == '\\') {
                    // Skip the escaped character. The test looks at the character after the
                    // backslash, as the bare-URL branch below does (it previously tested the
                    // backslash itself).
                    if(offset < input.length() && Subject.getCharType(input.charAt(offset)) == 2)
                        ++offset;
                }
                else if(c == '<' || c == '\n')
                    return -1;
            }
            return -1;
        }
        else {
            while(offset < input.length()) {
                char c = input.charAt(offset++);
                if(c == '\\') {
                    if(offset < input.length() && Subject.getCharType(input.charAt(offset)) == 2)
                        ++offset;
                    else
                        return offset - 1;
                }
                else if( c == '(' ) {
                    // Parenthesized part: if it is not closed properly, the destination
                    // ends before the opening parenthesis.
                    int orgPos = offset - 1;
                    while(true) {
                        if(offset >= input.length())
                            return orgPos;
                        c = input.charAt(offset++);
                        if(c == '\\') {
                            if(offset < input.length() && Subject.getCharType(input.charAt(offset)) == 2)
                                ++offset;
                            else
                                return orgPos;
                        }
                        else if(c == ')')
                            break;
                        else if( c <= 0x20 || c == '(' )
                            return orgPos;
                    }
                }
                else if( c <= 0x20 || c == ')' )
                    return offset-1;
            }
            return offset;
        }
    }

    /**
     * Scans a link title delimited by {@code (...)}, {@code "..."} or {@code '...'}.
     * Inside the title a backslash escapes the closing delimiter and another backslash.
     *
     * @param input text being scanned
     * @param offset index of the opening delimiter
     * @return the offset just past the closing delimiter, or {@code -1} if there is no
     *         opening delimiter at {@code offset} or the title is not closed
     */
    public static int scanLinkTitle(StringBuilder input, int offset) {
        if(offset == input.length())
            return -1;
        char open = input.charAt(offset++);
        char close;
        if(open == '(')
            close = ')';
        else if(open == '"' || open == '\'')
            close = open;
        else
            return -1;

        while(offset < input.length()) {
            char c = input.charAt(offset++);
            if(c == close)
                return offset;
            // A trailing backslash has nothing to escape; the bounds check avoids reading past the end
            if(c == '\\' && offset < input.length()) {
                c = input.charAt(offset);
                if(c == close || c == '\\')
                    ++offset;
            }
        }
        return -1;
    }

    private static final CharacterSet EMAIL_START = new CharacterSet("a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-");
    private static final CharacterSet EMAIL_END_A = new CharacterSet("a-zA-Z0-9");
    private static final CharacterSet EMAIL_END_B = new CharacterSet("a-zA-Z0-9-");

    /** Maximum length of one domain label in an e-mail autolink. */
    private static final int MAX_DOMAIN_LABEL_LENGTH = 63;

    /**
     * Scans the body of an e-mail autolink: a non-empty local part, {@code '@'}, and one
     * or more domain labels separated by {@code '.'}, ending with {@code '>'}. Each label
     * starts with a letter or digit, continues with letters, digits or {@code '-'}, does
     * not end with {@code '-'} and is at most 63 characters long.
     *
     * @param input text being scanned
     * @param offset index of the first character of the local part
     * @return the offset just past the closing {@code '>'}, or {@code -1}
     */
    public static int scanEmail(StringBuilder input, int offset) {
        int initialPos = offset;
        char c = 0;
        // Consumes the local part and the character that ends it
        while(offset < input.length() && EMAIL_START.contains(c=input.charAt(offset++)) );
        // offset is now one past the terminating character, so an empty local part
        // leaves it at initialPos + 1
        if( c != '@' || offset == initialPos + 1 )
            return -1;
        while(true) {
            if(offset >= input.length() || !EMAIL_END_A.contains(c=input.charAt(offset++)))
                return -1;
            int count = 1;
            char oldC = c;
            while(offset < input.length() && EMAIL_END_B.contains(c=input.charAt(offset++))) {
                ++count;
                if(count > MAX_DOMAIN_LABEL_LENGTH)
                    return -1;
                oldC = c;
            }
            // oldC is the last character of the label
            if(oldC=='-')
                return -1;
            if(c == '>')
                return offset;
            if(c != '.')
                return -1;
        }
    }
}