1 package org.simantics.scl.compiler.markdown.internal;
3 import org.simantics.scl.compiler.markdown.inlines.Subject;
5 import gnu.trove.set.hash.THashSet;
11 public char bulletChar;
13 public static boolean isCloseCodeFence(StringBuilder line, int offset, char fenceChar, int fenceLength) {
15 while(line.charAt(offset) == fenceChar) {
19 if(matched < fenceLength)
22 char c = line.charAt(offset++);
30 public static boolean isSetextHeaderLine(StringBuilder line, int offset, char headerLineChar) {
32 while((c = line.charAt(offset)) == headerLineChar)
36 c = line.charAt(offset);
41 public static boolean isHRule(StringBuilder line, int offset, char hrChar) {
44 while((c = line.charAt(offset)) != '\n') {
54 public static boolean isHtmlBlockTag(StringBuilder line, int offset) {
55 if(line.charAt(offset) != '<')
58 char c = line.charAt(offset);
60 // HTML comment, processing instruction, CDATA or entity definition
61 if(c == '!' || c == '?')
67 offset = scanTag(line, offset);
70 c = line.charAt(offset);
71 return c == ' ' || c == '>';
75 offset = scanTag(line, offset);
78 c = line.charAt(offset);
79 return c == ' ' || c == '/' || c == '>';
82 public static int scanTag(StringBuilder line, int offset) {
83 StringBuilder b = new StringBuilder();
84 while(offset < line.length()) {
85 char c = line.charAt(offset);
86 if( (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') )
88 else if( c >= 'A' && c <= 'Z' )
89 b.append(Character.toLowerCase(c));
94 if(HTML_BLOCK_TAG_SET.contains(b.toString()))
100 private static final String[] HTML_BLOCK_TAGS = new String[] {
101 "article", "header", "aside", "hgroup", "blockquote", "hr", "iframe", "body", "li", "map", "button", "object", "canvas", "ol",
102 "caption", "output", "col", "p", "colgroup", "pre", "dd", "progress", "div", "section", "dl", "table", "td", "dt", "tbody",
103 "embed", "textarea", "fieldset", "tfoot", "figcaption", "th", "figure", "thead", "footer", "tr", "form", "ul", "h1", "h2", "h3",
104 "h4", "h5", "h6", "video", "script", "style"
// Lookup set for the tag names listed in HTML_BLOCK_TAGS (all written in lower case).
// It is populated by the loop that adds every HTML_BLOCK_TAGS entry, and scanTag
// queries it with the name it has accumulated in its buffer (upper-case letters are
// lower-cased before being appended, so the lookup is effectively case-insensitive).
106 private static final THashSet<String> HTML_BLOCK_TAG_SET = new THashSet<String>();
108 for(String tag : HTML_BLOCK_TAGS)
109 HTML_BLOCK_TAG_SET.add(tag);
112 public boolean isAtxHeaderStart(StringBuilder line, int offset) {
115 while((c = line.charAt(offset)) == '#') {
119 if(matched == 0 || matched > 6)
121 this.level = matched;
128 c = line.charAt(offset);
131 this.matched = matched;
135 public boolean isOpenCodeFence(StringBuilder line, int offset, char fenceChar) {
138 while((c = line.charAt(offset)) == fenceChar) {
144 this.level = matched;
145 while(line.charAt(offset) == ' ') {
149 this.matched = matched;
150 while((c = line.charAt(offset)) != '\n') {
158 public boolean isListMarker(StringBuilder line, int offset) {
161 while(Character.isDigit(c = line.charAt(pos)))
163 if(c != '.' && c != ')')
167 if((c2=line.charAt(pos)) != ' ' && c2 != '\n')
169 this.matched = pos-offset;
170 this.level = Integer.parseInt(line.substring(offset, pos-1));
// Literal that scanHtmlTag compares character by character against the input while
// advancing the offset. NOTE(review): presumably this is the text that follows "<!["
// in a CDATA section opener -- confirm against the full scanHtmlTag body.
175 private static final String CDATA = "CDATA[";
177 public static int scanHtmlTag(StringBuilder input, int offset) {
179 c = input.charAt(offset++);
181 // Comment, declaration or cdata
183 if(offset == input.length())
185 c = input.charAt(offset++);
189 if(offset+4 > input.length())
191 if(input.charAt(offset++) != '-')
193 c = input.charAt(offset++);
195 c = input.charAt(offset++);
202 while(offset+3 <= input.length()) {
203 c = input.charAt(offset++);
205 c = input.charAt(offset++);
207 c = input.charAt(offset++);
220 for(int i=0;i<CDATA.length();++i) {
221 c = input.charAt(offset++);
222 if(CDATA.charAt(i) != c)
225 while(offset+3 <= input.length()) {
226 c = input.charAt(offset++);
228 c = input.charAt(offset++);
230 c = input.charAt(offset++);
240 else if(c >= 'A' && c <= 'Z') {
241 while( offset < input.length() && (c=input.charAt(offset++)) >= 'A' && c <= 'Z' );
242 if(c != ' ' && c != '\n')
244 while( offset < input.length() && (c=input.charAt(offset++)) != '>' );
254 // Processing instruction
256 while(offset < input.length()) {
257 c = input.charAt(offset++);
259 c = input.charAt(offset++);
269 offset = scanTagName(input, offset);
272 offset = scanWhitespace(input, offset);
275 if(input.charAt(offset) == '>')
284 offset = scanTagName(input, offset);
288 if((c=input.charAt(offset)) != ' ' && c != '\n') {
291 if(c == '/' && input.charAt(offset+1)=='>')
295 offset = scanWhitespace(input, offset);
298 c = input.charAt(offset);
301 if(c == '/' && input.charAt(offset+1)=='>')
303 offset = scanAttributeName(input, offset);
306 offset = scanWhitespace(input, offset);
309 if((c=input.charAt(offset)) == '=') {
311 offset = scanWhitespace(input, offset);
315 c = input.charAt(offset);
319 if(offset == input.length())
321 c=input.charAt(offset++);
329 if(offset == input.length())
331 c=input.charAt(offset++);
338 if(offset == input.length())
340 c=input.charAt(offset++);
341 if(c==' ' || c=='\n' || c=='"' || c=='\'' || c=='=' || c=='<' || c=='>' || c=='`') {
352 c = input.charAt(offset);
353 if(c != ' ' && c != '\n' && c != '>')
360 private static int scanTagName(StringBuilder input, int offset) {
361 if(offset >= input.length())
363 char c = input.charAt(offset++);
364 if( !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) )
366 while(offset < input.length()) {
367 c = input.charAt(offset++);
368 if( !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')) )
374 private static int scanAttributeName(StringBuilder input, int offset) {
375 if(offset >= input.length())
377 char c = input.charAt(offset++);
378 if( !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == ':') )
380 while(offset < input.length()) {
381 c = input.charAt(offset++);
382 if( !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')
383 || c == '_' || c == ':' || c == '.' || c == '-') )
389 public static int scanWhitespace(StringBuilder input, int offset) {
390 while(offset < input.length()) {
391 char c = input.charAt(offset);
392 if(c != ' ' && c != '\n')
399 public static int scanUri(StringBuilder input, int pos) {
403 if(pos == input.length())
405 c = input.charAt(pos++);
406 if(c < 0 || c >= 128 || !IS_SCHEME_CHAR[(int)c])
409 if(c != ':' || !SCHEME_SET.contains(input.substring(startPos, pos-1).toLowerCase()))
411 while(pos < input.length() && (c = input.charAt(pos)) != '>' && c != '<' && !(c <= 0x20 && c >= 0))
419 private static final String[] SCHEMES = new String[] {
420 "coap", "doi", "javascript", "aaa", "aaas", "about", "acap", "cap", "cid", "crid", "data", "dav", "dict", "dns", "file", "ftp", "geo", "go",
421 "gopher", "h323", "http", "https", "iax", "icap", "im", "imap", "info", "ipp", "iris", "iris.beep", "iris.xpc", "iris.xpcs", "iris.lwz",
422 "ldap", "mailto", "mid", "msrp", "msrps", "mtqp", "mupdate", "news", "nfs", "ni", "nih", "nntp", "opaquelocktoken", "pop", "pres", "rtsp",
423 "service", "session", "shttp", "sieve", "sip", "sips", "sms", "snmp", "soap.beep", "soap.beeps", "tag", "tel", "telnet", "tftp", "thismessage",
424 "tn3270", "tip", "tv", "urn", "vemmi", "ws", "wss", "xcon", "xcon-userid", "xmlrpc.beep", "xmlrpc.beeps", "xmpp", "z39.50r", "z39.50s", "adiumxtra",
425 "afp", "afs", "aim", "apt", "attachment", "aw", "beshare", "bitcoin", "bolo", "callto", "chrome", "chrome-extension", "com-eventbrite-attendee",
426 "content", "cvs", "dlna-playsingle", "dlna-playcontainer", "dtn", "dvb", "ed2k", "facetime", "feed", "finger", "fish", "gg", "git", "gizmoproject",
427 "gtalk", "hcp", "icon", "ipn", "irc", "irc6", "ircs", "itms", "jar", "jms", "keyparc", "lastfm", "ldaps", "magnet", "maps", "market", "message", "mms",
428 "ms-help", "msnim", "mumble", "mvn", "notes", "oid", "palm", "paparazzi", "platform", "proxy", "psyc", "query", "res", "resource", "rmi", "rsync",
429 "rtmp", "secondlife", "sftp", "sgn", "skype", "smb", "soldat", "spotify", "ssh", "steam", "svn", "teamspeak", "things", "udp", "unreal", "ut2004",
430 "ventrilo", "view-source", "webcal", "wtai", "wyciwyg", "xfire", "xri", "ymsgr"
// Set of known URI scheme names: every SCHEMES entry is added to it, and scanUri
// looks up the lower-cased substring it scanned before the ':' character.
432 private static final THashSet<String> SCHEME_SET = new THashSet<String>();
// Table indexed by character code (0..127). An entry is set to true for each character
// that occurs in some SCHEMES entry and for that character's upper-case form.
// scanUri consults it for every scanned character, after checking the code is < 128
// so that the index stays inside the array.
433 private static final boolean[] IS_SCHEME_CHAR = new boolean[128];
435 for(String scheme : SCHEMES) {
436 SCHEME_SET.add(scheme);
437 for(int i=0;i<scheme.length();++i) {
438 char c = scheme.charAt(i);
439 IS_SCHEME_CHAR[(int)c] = true;
440 IS_SCHEME_CHAR[(int)Character.toUpperCase(c)] = true;
445 public static int scanLinkLabel(StringBuilder input, int offset) {
446 if(offset == input.length() || input.charAt(offset++) != '[')
448 int maxPos = Math.min(input.length(), offset+1000);
449 while(offset < maxPos) {
450 char c = input.charAt(offset++);
455 if(c == '\\' && offset < maxPos) {
456 c = input.charAt(offset);
457 if(Subject.getCharType(c) == 2)
464 public static int scanLinkUrl(StringBuilder input, int offset) {
465 if(offset == input.length())
467 if(input.charAt(offset) == '<') {
469 while(offset < input.length()) {
470 char c = input.charAt(offset++);
475 if(Subject.getCharType(c) == 2)
478 else if(c == '<' || c == '\n')
484 while(offset < input.length()) {
485 char c = input.charAt(offset++);
487 if(Subject.getCharType(input.charAt(offset)) == 2)
492 else if( c == '(' ) {
493 int orgPos = offset - 1;
495 if(offset >= input.length())
497 c = input.charAt(offset++);
499 if(Subject.getCharType(input.charAt(offset)) == 2)
506 else if( (c <= 0x20 && c >= 0) || c == '(' )
510 else if( (c <= 0x20 && c >= 0) || c == ')' )
517 public static int scanLinkTitle(StringBuilder input, int offset) {
518 if(offset == input.length())
520 char c = input.charAt(offset++);
522 while(offset < input.length()) {
523 c = input.charAt(offset++);
527 c = input.charAt(offset);
528 if(c == ')' || c == '\\')
535 while(offset < input.length()) {
536 c = input.charAt(offset++);
540 c = input.charAt(offset);
541 if(c == '"' || c == '\\')
548 while(offset < input.length()) {
549 c = input.charAt(offset++);
553 c = input.charAt(offset);
554 if(c == '\'' || c == '\\')
// Character classes used by scanEmail.
// NOTE(review): the constructor strings look like range specifications ("a-z" etc.);
// CharacterSet is declared elsewhere, so confirm how it parses them (in particular the
// trailing '-', which is presumably a literal hyphen).

// Characters consumed by the first loop of scanEmail; the character that ends that run
// is then compared against '@'.
564 private static final CharacterSet EMAIL_START = new CharacterSet("a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-");
// Tested against a single character in scanEmail after the '@' check (letters and
// digits only).
565 private static final CharacterSet EMAIL_END_A = new CharacterSet("a-zA-Z0-9");
// Loop condition for the characters scanEmail consumes after the EMAIL_END_A check;
// same as EMAIL_END_A plus '-'.
566 private static final CharacterSet EMAIL_END_B = new CharacterSet("a-zA-Z0-9-");
568 public static int scanEmail(StringBuilder input, int offset) {
569 int initialPos = offset;
571 while(offset < input.length() && EMAIL_START.contains(c=input.charAt(offset++)) );
572 if( c != '@' || offset == initialPos )
576 if(offset == input.length() || !EMAIL_END_A.contains(c=input.charAt(offset++)))
580 while(offset < input.length() && EMAIL_END_B.contains(c=input.charAt(offset++))) {