1 package org.simantics.scl.compiler.markdown.internal;
3 import java.io.IOException;
5 import java.io.StringReader;
7 import org.simantics.scl.compiler.markdown.inlines.Subject;
8 import org.simantics.scl.compiler.markdown.nodes.BlockQuoteNode;
9 import org.simantics.scl.compiler.markdown.nodes.CodeBlockNode;
10 import org.simantics.scl.compiler.markdown.nodes.DocumentNode;
11 import org.simantics.scl.compiler.markdown.nodes.ExtensionBlockNode;
12 import org.simantics.scl.compiler.markdown.nodes.HeaderNode;
13 import org.simantics.scl.compiler.markdown.nodes.HorizontalRuleNode;
14 import org.simantics.scl.compiler.markdown.nodes.HtmlNode;
15 import org.simantics.scl.compiler.markdown.nodes.ItemNode;
16 import org.simantics.scl.compiler.markdown.nodes.ListNode;
17 import org.simantics.scl.compiler.markdown.nodes.Node;
18 import org.simantics.scl.compiler.markdown.nodes.ParagraphNode;
19 import org.simantics.scl.compiler.markdown.nodes.Reference;
21 import gnu.trove.map.hash.THashMap;
/**
 * Block-structure parser for Markdown text. Input is consumed one line at a
 * time (see processLine) and turned into a tree of nodes hanging off a
 * DocumentNode root; link reference definitions found in paragraphs are
 * collected into referenceMap along the way.
 */
23 public class MarkdownParser {
// Debug switch. NOTE(review): presumably gates the System.out.println tracing
// found throughout this class - confirm against the guarding conditions.
24 public static final boolean DEBUG = false;
// Indentation (in spaces) at which processLine opens or continues an
// indented, i.e. non-fenced, code block.
25 public static final int CODE_INDENT = 4;
// Root of the tree being built; processLine starts its descent here.
27 private DocumentNode root = new DocumentNode();
// Node tracked across lines; starts at the root and is reassigned from the
// return value of finalize(...).
28 private Node current = root;
// Scratch buffer reused by detab(...).
30 private StringBuilder detabBuffer = new StringBuilder();
// Scanner instance; processLine reads its matched, level and bulletChar
// fields after a successful instance-level check.
31 private Scanner scanner = new Scanner();
// Line number copied onto every node created through addChild(...).
32 private int lineNumber = 0;
// Link reference definitions keyed by normalized label; filled by
// parseReferenceInline(...) and handed to Subject.parseInlines(...).
34 private THashMap<String, Reference> referenceMap = new THashMap<String, Reference>();
/**
 * Parses a Markdown document from a character stream. Characters are read
 * one at a time into a line buffer and every completed line is handed to
 * processLine; afterwards all nodes that are still open are finalized.
 *
 * @param reader source of the document text
 * @return the parsed document (NOTE(review): presumably the root node; the
 *         return statement is not visible here - confirm)
 * @throws IOException if reading from the reader fails
 */
36 public DocumentNode parseDocument(Reader reader) throws IOException {
37 StringBuilder lineBuffer = new StringBuilder();
40 int c = reader.read();
// NOTE(review): the condition guarding this call is not visible here;
// presumably it is the end-of-input case flushing the last line - confirm.
42 processLine(lineBuffer);
// Line break. secondNL (assigned below) is the *other* line-break character,
// so an empty buffer followed by secondNL is the second half of a
// two-character line ending rather than an empty line of its own.
45 else if(c == '\n' || c == '\r') {
46 if(lineBuffer.length() == 0 && c == secondNL)
49 processLine(lineBuffer);
// Reuse the same buffer for the next line.
50 lineBuffer.delete(0, lineBuffer.length());
51 secondNL = c == '\n' ? '\r' : '\n';
// Any other character belongs to the current line.
55 lineBuffer.append((char)c);
// Finalize every node that is still open; finalize(...) returns the next node
// to process and the loop stops once it returns null.
57 while(current != null)
58 current = finalize(current);
/**
 * Convenience overload that parses a document held in memory by wrapping the
 * text in a StringReader and delegating to parseDocument(Reader).
 *
 * @param text the Markdown source
 * @return the result of parseDocument(Reader)
 * @throws RuntimeException wrapping an IOException raised by the delegate
 */
63 public DocumentNode parseDocument(String text) {
65 return parseDocument(new StringReader(text));
66 } catch (IOException e) {
67 // Should not be possible
68 throw new RuntimeException(e);
/**
 * Runs inline parsing over the subtree rooted at the given node. Children
 * are visited first (recursively); then, if the node itself is a
 * ParagraphNode or HeaderNode, its content is passed to
 * Subject.parseInlines together with the collected reference map.
 *
 * @param node root of the subtree to process
 */
72 private void processInlines(Node node) {
73 for(Node child = node.firstChild; child != null; child = child.next)
74 processInlines(child);
75 if(node instanceof ParagraphNode || node instanceof HeaderNode)
76 Subject.parseInlines(referenceMap, node);
/**
 * Incorporates one line of input into the tree. The visible code works in
 * three stages:
 * 1. walk down the chain of open last children and test whether each open
 *    container continues on this line;
 * 2. starting from the deepest matched container, try to open new blocks
 *    (code block, block quote, header, fence, HTML, horizontal rule, list
 *    items, extension block);
 * 3. update the blank-line flags and attach the remaining text to the right
 *    node, finalizing nodes that were not matched.
 *
 * @param line the line to process; a '\n' is appended to it here, and
 *        chopTrailingHashtags may shorten it
 */
79 private void processLine(StringBuilder line) {
83 System.out.println("processLine(" + line + ")");
84 line.append('\n'); // Easier to detect eol
// ---- Stage 1: match the containers that are already open ----
86 Node container = root;
89 boolean blank = false;
90 boolean allMatched = true;
91 while(container.lastChild != null && container.lastChild.open) {
92 container = container.lastChild;
// Locate the first non-space character at or after offset; indent is the
// number of spaces skipped.
94 int firstNonspace = offset;
96 while((c=line.charAt(firstNonspace)) == ' ')
99 int indent = firstNonspace - offset;
// Block quote continues when a '>' marker appears with at most three spaces of
// indentation; the marker is stepped over and a following space is tested for.
102 if(container instanceof BlockQuoteNode) {
103 if(indent <= 3 && c == '>') {
104 offset = firstNonspace + 1;
105 if(line.charAt(offset) == ' ')
// List item continues when the line is indented at least as far as the item's
// recorded indentation.
111 else if(container instanceof ItemNode) {
112 ItemNode item = (ItemNode)container;
113 if(indent >= item.indentation) {
114 offset += item.indentation;
117 offset = firstNonspace;
// Code block: an indented (non-fenced) block continues while the line keeps at
// least CODE_INDENT spaces; a fenced block is finalized when a matching
// closing fence is found.
121 else if(container instanceof CodeBlockNode) {
122 CodeBlockNode codeBlock = (CodeBlockNode)container;
123 if(!codeBlock.fenced) {
124 if(indent >= CODE_INDENT)
125 offset += CODE_INDENT;
127 offset = firstNonspace;
133 Scanner.isCloseCodeFence(line, firstNonspace,
134 codeBlock.fenceChar, codeBlock.fenceLength)) {
135 current = finalize(container);
// Loop bounded by the fence's recorded offset while the line has spaces at
// offset. NOTE(review): the loop body is not visible here; presumably it
// skips that many leading spaces - confirm.
139 int i = codeBlock.fenceOffset;
140 while(i > 0 && line.charAt(offset) == ' ') {
147 else if(container instanceof HeaderNode) {
150 else if(container instanceof HtmlNode) {
154 else if(container instanceof ParagraphNode) {
// NOTE(review): presumably reached when the container did not match, stepping
// back to the last container that did - confirm.
160 container = container.parent;
// ---- Stage 2: open new containers below the last matched one ----
165 Node lastMatchedContainer = container;
167 System.out.println(" lastMatchedContainer = " + lastMatchedContainer.getClass().getSimpleName() + "@" + lastMatchedContainer.hashCode());
// Blank line in a container whose previous line was blank as well: search for
// a ListNode (b), finalize containers until b is reached, then continue from
// b's parent.
169 if(blank && container.lastLineBlank) {
170 //System.out.println(" DOUBLE BREAK " + container.getClass().getSimpleName() + "@" + container.hashCode());
172 while(b != null && !(b instanceof ListNode))
176 while(container != null && container != b)
177 container = finalize(container);
179 container = b.parent;
// An open paragraph tracked in current suppresses opening an indented code
// block below (see the !maybeLazy test).
183 boolean maybeLazy = current instanceof ParagraphNode;
// Code blocks and HTML blocks are not searched for nested block starts.
184 while(!(container instanceof CodeBlockNode) && !(container instanceof HtmlNode)) {
185 int firstNonspace = offset;
187 while((c=line.charAt(firstNonspace)) == ' ')
190 int indent = firstNonspace - offset;
// Indented code block: only when this is neither a possible lazy paragraph
// continuation nor a blank line.
193 if(indent >= CODE_INDENT) {
194 if(!maybeLazy && !blank) {
196 container = addChild(container, new CodeBlockNode());
// Block quote: step over the marker, test for a following space, and open a
// BlockQuoteNode.
202 offset = firstNonspace + 1;
203 if(line.charAt(offset) == ' ')
205 container = addChild(container, new BlockQuoteNode());
// ATX header ('#' prefix); scanner.matched is added to the offset and
// scanner.level is passed to the new HeaderNode.
207 else if(c == '#' && scanner.isAtxHeaderStart(line, firstNonspace)) {
208 offset = firstNonspace + scanner.matched;
209 container = addChild(container, new HeaderNode(scanner.level, false));
// Opening code fence made of backticks or tildes; the node is given the fence
// character, scanner.level and the fence's distance from offset.
211 else if((c == '`' || c == '~') && scanner.isOpenCodeFence(line, firstNonspace, c)) {
212 container = addChild(container, new CodeBlockNode(c, scanner.level, firstNonspace - offset));
213 offset = firstNonspace + scanner.matched;
// HTML block.
215 else if(Scanner.isHtmlBlockTag(line, firstNonspace)) {
216 container = addChild(container, new HtmlNode());
// Setext header underline ('=' gives level 1, '-' gives level 2) directly
// below a paragraph whose content has no line break: the paragraph is
// replaced in the tree by a HeaderNode carrying the same content.
218 else if((c == '=' || c == '-')
219 && container instanceof ParagraphNode
220 && Scanner.isSetextHeaderLine(line, firstNonspace, c)
221 && container.stringContent.indexOf("\n") == -1
223 HeaderNode header = new HeaderNode(c == '=' ? 1 : 2, true);
224 header.lineNumber = container.lineNumber;
226 System.out.println(" Replace ParagraphNode@" + System.identityHashCode(container) + " with HeaderNode@" + System.identityHashCode(header));
// Splice the header into the paragraph's position among its siblings.
227 header.stringContent = container.stringContent;
228 header.parent = container.parent;
229 header.prev = container.prev;
230 if(header.prev != null)
231 header.prev.next = header;
// NOTE(review): this only tests that the parent has a last child, not that it
// is the replaced paragraph (unlike the firstChild test below) - confirm
// that this is intended.
232 if(header.parent.lastChild != null)
233 header.parent.lastChild = header;
234 if(header.parent.firstChild == container)
235 header.parent.firstChild = header;
237 if(current == container)
// Move the offset to the last character, the '\n' appended above: the rest of
// the line is consumed.
239 offset = line.length()-1;
// Horizontal rule; not attempted for a paragraph container when allMatched is
// false. The node is finalized immediately and the line is consumed.
241 else if(!(container instanceof ParagraphNode && !allMatched)
242 && (c == '*' || c == '_' || c == '-')
243 && Scanner.isHRule(line, firstNonspace, c)) {
244 container = addChild(container, new HorizontalRuleNode());
245 container = finalize(container);
246 offset = line.length()-1;
// Bullet list item: marker '*', '+' or '-' followed by a space or end of line.
248 else if((c == '*' || c == '+' || c == '-') &&
249 (line.charAt(firstNonspace+1) == ' ' || line.charAt(firstNonspace+1) == '\n')) {
250 int originalOffset = offset;
251 offset = firstNonspace + 1;
// i indexes the run of spaces after the marker (examined up to index 5).
254 while(i <= 5 && (c2 = line.charAt(offset+i)) == ' ')
256 if(i >= 5|| i < 1 || c2 == '\n') {
// Open a new list unless the container is a ListNode compatible with the marker.
264 if(!(container instanceof ListNode) ||
265 !((ListNode)container).isCompatible(c)) {
266 container = addChild(container, new ListNode(c));
270 System.out.println(" indentation = " + (offset - originalOffset + (i == 0 ? 1 : 0)));
// The item records how far the offset moved on this line (plus one when i is 0).
272 container = addChild(container, new ItemNode(offset - originalOffset + (i == 0 ? 1 : 0)));
// Numbered list item: same procedure, with the marker recognized by the
// scanner, which supplies matched, bulletChar and level.
274 else if(Character.isDigit(c) && scanner.isListMarker(line, firstNonspace)) {
275 int originalOffset = offset;
276 offset = firstNonspace + scanner.matched;
279 while(i <= 5 && (c2 = line.charAt(offset+i)) == ' ')
281 if(i >= 5|| i < 1 || c2 == '\n') {
289 if(!(container instanceof ListNode) ||
290 !((ListNode)container).isCompatible(scanner.bulletChar)) {
291 container = addChild(container, new ListNode(scanner.bulletChar, scanner.level));
295 System.out.println(" indentation = " + (offset - originalOffset + (i == 0 ? 1 : 0)));
297 container = addChild(container, new ItemNode(offset - originalOffset + (i == 0 ? 1 : 0)));
// Extension block introduced by "::": a name made of letters, digits, spaces
// and underscores, followed by a delimited section starting at bracketBegin.
// A backslash followed by a backslash or ']' gets special handling.
299 else if(c == ':' && line.charAt(firstNonspace+1) == ':') {
300 int p=firstNonspace+2;
301 while(Character.isAlphabetic(c=line.charAt(p)) || Character.isDigit(c) || c == ' ' || c=='_')
305 int bracketBegin = p;
308 c = line.charAt(p++);
312 else if(c == '\\' && ((c=line.charAt(p+1)) == '\\' || c == ']')) {
// First argument: trimmed text between "::" and bracketBegin; second: trimmed
// text between the two delimiters.
320 container = addChild(container, new ExtensionBlockNode(
321 line.substring(firstNonspace+2, bracketBegin).trim(),
322 line.substring(bracketBegin+1, p-1).trim()));
330 if(container.acceptLines())
// ---- Stage 3: blank-line flags and text placement ----
335 int firstNonspace = offset;
337 while((c=line.charAt(firstNonspace)) == ' ')
// Mark the last child, then mark the container itself unless it is a block
// quote, a header, a fenced code block, or a childless list item that was
// created on this very line.
343 if(container.lastChild != null)
344 container.lastChild.setLastLineBlank(true);
345 container.setLastLineBlank(
346 !(container instanceof BlockQuoteNode) &&
347 !(container instanceof HeaderNode) &&
348 !(container instanceof CodeBlockNode && ((CodeBlockNode)container).fenced) &&
349 !(container instanceof ItemNode &&
350 container.firstChild == null &&
351 container.lineNumber == lineNumber));
// Clear the flag on the container and on each of its ancestors.
354 container.setLastLineBlank(false);
355 for(Node cont = container;cont.parent != null;
356 cont = cont.parent, cont.setLastLineBlank(false));
359 System.out.println(" current = " + current.getClass().getSimpleName() + "@" + current.hashCode());
360 System.out.println(" container = " + container.getClass().getSimpleName() + "@" + container.hashCode());
// Lazy continuation: no new container was opened (container is still the last
// matched one) and current is a different node, namely a paragraph that already
// has content - the line is appended to that paragraph.
362 if(current != lastMatchedContainer &&
363 container == lastMatchedContainer &&
365 current instanceof ParagraphNode &&
366 current.stringContent != null) {
367 addLine(current, line, offset);
// Finalize nodes via current until the last matched container is reached.
370 while(current != lastMatchedContainer)
371 current = finalize(current);
// Code and HTML blocks take the line from offset, keeping leading spaces.
373 if(container instanceof CodeBlockNode ||
374 container instanceof HtmlNode)
375 addLine(container, line, offset);
// Containers accepting lines take the text from the first non-space character;
// for non-setext headers the trailing '#' run is removed first.
378 else if(container.acceptLines()) {
379 if(container instanceof HeaderNode &&
380 !((HeaderNode)container).setext)
381 chopTrailingHashtags(line, firstNonspace);
382 addLine(container, line, firstNonspace);
// Start a new paragraph holding this line.
385 container = addChild(container, new ParagraphNode());
386 addLine(container, line, firstNonspace);
/**
 * Removes the closing '#' sequence of a header line in place. The line is
 * scanned from its end: trailing spaces and newlines, then a run of '#',
 * then the spaces in front of that run.
 *
 * @param line the line buffer, modified in place
 * @param firstNonspace index of the first non-space character of the line
 */
393 private void chopTrailingHashtags(StringBuilder line, int firstNonspace) {
394 //System.out.println("chopTrailingHashtags("+line.substring(firstNonspace)+")");
395 int pos = line.length()-1;
// Strip trailing spaces and newline characters.
397 while(pos >= 0 && ((c=line.charAt(pos)) == ' ' || c == '\n'))
399 line.delete(pos+1, line.length());
// Walk back over the trailing '#' characters.
402 while(pos >= 0 && (c=line.charAt(pos)) == '#')
// Walk back over the spaces preceding the '#' run.
407 while(pos >= 0 && line.charAt(pos) == ' ')
// NOTE(review): the action taken when the scan passes firstNonspace is not
// visible here - confirm.
410 if(pos < firstNonspace)
412 line.delete(pos, line.length());
/**
 * Appends the part of a line starting at the given offset to a node's text
 * content, creating the content buffer on first use.
 *
 * @param container node receiving the text
 * @param line the full line buffer
 * @param offset index in line at which the text to add starts
 */
416 private void addLine(Node container, StringBuilder line, int offset) {
417 if(container.stringContent == null)
418 container.stringContent = new StringBuilder();
// Line separator. NOTE(review): presumably only executed when content already
// existed (the branching is not visible here) - confirm.
420 container.stringContent.append('\n');
421 int length = line.length();
// Detects the '\n' terminator at the end of the line. NOTE(review): presumably
// length is reduced so the terminator is not copied - confirm.
422 if(length > 0 && line.charAt(length-1) == '\n')
425 System.out.println(" addLine(" + container.getClass().getSimpleName() + "@" + container.hashCode() + ", \"" + line.substring(offset, length) + "\")");
426 container.stringContent.append(line, offset, length);
/**
 * Expands tab characters into spaces using tab stops every four columns.
 * When a tab is found, the text before it is copied into the shared
 * detabBuffer and the remaining characters are appended with tabs expanded.
 *
 * @param str text that may contain tab characters
 * @return NOTE(review): the return statements are not visible here;
 *         presumably detabBuffer when a tab was found and str otherwise -
 *         confirm
 */
429 private StringBuilder detab(StringBuilder str) {
430 int length = str.length();
431 for(int i=0;i<length;++i) {
432 if(str.charAt(i) == '\t') {
// First tab found: reset the scratch buffer and copy the tab-free prefix.
433 detabBuffer.delete(0, detabBuffer.length());
434 detabBuffer.append(str, 0, i);
436 char c = str.charAt(i);
// Number of spaces needed to reach the next multiple of four in the output.
438 int spaces = 4 - detabBuffer.length()%4;
440 detabBuffer.append(' ');
443 detabBuffer.append(c);
/**
 * Attaches a new child node to the tree. The child is stamped with the
 * current line number; if the proposed parent cannot contain the child, the
 * parent is finalized and replaced by what finalize returns, repeatedly,
 * until a node that can contain the child is reached.
 *
 * @param parent proposed parent node
 * @param child node to attach
 * @return NOTE(review): the return statement is not visible here; callers
 *         assign the result to their container variable, so presumably the
 *         child - confirm
 */
451 private Node addChild(Node parent, Node child) {
452 child.lineNumber = lineNumber;
454 System.out.println(" addChild(" + parent.getClass().getSimpleName() + "@" + parent.hashCode() + ", " +
455 child.getClass().getSimpleName() + "@" + child.hashCode() + ")");
456 while(!parent.canContain(child))
457 parent = finalize(parent);
458 parent.addChild(child);
/**
 * Performs the type-specific work needed when a node is finalized:
 * paragraphs have link reference definitions extracted, headers get a
 * non-null content buffer, fenced code blocks have their first line split
 * off as the info string, and lists have their items inspected for blank
 * lines.
 *
 * @param node the node being finalized
 * @return the node the caller should continue with (callers loop on this
 *         value). NOTE(review): the return statement is not visible here -
 *         confirm which node is returned
 */
462 private Node finalize(Node node) {
464 if(node instanceof ParagraphNode) {
465 parseReferenceInline(node);
// Headers that never received text get an empty buffer instead of null.
467 else if(node instanceof HeaderNode) {
468 if(node.stringContent == null)
469 node.stringContent = new StringBuilder(0);
471 else if(node instanceof CodeBlockNode) {
472 CodeBlockNode codeBlock = (CodeBlockNode)node;
// Fenced block: the first content line is the info string; it is trimmed,
// removed from the content and stored after Reference.cleanUrl.
473 if(codeBlock.fenced) {
474 int firstLineLength = codeBlock.stringContent.indexOf("\n");
// No line break: the whole content is the info string and the body is empty.
476 if(firstLineLength == -1) {
477 infoString = codeBlock.stringContent.toString().trim();
478 codeBlock.stringContent = new StringBuilder(0);
481 infoString = codeBlock.stringContent.substring(0, firstLineLength).trim();
482 codeBlock.stringContent.delete(0, firstLineLength+1);
484 codeBlock.infoString = Reference.cleanUrl(infoString);
// NOTE(review): presumably the non-fenced branch (branching not visible here)
// - confirm.
487 removeTrailingBlankLines(codeBlock.stringContent);
// Lists: look for an item that ends with a blank line and has a following
// item, or an item child that ends with a blank line and is followed by
// another child or item. NOTE(review): the action taken on a hit is not
// visible here - confirm.
490 else if(node instanceof ListNode) {
491 ListNode list = (ListNode)node;
493 itemLoop: for(Node item=list.firstChild;item != null;item = item.next) {
494 if(item.lastLineBlank && item.next != null) {
498 for(Node child=item.firstChild;child != null;child = child.next)
499 if(endsWithBlankLine(child) && (child.next != null || item.next != null)) {
/**
 * Tests whether a node ends with a blank line, looking at the node's
 * lastLineBlank flag and descending through last children; the type test
 * below restricts the descent to ListNode and ItemNode.
 *
 * @param node node to inspect
 * @return NOTE(review): the return statements are not visible here;
 *         presumably true once a node flagged lastLineBlank is found -
 *         confirm
 */
508 private static boolean endsWithBlankLine(Node node) {
510 if(node.lastLineBlank)
512 node = node.lastChild;
513 if(!(node instanceof ListNode) && !(node instanceof ItemNode))
/**
 * Truncates the given buffer in place at endPos when endPos lies before the
 * end of the buffer. NOTE(review): the scan that determines endPos is not
 * visible here; going by the method name it locates the start of the
 * trailing blank lines - confirm.
 *
 * @param str buffer to trim, modified in place
 */
518 private static void removeTrailingBlankLines(StringBuilder str) {
519 int endPos = str.length();
522 char c = str.charAt(pos);
// Delete only when something was actually found to remove.
529 if(endPos < str.length())
530 str.delete(endPos, str.length());
/**
 * Extracts a link reference definition from a node's text content: a
 * bracketed label, a ':' and a URL, optionally followed by a title. A
 * recognized definition is stored in referenceMap (unless that label is
 * already present) and the consumed text is deleted from the content.
 * NOTE(review): several failure branches are not visible here; presumably
 * each returns without modifying the content - confirm.
 *
 * @param node node whose stringContent is examined and possibly shortened
 */
533 private void parseReferenceInline(Node node) {
534 StringBuilder input = node.stringContent;
// A definition must start with '['.
538 if(offset == input.length() || input.charAt(offset) != '[')
// Label, which must be immediately followed by ':'.
542 offset = Scanner.scanLinkLabel(input, offset);
543 if(offset == -1 || offset == input.length()
544 || input.charAt(offset) != ':')
// Label text without the first character and the character before offset.
546 String label = input.substring(1, offset-1);
// URL, after the characters skipped by spnl; an empty URL is rejected.
550 offset = spnl(input, offset);
551 int linkStart = offset;
552 offset = Scanner.scanLinkUrl(input, offset);
553 if(offset == -1 || offset == linkStart)
// A URL starting with '<' is stored without its first and last character.
556 if(linkStart < input.length() && input.charAt(linkStart) == '<')
557 url = input.substring(linkStart+1, offset-1);
559 url = input.substring(linkStart, offset);
560 url = Reference.cleanUrl(url);
// Optional title; linkUrlEnd remembers where the URL ended.
563 int linkUrlEnd = offset;
564 offset = spnl(input, offset);
565 int titleStart = offset;
566 offset = Scanner.scanLinkTitle(input, offset);
// Skip spaces, then test whether the end of the content was reached.
571 while(offset < input.length() && (c = input.charAt(offset)) == ' ')
575 else if(offset != input.length())
// Title text without its first and last character.
580 title = input.substring(titleStart+1, offset-1);
581 title = Reference.cleanTitle(title);
583 while(offset < input.length() && (c = input.charAt(offset)) == ' ')
587 else if(offset != input.length())
590 /*System.out.println("Reference:");
591 System.out.println(" label = '" + label + "'");
592 System.out.println(" url = '" + url + "'");
593 System.out.println(" title = '" + title + "'");*/
// Register under the normalized label; an existing entry is kept.
594 Reference reference = new Reference(Reference.normalizeLabel(label), url, title);
595 if(!referenceMap.contains(reference.label))
596 referenceMap.put(reference.label, reference);
// Drop the consumed definition from the node's content.
598 if(offset == input.length()) {
603 input.delete(0, offset);
607 private static int spnl(StringBuilder input, int offset) {
608 boolean seenWhitespace = false;
609 while(offset < input.length()) {
610 char c = input.charAt(offset);
617 seenWhitespace = true;