// Reconstructed from the "new file" blobdiff of
// bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/markdown/internal/MarkdownParser.java
// (simantics/platform.git, blob 7d30856c8d65b7a3491a7e924bb65434d2e27d93,
// commit 969bd23cab98a79ca9101af33334000879fb60c5).
package org.simantics.scl.compiler.markdown.internal;

import gnu.trove.map.hash.THashMap;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;

import org.simantics.scl.compiler.markdown.inlines.Subject;
import org.simantics.scl.compiler.markdown.nodes.BlockQuoteNode;
import org.simantics.scl.compiler.markdown.nodes.CodeBlockNode;
import org.simantics.scl.compiler.markdown.nodes.DocumentNode;
import org.simantics.scl.compiler.markdown.nodes.ExtensionBlockNode;
import org.simantics.scl.compiler.markdown.nodes.HeaderNode;
import org.simantics.scl.compiler.markdown.nodes.HorizontalRuleNode;
import org.simantics.scl.compiler.markdown.nodes.HtmlNode;
import org.simantics.scl.compiler.markdown.nodes.ItemNode;
import org.simantics.scl.compiler.markdown.nodes.ListNode;
import org.simantics.scl.compiler.markdown.nodes.Node;
import org.simantics.scl.compiler.markdown.nodes.ParagraphNode;
import org.simantics.scl.compiler.markdown.nodes.Reference;

/**
 * Line-oriented block parser for Markdown documents.
 *
 * <p>Input is consumed one line at a time: each line is first matched against
 * the currently open block containers, then new containers are opened, and
 * finally the remaining text is attached to the innermost block. After the
 * last line all open blocks are finalized and inline content of paragraphs
 * and headers is parsed with {@link Subject#parseInlines}.
 *
 * <p>An instance keeps the document tree, the current block and the line
 * counter as fields and leaves {@code current == null} once a document has
 * been parsed, so every instance must be used for exactly one
 * {@code parseDocument} call.
 */
public class MarkdownParser {
    public static final boolean DEBUG = false;
    /** Number of leading spaces that turns a line into an indented code block. */
    public static final int CODE_INDENT = 4;
    /**
     * Tab stop width used by {@link #detab(StringBuilder)}.
     * NOTE(review): the width is not visible in the damaged source; 4 is assumed.
     */
    private static final int TAB_STOP = 4;

    private DocumentNode root = new DocumentNode();
    /** Innermost block that received the previous line; null after parsing ends. */
    private Node current = root;

    /** Scratch buffer reused by {@link #detab(StringBuilder)} for lines containing tabs. */
    private StringBuilder detabBuffer = new StringBuilder();
    private Scanner scanner = new Scanner();
    private int lineNumber = 0;

    /** Link reference definitions collected from paragraphs, keyed by normalized label. */
    private THashMap<String, Reference> referenceMap = new THashMap<String, Reference>();

    /**
     * Parses a complete document from the given reader.
     *
     * <p>Lines may be terminated by {@code \n}, {@code \r} or a two character
     * combination of both; the second character of such a pair is skipped.
     *
     * @param reader source of the document text; it is read until end of stream
     *               and is not closed by this method
     * @return the root node of the parsed document
     * @throws IOException if reading from {@code reader} fails
     */
    public DocumentNode parseDocument(Reader reader) throws IOException {
        StringBuilder lineBuffer = new StringBuilder();
        // The line break character that, if it comes directly after the
        // previous line break, belongs to the same line terminator.
        char secondNL = 0;
        while(true) {
            int c = reader.read();
            if(c == -1) {
                processLine(lineBuffer);
                break;
            }
            else if(c == '\n' || c == '\r') {
                if(lineBuffer.length() == 0 && c == secondNL)
                    secondNL = 0;
                else {
                    processLine(lineBuffer);
                    lineBuffer.delete(0, lineBuffer.length());
                    secondNL = c == '\n' ? '\r' : '\n';
                }
            }
            else
                lineBuffer.append((char)c);
        }
        while(current != null)
            current = finalize(current);
        processInlines(root);
        return root;
    }

    /**
     * Parses a complete document from a string.
     *
     * @param text the document text
     * @return the root node of the parsed document
     */
    public DocumentNode parseDocument(String text) {
        try {
            return parseDocument(new StringReader(text));
        } catch (IOException e) {
            // Should not be possible
            throw new RuntimeException(e);
        }
    }

    /**
     * Recursively parses inline content, children first, for every paragraph
     * and header below (and including) the given node.
     */
    private void processInlines(Node node) {
        for(Node child = node.firstChild; child != null; child = child.next)
            processInlines(child);
        if(node instanceof ParagraphNode || node instanceof HeaderNode)
            Subject.parseInlines(referenceMap, node);
    }

    /**
     * Incorporates one line (without its line terminator) into the block tree.
     *
     * @param line the line contents; a {@code '\n'} sentinel is appended to the
     *             buffer (or to the detab buffer when the line contains tabs)
     */
    private void processLine(StringBuilder line) {
        ++lineNumber;
        line = detab(line);
        if(DEBUG)
            System.out.println("processLine(" + line + ")");
        line.append('\n'); // Easier to detect eol

        Node container = root;

        int offset = 0;
        boolean blank = false;
        boolean allMatched = true;

        // Phase 1: descend through the open blocks and check that the line
        // satisfies the continuation condition of each of them.
        while(container.lastChild != null && container.lastChild.open) {
            container = container.lastChild;

            int firstNonspace = offset;
            char c;
            while((c=line.charAt(firstNonspace)) == ' ')
                ++firstNonspace;

            int indent = firstNonspace - offset;
            blank = c == '\n';

            if(container instanceof BlockQuoteNode) {
                if(indent <= 3 && c == '>') {
                    offset = firstNonspace + 1;
                    if(line.charAt(offset) == ' ')
                        ++offset;
                }
                else
                    allMatched = false;
            }
            else if(container instanceof ItemNode) {
                ItemNode item = (ItemNode)container;
                if(indent >= item.indentation) {
                    offset += item.indentation;
                }
                else if(blank)
                    offset = firstNonspace;
                else
                    allMatched = false;
            }
            else if(container instanceof CodeBlockNode) {
                CodeBlockNode codeBlock = (CodeBlockNode)container;
                if(!codeBlock.fenced) {
                    if(indent >= CODE_INDENT)
                        offset += CODE_INDENT;
                    else if(blank)
                        offset = firstNonspace;
                    else
                        allMatched = false;
                }
                else {
                    if(indent <= 3 &&
                            Scanner.isCloseCodeFence(line, firstNonspace,
                                    codeBlock.fenceChar, codeBlock.fenceLength)) {
                        // The closing fence ends the block and carries no content.
                        current = finalize(container);
                        return;
                    }
                    else {
                        // Strip at most as many spaces as the opening fence was indented.
                        int i = codeBlock.fenceOffset;
                        while(i > 0 && line.charAt(offset) == ' ') {
                            ++offset;
                            --i;
                        }
                    }
                }
            }
            else if(container instanceof HeaderNode) {
                allMatched = false;
            }
            else if(container instanceof HtmlNode) {
                if(blank)
                    allMatched = false;
            }
            else if(container instanceof ParagraphNode) {
                if(blank)
                    allMatched = false;
            }

            if(!allMatched) {
                container = container.parent;
                break;
            }
        }

        Node lastMatchedContainer = container;
        if(DEBUG)
            System.out.println(" lastMatchedContainer = " + lastMatchedContainer.getClass().getSimpleName() + "@" + lastMatchedContainer.hashCode());

        // Two consecutive blank lines close the first list found on the
        // last-child chain starting from the root.
        if(blank && container.lastLineBlank) {
            Node b = root;
            while(b != null && !(b instanceof ListNode))
                b = b.lastChild;

            if(b != null) {
                while(container != null && container != b)
                    container = finalize(container);
                finalize(b);
                container = b.parent;
            }
        }

        // Phase 2: open new blocks as long as the rest of the line starts one.
        boolean maybeLazy = current instanceof ParagraphNode;
        while(!(container instanceof CodeBlockNode) && !(container instanceof HtmlNode)) {
            int firstNonspace = offset;
            char c;
            while((c=line.charAt(firstNonspace)) == ' ')
                ++firstNonspace;

            int indent = firstNonspace - offset;
            blank = c == '\n';

            if(indent >= CODE_INDENT) {
                if(!maybeLazy && !blank) {
                    offset += CODE_INDENT;
                    container = addChild(container, new CodeBlockNode());
                }
                else
                    break;
            }
            else if(c == '>') {
                offset = firstNonspace + 1;
                if(line.charAt(offset) == ' ')
                    ++offset;
                container = addChild(container, new BlockQuoteNode());
            }
            else if(c == '#' && scanner.isAtxHeaderStart(line, firstNonspace)) {
                offset = firstNonspace + scanner.matched;
                container = addChild(container, new HeaderNode(scanner.level, false));
            }
            else if((c == '`' || c == '~') && scanner.isOpenCodeFence(line, firstNonspace, c)) {
                container = addChild(container, new CodeBlockNode(c, scanner.level, firstNonspace - offset));
                offset = firstNonspace + scanner.matched;
            }
            else if(Scanner.isHtmlBlockTag(line, firstNonspace)) {
                container = addChild(container, new HtmlNode());
            }
            else if((c == '=' || c == '-')
                    && container instanceof ParagraphNode
                    && Scanner.isSetextHeaderLine(line, firstNonspace, c)
                    && container.stringContent.indexOf("\n") == -1
                    ) {
                // A single-line paragraph followed by an underline becomes a
                // setext header: splice a HeaderNode into the paragraph's place.
                HeaderNode header = new HeaderNode(c == '=' ? 1 : 2, true);
                header.lineNumber = container.lineNumber;
                if(DEBUG)
                    System.out.println(" Replace ParagraphNode@" + System.identityHashCode(container) + " with HeaderNode@" + System.identityHashCode(header));
                header.stringContent = container.stringContent;
                header.parent = container.parent;
                header.prev = container.prev;
                if(header.prev != null)
                    header.prev.next = header;
                if(header.parent.lastChild == container)
                    header.parent.lastChild = header;
                if(header.parent.firstChild == container)
                    header.parent.firstChild = header;
                // 'current' is intentionally not redirected to the header here:
                // lastMatchedContainer still refers to the replaced paragraph and
                // the comparison against it at the end of this method must keep
                // holding. 'current' is set to the header there.
                container = header;
                offset = line.length()-1;
            }
            else if(!(container instanceof ParagraphNode && !allMatched)
                    && (c == '*' || c == '_' || c == '-')
                    && Scanner.isHRule(line, firstNonspace, c)) {
                container = addChild(container, new HorizontalRuleNode());
                container = finalize(container);
                offset = line.length()-1;
            }
            else if((c == '*' || c == '+' || c == '-') &&
                    (line.charAt(firstNonspace+1) == ' ' || line.charAt(firstNonspace+1) == '\n')) {
                // Bullet list item
                int originalOffset = offset;
                offset = firstNonspace + 1;
                int i = 0;
                char c2 = 0;
                while(i <= 5 && (c2 = line.charAt(offset+i)) == ' ')
                    ++i;
                // With five or more spaces (or nothing but spaces) after the
                // marker only one space belongs to the marker.
                if(i >= 5 || i < 1 || c2 == '\n') {
                    if(i > 0)
                        ++offset;
                }
                else {
                    offset += i;
                }

                if(!(container instanceof ListNode) ||
                        !((ListNode)container).isCompatible(c)) {
                    container = addChild(container, new ListNode(c));
                }

                if(DEBUG) {
                    System.out.println(" indentation = " + (offset - originalOffset + (i == 0 ? 1 : 0)));
                }
                container = addChild(container, new ItemNode(offset - originalOffset + (i == 0 ? 1 : 0)));
            }
            else if(Character.isDigit(c) && scanner.isListMarker(line, firstNonspace)) {
                // Ordered list item
                int originalOffset = offset;
                offset = firstNonspace + scanner.matched;
                int i = 0;
                char c2 = 0;
                while(i <= 5 && (c2 = line.charAt(offset+i)) == ' ')
                    ++i;
                if(i >= 5 || i < 1 || c2 == '\n') {
                    if(i > 0)
                        ++offset;
                }
                else {
                    offset += i;
                }

                if(!(container instanceof ListNode) ||
                        !((ListNode)container).isCompatible(scanner.bulletChar)) {
                    container = addChild(container, new ListNode(scanner.bulletChar, scanner.level));
                }

                if(DEBUG) {
                    System.out.println(" indentation = " + (offset - originalOffset + (i == 0 ? 1 : 0)));
                }
                container = addChild(container, new ItemNode(offset - originalOffset + (i == 0 ? 1 : 0)));
            }
            else if(c == ':' && line.charAt(firstNonspace+1) == ':') {
                // Extension block of the form ::name[arguments]
                int p=firstNonspace+2;
                while(Character.isAlphabetic(c=line.charAt(p)) || Character.isDigit(c) || c == ' ' || c=='_')
                    ++p;
                if(c != '[')
                    break;
                int bracketBegin = p;
                ++p;
                while(true) {
                    c = line.charAt(p++);
                    if(c == ']') {
                        break;
                    }
                    else if(c == '\\') {
                        // p already points at the character following the
                        // backslash. That index is always valid because the
                        // line ends with the '\n' sentinel and c was not '\n'.
                        char escaped = line.charAt(p);
                        if(escaped == '\\' || escaped == ']')
                            ++p;
                    }
                    else if(c == '\n')
                        break;
                }
                if(c == ']') {
                    offset = p;
                    container = addChild(container, new ExtensionBlockNode(
                            line.substring(firstNonspace+2, bracketBegin).trim(),
                            line.substring(bracketBegin+1, p-1).trim()));
                }
                else
                    break;
            }
            else
                break;

            if(container.acceptLines())
                break;
            maybeLazy = false;
        }

        // Phase 3: attach the remaining text of the line to a block.
        int firstNonspace = offset;
        char c;
        while((c=line.charAt(firstNonspace)) == ' ')
            ++firstNonspace;

        blank = c == '\n';

        if(blank) {
            if(container.lastChild != null)
                container.lastChild.setLastLineBlank(true);
            container.setLastLineBlank(
                    !(container instanceof BlockQuoteNode) &&
                    !(container instanceof HeaderNode) &&
                    !(container instanceof CodeBlockNode && ((CodeBlockNode)container).fenced) &&
                    !(container instanceof ItemNode &&
                            container.firstChild == null &&
                            container.lineNumber == lineNumber));
        }
        else
            container.setLastLineBlank(false);
        // Clear the flag on every ancestor of the container.
        for(Node cont = container;cont.parent != null;
                cont = cont.parent, cont.setLastLineBlank(false));

        if(DEBUG) {
            System.out.println(" current = " + current.getClass().getSimpleName() + "@" + current.hashCode());
            System.out.println(" container = " + container.getClass().getSimpleName() + "@" + container.hashCode());
        }
        if(current != lastMatchedContainer &&
                container == lastMatchedContainer &&
                !blank &&
                current instanceof ParagraphNode &&
                current.stringContent != null) {
            // Lazy continuation of the paragraph that is still open.
            addLine(current, line, offset);
        }
        else {
            // Close the blocks that this line did not continue.
            while(current != lastMatchedContainer)
                current = finalize(current);

            if(container instanceof CodeBlockNode ||
                    container instanceof HtmlNode)
                addLine(container, line, offset);
            else if(blank)
                ; // do nothing
            else if(container.acceptLines()) {
                if(container instanceof HeaderNode &&
                        !((HeaderNode)container).setext)
                    chopTrailingHashtags(line, firstNonspace);
                addLine(container, line, firstNonspace);
            }
            else {
                container = addChild(container, new ParagraphNode());
                addLine(container, line, firstNonspace);
            }

            current = container;
        }
    }

    /**
     * Removes trailing whitespace and an optional closing sequence of
     * {@code '#'} characters (which must be preceded by a space) from an ATX
     * header line. The line is never shortened below {@code firstNonspace}.
     */
    private void chopTrailingHashtags(StringBuilder line, int firstNonspace) {
        int pos = line.length()-1;
        char c=0;
        while(pos >= 0 && ((c=line.charAt(pos)) == ' ' || c == '\n'))
            --pos;
        line.delete(pos+1, line.length());
        if(c == '#') {
            --pos;
            while(pos >= 0 && (c=line.charAt(pos)) == '#')
                --pos;
            if(c != ' ')
                return;
            --pos;
            while(pos >= 0 && line.charAt(pos) == ' ')
                --pos;
            ++pos;
            if(pos < firstNonspace)
                pos = firstNonspace;
            line.delete(pos, line.length());
        }
    }

    /**
     * Appends {@code line} starting from {@code offset}, excluding a trailing
     * {@code '\n'}, to the string content of the container. Lines after the
     * first one are separated with {@code '\n'}.
     */
    private void addLine(Node container, StringBuilder line, int offset) {
        if(container.stringContent == null)
            container.stringContent = new StringBuilder();
        else
            container.stringContent.append('\n');
        int length = line.length();
        if(length > 0 && line.charAt(length-1) == '\n')
            --length;
        if(DEBUG)
            System.out.println(" addLine(" + container.getClass().getSimpleName() + "@" + container.hashCode() + ", \"" + line.substring(offset, length) + "\")");
        container.stringContent.append(line, offset, length);
    }

    /**
     * Expands tab characters to spaces.
     *
     * <p>NOTE(review): the middle of this method was lost in the copy of the
     * source this file was restored from. The loop below is a reconstruction
     * that matches the surviving beginning and end of the method and pads each
     * tab to the next multiple of {@link #TAB_STOP} columns; confirm against
     * the upstream repository.
     *
     * @param str the line to process
     * @return {@code str} itself when it contains no tabs, otherwise the shared
     *         {@link #detabBuffer} holding the expanded line
     */
    private StringBuilder detab(StringBuilder str) {
        int length = str.length();
        for(int i=0;i<length;++i) {
            char c = str.charAt(i);
            if(c == '\t') {
                detabBuffer.delete(0, detabBuffer.length());
                detabBuffer.append(str, 0, i);
                for(;i<length;++i) {
                    c = str.charAt(i);
                    if(c == '\t') {
                        // The buffer length is the current output column.
                        int spaces = TAB_STOP - detabBuffer.length() % TAB_STOP;
                        while(spaces-- > 0)
                            detabBuffer.append(' ');
                    }
                    else
                        detabBuffer.append(c);
                }
                return detabBuffer;
            }
        }
        return str;
    }

    /**
     * Adds {@code child} under {@code parent} or, when {@code parent} cannot
     * contain it, under the nearest ancestor that can. The blocks skipped on
     * the way up are finalized.
     *
     * @return the added child
     */
    private Node addChild(Node parent, Node child) {
        child.lineNumber = lineNumber;
        if(DEBUG)
            System.out.println(" addChild(" + parent.getClass().getSimpleName() + "@" + parent.hashCode() + ", " +
                    child.getClass().getSimpleName() + "@" + child.hashCode() + ")");
        while(!parent.canContain(child))
            parent = finalize(parent);
        parent.addChild(child);
        return child;
    }

    /**
     * Closes a block and performs the post-processing specific to its type:
     * link reference definitions are extracted from paragraphs, the info
     * string is separated from fenced code blocks, trailing blank lines are
     * removed from indented code blocks and the tightness of lists is decided.
     *
     * @return the parent of the closed block
     */
    private Node finalize(Node node) {
        node.open = false;
        if(node instanceof ParagraphNode) {
            parseReferenceInline(node);
        }
        else if(node instanceof HeaderNode) {
            if(node.stringContent == null)
                node.stringContent = new StringBuilder(0);
        }
        else if(node instanceof CodeBlockNode) {
            CodeBlockNode codeBlock = (CodeBlockNode)node;
            if(codeBlock.fenced) {
                // The first stored line is the text following the opening fence.
                int firstLineLength = codeBlock.stringContent.indexOf("\n");
                String infoString;
                if(firstLineLength == -1) {
                    infoString = codeBlock.stringContent.toString().trim();
                    codeBlock.stringContent = new StringBuilder(0);
                }
                else {
                    infoString = codeBlock.stringContent.substring(0, firstLineLength).trim();
                    codeBlock.stringContent.delete(0, firstLineLength+1);
                }
                codeBlock.infoString = Reference.cleanUrl(infoString);
            }
            else {
                removeTrailingBlankLines(codeBlock.stringContent);
            }
        }
        else if(node instanceof ListNode) {
            ListNode list = (ListNode)node;
            list.tight = true;
            // The list is loose if a blank line separates two items or two
            // children of an item.
            itemLoop: for(Node item=list.firstChild;item != null;item = item.next) {
                if(item.lastLineBlank && item.next != null) {
                    list.tight = false;
                    break;
                }
                for(Node child=item.firstChild;child != null;child = child.next)
                    if(endsWithBlankLine(child) && (child.next != null || item.next != null)) {
                        list.tight = false;
                        break itemLoop;
                    }
            }
        }
        return node.parent;
    }

    /**
     * Tells whether the node, or one of the lists/items reached by following
     * last children from it, is marked as ending with a blank line.
     */
    private static boolean endsWithBlankLine(Node node) {
        while(true) {
            if(node.lastLineBlank)
                return true;
            node = node.lastChild;
            if(!(node instanceof ListNode) && !(node instanceof ItemNode))
                return false;
        }
    }

    /**
     * Truncates the buffer at the first line break of its trailing run of
     * lines that contain only spaces.
     */
    private static void removeTrailingBlankLines(StringBuilder str) {
        int endPos = str.length();
        int pos = endPos-1;
        while(pos >= 0) {
            char c = str.charAt(pos);
            if(c == '\n')
                endPos = pos;
            else if(c != ' ')
                break;
            --pos;
        }
        if(endPos < str.length())
            str.delete(endPos, str.length());
    }

    /**
     * Extracts link reference definitions ({@code [label]: url "title"}) from
     * the beginning of the string content of the node and stores them into
     * {@link #referenceMap}; the first definition of a label wins. The node is
     * removed from the tree if nothing but definitions was found in it.
     */
    private void parseReferenceInline(Node node) {
        StringBuilder input = node.stringContent;

        while(true) {
            int offset = 0;
            if(offset == input.length() || input.charAt(offset) != '[')
                return;

            // Label
            offset = Scanner.scanLinkLabel(input, offset);
            if(offset == -1 || offset == input.length()
                    || input.charAt(offset) != ':')
                return;
            String label = input.substring(1, offset-1);
            ++offset;

            // Url
            offset = spnl(input, offset);
            int linkStart = offset;
            offset = Scanner.scanLinkUrl(input, offset);
            if(offset == -1 || offset == linkStart)
                return;
            String url;
            if(linkStart < input.length() && input.charAt(linkStart) == '<')
                url = input.substring(linkStart+1, offset-1);
            else
                url = input.substring(linkStart, offset);
            url = Reference.cleanUrl(url);

            // Title
            int linkUrlEnd = offset;
            offset = spnl(input, offset);
            int titleStart = offset;
            offset = Scanner.scanLinkTitle(input, offset);
            String title;
            if(offset == -1) {
                // No title: the rest of the url line must be empty.
                offset = linkUrlEnd;
                char c = 0;
                while(offset < input.length() && (c = input.charAt(offset)) == ' ')
                    ++offset;
                if(c == '\n')
                    ++offset;
                else if(offset != input.length())
                    return;
                title = "";
            }
            else {
                title = input.substring(titleStart+1, offset-1);
                title = Reference.cleanTitle(title);
                char c = 0;
                while(offset < input.length() && (c = input.charAt(offset)) == ' ')
                    ++offset;
                if(c == '\n')
                    ++offset;
                else if(offset != input.length())
                    return;
            }
            Reference reference = new Reference(Reference.normalizeLabel(label), url, title);
            if(!referenceMap.contains(reference.label))
                referenceMap.put(reference.label, reference);

            if(offset == input.length()) {
                node.remove();
                return;
            }
            else
                input.delete(0, offset);
        }
    }

    /**
     * Skips spaces and at most one line break.
     *
     * @return the offset of the first character that was not skipped
     */
    private static int spnl(StringBuilder input, int offset) {
        boolean seenWhitespace = false;
        while(offset < input.length()) {
            char c = input.charAt(offset);
            if(c == ' ')
                ++offset;
            else if(c == '\n') {
                if(seenWhitespace)
                    return offset;
                else {
                    seenWhitespace = true;
                    ++offset;
                }
            }
            else
                return offset;
        }
        return offset;
    }

}