gerrit.simantics Code Review - simantics/platform.git/blobdiff - bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/markdown/internal/MarkdownParser.java
Migrated source code from Simantics SVN
[simantics/platform.git] / bundles / org.simantics.scl.compiler / src / org / simantics / scl / compiler / markdown / internal / MarkdownParser.java
diff --git a/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/markdown/internal/MarkdownParser.java b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/markdown/internal/MarkdownParser.java
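new file mode 100644
index 0000000..7d30856
--- /dev/null
+++ b/bundles/org.simantics.scl.compiler/src/org/simantics/scl/compiler/markdown/internal/MarkdownParser.java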
@@ -0,0 +1,627 @@
+package org.simantics.scl.compiler.markdown.internal;
+
+import gnu.trove.map.hash.THashMap;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.simantics.scl.compiler.markdown.inlines.Subject;
+import org.simantics.scl.compiler.markdown.nodes.BlockQuoteNode;
+import org.simantics.scl.compiler.markdown.nodes.CodeBlockNode;
+import org.simantics.scl.compiler.markdown.nodes.DocumentNode;
+import org.simantics.scl.compiler.markdown.nodes.ExtensionBlockNode;
+import org.simantics.scl.compiler.markdown.nodes.HeaderNode;
+import org.simantics.scl.compiler.markdown.nodes.HorizontalRuleNode;
+import org.simantics.scl.compiler.markdown.nodes.HtmlNode;
+import org.simantics.scl.compiler.markdown.nodes.ItemNode;
+import org.simantics.scl.compiler.markdown.nodes.ListNode;
+import org.simantics.scl.compiler.markdown.nodes.Node;
+import org.simantics.scl.compiler.markdown.nodes.ParagraphNode;
+import org.simantics.scl.compiler.markdown.nodes.Reference;
+
+public class MarkdownParser {
+    public static final boolean DEBUG = false; // when true, the parsing steps print a trace to System.out
+    public static final int CODE_INDENT = 4; // number of leading spaces from which a line counts as indented code
+    // Block tree under construction; current starts at the root and is moved by processLine and finalize.
+    private DocumentNode root = new DocumentNode();
+    private Node current = root;
+    // Per-line working state.
+    private StringBuilder detabBuffer = new StringBuilder(); // scratch buffer reused by detab() for lines containing tabs
+    private Scanner scanner = new Scanner(); // instance scans report their results through scanner.matched / level / bulletChar
+    private int lineNumber = 0; // incremented at the start of every processLine() call and stamped on new nodes
+    // Link reference definitions keyed by label; filled by parseReferenceInline, read by Subject.parseInlines.
+    private THashMap<String, Reference> referenceMap = new THashMap<String, Reference>();
+    
+    public DocumentNode parseDocument(Reader reader) throws IOException {
+        StringBuilder lineBuffer = new StringBuilder();
+        char secondNL = 0;
+        while(true) {
+            int c = reader.read();
+            if(c == -1) {
+                processLine(lineBuffer);
+                break;
+            }
+            else if(c == '\n' || c == '\r') {
+                if(lineBuffer.length() == 0 && c == secondNL)
+                    secondNL = 0;
+                else {
+                    processLine(lineBuffer);
+                    lineBuffer.delete(0, lineBuffer.length());
+                    secondNL = c == '\n' ? '\r' : '\n';
+                }
+            }
+            else
+                lineBuffer.append((char)c);
+        }
+        while(current != null)
+            current = finalize(current);
+        processInlines(root);
+        return root;
+    }
+    
+    public DocumentNode parseDocument(String text) {
+        try {
+            return parseDocument(new StringReader(text));
+        } catch (IOException e) {
+            // Should not be possible
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Runs inline parsing over the subtree rooted at {@code node}, children first.
+     * Only paragraph and header nodes are handed to {@link Subject#parseInlines}.
+     */
+    private void processInlines(Node node) {
+        Node child = node.firstChild;
+        while(child != null) {
+            processInlines(child);
+            child = child.next;
+        }
+        boolean holdsInlineText = node instanceof HeaderNode || node instanceof ParagraphNode;
+        if(holdsInlineText)
+            Subject.parseInlines(referenceMap, node);
+    }
+
+    /**
+     * Feeds one line of input (without its line terminator) into the block tree.
+     * <p>
+     * The method works in phases:
+     * <ol>
+     * <li>tabs are expanded and a sentinel '\n' is appended so that every scan
+     *     below can stop on it without bounds checks;</li>
+     * <li>starting from the root, the chain of open last children is walked and
+     *     each block is asked whether the line still belongs to it, consuming the
+     *     block's prefix (quote marker, item indentation, code indentation);</li>
+     * <li>a blank line arriving in a container whose previous line was already
+     *     blank closes the first list found on the root's last-child chain;</li>
+     * <li>new blocks that start on this line are opened (indented and fenced
+     *     code, block quotes, ATX and setext headers, HTML blocks, horizontal
+     *     rules, bullet and ordered list items, <code>::name[argument]</code>
+     *     extension blocks);</li>
+     * <li>blank-line flags are updated and the rest of the line is attached,
+     *     either as a lazy continuation of the current paragraph or to the
+     *     innermost container, creating a paragraph when that container does not
+     *     accept text itself.</li>
+     * </ol>
+     *
+     * @param line the line content; the buffer actually used (this one, or the
+     *             shared detab buffer when the line contained tabs) is modified:
+     *             a '\n' is appended and trailing '#' of ATX headers may be removed
+     */
+    private void processLine(StringBuilder line) {
+        ++lineNumber;
+        line = detab(line);
+        if(DEBUG)
+            System.out.println("processLine(" + line + ")");
+        line.append('\n'); // Easier to detect eol
+
+        Node container = root;
+
+        int offset = 0;
+        boolean blank = false;
+        boolean allMatched = true;
+        // Phase 1: descend through the open blocks and check that each one continues on this line.
+        while(container.lastChild != null && container.lastChild.open) {
+            container = container.lastChild;
+
+            int firstNonspace = offset;
+            char c;
+            while((c=line.charAt(firstNonspace)) == ' ')
+                ++firstNonspace;
+
+            int indent = firstNonspace - offset;
+            blank = c == '\n';
+
+            if(container instanceof BlockQuoteNode) {
+                if(indent <= 3 && c == '>') {
+                    // Consume the marker and one optional following space
+                    offset = firstNonspace + 1;
+                    if(line.charAt(offset) == ' ')
+                        ++offset;
+                }
+                else
+                    allMatched = false;
+            }
+            else if(container instanceof ItemNode) {
+                ItemNode item = (ItemNode)container;
+                if(indent >= item.indentation) {
+                    offset += item.indentation;
+                }
+                else if(blank)
+                    offset = firstNonspace;
+                else
+                    allMatched = false;
+            }
+            else if(container instanceof CodeBlockNode) {
+                CodeBlockNode codeBlock = (CodeBlockNode)container;
+                if(!codeBlock.fenced) {
+                    if(indent >= CODE_INDENT)
+                        offset += CODE_INDENT;
+                    else if(blank)
+                        offset = firstNonspace;
+                    else
+                        allMatched = false;
+                }
+                else {
+                    if(indent <= 3 &&
+                            Scanner.isCloseCodeFence(line, firstNonspace,
+                                codeBlock.fenceChar, codeBlock.fenceLength)) {
+                        // The closing fence ends the block; the fence line itself carries no content
+                        current = finalize(container);
+                        return;
+                    }
+                    else {
+                        // Skip at most as many spaces as the opening fence was indented by
+                        int i = codeBlock.fenceOffset;
+                        while(i > 0 && line.charAt(offset) == ' ') {
+                            ++offset;
+                            --i;
+                        }
+                    }
+                }
+            }
+            else if(container instanceof HeaderNode) {
+                // A header never continues onto a following line
+                allMatched = false;
+            }
+            else if(container instanceof HtmlNode) {
+                if(blank)
+                    allMatched = false;
+            }
+            else if(container instanceof ParagraphNode) {
+                if(blank)
+                    allMatched = false;
+            }
+
+            if(!allMatched) {
+                container = container.parent;
+                break;
+            }
+        }
+
+        Node lastMatchedContainer = container;
+        if(DEBUG)
+            System.out.println("    lastMatchedContainer = " + lastMatchedContainer.getClass().getSimpleName() + "@" + lastMatchedContainer.hashCode());
+
+        // Phase 2: a blank line following a blank line breaks out of the list on the last-child chain.
+        if(blank && container.lastLineBlank) {
+            //System.out.println("    DOUBLE BREAK " + container.getClass().getSimpleName() + "@" + container.hashCode());
+            Node b = root;
+            while(b != null && !(b instanceof ListNode))
+                b = b.lastChild;
+
+            if(b != null) {
+                while(container != null && container != b)
+                    container = finalize(container);
+                finalize(b);
+                container = b.parent;
+            }
+        }
+
+        // Phase 3: open the new blocks that start on this line.
+        boolean maybeLazy = current instanceof ParagraphNode;
+        while(!(container instanceof CodeBlockNode) && !(container instanceof HtmlNode)) {
+            int firstNonspace = offset;
+            char c;
+            while((c=line.charAt(firstNonspace)) == ' ')
+                ++firstNonspace;
+
+            int indent = firstNonspace - offset;
+            blank = c == '\n';
+
+            if(indent >= CODE_INDENT) {
+                // Indented code cannot interrupt a paragraph and cannot start with a blank line
+                if(!maybeLazy && !blank) {
+                    offset += CODE_INDENT;
+                    container = addChild(container, new CodeBlockNode());
+                }
+                else
+                    break;
+            }
+            else if(c == '>') {
+                offset = firstNonspace + 1;
+                if(line.charAt(offset) == ' ')
+                    ++offset;
+                container = addChild(container, new BlockQuoteNode());
+            }
+            else if(c == '#' && scanner.isAtxHeaderStart(line, firstNonspace)) {
+                offset = firstNonspace + scanner.matched;
+                container = addChild(container, new HeaderNode(scanner.level, false));
+            }
+            else if((c == '`' || c == '~') && scanner.isOpenCodeFence(line, firstNonspace, c)) {
+                container = addChild(container, new CodeBlockNode(c, scanner.level, firstNonspace - offset));
+                offset = firstNonspace + scanner.matched;
+            }
+            else if(Scanner.isHtmlBlockTag(line, firstNonspace)) {
+                container = addChild(container, new HtmlNode());
+            }
+            else if((c == '=' || c == '-')
+                    && container instanceof ParagraphNode
+                    && Scanner.isSetextHeaderLine(line, firstNonspace, c)
+                    && container.stringContent.indexOf("\n") == -1
+                    ) {
+                // A single-line paragraph followed by an underline becomes a setext header:
+                // build the header and splice it into the tree in place of the paragraph.
+                HeaderNode header = new HeaderNode(c == '=' ? 1 : 2, true);
+                header.lineNumber = container.lineNumber;
+                if(DEBUG)
+                    System.out.println("    Replace ParagraphNode@" + System.identityHashCode(container) + " with HeaderNode@" + System.identityHashCode(header));
+                header.stringContent = container.stringContent;
+                header.parent = container.parent;
+                header.prev = container.prev;
+                if(header.prev != null)
+                    header.prev.next = header;
+                if(header.parent.lastChild == container)
+                    header.parent.lastChild = header;
+                if(header.parent.firstChild == container)
+                    header.parent.firstChild = header;
+                // The paragraph is no longer part of the tree, so every reference that is
+                // later compared by identity has to follow the replacement. Both must be
+                // retargeted together, otherwise the finalize loop at the end of this method
+                // would never meet lastMatchedContainer.
+                if(current == container)
+                    current = header;
+                if(lastMatchedContainer == container)
+                    lastMatchedContainer = header;
+                container = header;
+                offset = line.length()-1;
+            }
+            else if(!(container instanceof ParagraphNode && !allMatched)
+                    && (c == '*' || c == '_' || c == '-')
+                    && Scanner.isHRule(line, firstNonspace, c)) {
+                container = addChild(container, new HorizontalRuleNode());
+                container = finalize(container);
+                offset = line.length()-1;
+            }
+            else if((c == '*' || c == '+' || c == '-') &&
+                    (line.charAt(firstNonspace+1) == ' ' || line.charAt(firstNonspace+1) == '\n')) {
+                // Bullet list item
+                int originalOffset = offset;
+                offset = firstNonspace + 1;
+                int i = 0;
+                char c2 = 0;
+                while(i <= 5 && (c2 = line.charAt(offset+i)) == ' ')
+                    ++i;
+                // With 5+ spaces, no space, or an empty item only one space belongs to the marker
+                if(i >= 5|| i < 1 || c2 == '\n') {
+                    if(i > 0)
+                        ++offset;
+                }
+                else {
+                    offset += i;
+                }
+
+                if(!(container instanceof ListNode) ||
+                        !((ListNode)container).isCompatible(c)) {
+                    container = addChild(container, new ListNode(c));
+                }
+
+                if(DEBUG) {
+                    System.out.println("    indentation = " + (offset - originalOffset + (i == 0 ? 1 : 0)));
+                }
+                container = addChild(container, new ItemNode(offset - originalOffset + (i == 0 ? 1 : 0)));
+            }
+            else if(Character.isDigit(c) && scanner.isListMarker(line, firstNonspace)) {
+                // Ordered list item; same padding rules as for bullet items
+                int originalOffset = offset;
+                offset = firstNonspace + scanner.matched;
+                int i = 0;
+                char c2 = 0;
+                while(i <= 5 && (c2 = line.charAt(offset+i)) == ' ')
+                    ++i;
+                if(i >= 5|| i < 1 || c2 == '\n') {
+                    if(i > 0)
+                        ++offset;
+                }
+                else {
+                    offset += i;
+                }
+
+                if(!(container instanceof ListNode) ||
+                        !((ListNode)container).isCompatible(scanner.bulletChar)) {
+                    container = addChild(container, new ListNode(scanner.bulletChar, scanner.level));
+                }
+
+                if(DEBUG) {
+                    System.out.println("    indentation = " + (offset - originalOffset + (i == 0 ? 1 : 0)));
+                }
+                container = addChild(container, new ItemNode(offset - originalOffset + (i == 0 ? 1 : 0)));
+            }
+            else if(c == ':' && line.charAt(firstNonspace+1) == ':') {
+                // Extension block of the form ::name[argument]
+                int p=firstNonspace+2;
+                while(Character.isAlphabetic(c=line.charAt(p)) || Character.isDigit(c) || c == ' ' || c=='_')
+                    ++p;
+                if(c != '[')
+                    break;
+                int bracketBegin = p;
+                ++p;
+                while(true) {
+                    c = line.charAt(p++);
+                    if(c == ']' || c == '\n')
+                        break;
+                    if(c == '\\') {
+                        // p already points at the character after the backslash. It is in range
+                        // because the backslash was not the sentinel '\n' that ends the line.
+                        char escaped = line.charAt(p);
+                        if(escaped == '\\' || escaped == ']')
+                            ++p; // escaped backslash or bracket: does not terminate the argument
+                    }
+                }
+                if(c == ']') {
+                    offset = p;
+                    container = addChild(container, new ExtensionBlockNode(
+                            line.substring(firstNonspace+2, bracketBegin).trim(),
+                            line.substring(bracketBegin+1, p-1).trim()));
+                }
+                else
+                    break;
+            }
+            else
+                break;
+
+            if(container.acceptLines())
+                break;
+            maybeLazy = false;
+        }
+
+        // Phase 4: bookkeeping of blank lines, then attach the remaining text.
+        int firstNonspace = offset;
+        char c;
+        while((c=line.charAt(firstNonspace)) == ' ')
+            ++firstNonspace;
+
+        blank = c == '\n';
+
+        if(blank) {
+            if(container.lastChild != null)
+                container.lastChild.setLastLineBlank(true);
+            // Block quotes, headers, fenced code and an item opened on this very line
+            // do not count the line as a trailing blank line
+            container.setLastLineBlank(
+                    !(container instanceof BlockQuoteNode) &&
+                    !(container instanceof HeaderNode) &&
+                    !(container instanceof CodeBlockNode && ((CodeBlockNode)container).fenced) &&
+                    !(container instanceof ItemNode &&
+                            container.firstChild == null &&
+                            container.lineNumber == lineNumber));
+        }
+        else
+            container.setLastLineBlank(false);
+        // Clear the flag on all ancestors
+        for(Node cont = container;cont.parent != null;
+                cont = cont.parent, cont.setLastLineBlank(false));
+
+        if(DEBUG) {
+            System.out.println("    current = " + current.getClass().getSimpleName() + "@" + current.hashCode());
+            System.out.println("    container = " + container.getClass().getSimpleName() + "@" + container.hashCode());
+        }
+        if(current != lastMatchedContainer &&
+                container == lastMatchedContainer &&
+                !blank &&
+                current instanceof ParagraphNode &&
+                current.stringContent != null) {
+            // Lazy continuation: the line extends the current paragraph although
+            // not all of its enclosing blocks matched
+            addLine(current, line, offset);
+        }
+        else {
+            // Close the blocks that did not match this line
+            while(current != lastMatchedContainer)
+                current = finalize(current);
+
+            if(container instanceof CodeBlockNode ||
+                    container instanceof HtmlNode)
+                addLine(container, line, offset);
+            else if(blank)
+                ; // do nothing
+            else if(container.acceptLines()) {
+                if(container instanceof HeaderNode &&
+                        !((HeaderNode)container).setext)
+                    chopTrailingHashtags(line, firstNonspace);
+                addLine(container, line, firstNonspace);
+            }
+            else {
+                container = addChild(container, new ParagraphNode());
+                addLine(container, line, firstNonspace);
+            }
+
+            current = container;
+        }
+    }
+    
+    /**
+     * Trims the end of an ATX header line in place.
+     * <p>
+     * Trailing spaces and newlines are always removed. If the line then ends in
+     * a run of '#' that is preceded by a space, the run and the spaces in front
+     * of it are removed as well, but never anything before {@code firstNonspace}.
+     *
+     * @param line          the line buffer to modify
+     * @param firstNonspace index where the header text starts
+     */
+    private void chopTrailingHashtags(StringBuilder line, int firstNonspace) {
+        // Strip trailing blanks, remembering the last character that was looked at.
+        int end = line.length();
+        char last = 0;
+        while(end > 0) {
+            last = line.charAt(end-1);
+            if(last != ' ' && last != '\n')
+                break;
+            --end;
+        }
+        line.setLength(end);
+        if(last != '#')
+            return;
+
+        // Walk left over the closing run of '#'.
+        int i = end - 2;
+        char before = last;
+        while(i >= 0) {
+            before = line.charAt(i);
+            if(before != '#')
+                break;
+            --i;
+        }
+        // The closing sequence only counts when a space separates it from the text.
+        if(before != ' ')
+            return;
+
+        // Drop the separating spaces too, but keep everything before the header text.
+        --i;
+        while(i >= 0 && line.charAt(i) == ' ')
+            --i;
+        int cut = Math.max(i + 1, firstNonspace);
+        line.delete(cut, line.length());
+    }
+
+    /**
+     * Appends the part of {@code line} starting at {@code offset} to the text
+     * content of {@code container}. A single trailing '\n' of the line is left
+     * out; lines already present are separated from the new one by '\n'.
+     */
+    private void addLine(Node container, StringBuilder line, int offset) {
+        StringBuilder content = container.stringContent;
+        if(content != null)
+            content.append('\n');
+        else
+            container.stringContent = content = new StringBuilder();
+
+        int end = line.length();
+        boolean endsWithNewline = end > 0 && line.charAt(end-1) == '\n';
+        if(endsWithNewline)
+            --end;
+        if(DEBUG)
+            System.out.println("    addLine(" + container.getClass().getSimpleName() + "@" + container.hashCode() + ", \"" + line.substring(offset, end) + "\")");
+        content.append(line, offset, end);
+    }
+
+    /**
+     * Expands tab characters to spaces using tab stops every four columns.
+     *
+     * @param str the line to expand; it is not modified
+     * @return {@code str} itself when it contains no tab, otherwise the shared
+     *         detab buffer holding the expanded line (overwritten by the next
+     *         call that encounters a tab)
+     */
+    private StringBuilder detab(StringBuilder str) {
+        int length = str.length();
+        int firstTab = 0;
+        while(firstTab < length && str.charAt(firstTab) != '\t')
+            ++firstTab;
+        if(firstTab == length)
+            return str;
+
+        // Everything before the first tab is copied verbatim.
+        detabBuffer.setLength(0);
+        detabBuffer.append(str, 0, firstTab);
+        for(int i = firstTab; i < length; ++i) {
+            char c = str.charAt(i);
+            if(c != '\t') {
+                detabBuffer.append(c);
+                continue;
+            }
+            // Pad up to the next multiple of four columns (always at least one space).
+            int padding = 4 - detabBuffer.length()%4;
+            for(int k = 0; k < padding; ++k)
+                detabBuffer.append(' ');
+        }
+        return detabBuffer;
+    }
+    
+    /**
+     * Attaches {@code child} below {@code parent}, or below the nearest ancestor
+     * that can contain it. Every block passed over on the way up is finalized.
+     * The child is stamped with the current line number.
+     *
+     * @return the newly attached child
+     */
+    private Node addChild(Node parent, Node child) {
+        child.lineNumber = lineNumber;
+        if(DEBUG)
+            System.out.println("    addChild(" + parent.getClass().getSimpleName() + "@" + parent.hashCode() + ", " + 
+                    child.getClass().getSimpleName() + "@" + child.hashCode() + ")");
+        Node target;
+        for(target = parent; !target.canContain(child); target = finalize(target))
+            ; // climb until a block accepts the child
+        target.addChild(child);
+        return child;
+    }
+
+    /**
+     * Closes a block and performs the post-processing that depends on its type:
+     * <ul>
+     * <li>paragraphs have leading link reference definitions extracted;</li>
+     * <li>headers are guaranteed a non-null (possibly empty) text content;</li>
+     * <li>fenced code blocks have their first content line split off as the
+     *     info string, indented code blocks lose trailing blank lines;</li>
+     * <li>lists get their {@code tight} flag computed.</li>
+     * </ul>
+     *
+     * @return the parent of the node, read after the post-processing has run
+     */
+    private Node finalize(Node node) {
+        node.open = false;
+        if(node instanceof ParagraphNode) {
+            parseReferenceInline(node);
+        }
+        else if(node instanceof HeaderNode) {
+            if(node.stringContent == null)
+                node.stringContent = new StringBuilder(0);
+        }
+        else if(node instanceof CodeBlockNode) {
+            CodeBlockNode codeBlock = (CodeBlockNode)node;
+            if(!codeBlock.fenced) {
+                removeTrailingBlankLines(codeBlock.stringContent);
+            }
+            else {
+                // The first line of a fenced block holds the info string, the rest is code.
+                StringBuilder content = codeBlock.stringContent;
+                int firstLineEnd = content.indexOf("\n");
+                String info;
+                if(firstLineEnd != -1) {
+                    info = content.substring(0, firstLineEnd).trim();
+                    content.delete(0, firstLineEnd+1);
+                }
+                else {
+                    info = content.toString().trim();
+                    codeBlock.stringContent = new StringBuilder(0);
+                }
+                codeBlock.infoString = Reference.cleanUrl(info);
+            }
+        }
+        else if(node instanceof ListNode) {
+            ListNode list = (ListNode)node;
+            // A list is loose as soon as a blank line separates two items or
+            // two blocks inside an item.
+            boolean tight = true;
+            for(Node item = list.firstChild; tight && item != null; item = item.next) {
+                boolean hasFollowingItem = item.next != null;
+                if(item.lastLineBlank && hasFollowingItem) {
+                    tight = false;
+                    continue;
+                }
+                for(Node child = item.firstChild; tight && child != null; child = child.next)
+                    if(endsWithBlankLine(child) && (child.next != null || hasFollowingItem))
+                        tight = false;
+            }
+            list.tight = tight;
+        }
+        return node.parent;
+    }
+
+    /**
+     * Tells whether {@code node}, or a list or item reached by following last
+     * children from it, is flagged as ending with a blank line. The descent
+     * stops at the first last child that is neither a list nor an item.
+     */
+    private static boolean endsWithBlankLine(Node node) {
+        Node cursor = node;
+        do {
+            if(cursor.lastLineBlank)
+                return true;
+            cursor = cursor.lastChild;
+        } while(cursor instanceof ListNode || cursor instanceof ItemNode);
+        return false;
+    }
+
+    /**
+     * Truncates {@code str} in place at the first newline of its trailing run of
+     * spaces and newlines. Trailing spaces that are not preceded by a newline
+     * within that run are kept.
+     */
+    private static void removeTrailingBlankLines(StringBuilder str) {
+        int cut = str.length();
+        for(int i = cut - 1; i >= 0; --i) {
+            char c = str.charAt(i);
+            if(c == '\n')
+                cut = i;          // everything from this newline on is blank so far
+            else if(c != ' ')
+                break;            // reached real content
+        }
+        if(cut < str.length())
+            str.setLength(cut);
+    }
+    
+    /**
+     * Extracts link reference definitions of the form
+     * <code>[label]: url "title"</code> from the beginning of a paragraph.
+     * <p>
+     * Definitions are consumed one after another for as long as the remaining
+     * text starts with a well-formed one. Each is stored in the reference map
+     * unless its normalized label is already present, and its text is deleted
+     * from the paragraph. When the definitions use up the whole paragraph, the
+     * paragraph node is removed from the tree.
+     */
+    private void parseReferenceInline(Node node) {
+        StringBuilder text = node.stringContent;
+
+        for(;;) {
+            // A definition has to start at the very beginning of the remaining text.
+            if(text.length() == 0 || text.charAt(0) != '[')
+                return;
+
+            // Label, which must be followed directly by ':'
+            int labelEnd = Scanner.scanLinkLabel(text, 0);
+            if(labelEnd == -1 || labelEnd == text.length()
+                    || text.charAt(labelEnd) != ':')
+                return;
+            String label = text.substring(1, labelEnd-1);
+
+            // Url, optionally wrapped in angle brackets
+            int urlStart = spnl(text, labelEnd+1);
+            int urlEnd = Scanner.scanLinkUrl(text, urlStart);
+            if(urlEnd == -1 || urlEnd == urlStart)
+                return;
+            boolean angleBracketed = urlStart < text.length() && text.charAt(urlStart) == '<';
+            String rawUrl = angleBracketed
+                    ? text.substring(urlStart+1, urlEnd-1)
+                    : text.substring(urlStart, urlEnd);
+            String url = Reference.cleanUrl(rawUrl);
+
+            // Optional title; without one, scanning resumes right after the url
+            int titleStart = spnl(text, urlEnd);
+            int titleEnd = Scanner.scanLinkTitle(text, titleStart);
+            String title;
+            int cursor;
+            if(titleEnd == -1) {
+                title = "";
+                cursor = urlEnd;
+            }
+            else {
+                title = Reference.cleanTitle(text.substring(titleStart+1, titleEnd-1));
+                cursor = titleEnd;
+            }
+
+            // Only spaces may follow up to the end of the line or of the text
+            char stop = 0;
+            while(cursor < text.length() && (stop = text.charAt(cursor)) == ' ')
+                ++cursor;
+            if(stop == '\n')
+                ++cursor;
+            else if(cursor != text.length())
+                return;
+
+            // The first definition of a label wins
+            Reference reference = new Reference(Reference.normalizeLabel(label), url, title);
+            if(!referenceMap.contains(reference.label))
+                referenceMap.put(reference.label, reference);
+
+            if(cursor == text.length()) {
+                node.remove();
+                return;
+            }
+            text.delete(0, cursor);
+        }
+    }
+    
+    /**
+     * Skips spaces and at most one newline.
+     *
+     * @param input  the text to scan
+     * @param offset index to start from
+     * @return the index of the first character that is neither a space nor the
+     *         first newline encountered, or the length of {@code input} if the
+     *         end is reached
+     */
+    private static int spnl(StringBuilder input, int offset) {
+        boolean newlineConsumed = false;
+        int pos = offset;
+        for(; pos < input.length(); ++pos) {
+            char c = input.charAt(pos);
+            if(c == ' ')
+                continue;
+            // Stop at anything that is not the first newline
+            if(c != '\n' || newlineConsumed)
+                return pos;
+            newlineConsumed = true;
+        }
+        return pos;
+    }
+
+}