--- /dev/null
+package org.simantics.scl.compiler.markdown.internal;
+
+import gnu.trove.map.hash.THashMap;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.simantics.scl.compiler.markdown.inlines.Subject;
+import org.simantics.scl.compiler.markdown.nodes.BlockQuoteNode;
+import org.simantics.scl.compiler.markdown.nodes.CodeBlockNode;
+import org.simantics.scl.compiler.markdown.nodes.DocumentNode;
+import org.simantics.scl.compiler.markdown.nodes.ExtensionBlockNode;
+import org.simantics.scl.compiler.markdown.nodes.HeaderNode;
+import org.simantics.scl.compiler.markdown.nodes.HorizontalRuleNode;
+import org.simantics.scl.compiler.markdown.nodes.HtmlNode;
+import org.simantics.scl.compiler.markdown.nodes.ItemNode;
+import org.simantics.scl.compiler.markdown.nodes.ListNode;
+import org.simantics.scl.compiler.markdown.nodes.Node;
+import org.simantics.scl.compiler.markdown.nodes.ParagraphNode;
+import org.simantics.scl.compiler.markdown.nodes.Reference;
+
+public class MarkdownParser {
+ // When true, the parser traces its decisions to System.out.
+ public static final boolean DEBUG = false;
+ // Number of leading spaces at which a line is treated as indented code.
+ public static final int CODE_INDENT = 4;
+
+ // Root of the block tree being built; this is what parseDocument returns.
+ private DocumentNode root = new DocumentNode();
+ // Block the parser is currently positioned on. It is updated at the end of
+ // every processLine call and becomes null once parseDocument has closed
+ // every block, so an instance is effectively single-use.
+ private Node current = root;
+
+ // Scratch buffer reused by detab for lines that contain tab characters.
+ private StringBuilder detabBuffer = new StringBuilder();
+ // Scanner instance whose matched/level/bulletChar fields are read right
+ // after a successful is...() call.
+ private Scanner scanner = new Scanner();
+ // Number of lines processed so far; copied into each node by addChild.
+ private int lineNumber = 0;
+
+ // Link reference definitions collected from paragraphs, keyed by the
+ // normalized label; the first definition of a label wins.
+ private THashMap<String, Reference> referenceMap = new THashMap<String, Reference>();
+
+ public DocumentNode parseDocument(Reader reader) throws IOException {
+ StringBuilder lineBuffer = new StringBuilder();
+ char secondNL = 0;
+ while(true) {
+ int c = reader.read();
+ if(c == -1) {
+ processLine(lineBuffer);
+ break;
+ }
+ else if(c == '\n' || c == '\r') {
+ if(lineBuffer.length() == 0 && c == secondNL)
+ secondNL = 0;
+ else {
+ processLine(lineBuffer);
+ lineBuffer.delete(0, lineBuffer.length());
+ secondNL = c == '\n' ? '\r' : '\n';
+ }
+ }
+ else
+ lineBuffer.append((char)c);
+ }
+ while(current != null)
+ current = finalize(current);
+ processInlines(root);
+ return root;
+ }
+
+ public DocumentNode parseDocument(String text) {
+ try {
+ return parseDocument(new StringReader(text));
+ } catch (IOException e) {
+ // Should not be possible
+ throw new RuntimeException(e);
+ }
+ }
+
+ /**
+  * Runs inline parsing over the subtree rooted at {@code node}. Children are
+  * visited before the node itself, and only paragraph and header nodes are
+  * handed to {@code Subject.parseInlines}.
+  */
+ private void processInlines(Node node) {
+     Node child = node.firstChild;
+     while(child != null) {
+         processInlines(child);
+         child = child.next;
+     }
+     boolean carriesInlineText = node instanceof ParagraphNode || node instanceof HeaderNode;
+     if(carriesInlineText)
+         Subject.parseInlines(referenceMap, node);
+ }
+
+ /**
+  * Feeds one line of input (without its line terminator) into the block tree.
+  * <p>
+  * The method works in four steps:
+  * <ol>
+  * <li>walk down the chain of open blocks and check, for each of them, whether
+  *     this line satisfies its continuation condition;</li>
+  * <li>if this is the second consecutive blank line, close the outermost open
+  *     list;</li>
+  * <li>try to open new blocks (quote, header, code, list, ...) at the current
+  *     offset until a block that accepts text lines is reached or nothing
+  *     matches;</li>
+  * <li>record the blank-line state and append the remaining text either to the
+  *     open paragraph (lazy continuation), to the current container, or to a
+  *     freshly created paragraph.</li>
+  * </ol>
+  * Tabs are expanded first and a {@code '\n'} sentinel is appended, so every
+  * forward scan over {@code line} stops at the sentinel at the latest.
+  *
+  * @param line the line to process; the buffer is modified by this method
+  */
+ private void processLine(StringBuilder line) {
+     ++lineNumber;
+     line = detab(line);
+     if(DEBUG)
+         System.out.println("processLine(" + line + ")");
+     line.append('\n'); // Easier to detect eol
+
+     Node container = root;
+
+     // ---- Step 1: match the line against the blocks that are still open ----
+     int offset = 0;
+     boolean blank = false;
+     boolean allMatched = true;
+     while(container.lastChild != null && container.lastChild.open) {
+         container = container.lastChild;
+
+         int firstNonspace = offset;
+         char c;
+         while((c=line.charAt(firstNonspace)) == ' ')
+             ++firstNonspace;
+
+         int indent = firstNonspace - offset;
+         blank = c == '\n';
+
+         if(container instanceof BlockQuoteNode) {
+             if(indent <= 3 && c == '>') {
+                 offset = firstNonspace + 1;
+                 // One space after '>' belongs to the marker
+                 if(line.charAt(offset) == ' ')
+                     ++offset;
+             }
+             else
+                 allMatched = false;
+         }
+         else if(container instanceof ItemNode) {
+             ItemNode item = (ItemNode)container;
+             if(indent >= item.indentation) {
+                 offset += item.indentation;
+             }
+             else if(blank)
+                 offset = firstNonspace;
+             else
+                 allMatched = false;
+         }
+         else if(container instanceof CodeBlockNode) {
+             CodeBlockNode codeBlock = (CodeBlockNode)container;
+             if(!codeBlock.fenced) {
+                 if(indent >= CODE_INDENT)
+                     offset += CODE_INDENT;
+                 else if(blank)
+                     offset = firstNonspace;
+                 else
+                     allMatched = false;
+             }
+             else {
+                 if(indent <= 3 &&
+                         Scanner.isCloseCodeFence(line, firstNonspace,
+                                 codeBlock.fenceChar, codeBlock.fenceLength)) {
+                     // The closing fence ends the block; nothing else is on this line
+                     current = finalize(container);
+                     return;
+                 }
+                 else {
+                     // Strip at most as many spaces as the opening fence was indented
+                     int i = codeBlock.fenceOffset;
+                     while(i > 0 && line.charAt(offset) == ' ') {
+                         ++offset;
+                         --i;
+                     }
+                 }
+             }
+         }
+         else if(container instanceof HeaderNode) {
+             // A header never continues onto another line
+             allMatched = false;
+         }
+         else if(container instanceof HtmlNode) {
+             if(blank)
+                 allMatched = false;
+         }
+         else if(container instanceof ParagraphNode) {
+             if(blank)
+                 allMatched = false;
+         }
+
+         if(!allMatched) {
+             container = container.parent;
+             break;
+         }
+     }
+
+     Node lastMatchedContainer = container;
+     if(DEBUG)
+         System.out.println(" lastMatchedContainer = " + lastMatchedContainer.getClass().getSimpleName() + "@" + lastMatchedContainer.hashCode());
+
+     // ---- Step 2: two consecutive blank lines break out of the outermost open list ----
+     if(blank && container.lastLineBlank) {
+         Node b = root;
+         while(b != null && !(b instanceof ListNode))
+             b = b.lastChild;
+
+         if(b != null) {
+             while(container != null && container != b)
+                 container = finalize(container);
+             finalize(b);
+             container = b.parent;
+         }
+     }
+
+     // ---- Step 3: open new blocks ----
+     // While the deepest open block is a paragraph, an indented line is a lazy
+     // continuation of it rather than the start of a code block.
+     boolean maybeLazy = current instanceof ParagraphNode;
+     while(!(container instanceof CodeBlockNode) && !(container instanceof HtmlNode)) {
+         int firstNonspace = offset;
+         char c;
+         while((c=line.charAt(firstNonspace)) == ' ')
+             ++firstNonspace;
+
+         int indent = firstNonspace - offset;
+         blank = c == '\n';
+
+         if(indent >= CODE_INDENT) {
+             if(!maybeLazy && !blank) {
+                 offset += CODE_INDENT;
+                 container = addChild(container, new CodeBlockNode());
+             }
+             else
+                 break;
+         }
+         else if(c == '>') {
+             offset = firstNonspace + 1;
+             if(line.charAt(offset) == ' ')
+                 ++offset;
+             container = addChild(container, new BlockQuoteNode());
+         }
+         else if(c == '#' && scanner.isAtxHeaderStart(line, firstNonspace)) {
+             offset = firstNonspace + scanner.matched;
+             container = addChild(container, new HeaderNode(scanner.level, false));
+         }
+         else if((c == '`' || c == '~') && scanner.isOpenCodeFence(line, firstNonspace, c)) {
+             container = addChild(container, new CodeBlockNode(c, scanner.level, firstNonspace - offset));
+             offset = firstNonspace + scanner.matched;
+         }
+         else if(Scanner.isHtmlBlockTag(line, firstNonspace)) {
+             container = addChild(container, new HtmlNode());
+         }
+         else if((c == '=' || c == '-')
+                 && container instanceof ParagraphNode
+                 && Scanner.isSetextHeaderLine(line, firstNonspace, c)
+                 && container.stringContent.indexOf("\n") == -1
+                 ) {
+             // A single-line paragraph followed by an underline becomes a setext
+             // header: splice a new HeaderNode into the tree in place of the paragraph.
+             Node paragraph = container;
+             HeaderNode header = new HeaderNode(c == '=' ? 1 : 2, true);
+             header.lineNumber = paragraph.lineNumber;
+             if(DEBUG)
+                 System.out.println(" Replace ParagraphNode@" + System.identityHashCode(paragraph) + " with HeaderNode@" + System.identityHashCode(header));
+             header.stringContent = paragraph.stringContent;
+             header.parent = paragraph.parent;
+             header.prev = paragraph.prev;
+             if(header.prev != null)
+                 header.prev.next = header;
+             if(header.parent.lastChild == paragraph)
+                 header.parent.lastChild = header;
+             if(header.parent.firstChild == paragraph)
+                 header.parent.firstChild = header;
+             // Re-point every reference to the detached paragraph. Both variables
+             // must move together: the final step compares them with each other
+             // and walks up from current until it reaches lastMatchedContainer.
+             if(lastMatchedContainer == paragraph)
+                 lastMatchedContainer = header;
+             if(current == paragraph)
+                 current = header;
+             container = header;
+             // The underline itself carries no content
+             offset = line.length()-1;
+         }
+         else if(!(container instanceof ParagraphNode && !allMatched)
+                 && (c == '*' || c == '_' || c == '-')
+                 && Scanner.isHRule(line, firstNonspace, c)) {
+             container = addChild(container, new HorizontalRuleNode());
+             container = finalize(container);
+             offset = line.length()-1;
+         }
+         else if((c == '*' || c == '+' || c == '-') &&
+                 (line.charAt(firstNonspace+1) == ' ' || line.charAt(firstNonspace+1) == '\n')) {
+             // Bullet list item
+             int originalOffset = offset;
+             offset = firstNonspace + 1;
+             // i = number of spaces after the marker, counted up to 6
+             int i = 0;
+             char c2 = 0;
+             while(i <= 5 && (c2 = line.charAt(offset+i)) == ' ')
+                 ++i;
+             if(i >= 5|| i < 1 || c2 == '\n') {
+                 // Too many spaces (indented code inside the item), none at all,
+                 // or an empty item: only one space belongs to the marker
+                 if(i > 0)
+                     ++offset;
+             }
+             else {
+                 offset += i;
+             }
+
+             if(!(container instanceof ListNode) ||
+                     !((ListNode)container).isCompatible(c)) {
+                 container = addChild(container, new ListNode(c));
+             }
+
+             if(DEBUG) {
+                 System.out.println(" indentation = " + (offset - originalOffset + (i == 0 ? 1 : 0)));
+             }
+             container = addChild(container, new ItemNode(offset - originalOffset + (i == 0 ? 1 : 0)));
+         }
+         else if(Character.isDigit(c) && scanner.isListMarker(line, firstNonspace)) {
+             // Ordered list item; same padding rules as for bullet items
+             int originalOffset = offset;
+             offset = firstNonspace + scanner.matched;
+             int i = 0;
+             char c2 = 0;
+             while(i <= 5 && (c2 = line.charAt(offset+i)) == ' ')
+                 ++i;
+             if(i >= 5|| i < 1 || c2 == '\n') {
+                 if(i > 0)
+                     ++offset;
+             }
+             else {
+                 offset += i;
+             }
+
+             if(!(container instanceof ListNode) ||
+                     !((ListNode)container).isCompatible(scanner.bulletChar)) {
+                 container = addChild(container, new ListNode(scanner.bulletChar, scanner.level));
+             }
+
+             if(DEBUG) {
+                 System.out.println(" indentation = " + (offset - originalOffset + (i == 0 ? 1 : 0)));
+             }
+             container = addChild(container, new ItemNode(offset - originalOffset + (i == 0 ? 1 : 0)));
+         }
+         else if(c == ':' && line.charAt(firstNonspace+1) == ':') {
+             // Extension block of the form  ::name[content]
+             int p=firstNonspace+2;
+             while(Character.isAlphabetic(c=line.charAt(p)) || Character.isDigit(c) || c == ' ' || c=='_')
+                 ++p;
+             if(c != '[')
+                 break;
+             int bracketBegin = p;
+             ++p;
+             // Find the closing bracket; "\\" and "\]" are escapes and do not
+             // terminate the content.
+             while(true) {
+                 c = line.charAt(p++);
+                 if(c == ']' || c == '\n')
+                     break;
+                 if(c == '\\') {
+                     // p already points at the character after the backslash.
+                     // Reading it is always in range because c was not the
+                     // '\n' sentinel that ends the line.
+                     char escaped = line.charAt(p);
+                     if(escaped == '\\' || escaped == ']')
+                         ++p;
+                 }
+             }
+             if(c == ']') {
+                 offset = p;
+                 container = addChild(container, new ExtensionBlockNode(
+                         line.substring(firstNonspace+2, bracketBegin).trim(),
+                         line.substring(bracketBegin+1, p-1).trim()));
+             }
+             else
+                 break;
+         }
+         else
+             break;
+
+         if(container.acceptLines())
+             break;
+         maybeLazy = false;
+     }
+
+     // ---- Step 4: record the blank-line state and add the remaining text ----
+     int firstNonspace = offset;
+     char c;
+     while((c=line.charAt(firstNonspace)) == ' ')
+         ++firstNonspace;
+
+     blank = c == '\n';
+
+     if(blank) {
+         if(container.lastChild != null)
+             container.lastChild.setLastLineBlank(true);
+         // Block quotes, headers, fenced code blocks and list items that were
+         // opened on this very line and are still empty do not count as
+         // ending with a blank line.
+         container.setLastLineBlank(
+                 !(container instanceof BlockQuoteNode) &&
+                 !(container instanceof HeaderNode) &&
+                 !(container instanceof CodeBlockNode && ((CodeBlockNode)container).fenced) &&
+                 !(container instanceof ItemNode &&
+                         container.firstChild == null &&
+                         container.lineNumber == lineNumber));
+     }
+     else
+         container.setLastLineBlank(false);
+     // Clear the flag on every ancestor of the container
+     for(Node cont = container;cont.parent != null;
+             cont = cont.parent, cont.setLastLineBlank(false));
+
+     if(DEBUG) {
+         System.out.println(" current = " + current.getClass().getSimpleName() + "@" + current.hashCode());
+         System.out.println(" container = " + container.getClass().getSimpleName() + "@" + container.hashCode());
+     }
+     if(current != lastMatchedContainer &&
+             container == lastMatchedContainer &&
+             !blank &&
+             current instanceof ParagraphNode &&
+             current.stringContent != null) {
+         // Lazy continuation: the line did not match all open blocks and opened
+         // nothing new, so it is appended to the paragraph that is still open.
+         addLine(current, line, offset);
+     }
+     else {
+         // Close the blocks that this line did not continue
+         while(current != lastMatchedContainer)
+             current = finalize(current);
+
+         if(container instanceof CodeBlockNode ||
+                 container instanceof HtmlNode)
+             addLine(container, line, offset);
+         else if(blank)
+             ; // do nothing
+         else if(container.acceptLines()) {
+             if(container instanceof HeaderNode &&
+                     !((HeaderNode)container).setext)
+                 chopTrailingHashtags(line, firstNonspace);
+             addLine(container, line, firstNonspace);
+         }
+         else {
+             container = addChild(container, new ParagraphNode());
+             addLine(container, line, firstNonspace);
+         }
+
+         current = container;
+     }
+ }
+
+ /**
+  * Trims the end of an ATX header line in place. Trailing spaces and the
+  * newline sentinel are always removed. If the line then ends with a run of
+  * {@code '#'} that is preceded by a space, that run and the spaces before it
+  * are removed too, but never anything before {@code firstNonspace}.
+  *
+  * @param line          the line buffer; truncated in place
+  * @param firstNonspace index where the header text starts
+  */
+ private void chopTrailingHashtags(StringBuilder line, int firstNonspace) {
+     // Drop trailing spaces / newline.
+     int end = line.length();
+     while(end > 0) {
+         char tail = line.charAt(end-1);
+         if(tail != ' ' && tail != '\n')
+             break;
+         --end;
+     }
+     line.delete(end, line.length());
+
+     if(end == 0 || line.charAt(end-1) != '#')
+         return;
+
+     // Locate the start of the closing run of '#'.
+     int hashStart = end-1;
+     while(hashStart > 0 && line.charAt(hashStart-1) == '#')
+         --hashStart;
+
+     // The run only counts as a closing sequence when a space precedes it.
+     if(hashStart == 0 || line.charAt(hashStart-1) != ' ')
+         return;
+
+     // Also remove the spaces that separate the text from the closing run.
+     int cut = hashStart-1;
+     while(cut > 0 && line.charAt(cut-1) == ' ')
+         --cut;
+     line.delete(Math.max(cut, firstNonspace), line.length());
+ }
+
+ /**
+  * Appends {@code line}, starting at {@code offset} and excluding a trailing
+  * newline, to the text content of {@code container}. Successive lines are
+  * separated by a single {@code '\n'}; the content buffer is created on the
+  * first call.
+  */
+ private void addLine(Node container, StringBuilder line, int offset) {
+     StringBuilder content = container.stringContent;
+     if(content != null)
+         content.append('\n');
+     else
+         container.stringContent = content = new StringBuilder();
+
+     int end = line.length();
+     boolean endsWithNewline = end > 0 && line.charAt(end-1) == '\n';
+     if(endsWithNewline)
+         --end;
+     if(DEBUG)
+         System.out.println(" addLine(" + container.getClass().getSimpleName() + "@" + container.hashCode() + ", \"" + line.substring(offset, end) + "\")");
+     content.append(line, offset, end);
+ }
+
+ /**
+  * Expands tab characters to spaces using tab stops every four columns.
+  *
+  * @param str the line to expand; it is not modified
+  * @return {@code str} itself when it contains no tab, otherwise the shared
+  *         {@code detabBuffer} holding the expanded line (overwritten by the
+  *         next call that encounters a tab)
+  */
+ private StringBuilder detab(StringBuilder str) {
+     int firstTab = str.indexOf("\t");
+     if(firstTab < 0)
+         return str;
+
+     StringBuilder expanded = detabBuffer;
+     expanded.setLength(0);
+     // Everything before the first tab is copied verbatim.
+     expanded.append(str, 0, firstTab);
+     for(int i = firstTab, n = str.length(); i < n; ++i) {
+         char ch = str.charAt(i);
+         if(ch != '\t') {
+             expanded.append(ch);
+             continue;
+         }
+         // A tab always yields at least one space and ends on a tab stop.
+         do {
+             expanded.append(' ');
+         } while(expanded.length() % 4 != 0);
+     }
+     return expanded;
+ }
+
+ /**
+  * Attaches {@code child} below {@code parent}, or below the nearest ancestor
+  * that can contain it. Every block that cannot contain the child is
+  * finalized on the way up. The child is stamped with the current line number.
+  *
+  * @return {@code child}, so that it can become the new container
+  */
+ private Node addChild(Node parent, Node child) {
+     child.lineNumber = lineNumber;
+     if(DEBUG)
+         System.out.println(" addChild(" + parent.getClass().getSimpleName() + "@" + parent.hashCode() + ", " +
+                 child.getClass().getSimpleName() + "@" + child.hashCode() + ")");
+     Node target = parent;
+     for(; !target.canContain(child); target = finalize(target))
+         ; // climb until a block accepts the child
+     target.addChild(child);
+     return child;
+ }
+
+ /**
+  * Closes {@code node} and performs the post-processing its type requires:
+  * <ul>
+  * <li>paragraph: leading link reference definitions are extracted;</li>
+  * <li>header: a missing content buffer is replaced by an empty one;</li>
+  * <li>fenced code block: the first content line is split off as the info
+  *     string; indented code block: trailing blank lines are removed;</li>
+  * <li>list: the {@code tight} flag is computed.</li>
+  * </ul>
+  *
+  * @return the parent of {@code node}, read after post-processing
+  */
+ private Node finalize(Node node) {
+     node.open = false;
+     if(node instanceof ParagraphNode) {
+         parseReferenceInline(node);
+     }
+     else if(node instanceof HeaderNode) {
+         if(node.stringContent == null)
+             node.stringContent = new StringBuilder(0);
+     }
+     else if(node instanceof CodeBlockNode) {
+         CodeBlockNode block = (CodeBlockNode)node;
+         StringBuilder content = block.stringContent;
+         if(!block.fenced) {
+             removeTrailingBlankLines(content);
+         }
+         else {
+             // The first stored line of a fenced block is its info string.
+             int lineBreak = content.indexOf("\n");
+             String rawInfo;
+             if(lineBreak >= 0) {
+                 rawInfo = content.substring(0, lineBreak);
+                 content.delete(0, lineBreak+1);
+             }
+             else {
+                 rawInfo = content.toString();
+                 block.stringContent = new StringBuilder(0);
+             }
+             block.infoString = Reference.cleanUrl(rawInfo.trim());
+         }
+     }
+     else if(node instanceof ListNode) {
+         ListNode list = (ListNode)node;
+         // The list is loose as soon as a blank line separates two items or
+         // two blocks inside an item.
+         boolean loose = false;
+         for(Node item = list.firstChild; item != null && !loose; item = item.next) {
+             boolean hasNextItem = item.next != null;
+             if(item.lastLineBlank && hasNextItem) {
+                 loose = true;
+                 continue;
+             }
+             for(Node child = item.firstChild; child != null && !loose; child = child.next) {
+                 if(endsWithBlankLine(child) && (child.next != null || hasNextItem))
+                     loose = true;
+             }
+         }
+         list.tight = !loose;
+     }
+     return node.parent;
+ }
+
+ /**
+  * Tells whether {@code node} ends with a blank line. The check starts at
+  * {@code node} and keeps descending to the last child for as long as that
+  * child is a list or a list item.
+  */
+ private static boolean endsWithBlankLine(Node node) {
+     Node cursor = node;
+     do {
+         if(cursor.lastLineBlank)
+             return true;
+         cursor = cursor.lastChild;
+     } while(cursor instanceof ListNode || cursor instanceof ItemNode);
+     return false;
+ }
+
+ /**
+  * Truncates {@code str} at the first newline of its trailing run of spaces
+  * and newlines. A buffer whose trailing run contains no newline is left
+  * untouched.
+  */
+ private static void removeTrailingBlankLines(StringBuilder str) {
+     int keep = str.length();
+     for(int i = str.length()-1; i >= 0; --i) {
+         char ch = str.charAt(i);
+         if(ch == ' ')
+             continue;
+         if(ch != '\n')
+             break;
+         // Everything from this newline onwards is blank so far.
+         keep = i;
+     }
+     if(keep < str.length())
+         str.setLength(keep);
+ }
+
+ /**
+  * Consumes link reference definitions of the form
+  * {@code [label]: url "title"} from the beginning of the node's text and
+  * records them in {@code referenceMap}. The first definition of a label
+  * wins. Consumed definitions are deleted from the text; if nothing remains,
+  * the node is removed from the tree.
+  */
+ private void parseReferenceInline(Node node) {
+     StringBuilder input = node.stringContent;
+
+     for(;;) {
+         // A definition has to start at the very beginning of the remaining text.
+         if(input.length() == 0 || input.charAt(0) != '[')
+             return;
+
+         // Label
+         int labelEnd = Scanner.scanLinkLabel(input, 0);
+         if(labelEnd == -1 || labelEnd == input.length()
+                 || input.charAt(labelEnd) != ':')
+             return;
+         String label = input.substring(1, labelEnd-1);
+
+         // Url, optionally wrapped in angle brackets
+         int urlStart = spnl(input, labelEnd+1);
+         int urlEnd = Scanner.scanLinkUrl(input, urlStart);
+         if(urlEnd == -1 || urlEnd == urlStart)
+             return;
+         boolean angleBracketed = urlStart < input.length() && input.charAt(urlStart) == '<';
+         String rawUrl = angleBracketed
+                 ? input.substring(urlStart+1, urlEnd-1)
+                 : input.substring(urlStart, urlEnd);
+         String url = Reference.cleanUrl(rawUrl);
+
+         // Optional title; without one, scanning resumes right after the url
+         int titleStart = spnl(input, urlEnd);
+         int titleEnd = Scanner.scanLinkTitle(input, titleStart);
+         String title;
+         int tail;
+         if(titleEnd == -1) {
+             title = "";
+             tail = urlEnd;
+         }
+         else {
+             title = Reference.cleanTitle(input.substring(titleStart+1, titleEnd-1));
+             tail = titleEnd;
+         }
+
+         // Only spaces may follow, up to the end of the line or of the text
+         char last = 0;
+         while(tail < input.length() && (last = input.charAt(tail)) == ' ')
+             ++tail;
+         if(last == '\n')
+             ++tail;
+         else if(tail != input.length())
+             return;
+
+         Reference reference = new Reference(Reference.normalizeLabel(label), url, title);
+         if(!referenceMap.contains(reference.label))
+             referenceMap.put(reference.label, reference);
+
+         if(tail == input.length()) {
+             // The paragraph consisted of definitions only.
+             node.remove();
+             return;
+         }
+         input.delete(0, tail);
+     }
+ }
+
+ /**
+  * Skips spaces and at most one newline.
+  *
+  * @param input  text to scan
+  * @param offset index at which scanning starts
+  * @return index of the first character that is neither a space nor the
+  *         first newline encountered, or {@code input.length()} if the end
+  *         is reached
+  */
+ private static int spnl(StringBuilder input, int offset) {
+     int pos = offset;
+     int newlines = 0;
+     for(int end = input.length(); pos < end; ++pos) {
+         char ch = input.charAt(pos);
+         if(ch == '\n') {
+             // A second newline is not skipped.
+             if(++newlines > 1)
+                 break;
+         }
+         else if(ch != ' ')
+             break;
+     }
+     return pos;
+ }
+
+}