--- /dev/null
+/**\r
+ * Copyright winterwell Mathematics Ltd.\r
+ * @author Daniel Winterstein\r
+ * 11 Jan 2007\r
+ */\r
+package winterwell.markdown.pagemodel;\r
+\r
+import java.io.File;\r
+import java.util.ArrayList;\r
+import java.util.Collections;\r
+import java.util.HashMap;\r
+import java.util.List;\r
+import java.util.Map;\r
+import java.util.regex.Matcher;\r
+import java.util.regex.Pattern;\r
+\r
+import org.eclipse.jface.preference.IPreferenceStore;\r
+\r
+import winterwell.markdown.Activator;\r
+import winterwell.markdown.StringMethods;\r
+import winterwell.markdown.preferences.MarkdownPreferencePage;\r
+import winterwell.utils.FailureException;\r
+import winterwell.utils.Process;\r
+import winterwell.utils.StrUtils;\r
+import winterwell.utils.Utils;\r
+import winterwell.utils.io.FileUtils;\r
+\r
+import com.petebevin.markdown.MarkdownProcessor;\r
+\r
+/**\r
+ * Understands Markdown syntax.\r
+ * \r
+ * @author Daniel Winterstein\r
+ */\r
+public class MarkdownPage {\r
+\r
+ /**\r
+  * Strip leading and trailing #s and whitespace from a header line.\r
+  * <p>\r
+  * For example "## Misc Warblings ##" becomes "Misc Warblings". A line that\r
+  * holds nothing but #s and whitespace has no heading text and yields the\r
+  * empty string.\r
+  * \r
+  * @param line\r
+  *            the raw header line; must not be null\r
+  * @return cleaned up line\r
+  */\r
+ private String cleanHeader(String line) {\r
+     int start = 0;\r
+     int end = line.length();\r
+     // Skip the leading run of '#' markers and whitespace\r
+     while (start < end\r
+             && (line.charAt(start) == '#' || Character.isWhitespace(line.charAt(start)))) {\r
+         start++;\r
+     }\r
+     // Skip the trailing run. The scan goes all the way down to 'start', so\r
+     // that a one-character heading such as "# a #" is trimmed as well.\r
+     while (end > start\r
+             && (line.charAt(end - 1) == '#' || Character.isWhitespace(line.charAt(end - 1)))) {\r
+         end--;\r
+     }\r
+     return line.substring(start, end);\r
+ }\r
+\r
+ /**\r
+  * A section header of the page, e.g. "## Misc Warblings". Headers are\r
+  * linked into a tree: each one knows its parent section (if any) and its\r
+  * sub-sections.\r
+  * \r
+  * @author daniel\r
+  */\r
+ public class Header {\r
+     /** 1 = top-level (i.e. #), 2 = 2nd-level (i.e. ##), etc. */\r
+     final int level;\r
+\r
+     /** The text of the Header, as returned by cleanHeader. */\r
+     final String heading;\r
+\r
+     /** Sub-sections, if any. */\r
+     final List<Header> subHeaders = new ArrayList<Header>();\r
+\r
+     /** The line on which this header occurs. */\r
+     final int lineNumber;\r
+\r
+     /** The parent section. Can be null. */\r
+     private Header parent;\r
+\r
+     /**\r
+      * Create a marker for a section Header and attach it to the heading\r
+      * tree.\r
+      * \r
+      * @param level\r
+      *            1 = top-level (i.e. #), 2 = 2nd-level (i.e. ##), etc.\r
+      * @param lineNumber\r
+      *            The line on which this header occurs\r
+      * @param heading\r
+      *            The text of the Header; it is passed through cleanHeader\r
+      * @param currentHeader\r
+      *            The previous Header. This is used to find the parent\r
+      *            section if there is one. Can be null.\r
+      */\r
+     Header(int level, int lineNumber, String heading, Header currentHeader) {\r
+         this.level = level;\r
+         this.lineNumber = lineNumber;\r
+         this.heading = cleanHeader(heading);\r
+         // Heading tree: needs this.level, so it comes last\r
+         setParent(currentHeader);\r
+     }\r
+\r
+     /**\r
+      * Walk up from currentHeader until a header with a smaller level number\r
+      * is found. That header becomes the parent and this header is added to\r
+      * its sub-sections. If there is no such header the parent is null.\r
+      */\r
+     private void setParent(Header currentHeader) {\r
+         Header candidate = currentHeader;\r
+         while (candidate != null && candidate.level >= level) {\r
+             candidate = candidate.parent;\r
+         }\r
+         parent = candidate;\r
+         if (candidate != null) {\r
+             candidate.subHeaders.add(this);\r
+         }\r
+     }\r
+\r
+     /**\r
+      * @return the next section: the following sibling if there is one,\r
+      *         otherwise whatever follows the parent section. null if none\r
+      *         (also for a parent-less header that is not in the top-level\r
+      *         list).\r
+      */\r
+     public Header getNext() {\r
+         if (parent != null) {\r
+             final List<Header> siblings = parent.subHeaders;\r
+             final int index = siblings.indexOf(this);\r
+             assert index != -1 : this;\r
+             // Last sub-section: carry on after the parent\r
+             return (index == siblings.size() - 1) ? parent.getNext() : siblings.get(index + 1);\r
+         }\r
+         final int topIndex = level1Headers.indexOf(this);\r
+         final boolean unknownOrLast = topIndex == -1 || topIndex == level1Headers.size() - 1;\r
+         return unknownOrLast ? null : level1Headers.get(topIndex + 1);\r
+     }\r
+\r
+     /**\r
+      * @return the previous section: the preceding sibling if there is one,\r
+      *         otherwise whatever precedes the parent section. null if none\r
+      *         (also for a parent-less header that is not in the top-level\r
+      *         list).\r
+      */\r
+     public Header getPrevious() {\r
+         if (parent != null) {\r
+             final List<Header> siblings = parent.subHeaders;\r
+             final int index = siblings.indexOf(this);\r
+             assert index != -1 : this;\r
+             // First sub-section: carry on before the parent\r
+             return (index == 0) ? parent.getPrevious() : siblings.get(index - 1);\r
+         }\r
+         final int topIndex = level1Headers.indexOf(this);\r
+         final boolean unknownOrFirst = topIndex == -1 || topIndex == 0;\r
+         return unknownOrFirst ? null : level1Headers.get(topIndex - 1);\r
+     }\r
+\r
+     /**\r
+      * @return the parent section. Can be null.\r
+      */\r
+     public Header getParent() {\r
+         return parent;\r
+     }\r
+\r
+     /**\r
+      * Sub-sections. May be zero-length, never null.\r
+      */\r
+     public List<Header> getSubHeaders() {\r
+         return subHeaders;\r
+     }\r
+\r
+     /**\r
+      * @return 1 = top-level (i.e. #), 2 = 2nd-level (i.e. ##), etc.\r
+      */\r
+     public int getLevel() {\r
+         return level;\r
+     }\r
+\r
+     /**\r
+      * @return the line on which this header occurs\r
+      */\r
+     public int getLineNumber() {\r
+         return lineNumber;\r
+     }\r
+\r
+     /**\r
+      * @return the text of the Header\r
+      */\r
+     @Override\r
+     public String toString() {\r
+         return heading;\r
+     }\r
+ }\r
+\r
+ /**\r
+ * The raw text, broken up into individual lines.\r
+ */\r
+ private List<String> lines;\r
+\r
+ /**\r
+  * @return a read-only view of the raw text, broken up into individual\r
+  *         lines\r
+  */\r
+ public List<String> getText() {\r
+     final List<String> readOnlyLines = Collections.unmodifiableList(lines);\r
+     return readOnlyLines;\r
+ }\r
+\r
+ /**\r
+ * The type of a line of the raw text.\r
+ * <p>\r
+ * NOTE: the order of the constants matters. setText picks the type of a\r
+ * header line by position (KLineType.values()[h]), so H1 to H6 must stay\r
+ * at positions 1 to 6.\r
+ */\r
+ public enum KLineType {\r
+ NORMAL, H1, H2, H3, H4, H5, H6, BLANK,\r
+ // TODO LIST, BLOCKQUOTE,\r
+ /** A line marking Markdown info about the preceding line, e.g. ====== */\r
+ MARKER,\r
+ /** A line containing meta-data, e.g. title: My Page */\r
+ META\r
+ }\r
+\r
+ /**\r
+ * Information about each line: the type of the line at the same index in\r
+ * lines. Rebuilt by setText.\r
+ */\r
+ private List<KLineType> lineTypes;\r
+ /**\r
+ * Objects found on the page, keyed by line number. setText stores the\r
+ * Header object of each header line here.\r
+ */\r
+ private Map<Integer,Object> pageObjects = new HashMap<Integer, Object>();\r
+\r
+ // TODO meta-data, footnotes, tables, link & image attributes\r
+ /**\r
+ * Matches a MultiMarkdown metadata line of the form "key: value". The line\r
+ * must start with a word character. Group 1 is greedy, so it runs up to the\r
+ * LAST colon on the line; group 2 is whatever follows that colon.\r
+ */\r
+ private static Pattern multiMarkdownTag = Pattern.compile("^([\\w].*):(.*)");\r
+ /**\r
+ * MultiMarkdown metadata collected by setText.\r
+ */\r
+ private Map<String, String> multiMarkdownTags = new HashMap<String, String>();\r
+ \r
+ // Regular expression for Github support: matches a bare http, https, ftp or\r
+ // file URL. The final character class leaves out ? ! : , . ; so that a match\r
+ // never ends on one of those punctuation characters.\r
+ private static Pattern githubURLDetection = Pattern.compile("((https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|])");\r
+\r
+ /**\r
+ * The top-level headers. FIXME handle documents which have a 2nd level\r
+ * header before any 1st level ones\r
+ */\r
+ private final List<Header> level1Headers = new ArrayList<Header>();\r
+ /**\r
+ * The preference store, taken from the plug-in Activator in the constructor.\r
+ * setText and html read their options from it.\r
+ */\r
+ private final IPreferenceStore pStore;\r
+\r
+ /**\r
+  * Create a page for the given Markdown text. The preference store is\r
+  * taken from the plug-in Activator, then the text is parsed by setText.\r
+  * \r
+  * @param text\r
+  *            the raw Markdown text of the page\r
+  */\r
+ public MarkdownPage(String text) {\r
+     // Must be assigned before setText runs: setText reads the preferences\r
+     this.pStore = Activator.getDefault().getPreferenceStore();\r
+     this.setText(text);\r
+ }\r
+\r
+ /**\r
+  * Reset the text for this page: split it into lines, give every line a\r
+  * KLineType and rebuild the header tree.\r
+  * <p>\r
+  * Depending on the preferences this also:\r
+  * <ul>\r
+  * <li>reads a leading MultiMarkdown metadata block (its lines are typed\r
+  * META, which html() leaves out),</li>\r
+  * <li>turns GitHub-style ``` fenced code blocks into indented code blocks,\r
+  * and</li>\r
+  * <li>wraps bare URLs in &lt;...&gt; so that the Markdown converter treats\r
+  * them as links.</li>\r
+  * </ul>\r
+  * \r
+  * @param text\r
+  *            the raw Markdown text\r
+  */\r
+ private void setText(String text) {\r
+     // Get lines\r
+     lines = StringMethods.splitLines(text);\r
+     // Clean out old\r
+     level1Headers.clear();\r
+     lineTypes = new ArrayList<KLineType>(lines.size());\r
+     pageObjects.clear();\r
+     multiMarkdownTags.clear();\r
+     // Dummy level-1 header in case there are none\r
+     Header dummyTopHeader = new Header(1, 0, "", null);\r
+     level1Headers.add(dummyTopHeader);\r
+     Header currentHeader = dummyTopHeader;\r
+\r
+     // Multi-markdown header (optional). Line typing resumes right after it.\r
+     int lineNum = 0;\r
+     if (pStore.getBoolean(MarkdownPreferencePage.PREF_MULTIMARKDOWN_METADATA)) {\r
+         lineNum = readMultiMarkdownMetadata();\r
+     }\r
+\r
+     // Identify line types\r
+     for (; lineNum < lines.size(); lineNum++) {\r
+         String line = lines.get(lineNum);\r
+         // Headings. Markdown has six header levels; without the clamp a\r
+         // longer run of #s would pick a non-header type (7 to 9 #s) or run\r
+         // off the end of KLineType.values() (10 or more #s).\r
+         int h = Math.min(numHash(line), MAX_HEADER_LEVEL);\r
+         String hLine = line;\r
+         int hLineNum = lineNum;\r
+         int underline = -1;\r
+         if (lineNum != 0) {\r
+             underline = just(line, '=') ? 1 : just(line, '-') ? 2 : -1;\r
+         }\r
+         if (underline != -1) {\r
+             // An underline (==== or ----) makes the PREVIOUS line the header\r
+             h = underline;\r
+             hLineNum = lineNum - 1;\r
+             hLine = lines.get(lineNum - 1);\r
+             lineTypes.set(hLineNum, KLineType.values()[h]);\r
+             lineTypes.add(KLineType.MARKER);\r
+         }\r
+         // Create a Header object\r
+         if (h > 0) {\r
+             if (underline == -1) {\r
+                 // H1 to H6 sit at positions 1 to 6 of KLineType\r
+                 lineTypes.add(KLineType.values()[h]);\r
+             }\r
+             Header header = new Header(h, hLineNum, hLine, currentHeader);\r
+             if (h == 1) {\r
+                 level1Headers.add(header);\r
+             }\r
+             pageObjects.put(hLineNum, header);\r
+             currentHeader = header;\r
+             continue;\r
+         }\r
+         // TODO List\r
+         // TODO Block quote\r
+         // Blank line\r
+         if (Utils.isBlank(line)) {\r
+             lineTypes.add(KLineType.BLANK);\r
+             continue;\r
+         }\r
+         // Normal\r
+         lineTypes.add(KLineType.NORMAL);\r
+     } // end line-loop\r
+     // Remove dummy header?\r
+     if (dummyTopHeader.getSubHeaders().size() == 0) {\r
+         level1Headers.remove(dummyTopHeader);\r
+     }\r
+\r
+     if (pStore.getBoolean(MarkdownPreferencePage.PREF_GITHUB_SYNTAX)) {\r
+         indentFencedCodeBlocks();\r
+         autoLinkBareUrls();\r
+     }\r
+ }\r
+\r
+ /** The deepest header level Markdown knows (######). */\r
+ private static final int MAX_HEADER_LEVEL = 6;\r
+\r
+ /**\r
+  * Read the MultiMarkdown metadata block at the top of the page, if any.\r
+  * <p>\r
+  * The block must start on the very first line and is a run of\r
+  * "key: value" lines; a value may continue on following lines that start\r
+  * with whitespace. The block ends with the first blank line. Every line of\r
+  * the block is typed META and the key/value pairs are stored in\r
+  * multiMarkdownTags.\r
+  * \r
+  * @return the index of the first line after the metadata block; 0 if the\r
+  *         page does not start with a valid metadata block\r
+  */\r
+ private int readMultiMarkdownMetadata() {\r
+     // First pass: check the whole block before any state is touched\r
+     for (int i = 0; i < lines.size(); i++) {\r
+         String line = lines.get(i);\r
+         if (Utils.isBlank(line)) {\r
+             break;\r
+         }\r
+         if (multiMarkdownTag.matcher(line).find()) {\r
+             continue;\r
+         }\r
+         // Not "key: value". That is only allowed as an indented\r
+         // continuation of an earlier tag. The first character is tested\r
+         // directly (rather than with a regex ending in \n) so that lines\r
+         // ending in \r\n, or a last line without a newline, work too.\r
+         boolean continuation = i > 0 && line.length() > 0\r
+                 && Character.isWhitespace(line.charAt(0));\r
+         if (!continuation) {\r
+             return 0;\r
+         }\r
+     }\r
+\r
+     // Second pass: the block is valid, so collect the keys/values\r
+     String tag = "";\r
+     String data = "";\r
+     int lineNum = 0;\r
+     for (; lineNum < lines.size(); lineNum++) {\r
+         String line = lines.get(lineNum);\r
+         if (Utils.isBlank(line)) {\r
+             break;\r
+         }\r
+         if (multiMarkdownTag.matcher(line).find()) {\r
+             // The key is the text before the first colon, the data is the\r
+             // text after it. Splitting by hand keeps a value such as\r
+             // "http://..." in one piece (the pattern's key group is greedy\r
+             // and would run up to the last colon).\r
+             int colon = line.indexOf(':');\r
+             tag = line.substring(0, colon).trim();\r
+             data = line.substring(colon + 1).trim();\r
+         } else {\r
+             // Multi-line tag: extend the value of the current key\r
+             data += StrUtils.LINEEND + line.trim();\r
+         }\r
+         lineTypes.add(KLineType.META);\r
+         multiMarkdownTags.put(tag, data);\r
+     }\r
+     return lineNum;\r
+ }\r
+\r
+ /**\r
+  * GitHub syntax: turn ``` fenced code blocks into plain Markdown code\r
+  * blocks. Each fence line is replaced by an empty line and the lines\r
+  * between two fences are indented by four spaces, which is what Markdown\r
+  * needs to see a code block.\r
+  * <p>\r
+  * NOTE(review): this runs after the lines have been typed, so a line\r
+  * inside a fence that starts with '#' is still listed as a header.\r
+  */\r
+ private void indentFencedCodeBlocks() {\r
+     boolean inCodeBlock = false;\r
+     for (int i = 0; i < lines.size(); i++) {\r
+         String line = lines.get(i);\r
+         // Found the start or end of a code block\r
+         if (line.startsWith("```")) {\r
+             inCodeBlock = !inCodeBlock;\r
+             // We force the line to be blank, but mark it as normal\r
+             lines.set(i, "\n");\r
+             lineTypes.set(i, KLineType.NORMAL);\r
+             continue;\r
+         }\r
+         if (inCodeBlock) {\r
+             lines.set(i, "    " + line);\r
+         }\r
+     }\r
+ }\r
+\r
+ /**\r
+  * GitHub syntax: wrap every bare URL in &lt; and &gt; so that the Markdown\r
+  * library turns it into a link. URLs that are already part of a link\r
+  * (see isAlreadyLinked) are left alone.\r
+  */\r
+ private void autoLinkBareUrls() {\r
+     for (int i = 0; i < lines.size(); i++) {\r
+         String line = lines.get(i);\r
+         Matcher m = githubURLDetection.matcher(line);\r
+         // Only built when there is something to replace\r
+         StringBuilder rewritten = null;\r
+         int copiedUpTo = 0;\r
+         while (m.find()) {\r
+             if (isAlreadyLinked(line, m.start(), m.end())) {\r
+                 continue;\r
+             }\r
+             if (rewritten == null) {\r
+                 rewritten = new StringBuilder();\r
+             }\r
+             rewritten.append(line, copiedUpTo, m.start());\r
+             rewritten.append('<').append(m.group(0)).append('>');\r
+             copiedUpTo = m.end();\r
+         }\r
+         if (rewritten != null) {\r
+             rewritten.append(line, copiedUpTo, line.length());\r
+             lines.set(i, rewritten.toString());\r
+         }\r
+     }\r
+ }\r
+\r
+ /**\r
+  * @param line\r
+  *            the line containing the URL\r
+  * @param start\r
+  *            index of the first character of the URL\r
+  * @param end\r
+  *            index just after the last character of the URL\r
+  * @return true if the URL is already written as &lt;link&gt;,\r
+  *         [description](link) or [description](link "title")\r
+  */\r
+ private static boolean isAlreadyLinked(String line, int start, int end) {\r
+     // (char) 0 stands for "no such character"\r
+     char before = start >= 1 ? line.charAt(start - 1) : (char) 0;\r
+     char after = end < line.length() ? line.charAt(end) : (char) 0;\r
+     // <link>\r
+     if (before == '<' && after == '>') {\r
+         return true;\r
+     }\r
+     if (start >= 2 && line.charAt(start - 2) == ']' && before == '(') {\r
+         // [description](link)\r
+         if (after == ')') {\r
+             return true;\r
+         }\r
+         // [description](link "title")\r
+         if (after == ' ' && end + 1 < line.length() && line.charAt(end + 1) == '"') {\r
+             return true;\r
+         }\r
+     }\r
+     return false;\r
+ }\r
+\r
+ /**\r
+  * Test whether a line is an "underline" made of a single character, e.g.\r
+  * "=====" or "-----".\r
+  * \r
+  * @param line\r
+  *            the line to test; must not be null\r
+  * @param c\r
+  *            the character the line must be made of\r
+  * @return true if line is just cs (and whitespace at the start/end)\r
+  */\r
+ boolean just(String line, char c) {\r
+     // c is quoted so that a regex metacharacter (e.g. '*' or '.') is taken\r
+     // literally instead of changing the meaning of the pattern\r
+     String oneOrMoreCs = Pattern.quote(String.valueOf(c)) + "+";\r
+     return line.matches("\\s*" + oneOrMoreCs + "\\s*");\r
+ }\r
+\r
+ /**\r
+  * Count the # symbols at the very start of a line. Counting stops at the\r
+  * first character that is not a #.\r
+  * \r
+  * @param line\r
+  *            the line to inspect; must not be null\r
+  * @return The number of # symbols prepending the line.\r
+  */\r
+ private int numHash(String line) {\r
+     int count = 0;\r
+     while (count < line.length() && line.charAt(count) == '#') {\r
+         count++;\r
+     }\r
+     return count;\r
+ }\r
+\r
+ /**\r
+  * Get the headings directly below a section.\r
+  * \r
+  * @param parent\r
+  *            Can be null for top-level\r
+  * @return Read-only list of sub-headers. Never null. FIXME handle\r
+  *         documents which have a 2nd level header before any 1st level\r
+  *         ones\r
+  */\r
+ public List<Header> getHeadings(Header parent) {\r
+     final List<Header> headings = (parent != null) ? parent.subHeaders : level1Headers;\r
+     return Collections.unmodifiableList(headings);\r
+ }\r
+\r
+ // public WebPage getWebPage() {\r
+ // WebPage page = new WebPage();\r
+ // // Add the lines, one by one\r
+ // boolean inParagraph = false;\r
+ // for (int i=0; i<lines.size(); i++) {\r
+ // String line = lines.get(i);\r
+ // KLineType type = lineTypes.get(i);\r
+ // switch(type) {\r
+ // // Heading?\r
+ // case H1: case H2: case H3:\r
+ // case H4: case H5: case H6:\r
+ // if (inParagraph) page.addText("</p>");\r
+ // line = cleanHeader(line);\r
+ // page.addText("<"+type+">"+line+"</"+type+">");\r
+ // continue;\r
+ // case MARKER: // Ignore\r
+ // continue;\r
+ // // TODO List?\r
+ // // TODO Block quote?\r
+ // }\r
+ // // Paragraph end?\r
+ // if (Utils.isBlank(line)) {\r
+ // if (inParagraph) page.addText("</p>");\r
+ // continue;\r
+ // }\r
+ // // Paragraph start?\r
+ // if (!inParagraph) {\r
+ // page.addText("<p>");\r
+ // inParagraph = true;\r
+ // }\r
+ // // Plain text\r
+ // page.addText(line);\r
+ // }\r
+ // return page;\r
+ // }\r
+\r
+ /**\r
+  * Get the HTML for this page.\r
+  * <p>\r
+  * Metadata lines (type META) are left out. The conversion is done by the\r
+  * MarkdownJ project, unless the preferences name an external command: in\r
+  * that case the text is written to a temporary file, the command is run\r
+  * with the path of that file appended, and its output is returned. The\r
+  * temporary file is removed afterwards, whether the command worked or\r
+  * not.\r
+  * \r
+  * @return the HTML\r
+  * @throws RuntimeException\r
+  *             as produced by Utils.runtime, if the external command\r
+  *             cannot be run or fails\r
+  */\r
+ public String html() {\r
+     // Section numbers??\r
+     boolean sectionNumbers = pStore\r
+             .getBoolean(MarkdownPreferencePage.PREF_SECTION_NUMBERS);\r
+     // Chop out multi-markdown header\r
+     StringBuilder sb = new StringBuilder();\r
+     assert lines.size() == lineTypes.size();\r
+     for (int i = 0, n = lines.size(); i < n; i++) {\r
+         KLineType type = lineTypes.get(i);\r
+         if (type == KLineType.META)\r
+             continue;\r
+         String line = lines.get(i);\r
+         if (sectionNumbers && isHeader(type) && line.contains("$section")) {\r
+             // TODO Header section = headers.get(i);\r
+             // String secNum = section.getSectionNumber();\r
+             // line.replace("$section", secNum);\r
+         }\r
+         sb.append(line);\r
+     }\r
+     String text = sb.toString();\r
+     // Use external converter?\r
+     final String cmd = pStore\r
+             .getString(MarkdownPreferencePage.PREF_MARKDOWN_COMMAND);\r
+     if (Utils.isBlank(cmd)\r
+             || (cmd.startsWith("(") && cmd.contains("MarkdownJ"))) {\r
+         // Use MarkdownJ\r
+         MarkdownProcessor markdown = new MarkdownProcessor();\r
+         // MarkdownJ doesn't convert £s for some reason, so put the HTML\r
+         // entity in by hand\r
+         text = text.replace("£", "&pound;");\r
+         return markdown.markdown(text);\r
+     }\r
+     // Attempt to run external command\r
+     File md = null;\r
+     try {\r
+         md = File.createTempFile("tmp", ".md");\r
+         FileUtils.write(md, text);\r
+         Process process = new Process(cmd+" "+md.getAbsolutePath());\r
+         process.run();\r
+         // presumably a timeout in milliseconds - TODO confirm against\r
+         // winterwell.utils.Process\r
+         int ok = process.waitFor(10000);\r
+         if (ok != 0) throw new FailureException(cmd+" failed:\n"+process.getError());\r
+         return process.getOutput();\r
+     } catch (Exception e) {\r
+         throw Utils.runtime(e);\r
+     } finally {\r
+         // Best-effort clean-up, also when the command failed. The result\r
+         // of delete() is ignored on purpose: a left-over temp file must\r
+         // not hide the real outcome.\r
+         if (md != null) {\r
+             md.delete();\r
+         }\r
+     }\r
+ }\r
+\r
+ /**\r
+  * @param type\r
+  *            a line type; may be null\r
+  * @return true if type is one of the header types H1 to H6, false for\r
+  *         anything else (including null)\r
+  */\r
+ private boolean isHeader(KLineType type) {\r
+     if (type == null) {\r
+         return false;\r
+     }\r
+     switch (type) {\r
+     case H1:\r
+     case H2:\r
+     case H3:\r
+     case H4:\r
+     case H5:\r
+     case H6:\r
+         return true;\r
+     default:\r
+         return false;\r
+     }\r
+ }\r
+\r
+ /**\r
+  * Return the raw text of this page: the lines joined together as they\r
+  * are, with nothing added between them.\r
+  */\r
+ @Override\r
+ public String toString() {\r
+     final StringBuilder raw = new StringBuilder();\r
+     for (int i = 0, n = lines.size(); i < n; i++) {\r
+         raw.append(lines.get(i));\r
+     }\r
+     return raw.toString();\r
+ }\r
+\r
+ /**\r
+  * Line type information for the raw text.\r
+  * \r
+  * @return a read-only view of the line types\r
+  */\r
+ public List<KLineType> getLineTypes() {\r
+     final List<KLineType> readOnlyTypes = Collections.unmodifiableList(lineTypes);\r
+     return readOnlyTypes;\r
+ }\r
+\r
+ /**\r
+  * Look up the object recorded for a line. setText records the Header\r
+  * object of each header line.\r
+  * \r
+  * @param line\r
+  *            the line number\r
+  * @return the object recorded for that line, or null if there is none\r
+  */\r
+ public Object getPageObject(int line) {\r
+     final Integer key = Integer.valueOf(line);\r
+     return pageObjects.get(key);\r
+ }\r
+\r
+}\r