X-Git-Url: https://gerrit.simantics.org/r/gitweb?a=blobdiff_plain;f=bundles%2Fwinterwell.markdown%2Fsrc%2Fwinterwell%2Fmarkdown%2Fpagemodel%2FMarkdownPage.java;fp=bundles%2Fwinterwell.markdown%2Fsrc%2Fwinterwell%2Fmarkdown%2Fpagemodel%2FMarkdownPage.java;h=a18a5ded6c76c983e4b1fe89a5ed9a71fc95aed6;hb=2531cdf245f42bce854d43f4d49a23983c79db96;hp=0000000000000000000000000000000000000000;hpb=857dbc869796d772864327ce02f19dc252b159fc;p=simantics%2Fplatform.git
diff --git a/bundles/winterwell.markdown/src/winterwell/markdown/pagemodel/MarkdownPage.java b/bundles/winterwell.markdown/src/winterwell/markdown/pagemodel/MarkdownPage.java
new file mode 100644
index 000000000..a18a5ded6
--- /dev/null
+++ b/bundles/winterwell.markdown/src/winterwell/markdown/pagemodel/MarkdownPage.java
@@ -0,0 +1,617 @@
+/**
+ * Copyright winterwell Mathematics Ltd.
+ * @author Daniel Winterstein
+ * 11 Jan 2007
+ */
+package winterwell.markdown.pagemodel;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.eclipse.jface.preference.IPreferenceStore;
+
+import winterwell.markdown.Activator;
+import winterwell.markdown.StringMethods;
+import winterwell.markdown.preferences.MarkdownPreferencePage;
+import winterwell.utils.FailureException;
+import winterwell.utils.Process;
+import winterwell.utils.StrUtils;
+import winterwell.utils.Utils;
+import winterwell.utils.io.FileUtils;
+
+import com.petebevin.markdown.MarkdownProcessor;
+
+/**
+ * Understands Markdown syntax.
+ *
+ * @author Daniel Winterstein
+ */
+public class MarkdownPage {
+
+ /**
+  * Strip leading and trailing #s and whitespace.
+  * <p>
+  * Both ends are trimmed independently, so a one-character heading such as
+  * "# a #" comes back as "a". A line that consists only of #s and
+  * whitespace has no heading text and yields the empty string.
+  *
+  * @param line
+  *            the raw header line, e.g. "## Misc Warblings ##"
+  * @return cleaned up line, never null
+  */
+ private String cleanHeader(String line) {
+     int start = 0;
+     int end = line.length();
+     // Skip the opening run of '#' and whitespace.
+     while (start < end && (line.charAt(start) == '#' || Character.isWhitespace(line.charAt(start)))) {
+         start++;
+     }
+     // Skip the closing run; the scan may go all the way down to 'start' so
+     // that the first remaining character is examined as well.
+     while (end > start && (line.charAt(end - 1) == '#' || Character.isWhitespace(line.charAt(end - 1)))) {
+         end--;
+     }
+     return line.substring(start, end);
+ }
+
+ /**
+  * Represents information about a section header. E.g. ## Misc Warblings
+  * <p>
+  * Headers are linked into a tree: each header records its parent section
+  * and its direct sub-sections. Headers without a parent are looked up in
+  * the enclosing page's list of top-level headers.
+  *
+  * @author daniel
+  */
+ public class Header {
+     /**
+      * 1 = top-level (i.e. #), 2 = 2nd-level (i.e. ##), etc.
+      */
+     final int level;
+     /**
+      * The text of the Header, with the surrounding #s and whitespace
+      * removed.
+      */
+     final String heading;
+     /**
+      * Sub-sections, if any
+      */
+     final List<Header> subHeaders = new ArrayList<Header>();
+     /**
+      * The line on which this header occurs.
+      */
+     final int lineNumber;
+     /**
+      * The parent section. Can be null.
+      */
+     private Header parent;
+
+     /**
+      * Create a marker for a section Header
+      *
+      * @param level
+      *            1 = top-level (i.e. #), 2 = 2nd-level (i.e. ##), etc.
+      * @param lineNumber
+      *            The line on which this header occurs
+      * @param heading
+      *            The text of the Header; it is passed through cleanHeader
+      *            before being stored
+      * @param currentHeader
+      *            The previous Header. This is used to find the parent
+      *            section if there is one. Can be null.
+      */
+     Header(int level, int lineNumber, String heading, Header currentHeader) {
+         this.lineNumber = lineNumber;
+         this.level = level;
+         this.heading = cleanHeader(heading);
+         // Heading Tree
+         setParent(currentHeader);
+     }
+
+     /**
+      * Walk up from the given header until one with a strictly smaller
+      * level is found and register this header as its child. If the chain
+      * ends first, this header has no parent.
+      *
+      * @param currentHeader
+      *            candidate parent, or null when the chain is exhausted
+      */
+     private void setParent(Header currentHeader) {
+         if (currentHeader == null) {
+             parent = null;
+             return;
+         }
+         if (currentHeader.level < level) {
+             parent = currentHeader;
+             parent.subHeaders.add(this);
+             return;
+         }
+         setParent(currentHeader.parent);
+     }
+
+     public int getLineNumber() {
+         return lineNumber;
+     }
+
+     public int getLevel() {
+         return level;
+     }
+
+     public Header getParent() {
+         return parent;
+     }
+
+     /**
+      * Sub-sections. May be zero-length, never null.
+      */
+     public List<Header> getSubHeaders() {
+         return subHeaders;
+     }
+
+     /**
+      * For the last sub-section of a parent the search continues with the
+      * parent's own successor.
+      *
+      * @return the next section (at this depth if possible), null if none
+      */
+     public Header getNext() {
+         if (parent == null) {
+             int ti = level1Headers.indexOf(this);
+             if (ti == -1 || ti == level1Headers.size() - 1) {
+                 return null;
+             }
+             return level1Headers.get(ti + 1);
+         }
+         int i = parent.subHeaders.indexOf(this);
+         assert i != -1 : this;
+         if (i == parent.subHeaders.size() - 1) {
+             return parent.getNext();
+         }
+         return parent.subHeaders.get(i + 1);
+     }
+
+     /**
+      * For the first sub-section of a parent the search continues with the
+      * parent's own predecessor (not the parent itself).
+      *
+      * @return the previous section (at this depth if possible), null if
+      *         none
+      */
+     public Header getPrevious() {
+         if (parent == null) {
+             int ti = level1Headers.indexOf(this);
+             if (ti == -1 || ti == 0) {
+                 return null;
+             }
+             return level1Headers.get(ti - 1);
+         }
+         int i = parent.subHeaders.indexOf(this);
+         assert i != -1 : this;
+         if (i == 0) {
+             return parent.getPrevious();
+         }
+         return parent.subHeaders.get(i - 1);
+     }
+
+     /**
+      * @return the cleaned heading text
+      */
+     @Override
+     public String toString() {
+         return heading;
+     }
+ }
+
+ /**
+  * The raw text, broken up into individual lines.
+  */
+ private List<String> lines;
+
+ /**
+  * The raw text, broken up into individual lines.
+  *
+  * @return a read-only view of the page's lines; attempts to modify it
+  *         throw UnsupportedOperationException
+  */
+ public List<String> getText() {
+     return Collections.unmodifiableList(lines);
+ }
+
+ /**
+  * The kind of content found on one line of the page.
+  * <p>
+  * The position of H1..H6 (ordinals 1 to 6) matches the header level; do
+  * not reorder the constants.
+  */
+ public enum KLineType {
+     /** A line that is neither a header, blank, marker nor meta-data. */
+     NORMAL,
+     /** A 1st-level header. */
+     H1,
+     /** A 2nd-level header. */
+     H2,
+     /** A 3rd-level header. */
+     H3,
+     /** A 4th-level header. */
+     H4,
+     /** A 5th-level header. */
+     H5,
+     /** A 6th-level header. */
+     H6,
+     /** A line with no visible content. */
+     BLANK,
+     // TODO LIST, BLOCKQUOTE,
+     /** A line marking Markdown info about the preceding line, e.g. ====== */
+     MARKER,
+     /** A line containing meta-data, e.g. title: My Page */
+     META
+ }
+
+ /**
+  * Information about each line.
+  */
+ private List<KLineType> lineTypes;
+ /**
+  * Objects found on the page, keyed by the number of the line they occur
+  * on.
+  */
+ private Map<Integer, Object> pageObjects = new HashMap<Integer, Object>();
+
+ // TODO meta-data, footnotes, tables, link & image attributes
+ /** Recognises a "key: value" meta-data line. */
+ private static Pattern multiMarkdownTag = Pattern.compile("^([\\w].*):(.*)");
+ /** Meta-data collected from the top of the page. */
+ private Map<String, String> multiMarkdownTags = new HashMap<String, String>();
+
+ // Regular expression for Github support
+ /** Recognises an http(s), ftp or file URL inside a line of text. */
+ private static Pattern githubURLDetection = Pattern.compile("((https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|])");
+
+ /**
+  * The top-level headers. FIXME handle documents which have a 2nd level
+  * header before any 1st level ones
+  */
+ private final List<Header> level1Headers = new ArrayList<Header>();
+ /** Preference store consulted for the optional syntax extensions. */
+ private final IPreferenceStore pStore;
+
+ /**
+  * Build the page model for a piece of Markdown text.
+  * <p>
+  * The preference store is fetched from the plug-in Activator first,
+  * because parsing the text reads its settings from that store.
+  *
+  * @param text
+  *            the text of the page
+  */
+ public MarkdownPage(String text) {
+     this.pStore = Activator.getDefault().getPreferenceStore();
+     this.setText(text);
+ }
+
+ /** Deepest header level Markdown defines (###### is level 6). */
+ private static final int MAX_HEADER_LEVEL = 6;
+
+ /**
+  * Reset the text for this page.
+  * <p>
+  * The text is split into lines and every line is given a KLineType. A
+  * Header object is created for each heading line. Two optional steps are
+  * controlled by preferences: reading a MultiMarkdown meta-data block from
+  * the top of the page, and rewriting Github-style fenced code blocks and
+  * bare URLs into plain Markdown.
+  *
+  * @param text
+  *            the new text of the page
+  */
+ private void setText(String text) {
+     // Get lines
+     lines = StringMethods.splitLines(text);
+     // Clean out old
+     level1Headers.clear();
+     lineTypes = new ArrayList<KLineType>(lines.size());
+     pageObjects.clear();
+     multiMarkdownTags.clear();
+     // Dummy level-1 header in case there are none
+     Header dummyTopHeader = new Header(1, 0, "", null);
+     level1Headers.add(dummyTopHeader);
+
+     // Check if we should support the Multi-Markdown Metadata
+     int firstContentLine = 0;
+     if (pStore.getBoolean(MarkdownPreferencePage.PREF_MULTIMARKDOWN_METADATA)) {
+         firstContentLine = parseMetadataBlock();
+     }
+
+     // Identify line types
+     classifyLinesFrom(firstContentLine, dummyTopHeader);
+
+     // Remove dummy header?
+     if (dummyTopHeader.getSubHeaders().size() == 0) {
+         level1Headers.remove(dummyTopHeader);
+     }
+
+     if (pStore.getBoolean(MarkdownPreferencePage.PREF_GITHUB_SYNTAX)) {
+         indentFencedCodeBlocks();
+         wrapBareUrlsInAllLines();
+     }
+ }
+
+ /**
+  * Read the MultiMarkdown meta-data block at the top of the page, if
+  * there is a valid one.
+  * <p>
+  * The key is the text before the first colon, and the data is the text
+  * after it. An indented line continues the data of the preceding key.
+  * Every line of the block is typed META.
+  *
+  * @return the index of the first line after the block; 0 if the page has
+  *         no valid meta-data block
+  */
+ private int parseMetadataBlock() {
+     int end = findMetadataBlockEnd();
+     if (end < 0) {
+         return 0;
+     }
+     String tag = "";
+     String data = "";
+     for (int i = 0; i < end; i++) {
+         String line = lines.get(i);
+         lineTypes.add(KLineType.META);
+         if (multiMarkdownTag.matcher(line).find()) {
+             // A match guarantees a colon after at least one word
+             // character, so the split below cannot fail.
+             int colon = line.indexOf(':');
+             tag = line.substring(0, colon).trim();
+             data = line.substring(colon + 1).trim();
+         } else {
+             // Multi-line tag: append to the data of the current key
+             data += StrUtils.LINEEND + line.trim();
+         }
+         multiMarkdownTags.put(tag, data);
+     }
+     return end;
+ }
+
+ /**
+  * Check whether the page starts with a valid meta-data block.
+  * <p>
+  * There must not be any whitespace above the meta-data, and the block
+  * ends with the first whitespace-only line. Each line must either be
+  * "key: value" or, after the first line, start with whitespace.
+  *
+  * @return the index of the line that ends the block (the first blank
+  *         line, or the number of lines if there is none); -1 if the top
+  *         of the page is not valid meta-data
+  */
+ private int findMetadataBlockEnd() {
+     int i = 0;
+     for (; i < lines.size(); i++) {
+         String line = lines.get(i);
+         if (Utils.isBlank(line)) {
+             break;
+         }
+         if (multiMarkdownTag.matcher(line).find()) {
+             continue;
+         }
+         // Not "key: value": only acceptable as an indented continuation
+         // line. Checking the first character directly works whatever
+         // line terminator the line carries.
+         boolean indented = line.length() > 0 && Character.isWhitespace(line.charAt(0));
+         if (i == 0 || !indented) {
+             return -1;
+         }
+     }
+     return i;
+ }
+
+ /**
+  * Assign a line type to every line from firstLine onwards and build the
+  * tree of Headers.
+  *
+  * @param firstLine
+  *            index of the first line to classify; all earlier lines must
+  *            already have an entry in lineTypes
+  * @param currentHeader
+  *            the header that precedes firstLine
+  */
+ private void classifyLinesFrom(int firstLine, Header currentHeader) {
+     for (int lineNum = firstLine; lineNum < lines.size(); lineNum++) {
+         String line = lines.get(lineNum);
+         // Headings. Hashes beyond the sixth do not deepen the level;
+         // without the cap a long run of #s would select a non-header
+         // line type or run off the end of the type list.
+         int h = Math.min(numHash(line), MAX_HEADER_LEVEL);
+         String hLine = line;
+         int hLineNum = lineNum;
+         int underline = -1;
+         if (lineNum != 0) {
+             underline = just(line, '=') ? 1 : just(line, '-') ? 2 : -1;
+         }
+         if (underline != -1) {
+             // The heading text is on the previous line: re-type that
+             // line and mark this one as its marker.
+             h = underline;
+             hLineNum = lineNum - 1;
+             hLine = lines.get(hLineNum);
+             lineTypes.set(hLineNum, headerTypeFor(h));
+             lineTypes.add(KLineType.MARKER);
+         }
+         // Create a Header object
+         if (h > 0) {
+             if (underline == -1) {
+                 lineTypes.add(headerTypeFor(h));
+             }
+             Header header = new Header(h, hLineNum, hLine, currentHeader);
+             if (h == 1) {
+                 level1Headers.add(header);
+             }
+             pageObjects.put(hLineNum, header);
+             currentHeader = header;
+             continue;
+         }
+         // TODO List
+         // TODO Block quote
+         // Blank line
+         if (Utils.isBlank(line)) {
+             lineTypes.add(KLineType.BLANK);
+             continue;
+         }
+         // Normal
+         lineTypes.add(KLineType.NORMAL);
+     }
+ }
+
+ /**
+  * @param level
+  *            a header level from 1 to MAX_HEADER_LEVEL
+  * @return the line type for a header of that level (H1 for 1, etc.)
+  */
+ private KLineType headerTypeFor(int level) {
+     return KLineType.valueOf("H" + level);
+ }
+
+ /**
+  * Github support: turn fenced code blocks into indented code blocks.
+  * <p>
+  * Each fence line is replaced by an empty line, and every line between
+  * an opening and a closing fence is indented.
+  */
+ private void indentFencedCodeBlocks() {
+     boolean inCodeBlock = false;
+     for (int lineNum = 0; lineNum < lines.size(); lineNum++) {
+         String line = lines.get(lineNum);
+         // Found the start or end of a code block. A prefix test also
+         // recognises fences that end in \r\n or have no line terminator.
+         if (line.startsWith("```")) {
+             inCodeBlock = !inCodeBlock;
+             // We force the line to be blank. But we mark it as normal
+             // to prevent it from being stripped
+             lines.set(lineNum, "\n");
+             lineTypes.set(lineNum, KLineType.NORMAL);
+             continue;
+         }
+         if (inCodeBlock) {
+             // Markdown needs an indent of four spaces for a code block.
+             lines.set(lineNum, "    " + line);
+         }
+     }
+ }
+
+ /**
+  * Github support: URL detection. Every link that is not already
+  * captured by Markdown syntax is wrapped in '<' and '>' so that the
+  * markdown library interprets it as a link.
+  */
+ private void wrapBareUrlsInAllLines() {
+     for (int lineNum = 0; lineNum < lines.size(); lineNum++) {
+         String line = lines.get(lineNum);
+         String linked = wrapBareUrls(line);
+         if (!linked.equals(line)) {
+             // We replace the string in the array
+             lines.set(lineNum, linked);
+         }
+     }
+ }
+
+ /**
+  * @param line
+  *            one line of the page
+  * @return the line with every bare URL wrapped in '<' and '>'; the line
+  *         itself if nothing had to be wrapped
+  */
+ private String wrapBareUrls(String line) {
+     Matcher m = githubURLDetection.matcher(line);
+     StringBuilder out = null;
+     int copiedUpTo = 0;
+     while (m.find()) {
+         if (isAlreadyLinked(line, m.start(), m.end())) {
+             continue;
+         }
+         if (out == null) {
+             out = new StringBuilder(line.length() + 16);
+         }
+         out.append(line, copiedUpTo, m.start()).append('<').append(m.group(0)).append('>');
+         copiedUpTo = m.end();
+     }
+     if (out == null) {
+         return line;
+     }
+     return out.append(line, copiedUpTo, line.length()).toString();
+ }
+
+ /**
+  * @param line
+  *            the line containing the URL
+  * @param start
+  *            index of the first character of the URL
+  * @param end
+  *            index just after the last character of the URL
+  * @return true if the URL is already written as <link>,
+  *         [description](link) or [description](link "title")
+  */
+ private boolean isAlreadyLinked(String line, int start, int end) {
+     int length = line.length();
+     // <link>
+     if (start >= 1 && end < length && line.charAt(start - 1) == '<' && line.charAt(end) == '>') {
+         return true;
+     }
+     if (start >= 2 && line.charAt(start - 2) == ']' && line.charAt(start - 1) == '(') {
+         // [description](link)
+         if (end < length && line.charAt(end) == ')') {
+             return true;
+         }
+         // [description](link "title")
+         if (end + 1 < length && line.charAt(end) == ' ' && line.charAt(end + 1) == '"') {
+             return true;
+         }
+     }
+     return false;
+ }
+
+ /**
+  * Test whether a line consists of nothing but repetitions of one
+  * character, e.g. the ====== under a heading.
+  *
+  * @param line
+  *            the line to test
+  * @param c
+  *            the character the line must be made of; it is matched
+  *            literally, even if it has a special meaning in regular
+  *            expressions
+  * @return true if line is just cs (and whitespace at the start/end)
+  */
+ boolean just(String line, char c) {
+     String literal = Pattern.quote(String.valueOf(c));
+     return line.matches("\\s*" + literal + "+\\s*");
+ }
+
+ /**
+  * Count the unbroken run of # symbols at the very start of a line.
+  *
+  * @param line
+  *            the line to inspect
+  * @return The number of # symbols prepending the line; the length of the
+  *         line if it contains nothing else.
+  */
+ private int numHash(String line) {
+     int count = 0;
+     while (count < line.length() && line.charAt(count) == '#') {
+         count++;
+     }
+     return count;
+ }
+
+ /**
+  * List the headers directly below a section.
+  *
+  * @param parent
+  *            Can be null for top-level
+  * @return Read-only list of sub-headers. Never null. FIXME handle
+  *         documents which have a 2nd level header before any 1st level
+  *         ones
+  */
+ public List<Header> getHeadings(Header parent) {
+     if (parent == null) {
+         return Collections.unmodifiableList(level1Headers);
+     }
+     return Collections.unmodifiableList(parent.subHeaders);
+ }
+
+ // public WebPage getWebPage() {
+ // WebPage page = new WebPage();
+ // // Add the lines, one by one
+ // boolean inParagraph = false;
+ // for (int i=0; i");
+ // line = cleanHeader(line);
+ // page.addText("<"+type+">"+line+""+type+">");
+ // continue;
+ // case MARKER: // Ignore
+ // continue;
+ // // TODO List?
+ // // TODO Block quote?
+ // }
+ // // Paragraph end?
+ // if (Utils.isBlank(line)) {
+ // if (inParagraph) page.addText("