X-Git-Url: https://gerrit.simantics.org/r/gitweb?a=blobdiff_plain;f=bundles%2Fwinterwell.markdown%2Fsrc%2Fwinterwell%2Fmarkdown%2Fpagemodel%2FMarkdownPage.java;h=64339959a006629524b597c1076256ef7eb0c6b6;hb=refs%2Fchanges%2F94%2F2594%2F1;hp=a18a5ded6c76c983e4b1fe89a5ed9a71fc95aed6;hpb=9a175feb652b2b7bba7afa540831b9076be3c10e;p=simantics%2Fplatform.git
diff --git a/bundles/winterwell.markdown/src/winterwell/markdown/pagemodel/MarkdownPage.java b/bundles/winterwell.markdown/src/winterwell/markdown/pagemodel/MarkdownPage.java
index a18a5ded6..64339959a 100644
--- a/bundles/winterwell.markdown/src/winterwell/markdown/pagemodel/MarkdownPage.java
+++ b/bundles/winterwell.markdown/src/winterwell/markdown/pagemodel/MarkdownPage.java
@@ -1,617 +1,617 @@
-/**
- * Copyright winterwell Mathematics Ltd.
- * @author Daniel Winterstein
- * 11 Jan 2007
- */
-package winterwell.markdown.pagemodel;
-
-import java.io.File;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.eclipse.jface.preference.IPreferenceStore;
-
-import winterwell.markdown.Activator;
-import winterwell.markdown.StringMethods;
-import winterwell.markdown.preferences.MarkdownPreferencePage;
-import winterwell.utils.FailureException;
-import winterwell.utils.Process;
-import winterwell.utils.StrUtils;
-import winterwell.utils.Utils;
-import winterwell.utils.io.FileUtils;
-
-import com.petebevin.markdown.MarkdownProcessor;
-
-/**
- * Understands Markdown syntax.
- *
- * @author Daniel Winterstein
- */
-public class MarkdownPage {
-
- /**
-  * Strips the leading and trailing '#' markers and whitespace from a header
-  * line, e.g. "## Misc Warblings ##" becomes "Misc Warblings".
-  *
-  * @param line
-  *            the raw header line
-  * @return the header text; the empty string if the line holds nothing but
-  *         '#' characters and whitespace
-  */
- private String cleanHeader(String line) {
-     int start = 0;
-     int end = line.length();
-     // Skip the opening '#' markers and any padding in front of the text.
-     while (start < end && (line.charAt(start) == '#' || Character.isWhitespace(line.charAt(start)))) {
-         start++;
-     }
-     // Drop closing '#' markers and padding. The scan is bounded by start
-     // rather than by index 1, so a one-character heading such as "a ##" is
-     // trimmed to "a" and a line of nothing but '#' becomes "".
-     while (end > start && (line.charAt(end - 1) == '#' || Character.isWhitespace(line.charAt(end - 1)))) {
-         end--;
-     }
-     return line.substring(start, end);
- }
-
- /**
-  * Represents information about a section header. E.g. ## Misc Warblings
-  * <p>
-  * Headers form a tree: each header knows its parent section and its
-  * sub-sections. Headers without a parent are looked up in the page's list
-  * of top-level headers.
-  *
-  * @author daniel
-  */
- public class Header {
-     /**
-      * 1 = top-level (i.e. #), 2 = 2nd-level (i.e. ##), etc.
-      */
-     final int level;
-     /**
-      * The text of the Header, as returned by cleanHeader
-      */
-     final String heading;
-     /**
-      * Sub-sections, if any. Never null.
-      */
-     final List<Header> subHeaders = new ArrayList<Header>();
-     /**
-      * The line on which this header occurs.
-      */
-     final int lineNumber;
-     /**
-      * The parent section. Can be null.
-      */
-     private Header parent;
-
-     /**
-      * Create a marker for a section Header
-      *
-      * @param level
-      *            1 = top-level (i.e. #), 2 = 2nd-level (i.e. ##), etc.
-      * @param lineNumber
-      *            The line on which this header occurs
-      * @param heading
-      *            The text of the Header; it is passed through cleanHeader
-      *            before being stored
-      * @param currentHeader
-      *            The previous Header. This is used to find the parent
-      *            section if there is one. Can be null.
-      */
-     Header(int level, int lineNumber, String heading, Header currentHeader) {
-         this.lineNumber = lineNumber;
-         this.level = level;
-         this.heading = cleanHeader(heading);
-         // Heading Tree
-         setParent(currentHeader);
-     }
-
-     /**
-      * Finds the parent by walking up from the previous header until a
-      * header with a smaller level number is met, and registers this header
-      * as one of its sub-sections. The parent stays null when no such
-      * header exists.
-      */
-     private void setParent(Header currentHeader) {
-         Header candidate = currentHeader;
-         while (candidate != null && candidate.level >= level) {
-             candidate = candidate.parent;
-         }
-         parent = candidate;
-         if (parent != null) {
-             parent.subHeaders.add(this);
-         }
-     }
-
-     public int getLineNumber() {
-         return lineNumber;
-     }
-
-     /**
-      *
-      * @return the next section (at this depth if possible), null if none.
-      *         The last sub-section of a parent yields the section that
-      *         follows the parent.
-      */
-     public Header getNext() {
-         if (parent == null) {
-             int ti = level1Headers.indexOf(this);
-             if (ti == -1 || ti == level1Headers.size() - 1) {
-                 return null;
-             }
-             return level1Headers.get(ti + 1);
-         }
-         int i = parent.subHeaders.indexOf(this);
-         assert i != -1 : this;
-         if (i == parent.subHeaders.size() - 1) {
-             return parent.getNext();
-         }
-         return parent.subHeaders.get(i + 1);
-     }
-
-     /**
-      *
-      * @return the previous section (at this depth if possible), null if
-      *         none. The first sub-section of a parent yields the section
-      *         that precedes the parent.
-      */
-     public Header getPrevious() {
-         if (parent == null) {
-             int ti = level1Headers.indexOf(this);
-             if (ti == -1 || ti == 0) {
-                 return null;
-             }
-             return level1Headers.get(ti - 1);
-         }
-         int i = parent.subHeaders.indexOf(this);
-         assert i != -1 : this;
-         if (i == 0) {
-             return parent.getPrevious();
-         }
-         return parent.subHeaders.get(i - 1);
-     }
-
-     /**
-      * @return the parent section, or null if there is none
-      */
-     public Header getParent() {
-         return parent;
-     }
-
-     /**
-      * Sub-sections. May be zero-length, never null.
-      */
-     public List<Header> getSubHeaders() {
-         return subHeaders;
-     }
-
-     /**
-      * @return the heading text
-      */
-     @Override
-     public String toString() {
-         return heading;
-     }
-
-     public int getLevel() {
-         return level;
-     }
- }
-
- /**
-  * The text of the page, broken up into individual lines. setText may
-  * rewrite entries (fenced code blocks, bare URLs) when GitHub syntax
-  * support is switched on.
-  */
- private List<String> lines;
-
- /**
-  * @return a read-only view of the page text, broken up into individual
-  *         lines as currently held by this page
-  */
- public List<String> getText() {
-     return Collections.unmodifiableList(lines);
- }
-
- /**
-  * The kind of content found on one line of the page.
-  */
- // The order of the constants matters: setText looks header types up with
- // KLineType.values()[level], so H1..H6 must stay at positions 1..6.
- public enum KLineType {
- NORMAL, H1, H2, H3, H4, H5, H6, BLANK,
- // TODO LIST, BLOCKQUOTE,
- /** A line marking Markdown info about the preceding line, e.g. ====== */
- MARKER,
- /** A line containing meta-data, e.g. title: My Page */
- META
- }
-
- /**
-  * Information about each line: one entry per line of the page, in line
-  * order.
-  */
- private List<KLineType> lineTypes;
- /**
-  * Objects found on the page, keyed by line number. setText stores each
-  * Header under the line it occurs on.
-  */
- private Map<Integer, Object> pageObjects = new HashMap<Integer, Object>();
-
- // TODO meta-data, footnotes, tables, link & image attributes
- /**
-  * Matches a MultiMarkdown metadata line "key: value". Group 1 is the key
-  * (the text before the first colon; the group is reluctant so a value that
-  * itself contains a colon, e.g. a URL, is not split), group 2 is the text
-  * after that colon.
-  */
- private static final Pattern multiMarkdownTag = Pattern.compile("^([\\w].*?):(.*)");
- /**
-  * The MultiMarkdown metadata collected by setText.
-  */
- private Map<String, String> multiMarkdownTags = new HashMap<String, String>();
-
- // Regular expression for Github support
- /**
-  * Matches an http, https, ftp or file URL. The last character of a match
-  * is never one of the punctuation marks ? ! : , . ; so sentence
-  * punctuation after a URL is left out of it.
-  */
- private static final Pattern githubURLDetection = Pattern.compile("((https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|])");
-
- /**
-  * The top-level headers. FIXME handle documents which have a 2nd level
-  * header before any 1st level ones
-  */
- private final List<Header> level1Headers = new ArrayList<Header>();
- /**
-  * The preference store consulted by setText for the optional syntax
-  * extensions.
-  */
- private final IPreferenceStore pStore;
-
- /**
-  * Create a page model for the given Markdown source.
-  *
-  * @param text
-  *            the Markdown source of the page
-  */
- public MarkdownPage(String text) {
-     // The preference store has to be in place first: setText reads the
-     // syntax-extension preferences from it.
-     this.pStore = Activator.getDefault().getPreferenceStore();
-     this.setText(text);
- }
-
- /** The deepest header level Markdown defines (######). */
- private static final int MAX_HEADER_LEVEL = 6;
-
- /**
-  * Reset the text for this page: split it into lines, record a type for
-  * every line and rebuild the header tree. Depending on the preferences, a
-  * leading MultiMarkdown metadata block is read first, and the GitHub
-  * extensions (fenced code blocks, bare URLs) are rewritten afterwards.
-  *
-  * @param text
-  *            the Markdown source of the page
-  */
- private void setText(String text) {
-     // Get lines
-     lines = StringMethods.splitLines(text);
-     // Clean out old
-     level1Headers.clear();
-     lineTypes = new ArrayList<KLineType>(lines.size());
-     pageObjects.clear();
-     multiMarkdownTags.clear();
-     // Dummy level-1 header in case there are none
-     Header dummyTopHeader = new Header(1, 0, "", null);
-     level1Headers.add(dummyTopHeader);
-
-     // Check if we should support the Multi-Markdown Metadata. Metadata
-     // lines are consumed here; header detection starts after them.
-     int firstBodyLine = 0;
-     if (pStore.getBoolean(MarkdownPreferencePage.PREF_MULTIMARKDOWN_METADATA)) {
-         firstBodyLine = readMultiMarkdownMetadata();
-     }
-
-     classifyLines(firstBodyLine, dummyTopHeader);
-
-     // Remove dummy header?
-     if (dummyTopHeader.getSubHeaders().size() == 0) {
-         level1Headers.remove(dummyTopHeader);
-     }
-
-     if (pStore.getBoolean(MarkdownPreferencePage.PREF_GITHUB_SYNTAX)) {
-         indentFencedCodeBlocks();
-         wrapBareUrls();
-     }
- }
-
- /**
-  * Reads the MultiMarkdown metadata block at the top of the page, if there
-  * is a valid one, into multiMarkdownTags and marks its lines as META.
-  * <p>
-  * In a metadata line the key is the text before the colon and the data is
-  * the text after it. The data of a key may continue on following lines
-  * that start with whitespace. There must not be any whitespace above the
-  * metadata, and the metadata block ends with the first whitespace-only
-  * line.
-  *
-  * @return the index of the first line that is not metadata; 0 if the page
-  *         does not start with a valid metadata block
-  */
- private int readMultiMarkdownMetadata() {
-     if (!startsWithValidMetadata()) {
-         return 0;
-     }
-     String tag = "";
-     String data = "";
-     int lineNum;
-     for (lineNum = 0; lineNum < lines.size(); lineNum++) {
-         String line = lines.get(lineNum);
-         if (Utils.isBlank(line)) {
-             break;
-         }
-         Matcher m = multiMarkdownTag.matcher(line);
-         if (m.find()) {
-             // Group 1 is the key and group 2 the value (group 0 would be
-             // the whole "key: value" text).
-             tag = m.group(1).trim();
-             data = m.group(2).trim();
-         } else {
-             // Multi-line tag: append to the data of the current key
-             data += StrUtils.LINEEND + line.trim();
-         }
-         lineTypes.add(KLineType.META);
-         multiMarkdownTags.put(tag, data);
-     }
-     return lineNum;
- }
-
- /**
-  * Check if the metadata is valid: up to the first blank line, the first
-  * line must be a "key: value" line and every later line must either be
-  * one too or start with whitespace (a continuation of the previous key).
-  *
-  * @return true if the leading lines form a valid metadata block
-  */
- private boolean startsWithValidMetadata() {
-     for (int lineNum = 0; lineNum < lines.size(); lineNum++) {
-         String line = lines.get(lineNum);
-         if (Utils.isBlank(line)) {
-             break;
-         }
-         if (multiMarkdownTag.matcher(line).find()) {
-             continue;
-         }
-         // Not a "key: value" line. That is only acceptable for an indented
-         // continuation line, which cannot be the first line. The first
-         // character is tested directly so the result does not depend on
-         // how the line is terminated.
-         boolean indented = line.length() > 0 && Character.isWhitespace(line.charAt(0));
-         if (lineNum == 0 || !indented) {
-             return false;
-         }
-     }
-     return true;
- }
-
- /**
-  * Records a line type for every line from firstLine to the end of the
-  * page and creates a Header for each heading found, either a line starting
-  * with '#' characters or a line underlined with '=' (level 1) or '-'
-  * (level 2).
-  *
-  * @param firstLine
-  *            index of the first line to look at
-  * @param topHeader
-  *            the header that precedes the first line
-  */
- private void classifyLines(int firstLine, Header topHeader) {
-     Header currentHeader = topHeader;
-     for (int lineNum = firstLine; lineNum < lines.size(); lineNum++) {
-         String line = lines.get(lineNum);
-         // Headings. Markdown has six header levels; capping the count
-         // keeps the KLineType lookup below inside H1..H6 however many '#'
-         // characters the line starts with.
-         int h = Math.min(numHash(line), MAX_HEADER_LEVEL);
-         String hLine = line;
-         int hLineNum = lineNum;
-         int underline = -1;
-         if (lineNum != 0) {
-             // NOTE(review): a "---" line after a blank line is also taken
-             // as an underline here, giving a header with empty text;
-             // Markdown reads that as a horizontal rule - confirm whether
-             // this is wanted.
-             underline = just(line, '=') ? 1 : just(line, '-') ? 2 : -1;
-         }
-         if (underline != -1) {
-             // The heading text is on the previous line, which has already
-             // been given a type: correct it, then mark this line.
-             h = underline;
-             hLineNum = lineNum - 1;
-             hLine = lines.get(lineNum - 1);
-             lineTypes.set(hLineNum, KLineType.values()[h]);
-             lineTypes.add(KLineType.MARKER);
-         }
-         // Create a Header object
-         if (h > 0) {
-             if (underline == -1) {
-                 lineTypes.add(KLineType.values()[h]);
-             }
-             Header header = new Header(h, hLineNum, hLine, currentHeader);
-             if (h == 1) {
-                 level1Headers.add(header);
-             }
-             pageObjects.put(hLineNum, header);
-             currentHeader = header;
-             continue;
-         }
-         // TODO List
-         // TODO Block quote
-         // Blank line
-         if (Utils.isBlank(line)) {
-             lineTypes.add(KLineType.BLANK);
-             continue;
-         }
-         // Normal
-         lineTypes.add(KLineType.NORMAL);
-     } // end line-loop
- }
-
- /**
-  * GitHub support for code blocks: the lines between two ``` fence lines
-  * are indented by four spaces, which is how Markdown marks a code block,
-  * and the fence lines themselves are blanked.
-  */
- private void indentFencedCodeBlocks() {
-     boolean inCodeBlock = false;
-     for (int lineNum = 0; lineNum < lines.size(); lineNum++) {
-         String line = lines.get(lineNum);
-         // Found the start or end of a code block. Only the start of the
-         // line is tested, so the fence is recognised whatever the line
-         // ending is, including a last line without one.
-         if (line.startsWith("```")) {
-             inCodeBlock = !inCodeBlock;
-
-             // We force the line to be blank. But we mark it as normal
-             // to prevent it from being stripped
-             lines.set(lineNum, "\n");
-             lineTypes.set(lineNum, KLineType.NORMAL);
-             continue;
-         }
-         if (inCodeBlock) {
-             lines.set(lineNum, "    " + line);
-         }
-     }
- }
-
- /**
-  * GitHub support for URL detection: every URL that is not already part of
-  * Markdown link syntax is wrapped in '<' and '>' so that the Markdown
-  * library turns it into a link.
-  */
- private void wrapBareUrls() {
-     for (int lineNum = 0; lineNum < lines.size(); lineNum++) {
-         String line = lines.get(lineNum);
-         // When a link has been replaced we need to scan the string again
-         // as the offsets have changed (we add '<' and '>' to the link)
-         boolean urlReplaced;
-         do {
-             urlReplaced = false;
-             Matcher m = githubURLDetection.matcher(line);
-             while (m.find()) {
-                 if (isMarkedUpUrl(line, m.start(), m.end())) {
-                     continue;
-                 }
-                 // substring(0, 0) is empty, so a link at the very
-                 // beginning of the line needs no special case.
-                 line = line.substring(0, m.start()) + "<" + m.group(0) + ">" + line.substring(m.end());
-
-                 // We replace the string in the array
-                 lines.set(lineNum, line);
-                 urlReplaced = true;
-                 break;
-             }
-         } while (urlReplaced);
-     }
- }
-
- /**
-  * @param line
-  *            the line containing the URL
-  * @param start
-  *            index of the first character of the URL
-  * @param end
-  *            index just after the last character of the URL
-  * @return true if the URL is already written as <link>,
-  *         [description](link) or [description](link "title")
-  */
- private static boolean isMarkedUpUrl(String line, int start, int end) {
-     if (start < 1 || end >= line.length()) {
-         return false;
-     }
-     char before = line.charAt(start - 1);
-     char after = line.charAt(end);
-     // <link>
-     if (before == '<' && after == '>') {
-         return true;
-     }
-     // Both remaining formats start with "](" right in front of the URL
-     if (start < 2 || before != '(' || line.charAt(start - 2) != ']') {
-         return false;
-     }
-     // [description](link)
-     if (after == ')') {
-         return true;
-     }
-     // [description](link "title")
-     return after == ' ' && end + 1 < line.length() && line.charAt(end + 1) == '"';
- }
-
- /**
-  * @param line
-  *            the line to test
-  * @param c
-  *            the character the line must consist of; it is taken
-  *            literally, also when it is a regular-expression
-  *            metacharacter such as '*' or '.'
-  * @return true if line is just cs (and whitespace at the start/end)
-  */
- boolean just(String line, char c) {
-     // Quote c so that it cannot change the meaning of the pattern.
-     String literal = Pattern.quote(String.valueOf(c));
-     return line.matches("\\s*(?:" + literal + ")+\\s*");
- }
-
- /**
-  * Counts the '#' characters at the very start of a line.
-  *
-  * @param line
-  *            the line to inspect
-  * @return The number of # symbols prepending the line; the length of the
-  *         line if it consists of # symbols only.
-  */
- private int numHash(String line) {
-     int count = 0;
-     while (count < line.length() && line.charAt(count) == '#') {
-         count++;
-     }
-     return count;
- }
-
- /**
-  *
-  * @param parent
-  *            Can be null for top-level
-  * @return Read-only list of the sub-headers of parent, or of the top-level
-  *         headers if parent is null. Never null. FIXME handle documents
-  *         which have a 2nd level header before any 1st level ones
-  */
- public List<Header> getHeadings(Header parent) {
-     List<Header> headings = (parent == null) ? level1Headers : parent.subHeaders;
-     return Collections.unmodifiableList(headings);
- }
-
- // public WebPage getWebPage() {
- // WebPage page = new WebPage();
- // // Add the lines, one by one
- // boolean inParagraph = false;
- // for (int i=0; i");
- // line = cleanHeader(line);
- // page.addText("<"+type+">"+line+""+type+">");
- // continue;
- // case MARKER: // Ignore
- // continue;
- // // TODO List?
- // // TODO Block quote?
- // }
- // // Paragraph end?
- // if (Utils.isBlank(line)) {
- // if (inParagraph) page.addText("