X-Git-Url: https://gerrit.simantics.org/r/gitweb?p=simantics%2Fplatform.git;a=blobdiff_plain;f=bundles%2Fwinterwell.markdown%2Fsrc%2Fwinterwell%2Fmarkdown%2Fpagemodel%2FMarkdownPage.java;fp=bundles%2Fwinterwell.markdown%2Fsrc%2Fwinterwell%2Fmarkdown%2Fpagemodel%2FMarkdownPage.java;h=64339959a006629524b597c1076256ef7eb0c6b6;hp=a18a5ded6c76c983e4b1fe89a5ed9a71fc95aed6;hb=0ae2b770234dfc3cbb18bd38f324125cf0faca07;hpb=24e2b34260f219f0d1644ca7a138894980e25b14 diff --git a/bundles/winterwell.markdown/src/winterwell/markdown/pagemodel/MarkdownPage.java b/bundles/winterwell.markdown/src/winterwell/markdown/pagemodel/MarkdownPage.java index a18a5ded6..64339959a 100644 --- a/bundles/winterwell.markdown/src/winterwell/markdown/pagemodel/MarkdownPage.java +++ b/bundles/winterwell.markdown/src/winterwell/markdown/pagemodel/MarkdownPage.java @@ -1,617 +1,617 @@ -/** - * Copyright winterwell Mathematics Ltd. - * @author Daniel Winterstein - * 11 Jan 2007 - */ -package winterwell.markdown.pagemodel; - -import java.io.File; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import org.eclipse.jface.preference.IPreferenceStore; - -import winterwell.markdown.Activator; -import winterwell.markdown.StringMethods; -import winterwell.markdown.preferences.MarkdownPreferencePage; -import winterwell.utils.FailureException; -import winterwell.utils.Process; -import winterwell.utils.StrUtils; -import winterwell.utils.Utils; -import winterwell.utils.io.FileUtils; - -import com.petebevin.markdown.MarkdownProcessor; - -/** - * Understands Markdown syntax. - * - * @author Daniel Winterstein - */ -public class MarkdownPage { - - /** - * Strip leading and trailing #s and whitespace - * - * @param line - * @return cleaned up line - */ - private String cleanHeader(String line) { - for (int j = 0; j < line.length(); j++) { - char c = line.charAt(j); - if (c != '#' && !Character.isWhitespace(c)) { - line = line.substring(j); - break; - } - } - for (int j = line.length() - 1; j > 0; j--) { - char c = line.charAt(j); - if (c != '#' && !Character.isWhitespace(c)) { - line = line.substring(0, j + 1); - break; - } - } - return line; - } - - /** - * Represents information about a section header. E.g. ## Misc Warblings - * - * @author daniel - */ - public class Header { - /** - * 1 = top-level (i.e. #), 2= 2nd-level (i.e. ##), etc. - */ - final int level; - /** - * The text of the Header - */ - final String heading; - /** - * Sub-sections, if any - */ - final List
subHeaders = new ArrayList
(); - /** - * The line on which this header occurs. - */ - final int lineNumber; - - public int getLineNumber() { - return lineNumber; - } - - /** - * - * @return the next section (at this depth if possible), null if none - */ - public Header getNext() { - if (parent == null) { - int ti = level1Headers.indexOf(this); - if (ti == -1 || ti == level1Headers.size() - 1) - return null; - return level1Headers.get(ti + 1); - } - int i = parent.subHeaders.indexOf(this); - assert i != -1 : this; - if (i == parent.subHeaders.size() - 1) - return parent.getNext(); - return parent.subHeaders.get(i + 1); - } - /** - * - * @return the next section (at this depth if possible), null if none - */ - public Header getPrevious() { - if (parent == null) { - int ti = level1Headers.indexOf(this); - if (ti == -1 || ti == 0) - return null; - return level1Headers.get(ti - 1); - } - int i = parent.subHeaders.indexOf(this); - assert i != -1 : this; - if (i == 0) - return parent.getPrevious(); - return parent.subHeaders.get(i - 1); - } - - - /** - * The parent section. Can be null. - */ - private Header parent; - - /** - * Create a marker for a section Header - * - * @param level - * 1 = top-level (i.e. #), 2= 2nd-level (i.e. ##), etc. - * @param lineNumber - * The line on which this header occurs - * @param heading - * The text of the Header, trimmed of #s - * @param currentHeader - * The previous Header. This is used to find the parent - * section if there is one. Can be null. - */ - Header(int level, int lineNumber, String heading, Header currentHeader) { - this.lineNumber = lineNumber; - this.level = level; - this.heading = cleanHeader(heading); - // Heading Tree - setParent(currentHeader); - } - - private void setParent(Header currentHeader) { - if (currentHeader == null) { - parent = null; - return; - } - if (currentHeader.level < level) { - parent = currentHeader; - parent.subHeaders.add(this); - return; - } - setParent(currentHeader.parent); - } - - public Header getParent() { - return parent; - } - - /** - * Sub-sections. May be zero-length, never null. - */ - public List
getSubHeaders() { - return subHeaders; - } - - @Override - public String toString() { - return heading; - } - - public int getLevel() { - return level; - } - } - - /** - * The raw text, broken up into individual lines. - */ - private List lines; - - /** - * The raw text, broken up into individual lines. - */ - public List getText() { - return Collections.unmodifiableList(lines); - } - - public enum KLineType { - NORMAL, H1, H2, H3, H4, H5, H6, BLANK, - // TODO LIST, BLOCKQUOTE, - /** A line marking Markdown info about the preceding line, e.g. ====== */ - MARKER, - /** A line containing meta-data, e.g. title: My Page */ - META - } - - /** - * Information about each line. - */ - private List lineTypes; - private Map pageObjects = new HashMap(); - - // TODO meta-data, footnotes, tables, link & image attributes - private static Pattern multiMarkdownTag = Pattern.compile("^([\\w].*):(.*)"); - private Map multiMarkdownTags = new HashMap(); - - // Regular expression for Github support - private static Pattern githubURLDetection = Pattern.compile("((https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|])"); - - /** - * The top-level headers. FIXME handle documents which have a 2nd level - * header before any 1st level ones - */ - private final List
level1Headers = new ArrayList
(); - private final IPreferenceStore pStore; - - /** - * Create a page. - * - * @param text - */ - public MarkdownPage(String text) { - pStore = Activator.getDefault().getPreferenceStore(); - setText(text); - } - - /** - * Reset the text for this page. - * - * @param text - */ - private void setText(String text) { - // Get lines - lines = StringMethods.splitLines(text); - // Clean out old - level1Headers.clear(); - lineTypes = new ArrayList(lines.size()); - pageObjects.clear(); - // Dummy level-1 header in case there are none - Header dummyTopHeader = new Header(1, 0, "", null); - level1Headers.add(dummyTopHeader); - Header currentHeader = dummyTopHeader; - // Identify line types - int lineNum = 0; - - // Check if we should support the Multi-Markdown Metadata - boolean multiMarkdownMetadataSupport = - pStore.getBoolean(MarkdownPreferencePage.PREF_MULTIMARKDOWN_METADATA); - - // Multi-markdown header - if (multiMarkdownMetadataSupport) { - // The key is the text before the colon, and the data is the text - // after the - // colon. In the above example, notice that there are two lines of - // information - // for the Author key. If you end a line with “space-space-newline”, - // the newline - // will be included when converted to other formats. - // - // There must not be any whitespace above the metadata, and the - // metadata block - // ends with the first whitespace only line. The metadata is - // stripped from the - // document before it is passed on to the syntax parser. - - // - // Check if the Metdatas are valid - // - boolean validMetadata = true; - for (lineNum = 0; lineNum < lines.size(); lineNum++) { - String line = lines.get(lineNum); - if (Utils.isBlank(line)) { - break; - } - Matcher m = multiMarkdownTag.matcher(line); - if (!m.find()) { - if (lineNum == 0) { - // No MultiMarkdown metadata - validMetadata = false; - break; - } else if (!line.matches("^\\s.*\n")) { - // The next line was not intended (ie. it does not start - // with a whitespace) - validMetadata = false; - break; - } - } - } - - // Valid Metadatas have been found. We need to retrieve these keys/values. - if (validMetadata) { - String data = ""; - String tag = ""; - for (lineNum = 0; lineNum < lines.size(); lineNum++) { - String line = lines.get(lineNum); - if (Utils.isBlank(line)) { - break; - } - Matcher m = multiMarkdownTag.matcher(line); - if (!m.find()) { - if (lineNum == 0) { - break; - } - // Multi-line tag - lineTypes.add(KLineType.META); - data += StrUtils.LINEEND + line.trim(); - multiMarkdownTags.put(tag, data); - } else { - lineTypes.add(KLineType.META); - tag = m.group(0); - data = m.group(1).trim(); - if (m.group(1).endsWith(line)) - multiMarkdownTags.put(tag, data); - } - } - } else { - lineNum = 0; - } - } - for (; lineNum < lines.size(); lineNum++) { - String line = lines.get(lineNum); - // Headings - int h = numHash(line); - String hLine = line; - int hLineNum = lineNum; - int underline = -1; - if (lineNum != 0) { - underline = just(line, '=') ? 1 : just(line, '-') ? 2 : -1; - } - if (underline != -1) { - h = underline; - hLineNum = lineNum - 1; - hLine = lines.get(lineNum - 1); - lineTypes.set(hLineNum, KLineType.values()[h]); - lineTypes.add(KLineType.MARKER); - } - // Create a Header object - if (h > 0) { - if (underline == -1) - lineTypes.add(KLineType.values()[h]); - Header header = new Header(h, hLineNum, hLine, currentHeader); - if (h == 1) { - level1Headers.add(header); - } - pageObjects.put(hLineNum, header); - currentHeader = header; - continue; - } - // TODO List - // TODO Block quote - // Blank line - if (Utils.isBlank(line)) { - lineTypes.add(KLineType.BLANK); - continue; - } - // Normal - lineTypes.add(KLineType.NORMAL); - } // end line-loop - // Remove dummy header? - if (dummyTopHeader.getSubHeaders().size() == 0) { - level1Headers.remove(dummyTopHeader); - } - - boolean githubSyntaxSupport = - pStore.getBoolean(MarkdownPreferencePage.PREF_GITHUB_SYNTAX); - if (githubSyntaxSupport) { - /* - * Support Code block - */ - boolean inCodeBlock = false; - for (lineNum = 0; lineNum < lines.size(); lineNum++) { - String line = lines.get(lineNum); - // Found the start or end of a code block - if (line.matches("^```.*\n")) { - // We reverse the boolean value - inCodeBlock = !inCodeBlock; - - // We force the line to be blank. But we mark it as normal - // to prevent to be stripped - lines.set(lineNum, "\n"); - lineTypes.set(lineNum, KLineType.NORMAL); - continue; - } - if (inCodeBlock) { - lines.set(lineNum, " " + line); - } - } - - /* - * Support for URL Detection - * We search for links that are not captured by Markdown syntax - */ - for (lineNum = 0; lineNum < lines.size(); lineNum++) { - String line = lines.get(lineNum); - // When a link has been replaced we need to scan again the string - // as the offsets have changed (we add '<' and '>' to the link to - // be interpreted by the markdown library) - boolean urlReplaced; - - do { - urlReplaced = false; - Matcher m = githubURLDetection.matcher(line); - while (m.find()) { - // Ignore the URL following the format - if ((m.start() - 1 >= 0) && (m.end() < line.length()) && - (line.charAt(m.start() - 1) == '<') && - (line.charAt(m.end()) == '>')) - { - continue; - } - - // Ignore the URL following the format [description](link) - if ((m.start() - 2 >= 0) && (m.end() < line.length()) && - (line.charAt(m.start() - 2) == ']') && - (line.charAt(m.start() - 1) == '(') && - (line.charAt(m.end()) == ')')) - { - continue; - } - - // Ignore the URL following the format [description](link "title") - if ((m.start() - 2 >= 0) && (m.end() + 1 < line.length()) && - (line.charAt(m.start() - 2) == ']') && - (line.charAt(m.start() - 1) == '(') && - (line.charAt(m.end()) == ' ') && - (line.charAt(m.end() + 1) == '"')) - { - continue; - } - - if (m.start() - 1 >= 0) { - // Case when the link is at the beginning of the string - line = line.substring(0, m.start()) + "<" + m.group(0) + ">" + line.substring(m.end()); - } else { - line = "<" + m.group(0) + ">" + line.substring(m.end()); - } - - // We replaced the string in the array - lines.set(lineNum, line); - urlReplaced = true; - break; - } - } while (urlReplaced); - } - } - } - - /** - * @param line - * @param c - * @return true if line is just cs (and whitespace at the start/end) - */ - boolean just(String line, char c) { - return line.matches("\\s*"+c+"+\\s*"); - } - - /** - * @param line - * @return The number of # symbols prepending the line. - */ - private int numHash(String line) { - for (int i = 0; i < line.length(); i++) { - if (line.charAt(i) != '#') - return i; - } - return line.length(); - } - - /** - * - * @param parent - * Can be null for top-level - * @return List of sub-headers. Never null. FIXME handle documents which - * have a 2nd level header before any 1st level ones - */ - public List
getHeadings(Header parent) { - if (parent == null) { - return Collections.unmodifiableList(level1Headers); - } - return Collections.unmodifiableList(parent.subHeaders); - } - - // public WebPage getWebPage() { - // WebPage page = new WebPage(); - // // Add the lines, one by one - // boolean inParagraph = false; - // for (int i=0; i"); - // line = cleanHeader(line); - // page.addText("<"+type+">"+line+""); - // continue; - // case MARKER: // Ignore - // continue; - // // TODO List? - // // TODO Block quote? - // } - // // Paragraph end? - // if (Utils.isBlank(line)) { - // if (inParagraph) page.addText("

"); - // continue; - // } - // // Paragraph start? - // if (!inParagraph) { - // page.addText("

"); - // inParagraph = true; - // } - // // Plain text - // page.addText(line); - // } - // return page; - // } - - /** - * Get the HTML for this page. Uses the MarkdownJ project. - */ - public String html() { - // Section numbers?? - boolean sectionNumbers = pStore - .getBoolean(MarkdownPreferencePage.PREF_SECTION_NUMBERS); - // Chop out multi-markdown header - StringBuilder sb = new StringBuilder(); - assert lines.size() == lineTypes.size(); - for (int i = 0, n = lines.size(); i < n; i++) { - KLineType type = lineTypes.get(i); - if (type == KLineType.META) - continue; - String line = lines.get(i); - if (sectionNumbers && isHeader(type) && line.contains("$section")) { - // TODO Header section = headers.get(i); - // String secNum = section.getSectionNumber(); - // line.replace("$section", secNum); - } - sb.append(line); - } - String text = sb.toString(); - // Use external converter? - final String cmd = pStore - .getString(MarkdownPreferencePage.PREF_MARKDOWN_COMMAND); - if (Utils.isBlank(cmd) - || (cmd.startsWith("(") && cmd.contains("MarkdownJ"))) { - // Use MarkdownJ - MarkdownProcessor markdown = new MarkdownProcessor(); - // MarkdownJ doesn't convert £s for some reason - text = text.replace("£", "£"); - String html = markdown.markdown(text); - return html; - } - // Attempt to run external command - try { - final File md = File.createTempFile("tmp", ".md"); - FileUtils.write(md, text); - Process process = new Process(cmd+" "+md.getAbsolutePath()); - process.run(); - int ok = process.waitFor(10000); - if (ok != 0) throw new FailureException(cmd+" failed:\n"+process.getError()); - String html = process.getOutput(); - FileUtils.delete(md); - return html; - } catch (Exception e) { - throw Utils.runtime(e); - } - } - - /** - * @param type - * @return - */ - private boolean isHeader(KLineType type) { - return type == KLineType.H1 || type == KLineType.H2 - || type == KLineType.H3 || type == KLineType.H4 - || type == KLineType.H5 || type == KLineType.H6; - } - - /** - * Return the raw text of this page. - */ - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - for (String line : lines) { - sb.append(line); - } - return sb.toString(); - } - - /** - * Line type information for the raw text. - * - * @return - */ - public List getLineTypes() { - return Collections.unmodifiableList(lineTypes); - } - - /** - * @param line - * @return - */ - public Object getPageObject(int line) { - return pageObjects.get(line); - } - -} +/** + * Copyright winterwell Mathematics Ltd. + * @author Daniel Winterstein + * 11 Jan 2007 + */ +package winterwell.markdown.pagemodel; + +import java.io.File; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.eclipse.jface.preference.IPreferenceStore; + +import winterwell.markdown.Activator; +import winterwell.markdown.StringMethods; +import winterwell.markdown.preferences.MarkdownPreferencePage; +import winterwell.utils.FailureException; +import winterwell.utils.Process; +import winterwell.utils.StrUtils; +import winterwell.utils.Utils; +import winterwell.utils.io.FileUtils; + +import com.petebevin.markdown.MarkdownProcessor; + +/** + * Understands Markdown syntax. + * + * @author Daniel Winterstein + */ +public class MarkdownPage { + + /** + * Strip leading and trailing #s and whitespace + * + * @param line + * @return cleaned up line + */ + private String cleanHeader(String line) { + for (int j = 0; j < line.length(); j++) { + char c = line.charAt(j); + if (c != '#' && !Character.isWhitespace(c)) { + line = line.substring(j); + break; + } + } + for (int j = line.length() - 1; j > 0; j--) { + char c = line.charAt(j); + if (c != '#' && !Character.isWhitespace(c)) { + line = line.substring(0, j + 1); + break; + } + } + return line; + } + + /** + * Represents information about a section header. E.g. ## Misc Warblings + * + * @author daniel + */ + public class Header { + /** + * 1 = top-level (i.e. #), 2= 2nd-level (i.e. ##), etc. + */ + final int level; + /** + * The text of the Header + */ + final String heading; + /** + * Sub-sections, if any + */ + final List

subHeaders = new ArrayList
(); + /** + * The line on which this header occurs. + */ + final int lineNumber; + + public int getLineNumber() { + return lineNumber; + } + + /** + * + * @return the next section (at this depth if possible), null if none + */ + public Header getNext() { + if (parent == null) { + int ti = level1Headers.indexOf(this); + if (ti == -1 || ti == level1Headers.size() - 1) + return null; + return level1Headers.get(ti + 1); + } + int i = parent.subHeaders.indexOf(this); + assert i != -1 : this; + if (i == parent.subHeaders.size() - 1) + return parent.getNext(); + return parent.subHeaders.get(i + 1); + } + /** + * + * @return the next section (at this depth if possible), null if none + */ + public Header getPrevious() { + if (parent == null) { + int ti = level1Headers.indexOf(this); + if (ti == -1 || ti == 0) + return null; + return level1Headers.get(ti - 1); + } + int i = parent.subHeaders.indexOf(this); + assert i != -1 : this; + if (i == 0) + return parent.getPrevious(); + return parent.subHeaders.get(i - 1); + } + + + /** + * The parent section. Can be null. + */ + private Header parent; + + /** + * Create a marker for a section Header + * + * @param level + * 1 = top-level (i.e. #), 2= 2nd-level (i.e. ##), etc. + * @param lineNumber + * The line on which this header occurs + * @param heading + * The text of the Header, trimmed of #s + * @param currentHeader + * The previous Header. This is used to find the parent + * section if there is one. Can be null. + */ + Header(int level, int lineNumber, String heading, Header currentHeader) { + this.lineNumber = lineNumber; + this.level = level; + this.heading = cleanHeader(heading); + // Heading Tree + setParent(currentHeader); + } + + private void setParent(Header currentHeader) { + if (currentHeader == null) { + parent = null; + return; + } + if (currentHeader.level < level) { + parent = currentHeader; + parent.subHeaders.add(this); + return; + } + setParent(currentHeader.parent); + } + + public Header getParent() { + return parent; + } + + /** + * Sub-sections. May be zero-length, never null. + */ + public List
getSubHeaders() { + return subHeaders; + } + + @Override + public String toString() { + return heading; + } + + public int getLevel() { + return level; + } + } + + /** + * The raw text, broken up into individual lines. + */ + private List lines; + + /** + * The raw text, broken up into individual lines. + */ + public List getText() { + return Collections.unmodifiableList(lines); + } + + public enum KLineType { + NORMAL, H1, H2, H3, H4, H5, H6, BLANK, + // TODO LIST, BLOCKQUOTE, + /** A line marking Markdown info about the preceding line, e.g. ====== */ + MARKER, + /** A line containing meta-data, e.g. title: My Page */ + META + } + + /** + * Information about each line. + */ + private List lineTypes; + private Map pageObjects = new HashMap(); + + // TODO meta-data, footnotes, tables, link & image attributes + private static Pattern multiMarkdownTag = Pattern.compile("^([\\w].*):(.*)"); + private Map multiMarkdownTags = new HashMap(); + + // Regular expression for Github support + private static Pattern githubURLDetection = Pattern.compile("((https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|])"); + + /** + * The top-level headers. FIXME handle documents which have a 2nd level + * header before any 1st level ones + */ + private final List
level1Headers = new ArrayList
(); + private final IPreferenceStore pStore; + + /** + * Create a page. + * + * @param text + */ + public MarkdownPage(String text) { + pStore = Activator.getDefault().getPreferenceStore(); + setText(text); + } + + /** + * Reset the text for this page. + * + * @param text + */ + private void setText(String text) { + // Get lines + lines = StringMethods.splitLines(text); + // Clean out old + level1Headers.clear(); + lineTypes = new ArrayList(lines.size()); + pageObjects.clear(); + // Dummy level-1 header in case there are none + Header dummyTopHeader = new Header(1, 0, "", null); + level1Headers.add(dummyTopHeader); + Header currentHeader = dummyTopHeader; + // Identify line types + int lineNum = 0; + + // Check if we should support the Multi-Markdown Metadata + boolean multiMarkdownMetadataSupport = + pStore.getBoolean(MarkdownPreferencePage.PREF_MULTIMARKDOWN_METADATA); + + // Multi-markdown header + if (multiMarkdownMetadataSupport) { + // The key is the text before the colon, and the data is the text + // after the + // colon. In the above example, notice that there are two lines of + // information + // for the Author key. If you end a line with “space-space-newline”, + // the newline + // will be included when converted to other formats. + // + // There must not be any whitespace above the metadata, and the + // metadata block + // ends with the first whitespace only line. The metadata is + // stripped from the + // document before it is passed on to the syntax parser. + + // + // Check if the Metdatas are valid + // + boolean validMetadata = true; + for (lineNum = 0; lineNum < lines.size(); lineNum++) { + String line = lines.get(lineNum); + if (Utils.isBlank(line)) { + break; + } + Matcher m = multiMarkdownTag.matcher(line); + if (!m.find()) { + if (lineNum == 0) { + // No MultiMarkdown metadata + validMetadata = false; + break; + } else if (!line.matches("^\\s.*\n")) { + // The next line was not intended (ie. it does not start + // with a whitespace) + validMetadata = false; + break; + } + } + } + + // Valid Metadatas have been found. We need to retrieve these keys/values. + if (validMetadata) { + String data = ""; + String tag = ""; + for (lineNum = 0; lineNum < lines.size(); lineNum++) { + String line = lines.get(lineNum); + if (Utils.isBlank(line)) { + break; + } + Matcher m = multiMarkdownTag.matcher(line); + if (!m.find()) { + if (lineNum == 0) { + break; + } + // Multi-line tag + lineTypes.add(KLineType.META); + data += StrUtils.LINEEND + line.trim(); + multiMarkdownTags.put(tag, data); + } else { + lineTypes.add(KLineType.META); + tag = m.group(0); + data = m.group(1).trim(); + if (m.group(1).endsWith(line)) + multiMarkdownTags.put(tag, data); + } + } + } else { + lineNum = 0; + } + } + for (; lineNum < lines.size(); lineNum++) { + String line = lines.get(lineNum); + // Headings + int h = numHash(line); + String hLine = line; + int hLineNum = lineNum; + int underline = -1; + if (lineNum != 0) { + underline = just(line, '=') ? 1 : just(line, '-') ? 2 : -1; + } + if (underline != -1) { + h = underline; + hLineNum = lineNum - 1; + hLine = lines.get(lineNum - 1); + lineTypes.set(hLineNum, KLineType.values()[h]); + lineTypes.add(KLineType.MARKER); + } + // Create a Header object + if (h > 0) { + if (underline == -1) + lineTypes.add(KLineType.values()[h]); + Header header = new Header(h, hLineNum, hLine, currentHeader); + if (h == 1) { + level1Headers.add(header); + } + pageObjects.put(hLineNum, header); + currentHeader = header; + continue; + } + // TODO List + // TODO Block quote + // Blank line + if (Utils.isBlank(line)) { + lineTypes.add(KLineType.BLANK); + continue; + } + // Normal + lineTypes.add(KLineType.NORMAL); + } // end line-loop + // Remove dummy header? + if (dummyTopHeader.getSubHeaders().size() == 0) { + level1Headers.remove(dummyTopHeader); + } + + boolean githubSyntaxSupport = + pStore.getBoolean(MarkdownPreferencePage.PREF_GITHUB_SYNTAX); + if (githubSyntaxSupport) { + /* + * Support Code block + */ + boolean inCodeBlock = false; + for (lineNum = 0; lineNum < lines.size(); lineNum++) { + String line = lines.get(lineNum); + // Found the start or end of a code block + if (line.matches("^```.*\n")) { + // We reverse the boolean value + inCodeBlock = !inCodeBlock; + + // We force the line to be blank. But we mark it as normal + // to prevent to be stripped + lines.set(lineNum, "\n"); + lineTypes.set(lineNum, KLineType.NORMAL); + continue; + } + if (inCodeBlock) { + lines.set(lineNum, " " + line); + } + } + + /* + * Support for URL Detection + * We search for links that are not captured by Markdown syntax + */ + for (lineNum = 0; lineNum < lines.size(); lineNum++) { + String line = lines.get(lineNum); + // When a link has been replaced we need to scan again the string + // as the offsets have changed (we add '<' and '>' to the link to + // be interpreted by the markdown library) + boolean urlReplaced; + + do { + urlReplaced = false; + Matcher m = githubURLDetection.matcher(line); + while (m.find()) { + // Ignore the URL following the format + if ((m.start() - 1 >= 0) && (m.end() < line.length()) && + (line.charAt(m.start() - 1) == '<') && + (line.charAt(m.end()) == '>')) + { + continue; + } + + // Ignore the URL following the format [description](link) + if ((m.start() - 2 >= 0) && (m.end() < line.length()) && + (line.charAt(m.start() - 2) == ']') && + (line.charAt(m.start() - 1) == '(') && + (line.charAt(m.end()) == ')')) + { + continue; + } + + // Ignore the URL following the format [description](link "title") + if ((m.start() - 2 >= 0) && (m.end() + 1 < line.length()) && + (line.charAt(m.start() - 2) == ']') && + (line.charAt(m.start() - 1) == '(') && + (line.charAt(m.end()) == ' ') && + (line.charAt(m.end() + 1) == '"')) + { + continue; + } + + if (m.start() - 1 >= 0) { + // Case when the link is at the beginning of the string + line = line.substring(0, m.start()) + "<" + m.group(0) + ">" + line.substring(m.end()); + } else { + line = "<" + m.group(0) + ">" + line.substring(m.end()); + } + + // We replaced the string in the array + lines.set(lineNum, line); + urlReplaced = true; + break; + } + } while (urlReplaced); + } + } + } + + /** + * @param line + * @param c + * @return true if line is just cs (and whitespace at the start/end) + */ + boolean just(String line, char c) { + return line.matches("\\s*"+c+"+\\s*"); + } + + /** + * @param line + * @return The number of # symbols prepending the line. + */ + private int numHash(String line) { + for (int i = 0; i < line.length(); i++) { + if (line.charAt(i) != '#') + return i; + } + return line.length(); + } + + /** + * + * @param parent + * Can be null for top-level + * @return List of sub-headers. Never null. FIXME handle documents which + * have a 2nd level header before any 1st level ones + */ + public List
getHeadings(Header parent) { + if (parent == null) { + return Collections.unmodifiableList(level1Headers); + } + return Collections.unmodifiableList(parent.subHeaders); + } + + // public WebPage getWebPage() { + // WebPage page = new WebPage(); + // // Add the lines, one by one + // boolean inParagraph = false; + // for (int i=0; i"); + // line = cleanHeader(line); + // page.addText("<"+type+">"+line+""); + // continue; + // case MARKER: // Ignore + // continue; + // // TODO List? + // // TODO Block quote? + // } + // // Paragraph end? + // if (Utils.isBlank(line)) { + // if (inParagraph) page.addText("

"); + // continue; + // } + // // Paragraph start? + // if (!inParagraph) { + // page.addText("

"); + // inParagraph = true; + // } + // // Plain text + // page.addText(line); + // } + // return page; + // } + + /** + * Get the HTML for this page. Uses the MarkdownJ project. + */ + public String html() { + // Section numbers?? + boolean sectionNumbers = pStore + .getBoolean(MarkdownPreferencePage.PREF_SECTION_NUMBERS); + // Chop out multi-markdown header + StringBuilder sb = new StringBuilder(); + assert lines.size() == lineTypes.size(); + for (int i = 0, n = lines.size(); i < n; i++) { + KLineType type = lineTypes.get(i); + if (type == KLineType.META) + continue; + String line = lines.get(i); + if (sectionNumbers && isHeader(type) && line.contains("$section")) { + // TODO Header section = headers.get(i); + // String secNum = section.getSectionNumber(); + // line.replace("$section", secNum); + } + sb.append(line); + } + String text = sb.toString(); + // Use external converter? + final String cmd = pStore + .getString(MarkdownPreferencePage.PREF_MARKDOWN_COMMAND); + if (Utils.isBlank(cmd) + || (cmd.startsWith("(") && cmd.contains("MarkdownJ"))) { + // Use MarkdownJ + MarkdownProcessor markdown = new MarkdownProcessor(); + // MarkdownJ doesn't convert £s for some reason + text = text.replace("£", "£"); + String html = markdown.markdown(text); + return html; + } + // Attempt to run external command + try { + final File md = File.createTempFile("tmp", ".md"); + FileUtils.write(md, text); + Process process = new Process(cmd+" "+md.getAbsolutePath()); + process.run(); + int ok = process.waitFor(10000); + if (ok != 0) throw new FailureException(cmd+" failed:\n"+process.getError()); + String html = process.getOutput(); + FileUtils.delete(md); + return html; + } catch (Exception e) { + throw Utils.runtime(e); + } + } + + /** + * @param type + * @return + */ + private boolean isHeader(KLineType type) { + return type == KLineType.H1 || type == KLineType.H2 + || type == KLineType.H3 || type == KLineType.H4 + || type == KLineType.H5 || type == KLineType.H6; + } + + /** + * Return the raw text of this page. + */ + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + for (String line : lines) { + sb.append(line); + } + return sb.toString(); + } + + /** + * Line type information for the raw text. + * + * @return + */ + public List getLineTypes() { + return Collections.unmodifiableList(lineTypes); + } + + /** + * @param line + * @return + */ + public Object getPageObject(int line) { + return pageObjects.get(line); + } + +}