2 * Copyright winterwell Mathematics Ltd.
3 * @author Daniel Winterstein
6 package winterwell.markdown.pagemodel;
9 import java.util.ArrayList;
10 import java.util.Collections;
11 import java.util.HashMap;
12 import java.util.List;
14 import java.util.regex.Matcher;
15 import java.util.regex.Pattern;
17 import org.eclipse.jface.preference.IPreferenceStore;
19 import winterwell.markdown.Activator;
20 import winterwell.markdown.StringMethods;
21 import winterwell.markdown.preferences.MarkdownPreferencePage;
22 import winterwell.utils.FailureException;
23 import winterwell.utils.Process;
24 import winterwell.utils.StrUtils;
25 import winterwell.utils.Utils;
26 import winterwell.utils.io.FileUtils;
28 import com.petebevin.markdown.MarkdownProcessor;
31 * Understands Markdown syntax.
33 * @author Daniel Winterstein
35 public class MarkdownPage {
38 * Strip leading and trailing #s and whitespace
41 * @return cleaned up line
43 private String cleanHeader(String line) {
44 for (int j = 0; j < line.length(); j++) {
45 char c = line.charAt(j);
46 if (c != '#' && !Character.isWhitespace(c)) {
47 line = line.substring(j);
51 for (int j = line.length() - 1; j > 0; j--) {
52 char c = line.charAt(j);
53 if (c != '#' && !Character.isWhitespace(c)) {
54 line = line.substring(0, j + 1);
// Start of the section-header model (one instance per Markdown header line).
// NOTE(review): the class declaration and the declarations of the level,
// heading and lineNumber fields are not visible in this excerpt; only their
// javadoc text and the subHeaders field survive below.
62 * Represents information about a section header. E.g. ## Misc Warblings
68 * 1 = top-level (i.e. #), 2= 2nd-level (i.e. ##), etc.
72 * The text of the Header
76 * Sub-sections, if any
// Child headers; setParent() appends a header to its parent's list.
78 final List<Header> subHeaders = new ArrayList<Header>();
80 * The line on which this header occurs.
// Accessor for the header's line number (body not visible here;
// presumably returns the lineNumber field - TODO confirm).
84 public int getLineNumber() {
90 * @return the next section (at this depth if possible), null if none
92 public Header getNext() {
94 int ti = level1Headers.indexOf(this);
95 if (ti == -1 || ti == level1Headers.size() - 1)
97 return level1Headers.get(ti + 1);
99 int i = parent.subHeaders.indexOf(this);
100 assert i != -1 : this;
101 if (i == parent.subHeaders.size() - 1)
102 return parent.getNext();
103 return parent.subHeaders.get(i + 1);
107 * @return the next section (at this depth if possible), null if none
109 public Header getPrevious() {
110 if (parent == null) {
111 int ti = level1Headers.indexOf(this);
112 if (ti == -1 || ti == 0)
114 return level1Headers.get(ti - 1);
116 int i = parent.subHeaders.indexOf(this);
117 assert i != -1 : this;
119 return parent.getPrevious();
120 return parent.subHeaders.get(i - 1);
125 * The parent section. Can be null.
// Assigned by setParent(); stays null when no shallower header precedes this one.
127 private Header parent;
// Constructor: records the header's position and cleaned text, then links it
// into the header tree relative to the header that preceded it.
130 * Create a marker for a section Header
133 * 1 = top-level (i.e. #), 2= 2nd-level (i.e. ##), etc.
135 * The line on which this header occurs
137 * The text of the Header, trimmed of #s
138 * @param currentHeader
139 * The previous Header. This is used to find the parent
140 * section if there is one. Can be null.
142 Header(int level, int lineNumber, String heading, Header currentHeader) {
143 this.lineNumber = lineNumber;
// NOTE(review): the assignment of this.level is not visible in this excerpt;
// setParent() compares levels, so it presumably happens before the call
// below - TODO confirm.
// The stored heading has its leading/trailing #s and whitespace stripped.
145 this.heading = cleanHeader(heading);
// Walk up from the previous header to find the closest shallower ancestor.
147 setParent(currentHeader);
150 private void setParent(Header currentHeader) {
151 if (currentHeader == null) {
155 if (currentHeader.level < level) {
156 parent = currentHeader;
157 parent.subHeaders.add(this);
160 setParent(currentHeader.parent);
// Simple accessors of Header. Only the signatures are visible in this
// excerpt; the notes below describe what each presumably returns.
// Presumably returns the parent section, which can be null - TODO confirm.
163 public Header getParent() {
168 * Sub-sections. May be zero-length, never null.
// Presumably exposes the subHeaders list - TODO confirm whether a copy or
// the live list is returned.
170 public List<Header> getSubHeaders() {
// NOTE(review): body not visible; presumably the heading text - TODO confirm.
175 public String toString() {
// Presumably the header depth (1 = top-level, 2 = 2nd-level, ...) - TODO confirm.
179 public int getLevel() {
185 * The raw text, broken up into individual lines.
187 private List<String> lines;
190 * The raw text, broken up into individual lines.
192 public List<String> getText() {
193 return Collections.unmodifiableList(lines);
/**
 * Classification of a single line of the page.
 * <p>
 * The declaration order matters: header constants are looked up with
 * {@code KLineType.values()[level]}, so H1..H6 must stay at positions
 * 1..6.
 */
public enum KLineType {
    /** Ordinary text. */
    NORMAL,
    /** Level-1 header. */
    H1,
    /** Level-2 header. */
    H2,
    /** Level-3 header. */
    H3,
    /** Level-4 header. */
    H4,
    /** Level-5 header. */
    H5,
    /** Level-6 header. */
    H6,
    /** A blank line. */
    BLANK,
    // TODO LIST, BLOCKQUOTE,
    /** A line marking Markdown info about the preceding line, e.g. ====== */
    MARKER,
    /** A line containing meta-data, e.g. title: My Page */
    META
}
206 * Information about each line.
208 private List<KLineType> lineTypes;
209 private Map<Integer,Object> pageObjects = new HashMap<Integer, Object>();
211 // TODO meta-data, footnotes, tables, link & image attributes
212 private static Pattern multiMarkdownTag = Pattern.compile("^([\\w].*):(.*)");
213 private Map<String, String> multiMarkdownTags = new HashMap<String, String>();
215 // Regular expression for Github support
216 private static Pattern githubURLDetection = Pattern.compile("((https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|])");
219 * The top-level headers. FIXME handle documents which have a 2nd level
220 * header before any 1st level ones
222 private final List<Header> level1Headers = new ArrayList<Header>();
223 private final IPreferenceStore pStore;
// Creates a page model for the given Markdown text.
// NOTE(review): the rest of the body is not visible in this excerpt;
// presumably it passes the text on to setText(text) - TODO confirm.
230 public MarkdownPage(String text) {
// Preferences come from the plug-in's activator, so this constructor needs a
// running plug-in (Activator.getDefault() is dereferenced unconditionally).
231 pStore = Activator.getDefault().getPreferenceStore();
// setText: (re)builds the whole page model from raw text.
//  1. Splits the text into lines and resets the header list and line types.
//  2. If the MultiMarkdown-metadata preference is on, validates the leading
//     metadata block and stores its entries in multiMarkdownTags, marking
//     those lines as META.
//  3. Classifies the remaining lines (hash headers, =/- underlined headers,
//     BLANK, NORMAL), creating a Header per header line and registering it in
//     pageObjects under its line number.
//  4. If the Github-syntax preference is on, rewrites ``` code fences and
//     wraps bare URLs in <...>.
// NOTE(review): a number of lines of this method are not visible in this
// excerpt, so the notes below only describe what the visible lines show.
236 * Reset the text for this page.
240 private void setText(String text) {
242 lines = StringMethods.splitLines(text);
244 level1Headers.clear();
245 lineTypes = new ArrayList<KLineType>(lines.size());
247 // Dummy level-1 header in case there are none
248 Header dummyTopHeader = new Header(1, 0, "", null);
249 level1Headers.add(dummyTopHeader);
250 Header currentHeader = dummyTopHeader;
251 // Identify line types
254 // Check if we should support the Multi-Markdown Metadata
255 boolean multiMarkdownMetadataSupport =
256 pStore.getBoolean(MarkdownPreferencePage.PREF_MULTIMARKDOWN_METADATA);
258 // Multi-markdown header
259 if (multiMarkdownMetadataSupport) {
260 // The key is the text before the colon, and the data is the text
262 // colon. In the above example, notice that there are two lines of
264 // for the Author key. If you end a line with “space-space-newline”,
266 // will be included when converted to other formats.
268 // There must not be any whitespace above the metadata, and the
270 // ends with the first whitespace only line. The metadata is
272 // document before it is passed on to the syntax parser.
275 // Check if the metadata is valid
277 boolean validMetadata = true;
278 for (lineNum = 0; lineNum < lines.size(); lineNum++) {
279 String line = lines.get(lineNum);
280 if (Utils.isBlank(line)) {
283 Matcher m = multiMarkdownTag.matcher(line);
286 // No MultiMarkdown metadata
287 validMetadata = false;
// NOTE(review): String.matches must match the whole string, so this test only
// succeeds when the line still carries its trailing "\n" - assumes
// StringMethods.splitLines keeps line terminators; TODO confirm.
289 } else if (!line.matches("^\\s.*\n")) {
290 // The next line was not indented (ie. it does not start
291 // with a whitespace)
292 validMetadata = false;
298 // Valid metadata has been found. We need to retrieve these keys/values.
302 for (lineNum = 0; lineNum < lines.size(); lineNum++) {
303 String line = lines.get(lineNum);
304 if (Utils.isBlank(line)) {
307 Matcher m = multiMarkdownTag.matcher(line);
// Continuation line: appended to the data of the current tag.
313 lineTypes.add(KLineType.META);
314 data += StrUtils.LINEEND + line.trim();
315 multiMarkdownTags.put(tag, data);
317 lineTypes.add(KLineType.META);
// NOTE(review): data is read from group(1), which is the text before the
// colon (the value is group(2)), and the endsWith(line) test below compares
// that group against the whole line. Both look suspicious - verify against
// the lines not shown here.
319 data = m.group(1).trim();
320 if (m.group(1).endsWith(line))
321 multiMarkdownTags.put(tag, data);
// Main classification pass; continues from wherever the metadata pass stopped.
328 for (; lineNum < lines.size(); lineNum++) {
329 String line = lines.get(lineNum);
331 int h = numHash(line);
333 int hLineNum = lineNum;
// Setext-style headers: a line of only '=' means level 1, only '-' level 2.
336 underline = just(line, '=') ? 1 : just(line, '-') ? 2 : -1;
338 if (underline != -1) {
// The header text is the line above the underline; that line was already
// typed, so its entry is overwritten and the underline itself becomes MARKER.
340 hLineNum = lineNum - 1;
341 hLine = lines.get(lineNum - 1);
// NOTE(review): values()[h] relies on the enum declaration order (H1 at
// index 1). No upper bound on h is visible here - confirm that lines with
// more than six leading #s cannot reach these lookups.
342 lineTypes.set(hLineNum, KLineType.values()[h]);
343 lineTypes.add(KLineType.MARKER);
345 // Create a Header object
348 lineTypes.add(KLineType.values()[h]);
349 Header header = new Header(h, hLineNum, hLine, currentHeader);
351 level1Headers.add(header);
353 pageObjects.put(hLineNum, header);
354 currentHeader = header;
360 if (Utils.isBlank(line)) {
361 lineTypes.add(KLineType.BLANK);
365 lineTypes.add(KLineType.NORMAL);
367 // Remove dummy header?
// The dummy is only kept when real headers ended up nested beneath it.
368 if (dummyTopHeader.getSubHeaders().size() == 0) {
369 level1Headers.remove(dummyTopHeader);
372 boolean githubSyntaxSupport =
373 pStore.getBoolean(MarkdownPreferencePage.PREF_GITHUB_SYNTAX);
374 if (githubSyntaxSupport) {
378 boolean inCodeBlock = false;
379 for (lineNum = 0; lineNum < lines.size(); lineNum++) {
380 String line = lines.get(lineNum);
381 // Found the start or end of a code block
// NOTE(review): as above, this only matches when the line ends with "\n".
382 if (line.matches("^```.*\n")) {
383 // We reverse the boolean value
384 inCodeBlock = !inCodeBlock;
386 // We force the line to be blank. But we mark it as normal
387 // to prevent to be stripped
388 lines.set(lineNum, "\n");
389 lineTypes.set(lineNum, KLineType.NORMAL);
// NOTE(review): the guarding condition is not visible; presumably lines inside
// a fenced block get this prefix - TODO confirm.
393 lines.set(lineNum, " " + line);
398 * Support for URL Detection
399 * We search for links that are not captured by Markdown syntax
401 for (lineNum = 0; lineNum < lines.size(); lineNum++) {
402 String line = lines.get(lineNum);
403 // When a link has been replaced we need to scan again the string
404 // as the offsets have changed (we add '<' and '>' to the link to
405 // be interpreted by the markdown library)
410 Matcher m = githubURLDetection.matcher(line);
412 // Ignore the URL following the format <link>
413 if ((m.start() - 1 >= 0) && (m.end() < line.length()) &&
414 (line.charAt(m.start() - 1) == '<') &&
415 (line.charAt(m.end()) == '>'))
420 // Ignore the URL following the format [description](link)
421 if ((m.start() - 2 >= 0) && (m.end() < line.length()) &&
422 (line.charAt(m.start() - 2) == ']') &&
423 (line.charAt(m.start() - 1) == '(') &&
424 (line.charAt(m.end()) == ')'))
429 // Ignore the URL following the format [description](link "title")
430 if ((m.start() - 2 >= 0) && (m.end() + 1 < line.length()) &&
431 (line.charAt(m.start() - 2) == ']') &&
432 (line.charAt(m.start() - 1) == '(') &&
433 (line.charAt(m.end()) == ' ') &&
434 (line.charAt(m.end() + 1) == '"'))
// NOTE(review): the "beginning of the string" comment below sits in the branch
// taken when the match does NOT start at index 0. Since substring(0, 0) is
// the empty string, the first form yields the same result in both cases.
439 if (m.start() - 1 >= 0) {
440 // Case when the link is at the beginning of the string
441 line = line.substring(0, m.start()) + "<" + m.group(0) + ">" + line.substring(m.end());
443 line = "<" + m.group(0) + ">" + line.substring(m.end());
446 // We replaced the string in the array
447 lines.set(lineNum, line);
// Rescan the same line until a pass makes no further replacement.
451 } while (urlReplaced);
459 * @return true if line is just cs (and whitespace at the start/end)
461 boolean just(String line, char c) {
462 return line.matches("\\s*"+c+"+\\s*");
467 * @return The number of # symbols prepending the line.
469 private int numHash(String line) {
470 for (int i = 0; i < line.length(); i++) {
471 if (line.charAt(i) != '#')
474 return line.length();
480 * Can be null for top-level
481 * @return List of sub-headers. Never null. FIXME handle documents which
482 * have a 2nd level header before any 1st level ones
484 public List<Header> getHeadings(Header parent) {
485 if (parent == null) {
486 return Collections.unmodifiableList(level1Headers);
488 return Collections.unmodifiableList(parent.subHeaders);
491 // public WebPage getWebPage() {
492 // WebPage page = new WebPage();
493 // // Add the lines, one by one
494 // boolean inParagraph = false;
495 // for (int i=0; i<lines.size(); i++) {
496 // String line = lines.get(i);
497 // KLineType type = lineTypes.get(i);
500 // case H1: case H2: case H3:
501 // case H4: case H5: case H6:
502 // if (inParagraph) page.addText("</p>");
503 // line = cleanHeader(line);
504 // page.addText("<"+type+">"+line+"</"+type+">");
506 // case MARKER: // Ignore
509 // // TODO Block quote?
512 // if (Utils.isBlank(line)) {
513 // if (inParagraph) page.addText("</p>");
516 // // Paragraph start?
517 // if (!inParagraph) {
518 // page.addText("<p>");
519 // inParagraph = true;
522 // page.addText(line);
528 * Get the HTML for this page. Uses the MarkdownJ project.
530 public String html() {
532 boolean sectionNumbers = pStore
533 .getBoolean(MarkdownPreferencePage.PREF_SECTION_NUMBERS);
534 // Chop out multi-markdown header
535 StringBuilder sb = new StringBuilder();
536 assert lines.size() == lineTypes.size();
537 for (int i = 0, n = lines.size(); i < n; i++) {
538 KLineType type = lineTypes.get(i);
539 if (type == KLineType.META)
541 String line = lines.get(i);
542 if (sectionNumbers && isHeader(type) && line.contains("$section")) {
543 // TODO Header section = headers.get(i);
544 // String secNum = section.getSectionNumber();
545 // line.replace("$section", secNum);
549 String text = sb.toString();
550 // Use external converter?
551 final String cmd = pStore
552 .getString(MarkdownPreferencePage.PREF_MARKDOWN_COMMAND);
553 if (Utils.isBlank(cmd)
554 || (cmd.startsWith("(") && cmd.contains("MarkdownJ"))) {
556 MarkdownProcessor markdown = new MarkdownProcessor();
557 // MarkdownJ doesn't convert £s for some reason
558 text = text.replace("£", "£");
559 String html = markdown.markdown(text);
562 // Attempt to run external command
564 final File md = File.createTempFile("tmp", ".md");
565 FileUtils.write(md, text);
566 Process process = new Process(cmd+" "+md.getAbsolutePath());
568 int ok = process.waitFor(10000);
569 if (ok != 0) throw new FailureException(cmd+" failed:\n"+process.getError());
570 String html = process.getOutput();
571 FileUtils.delete(md);
573 } catch (Exception e) {
574 throw Utils.runtime(e);
582 private boolean isHeader(KLineType type) {
583 return type == KLineType.H1 || type == KLineType.H2
584 || type == KLineType.H3 || type == KLineType.H4
585 || type == KLineType.H5 || type == KLineType.H6;
589 * Return the raw text of this page.
592 public String toString() {
593 StringBuilder sb = new StringBuilder();
594 for (String line : lines) {
597 return sb.toString();
601 * Line type information for the raw text.
605 public List<KLineType> getLineTypes() {
606 return Collections.unmodifiableList(lineTypes);
613 public Object getPageObject(int line) {
614 return pageObjects.get(line);