]> gerrit.simantics Code Review - simantics/platform.git/blob - bundles/winterwell.markdown/src/winterwell/markdown/pagemodel/MarkdownPage.java
Tycho compilation changes for SVN version also.
[simantics/platform.git] / bundles / winterwell.markdown / src / winterwell / markdown / pagemodel / MarkdownPage.java
1 /**\r
2  * Copyright winterwell Mathematics Ltd.\r
3  * @author Daniel Winterstein\r
4  * 11 Jan 2007\r
5  */\r
6 package winterwell.markdown.pagemodel;\r
7 \r
8 import java.io.File;\r
9 import java.util.ArrayList;\r
10 import java.util.Collections;\r
11 import java.util.HashMap;\r
12 import java.util.List;\r
13 import java.util.Map;\r
14 import java.util.regex.Matcher;\r
15 import java.util.regex.Pattern;\r
16 \r
17 import org.eclipse.jface.preference.IPreferenceStore;\r
18 \r
19 import winterwell.markdown.Activator;\r
20 import winterwell.markdown.StringMethods;\r
21 import winterwell.markdown.preferences.MarkdownPreferencePage;\r
22 import winterwell.utils.FailureException;\r
23 import winterwell.utils.Process;\r
24 import winterwell.utils.StrUtils;\r
25 import winterwell.utils.Utils;\r
26 import winterwell.utils.io.FileUtils;\r
27 \r
28 import com.petebevin.markdown.MarkdownProcessor;\r
29 \r
30 /**\r
31  * Understands Markdown syntax.\r
32  * \r
33  * @author Daniel Winterstein\r
34  */\r
35 public class MarkdownPage {\r
36 \r
37         /**\r
38          * Strip leading and trailing #s and whitespace\r
39          * \r
40          * @param line\r
41          * @return cleaned up line\r
42          */\r
43         private String cleanHeader(String line) {\r
44                 for (int j = 0; j < line.length(); j++) {\r
45                         char c = line.charAt(j);\r
46                         if (c != '#' && !Character.isWhitespace(c)) {\r
47                                 line = line.substring(j);\r
48                                 break;\r
49                         }\r
50                 }\r
51                 for (int j = line.length() - 1; j > 0; j--) {\r
52                         char c = line.charAt(j);\r
53                         if (c != '#' && !Character.isWhitespace(c)) {\r
54                                 line = line.substring(0, j + 1);\r
55                                 break;\r
56                         }\r
57                 }\r
58                 return line;\r
59         }\r
60 \r
61         /**\r
62          * Represents information about a section header. E.g. ## Misc Warblings\r
63          * \r
64          * @author daniel\r
65          */\r
66         public class Header {\r
67                 /**\r
68                  * 1 = top-level (i.e. #), 2= 2nd-level (i.e. ##), etc.\r
69                  */\r
70                 final int level;\r
71                 /**\r
72                  * The text of the Header\r
73                  */\r
74                 final String heading;\r
75                 /**\r
76                  * Sub-sections, if any\r
77                  */\r
78                 final List<Header> subHeaders = new ArrayList<Header>();\r
79                 /**\r
80                  * The line on which this header occurs.\r
81                  */\r
82                 final int lineNumber;\r
83 \r
84                 public int getLineNumber() {\r
85                         return lineNumber;\r
86                 }\r
87 \r
88                 /**\r
89                  * \r
90                  * @return the next section (at this depth if possible), null if none\r
91                  */\r
92                 public Header getNext() {\r
93                         if (parent == null) {\r
94                                 int ti = level1Headers.indexOf(this);\r
95                                 if (ti == -1 || ti == level1Headers.size() - 1)\r
96                                         return null;\r
97                                 return level1Headers.get(ti + 1);\r
98                         }\r
99                         int i = parent.subHeaders.indexOf(this);\r
100                         assert i != -1 : this;\r
101                         if (i == parent.subHeaders.size() - 1)\r
102                                 return parent.getNext();\r
103                         return parent.subHeaders.get(i + 1);\r
104                 }\r
105                 /**\r
106                  * \r
107                  * @return the next section (at this depth if possible), null if none\r
108                  */\r
109                 public Header getPrevious() {\r
110                         if (parent == null) {\r
111                                 int ti = level1Headers.indexOf(this);\r
112                                 if (ti == -1 || ti == 0)\r
113                                         return null;\r
114                                 return level1Headers.get(ti - 1);\r
115                         }\r
116                         int i = parent.subHeaders.indexOf(this);\r
117                         assert i != -1 : this;\r
118                         if (i == 0)\r
119                                 return parent.getPrevious();\r
120                         return parent.subHeaders.get(i - 1);\r
121                 }\r
122                 \r
123 \r
124                 /**\r
125                  * The parent section. Can be null.\r
126                  */\r
127                 private Header parent;\r
128 \r
129                 /**\r
130                  * Create a marker for a section Header\r
131                  * \r
132                  * @param level\r
133                  *            1 = top-level (i.e. #), 2= 2nd-level (i.e. ##), etc.\r
134                  * @param lineNumber\r
135                  *            The line on which this header occurs\r
136                  * @param heading\r
137                  *            The text of the Header, trimmed of #s\r
138                  * @param currentHeader\r
139                  *            The previous Header. This is used to find the parent\r
140                  *            section if there is one. Can be null.\r
141                  */\r
142                 Header(int level, int lineNumber, String heading, Header currentHeader) {\r
143                         this.lineNumber = lineNumber;\r
144                         this.level = level;\r
145                         this.heading = cleanHeader(heading);\r
146                         // Heading Tree\r
147                         setParent(currentHeader);\r
148                 }\r
149 \r
150                 private void setParent(Header currentHeader) {\r
151                         if (currentHeader == null) {\r
152                                 parent = null;\r
153                                 return;\r
154                         }\r
155                         if (currentHeader.level < level) {\r
156                                 parent = currentHeader;\r
157                                 parent.subHeaders.add(this);\r
158                                 return;\r
159                         }\r
160                         setParent(currentHeader.parent);\r
161                 }\r
162 \r
163                 public Header getParent() {\r
164                         return parent;\r
165                 }\r
166 \r
167                 /**\r
168                  * Sub-sections. May be zero-length, never null.\r
169                  */\r
170                 public List<Header> getSubHeaders() {\r
171                         return subHeaders;\r
172                 }\r
173 \r
174                 @Override\r
175                 public String toString() {\r
176                         return heading;\r
177                 }\r
178 \r
179                 public int getLevel() {\r
180                         return level;\r
181                 }\r
182         }\r
183 \r
184         /**\r
185          * The raw text, broken up into individual lines.\r
186          */\r
187         private List<String> lines;\r
188 \r
189         /**\r
190          * The raw text, broken up into individual lines.\r
191          */\r
192         public List<String> getText() {\r
193                 return Collections.unmodifiableList(lines);\r
194         }\r
195 \r
196         public enum KLineType {\r
197                 NORMAL, H1, H2, H3, H4, H5, H6, BLANK,\r
198                 // TODO LIST, BLOCKQUOTE,\r
199                 /** A line marking Markdown info about the preceding line, e.g. ====== */\r
200                 MARKER,\r
201                 /** A line containing meta-data, e.g. title: My Page */\r
202                 META\r
203         }\r
204 \r
205         /**\r
206          * Information about each line.\r
207          */\r
208         private List<KLineType> lineTypes;\r
209         private Map<Integer,Object> pageObjects = new HashMap<Integer, Object>();\r
210 \r
211         // TODO meta-data, footnotes, tables, link & image attributes\r
212         private static Pattern multiMarkdownTag = Pattern.compile("^([\\w].*):(.*)");\r
213         private Map<String, String> multiMarkdownTags = new HashMap<String, String>();\r
214         \r
215         // Regular expression for Github support\r
216         private static Pattern githubURLDetection = Pattern.compile("((https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|])");\r
217 \r
218         /**\r
219          * The top-level headers. FIXME handle documents which have a 2nd level\r
220          * header before any 1st level ones\r
221          */\r
222         private final List<Header> level1Headers = new ArrayList<Header>();\r
223         private final IPreferenceStore pStore;\r
224 \r
225         /**\r
226          * Create a page.\r
227          * \r
228          * @param text\r
229          */\r
230         public MarkdownPage(String text) {\r
231                 pStore = Activator.getDefault().getPreferenceStore();\r
232                 setText(text);\r
233         }\r
234 \r
235         /**\r
236          * Reset the text for this page.\r
237          * \r
238          * @param text\r
239          */\r
240         private void setText(String text) {\r
241                 // Get lines\r
242                 lines = StringMethods.splitLines(text);\r
243                 // Clean out old\r
244                 level1Headers.clear();\r
245                 lineTypes = new ArrayList<KLineType>(lines.size());\r
246                 pageObjects.clear();\r
247                 // Dummy level-1 header in case there are none          \r
248                 Header dummyTopHeader = new Header(1, 0, "", null);\r
249                 level1Headers.add(dummyTopHeader);\r
250                 Header currentHeader = dummyTopHeader;          \r
251                 // Identify line types          \r
252                 int lineNum = 0;\r
253 \r
254                 // Check if we should support the Multi-Markdown Metadata\r
255                 boolean multiMarkdownMetadataSupport =\r
256                                 pStore.getBoolean(MarkdownPreferencePage.PREF_MULTIMARKDOWN_METADATA);\r
257                 \r
258                 // Multi-markdown header\r
259                 if (multiMarkdownMetadataSupport) {\r
260                         // The key is the text before the colon, and the data is the text\r
261                         // after the\r
262                         // colon. In the above example, notice that there are two lines of\r
263                         // information\r
264                         // for the Author key. If you end a line with “space-space-newline”,\r
265                         // the newline\r
266                         // will be included when converted to other formats.\r
267                         //\r
268                         // There must not be any whitespace above the metadata, and the\r
269                         // metadata block\r
270                         // ends with the first whitespace only line. The metadata is\r
271                         // stripped from the\r
272                         // document before it is passed on to the syntax parser.\r
273                         \r
274                         //\r
275                         // Check if the Metdatas are valid\r
276                         //\r
277                         boolean validMetadata = true;\r
278                         for (lineNum = 0; lineNum < lines.size(); lineNum++) {\r
279                                 String line = lines.get(lineNum);\r
280                                 if (Utils.isBlank(line)) {\r
281                                         break;\r
282                                 }\r
283                                 Matcher m = multiMarkdownTag.matcher(line);\r
284                                 if (!m.find()) {\r
285                                         if (lineNum == 0) {\r
286                                                 // No MultiMarkdown metadata\r
287                                                 validMetadata = false;\r
288                                                 break;\r
289                                         } else if (!line.matches("^\\s.*\n")) {\r
290                                                 // The next line was not intended (ie. it does not start\r
291                                                 // with a whitespace)\r
292                                                 validMetadata = false;\r
293                                                 break;\r
294                                         }\r
295                                 }\r
296                         }\r
297                         \r
298                         // Valid Metadatas have been found. We need to retrieve these keys/values.\r
299                         if (validMetadata) {\r
300                                 String data = "";\r
301                                 String tag = "";\r
302                                 for (lineNum = 0; lineNum < lines.size(); lineNum++) {\r
303                                         String line = lines.get(lineNum);\r
304                                         if (Utils.isBlank(line)) {\r
305                                                 break;\r
306                                         }\r
307                                         Matcher m = multiMarkdownTag.matcher(line);\r
308                                         if (!m.find()) {\r
309                                                 if (lineNum == 0) {\r
310                                                         break;\r
311                                                 }\r
312                                                 // Multi-line tag\r
313                                                 lineTypes.add(KLineType.META);\r
314                                                 data += StrUtils.LINEEND + line.trim();\r
315                                                 multiMarkdownTags.put(tag, data);\r
316                                         } else {\r
317                                                 lineTypes.add(KLineType.META);\r
318                                                 tag = m.group(0);\r
319                                                 data = m.group(1).trim();\r
320                                                 if (m.group(1).endsWith(line))\r
321                                                         multiMarkdownTags.put(tag, data);\r
322                                         }\r
323                                 }\r
324                         } else {\r
325                                 lineNum = 0;\r
326                         }\r
327                 }\r
328                 for (; lineNum < lines.size(); lineNum++) {\r
329                         String line = lines.get(lineNum);\r
330                         // Headings\r
331                         int h = numHash(line);\r
332                         String hLine = line;\r
333                         int hLineNum = lineNum;\r
334                         int underline = -1;\r
335                         if (lineNum != 0) {\r
336                                 underline = just(line, '=') ? 1 : just(line, '-') ? 2 : -1;\r
337                         }\r
338                         if (underline != -1) {\r
339                                 h = underline;\r
340                                 hLineNum = lineNum - 1;\r
341                                 hLine = lines.get(lineNum - 1);\r
342                                 lineTypes.set(hLineNum, KLineType.values()[h]);\r
343                                 lineTypes.add(KLineType.MARKER);\r
344                         }\r
345                         // Create a Header object\r
346                         if (h > 0) {\r
347                                 if (underline == -1)\r
348                                         lineTypes.add(KLineType.values()[h]);\r
349                                 Header header = new Header(h, hLineNum, hLine, currentHeader);\r
350                                 if (h == 1) {\r
351                                         level1Headers.add(header);\r
352                                 }\r
353                                 pageObjects.put(hLineNum, header);\r
354                                 currentHeader = header;\r
355                                 continue;\r
356                         }\r
357                         // TODO List\r
358                         // TODO Block quote\r
359                         // Blank line\r
360                         if (Utils.isBlank(line)) {\r
361                                 lineTypes.add(KLineType.BLANK);\r
362                                 continue;\r
363                         }\r
364                         // Normal\r
365                         lineTypes.add(KLineType.NORMAL);\r
366                 } // end line-loop\r
367                 // Remove dummy header?\r
368                 if (dummyTopHeader.getSubHeaders().size() == 0) {\r
369                         level1Headers.remove(dummyTopHeader);\r
370                 }\r
371                 \r
372                 boolean githubSyntaxSupport =\r
373                                 pStore.getBoolean(MarkdownPreferencePage.PREF_GITHUB_SYNTAX);\r
374                 if (githubSyntaxSupport) {\r
375                         /*\r
376                          * Support Code block\r
377                          */\r
378                         boolean inCodeBlock = false;\r
379                         for (lineNum = 0; lineNum < lines.size(); lineNum++) {\r
380                                 String line = lines.get(lineNum);\r
381                                 // Found the start or end of a code block\r
382                                 if (line.matches("^```.*\n")) {\r
383                                         // We reverse the boolean value\r
384                                         inCodeBlock = !inCodeBlock;\r
385 \r
386                                         // We force the line to be blank. But we mark it as normal\r
387                                         // to prevent to be stripped\r
388                                         lines.set(lineNum, "\n");\r
389                                         lineTypes.set(lineNum, KLineType.NORMAL);\r
390                                         continue;\r
391                                 }\r
392                                 if (inCodeBlock) {\r
393                                         lines.set(lineNum, "    " + line);\r
394                                 }\r
395                         }\r
396                         \r
397                         /*\r
398                          * Support for URL Detection\r
399                          * We search for links that are not captured by Markdown syntax\r
400                          */\r
401                         for (lineNum = 0; lineNum < lines.size(); lineNum++) {\r
402                                 String line = lines.get(lineNum);\r
403                                 // When a link has been replaced we need to scan again the string\r
404                                 // as the offsets have changed (we add '<' and '>' to the link to\r
405                                 // be interpreted by the markdown library)\r
406                                 boolean urlReplaced;\r
407 \r
408                                 do {\r
409                                         urlReplaced = false;\r
410                                         Matcher m = githubURLDetection.matcher(line);\r
411                                         while (m.find()) {\r
412                                                 // Ignore the URL following the format <link>\r
413                                                 if ((m.start() - 1 >= 0) && (m.end() < line.length()) &&\r
414                                                         (line.charAt(m.start() - 1) == '<') &&\r
415                                                         (line.charAt(m.end()) == '>'))\r
416                                                 {\r
417                                                         continue;\r
418                                                 }\r
419         \r
420                                                 // Ignore the URL following the format [description](link)\r
421                                                 if ((m.start() - 2 >= 0) && (m.end() < line.length()) &&\r
422                                                         (line.charAt(m.start() - 2) == ']') &&\r
423                                                         (line.charAt(m.start() - 1) == '(') &&\r
424                                                         (line.charAt(m.end()) == ')'))\r
425                                                 {\r
426                                                         continue;\r
427                                                 }\r
428         \r
429                                                 // Ignore the URL following the format [description](link "title")\r
430                                                 if ((m.start() - 2 >= 0) && (m.end() + 1 < line.length()) &&\r
431                                                         (line.charAt(m.start() - 2) == ']') &&\r
432                                                         (line.charAt(m.start() - 1) == '(') &&\r
433                                                         (line.charAt(m.end()) == ' ') &&\r
434                                                         (line.charAt(m.end() + 1) == '"'))\r
435                                                 {\r
436                                                         continue;\r
437                                                 }\r
438                                                 \r
439                                                 if (m.start() - 1 >= 0) {\r
440                                                         // Case when the link is at the beginning of the string\r
441                                                         line = line.substring(0, m.start()) + "<" + m.group(0) + ">" + line.substring(m.end());\r
442                                                 } else {\r
443                                                         line = "<" + m.group(0) + ">" + line.substring(m.end());\r
444                                                 }\r
445                                                 \r
446                                                 // We replaced the string in the array\r
447                                                 lines.set(lineNum, line);\r
448                                                 urlReplaced = true;\r
449                                                 break;\r
450                                         }\r
451                                 } while (urlReplaced);\r
452                         }\r
453                 }\r
454         }\r
455 \r
456         /**\r
457          * @param line\r
458          * @param c\r
459          * @return true if line is just cs (and whitespace at the start/end)\r
460          */\r
461         boolean just(String line, char c) {\r
462                 return line.matches("\\s*"+c+"+\\s*");\r
463         }\r
464 \r
465         /**\r
466          * @param line\r
467          * @return The number of # symbols prepending the line.\r
468          */\r
469         private int numHash(String line) {\r
470                 for (int i = 0; i < line.length(); i++) {\r
471                         if (line.charAt(i) != '#')\r
472                                 return i;\r
473                 }\r
474                 return line.length();\r
475         }\r
476 \r
477         /**\r
478          * \r
479          * @param parent\r
480          *            Can be null for top-level\r
481          * @return List of sub-headers. Never null. FIXME handle documents which\r
482          *         have a 2nd level header before any 1st level ones\r
483          */\r
484         public List<Header> getHeadings(Header parent) {\r
485                 if (parent == null) {\r
486                         return Collections.unmodifiableList(level1Headers);\r
487                 }\r
488                 return Collections.unmodifiableList(parent.subHeaders);\r
489         }\r
490 \r
491         // public WebPage getWebPage() {\r
492         // WebPage page = new WebPage();\r
493         // // Add the lines, one by one\r
494         // boolean inParagraph = false;\r
495         // for (int i=0; i<lines.size(); i++) {\r
496         // String line = lines.get(i);\r
497         // KLineType type = lineTypes.get(i);\r
498         // switch(type) {\r
499         // // Heading?\r
500         // case H1: case H2: case H3:\r
501         // case H4: case H5: case H6:\r
502         // if (inParagraph) page.addText("</p>");\r
503         // line = cleanHeader(line);\r
504         // page.addText("<"+type+">"+line+"</"+type+">");\r
505         // continue;\r
506         // case MARKER: // Ignore\r
507         // continue;\r
508         // // TODO List?\r
509         // // TODO Block quote?\r
510         // }\r
511         // // Paragraph end?\r
512         // if (Utils.isBlank(line)) {\r
513         // if (inParagraph) page.addText("</p>");\r
514         // continue;\r
515         // }\r
516         // // Paragraph start?\r
517         // if (!inParagraph) {\r
518         // page.addText("<p>");\r
519         // inParagraph = true;\r
520         // }\r
521         // // Plain text\r
522         // page.addText(line);\r
523         // }\r
524         // return page;\r
525         // }\r
526 \r
527         /**\r
528          * Get the HTML for this page. Uses the MarkdownJ project.\r
529          */\r
530         public String html() {\r
531                 // Section numbers??\r
532                 boolean sectionNumbers = pStore\r
533                                 .getBoolean(MarkdownPreferencePage.PREF_SECTION_NUMBERS);\r
534                 // Chop out multi-markdown header\r
535                 StringBuilder sb = new StringBuilder();\r
536                 assert lines.size() == lineTypes.size();\r
537                 for (int i = 0, n = lines.size(); i < n; i++) {\r
538                         KLineType type = lineTypes.get(i);\r
539                         if (type == KLineType.META)\r
540                                 continue;\r
541                         String line = lines.get(i);\r
542                         if (sectionNumbers && isHeader(type) && line.contains("$section")) {\r
543                                 // TODO Header section = headers.get(i);\r
544                                 // String secNum = section.getSectionNumber();\r
545                                 // line.replace("$section", secNum);\r
546                         }\r
547                         sb.append(line);\r
548                 }\r
549                 String text = sb.toString();\r
550                 // Use external converter?\r
551                 final String cmd = pStore\r
552                                 .getString(MarkdownPreferencePage.PREF_MARKDOWN_COMMAND);\r
553                 if (Utils.isBlank(cmd)\r
554                                 || (cmd.startsWith("(") && cmd.contains("MarkdownJ"))) {\r
555                         // Use MarkdownJ\r
556                         MarkdownProcessor markdown = new MarkdownProcessor();\r
557                         // MarkdownJ doesn't convert £s for some reason\r
558                         text = text.replace("£", "&pound;");\r
559                         String html = markdown.markdown(text);\r
560                         return html;\r
561                 }\r
562                 // Attempt to run external command\r
563                 try {\r
564                         final File md = File.createTempFile("tmp", ".md");\r
565                         FileUtils.write(md, text);\r
566                         Process process = new Process(cmd+" "+md.getAbsolutePath());\r
567                         process.run();\r
568                         int ok = process.waitFor(10000);\r
569                         if (ok != 0) throw new FailureException(cmd+" failed:\n"+process.getError());\r
570                         String html = process.getOutput();\r
571                         FileUtils.delete(md);\r
572                         return html;\r
573                 } catch (Exception e) {\r
574                         throw Utils.runtime(e);\r
575                 }\r
576         }\r
577 \r
578         /**\r
579          * @param type\r
580          * @return\r
581          */\r
582         private boolean isHeader(KLineType type) {\r
583                 return type == KLineType.H1 || type == KLineType.H2\r
584                                 || type == KLineType.H3 || type == KLineType.H4\r
585                                 || type == KLineType.H5 || type == KLineType.H6;\r
586         }\r
587 \r
588         /**\r
589          * Return the raw text of this page.\r
590          */\r
591         @Override\r
592         public String toString() {\r
593                 StringBuilder sb = new StringBuilder();\r
594                 for (String line : lines) {\r
595                         sb.append(line);\r
596                 }\r
597                 return sb.toString();\r
598         }\r
599 \r
600         /**\r
601          * Line type information for the raw text.\r
602          * \r
603          * @return\r
604          */\r
605         public List<KLineType> getLineTypes() {\r
606                 return Collections.unmodifiableList(lineTypes);\r
607         }\r
608 \r
609         /**\r
610          * @param line\r
611          * @return\r
612          */\r
613         public Object getPageObject(int line) {         \r
614                 return pageObjects.get(line);\r
615         }\r
616 \r
617 }\r