]> gerrit.simantics Code Review - simantics/platform.git/blob - bundles/winterwell.markdown/src/winterwell/markdown/pagemodel/MarkdownPage.java
Fixed all line endings of the repository
[simantics/platform.git] / bundles / winterwell.markdown / src / winterwell / markdown / pagemodel / MarkdownPage.java
1 /**
2  * Copyright winterwell Mathematics Ltd.
3  * @author Daniel Winterstein
4  * 11 Jan 2007
5  */
6 package winterwell.markdown.pagemodel;
7
8 import java.io.File;
9 import java.util.ArrayList;
10 import java.util.Collections;
11 import java.util.HashMap;
12 import java.util.List;
13 import java.util.Map;
14 import java.util.regex.Matcher;
15 import java.util.regex.Pattern;
16
17 import org.eclipse.jface.preference.IPreferenceStore;
18
19 import winterwell.markdown.Activator;
20 import winterwell.markdown.StringMethods;
21 import winterwell.markdown.preferences.MarkdownPreferencePage;
22 import winterwell.utils.FailureException;
23 import winterwell.utils.Process;
24 import winterwell.utils.StrUtils;
25 import winterwell.utils.Utils;
26 import winterwell.utils.io.FileUtils;
27
28 import com.petebevin.markdown.MarkdownProcessor;
29
30 /**
31  * Understands Markdown syntax.
32  * 
33  * @author Daniel Winterstein
34  */
35 public class MarkdownPage {
36
37         /**
38          * Strip leading and trailing #s and whitespace
39          * 
40          * @param line
41          * @return cleaned up line
42          */
43         private String cleanHeader(String line) {
44                 for (int j = 0; j < line.length(); j++) {
45                         char c = line.charAt(j);
46                         if (c != '#' && !Character.isWhitespace(c)) {
47                                 line = line.substring(j);
48                                 break;
49                         }
50                 }
51                 for (int j = line.length() - 1; j > 0; j--) {
52                         char c = line.charAt(j);
53                         if (c != '#' && !Character.isWhitespace(c)) {
54                                 line = line.substring(0, j + 1);
55                                 break;
56                         }
57                 }
58                 return line;
59         }
60
61         /**
62          * Represents information about a section header. E.g. ## Misc Warblings
63          * 
64          * @author daniel
65          */
66         public class Header {
67                 /**
68                  * 1 = top-level (i.e. #), 2= 2nd-level (i.e. ##), etc.
69                  */
70                 final int level;
71                 /**
72                  * The text of the Header
73                  */
74                 final String heading;
75                 /**
76                  * Sub-sections, if any
77                  */
78                 final List<Header> subHeaders = new ArrayList<Header>();
79                 /**
80                  * The line on which this header occurs.
81                  */
82                 final int lineNumber;
83
84                 public int getLineNumber() {
85                         return lineNumber;
86                 }
87
88                 /**
89                  * 
90                  * @return the next section (at this depth if possible), null if none
91                  */
92                 public Header getNext() {
93                         if (parent == null) {
94                                 int ti = level1Headers.indexOf(this);
95                                 if (ti == -1 || ti == level1Headers.size() - 1)
96                                         return null;
97                                 return level1Headers.get(ti + 1);
98                         }
99                         int i = parent.subHeaders.indexOf(this);
100                         assert i != -1 : this;
101                         if (i == parent.subHeaders.size() - 1)
102                                 return parent.getNext();
103                         return parent.subHeaders.get(i + 1);
104                 }
105                 /**
106                  * 
107                  * @return the next section (at this depth if possible), null if none
108                  */
109                 public Header getPrevious() {
110                         if (parent == null) {
111                                 int ti = level1Headers.indexOf(this);
112                                 if (ti == -1 || ti == 0)
113                                         return null;
114                                 return level1Headers.get(ti - 1);
115                         }
116                         int i = parent.subHeaders.indexOf(this);
117                         assert i != -1 : this;
118                         if (i == 0)
119                                 return parent.getPrevious();
120                         return parent.subHeaders.get(i - 1);
121                 }
122                 
123
124                 /**
125                  * The parent section. Can be null.
126                  */
127                 private Header parent;
128
129                 /**
130                  * Create a marker for a section Header
131                  * 
132                  * @param level
133                  *            1 = top-level (i.e. #), 2= 2nd-level (i.e. ##), etc.
134                  * @param lineNumber
135                  *            The line on which this header occurs
136                  * @param heading
137                  *            The text of the Header, trimmed of #s
138                  * @param currentHeader
139                  *            The previous Header. This is used to find the parent
140                  *            section if there is one. Can be null.
141                  */
142                 Header(int level, int lineNumber, String heading, Header currentHeader) {
143                         this.lineNumber = lineNumber;
144                         this.level = level;
145                         this.heading = cleanHeader(heading);
146                         // Heading Tree
147                         setParent(currentHeader);
148                 }
149
150                 private void setParent(Header currentHeader) {
151                         if (currentHeader == null) {
152                                 parent = null;
153                                 return;
154                         }
155                         if (currentHeader.level < level) {
156                                 parent = currentHeader;
157                                 parent.subHeaders.add(this);
158                                 return;
159                         }
160                         setParent(currentHeader.parent);
161                 }
162
163                 public Header getParent() {
164                         return parent;
165                 }
166
167                 /**
168                  * Sub-sections. May be zero-length, never null.
169                  */
170                 public List<Header> getSubHeaders() {
171                         return subHeaders;
172                 }
173
174                 @Override
175                 public String toString() {
176                         return heading;
177                 }
178
179                 public int getLevel() {
180                         return level;
181                 }
182         }
183
184         /**
185          * The raw text, broken up into individual lines.
186          */
187         private List<String> lines;
188
189         /**
190          * The raw text, broken up into individual lines.
191          */
192         public List<String> getText() {
193                 return Collections.unmodifiableList(lines);
194         }
195
        /**
         * Classification of a single line of the page.
         * <p>
         * The declaration order is significant: setText looks header types up
         * with <code>KLineType.values()[level]</code>, so NORMAL must stay at
         * index 0 and H1..H6 at indices 1..6. Add new constants after H6 only.
         */
        public enum KLineType {
                NORMAL, H1, H2, H3, H4, H5, H6, BLANK,
                // TODO LIST, BLOCKQUOTE,
                /** A line marking Markdown info about the preceding line, e.g. ====== */
                MARKER,
                /** A line containing meta-data, e.g. title: My Page */
                META
        }
204
205         /**
206          * Information about each line.
207          */
208         private List<KLineType> lineTypes;
209         private Map<Integer,Object> pageObjects = new HashMap<Integer, Object>();
210
211         // TODO meta-data, footnotes, tables, link & image attributes
212         private static Pattern multiMarkdownTag = Pattern.compile("^([\\w].*):(.*)");
213         private Map<String, String> multiMarkdownTags = new HashMap<String, String>();
214         
215         // Regular expression for Github support
216         private static Pattern githubURLDetection = Pattern.compile("((https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|])");
217
218         /**
219          * The top-level headers. FIXME handle documents which have a 2nd level
220          * header before any 1st level ones
221          */
222         private final List<Header> level1Headers = new ArrayList<Header>();
223         private final IPreferenceStore pStore;
224
225         /**
226          * Create a page.
227          * 
228          * @param text
229          */
230         public MarkdownPage(String text) {
231                 pStore = Activator.getDefault().getPreferenceStore();
232                 setText(text);
233         }
234
235         /**
236          * Reset the text for this page.
237          * 
238          * @param text
239          */
240         private void setText(String text) {
241                 // Get lines
242                 lines = StringMethods.splitLines(text);
243                 // Clean out old
244                 level1Headers.clear();
245                 lineTypes = new ArrayList<KLineType>(lines.size());
246                 pageObjects.clear();
247                 // Dummy level-1 header in case there are none          
248                 Header dummyTopHeader = new Header(1, 0, "", null);
249                 level1Headers.add(dummyTopHeader);
250                 Header currentHeader = dummyTopHeader;          
251                 // Identify line types          
252                 int lineNum = 0;
253
254                 // Check if we should support the Multi-Markdown Metadata
255                 boolean multiMarkdownMetadataSupport =
256                                 pStore.getBoolean(MarkdownPreferencePage.PREF_MULTIMARKDOWN_METADATA);
257                 
258                 // Multi-markdown header
259                 if (multiMarkdownMetadataSupport) {
260                         // The key is the text before the colon, and the data is the text
261                         // after the
262                         // colon. In the above example, notice that there are two lines of
263                         // information
264                         // for the Author key. If you end a line with “space-space-newline”,
265                         // the newline
266                         // will be included when converted to other formats.
267                         //
268                         // There must not be any whitespace above the metadata, and the
269                         // metadata block
270                         // ends with the first whitespace only line. The metadata is
271                         // stripped from the
272                         // document before it is passed on to the syntax parser.
273                         
274                         //
275                         // Check if the Metdatas are valid
276                         //
277                         boolean validMetadata = true;
278                         for (lineNum = 0; lineNum < lines.size(); lineNum++) {
279                                 String line = lines.get(lineNum);
280                                 if (Utils.isBlank(line)) {
281                                         break;
282                                 }
283                                 Matcher m = multiMarkdownTag.matcher(line);
284                                 if (!m.find()) {
285                                         if (lineNum == 0) {
286                                                 // No MultiMarkdown metadata
287                                                 validMetadata = false;
288                                                 break;
289                                         } else if (!line.matches("^\\s.*\n")) {
290                                                 // The next line was not intended (ie. it does not start
291                                                 // with a whitespace)
292                                                 validMetadata = false;
293                                                 break;
294                                         }
295                                 }
296                         }
297                         
298                         // Valid Metadatas have been found. We need to retrieve these keys/values.
299                         if (validMetadata) {
300                                 String data = "";
301                                 String tag = "";
302                                 for (lineNum = 0; lineNum < lines.size(); lineNum++) {
303                                         String line = lines.get(lineNum);
304                                         if (Utils.isBlank(line)) {
305                                                 break;
306                                         }
307                                         Matcher m = multiMarkdownTag.matcher(line);
308                                         if (!m.find()) {
309                                                 if (lineNum == 0) {
310                                                         break;
311                                                 }
312                                                 // Multi-line tag
313                                                 lineTypes.add(KLineType.META);
314                                                 data += StrUtils.LINEEND + line.trim();
315                                                 multiMarkdownTags.put(tag, data);
316                                         } else {
317                                                 lineTypes.add(KLineType.META);
318                                                 tag = m.group(0);
319                                                 data = m.group(1).trim();
320                                                 if (m.group(1).endsWith(line))
321                                                         multiMarkdownTags.put(tag, data);
322                                         }
323                                 }
324                         } else {
325                                 lineNum = 0;
326                         }
327                 }
328                 for (; lineNum < lines.size(); lineNum++) {
329                         String line = lines.get(lineNum);
330                         // Headings
331                         int h = numHash(line);
332                         String hLine = line;
333                         int hLineNum = lineNum;
334                         int underline = -1;
335                         if (lineNum != 0) {
336                                 underline = just(line, '=') ? 1 : just(line, '-') ? 2 : -1;
337                         }
338                         if (underline != -1) {
339                                 h = underline;
340                                 hLineNum = lineNum - 1;
341                                 hLine = lines.get(lineNum - 1);
342                                 lineTypes.set(hLineNum, KLineType.values()[h]);
343                                 lineTypes.add(KLineType.MARKER);
344                         }
345                         // Create a Header object
346                         if (h > 0) {
347                                 if (underline == -1)
348                                         lineTypes.add(KLineType.values()[h]);
349                                 Header header = new Header(h, hLineNum, hLine, currentHeader);
350                                 if (h == 1) {
351                                         level1Headers.add(header);
352                                 }
353                                 pageObjects.put(hLineNum, header);
354                                 currentHeader = header;
355                                 continue;
356                         }
357                         // TODO List
358                         // TODO Block quote
359                         // Blank line
360                         if (Utils.isBlank(line)) {
361                                 lineTypes.add(KLineType.BLANK);
362                                 continue;
363                         }
364                         // Normal
365                         lineTypes.add(KLineType.NORMAL);
366                 } // end line-loop
367                 // Remove dummy header?
368                 if (dummyTopHeader.getSubHeaders().size() == 0) {
369                         level1Headers.remove(dummyTopHeader);
370                 }
371                 
372                 boolean githubSyntaxSupport =
373                                 pStore.getBoolean(MarkdownPreferencePage.PREF_GITHUB_SYNTAX);
374                 if (githubSyntaxSupport) {
375                         /*
376                          * Support Code block
377                          */
378                         boolean inCodeBlock = false;
379                         for (lineNum = 0; lineNum < lines.size(); lineNum++) {
380                                 String line = lines.get(lineNum);
381                                 // Found the start or end of a code block
382                                 if (line.matches("^```.*\n")) {
383                                         // We reverse the boolean value
384                                         inCodeBlock = !inCodeBlock;
385
386                                         // We force the line to be blank. But we mark it as normal
387                                         // to prevent to be stripped
388                                         lines.set(lineNum, "\n");
389                                         lineTypes.set(lineNum, KLineType.NORMAL);
390                                         continue;
391                                 }
392                                 if (inCodeBlock) {
393                                         lines.set(lineNum, "    " + line);
394                                 }
395                         }
396                         
397                         /*
398                          * Support for URL Detection
399                          * We search for links that are not captured by Markdown syntax
400                          */
401                         for (lineNum = 0; lineNum < lines.size(); lineNum++) {
402                                 String line = lines.get(lineNum);
403                                 // When a link has been replaced we need to scan again the string
404                                 // as the offsets have changed (we add '<' and '>' to the link to
405                                 // be interpreted by the markdown library)
406                                 boolean urlReplaced;
407
408                                 do {
409                                         urlReplaced = false;
410                                         Matcher m = githubURLDetection.matcher(line);
411                                         while (m.find()) {
412                                                 // Ignore the URL following the format <link>
413                                                 if ((m.start() - 1 >= 0) && (m.end() < line.length()) &&
414                                                         (line.charAt(m.start() - 1) == '<') &&
415                                                         (line.charAt(m.end()) == '>'))
416                                                 {
417                                                         continue;
418                                                 }
419         
420                                                 // Ignore the URL following the format [description](link)
421                                                 if ((m.start() - 2 >= 0) && (m.end() < line.length()) &&
422                                                         (line.charAt(m.start() - 2) == ']') &&
423                                                         (line.charAt(m.start() - 1) == '(') &&
424                                                         (line.charAt(m.end()) == ')'))
425                                                 {
426                                                         continue;
427                                                 }
428         
429                                                 // Ignore the URL following the format [description](link "title")
430                                                 if ((m.start() - 2 >= 0) && (m.end() + 1 < line.length()) &&
431                                                         (line.charAt(m.start() - 2) == ']') &&
432                                                         (line.charAt(m.start() - 1) == '(') &&
433                                                         (line.charAt(m.end()) == ' ') &&
434                                                         (line.charAt(m.end() + 1) == '"'))
435                                                 {
436                                                         continue;
437                                                 }
438                                                 
439                                                 if (m.start() - 1 >= 0) {
440                                                         // Case when the link is at the beginning of the string
441                                                         line = line.substring(0, m.start()) + "<" + m.group(0) + ">" + line.substring(m.end());
442                                                 } else {
443                                                         line = "<" + m.group(0) + ">" + line.substring(m.end());
444                                                 }
445                                                 
446                                                 // We replaced the string in the array
447                                                 lines.set(lineNum, line);
448                                                 urlReplaced = true;
449                                                 break;
450                                         }
451                                 } while (urlReplaced);
452                         }
453                 }
454         }
455
456         /**
457          * @param line
458          * @param c
459          * @return true if line is just cs (and whitespace at the start/end)
460          */
461         boolean just(String line, char c) {
462                 return line.matches("\\s*"+c+"+\\s*");
463         }
464
465         /**
466          * @param line
467          * @return The number of # symbols prepending the line.
468          */
469         private int numHash(String line) {
470                 for (int i = 0; i < line.length(); i++) {
471                         if (line.charAt(i) != '#')
472                                 return i;
473                 }
474                 return line.length();
475         }
476
477         /**
478          * 
479          * @param parent
480          *            Can be null for top-level
481          * @return List of sub-headers. Never null. FIXME handle documents which
482          *         have a 2nd level header before any 1st level ones
483          */
484         public List<Header> getHeadings(Header parent) {
485                 if (parent == null) {
486                         return Collections.unmodifiableList(level1Headers);
487                 }
488                 return Collections.unmodifiableList(parent.subHeaders);
489         }
490
491         // public WebPage getWebPage() {
492         // WebPage page = new WebPage();
493         // // Add the lines, one by one
494         // boolean inParagraph = false;
495         // for (int i=0; i<lines.size(); i++) {
496         // String line = lines.get(i);
497         // KLineType type = lineTypes.get(i);
498         // switch(type) {
499         // // Heading?
500         // case H1: case H2: case H3:
501         // case H4: case H5: case H6:
502         // if (inParagraph) page.addText("</p>");
503         // line = cleanHeader(line);
504         // page.addText("<"+type+">"+line+"</"+type+">");
505         // continue;
506         // case MARKER: // Ignore
507         // continue;
508         // // TODO List?
509         // // TODO Block quote?
510         // }
511         // // Paragraph end?
512         // if (Utils.isBlank(line)) {
513         // if (inParagraph) page.addText("</p>");
514         // continue;
515         // }
516         // // Paragraph start?
517         // if (!inParagraph) {
518         // page.addText("<p>");
519         // inParagraph = true;
520         // }
521         // // Plain text
522         // page.addText(line);
523         // }
524         // return page;
525         // }
526
527         /**
528          * Get the HTML for this page. Uses the MarkdownJ project.
529          */
530         public String html() {
531                 // Section numbers??
532                 boolean sectionNumbers = pStore
533                                 .getBoolean(MarkdownPreferencePage.PREF_SECTION_NUMBERS);
534                 // Chop out multi-markdown header
535                 StringBuilder sb = new StringBuilder();
536                 assert lines.size() == lineTypes.size();
537                 for (int i = 0, n = lines.size(); i < n; i++) {
538                         KLineType type = lineTypes.get(i);
539                         if (type == KLineType.META)
540                                 continue;
541                         String line = lines.get(i);
542                         if (sectionNumbers && isHeader(type) && line.contains("$section")) {
543                                 // TODO Header section = headers.get(i);
544                                 // String secNum = section.getSectionNumber();
545                                 // line.replace("$section", secNum);
546                         }
547                         sb.append(line);
548                 }
549                 String text = sb.toString();
550                 // Use external converter?
551                 final String cmd = pStore
552                                 .getString(MarkdownPreferencePage.PREF_MARKDOWN_COMMAND);
553                 if (Utils.isBlank(cmd)
554                                 || (cmd.startsWith("(") && cmd.contains("MarkdownJ"))) {
555                         // Use MarkdownJ
556                         MarkdownProcessor markdown = new MarkdownProcessor();
557                         // MarkdownJ doesn't convert £s for some reason
558                         text = text.replace("£", "&pound;");
559                         String html = markdown.markdown(text);
560                         return html;
561                 }
562                 // Attempt to run external command
563                 try {
564                         final File md = File.createTempFile("tmp", ".md");
565                         FileUtils.write(md, text);
566                         Process process = new Process(cmd+" "+md.getAbsolutePath());
567                         process.run();
568                         int ok = process.waitFor(10000);
569                         if (ok != 0) throw new FailureException(cmd+" failed:\n"+process.getError());
570                         String html = process.getOutput();
571                         FileUtils.delete(md);
572                         return html;
573                 } catch (Exception e) {
574                         throw Utils.runtime(e);
575                 }
576         }
577
578         /**
579          * @param type
580          * @return
581          */
582         private boolean isHeader(KLineType type) {
583                 return type == KLineType.H1 || type == KLineType.H2
584                                 || type == KLineType.H3 || type == KLineType.H4
585                                 || type == KLineType.H5 || type == KLineType.H6;
586         }
587
588         /**
589          * Return the raw text of this page.
590          */
591         @Override
592         public String toString() {
593                 StringBuilder sb = new StringBuilder();
594                 for (String line : lines) {
595                         sb.append(line);
596                 }
597                 return sb.toString();
598         }
599
600         /**
601          * Line type information for the raw text.
602          * 
603          * @return
604          */
605         public List<KLineType> getLineTypes() {
606                 return Collections.unmodifiableList(lineTypes);
607         }
608
609         /**
610          * @param line
611          * @return
612          */
613         public Object getPageObject(int line) {         
614                 return pageObjects.get(line);
615         }
616
617 }