/**
* Copyright winterwell Mathematics Ltd.
* @author Daniel Winterstein
* 11 Jan 2007
*/
package winterwell.markdown.pagemodel;
import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.eclipse.jface.preference.IPreferenceStore;
import winterwell.markdown.Activator;
import winterwell.markdown.StringMethods;
import winterwell.markdown.preferences.MarkdownPreferencePage;
import winterwell.utils.FailureException;
import winterwell.utils.Process;
import winterwell.utils.StrUtils;
import winterwell.utils.Utils;
import winterwell.utils.io.FileUtils;
import com.petebevin.markdown.MarkdownProcessor;
/**
* Understands Markdown syntax.
*
* @author Daniel Winterstein
*/
public class MarkdownPage {
/**
* Strip leading and trailing #s and whitespace
*
* @param line
* @return cleaned up line
*/
private String cleanHeader(String line) {
for (int j = 0; j < line.length(); j++) {
char c = line.charAt(j);
if (c != '#' && !Character.isWhitespace(c)) {
line = line.substring(j);
break;
}
}
for (int j = line.length() - 1; j > 0; j--) {
char c = line.charAt(j);
if (c != '#' && !Character.isWhitespace(c)) {
line = line.substring(0, j + 1);
break;
}
}
return line;
}
/**
* Represents information about a section header. E.g. ## Misc Warblings
*
* @author daniel
*/
public class Header {
/**
* 1 = top-level (i.e. #), 2= 2nd-level (i.e. ##), etc.
*/
final int level;
/**
* The text of the Header
*/
final String heading;
/**
* Sub-sections, if any
*/
final List subHeaders = new ArrayList();
/**
* The line on which this header occurs.
*/
final int lineNumber;
public int getLineNumber() {
return lineNumber;
}
/**
*
* @return the next section (at this depth if possible), null if none
*/
public Header getNext() {
if (parent == null) {
int ti = level1Headers.indexOf(this);
if (ti == -1 || ti == level1Headers.size() - 1)
return null;
return level1Headers.get(ti + 1);
}
int i = parent.subHeaders.indexOf(this);
assert i != -1 : this;
if (i == parent.subHeaders.size() - 1)
return parent.getNext();
return parent.subHeaders.get(i + 1);
}
/**
*
* @return the next section (at this depth if possible), null if none
*/
public Header getPrevious() {
if (parent == null) {
int ti = level1Headers.indexOf(this);
if (ti == -1 || ti == 0)
return null;
return level1Headers.get(ti - 1);
}
int i = parent.subHeaders.indexOf(this);
assert i != -1 : this;
if (i == 0)
return parent.getPrevious();
return parent.subHeaders.get(i - 1);
}
/**
* The parent section. Can be null.
*/
private Header parent;
/**
* Create a marker for a section Header
*
* @param level
* 1 = top-level (i.e. #), 2= 2nd-level (i.e. ##), etc.
* @param lineNumber
* The line on which this header occurs
* @param heading
* The text of the Header, trimmed of #s
* @param currentHeader
* The previous Header. This is used to find the parent
* section if there is one. Can be null.
*/
Header(int level, int lineNumber, String heading, Header currentHeader) {
this.lineNumber = lineNumber;
this.level = level;
this.heading = cleanHeader(heading);
// Heading Tree
setParent(currentHeader);
}
private void setParent(Header currentHeader) {
if (currentHeader == null) {
parent = null;
return;
}
if (currentHeader.level < level) {
parent = currentHeader;
parent.subHeaders.add(this);
return;
}
setParent(currentHeader.parent);
}
public Header getParent() {
return parent;
}
/**
* Sub-sections. May be zero-length, never null.
*/
public List getSubHeaders() {
return subHeaders;
}
@Override
public String toString() {
return heading;
}
public int getLevel() {
return level;
}
}
/**
* The raw text, broken up into individual lines.
*/
private List lines;
/**
* The raw text, broken up into individual lines.
*/
public List getText() {
return Collections.unmodifiableList(lines);
}
public enum KLineType {
NORMAL, H1, H2, H3, H4, H5, H6, BLANK,
// TODO LIST, BLOCKQUOTE,
/** A line marking Markdown info about the preceding line, e.g. ====== */
MARKER,
/** A line containing meta-data, e.g. title: My Page */
META
}
/**
* Information about each line.
*/
private List lineTypes;
private Map pageObjects = new HashMap();
// TODO meta-data, footnotes, tables, link & image attributes
private static Pattern multiMarkdownTag = Pattern.compile("^([\\w].*):(.*)");
private Map multiMarkdownTags = new HashMap();
// Regular expression for Github support
private static Pattern githubURLDetection = Pattern.compile("((https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|])");
/**
* The top-level headers. FIXME handle documents which have a 2nd level
* header before any 1st level ones
*/
private final List level1Headers = new ArrayList();
private final IPreferenceStore pStore;
/**
* Create a page.
*
* @param text
*/
public MarkdownPage(String text) {
pStore = Activator.getDefault().getPreferenceStore();
setText(text);
}
/**
* Reset the text for this page.
*
* @param text
*/
private void setText(String text) {
// Get lines
lines = StringMethods.splitLines(text);
// Clean out old
level1Headers.clear();
lineTypes = new ArrayList(lines.size());
pageObjects.clear();
// Dummy level-1 header in case there are none
Header dummyTopHeader = new Header(1, 0, "", null);
level1Headers.add(dummyTopHeader);
Header currentHeader = dummyTopHeader;
// Identify line types
int lineNum = 0;
// Check if we should support the Multi-Markdown Metadata
boolean multiMarkdownMetadataSupport =
pStore.getBoolean(MarkdownPreferencePage.PREF_MULTIMARKDOWN_METADATA);
// Multi-markdown header
if (multiMarkdownMetadataSupport) {
// The key is the text before the colon, and the data is the text
// after the
// colon. In the above example, notice that there are two lines of
// information
// for the Author key. If you end a line with “space-space-newline”,
// the newline
// will be included when converted to other formats.
//
// There must not be any whitespace above the metadata, and the
// metadata block
// ends with the first whitespace only line. The metadata is
// stripped from the
// document before it is passed on to the syntax parser.
//
// Check if the Metdatas are valid
//
boolean validMetadata = true;
for (lineNum = 0; lineNum < lines.size(); lineNum++) {
String line = lines.get(lineNum);
if (Utils.isBlank(line)) {
break;
}
Matcher m = multiMarkdownTag.matcher(line);
if (!m.find()) {
if (lineNum == 0) {
// No MultiMarkdown metadata
validMetadata = false;
break;
} else if (!line.matches("^\\s.*\n")) {
// The next line was not intended (ie. it does not start
// with a whitespace)
validMetadata = false;
break;
}
}
}
// Valid Metadatas have been found. We need to retrieve these keys/values.
if (validMetadata) {
String data = "";
String tag = "";
for (lineNum = 0; lineNum < lines.size(); lineNum++) {
String line = lines.get(lineNum);
if (Utils.isBlank(line)) {
break;
}
Matcher m = multiMarkdownTag.matcher(line);
if (!m.find()) {
if (lineNum == 0) {
break;
}
// Multi-line tag
lineTypes.add(KLineType.META);
data += StrUtils.LINEEND + line.trim();
multiMarkdownTags.put(tag, data);
} else {
lineTypes.add(KLineType.META);
tag = m.group(0);
data = m.group(1).trim();
if (m.group(1).endsWith(line))
multiMarkdownTags.put(tag, data);
}
}
} else {
lineNum = 0;
}
}
for (; lineNum < lines.size(); lineNum++) {
String line = lines.get(lineNum);
// Headings
int h = numHash(line);
String hLine = line;
int hLineNum = lineNum;
int underline = -1;
if (lineNum != 0) {
underline = just(line, '=') ? 1 : just(line, '-') ? 2 : -1;
}
if (underline != -1) {
h = underline;
hLineNum = lineNum - 1;
hLine = lines.get(lineNum - 1);
lineTypes.set(hLineNum, KLineType.values()[h]);
lineTypes.add(KLineType.MARKER);
}
// Create a Header object
if (h > 0) {
if (underline == -1)
lineTypes.add(KLineType.values()[h]);
Header header = new Header(h, hLineNum, hLine, currentHeader);
if (h == 1) {
level1Headers.add(header);
}
pageObjects.put(hLineNum, header);
currentHeader = header;
continue;
}
// TODO List
// TODO Block quote
// Blank line
if (Utils.isBlank(line)) {
lineTypes.add(KLineType.BLANK);
continue;
}
// Normal
lineTypes.add(KLineType.NORMAL);
} // end line-loop
// Remove dummy header?
if (dummyTopHeader.getSubHeaders().size() == 0) {
level1Headers.remove(dummyTopHeader);
}
boolean githubSyntaxSupport =
pStore.getBoolean(MarkdownPreferencePage.PREF_GITHUB_SYNTAX);
if (githubSyntaxSupport) {
/*
* Support Code block
*/
boolean inCodeBlock = false;
for (lineNum = 0; lineNum < lines.size(); lineNum++) {
String line = lines.get(lineNum);
// Found the start or end of a code block
if (line.matches("^```.*\n")) {
// We reverse the boolean value
inCodeBlock = !inCodeBlock;
// We force the line to be blank. But we mark it as normal
// to prevent to be stripped
lines.set(lineNum, "\n");
lineTypes.set(lineNum, KLineType.NORMAL);
continue;
}
if (inCodeBlock) {
lines.set(lineNum, " " + line);
}
}
/*
* Support for URL Detection
* We search for links that are not captured by Markdown syntax
*/
for (lineNum = 0; lineNum < lines.size(); lineNum++) {
String line = lines.get(lineNum);
// When a link has been replaced we need to scan again the string
// as the offsets have changed (we add '<' and '>' to the link to
// be interpreted by the markdown library)
boolean urlReplaced;
do {
urlReplaced = false;
Matcher m = githubURLDetection.matcher(line);
while (m.find()) {
// Ignore the URL following the format
if ((m.start() - 1 >= 0) && (m.end() < line.length()) &&
(line.charAt(m.start() - 1) == '<') &&
(line.charAt(m.end()) == '>'))
{
continue;
}
// Ignore the URL following the format [description](link)
if ((m.start() - 2 >= 0) && (m.end() < line.length()) &&
(line.charAt(m.start() - 2) == ']') &&
(line.charAt(m.start() - 1) == '(') &&
(line.charAt(m.end()) == ')'))
{
continue;
}
// Ignore the URL following the format [description](link "title")
if ((m.start() - 2 >= 0) && (m.end() + 1 < line.length()) &&
(line.charAt(m.start() - 2) == ']') &&
(line.charAt(m.start() - 1) == '(') &&
(line.charAt(m.end()) == ' ') &&
(line.charAt(m.end() + 1) == '"'))
{
continue;
}
if (m.start() - 1 >= 0) {
// Case when the link is at the beginning of the string
line = line.substring(0, m.start()) + "<" + m.group(0) + ">" + line.substring(m.end());
} else {
line = "<" + m.group(0) + ">" + line.substring(m.end());
}
// We replaced the string in the array
lines.set(lineNum, line);
urlReplaced = true;
break;
}
} while (urlReplaced);
}
}
}
/**
* @param line
* @param c
* @return true if line is just cs (and whitespace at the start/end)
*/
boolean just(String line, char c) {
return line.matches("\\s*"+c+"+\\s*");
}
/**
* @param line
* @return The number of # symbols prepending the line.
*/
private int numHash(String line) {
for (int i = 0; i < line.length(); i++) {
if (line.charAt(i) != '#')
return i;
}
return line.length();
}
/**
*
* @param parent
* Can be null for top-level
* @return List of sub-headers. Never null. FIXME handle documents which
* have a 2nd level header before any 1st level ones
*/
public List getHeadings(Header parent) {
if (parent == null) {
return Collections.unmodifiableList(level1Headers);
}
return Collections.unmodifiableList(parent.subHeaders);
}
// public WebPage getWebPage() {
// WebPage page = new WebPage();
// // Add the lines, one by one
// boolean inParagraph = false;
// for (int i=0; i");
// line = cleanHeader(line);
// page.addText("<"+type+">"+line+""+type+">");
// continue;
// case MARKER: // Ignore
// continue;
// // TODO List?
// // TODO Block quote?
// }
// // Paragraph end?
// if (Utils.isBlank(line)) {
// if (inParagraph) page.addText("
");
// inParagraph = true;
// }
// // Plain text
// page.addText(line);
// }
// return page;
// }
/**
* Get the HTML for this page. Uses the MarkdownJ project.
*/
public String html() {
// Section numbers??
boolean sectionNumbers = pStore
.getBoolean(MarkdownPreferencePage.PREF_SECTION_NUMBERS);
// Chop out multi-markdown header
StringBuilder sb = new StringBuilder();
assert lines.size() == lineTypes.size();
for (int i = 0, n = lines.size(); i < n; i++) {
KLineType type = lineTypes.get(i);
if (type == KLineType.META)
continue;
String line = lines.get(i);
if (sectionNumbers && isHeader(type) && line.contains("$section")) {
// TODO Header section = headers.get(i);
// String secNum = section.getSectionNumber();
// line.replace("$section", secNum);
}
sb.append(line);
}
String text = sb.toString();
// Use external converter?
final String cmd = pStore
.getString(MarkdownPreferencePage.PREF_MARKDOWN_COMMAND);
if (Utils.isBlank(cmd)
|| (cmd.startsWith("(") && cmd.contains("MarkdownJ"))) {
// Use MarkdownJ
MarkdownProcessor markdown = new MarkdownProcessor();
// MarkdownJ doesn't convert £s for some reason
text = text.replace("£", "£");
String html = markdown.markdown(text);
return html;
}
// Attempt to run external command
try {
final File md = File.createTempFile("tmp", ".md");
FileUtils.write(md, text);
Process process = new Process(cmd+" "+md.getAbsolutePath());
process.run();
int ok = process.waitFor(10000);
if (ok != 0) throw new FailureException(cmd+" failed:\n"+process.getError());
String html = process.getOutput();
FileUtils.delete(md);
return html;
} catch (Exception e) {
throw Utils.runtime(e);
}
}
/**
* @param type
* @return
*/
private boolean isHeader(KLineType type) {
return type == KLineType.H1 || type == KLineType.H2
|| type == KLineType.H3 || type == KLineType.H4
|| type == KLineType.H5 || type == KLineType.H6;
}
/**
* Return the raw text of this page.
*/
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
for (String line : lines) {
sb.append(line);
}
return sb.toString();
}
/**
* Line type information for the raw text.
*
* @return
*/
public List getLineTypes() {
return Collections.unmodifiableList(lineTypes);
}
/**
* @param line
* @return
*/
public Object getPageObject(int line) {
return pageObjects.get(line);
}
}