package winterwell.markdown.pagemodel; import java.util.List; import winterwell.utils.StrUtils; /** * Formats a string that is compatible with the Markdown syntax. * Strings must not include headers. * * @author Howard Abrams */ public class MarkdownFormatter { // Expect everyone to simply use the public static methods... private MarkdownFormatter () { } /** * Formats a collection of lines to a particular width and honors typical * Markdown syntax and formatting. * * The method assumes that if the first line ends with a line * termination character, all the other lines will as well. * * @param lines A list of strings that should be formatted and wrapped. * @param lineWidth The width of the page * @return A string containing each */ public static String format (List lines, int lineWidth) { if (lines == null) return null; // Should we return an empty string? final String lineEndings; if ( lines.get(0).endsWith ("\r\n") ) lineEndings = "\r\n"; else if ( lines.get(0).endsWith ("\r") ) lineEndings = "\r"; else lineEndings = StrUtils.LINEEND; final StringBuilder buf = new StringBuilder(); for (String line : lines) { buf.append (line); buf.append (' '); // We can add extra spaces with impunity, and this // makes sure our lines don't run together. } return format ( buf.toString(), lineWidth, lineEndings ); } /** * Formats a string of text. The formatting does line wrapping at the * lineWidth boundary, but it also honors the formatting * of initial paragraph lines, allowing indentation of the entire * paragraph. * * @param text The line of text to format * @param lineWidth The width of the lines * @return A string containing the formatted text. */ public static String format ( final String text, final int lineWidth) { return format(text, lineWidth, StrUtils.LINEEND); } /** * Formats a string of text. The formatting does line wrapping at the * lineWidth boundary, but it also honors the formatting * of initial paragraph lines, allowing indentation of the entire * paragraph. * * @param text The line of text to format * @param lineWidth The width of the lines * @param lineEnding The line ending that overrides the default System value * @return A string containing the formatted text. */ public static String format (final String text, final int lineWidth, final String lineEnding) { return new String( format(text.toCharArray (), lineWidth, lineEnding)); } /** * The available cursor position states as it sits in the buffer. */ private enum StatePosition { /** The beginning of a paragraph ... the start of the buffer */ BEGIN_FIRST_LINE, /** The beginning of the next line, which may be completely ignored. */ BEGIN_OTHER_LINE, /** The beginning of a new line that will not be ignored, but appended. */ BEGIN_NEW_LINE, /** The middle of a line. */ MIDDLE_OF_LINE } /** * The method that does the work of formatting a string of text. The text, * however, is a character array, which is more efficient to work with. * * TODO: Should we make the format(char[]) method public? * * @param text The line of text to format * @param lineWidth The width of the lines * @param lineEnding The line ending that overrides the default System value * @return A string containing the formatted text. */ static char[] format ( final char[] text, final int lineWidth, final String lineEnding ) { final StringBuilder word = new StringBuilder(); final StringBuilder indent = new StringBuilder(); final StringBuilder buffer = new StringBuilder(text.length + 10); StatePosition state = StatePosition.BEGIN_FIRST_LINE; int lineLength = 0; // There are times when we will run across a character(s) that will // cause us to stop doing word wrap until we get to the // "end of non-wordwrap" character(s). // // If this string is set to null, it tells us to "do" word-wrapping. char endWordwrap1 = 0; char endWordwrap2 = 0; // We loop one character past the end of the loop, and when we get to // this position, we assign 'c' to be 0 ... as a marker for the end of // the string... for (int i = 0; i <= text.length; i++) { final char c; if (i < text.length) c = text[i]; else c = 0; final char nextChar; if (i+1 < text.length) nextChar = text[i+1]; else nextChar = 0; // Are we actually word-wrapping? if (endWordwrap1 != 0) { // Did we get the ending sequence of the non-word-wrap? if ( ( endWordwrap2 == 0 && c == endWordwrap1 ) || ( c == endWordwrap1 && nextChar == endWordwrap2 ) ) endWordwrap1 = 0; buffer.append (c); lineLength++; if (endWordwrap1 == 0 && endWordwrap2 != 0) { buffer.append (nextChar); lineLength++; i++; } continue; } // Check to see if we got one of our special non-word-wrapping // character sequences ... if ( c == '[' ) { // [Hyperlink] endWordwrap1 = ']'; } else if ( c == '*' && nextChar == '*' ) { // **Bold** endWordwrap1 = '*'; endWordwrap2 = '*'; } // *Italics* else if ( c == '*' && state == StatePosition.MIDDLE_OF_LINE ) { endWordwrap1 = '*'; } else if ( c == '`' ) { // `code` endWordwrap1 = '`'; } else if ( c == '(' && nextChar == '(' ) { // ((Footnote)) endWordwrap1 = ')'; endWordwrap2 = ')'; } else if ( c == '!' && nextChar == '[' ) { // ![Image] endWordwrap1 = ')'; } // We are no longer doing word-wrapping, so tidy the situation up... if (endWordwrap1 != 0) { if (word.length() > 0) lineLength = addWordToBuffer (lineWidth, lineEnding, word, indent, buffer, lineLength); else if (buffer.length() > 0 && buffer.charAt (buffer.length()-1) != ']' ) buffer.append(' '); // We are adding an extra space for most situations, unless we get a // [link][ref] where we want them to be together without a space. buffer.append (c); lineLength++; continue; } // Normal word-wrapping processing continues ... if (state == StatePosition.BEGIN_FIRST_LINE) { if ( c == '\n' || c == '\r' ) { // Keep, but ignore initial line feeds buffer.append (c); lineLength = 0; continue; } if (Character.isWhitespace (c)) indent.append (c); else if ( (c == '*' || c == '-' || c == '.' ) && Character.isWhitespace (nextChar) ) indent.append (' '); else if ( Character.isDigit (c) && nextChar == '.' && Character.isWhitespace (text[i+2])) indent.append (' '); else if ( c == '>' ) indent.append ('>'); else state = StatePosition.MIDDLE_OF_LINE; // If we are still in the initial state, then put 'er in... if (state == StatePosition.BEGIN_FIRST_LINE) { buffer.append (c); lineLength++; } } // While it would be more accurate to explicitely state the range of // possibilities, with something like: // EnumSet.range (StatePosition.BEGIN_OTHER_LINE, StatePosition.MIDDLE_OF_LINE ).contains (state) // We know that what is left is just the BEGIN_FIRST_LINE ... if ( state != StatePosition.BEGIN_FIRST_LINE ) { // If not the middle of the line, then it must be at the first of a line // Either BEGIN_OTHER_LINE or BEGIN_NEW_LINE if (state != StatePosition.MIDDLE_OF_LINE) { if ( Character.isWhitespace(c) || c == '>' || c == '.' ) word.append (c); else if ( ( ( c == '*' || c == '-' ) && Character.isWhitespace (nextChar) ) || ( Character.isDigit(c) && nextChar == '.' && Character.isWhitespace( text[i+2] ) ) ) { word.append (c); state = StatePosition.BEGIN_NEW_LINE; } else { if (state == StatePosition.BEGIN_NEW_LINE) { buffer.append (word); lineLength = word.substring ( word.indexOf("\n")+1 ).length(); } word.setLength (0); state = StatePosition.MIDDLE_OF_LINE; } } if (state == StatePosition.MIDDLE_OF_LINE) { // Are we at the end of a word? Then we need to calculate whether // to wrap the line or not. // // This condition does double duty, in that is also serves to // ignore multiple spaces and special characters that may be at // the beginning of the line. if ( Character.isWhitespace(c) || c == 0 ) { if ( word.length() > 0) { lineLength = addWordToBuffer (lineWidth, lineEnding, word, indent, buffer, lineLength); } // Do we we two spaces at the end of the line? Honor this... else if ( c == ' ' && ( nextChar == '\r' || nextChar == '\n' ) && state != StatePosition.BEGIN_OTHER_LINE ) { buffer.append (" "); buffer.append (lineEnding); lineLength = 0; } if ( c == '\r' || c == '\n' ) { state = StatePosition.BEGIN_OTHER_LINE; word.append(c); } // Linefeeds are completely ignored and just treated as whitespace, // unless, of course, there are two of 'em... and of course, end of // lines are simply evil on Windows machines. if ( (c == '\n' && nextChar == '\n') || // Unix-style line-ends ( c == '\r' && nextChar == '\n' && // Windows-style line-ends text[i+2] == '\r' && text[i+3] == '\n' ) ) { state = StatePosition.BEGIN_FIRST_LINE; word.setLength(0); indent.setLength (0); lineLength = 0; if (c == '\r') { // If we are dealing with Windows-style line-ends, i++; // we need to skip past the next character... buffer.append("\r\n"); } else buffer.append(c); } } else { word.append (c); state = StatePosition.MIDDLE_OF_LINE; } } } } return buffer.toString().toCharArray(); } /** * Adds a word to the buffer, performing word wrap if necessary. * @param lineWidth The current width of the line * @param lineEnding The line ending to append, if necessary * @param word The word to append * @param indent The indentation string to insert, if necesary * @param buffer The buffer to perform all this stuff to * @param lineLength The current length of the current line * @return The new length of the current line */ private static int addWordToBuffer (final int lineWidth, final String lineEnding, final StringBuilder word, final StringBuilder indent, final StringBuilder buffer, int lineLength) { if ( word.length() + lineLength + 1 > lineWidth ) { buffer.append (lineEnding); buffer.append (indent); buffer.append (word); lineLength = indent.length() + word.length(); } else { if ( lineLength > indent.length() ) buffer.append (' '); buffer.append (word); lineLength += word.length() + 1; } word.setLength (0); return lineLength; } }