2 package winterwell.markdown.pagemodel;
6 import winterwell.utils.StrUtils;
9 * Formats a string that is compatible with the Markdown syntax.
10 * Strings must not include headers.
12 * @author Howard Abrams
14 public class MarkdownFormatter
16 // Private constructor: this class is not meant to be instantiated; expect everyone to simply use the public static methods...
17 private MarkdownFormatter ()
22 * Formats a collection of lines to a particular width and honors typical
23 * Markdown syntax and formatting.
25 * The method <i>assumes</i> that if the first line ends with a line
26 * termination character, all the other lines will as well.
28 * @param lines A list of strings that should be formatted and wrapped.
29 * @param lineWidth The width of the page
30 * @return A string containing each of the given lines, joined and formatted to the line width; may be <code>null</code>
32 public static String format (List<String> lines, int lineWidth)
// Joins the lines into one space-separated string and hands it to
// format(String, int, String), using a line ending chosen from the first line.
35 return null; // Should we return an empty string?
37 final String lineEndings;
38 if ( lines.get(0).endsWith ("\r\n") ) // NOTE(review): get(0) would throw on an empty list unless the guard above rules that out -- confirm
40 else if ( lines.get(0).endsWith ("\r") )
43 lineEndings = StrUtils.LINEEND; // presumably the fallback when neither check on the first line matched
45 final StringBuilder buf = new StringBuilder();
46 for (String line : lines) {
48 buf.append (' '); // We can add extra spaces with impunity, and this
49 // makes sure our lines don't run together.
51 return format ( buf.toString(), lineWidth, lineEndings ); // delegate to the String overload with the detected line ending
56 * Formats a string of text. The formatting does line wrapping at the
57 * <code>lineWidth</code> boundary, but it also honors the formatting
58 * of initial paragraph lines, allowing indentation of the entire
61 * @param text The line of text to format
62 * @param lineWidth The width of the lines
63 * @return A string containing the formatted text.
65 public static String format ( final String text, final int lineWidth)
67 return format(text, lineWidth, StrUtils.LINEEND); // delegate to the three-argument overload with StrUtils.LINEEND as the line ending
71 * Formats a string of text. The formatting does line wrapping at the
72 * <code>lineWidth</code> boundary, but it also honors the formatting
73 * of initial paragraph lines, allowing indentation of the entire
76 * @param text The line of text to format
77 * @param lineWidth The width of the lines
78 * @param lineEnding The line ending that overrides the default System value
79 * @return A string containing the formatted text.
81 public static String format (final String text, final int lineWidth, final String lineEnding)
// Converts the text to a char[] for the char[]-based overload and wraps the result back into a String.
// NOTE(review): a null text throws NullPointerException at text.toCharArray().
83 return new String( format(text.toCharArray (), lineWidth, lineEnding));
87 * The possible states of the cursor position as it moves through the buffer.
89 private enum StatePosition { // tracked in the 'state' local of format(char[], int, String) while it scans the text
90 /** The beginning of a paragraph ... the start of the buffer */
93 /** The beginning of the next line, which may be completely ignored. */
96 /** The beginning of a new line that will not be ignored, but appended. */
99 /** The middle of a line. */
104 * The method that does the work of formatting a string of text. The text,
105 * however, is a character array, which is more efficient to work with.
107 * TODO: Should we make the format(char[]) method public?
109 * @param text The line of text to format
110 * @param lineWidth The width of the lines
111 * @param lineEnding The line ending that overrides the default System value
112 * @return A string containing the formatted text.
114 static char[] format ( final char[] text, final int lineWidth, final String lineEnding )
// Walks the text by index, collecting characters into 'word' and flushing
// finished words into 'buffer' through addWordToBuffer, which wraps at lineWidth.
116 final StringBuilder word = new StringBuilder(); // text collected since the last flush into buffer
117 final StringBuilder indent = new StringBuilder(); // passed to addWordToBuffer, which inserts it after a wrapped line ending
118 final StringBuilder buffer = new StringBuilder(text.length + 10); // the formatted output that is returned
120 StatePosition state = StatePosition.BEGIN_FIRST_LINE;
123 // There are times when we will run across a character(s) that will
124 // cause us to stop doing word wrap until we get to the
125 // "end of non-wordwrap" character(s).
127 // If endWordwrap1 is 0, it tells us to "do" word-wrapping.
128 char endWordwrap1 = 0; // first character of the sequence that ends a non-wrapping run
129 char endWordwrap2 = 0; // optional second character of that sequence; 0 when the sequence is a single character
131 // We loop one character past the end of the text, and when we get to
132 // this position, we assign 'c' to be 0 ... as a marker for the end of
135 for (int i = 0; i <= text.length; i++)
144 if (i+1 < text.length) // look-ahead is only read when it is inside the array
145 nextChar = text[i+1];
149 // Are we actually word-wrapping?
150 if (endWordwrap1 != 0) {
151 // Did we get the ending sequence of the non-word-wrap?
152 if ( ( endWordwrap2 == 0 && c == endWordwrap1 ) ||
153 ( c == endWordwrap1 && nextChar == endWordwrap2 ) )
158 if (endWordwrap1 == 0 && endWordwrap2 != 0) {
159 buffer.append (nextChar);
166 // Check to see if we got one of our special non-word-wrapping
167 // character sequences ...
169 if ( c == '[' ) { // [Hyperlink]
172 else if ( c == '*' && nextChar == '*' ) { // **Bold**
176 else if ( c == '*' && state == StatePosition.MIDDLE_OF_LINE ) {
179 else if ( c == '`' ) { // `code`
182 else if ( c == '(' && nextChar == '(' ) { // ((Footnote))
186 else if ( c == '!' && nextChar == '[' ) { // ![Image]
190 // We are no longer doing word-wrapping, so tidy the situation up...
191 if (endWordwrap1 != 0) {
192 if (word.length() > 0)
193 lineLength = addWordToBuffer (lineWidth, lineEnding, word, indent, buffer, lineLength);
194 else if (buffer.length() > 0 && buffer.charAt (buffer.length()-1) != ']' )
196 // We are adding an extra space for most situations, unless we get a
197 // [link][ref] where we want them to be together without a space.
204 // Normal word-wrapping processing continues ...
206 if (state == StatePosition.BEGIN_FIRST_LINE)
208 if ( c == '\n' || c == '\r' ) { // Keep, but ignore initial line feeds
214 if (Character.isWhitespace (c))
216 else if ( (c == '*' || c == '-' || c == '.' ) &&
217 Character.isWhitespace (nextChar) )
219 else if ( Character.isDigit (c) && nextChar == '.' &&
220 Character.isWhitespace (text[i+2])) // NOTE(review): text[i+2] has no bounds check; looks like text ending in a digit followed by '.' could index past the array -- confirm
225 state = StatePosition.MIDDLE_OF_LINE;
227 // If we are still in the initial state, then put 'er in...
228 if (state == StatePosition.BEGIN_FIRST_LINE) {
234 // While it would be more accurate to explicitly state the range of
235 // possibilities, with something like:
236 // EnumSet.range (StatePosition.BEGIN_OTHER_LINE, StatePosition.MIDDLE_OF_LINE ).contains (state)
237 // We know that what is left is just the BEGIN_FIRST_LINE ...
239 if ( state != StatePosition.BEGIN_FIRST_LINE )
241 // If not the middle of the line, then it must be at the first of a line
242 // Either BEGIN_OTHER_LINE or BEGIN_NEW_LINE
243 if (state != StatePosition.MIDDLE_OF_LINE)
245 if ( Character.isWhitespace(c) || c == '>' || c == '.' )
247 else if ( ( ( c == '*' || c == '-' ) && Character.isWhitespace (nextChar) ) ||
248 ( Character.isDigit(c) && nextChar == '.' && Character.isWhitespace( text[i+2] ) ) ) { // NOTE(review): same unguarded text[i+2] read as in the first-line check -- confirm
250 state = StatePosition.BEGIN_NEW_LINE;
253 if (state == StatePosition.BEGIN_NEW_LINE) {
254 buffer.append (word);
255 lineLength = word.substring ( word.indexOf("\n")+1 ).length(); // length of the part of word after its first '\n' (the whole word when it has none)
258 state = StatePosition.MIDDLE_OF_LINE;
262 if (state == StatePosition.MIDDLE_OF_LINE)
264 // Are we at the end of a word? Then we need to calculate whether
265 // to wrap the line or not.
267 // This condition does double duty, in that it also serves to
268 // ignore multiple spaces and special characters that may be at
269 // the beginning of the line.
270 if ( Character.isWhitespace(c) || c == 0 )
272 if ( word.length() > 0) {
273 lineLength = addWordToBuffer (lineWidth, lineEnding, word, indent, buffer, lineLength);
275 // Do we have two spaces at the end of the line? Honor this...
276 else if ( c == ' ' && ( nextChar == '\r' || nextChar == '\n' ) &&
277 state != StatePosition.BEGIN_OTHER_LINE ) {
279 buffer.append (lineEnding);
283 if ( c == '\r' || c == '\n' ) {
284 state = StatePosition.BEGIN_OTHER_LINE;
288 // Linefeeds are completely ignored and just treated as whitespace,
289 // unless, of course, there are two of 'em... and of course, end of
290 // lines are simply evil on Windows machines.
292 if ( (c == '\n' && nextChar == '\n') || // Unix-style line-ends
293 ( c == '\r' && nextChar == '\n' && // Windows-style line-ends
294 text[i+2] == '\r' && text[i+3] == '\n' ) ) // NOTE(review): text[i+2] and text[i+3] are read without a bounds check; looks like text ending in "\r\n" could index past the array -- confirm
296 state = StatePosition.BEGIN_FIRST_LINE;
298 indent.setLength (0);
301 if (c == '\r') { // If we are dealing with Windows-style line-ends,
302 i++; // we need to skip past the next character...
303 buffer.append("\r\n");
310 state = StatePosition.MIDDLE_OF_LINE;
316 return buffer.toString().toCharArray();
320 * Adds a word to the buffer, performing word wrap if necessary.
321 * @param lineWidth The maximum width of a line before the word is wrapped
322 * @param lineEnding The line ending to append, if necessary
323 * @param word The word to append
324 * @param indent The indentation string to insert, if necessary
325 * @param buffer The buffer to perform all this stuff to
326 * @param lineLength The current length of the current line
327 * @return The new length of the current line
329 private static int addWordToBuffer (final int lineWidth, final String lineEnding,
330 final StringBuilder word,
331 final StringBuilder indent,
332 final StringBuilder buffer, int lineLength)
334 if ( word.length() + lineLength + 1 > lineWidth )
336 buffer.append (lineEnding);
337 buffer.append (indent);
338 buffer.append (word);
340 lineLength = indent.length() + word.length();
343 if ( lineLength > indent.length() )
345 buffer.append (word);
346 lineLength += word.length() + 1;