]> gerrit.simantics Code Review - simantics/platform.git/blobdiff - bundles/winterwell.markdown/src/winterwell/markdown/StringMethods.java
Fixed all line endings of the repository
[simantics/platform.git] / bundles / winterwell.markdown / src / winterwell / markdown / StringMethods.java
index 208e0ae9936606e37bfb05d1ddde41b802d3e328..c5dfa92a23393ea88b4d8f48c2279648838fbf86 100644 (file)
-/**\r
- * Basic String manipulation utilities.\r
- * (c) Winterwell 2010 and ThinkTank Mathematics 2007\r
- */\r
-package winterwell.markdown;\r
-\r
-import java.math.BigInteger;\r
-import java.security.MessageDigest;\r
-import java.security.NoSuchAlgorithmException;\r
-import java.util.ArrayList;\r
-import java.util.List;\r
-import java.util.regex.Pattern;\r
-\r
-import winterwell.utils.Mutable;\r
-import winterwell.utils.containers.Pair;\r
-\r
-/**\r
- * A collection of general-purpose String handling methods.\r
- * \r
- * @author daniel.winterstein\r
- */\r
-public final class StringMethods {\r
-\r
-       /**\r
-        * Removes xml tags, comment blocks and script blocks.\r
-        * \r
-        * @param page\r
-        * @return the page with all xml tags removed.\r
-        */\r
-       public static String stripTags(String page) {\r
-               // This code is rather ugly, but it does the job\r
-               StringBuilder stripped = new StringBuilder(page.length());\r
-               boolean inTag = false;\r
-               // Comment blocks and script blocks are given special treatment\r
-               boolean inComment = false;\r
-               boolean inScript = false;\r
-               // Go through the text\r
-               for (int i = 0; i < page.length(); i++) {\r
-                       char c = page.charAt(i);\r
-                       // First check whether we are ignoring text\r
-                       if (inTag) {\r
-                               if (c == '>')\r
-                                       inTag = false;\r
-                       } else if (inComment) {\r
-                               if (c == '>' && page.charAt(i - 1) == '-'\r
-                                               && page.charAt(i - 1) == '-') {\r
-                                       inComment = false;\r
-                               }\r
-                       } else if (inScript) {\r
-                               if (c == '>' && page.substring(i - 7, i).equals("/script")) {\r
-                                       inScript = false;\r
-                               }\r
-                       } else {\r
-                               // Check for the start of a tag - looks for '<' followed by any\r
-                               // non-whitespace character\r
-                               if (c == '<' && !Character.isWhitespace(page.charAt(i + 1))) {\r
-                                       // Comment, script-block or tag?\r
-                                       if (page.charAt(i + 1) == '!' && page.charAt(i + 2) == '-'\r
-                                                       && page.charAt(i + 3) == '-') {\r
-                                               inComment = true;\r
-                                       } else if (i + 8 < page.length()\r
-                                                       && page.substring(i + 1, i + 7).equals("script")) {\r
-                                               inScript = true;\r
-                                               i += 7;\r
-                                       } else\r
-                                               inTag = true; // Normal tag by default\r
-                               } else {\r
-                                       // Append all non-tag chars\r
-                                       stripped.append(c);\r
-                               }\r
-                       } // end if...\r
-               }\r
-               return stripped.toString();\r
-       }\r
-       \r
-       /**\r
-        * The local line-end string. \n on unix, \r\n on windows, \r on mac.\r
-        */\r
-       public static final String LINEEND = System.getProperty("line.separator");\r
-\r
-       /**\r
-        * @param s\r
-        * @return A version of s where the first letter is uppercase and all others\r
-        *         are lowercase\r
-        */\r
-       public static final String capitalise(final String s) {\r
-               return s.substring(0, 1).toUpperCase() + s.substring(1).toLowerCase();\r
-       }\r
-\r
-       /**\r
-        * Convert all line breaks into the system line break.\r
-        */\r
-       public static final String convertLineBreaks(String text) {\r
-               return convertLineBreaks(text, LINEEND);\r
-       }\r
-\r
-       /**\r
-        * Convert all line breaks into the specified line break.\r
-        */\r
-       public static final String convertLineBreaks(String text, String br) {\r
-               text = text.replaceAll("\r\n", br);\r
-               text = text.replaceAll("\r", br);\r
-               text = text.replaceAll("\n", br);\r
-               return text;\r
-       }\r
-\r
-       /**\r
-        * @param string\r
-        * @param character\r
-        * @return the number of times character appears in the string\r
-        * @author Sam Halliday\r
-        */\r
-       static public int countCharsInString(String string, char character) {\r
-               int count = 0;\r
-               for (char c : string.toCharArray()) {\r
-                       if (c == character) {\r
-                               count++;\r
-                       }\r
-               }\r
-               return count;\r
-       }\r
-\r
-       /**\r
-        * \r
-        * E.g.\r
-        * <code>findEnclosingRegion("text with a [region] inside", 15, '[', ']')</code>\r
-        * is (??,??)\r
-        * \r
-        * @param text\r
-        * @param offset\r
-        * @param start\r
-        * @param end\r
-        * @return the smallest enclosed region (including start and end chars, the\r
-        *         1st number is inclusive, the 2nd exclusive), or null if none. So\r
-        *         text.subString(start,end) is the specified region\r
-        */\r
-       public static Pair<Integer> findEnclosingRegion(String text, int offset,\r
-                       char startMarker, char endMarker) {\r
-               // Forward\r
-               int end = findEnclosingRegion2(text, offset, endMarker, 1);\r
-               if (end == -1)\r
-                       return null;\r
-               end++; // end is exclusive\r
-               // Backward\r
-               int start = findEnclosingRegion2(text, offset, startMarker, -1);\r
-               if (start == -1)\r
-                       return null;\r
-               // Sanity\r
-               assert text.substring(start, end).charAt(0) == startMarker;\r
-               assert text.substring(start, end).endsWith("" + endMarker);\r
-               // Done\r
-               return new Pair<Integer>(start, end);\r
-       }\r
-\r
-       private static int findEnclosingRegion2(String text, int offset,\r
-                       char endMarker, int direction) {\r
-               while (offset > -1 && offset < text.length()) {\r
-                       char c = text.charAt(offset);\r
-                       if (c == endMarker)\r
-                               return offset;\r
-                       offset += direction;\r
-               }\r
-               return -1;\r
-       }\r
-\r
-       /**\r
-        * A convenience wrapper for\r
-        * {@link #findEnclosingRegion(String, int, char, char)} E.g. <code>\r
-        findEnclosingRegion("text with a [region] inside", 15, '[', ']') .equals("[region]");\r
-        </code>\r
-        * \r
-        * @param text\r
-        * @param offset\r
-        * @param start\r
-        * @param end\r
-        * @return the smallest enclosed region (including start and end chars), or\r
-        *         null if none.\r
-        */\r
-       public static String findEnclosingText(String text, int offset,\r
-                       char startMarker, char endMarker) {\r
-               Pair<Integer> region = findEnclosingRegion(text, offset, startMarker,\r
-                               endMarker);\r
-               if (region == null)\r
-                       return null;\r
-               String s = text.substring(region.first, region.second);\r
-               return s;\r
-       }\r
-\r
-       /**\r
-        * Format a block of text to use the given line-width. I.e. adjust the line\r
-        * breaks. Also known as <i>hard</i> line-wrapping. Paragraphs are\r
-        * recognised by a line of blank space between them (e.g. two returns).\r
-        * <p>\r
-        * Note: a side-effect of this method is that it converts all line-breaks\r
-        * into the local system's line-breaks. E.g. on Windows, \n will become \r\n\r
-        * \r
-        * @param text\r
-        *            The text to format\r
-        * @param lineWidth\r
-        *            The number of columns in a line. Typically 78 or 80.\r
-        * @param respectLeadingCharacters\r
-        *            Can be null. If set, the specified leading characters will be\r
-        *            copied if the line is split. Use with " \t" to keep indented\r
-        *            paragraphs properly indented. Use with "> \t" to also handle\r
-        *            email-style quoting. Note that respected leading characters\r
-        *            receive no special treatment when they are used inside a\r
-        *            paragraph.\r
-        * @return A copy of text, formatted to the given line-width.\r
-        *         <p>\r
-        *         TODO: recognise paragraphs by changes in the respected leading\r
-        *         characters\r
-        */\r
-       public static String format(String text, int lineWidth, int tabWidth,\r
-                       String respectLeadingCharacters) {\r
-               // Switch to Linux line breaks for easier internal workings\r
-               text = convertLineBreaks(text, "\n");\r
-               // Find paragraphs\r
-               List<String> paras = format2_splitParagraphs(text,\r
-                               respectLeadingCharacters);\r
-               // Rebuild text\r
-               StringBuilder sb = new StringBuilder(text.length() + 10);\r
-               for (String p : paras) {\r
-                       String fp = format3_oneParagraph(p, lineWidth, tabWidth,\r
-                                       respectLeadingCharacters);\r
-                       sb.append(fp);\r
-                       // Paragraphs end with a double line break\r
-                       sb.append("\n\n");\r
-               }\r
-               // Pop the last line breaks\r
-               sb.delete(sb.length() - 2, sb.length());\r
-               // Convert line breaks to system ones\r
-               text = convertLineBreaks(sb.toString());\r
-               // Done\r
-               return text;\r
-       }\r
-\r
-       private static List<String> format2_splitParagraphs(String text,\r
-                       String respectLeadingCharacters) {\r
-               List<String> paras = new ArrayList<String>();\r
-               Mutable.Int index = new Mutable.Int(0);\r
-               // TODO The characters prefacing this paragraph\r
-               String leadingChars = "";\r
-               while (index.value < text.length()) {\r
-                       // One paragraph\r
-                       boolean inSpace = false;\r
-                       int start = index.value;\r
-                       while (index.value < text.length()) {\r
-                               char c = text.charAt(index.value);\r
-                               index.value++;\r
-                               if (!Character.isWhitespace(c)) {\r
-                                       inSpace = false;\r
-                                       continue;\r
-                               }\r
-                               // Line end?\r
-                               if (c == '\r' || c == '\n') {\r
-                                       // // Handle MS Windows 2 character \r\n line breaks\r
-                                       // if (index.value < text.length()) {\r
-                                       // char c2 = text.charAt(index.value);\r
-                                       // if (c=='\r' && c2=='\n') index.value++; // Push on past\r
-                                       // the 2nd line break char\r
-                                       // }\r
-                                       // Double line end - indicating a paragraph break\r
-                                       if (inSpace)\r
-                                               break;\r
-                                       inSpace = true;\r
-                               }\r
-                               // TODO Other paragraph markers, spotted by a change in\r
-                               // leadingChars\r
-                       }\r
-                       String p = text.substring(start, index.value);\r
-                       paras.add(p);\r
-               }\r
-               // Done\r
-               return paras;\r
-       }\r
-\r
-       /**\r
-        * Format a block of text to fit the given line width\r
-        * \r
-        * @param p\r
-        * @param lineWidth\r
-        * @param tabWidth\r
-        * @param respectLeadingCharacters\r
-        * @return\r
-        */\r
-       private static String format3_oneParagraph(String p, int lineWidth,\r
-                       int tabWidth, String respectLeadingCharacters) {\r
-               // Collect the reformatted paragraph\r
-               StringBuilder sb = new StringBuilder(p.length() + 10); // Allow for\r
-                                                                                                                               // some extra\r
-                                                                                                                               // line-breaks\r
-               // Get respected leading chars\r
-               String leadingChars = format4_getLeadingChars(p,\r
-                               respectLeadingCharacters);\r
-               // First Line\r
-               sb.append(leadingChars);\r
-               int lineLength = leadingChars.length();\r
-               int index = leadingChars.length();\r
-               // Loop\r
-               while (index < p.length()) {\r
-                       // Get the next word\r
-                       StringBuilder word = new StringBuilder();\r
-                       char c = p.charAt(index);\r
-                       index++;\r
-                       while (!Character.isWhitespace(c)) {\r
-                               word.append(c);\r
-                               if (index == p.length())\r
-                                       break;\r
-                               c = p.charAt(index);\r
-                               index++;\r
-                       }\r
-                       // Break the line if the word will not fit\r
-                       if (lineLength + word.length() > lineWidth && lineLength != 0) {\r
-                               trimEnd(sb);\r
-                               sb.append('\n'); // lineEnd(sb);\r
-                               // New line\r
-                               sb.append(leadingChars);\r
-                               lineLength = leadingChars.length();\r
-                       }\r
-                       // Add word\r
-                       sb.append(word);\r
-                       lineLength += word.length();\r
-                       // Add the whitespace\r
-                       if (index != p.length() && lineLength < lineWidth) {\r
-                               if (c == '\n') {\r
-                                       c = ' ';\r
-                               }\r
-                               sb.append(c);\r
-                               lineLength += (c == '\t') ? tabWidth : 1;\r
-                       }\r
-               }\r
-               // A final trim\r
-               trimEnd(sb);\r
-               // Done\r
-               return sb.toString();\r
-       }\r
-\r
-       /**\r
-        * \r
-        * @param text\r
-        * @param respectLeadingCharacters\r
-        *            Can be null\r
-        * @return The characters at the beginning of text which are respected. E.g.\r
-        *         ("> Hello", " \t>") --> "> "\r
-        */\r
-       private static String format4_getLeadingChars(String text,\r
-                       String respectLeadingCharacters) {\r
-               if (respectLeadingCharacters == null)\r
-                       return "";\r
-               // Line-breaks cannot be respected\r
-               assert respectLeadingCharacters.indexOf('\n') == -1;\r
-               // Look for the first non-respected char\r
-               for (int i = 0; i < text.length(); i++) {\r
-                       char c = text.charAt(i);\r
-                       if (respectLeadingCharacters.indexOf(c) == -1) {\r
-                               // Return the previous chars\r
-                               return text.substring(0, i);\r
-                       }\r
-               }\r
-               // All chars are respected\r
-               return text;\r
-       }\r
-\r
-       /**\r
-        * Ensure that line ends with the right line-end character(s)\r
-        */\r
-       public static final String lineEnd(String line) {\r
-               // strip possibly inappropriate line-endings\r
-               if (line.endsWith("\n")) {\r
-                       line = line.substring(0, line.length() - 1);\r
-               }\r
-               if (line.endsWith("\r\n")) {\r
-                       line = line.substring(0, line.length() - 2);\r
-               }\r
-               if (line.endsWith("\r")) {\r
-                       line = line.substring(0, line.length() - 1);\r
-               }\r
-               // add in proper line end\r
-               if (!line.endsWith(LINEEND)) {\r
-                       line += LINEEND;\r
-               }\r
-               return line;\r
-       }\r
-\r
-       /**\r
-        * Ensure that line ends with the right line-end character(s). This is more\r
-        * efficient than the version for Strings.\r
-        * \r
-        * @param line\r
-        */\r
-       public static final void lineEnd(final StringBuilder line) {\r
-               if (line.length() == 0) {\r
-                       line.append(LINEEND);\r
-                       return;\r
-               }\r
-               // strip possibly inappropriate line-endings\r
-               final char last = line.charAt(line.length() - 1);\r
-               if (last == '\n') {\r
-                       if ((line.length() > 1) && (line.charAt(line.length() - 2) == '\r')) {\r
-                               // \r\n\r
-                               line.replace(line.length() - 2, line.length(), LINEEND);\r
-                               return;\r
-                       }\r
-                       line.replace(line.length() - 1, line.length(), LINEEND);\r
-                       return;\r
-               }\r
-               if (last == '\r') {\r
-                       line.replace(line.length() - 1, line.length(), LINEEND);\r
-                       return;\r
-               }\r
-               line.append(LINEEND);\r
-               return;\r
-       }\r
-\r
-\r
-       \r
-       /**\r
-        * @param string\r
-        * @return the MD5 sum of the string using the default charset. Null if\r
-        *         there was an error in calculating the hash.\r
-        * @author Sam Halliday\r
-        */\r
-       public static String md5Hash(String string) {\r
-               MessageDigest md5 = null;\r
-               try {\r
-                       md5 = MessageDigest.getInstance("MD5");\r
-               } catch (NoSuchAlgorithmException e) {\r
-                       // ignore this exception, we know MD5 exists\r
-               }\r
-               md5.update(string.getBytes());\r
-               BigInteger hash = new BigInteger(1, md5.digest());\r
-               return hash.toString(16);\r
-       }\r
-\r
-       /**\r
-        * Removes HTML-style tags from a string.\r
-        * \r
-        * @param s\r
-        *            a String from which to remove tags\r
-        * @return a string with all instances of <.*> removed.\r
-        */\r
-       public static String removeTags(String s) {\r
-               StringBuffer sb = new StringBuffer();\r
-               boolean inTag = false;\r
-               for (int i = 0; i < s.length(); i++) {\r
-                       char c = s.charAt(i);\r
-                       if (c == '<')\r
-                               inTag = true;\r
-                       if (!inTag)\r
-                               sb.append(c);\r
-                       if (c == '>')\r
-                               inTag = false;\r
-               }\r
-               return sb.toString();\r
-       }\r
-\r
-       /**\r
-        * Repeat a character.\r
-        * \r
-        * @param c\r
-        * @param i\r
-        * @return A String consisting of i x c.\r
-        * @example assert repeat('-', 5).equals("-----");\r
-        */\r
-       public static String repeat(Character c, int i) {\r
-               StringBuilder dashes = new StringBuilder(i);\r
-               for (int j = 0; j < i; j++)\r
-                       dashes.append(c);\r
-               return dashes.toString();\r
-       }\r
-\r
-       /**\r
-        * Split a piece of text into separate lines. The line breaks are left at\r
-        * the end of each line.\r
-        * \r
-        * @param text\r
-        * @return The individual lines in the text.\r
-        */\r
-       public static List<String> splitLines(String text) {\r
-               List<String> lines = new ArrayList<String>();\r
-               // Search for lines\r
-               int start = 0;\r
-               for (int i = 0; i < text.length(); i++) {\r
-                       char c = text.charAt(i);\r
-                       if (c == '\r' || c == '\n') {\r
-                               // Handle MS Windows 2 character \r\n line breaks\r
-                               if (i + 1 < text.length()) {\r
-                                       char c2 = text.charAt(i + 1);\r
-                                       if (c == '\r' && c2 == '\n')\r
-                                               i++;\r
-                               }\r
-                               // Get the line, with the line break\r
-                               String line = text.substring(start, i + 1);\r
-                               lines.add(line);\r
-                               start = i + 1;\r
-                       }\r
-               }\r
-               // Last one\r
-               if (start != text.length()) {\r
-                       String line = text.substring(start);\r
-                       lines.add(line);\r
-               }\r
-               return lines;\r
-       }\r
-\r
-       /**\r
-        * Remove <i>trailing</i> whitespace. c.f. String#trim() which removes\r
-        * leading and trailing whitespace.\r
-        * \r
-        * @param sb\r
-        */\r
-       private static void trimEnd(StringBuilder sb) {\r
-               while (true) {\r
-                       // Get the last character\r
-                       int i = sb.length() - 1;\r
-                       if (i == -1)\r
-                               return; // Quit if sb is empty\r
-                       char c = sb.charAt(i);\r
-                       if (!Character.isWhitespace(c))\r
-                               return; // Finish?\r
-                       sb.deleteCharAt(i); // Remove and continue\r
-               }\r
-       }\r
-\r
-       /**\r
-        * Returns true if the string is just whitespace, or empty, or null.\r
-        * \r
-        * @param s\r
-        */\r
-       public static final boolean whitespace(final String s) {\r
-               if (s == null) {\r
-                       return true;\r
-               }\r
-               for (int i = 0; i < s.length(); i++) {\r
-                       final char c = s.charAt(i);\r
-                       if (!Character.isWhitespace(c)) {\r
-                               return false;\r
-                       }\r
-               }\r
-               return true;\r
-       }\r
-\r
-       /**\r
-        * @param text\r
-        * @return the number of words in text. Uses a crude whitespace\r
-        * measure.\r
-        */\r
-       public static int wordCount(String text) {\r
-               String[] bits = text.split("\\W+");\r
-               int wc = 0;\r
-               for (String string : bits) {\r
-                       if (!whitespace(string)) wc++;\r
-               }\r
-               return wc;\r
-       }\r
-\r
-}\r
+/**
+ * Basic String manipulation utilities.
+ * (c) Winterwell 2010 and ThinkTank Mathematics 2007
+ */
+package winterwell.markdown;
+
+import java.math.BigInteger;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Pattern;
+
+import winterwell.utils.Mutable;
+import winterwell.utils.containers.Pair;
+
+/**
+ * A collection of general-purpose String handling methods.
+ * 
+ * @author daniel.winterstein
+ */
+public final class StringMethods {
+
+       /**
+        * Removes xml tags, comment blocks and script blocks.
+        * 
+        * @param page
+        * @return the page with all xml tags removed.
+        */
+       public static String stripTags(String page) {
+               // This code is rather ugly, but it does the job
+               StringBuilder stripped = new StringBuilder(page.length());
+               boolean inTag = false;
+               // Comment blocks and script blocks are given special treatment
+               boolean inComment = false;
+               boolean inScript = false;
+               // Go through the text
+               for (int i = 0; i < page.length(); i++) {
+                       char c = page.charAt(i);
+                       // First check whether we are ignoring text
+                       if (inTag) {
+                               if (c == '>')
+                                       inTag = false;
+                       } else if (inComment) {
+                               if (c == '>' && page.charAt(i - 1) == '-'
+                                               && page.charAt(i - 1) == '-') {
+                                       inComment = false;
+                               }
+                       } else if (inScript) {
+                               if (c == '>' && page.substring(i - 7, i).equals("/script")) {
+                                       inScript = false;
+                               }
+                       } else {
+                               // Check for the start of a tag - looks for '<' followed by any
+                               // non-whitespace character
+                               if (c == '<' && !Character.isWhitespace(page.charAt(i + 1))) {
+                                       // Comment, script-block or tag?
+                                       if (page.charAt(i + 1) == '!' && page.charAt(i + 2) == '-'
+                                                       && page.charAt(i + 3) == '-') {
+                                               inComment = true;
+                                       } else if (i + 8 < page.length()
+                                                       && page.substring(i + 1, i + 7).equals("script")) {
+                                               inScript = true;
+                                               i += 7;
+                                       } else
+                                               inTag = true; // Normal tag by default
+                               } else {
+                                       // Append all non-tag chars
+                                       stripped.append(c);
+                               }
+                       } // end if...
+               }
+               return stripped.toString();
+       }
+       
+       /**
+        * The local line-end string. \n on unix, \r\n on windows, \r on mac.
+        */
+       public static final String LINEEND = System.getProperty("line.separator");
+
+       /**
+        * @param s
+        * @return A version of s where the first letter is uppercase and all others
+        *         are lowercase
+        */
+       public static final String capitalise(final String s) {
+               return s.substring(0, 1).toUpperCase() + s.substring(1).toLowerCase();
+       }
+
+       /**
+        * Convert all line breaks into the system line break.
+        */
+       public static final String convertLineBreaks(String text) {
+               return convertLineBreaks(text, LINEEND);
+       }
+
+       /**
+        * Convert all line breaks into the specified line break.
+        */
+       public static final String convertLineBreaks(String text, String br) {
+               text = text.replaceAll("\r\n", br);
+               text = text.replaceAll("\r", br);
+               text = text.replaceAll("\n", br);
+               return text;
+       }
+
+       /**
+        * @param string
+        * @param character
+        * @return the number of times character appears in the string
+        * @author Sam Halliday
+        */
+       static public int countCharsInString(String string, char character) {
+               int count = 0;
+               for (char c : string.toCharArray()) {
+                       if (c == character) {
+                               count++;
+                       }
+               }
+               return count;
+       }
+
+       /**
+        * 
+        * E.g.
+        * <code>findEnclosingRegion("text with a [region] inside", 15, '[', ']')</code>
+        * is (??,??)
+        * 
+        * @param text
+        * @param offset
+        * @param start
+        * @param end
+        * @return the smallest enclosed region (including start and end chars, the
+        *         1st number is inclusive, the 2nd exclusive), or null if none. So
+        *         text.subString(start,end) is the specified region
+        */
+       public static Pair<Integer> findEnclosingRegion(String text, int offset,
+                       char startMarker, char endMarker) {
+               // Forward
+               int end = findEnclosingRegion2(text, offset, endMarker, 1);
+               if (end == -1)
+                       return null;
+               end++; // end is exclusive
+               // Backward
+               int start = findEnclosingRegion2(text, offset, startMarker, -1);
+               if (start == -1)
+                       return null;
+               // Sanity
+               assert text.substring(start, end).charAt(0) == startMarker;
+               assert text.substring(start, end).endsWith("" + endMarker);
+               // Done
+               return new Pair<Integer>(start, end);
+       }
+
+       private static int findEnclosingRegion2(String text, int offset,
+                       char endMarker, int direction) {
+               while (offset > -1 && offset < text.length()) {
+                       char c = text.charAt(offset);
+                       if (c == endMarker)
+                               return offset;
+                       offset += direction;
+               }
+               return -1;
+       }
+
+       /**
+        * A convenience wrapper for
+        * {@link #findEnclosingRegion(String, int, char, char)} E.g. <code>
+        findEnclosingRegion("text with a [region] inside", 15, '[', ']') .equals("[region]");
+        </code>
+        * 
+        * @param text
+        * @param offset
+        * @param start
+        * @param end
+        * @return the smallest enclosed region (including start and end chars), or
+        *         null if none.
+        */
+       public static String findEnclosingText(String text, int offset,
+                       char startMarker, char endMarker) {
+               Pair<Integer> region = findEnclosingRegion(text, offset, startMarker,
+                               endMarker);
+               if (region == null)
+                       return null;
+               String s = text.substring(region.first, region.second);
+               return s;
+       }
+
+       /**
+        * Format a block of text to use the given line-width. I.e. adjust the line
+        * breaks. Also known as <i>hard</i> line-wrapping. Paragraphs are
+        * recognised by a line of blank space between them (e.g. two returns).
+        * <p>
+        * Note: a side-effect of this method is that it converts all line-breaks
+        * into the local system's line-breaks. E.g. on Windows, \n will become \r\n
+        * 
+        * @param text
+        *            The text to format
+        * @param lineWidth
+        *            The number of columns in a line. Typically 78 or 80.
+        * @param respectLeadingCharacters
+        *            Can be null. If set, the specified leading characters will be
+        *            copied if the line is split. Use with " \t" to keep indented
+        *            paragraphs properly indented. Use with "> \t" to also handle
+        *            email-style quoting. Note that respected leading characters
+        *            receive no special treatment when they are used inside a
+        *            paragraph.
+        * @return A copy of text, formatted to the given line-width.
+        *         <p>
+        *         TODO: recognise paragraphs by changes in the respected leading
+        *         characters
+        */
+       public static String format(String text, int lineWidth, int tabWidth,
+                       String respectLeadingCharacters) {
+               // Switch to Linux line breaks for easier internal workings
+               text = convertLineBreaks(text, "\n");
+               // Find paragraphs
+               List<String> paras = format2_splitParagraphs(text,
+                               respectLeadingCharacters);
+               // Rebuild text
+               StringBuilder sb = new StringBuilder(text.length() + 10);
+               for (String p : paras) {
+                       String fp = format3_oneParagraph(p, lineWidth, tabWidth,
+                                       respectLeadingCharacters);
+                       sb.append(fp);
+                       // Paragraphs end with a double line break
+                       sb.append("\n\n");
+               }
+               // Pop the last line breaks
+               sb.delete(sb.length() - 2, sb.length());
+               // Convert line breaks to system ones
+               text = convertLineBreaks(sb.toString());
+               // Done
+               return text;
+       }
+
+       private static List<String> format2_splitParagraphs(String text,
+                       String respectLeadingCharacters) {
+               List<String> paras = new ArrayList<String>();
+               Mutable.Int index = new Mutable.Int(0);
+               // TODO The characters prefacing this paragraph
+               String leadingChars = "";
+               while (index.value < text.length()) {
+                       // One paragraph
+                       boolean inSpace = false;
+                       int start = index.value;
+                       while (index.value < text.length()) {
+                               char c = text.charAt(index.value);
+                               index.value++;
+                               if (!Character.isWhitespace(c)) {
+                                       inSpace = false;
+                                       continue;
+                               }
+                               // Line end?
+                               if (c == '\r' || c == '\n') {
+                                       // // Handle MS Windows 2 character \r\n line breaks
+                                       // if (index.value < text.length()) {
+                                       // char c2 = text.charAt(index.value);
+                                       // if (c=='\r' && c2=='\n') index.value++; // Push on past
+                                       // the 2nd line break char
+                                       // }
+                                       // Double line end - indicating a paragraph break
+                                       if (inSpace)
+                                               break;
+                                       inSpace = true;
+                               }
+                               // TODO Other paragraph markers, spotted by a change in
+                               // leadingChars
+                       }
+                       String p = text.substring(start, index.value);
+                       paras.add(p);
+               }
+               // Done
+               return paras;
+       }
+
+       /**
+        * Format a block of text to fit the given line width
+        * 
+        * @param p
+        * @param lineWidth
+        * @param tabWidth
+        * @param respectLeadingCharacters
+        * @return
+        */
+       private static String format3_oneParagraph(String p, int lineWidth,
+                       int tabWidth, String respectLeadingCharacters) {
+               // Collect the reformatted paragraph
+               StringBuilder sb = new StringBuilder(p.length() + 10); // Allow for
+                                                                                                                               // some extra
+                                                                                                                               // line-breaks
+               // Get respected leading chars
+               String leadingChars = format4_getLeadingChars(p,
+                               respectLeadingCharacters);
+               // First Line
+               sb.append(leadingChars);
+               int lineLength = leadingChars.length();
+               int index = leadingChars.length();
+               // Loop
+               while (index < p.length()) {
+                       // Get the next word
+                       StringBuilder word = new StringBuilder();
+                       char c = p.charAt(index);
+                       index++;
+                       while (!Character.isWhitespace(c)) {
+                               word.append(c);
+                               if (index == p.length())
+                                       break;
+                               c = p.charAt(index);
+                               index++;
+                       }
+                       // Break the line if the word will not fit
+                       if (lineLength + word.length() > lineWidth && lineLength != 0) {
+                               trimEnd(sb);
+                               sb.append('\n'); // lineEnd(sb);
+                               // New line
+                               sb.append(leadingChars);
+                               lineLength = leadingChars.length();
+                       }
+                       // Add word
+                       sb.append(word);
+                       lineLength += word.length();
+                       // Add the whitespace
+                       if (index != p.length() && lineLength < lineWidth) {
+                               if (c == '\n') {
+                                       c = ' ';
+                               }
+                               sb.append(c);
+                               lineLength += (c == '\t') ? tabWidth : 1;
+                       }
+               }
+               // A final trim
+               trimEnd(sb);
+               // Done
+               return sb.toString();
+       }
+
+       /**
+        * 
+        * @param text
+        * @param respectLeadingCharacters
+        *            Can be null
+        * @return The characters at the beginning of text which are respected. E.g.
+        *         ("> Hello", " \t>") --> "> "
+        */
+       private static String format4_getLeadingChars(String text,
+                       String respectLeadingCharacters) {
+               if (respectLeadingCharacters == null)
+                       return "";
+               // Line-breaks cannot be respected
+               assert respectLeadingCharacters.indexOf('\n') == -1;
+               // Look for the first non-respected char
+               for (int i = 0; i < text.length(); i++) {
+                       char c = text.charAt(i);
+                       if (respectLeadingCharacters.indexOf(c) == -1) {
+                               // Return the previous chars
+                               return text.substring(0, i);
+                       }
+               }
+               // All chars are respected
+               return text;
+       }
+
+       /**
+        * Ensure that line ends with the right line-end character(s)
+        */
+       public static final String lineEnd(String line) {
+               // strip possibly inappropriate line-endings
+               if (line.endsWith("\n")) {
+                       line = line.substring(0, line.length() - 1);
+               }
+               if (line.endsWith("\r\n")) {
+                       line = line.substring(0, line.length() - 2);
+               }
+               if (line.endsWith("\r")) {
+                       line = line.substring(0, line.length() - 1);
+               }
+               // add in proper line end
+               if (!line.endsWith(LINEEND)) {
+                       line += LINEEND;
+               }
+               return line;
+       }
+
+       /**
+        * Ensure that line ends with the right line-end character(s). This is more
+        * efficient than the version for Strings.
+        * 
+        * @param line
+        */
+       public static final void lineEnd(final StringBuilder line) {
+               if (line.length() == 0) {
+                       line.append(LINEEND);
+                       return;
+               }
+               // strip possibly inappropriate line-endings
+               final char last = line.charAt(line.length() - 1);
+               if (last == '\n') {
+                       if ((line.length() > 1) && (line.charAt(line.length() - 2) == '\r')) {
+                               // \r\n
+                               line.replace(line.length() - 2, line.length(), LINEEND);
+                               return;
+                       }
+                       line.replace(line.length() - 1, line.length(), LINEEND);
+                       return;
+               }
+               if (last == '\r') {
+                       line.replace(line.length() - 1, line.length(), LINEEND);
+                       return;
+               }
+               line.append(LINEEND);
+               return;
+       }
+
+
+       
+       /**
+        * @param string
+        * @return the MD5 sum of the string using the default charset. Null if
+        *         there was an error in calculating the hash.
+        * @author Sam Halliday
+        */
+       public static String md5Hash(String string) {
+               MessageDigest md5 = null;
+               try {
+                       md5 = MessageDigest.getInstance("MD5");
+               } catch (NoSuchAlgorithmException e) {
+                       // ignore this exception, we know MD5 exists
+               }
+               md5.update(string.getBytes());
+               BigInteger hash = new BigInteger(1, md5.digest());
+               return hash.toString(16);
+       }
+
+       /**
+        * Removes HTML-style tags from a string.
+        * 
+        * @param s
+        *            a String from which to remove tags
+        * @return a string with all instances of <.*> removed.
+        */
+       public static String removeTags(String s) {
+               StringBuffer sb = new StringBuffer();
+               boolean inTag = false;
+               for (int i = 0; i < s.length(); i++) {
+                       char c = s.charAt(i);
+                       if (c == '<')
+                               inTag = true;
+                       if (!inTag)
+                               sb.append(c);
+                       if (c == '>')
+                               inTag = false;
+               }
+               return sb.toString();
+       }
+
+       /**
+        * Repeat a character.
+        * 
+        * @param c
+        * @param i
+        * @return A String consisting of i x c.
+        * @example assert repeat('-', 5).equals("-----");
+        */
+       public static String repeat(Character c, int i) {
+               StringBuilder dashes = new StringBuilder(i);
+               for (int j = 0; j < i; j++)
+                       dashes.append(c);
+               return dashes.toString();
+       }
+
+       /**
+        * Split a piece of text into separate lines. The line breaks are left at
+        * the end of each line.
+        * 
+        * @param text
+        * @return The individual lines in the text.
+        */
+       public static List<String> splitLines(String text) {
+               List<String> lines = new ArrayList<String>();
+               // Search for lines
+               int start = 0;
+               for (int i = 0; i < text.length(); i++) {
+                       char c = text.charAt(i);
+                       if (c == '\r' || c == '\n') {
+                               // Handle MS Windows 2 character \r\n line breaks
+                               if (i + 1 < text.length()) {
+                                       char c2 = text.charAt(i + 1);
+                                       if (c == '\r' && c2 == '\n')
+                                               i++;
+                               }
+                               // Get the line, with the line break
+                               String line = text.substring(start, i + 1);
+                               lines.add(line);
+                               start = i + 1;
+                       }
+               }
+               // Last one
+               if (start != text.length()) {
+                       String line = text.substring(start);
+                       lines.add(line);
+               }
+               return lines;
+       }
+
+       /**
+        * Remove <i>trailing</i> whitespace. c.f. String#trim() which removes
+        * leading and trailing whitespace.
+        * 
+        * @param sb
+        */
+       private static void trimEnd(StringBuilder sb) {
+               while (true) {
+                       // Get the last character
+                       int i = sb.length() - 1;
+                       if (i == -1)
+                               return; // Quit if sb is empty
+                       char c = sb.charAt(i);
+                       if (!Character.isWhitespace(c))
+                               return; // Finish?
+                       sb.deleteCharAt(i); // Remove and continue
+               }
+       }
+
+       /**
+        * Returns true if the string is just whitespace, or empty, or null.
+        * 
+        * @param s
+        */
+       public static final boolean whitespace(final String s) {
+               if (s == null) {
+                       return true;
+               }
+               for (int i = 0; i < s.length(); i++) {
+                       final char c = s.charAt(i);
+                       if (!Character.isWhitespace(c)) {
+                               return false;
+                       }
+               }
+               return true;
+       }
+
+       /**
+        * @param text
+        * @return the number of words in text. Uses a crude whitespace
+        * measure.
+        */
+       public static int wordCount(String text) {
+               String[] bits = text.split("\\W+");
+               int wc = 0;
+               for (String string : bits) {
+                       if (!whitespace(string)) wc++;
+               }
+               return wc;
+       }
+
+}