X-Git-Url: https://gerrit.simantics.org/r/gitweb?a=blobdiff_plain;f=bundles%2Fwinterwell.markdown%2Fsrc%2Fwinterwell%2Fmarkdown%2FStringMethods.java;h=c5dfa92a23393ea88b4d8f48c2279648838fbf86;hb=1ead2fab0a23f75b540dc3f20e84e536f3aaa3ca;hp=208e0ae9936606e37bfb05d1ddde41b802d3e328;hpb=9a175feb652b2b7bba7afa540831b9076be3c10e;p=simantics%2Fplatform.git diff --git a/bundles/winterwell.markdown/src/winterwell/markdown/StringMethods.java b/bundles/winterwell.markdown/src/winterwell/markdown/StringMethods.java index 208e0ae99..c5dfa92a2 100644 --- a/bundles/winterwell.markdown/src/winterwell/markdown/StringMethods.java +++ b/bundles/winterwell.markdown/src/winterwell/markdown/StringMethods.java @@ -1,557 +1,557 @@ -/** - * Basic String manipulation utilities. - * (c) Winterwell 2010 and ThinkTank Mathematics 2007 - */ -package winterwell.markdown; - -import java.math.BigInteger; -import java.security.MessageDigest; -import java.security.NoSuchAlgorithmException; -import java.util.ArrayList; -import java.util.List; -import java.util.regex.Pattern; - -import winterwell.utils.Mutable; -import winterwell.utils.containers.Pair; - -/** - * A collection of general-purpose String handling methods. - * - * @author daniel.winterstein - */ -public final class StringMethods { - - /** - * Removes xml tags, comment blocks and script blocks. - * - * @param page - * @return the page with all xml tags removed. - */ - public static String stripTags(String page) { - // This code is rather ugly, but it does the job - StringBuilder stripped = new StringBuilder(page.length()); - boolean inTag = false; - // Comment blocks and script blocks are given special treatment - boolean inComment = false; - boolean inScript = false; - // Go through the text - for (int i = 0; i < page.length(); i++) { - char c = page.charAt(i); - // First check whether we are ignoring text - if (inTag) { - if (c == '>') - inTag = false; - } else if (inComment) { - if (c == '>' && page.charAt(i - 1) == '-' - && page.charAt(i - 1) == '-') { - inComment = false; - } - } else if (inScript) { - if (c == '>' && page.substring(i - 7, i).equals("/script")) { - inScript = false; - } - } else { - // Check for the start of a tag - looks for '<' followed by any - // non-whitespace character - if (c == '<' && !Character.isWhitespace(page.charAt(i + 1))) { - // Comment, script-block or tag? - if (page.charAt(i + 1) == '!' && page.charAt(i + 2) == '-' - && page.charAt(i + 3) == '-') { - inComment = true; - } else if (i + 8 < page.length() - && page.substring(i + 1, i + 7).equals("script")) { - inScript = true; - i += 7; - } else - inTag = true; // Normal tag by default - } else { - // Append all non-tag chars - stripped.append(c); - } - } // end if... - } - return stripped.toString(); - } - - /** - * The local line-end string. \n on unix, \r\n on windows, \r on mac. - */ - public static final String LINEEND = System.getProperty("line.separator"); - - /** - * @param s - * @return A version of s where the first letter is uppercase and all others - * are lowercase - */ - public static final String capitalise(final String s) { - return s.substring(0, 1).toUpperCase() + s.substring(1).toLowerCase(); - } - - /** - * Convert all line breaks into the system line break. - */ - public static final String convertLineBreaks(String text) { - return convertLineBreaks(text, LINEEND); - } - - /** - * Convert all line breaks into the specified line break. - */ - public static final String convertLineBreaks(String text, String br) { - text = text.replaceAll("\r\n", br); - text = text.replaceAll("\r", br); - text = text.replaceAll("\n", br); - return text; - } - - /** - * @param string - * @param character - * @return the number of times character appears in the string - * @author Sam Halliday - */ - static public int countCharsInString(String string, char character) { - int count = 0; - for (char c : string.toCharArray()) { - if (c == character) { - count++; - } - } - return count; - } - - /** - * - * E.g. - * findEnclosingRegion("text with a [region] inside", 15, '[', ']') - * is (??,??) - * - * @param text - * @param offset - * @param start - * @param end - * @return the smallest enclosed region (including start and end chars, the - * 1st number is inclusive, the 2nd exclusive), or null if none. So - * text.subString(start,end) is the specified region - */ - public static Pair findEnclosingRegion(String text, int offset, - char startMarker, char endMarker) { - // Forward - int end = findEnclosingRegion2(text, offset, endMarker, 1); - if (end == -1) - return null; - end++; // end is exclusive - // Backward - int start = findEnclosingRegion2(text, offset, startMarker, -1); - if (start == -1) - return null; - // Sanity - assert text.substring(start, end).charAt(0) == startMarker; - assert text.substring(start, end).endsWith("" + endMarker); - // Done - return new Pair(start, end); - } - - private static int findEnclosingRegion2(String text, int offset, - char endMarker, int direction) { - while (offset > -1 && offset < text.length()) { - char c = text.charAt(offset); - if (c == endMarker) - return offset; - offset += direction; - } - return -1; - } - - /** - * A convenience wrapper for - * {@link #findEnclosingRegion(String, int, char, char)} E.g. - findEnclosingRegion("text with a [region] inside", 15, '[', ']') .equals("[region]"); - - * - * @param text - * @param offset - * @param start - * @param end - * @return the smallest enclosed region (including start and end chars), or - * null if none. - */ - public static String findEnclosingText(String text, int offset, - char startMarker, char endMarker) { - Pair region = findEnclosingRegion(text, offset, startMarker, - endMarker); - if (region == null) - return null; - String s = text.substring(region.first, region.second); - return s; - } - - /** - * Format a block of text to use the given line-width. I.e. adjust the line - * breaks. Also known as hard line-wrapping. Paragraphs are - * recognised by a line of blank space between them (e.g. two returns). - *

- * Note: a side-effect of this method is that it converts all line-breaks - * into the local system's line-breaks. E.g. on Windows, \n will become \r\n - * - * @param text - * The text to format - * @param lineWidth - * The number of columns in a line. Typically 78 or 80. - * @param respectLeadingCharacters - * Can be null. If set, the specified leading characters will be - * copied if the line is split. Use with " \t" to keep indented - * paragraphs properly indented. Use with "> \t" to also handle - * email-style quoting. Note that respected leading characters - * receive no special treatment when they are used inside a - * paragraph. - * @return A copy of text, formatted to the given line-width. - *

- * TODO: recognise paragraphs by changes in the respected leading - * characters - */ - public static String format(String text, int lineWidth, int tabWidth, - String respectLeadingCharacters) { - // Switch to Linux line breaks for easier internal workings - text = convertLineBreaks(text, "\n"); - // Find paragraphs - List paras = format2_splitParagraphs(text, - respectLeadingCharacters); - // Rebuild text - StringBuilder sb = new StringBuilder(text.length() + 10); - for (String p : paras) { - String fp = format3_oneParagraph(p, lineWidth, tabWidth, - respectLeadingCharacters); - sb.append(fp); - // Paragraphs end with a double line break - sb.append("\n\n"); - } - // Pop the last line breaks - sb.delete(sb.length() - 2, sb.length()); - // Convert line breaks to system ones - text = convertLineBreaks(sb.toString()); - // Done - return text; - } - - private static List format2_splitParagraphs(String text, - String respectLeadingCharacters) { - List paras = new ArrayList(); - Mutable.Int index = new Mutable.Int(0); - // TODO The characters prefacing this paragraph - String leadingChars = ""; - while (index.value < text.length()) { - // One paragraph - boolean inSpace = false; - int start = index.value; - while (index.value < text.length()) { - char c = text.charAt(index.value); - index.value++; - if (!Character.isWhitespace(c)) { - inSpace = false; - continue; - } - // Line end? - if (c == '\r' || c == '\n') { - // // Handle MS Windows 2 character \r\n line breaks - // if (index.value < text.length()) { - // char c2 = text.charAt(index.value); - // if (c=='\r' && c2=='\n') index.value++; // Push on past - // the 2nd line break char - // } - // Double line end - indicating a paragraph break - if (inSpace) - break; - inSpace = true; - } - // TODO Other paragraph markers, spotted by a change in - // leadingChars - } - String p = text.substring(start, index.value); - paras.add(p); - } - // Done - return paras; - } - - /** - * Format a block of text to fit the given line width - * - * @param p - * @param lineWidth - * @param tabWidth - * @param respectLeadingCharacters - * @return - */ - private static String format3_oneParagraph(String p, int lineWidth, - int tabWidth, String respectLeadingCharacters) { - // Collect the reformatted paragraph - StringBuilder sb = new StringBuilder(p.length() + 10); // Allow for - // some extra - // line-breaks - // Get respected leading chars - String leadingChars = format4_getLeadingChars(p, - respectLeadingCharacters); - // First Line - sb.append(leadingChars); - int lineLength = leadingChars.length(); - int index = leadingChars.length(); - // Loop - while (index < p.length()) { - // Get the next word - StringBuilder word = new StringBuilder(); - char c = p.charAt(index); - index++; - while (!Character.isWhitespace(c)) { - word.append(c); - if (index == p.length()) - break; - c = p.charAt(index); - index++; - } - // Break the line if the word will not fit - if (lineLength + word.length() > lineWidth && lineLength != 0) { - trimEnd(sb); - sb.append('\n'); // lineEnd(sb); - // New line - sb.append(leadingChars); - lineLength = leadingChars.length(); - } - // Add word - sb.append(word); - lineLength += word.length(); - // Add the whitespace - if (index != p.length() && lineLength < lineWidth) { - if (c == '\n') { - c = ' '; - } - sb.append(c); - lineLength += (c == '\t') ? tabWidth : 1; - } - } - // A final trim - trimEnd(sb); - // Done - return sb.toString(); - } - - /** - * - * @param text - * @param respectLeadingCharacters - * Can be null - * @return The characters at the beginning of text which are respected. E.g. - * ("> Hello", " \t>") --> "> " - */ - private static String format4_getLeadingChars(String text, - String respectLeadingCharacters) { - if (respectLeadingCharacters == null) - return ""; - // Line-breaks cannot be respected - assert respectLeadingCharacters.indexOf('\n') == -1; - // Look for the first non-respected char - for (int i = 0; i < text.length(); i++) { - char c = text.charAt(i); - if (respectLeadingCharacters.indexOf(c) == -1) { - // Return the previous chars - return text.substring(0, i); - } - } - // All chars are respected - return text; - } - - /** - * Ensure that line ends with the right line-end character(s) - */ - public static final String lineEnd(String line) { - // strip possibly inappropriate line-endings - if (line.endsWith("\n")) { - line = line.substring(0, line.length() - 1); - } - if (line.endsWith("\r\n")) { - line = line.substring(0, line.length() - 2); - } - if (line.endsWith("\r")) { - line = line.substring(0, line.length() - 1); - } - // add in proper line end - if (!line.endsWith(LINEEND)) { - line += LINEEND; - } - return line; - } - - /** - * Ensure that line ends with the right line-end character(s). This is more - * efficient than the version for Strings. - * - * @param line - */ - public static final void lineEnd(final StringBuilder line) { - if (line.length() == 0) { - line.append(LINEEND); - return; - } - // strip possibly inappropriate line-endings - final char last = line.charAt(line.length() - 1); - if (last == '\n') { - if ((line.length() > 1) && (line.charAt(line.length() - 2) == '\r')) { - // \r\n - line.replace(line.length() - 2, line.length(), LINEEND); - return; - } - line.replace(line.length() - 1, line.length(), LINEEND); - return; - } - if (last == '\r') { - line.replace(line.length() - 1, line.length(), LINEEND); - return; - } - line.append(LINEEND); - return; - } - - - - /** - * @param string - * @return the MD5 sum of the string using the default charset. Null if - * there was an error in calculating the hash. - * @author Sam Halliday - */ - public static String md5Hash(String string) { - MessageDigest md5 = null; - try { - md5 = MessageDigest.getInstance("MD5"); - } catch (NoSuchAlgorithmException e) { - // ignore this exception, we know MD5 exists - } - md5.update(string.getBytes()); - BigInteger hash = new BigInteger(1, md5.digest()); - return hash.toString(16); - } - - /** - * Removes HTML-style tags from a string. - * - * @param s - * a String from which to remove tags - * @return a string with all instances of <.*> removed. - */ - public static String removeTags(String s) { - StringBuffer sb = new StringBuffer(); - boolean inTag = false; - for (int i = 0; i < s.length(); i++) { - char c = s.charAt(i); - if (c == '<') - inTag = true; - if (!inTag) - sb.append(c); - if (c == '>') - inTag = false; - } - return sb.toString(); - } - - /** - * Repeat a character. - * - * @param c - * @param i - * @return A String consisting of i x c. - * @example assert repeat('-', 5).equals("-----"); - */ - public static String repeat(Character c, int i) { - StringBuilder dashes = new StringBuilder(i); - for (int j = 0; j < i; j++) - dashes.append(c); - return dashes.toString(); - } - - /** - * Split a piece of text into separate lines. The line breaks are left at - * the end of each line. - * - * @param text - * @return The individual lines in the text. - */ - public static List splitLines(String text) { - List lines = new ArrayList(); - // Search for lines - int start = 0; - for (int i = 0; i < text.length(); i++) { - char c = text.charAt(i); - if (c == '\r' || c == '\n') { - // Handle MS Windows 2 character \r\n line breaks - if (i + 1 < text.length()) { - char c2 = text.charAt(i + 1); - if (c == '\r' && c2 == '\n') - i++; - } - // Get the line, with the line break - String line = text.substring(start, i + 1); - lines.add(line); - start = i + 1; - } - } - // Last one - if (start != text.length()) { - String line = text.substring(start); - lines.add(line); - } - return lines; - } - - /** - * Remove trailing whitespace. c.f. String#trim() which removes - * leading and trailing whitespace. - * - * @param sb - */ - private static void trimEnd(StringBuilder sb) { - while (true) { - // Get the last character - int i = sb.length() - 1; - if (i == -1) - return; // Quit if sb is empty - char c = sb.charAt(i); - if (!Character.isWhitespace(c)) - return; // Finish? - sb.deleteCharAt(i); // Remove and continue - } - } - - /** - * Returns true if the string is just whitespace, or empty, or null. - * - * @param s - */ - public static final boolean whitespace(final String s) { - if (s == null) { - return true; - } - for (int i = 0; i < s.length(); i++) { - final char c = s.charAt(i); - if (!Character.isWhitespace(c)) { - return false; - } - } - return true; - } - - /** - * @param text - * @return the number of words in text. Uses a crude whitespace - * measure. - */ - public static int wordCount(String text) { - String[] bits = text.split("\\W+"); - int wc = 0; - for (String string : bits) { - if (!whitespace(string)) wc++; - } - return wc; - } - -} +/** + * Basic String manipulation utilities. + * (c) Winterwell 2010 and ThinkTank Mathematics 2007 + */ +package winterwell.markdown; + +import java.math.BigInteger; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Pattern; + +import winterwell.utils.Mutable; +import winterwell.utils.containers.Pair; + +/** + * A collection of general-purpose String handling methods. + * + * @author daniel.winterstein + */ +public final class StringMethods { + + /** + * Removes xml tags, comment blocks and script blocks. + * + * @param page + * @return the page with all xml tags removed. + */ + public static String stripTags(String page) { + // This code is rather ugly, but it does the job + StringBuilder stripped = new StringBuilder(page.length()); + boolean inTag = false; + // Comment blocks and script blocks are given special treatment + boolean inComment = false; + boolean inScript = false; + // Go through the text + for (int i = 0; i < page.length(); i++) { + char c = page.charAt(i); + // First check whether we are ignoring text + if (inTag) { + if (c == '>') + inTag = false; + } else if (inComment) { + if (c == '>' && page.charAt(i - 1) == '-' + && page.charAt(i - 1) == '-') { + inComment = false; + } + } else if (inScript) { + if (c == '>' && page.substring(i - 7, i).equals("/script")) { + inScript = false; + } + } else { + // Check for the start of a tag - looks for '<' followed by any + // non-whitespace character + if (c == '<' && !Character.isWhitespace(page.charAt(i + 1))) { + // Comment, script-block or tag? + if (page.charAt(i + 1) == '!' && page.charAt(i + 2) == '-' + && page.charAt(i + 3) == '-') { + inComment = true; + } else if (i + 8 < page.length() + && page.substring(i + 1, i + 7).equals("script")) { + inScript = true; + i += 7; + } else + inTag = true; // Normal tag by default + } else { + // Append all non-tag chars + stripped.append(c); + } + } // end if... + } + return stripped.toString(); + } + + /** + * The local line-end string. \n on unix, \r\n on windows, \r on mac. + */ + public static final String LINEEND = System.getProperty("line.separator"); + + /** + * @param s + * @return A version of s where the first letter is uppercase and all others + * are lowercase + */ + public static final String capitalise(final String s) { + return s.substring(0, 1).toUpperCase() + s.substring(1).toLowerCase(); + } + + /** + * Convert all line breaks into the system line break. + */ + public static final String convertLineBreaks(String text) { + return convertLineBreaks(text, LINEEND); + } + + /** + * Convert all line breaks into the specified line break. + */ + public static final String convertLineBreaks(String text, String br) { + text = text.replaceAll("\r\n", br); + text = text.replaceAll("\r", br); + text = text.replaceAll("\n", br); + return text; + } + + /** + * @param string + * @param character + * @return the number of times character appears in the string + * @author Sam Halliday + */ + static public int countCharsInString(String string, char character) { + int count = 0; + for (char c : string.toCharArray()) { + if (c == character) { + count++; + } + } + return count; + } + + /** + * + * E.g. + * findEnclosingRegion("text with a [region] inside", 15, '[', ']') + * is (??,??) + * + * @param text + * @param offset + * @param start + * @param end + * @return the smallest enclosed region (including start and end chars, the + * 1st number is inclusive, the 2nd exclusive), or null if none. So + * text.subString(start,end) is the specified region + */ + public static Pair findEnclosingRegion(String text, int offset, + char startMarker, char endMarker) { + // Forward + int end = findEnclosingRegion2(text, offset, endMarker, 1); + if (end == -1) + return null; + end++; // end is exclusive + // Backward + int start = findEnclosingRegion2(text, offset, startMarker, -1); + if (start == -1) + return null; + // Sanity + assert text.substring(start, end).charAt(0) == startMarker; + assert text.substring(start, end).endsWith("" + endMarker); + // Done + return new Pair(start, end); + } + + private static int findEnclosingRegion2(String text, int offset, + char endMarker, int direction) { + while (offset > -1 && offset < text.length()) { + char c = text.charAt(offset); + if (c == endMarker) + return offset; + offset += direction; + } + return -1; + } + + /** + * A convenience wrapper for + * {@link #findEnclosingRegion(String, int, char, char)} E.g. + findEnclosingRegion("text with a [region] inside", 15, '[', ']') .equals("[region]"); + + * + * @param text + * @param offset + * @param start + * @param end + * @return the smallest enclosed region (including start and end chars), or + * null if none. + */ + public static String findEnclosingText(String text, int offset, + char startMarker, char endMarker) { + Pair region = findEnclosingRegion(text, offset, startMarker, + endMarker); + if (region == null) + return null; + String s = text.substring(region.first, region.second); + return s; + } + + /** + * Format a block of text to use the given line-width. I.e. adjust the line + * breaks. Also known as hard line-wrapping. Paragraphs are + * recognised by a line of blank space between them (e.g. two returns). + *

+ * Note: a side-effect of this method is that it converts all line-breaks + * into the local system's line-breaks. E.g. on Windows, \n will become \r\n + * + * @param text + * The text to format + * @param lineWidth + * The number of columns in a line. Typically 78 or 80. + * @param respectLeadingCharacters + * Can be null. If set, the specified leading characters will be + * copied if the line is split. Use with " \t" to keep indented + * paragraphs properly indented. Use with "> \t" to also handle + * email-style quoting. Note that respected leading characters + * receive no special treatment when they are used inside a + * paragraph. + * @return A copy of text, formatted to the given line-width. + *

+ * TODO: recognise paragraphs by changes in the respected leading + * characters + */ + public static String format(String text, int lineWidth, int tabWidth, + String respectLeadingCharacters) { + // Switch to Linux line breaks for easier internal workings + text = convertLineBreaks(text, "\n"); + // Find paragraphs + List paras = format2_splitParagraphs(text, + respectLeadingCharacters); + // Rebuild text + StringBuilder sb = new StringBuilder(text.length() + 10); + for (String p : paras) { + String fp = format3_oneParagraph(p, lineWidth, tabWidth, + respectLeadingCharacters); + sb.append(fp); + // Paragraphs end with a double line break + sb.append("\n\n"); + } + // Pop the last line breaks + sb.delete(sb.length() - 2, sb.length()); + // Convert line breaks to system ones + text = convertLineBreaks(sb.toString()); + // Done + return text; + } + + private static List format2_splitParagraphs(String text, + String respectLeadingCharacters) { + List paras = new ArrayList(); + Mutable.Int index = new Mutable.Int(0); + // TODO The characters prefacing this paragraph + String leadingChars = ""; + while (index.value < text.length()) { + // One paragraph + boolean inSpace = false; + int start = index.value; + while (index.value < text.length()) { + char c = text.charAt(index.value); + index.value++; + if (!Character.isWhitespace(c)) { + inSpace = false; + continue; + } + // Line end? + if (c == '\r' || c == '\n') { + // // Handle MS Windows 2 character \r\n line breaks + // if (index.value < text.length()) { + // char c2 = text.charAt(index.value); + // if (c=='\r' && c2=='\n') index.value++; // Push on past + // the 2nd line break char + // } + // Double line end - indicating a paragraph break + if (inSpace) + break; + inSpace = true; + } + // TODO Other paragraph markers, spotted by a change in + // leadingChars + } + String p = text.substring(start, index.value); + paras.add(p); + } + // Done + return paras; + } + + /** + * Format a block of text to fit the given line width + * + * @param p + * @param lineWidth + * @param tabWidth + * @param respectLeadingCharacters + * @return + */ + private static String format3_oneParagraph(String p, int lineWidth, + int tabWidth, String respectLeadingCharacters) { + // Collect the reformatted paragraph + StringBuilder sb = new StringBuilder(p.length() + 10); // Allow for + // some extra + // line-breaks + // Get respected leading chars + String leadingChars = format4_getLeadingChars(p, + respectLeadingCharacters); + // First Line + sb.append(leadingChars); + int lineLength = leadingChars.length(); + int index = leadingChars.length(); + // Loop + while (index < p.length()) { + // Get the next word + StringBuilder word = new StringBuilder(); + char c = p.charAt(index); + index++; + while (!Character.isWhitespace(c)) { + word.append(c); + if (index == p.length()) + break; + c = p.charAt(index); + index++; + } + // Break the line if the word will not fit + if (lineLength + word.length() > lineWidth && lineLength != 0) { + trimEnd(sb); + sb.append('\n'); // lineEnd(sb); + // New line + sb.append(leadingChars); + lineLength = leadingChars.length(); + } + // Add word + sb.append(word); + lineLength += word.length(); + // Add the whitespace + if (index != p.length() && lineLength < lineWidth) { + if (c == '\n') { + c = ' '; + } + sb.append(c); + lineLength += (c == '\t') ? tabWidth : 1; + } + } + // A final trim + trimEnd(sb); + // Done + return sb.toString(); + } + + /** + * + * @param text + * @param respectLeadingCharacters + * Can be null + * @return The characters at the beginning of text which are respected. E.g. + * ("> Hello", " \t>") --> "> " + */ + private static String format4_getLeadingChars(String text, + String respectLeadingCharacters) { + if (respectLeadingCharacters == null) + return ""; + // Line-breaks cannot be respected + assert respectLeadingCharacters.indexOf('\n') == -1; + // Look for the first non-respected char + for (int i = 0; i < text.length(); i++) { + char c = text.charAt(i); + if (respectLeadingCharacters.indexOf(c) == -1) { + // Return the previous chars + return text.substring(0, i); + } + } + // All chars are respected + return text; + } + + /** + * Ensure that line ends with the right line-end character(s) + */ + public static final String lineEnd(String line) { + // strip possibly inappropriate line-endings + if (line.endsWith("\n")) { + line = line.substring(0, line.length() - 1); + } + if (line.endsWith("\r\n")) { + line = line.substring(0, line.length() - 2); + } + if (line.endsWith("\r")) { + line = line.substring(0, line.length() - 1); + } + // add in proper line end + if (!line.endsWith(LINEEND)) { + line += LINEEND; + } + return line; + } + + /** + * Ensure that line ends with the right line-end character(s). This is more + * efficient than the version for Strings. + * + * @param line + */ + public static final void lineEnd(final StringBuilder line) { + if (line.length() == 0) { + line.append(LINEEND); + return; + } + // strip possibly inappropriate line-endings + final char last = line.charAt(line.length() - 1); + if (last == '\n') { + if ((line.length() > 1) && (line.charAt(line.length() - 2) == '\r')) { + // \r\n + line.replace(line.length() - 2, line.length(), LINEEND); + return; + } + line.replace(line.length() - 1, line.length(), LINEEND); + return; + } + if (last == '\r') { + line.replace(line.length() - 1, line.length(), LINEEND); + return; + } + line.append(LINEEND); + return; + } + + + + /** + * @param string + * @return the MD5 sum of the string using the default charset. Null if + * there was an error in calculating the hash. + * @author Sam Halliday + */ + public static String md5Hash(String string) { + MessageDigest md5 = null; + try { + md5 = MessageDigest.getInstance("MD5"); + } catch (NoSuchAlgorithmException e) { + // ignore this exception, we know MD5 exists + } + md5.update(string.getBytes()); + BigInteger hash = new BigInteger(1, md5.digest()); + return hash.toString(16); + } + + /** + * Removes HTML-style tags from a string. + * + * @param s + * a String from which to remove tags + * @return a string with all instances of <.*> removed. + */ + public static String removeTags(String s) { + StringBuffer sb = new StringBuffer(); + boolean inTag = false; + for (int i = 0; i < s.length(); i++) { + char c = s.charAt(i); + if (c == '<') + inTag = true; + if (!inTag) + sb.append(c); + if (c == '>') + inTag = false; + } + return sb.toString(); + } + + /** + * Repeat a character. + * + * @param c + * @param i + * @return A String consisting of i x c. + * @example assert repeat('-', 5).equals("-----"); + */ + public static String repeat(Character c, int i) { + StringBuilder dashes = new StringBuilder(i); + for (int j = 0; j < i; j++) + dashes.append(c); + return dashes.toString(); + } + + /** + * Split a piece of text into separate lines. The line breaks are left at + * the end of each line. + * + * @param text + * @return The individual lines in the text. + */ + public static List splitLines(String text) { + List lines = new ArrayList(); + // Search for lines + int start = 0; + for (int i = 0; i < text.length(); i++) { + char c = text.charAt(i); + if (c == '\r' || c == '\n') { + // Handle MS Windows 2 character \r\n line breaks + if (i + 1 < text.length()) { + char c2 = text.charAt(i + 1); + if (c == '\r' && c2 == '\n') + i++; + } + // Get the line, with the line break + String line = text.substring(start, i + 1); + lines.add(line); + start = i + 1; + } + } + // Last one + if (start != text.length()) { + String line = text.substring(start); + lines.add(line); + } + return lines; + } + + /** + * Remove trailing whitespace. c.f. String#trim() which removes + * leading and trailing whitespace. + * + * @param sb + */ + private static void trimEnd(StringBuilder sb) { + while (true) { + // Get the last character + int i = sb.length() - 1; + if (i == -1) + return; // Quit if sb is empty + char c = sb.charAt(i); + if (!Character.isWhitespace(c)) + return; // Finish? + sb.deleteCharAt(i); // Remove and continue + } + } + + /** + * Returns true if the string is just whitespace, or empty, or null. + * + * @param s + */ + public static final boolean whitespace(final String s) { + if (s == null) { + return true; + } + for (int i = 0; i < s.length(); i++) { + final char c = s.charAt(i); + if (!Character.isWhitespace(c)) { + return false; + } + } + return true; + } + + /** + * @param text + * @return the number of words in text. Uses a crude whitespace + * measure. + */ + public static int wordCount(String text) { + String[] bits = text.split("\\W+"); + int wc = 0; + for (String string : bits) { + if (!whitespace(string)) wc++; + } + return wc; + } + +}