diff --git a/jdk/src/share/classes/java/util/regex/Matcher.java b/jdk/src/share/classes/java/util/regex/Matcher.java index ebab02e3b82..e841294ed8e 100644 --- a/jdk/src/share/classes/java/util/regex/Matcher.java +++ b/jdk/src/share/classes/java/util/regex/Matcher.java @@ -65,9 +65,10 @@ import java.util.Objects; * new strings whose contents can, if desired, be computed from the match * result. The {@link #appendReplacement appendReplacement} and {@link * #appendTail appendTail} methods can be used in tandem in order to collect - * the result into an existing string buffer, or the more convenient {@link - * #replaceAll replaceAll} method can be used to create a string in which every - * matching subsequence in the input sequence is replaced. + * the result into an existing string buffer or string builder. Alternatively, + * the more convenient {@link #replaceAll replaceAll} method can be used to + * create a string in which every matching subsequence in the input sequence + * is replaced. * *
The explicit state of a matcher includes the start and end indices of * the most recent successful match. It also includes the start and end @@ -792,15 +793,115 @@ public final class Matcher implements MatchResult { * that does not exist in the pattern */ public Matcher appendReplacement(StringBuffer sb, String replacement) { - // If no match, return error if (first < 0) throw new IllegalStateException("No match available"); - - // Process substitution string to replace group references with groups - int cursor = 0; StringBuilder result = new StringBuilder(); + appendExpandedReplacement(replacement, result); + // Append the intervening text + sb.append(text, lastAppendPosition, first); + // Append the match substitution + sb.append(result); + lastAppendPosition = last; + return this; + } + /** + * Implements a non-terminal append-and-replace step. + * + *
This method performs the following actions:
+ * <ol> + * <li><p> It reads characters from the input sequence, starting at the + * append position, and appends them to the given string builder. It + * stops after reading the last character preceding the previous match, + * that is, the character at index {@link + * #start()} - 1. </p></li>
<li><p> It appends the given replacement string to the string builder. + * </p></li>
<li><p> It sets the append position of this matcher to the index of + * the last character matched, plus one, that is, to {@link #end()}. + * </p></li> </ol>
The replacement string may contain references to subsequences + * captured during the previous match: Each occurrence of + * ${name} or $g will be replaced by the result of + * evaluating {@link #group(String) group}(name) or {@link #group(int) group}(g), respectively. + * For $g, the first number after the $ is always treated as part of + * the group reference. Subsequent numbers are incorporated into g if + * they would form a legal group reference. Only the numerals '0' + * through '9' are considered as potential components of the group + * reference. If the second group matched the string "foo", for + * example, then passing the replacement string "$2bar" would + * cause "foobar" to be appended to the string builder. A dollar + * sign ($) may be included as a literal in the replacement + * string by preceding it with a backslash (\$). + * + *
Note that backslashes (\) and dollar signs ($) in + * the replacement string may cause the results to differ from those of + * a literal replacement string. Dollar signs may be + * treated as references to captured subsequences as described above, and + * backslashes are used to escape literal characters in the replacement + * string. + * + *
This method is intended to be used in a loop together with the + * {@link #appendTail appendTail} and {@link #find find} methods. The + * following code, for example, writes one dog two dogs in the + * yard to the standard-output stream:
+ * + *+ * + * @param sb + * The target string builder + * @param replacement + * The replacement string + * @return This matcher + * + * @throws IllegalStateException + * If no match has yet been attempted, + * or if the previous match operation failed + * @throws IllegalArgumentException + * If the replacement string refers to a named-capturing + * group that does not exist in the pattern + * @throws IndexOutOfBoundsException + * If the replacement string refers to a capturing group + * that does not exist in the pattern + * @since 1.9 + */ + public Matcher appendReplacement(StringBuilder sb, String replacement) { + // If no match, return error + if (first < 0) + throw new IllegalStateException("No match available"); + StringBuilder result = new StringBuilder(); + appendExpandedReplacement(replacement, result); + // Append the intervening text + sb.append(text, lastAppendPosition, first); + // Append the match substitution + sb.append(result); + lastAppendPosition = last; + return this; + } + + /** + * Processes replacement string to replace group references with + * groups. 
+ */ + private StringBuilder appendExpandedReplacement( + String replacement, StringBuilder result) { + int cursor = 0; while (cursor < replacement.length()) { char nextChar = replacement.charAt(cursor); if (nextChar == '\\') { @@ -852,8 +953,8 @@ public final class Matcher implements MatchResult { cursor++; } else { // The first number is always a group - refNum = (int)nextChar - '0'; - if ((refNum < 0)||(refNum > 9)) + refNum = nextChar - '0'; + if ((refNum < 0) || (refNum > 9)) throw new IllegalArgumentException( "Illegal group reference"); cursor++; @@ -864,7 +965,7 @@ public final class Matcher implements MatchResult { break; } int nextDigit = replacement.charAt(cursor) - '0'; - if ((nextDigit < 0)||(nextDigit > 9)) { // not a number + if ((nextDigit < 0) || (nextDigit > 9)) { // not a number break; } int newRefNum = (refNum * 10) + nextDigit; @@ -884,13 +985,7 @@ public final class Matcher implements MatchResult { cursor++; } } - // Append the intervening text - sb.append(text, lastAppendPosition, first); - // Append the match substitution - sb.append(result); - - lastAppendPosition = last; - return this; + return result; } /** @@ -912,6 +1007,27 @@ public final class Matcher implements MatchResult { return sb; } + /** + * Implements a terminal append-and-replace step. + * + *+ * Pattern p = Pattern.compile("cat"); + * Matcher m = p.matcher("one cat two cats in the yard"); + * StringBuilder sb = new StringBuilder(); + * while (m.find()) { + * m.appendReplacement(sb, "dog"); + * } + * m.appendTail(sb); + * System.out.println(sb.toString());
This method reads characters from the input sequence, starting at + * the append position, and appends them to the given string builder. It is + * intended to be invoked after one or more invocations of the {@link + * #appendReplacement appendReplacement} method in order to copy the + * remainder of the input sequence.
+ * + * @param sb + * The target string builder + * + * @return The target string builder + * + * @since 1.9 + */ + public StringBuilder appendTail(StringBuilder sb) { + sb.append(text, lastAppendPosition, getTextLength()); + return sb; + } + /** * Replaces every subsequence of the input sequence that matches the * pattern with the given replacement string. @@ -950,7 +1066,7 @@ public final class Matcher implements MatchResult { reset(); boolean result = find(); if (result) { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); do { appendReplacement(sb, replacement); result = find(); @@ -1000,7 +1116,7 @@ public final class Matcher implements MatchResult { reset(); if (!find()) return text.toString(); - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); appendReplacement(sb, replacement); appendTail(sb); return sb.toString(); diff --git a/jdk/test/java/util/regex/RegExTest.java b/jdk/test/java/util/regex/RegExTest.java index cd6abd9be07..7cc7c784cf2 100644 --- a/jdk/test/java/util/regex/RegExTest.java +++ b/jdk/test/java/util/regex/RegExTest.java @@ -32,7 +32,7 @@ * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590 - * 8027645 8035076 + * 8027645 8035076 8039124 */ import java.util.regex.*; @@ -75,7 +75,10 @@ public class RegExTest { // Substitition tests on randomly generated sequences globalSubstitute(); stringbufferSubstitute(); + stringbuilderSubstitute(); + substitutionBasher(); + substitutionBasher2(); // Canonical Equivalence ceTest(); @@ -296,10 +299,12 @@ public class RegExTest { final Matcher m = Pattern.compile("xyz").matcher("xyz"); m.matches(); - check(new Runnable() { public void run() { m.appendTail(null);}}); + check(new Runnable() { public void run() { m.appendTail((StringBuffer)null);}}); + check(new Runnable() { public 
void run() { m.appendTail((StringBuilder)null);}}); check(new Runnable() { public void run() { m.replaceAll(null);}}); check(new Runnable() { public void run() { m.replaceFirst(null);}}); - check(new Runnable() { public void run() { m.appendReplacement(null, null);}}); + check(new Runnable() { public void run() { m.appendReplacement((StringBuffer)null, null);}}); + check(new Runnable() { public void run() { m.appendReplacement((StringBuilder)null, null);}}); check(new Runnable() { public void run() { m.reset(null);}}); check(new Runnable() { public void run() { Matcher.quoteReplacement(null);}}); //check(new Runnable() { public void run() { m.usePattern(null);}}); @@ -2973,6 +2978,286 @@ public class RegExTest { report("SB Substitution"); } + /** + * Tests the usage of Matcher.appendReplacement() with literal + * and group substitutions. + */ + private static void stringbuilderSubstitute() throws Exception { + // SB substitution with literal + String blah = "zzzblahzzz"; + Pattern p = Pattern.compile("blah"); + Matcher m = p.matcher(blah); + StringBuilder result = new StringBuilder(); + try { + m.appendReplacement(result, "blech"); + failCount++; + } catch (IllegalStateException e) { + } + m.find(); + m.appendReplacement(result, "blech"); + if (!result.toString().equals("zzzblech")) + failCount++; + + m.appendTail(result); + if (!result.toString().equals("zzzblechzzz")) + failCount++; + + // SB substitution with groups + blah = "zzzabcdzzz"; + p = Pattern.compile("(ab)(cd)*"); + m = p.matcher(blah); + result = new StringBuilder(); + try { + m.appendReplacement(result, "$1"); + failCount++; + } catch (IllegalStateException e) { + } + m.find(); + m.appendReplacement(result, "$1"); + if (!result.toString().equals("zzzab")) + failCount++; + + m.appendTail(result); + if (!result.toString().equals("zzzabzzz")) + failCount++; + + // SB substitution with 3 groups + blah = "zzzabcdcdefzzz"; + p = Pattern.compile("(ab)(cd)*(ef)"); + m = p.matcher(blah); + result = new 
StringBuilder(); + try { + m.appendReplacement(result, "$1w$2w$3"); + failCount++; + } catch (IllegalStateException e) { + } + m.find(); + m.appendReplacement(result, "$1w$2w$3"); + if (!result.toString().equals("zzzabwcdwef")) + failCount++; + + m.appendTail(result); + if (!result.toString().equals("zzzabwcdwefzzz")) + failCount++; + + // SB substitution with groups and three matches + // skipping middle match + blah = "zzzabcdzzzabcddzzzabcdzzz"; + p = Pattern.compile("(ab)(cd*)"); + m = p.matcher(blah); + result = new StringBuilder(); + try { + m.appendReplacement(result, "$1"); + failCount++; + } catch (IllegalStateException e) { + } + m.find(); + m.appendReplacement(result, "$1"); + if (!result.toString().equals("zzzab")) + failCount++; + + m.find(); + m.find(); + m.appendReplacement(result, "$2"); + if (!result.toString().equals("zzzabzzzabcddzzzcd")) + failCount++; + + m.appendTail(result); + if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) + failCount++; + + // Check to make sure escaped $ is ignored + blah = "zzzabcdcdefzzz"; + p = Pattern.compile("(ab)(cd)*(ef)"); + m = p.matcher(blah); + result = new StringBuilder(); + m.find(); + m.appendReplacement(result, "$1w\\$2w$3"); + if (!result.toString().equals("zzzabw$2wef")) + failCount++; + + m.appendTail(result); + if (!result.toString().equals("zzzabw$2wefzzz")) + failCount++; + + // Check to make sure a reference to nonexistent group causes error + blah = "zzzabcdcdefzzz"; + p = Pattern.compile("(ab)(cd)*(ef)"); + m = p.matcher(blah); + result = new StringBuilder(); + m.find(); + try { + m.appendReplacement(result, "$1w$5w$3"); + failCount++; + } catch (IndexOutOfBoundsException ioobe) { + // Correct result + } + + // Check double digit group references + blah = "zzz123456789101112zzz"; + p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); + m = p.matcher(blah); + result = new StringBuilder(); + m.find(); + m.appendReplacement(result, "$1w$11w$3"); + if 
(!result.toString().equals("zzz1w11w3")) + failCount++; + + // Check to make sure it backs off $15 to $1 if only three groups + blah = "zzzabcdcdefzzz"; + p = Pattern.compile("(ab)(cd)*(ef)"); + m = p.matcher(blah); + result = new StringBuilder(); + m.find(); + m.appendReplacement(result, "$1w$15w$3"); + if (!result.toString().equals("zzzabwab5wef")) + failCount++; + + + // Supplementary character test + // SB substitution with literal + blah = toSupplementaries("zzzblahzzz"); + p = Pattern.compile(toSupplementaries("blah")); + m = p.matcher(blah); + result = new StringBuilder(); + try { + m.appendReplacement(result, toSupplementaries("blech")); + failCount++; + } catch (IllegalStateException e) { + } + m.find(); + m.appendReplacement(result, toSupplementaries("blech")); + if (!result.toString().equals(toSupplementaries("zzzblech"))) + failCount++; + m.appendTail(result); + if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) + failCount++; + + // SB substitution with groups + blah = toSupplementaries("zzzabcdzzz"); + p = Pattern.compile(toSupplementaries("(ab)(cd)*")); + m = p.matcher(blah); + result = new StringBuilder(); + try { + m.appendReplacement(result, "$1"); + failCount++; + } catch (IllegalStateException e) { + } + m.find(); + m.appendReplacement(result, "$1"); + if (!result.toString().equals(toSupplementaries("zzzab"))) + failCount++; + + m.appendTail(result); + if (!result.toString().equals(toSupplementaries("zzzabzzz"))) + failCount++; + + // SB substitution with 3 groups + blah = toSupplementaries("zzzabcdcdefzzz"); + p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); + m = p.matcher(blah); + result = new StringBuilder(); + try { + m.appendReplacement(result, toSupplementaries("$1w$2w$3")); + failCount++; + } catch (IllegalStateException e) { + } + m.find(); + m.appendReplacement(result, toSupplementaries("$1w$2w$3")); + if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) + failCount++; + + m.appendTail(result); + if 
(!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) + failCount++; + + // SB substitution with groups and three matches + // skipping middle match + blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); + p = Pattern.compile(toSupplementaries("(ab)(cd*)")); + m = p.matcher(blah); + result = new StringBuilder(); + try { + m.appendReplacement(result, "$1"); + failCount++; + } catch (IllegalStateException e) { + } + m.find(); + m.appendReplacement(result, "$1"); + if (!result.toString().equals(toSupplementaries("zzzab"))) + failCount++; + + m.find(); + m.find(); + m.appendReplacement(result, "$2"); + if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) + failCount++; + + m.appendTail(result); + if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) + failCount++; + + // Check to make sure escaped $ is ignored + blah = toSupplementaries("zzzabcdcdefzzz"); + p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); + m = p.matcher(blah); + result = new StringBuilder(); + m.find(); + m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); + if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) + failCount++; + + m.appendTail(result); + if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) + failCount++; + + // Check to make sure a reference to nonexistent group causes error + blah = toSupplementaries("zzzabcdcdefzzz"); + p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); + m = p.matcher(blah); + result = new StringBuilder(); + m.find(); + try { + m.appendReplacement(result, toSupplementaries("$1w$5w$3")); + failCount++; + } catch (IndexOutOfBoundsException ioobe) { + // Correct result + } + // Check double digit group references + blah = toSupplementaries("zzz123456789101112zzz"); + p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); + m = p.matcher(blah); + result = new StringBuilder(); + m.find(); + m.appendReplacement(result, toSupplementaries("$1w$11w$3")); + if 
(!result.toString().equals(toSupplementaries("zzz1w11w3"))) + failCount++; + + // Check to make sure it backs off $15 to $1 if only three groups + blah = toSupplementaries("zzzabcdcdefzzz"); + p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); + m = p.matcher(blah); + result = new StringBuilder(); + m.find(); + m.appendReplacement(result, toSupplementaries("$1w$15w$3")); + if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) + failCount++; + // Check nothing has been appended into the output buffer if + // the replacement string triggers IllegalArgumentException. + p = Pattern.compile("(abc)"); + m = p.matcher("abcd"); + result = new StringBuilder(); + m.find(); + try { + m.appendReplacement(result, ("xyz$g")); + failCount++; + } catch (IllegalArgumentException iae) { + if (result.length() != 0) + failCount++; + } + report("SB Substitution 2"); + } + /* * 5 groups of characters are created to make a substitution string. * A base string will be created including random lead chars, the @@ -3059,6 +3344,93 @@ public class RegExTest { report("Substitution Basher"); } + /* + * 5 groups of characters are created to make a substitution string. + * A base string will be created including random lead chars, the + * substitution string, and random trailing chars. + * A pattern containing the 5 groups is searched for and replaced with: + * random group + random string + random group. + * The results are checked for correctness. 
+ */ + private static void substitutionBasher2() { + for (int runs = 0; runs<1000; runs++) { + // Create a base string to work in + int leadingChars = generator.nextInt(10); + StringBuilder baseBuffer = new StringBuilder(100); + String leadingString = getRandomAlphaString(leadingChars); + baseBuffer.append(leadingString); + + // Create 5 groups of random number of random chars + // Create the string to substitute + // Create the pattern string to search for + StringBuilder bufferToSub = new StringBuilder(25); + StringBuilder bufferToPat = new StringBuilder(50); + String[] groups = new String[5]; + for(int i=0; i<5; i++) { + int aGroupSize = generator.nextInt(5)+1; + groups[i] = getRandomAlphaString(aGroupSize); + bufferToSub.append(groups[i]); + bufferToPat.append('('); + bufferToPat.append(groups[i]); + bufferToPat.append(')'); + } + String stringToSub = bufferToSub.toString(); + String pattern = bufferToPat.toString(); + + // Place sub string into working string at random index + baseBuffer.append(stringToSub); + + // Append random chars to end + int trailingChars = generator.nextInt(10); + String trailingString = getRandomAlphaString(trailingChars); + baseBuffer.append(trailingString); + String baseString = baseBuffer.toString(); + + // Create test pattern and matcher + Pattern p = Pattern.compile(pattern); + Matcher m = p.matcher(baseString); + + // Reject candidate if pattern happens to start early + m.find(); + if (m.start() < leadingChars) + continue; + + // Reject candidate if more than one match + if (m.find()) + continue; + + // Construct a replacement string with : + // random group + random string + random group + StringBuilder bufferToRep = new StringBuilder(); + int groupIndex1 = generator.nextInt(5); + bufferToRep.append("$" + (groupIndex1 + 1)); + String randomMidString = getRandomAlphaString(5); + bufferToRep.append(randomMidString); + int groupIndex2 = generator.nextInt(5); + bufferToRep.append("$" + (groupIndex2 + 1)); + String replacement = 
bufferToRep.toString(); + + // Do the replacement + String result = m.replaceAll(replacement); + + // Construct expected result + StringBuilder bufferToRes = new StringBuilder(); + bufferToRes.append(leadingString); + bufferToRes.append(groups[groupIndex1]); + bufferToRes.append(randomMidString); + bufferToRes.append(groups[groupIndex2]); + bufferToRes.append(trailingString); + String expectedResult = bufferToRes.toString(); + + // Check results + if (!result.equals(expectedResult)) { + failCount++; + } + } + + report("Substitution Basher 2"); + } + /** * Checks the handling of some escape sequences that the Pattern * class should process instead of the java compiler. These are