8039124: j.u.regex.Matcher.appendReplace/Tail() should support StringBuilder variant

Add StringBuilder variants of Matcher.appendReplacement() and Matcher.appendTail().

Co-authored-by: Peter Levart <peter.levart@gmail.com>
Reviewed-by: alanb, sherman
This commit is contained in:
Jeremy Manson 2014-04-09 09:36:19 -07:00 committed by Xueming Shen
parent 2f501cd8a5
commit 55d8dc5d85
2 changed files with 510 additions and 22 deletions

View File

@ -65,9 +65,10 @@ import java.util.Objects;
* new strings whose contents can, if desired, be computed from the match
* result. The {@link #appendReplacement appendReplacement} and {@link
* #appendTail appendTail} methods can be used in tandem in order to collect
* the result into an existing string buffer, or the more convenient {@link
* #replaceAll replaceAll} method can be used to create a string in which every
* matching subsequence in the input sequence is replaced.
* the result into an existing string buffer or string builder. Alternatively,
* the more convenient {@link #replaceAll replaceAll} method can be used to
* create a string in which every matching subsequence in the input sequence
* is replaced.
*
* <p> The explicit state of a matcher includes the start and end indices of
* the most recent successful match. It also includes the start and end
@ -792,15 +793,115 @@ public final class Matcher implements MatchResult {
* that does not exist in the pattern
*/
public Matcher appendReplacement(StringBuffer sb, String replacement) {
    // A replacement step is only valid once a match is available
    if (first < 0) {
        throw new IllegalStateException("No match available");
    }
    // Expand the replacement into a scratch builder before touching sb,
    // so that an exception raised during expansion leaves sb unmodified
    StringBuilder result = new StringBuilder();
    appendExpandedReplacement(replacement, result);
    // Append the intervening text
    sb.append(text, lastAppendPosition, first);
    // Append the match substitution
    sb.append(result);
    // The next append step resumes from the end of this match
    lastAppendPosition = last;
    return this;
}
/**
* Implements a non-terminal append-and-replace step.
*
* <p> This method performs the following actions: </p>
*
* <ol>
*
* <li><p> It reads characters from the input sequence, starting at the
* append position, and appends them to the given string builder. It
* stops after reading the last character preceding the previous match,
* that is, the character at index {@link
* #start()}&nbsp;<tt>-</tt>&nbsp;<tt>1</tt>. </p></li>
*
* <li><p> It appends the given replacement string to the string builder.
* </p></li>
*
* <li><p> It sets the append position of this matcher to the index of
* the last character matched, plus one, that is, to {@link #end()}.
* </p></li>
*
* </ol>
*
* <p> The replacement string may contain references to subsequences
* captured during the previous match: Each occurrence of
* <tt>$</tt><i>g</i> will be replaced by the result of
* evaluating {@link #group(int) group}<tt>(</tt><i>g</i><tt>)</tt>.
* The first number after the <tt>$</tt> is always treated as part of
* the group reference. Subsequent numbers are incorporated into g if
* they would form a legal group reference. Only the numerals '0'
* through '9' are considered as potential components of the group
* reference. If the second group matched the string <tt>"foo"</tt>, for
* example, then passing the replacement string <tt>"$2bar"</tt> would
* cause <tt>"foobar"</tt> to be appended to the string builder. A dollar
* sign (<tt>$</tt>) may be included as a literal in the replacement
* string by preceding it with a backslash (<tt>\$</tt>).
*
* <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
* the replacement string may cause the results to be different than if it
* were being treated as a literal replacement string. Dollar signs may be
* treated as references to captured subsequences as described above, and
* backslashes are used to escape literal characters in the replacement
* string.
*
* <p> This method is intended to be used in a loop together with the
* {@link #appendTail appendTail} and {@link #find find} methods. The
* following code, for example, writes <tt>one dog two dogs in the
* yard</tt> to the standard-output stream: </p>
*
* <blockquote><pre>
* Pattern p = Pattern.compile("cat");
* Matcher m = p.matcher("one cat two cats in the yard");
* StringBuilder sb = new StringBuilder();
* while (m.find()) {
* m.appendReplacement(sb, "dog");
* }
* m.appendTail(sb);
* System.out.println(sb.toString());</pre></blockquote>
*
* @param sb
* The target string builder
* @param replacement
* The replacement string
* @return This matcher
*
* @throws IllegalStateException
* If no match has yet been attempted,
* or if the previous match operation failed
* @throws IllegalArgumentException
* If the replacement string refers to a named-capturing
* group that does not exist in the pattern
* @throws IndexOutOfBoundsException
* If the replacement string refers to a capturing group
* that does not exist in the pattern
* @since 1.9
*/
public Matcher appendReplacement(StringBuilder sb, String replacement) {
    // Without an available match there is nothing to replace
    if (first < 0) {
        throw new IllegalStateException("No match available");
    }
    // Expand the replacement into a scratch builder first; sb is only
    // written to once the expansion has completed without throwing
    StringBuilder expanded = new StringBuilder();
    appendExpandedReplacement(replacement, expanded);
    // Copy the input between the append position and the match start,
    // followed by the expanded replacement
    sb.append(text, lastAppendPosition, first).append(expanded);
    // Subsequent append steps continue from the end of this match
    lastAppendPosition = last;
    return this;
}
/**
* Processes replacement string to replace group references with
* groups.
*/
private StringBuilder appendExpandedReplacement(
String replacement, StringBuilder result) {
int cursor = 0;
while (cursor < replacement.length()) {
char nextChar = replacement.charAt(cursor);
if (nextChar == '\\') {
@ -852,8 +953,8 @@ public final class Matcher implements MatchResult {
cursor++;
} else {
// The first number is always a group
refNum = (int)nextChar - '0';
if ((refNum < 0)||(refNum > 9))
refNum = nextChar - '0';
if ((refNum < 0) || (refNum > 9))
throw new IllegalArgumentException(
"Illegal group reference");
cursor++;
@ -864,7 +965,7 @@ public final class Matcher implements MatchResult {
break;
}
int nextDigit = replacement.charAt(cursor) - '0';
if ((nextDigit < 0)||(nextDigit > 9)) { // not a number
if ((nextDigit < 0) || (nextDigit > 9)) { // not a number
break;
}
int newRefNum = (refNum * 10) + nextDigit;
@ -884,13 +985,7 @@ public final class Matcher implements MatchResult {
cursor++;
}
}
// Append the intervening text
sb.append(text, lastAppendPosition, first);
// Append the match substitution
sb.append(result);
lastAppendPosition = last;
return this;
return result;
}
/**
@ -912,6 +1007,27 @@ public final class Matcher implements MatchResult {
return sb;
}
/**
* Implements a terminal append-and-replace step.
*
* <p> This method reads characters from the input sequence, starting at
* the append position, and appends them to the given string builder. It is
* intended to be invoked after one or more invocations of the {@link
* #appendReplacement appendReplacement} method in order to copy the
* remainder of the input sequence. </p>
*
* @param sb
* The target string builder
*
* @return The target string builder
*
* @since 1.9
*/
public StringBuilder appendTail(StringBuilder sb) {
    // Copy everything from the append position through the end of the
    // input; append() hands back the same builder it was invoked on
    return sb.append(text, lastAppendPosition, getTextLength());
}
/**
* Replaces every subsequence of the input sequence that matches the
* pattern with the given replacement string.
@ -950,7 +1066,7 @@ public final class Matcher implements MatchResult {
reset();
boolean result = find();
if (result) {
StringBuffer sb = new StringBuffer();
StringBuilder sb = new StringBuilder();
do {
appendReplacement(sb, replacement);
result = find();
@ -1000,7 +1116,7 @@ public final class Matcher implements MatchResult {
reset();
if (!find())
return text.toString();
StringBuffer sb = new StringBuffer();
StringBuilder sb = new StringBuilder();
appendReplacement(sb, replacement);
appendTail(sb);
return sb.toString();

View File

@ -32,7 +32,7 @@
* 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
* 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
* 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590
* 8027645 8035076
* 8027645 8035076 8039124
*/
import java.util.regex.*;
@ -75,7 +75,10 @@ public class RegExTest {
// Substitution tests on randomly generated sequences
globalSubstitute();
stringbufferSubstitute();
stringbuilderSubstitute();
substitutionBasher();
substitutionBasher2();
// Canonical Equivalence
ceTest();
@ -296,10 +299,12 @@ public class RegExTest {
final Matcher m = Pattern.compile("xyz").matcher("xyz");
m.matches();
check(new Runnable() { public void run() { m.appendTail(null);}});
check(new Runnable() { public void run() { m.appendTail((StringBuffer)null);}});
check(new Runnable() { public void run() { m.appendTail((StringBuilder)null);}});
check(new Runnable() { public void run() { m.replaceAll(null);}});
check(new Runnable() { public void run() { m.replaceFirst(null);}});
check(new Runnable() { public void run() { m.appendReplacement(null, null);}});
check(new Runnable() { public void run() { m.appendReplacement((StringBuffer)null, null);}});
check(new Runnable() { public void run() { m.appendReplacement((StringBuilder)null, null);}});
check(new Runnable() { public void run() { m.reset(null);}});
check(new Runnable() { public void run() { Matcher.quoteReplacement(null);}});
//check(new Runnable() { public void run() { m.usePattern(null);}});
@ -2973,6 +2978,286 @@ public class RegExTest {
report("SB Substitution");
}
/**
 * Exercises the StringBuilder overloads of Matcher.appendReplacement()
 * and Matcher.appendTail() with literal and group substitutions, first
 * on plain input and then on input passed through toSupplementaries().
 * Every unexpected outcome bumps failCount; the tally is reported at
 * the end under the name "SB Substitution 2".
 */
private static void stringbuilderSubstitute() throws Exception {
    // Literal replacement
    String input = "zzzblahzzz";
    Pattern pattern = Pattern.compile("blah");
    Matcher matcher = pattern.matcher(input);
    StringBuilder out = new StringBuilder();
    try {
        // No match attempted yet, so this call has to throw
        matcher.appendReplacement(out, "blech");
        failCount++;
    } catch (IllegalStateException expected) {
    }
    matcher.find();
    matcher.appendReplacement(out, "blech");
    if (!out.toString().equals("zzzblech")) {
        failCount++;
    }
    matcher.appendTail(out);
    if (!out.toString().equals("zzzblechzzz")) {
        failCount++;
    }

    // Replacement referring to a group
    input = "zzzabcdzzz";
    pattern = Pattern.compile("(ab)(cd)*");
    matcher = pattern.matcher(input);
    out = new StringBuilder();
    try {
        matcher.appendReplacement(out, "$1");
        failCount++;
    } catch (IllegalStateException expected) {
    }
    matcher.find();
    matcher.appendReplacement(out, "$1");
    if (!out.toString().equals("zzzab")) {
        failCount++;
    }
    matcher.appendTail(out);
    if (!out.toString().equals("zzzabzzz")) {
        failCount++;
    }

    // Replacement referring to three groups
    input = "zzzabcdcdefzzz";
    pattern = Pattern.compile("(ab)(cd)*(ef)");
    matcher = pattern.matcher(input);
    out = new StringBuilder();
    try {
        matcher.appendReplacement(out, "$1w$2w$3");
        failCount++;
    } catch (IllegalStateException expected) {
    }
    matcher.find();
    matcher.appendReplacement(out, "$1w$2w$3");
    if (!out.toString().equals("zzzabwcdwef")) {
        failCount++;
    }
    matcher.appendTail(out);
    if (!out.toString().equals("zzzabwcdwefzzz")) {
        failCount++;
    }

    // Three matches in the input; the middle one is stepped over
    // without a replacement
    input = "zzzabcdzzzabcddzzzabcdzzz";
    pattern = Pattern.compile("(ab)(cd*)");
    matcher = pattern.matcher(input);
    out = new StringBuilder();
    try {
        matcher.appendReplacement(out, "$1");
        failCount++;
    } catch (IllegalStateException expected) {
    }
    matcher.find();
    matcher.appendReplacement(out, "$1");
    if (!out.toString().equals("zzzab")) {
        failCount++;
    }
    matcher.find();
    matcher.find();
    matcher.appendReplacement(out, "$2");
    if (!out.toString().equals("zzzabzzzabcddzzzcd")) {
        failCount++;
    }
    matcher.appendTail(out);
    if (!out.toString().equals("zzzabzzzabcddzzzcdzzz")) {
        failCount++;
    }

    // An escaped $ must come through as a literal dollar sign
    input = "zzzabcdcdefzzz";
    pattern = Pattern.compile("(ab)(cd)*(ef)");
    matcher = pattern.matcher(input);
    out = new StringBuilder();
    matcher.find();
    matcher.appendReplacement(out, "$1w\\$2w$3");
    if (!out.toString().equals("zzzabw$2wef")) {
        failCount++;
    }
    matcher.appendTail(out);
    if (!out.toString().equals("zzzabw$2wefzzz")) {
        failCount++;
    }

    // Referring to a group that does not exist must raise an error
    input = "zzzabcdcdefzzz";
    pattern = Pattern.compile("(ab)(cd)*(ef)");
    matcher = pattern.matcher(input);
    out = new StringBuilder();
    matcher.find();
    try {
        matcher.appendReplacement(out, "$1w$5w$3");
        failCount++;
    } catch (IndexOutOfBoundsException expected) {
        // Correct result
    }

    // Two-digit group references
    input = "zzz123456789101112zzz";
    pattern = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
    matcher = pattern.matcher(input);
    out = new StringBuilder();
    matcher.find();
    matcher.appendReplacement(out, "$1w$11w$3");
    if (!out.toString().equals("zzz1w11w3")) {
        failCount++;
    }

    // With only three groups, $15 must back off to $1 followed by "5"
    input = "zzzabcdcdefzzz";
    pattern = Pattern.compile("(ab)(cd)*(ef)");
    matcher = pattern.matcher(input);
    out = new StringBuilder();
    matcher.find();
    matcher.appendReplacement(out, "$1w$15w$3");
    if (!out.toString().equals("zzzabwab5wef")) {
        failCount++;
    }

    // The same scenarios again, on input built with toSupplementaries()

    // Literal replacement
    input = toSupplementaries("zzzblahzzz");
    pattern = Pattern.compile(toSupplementaries("blah"));
    matcher = pattern.matcher(input);
    out = new StringBuilder();
    try {
        matcher.appendReplacement(out, toSupplementaries("blech"));
        failCount++;
    } catch (IllegalStateException expected) {
    }
    matcher.find();
    matcher.appendReplacement(out, toSupplementaries("blech"));
    if (!out.toString().equals(toSupplementaries("zzzblech"))) {
        failCount++;
    }
    matcher.appendTail(out);
    if (!out.toString().equals(toSupplementaries("zzzblechzzz"))) {
        failCount++;
    }

    // Replacement referring to a group
    input = toSupplementaries("zzzabcdzzz");
    pattern = Pattern.compile(toSupplementaries("(ab)(cd)*"));
    matcher = pattern.matcher(input);
    out = new StringBuilder();
    try {
        matcher.appendReplacement(out, "$1");
        failCount++;
    } catch (IllegalStateException expected) {
    }
    matcher.find();
    matcher.appendReplacement(out, "$1");
    if (!out.toString().equals(toSupplementaries("zzzab"))) {
        failCount++;
    }
    matcher.appendTail(out);
    if (!out.toString().equals(toSupplementaries("zzzabzzz"))) {
        failCount++;
    }

    // Replacement referring to three groups
    input = toSupplementaries("zzzabcdcdefzzz");
    pattern = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
    matcher = pattern.matcher(input);
    out = new StringBuilder();
    try {
        matcher.appendReplacement(out, toSupplementaries("$1w$2w$3"));
        failCount++;
    } catch (IllegalStateException expected) {
    }
    matcher.find();
    matcher.appendReplacement(out, toSupplementaries("$1w$2w$3"));
    if (!out.toString().equals(toSupplementaries("zzzabwcdwef"))) {
        failCount++;
    }
    matcher.appendTail(out);
    if (!out.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) {
        failCount++;
    }

    // Three matches in the input; the middle one is stepped over
    // without a replacement
    input = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
    pattern = Pattern.compile(toSupplementaries("(ab)(cd*)"));
    matcher = pattern.matcher(input);
    out = new StringBuilder();
    try {
        matcher.appendReplacement(out, "$1");
        failCount++;
    } catch (IllegalStateException expected) {
    }
    matcher.find();
    matcher.appendReplacement(out, "$1");
    if (!out.toString().equals(toSupplementaries("zzzab"))) {
        failCount++;
    }
    matcher.find();
    matcher.find();
    matcher.appendReplacement(out, "$2");
    if (!out.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) {
        failCount++;
    }
    matcher.appendTail(out);
    if (!out.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) {
        failCount++;
    }

    // An escaped $ must come through as a literal dollar sign
    input = toSupplementaries("zzzabcdcdefzzz");
    pattern = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
    matcher = pattern.matcher(input);
    out = new StringBuilder();
    matcher.find();
    matcher.appendReplacement(out, toSupplementaries("$1w\\$2w$3"));
    if (!out.toString().equals(toSupplementaries("zzzabw$2wef"))) {
        failCount++;
    }
    matcher.appendTail(out);
    if (!out.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) {
        failCount++;
    }

    // Referring to a group that does not exist must raise an error
    input = toSupplementaries("zzzabcdcdefzzz");
    pattern = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
    matcher = pattern.matcher(input);
    out = new StringBuilder();
    matcher.find();
    try {
        matcher.appendReplacement(out, toSupplementaries("$1w$5w$3"));
        failCount++;
    } catch (IndexOutOfBoundsException expected) {
        // Correct result
    }

    // Two-digit group references
    input = toSupplementaries("zzz123456789101112zzz");
    pattern = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
    matcher = pattern.matcher(input);
    out = new StringBuilder();
    matcher.find();
    matcher.appendReplacement(out, toSupplementaries("$1w$11w$3"));
    if (!out.toString().equals(toSupplementaries("zzz1w11w3"))) {
        failCount++;
    }

    // With only three groups, $15 must back off to $1 followed by "5"
    input = toSupplementaries("zzzabcdcdefzzz");
    pattern = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
    matcher = pattern.matcher(input);
    out = new StringBuilder();
    matcher.find();
    matcher.appendReplacement(out, toSupplementaries("$1w$15w$3"));
    if (!out.toString().equals(toSupplementaries("zzzabwab5wef"))) {
        failCount++;
    }

    // A replacement string that triggers IllegalArgumentException must
    // leave the output builder empty
    pattern = Pattern.compile("(abc)");
    matcher = pattern.matcher("abcd");
    out = new StringBuilder();
    matcher.find();
    try {
        matcher.appendReplacement(out, "xyz$g");
        failCount++;
    } catch (IllegalArgumentException expected) {
        if (out.length() != 0) {
            failCount++;
        }
    }

    report("SB Substitution 2");
}
/*
* 5 groups of characters are created to make a substitution string.
* A base string will be created including random lead chars, the
@ -3059,6 +3344,93 @@ public class RegExTest {
report("Substitution Basher");
}
/*
 * Randomized check of Matcher.replaceAll() with group references.
 * Each run builds five random groups of characters, concatenates them
 * into a substitution target, and surrounds that target with random
 * leading and trailing characters to form the input.
 * A pattern capturing the five groups is searched for and replaced with:
 * random group + random string + random group.
 * The results are checked for correctness.
 */
private static void substitutionBasher2() {
    for (int run = 0; run < 1000; run++) {
        // Start the input with a random number of random characters
        int leadLen = generator.nextInt(10);
        StringBuilder subject = new StringBuilder(100);
        String lead = getRandomAlphaString(leadLen);
        subject.append(lead);

        // Build five random groups; collect both their plain
        // concatenation and a pattern that captures each one
        StringBuilder target = new StringBuilder(25);
        StringBuilder regex = new StringBuilder(50);
        String[] groups = new String[5];
        for (int g = 0; g < 5; g++) {
            int size = generator.nextInt(5) + 1;
            groups[g] = getRandomAlphaString(size);
            target.append(groups[g]);
            regex.append('(').append(groups[g]).append(')');
        }
        String targetText = target.toString();
        String regexText = regex.toString();

        // The target goes right after the leading characters
        subject.append(targetText);

        // Finish the input with random trailing characters
        int trailLen = generator.nextInt(10);
        String trail = getRandomAlphaString(trailLen);
        subject.append(trail);
        String subjectText = subject.toString();

        // Create test pattern and matcher
        Pattern compiled = Pattern.compile(regexText);
        Matcher matcher = compiled.matcher(subjectText);

        // Discard this candidate if the pattern matches inside the
        // leading characters
        matcher.find();
        if (matcher.start() < leadLen) {
            continue;
        }

        // Discard this candidate if the pattern matches more than once
        if (matcher.find()) {
            continue;
        }

        // Replacement string: random group + random string + random group
        StringBuilder repl = new StringBuilder();
        int firstRef = generator.nextInt(5);
        repl.append('$').append(firstRef + 1);
        String middle = getRandomAlphaString(5);
        repl.append(middle);
        int secondRef = generator.nextInt(5);
        repl.append('$').append(secondRef + 1);
        String replacement = repl.toString();

        // Do the replacement
        String actual = matcher.replaceAll(replacement);

        // The match is swapped for the two chosen groups around the
        // random middle string; everything else stays in place
        String expected = lead + groups[firstRef] + middle
                + groups[secondRef] + trail;

        // Check results
        if (!actual.equals(expected)) {
            failCount++;
        }
    }

    report("Substitution Basher 2");
}
/**
* Checks the handling of some escape sequences that the Pattern
* class should process instead of the java compiler. These are