8311939: Excessive allocation of Matcher.groups array
Reviewed-by: rriggs, igraves
This commit is contained in:
parent
ed585d16b9
commit
32efd23c5d
@ -247,8 +247,7 @@ public final class Matcher implements MatchResult {
|
|||||||
this.text = text;
|
this.text = text;
|
||||||
|
|
||||||
// Allocate state storage
|
// Allocate state storage
|
||||||
int parentGroupCount = Math.max(parent.capturingGroupCount, 10);
|
groups = new int[parent.capturingGroupCount * 2];
|
||||||
groups = new int[parentGroupCount * 2];
|
|
||||||
locals = new int[parent.localCount];
|
locals = new int[parent.localCount];
|
||||||
localsPos = new IntHashSet[parent.localTCNCount];
|
localsPos = new IntHashSet[parent.localTCNCount];
|
||||||
|
|
||||||
@ -422,8 +421,7 @@ public final class Matcher implements MatchResult {
|
|||||||
namedGroups = null;
|
namedGroups = null;
|
||||||
|
|
||||||
// Reallocate state storage
|
// Reallocate state storage
|
||||||
int parentGroupCount = Math.max(newPattern.capturingGroupCount, 10);
|
groups = new int[newPattern.capturingGroupCount * 2];
|
||||||
groups = new int[parentGroupCount * 2];
|
|
||||||
locals = new int[newPattern.localCount];
|
locals = new int[newPattern.localCount];
|
||||||
for (int i = 0; i < groups.length; i++)
|
for (int i = 0; i < groups.length; i++)
|
||||||
groups[i] = -1;
|
groups[i] = -1;
|
||||||
|
@ -5187,6 +5187,12 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
|
|||||||
groupIndex = groupCount + groupCount;
|
groupIndex = groupCount + groupCount;
|
||||||
}
|
}
|
||||||
boolean match(Matcher matcher, int i, CharSequence seq) {
|
boolean match(Matcher matcher, int i, CharSequence seq) {
|
||||||
|
// a reference to a non-existent group must never match
|
||||||
|
// group does not exist if matcher didn't allocate space for it
|
||||||
|
if (groupIndex >= matcher.groups.length) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
int j = matcher.groups[groupIndex];
|
int j = matcher.groups[groupIndex];
|
||||||
int k = matcher.groups[groupIndex+1];
|
int k = matcher.groups[groupIndex+1];
|
||||||
|
|
||||||
@ -5223,6 +5229,12 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
|
|||||||
this.doUnicodeCase = doUnicodeCase;
|
this.doUnicodeCase = doUnicodeCase;
|
||||||
}
|
}
|
||||||
boolean match(Matcher matcher, int i, CharSequence seq) {
|
boolean match(Matcher matcher, int i, CharSequence seq) {
|
||||||
|
// a reference to a non-existent group must never match
|
||||||
|
// group does not exist if matcher didn't allocate space for it
|
||||||
|
if (groupIndex >= matcher.groups.length) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
int j = matcher.groups[groupIndex];
|
int j = matcher.groups[groupIndex];
|
||||||
int k = matcher.groups[groupIndex+1];
|
int k = matcher.groups[groupIndex+1];
|
||||||
|
|
||||||
|
@ -2041,6 +2041,58 @@ public class RegExTest {
|
|||||||
check(pattern, toSupplementaries("abcdefghijkk"), true);
|
check(pattern, toSupplementaries("abcdefghijkk"), true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public static void ciBackRefTest() {
|
||||||
|
Pattern pattern = Pattern.compile("(?i)(a*)bc\\1");
|
||||||
|
check(pattern, "zzzaabcazzz", true);
|
||||||
|
|
||||||
|
pattern = Pattern.compile("(?i)(a*)bc\\1");
|
||||||
|
check(pattern, "zzzaabcaazzz", true);
|
||||||
|
|
||||||
|
pattern = Pattern.compile("(?i)(abc)(def)\\1");
|
||||||
|
check(pattern, "abcdefabc", true);
|
||||||
|
|
||||||
|
pattern = Pattern.compile("(?i)(abc)(def)\\3");
|
||||||
|
check(pattern, "abcdefabc", false);
|
||||||
|
|
||||||
|
for (int i = 1; i < 10; i++) {
|
||||||
|
// Make sure backrefs 1-9 are always accepted
|
||||||
|
pattern = Pattern.compile("(?i)abcdef\\" + i);
|
||||||
|
// and fail to match if the target group does not exist
|
||||||
|
check(pattern, "abcdef", false);
|
||||||
|
}
|
||||||
|
|
||||||
|
pattern = Pattern.compile("(?i)(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
|
||||||
|
check(pattern, "abcdefghija", false);
|
||||||
|
check(pattern, "abcdefghija1", true);
|
||||||
|
|
||||||
|
pattern = Pattern.compile("(?i)(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11");
|
||||||
|
check(pattern, "abcdefghijkk", true);
|
||||||
|
|
||||||
|
pattern = Pattern.compile("(?i)(a)bcdefghij\\11");
|
||||||
|
check(pattern, "abcdefghija1", true);
|
||||||
|
|
||||||
|
// Supplementary character tests
|
||||||
|
pattern = Pattern.compile("(?i)" + toSupplementaries("(a*)bc\\1"));
|
||||||
|
check(pattern, toSupplementaries("zzzaabcazzz"), true);
|
||||||
|
|
||||||
|
pattern = Pattern.compile("(?i)" + toSupplementaries("(a*)bc\\1"));
|
||||||
|
check(pattern, toSupplementaries("zzzaabcaazzz"), true);
|
||||||
|
|
||||||
|
pattern = Pattern.compile("(?i)" + toSupplementaries("(abc)(def)\\1"));
|
||||||
|
check(pattern, toSupplementaries("abcdefabc"), true);
|
||||||
|
|
||||||
|
pattern = Pattern.compile("(?i)" + toSupplementaries("(abc)(def)\\3"));
|
||||||
|
check(pattern, toSupplementaries("abcdefabc"), false);
|
||||||
|
|
||||||
|
pattern = Pattern.compile("(?i)" + toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
|
||||||
|
check(pattern, toSupplementaries("abcdefghija"), false);
|
||||||
|
check(pattern, toSupplementaries("abcdefghija1"), true);
|
||||||
|
|
||||||
|
pattern = Pattern.compile("(?i)" + toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"));
|
||||||
|
check(pattern, toSupplementaries("abcdefghijkk"), true);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Unicode Technical Report #18, section 2.6 End of Line
|
* Unicode Technical Report #18, section 2.6 End of Line
|
||||||
* There is no empty line to be matched in the sequence \u000D\u000A
|
* There is no empty line to be matched in the sequence \u000D\u000A
|
||||||
|
Loading…
x
Reference in New Issue
Block a user