8311939: Excessive allocation of Matcher.groups array

Reviewed-by: rriggs, igraves
This commit is contained in:
Cristian Vat 2023-08-17 11:27:39 +00:00 committed by Raffaello Giulietti
parent ed585d16b9
commit 32efd23c5d
3 changed files with 66 additions and 4 deletions

View File

@ -247,8 +247,7 @@ public final class Matcher implements MatchResult {
this.text = text; this.text = text;
// Allocate state storage // Allocate state storage
int parentGroupCount = Math.max(parent.capturingGroupCount, 10); groups = new int[parent.capturingGroupCount * 2];
groups = new int[parentGroupCount * 2];
locals = new int[parent.localCount]; locals = new int[parent.localCount];
localsPos = new IntHashSet[parent.localTCNCount]; localsPos = new IntHashSet[parent.localTCNCount];
@ -422,8 +421,7 @@ public final class Matcher implements MatchResult {
namedGroups = null; namedGroups = null;
// Reallocate state storage // Reallocate state storage
int parentGroupCount = Math.max(newPattern.capturingGroupCount, 10); groups = new int[newPattern.capturingGroupCount * 2];
groups = new int[parentGroupCount * 2];
locals = new int[newPattern.localCount]; locals = new int[newPattern.localCount];
for (int i = 0; i < groups.length; i++) for (int i = 0; i < groups.length; i++)
groups[i] = -1; groups[i] = -1;

View File

@ -5187,6 +5187,12 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
groupIndex = groupCount + groupCount; groupIndex = groupCount + groupCount;
} }
boolean match(Matcher matcher, int i, CharSequence seq) { boolean match(Matcher matcher, int i, CharSequence seq) {
// a reference to a nonexistent group must never match
// a group does not exist if the matcher did not allocate space for it
if (groupIndex >= matcher.groups.length) {
return false;
}
int j = matcher.groups[groupIndex]; int j = matcher.groups[groupIndex];
int k = matcher.groups[groupIndex+1]; int k = matcher.groups[groupIndex+1];
@ -5223,6 +5229,12 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
this.doUnicodeCase = doUnicodeCase; this.doUnicodeCase = doUnicodeCase;
} }
boolean match(Matcher matcher, int i, CharSequence seq) { boolean match(Matcher matcher, int i, CharSequence seq) {
// a reference to a nonexistent group must never match
// a group does not exist if the matcher did not allocate space for it
if (groupIndex >= matcher.groups.length) {
return false;
}
int j = matcher.groups[groupIndex]; int j = matcher.groups[groupIndex];
int k = matcher.groups[groupIndex+1]; int k = matcher.groups[groupIndex+1];

View File

@ -2041,6 +2041,58 @@ public class RegExTest {
check(pattern, toSupplementaries("abcdefghijkk"), true); check(pattern, toSupplementaries("abcdefghijkk"), true);
} }
/**
 * Exercises case-insensitive ({@code (?i)}) back references: references to
 * existing groups, references to groups that were never declared, and the
 * two-digit {@code \11} form, first on plain input and then on the same
 * input run through {@code toSupplementaries}.
 */
@Test
public static void ciBackRefTest() {
    // Every pattern in this test carries the case-insensitive flag.
    final String ci = "(?i)";

    // Back references to a group that exists
    check(Pattern.compile(ci + "(a*)bc\\1"), "zzzaabcazzz", true);
    check(Pattern.compile(ci + "(a*)bc\\1"), "zzzaabcaazzz", true);
    check(Pattern.compile(ci + "(abc)(def)\\1"), "abcdefabc", true);

    // Back reference to a group number that was never declared
    check(Pattern.compile(ci + "(abc)(def)\\3"), "abcdefabc", false);

    // \1 through \9 must always be accepted as back references,
    // and must fail to match when the target group does not exist
    int ref = 1;
    while (ref < 10) {
        Pattern danglingRef = Pattern.compile(ci + "abcdef\\" + ref);
        check(danglingRef, "abcdef", false);
        ref++;
    }

    // Ten groups followed by \11: the expectations below only hold if
    // the input ends with the group 1 text followed by a literal '1'
    Pattern tenGroups = Pattern.compile(ci + "(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
    check(tenGroups, "abcdefghija", false);
    check(tenGroups, "abcdefghija1", true);

    // Eleven groups followed by \11: the input must repeat group 11
    check(Pattern.compile(ci + "(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"),
          "abcdefghijkk", true);

    // A single group followed by \11
    check(Pattern.compile(ci + "(a)bcdefghij\\11"), "abcdefghija1", true);

    // Supplementary character tests
    check(Pattern.compile(ci + toSupplementaries("(a*)bc\\1")),
          toSupplementaries("zzzaabcazzz"), true);
    check(Pattern.compile(ci + toSupplementaries("(a*)bc\\1")),
          toSupplementaries("zzzaabcaazzz"), true);
    check(Pattern.compile(ci + toSupplementaries("(abc)(def)\\1")),
          toSupplementaries("abcdefabc"), true);
    check(Pattern.compile(ci + toSupplementaries("(abc)(def)\\3")),
          toSupplementaries("abcdefabc"), false);

    Pattern tenGroupsSupp =
        Pattern.compile(ci + toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
    check(tenGroupsSupp, toSupplementaries("abcdefghija"), false);
    check(tenGroupsSupp, toSupplementaries("abcdefghija1"), true);

    check(Pattern.compile(ci + toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")),
          toSupplementaries("abcdefghijkk"), true);
}
/** /**
* Unicode Technical Report #18, section 2.6 End of Line * Unicode Technical Report #18, section 2.6 End of Line
* There is no empty line to be matched in the sequence \u000D\u000A * There is no empty line to be matched in the sequence \u000D\u000A