From 32efd23c5d59c03a6376c92c63f5947e961ee24e Mon Sep 17 00:00:00 2001 From: Cristian Vat Date: Thu, 17 Aug 2023 11:27:39 +0000 Subject: [PATCH] 8311939: Excessive allocation of Matcher.groups array Reviewed-by: rriggs, igraves --- .../classes/java/util/regex/Matcher.java | 6 +-- .../classes/java/util/regex/Pattern.java | 12 +++++ test/jdk/java/util/regex/RegExTest.java | 52 +++++++++++++++++++ 3 files changed, 66 insertions(+), 4 deletions(-) diff --git a/src/java.base/share/classes/java/util/regex/Matcher.java b/src/java.base/share/classes/java/util/regex/Matcher.java index 8bc07ba0347..4d48e702cdc 100644 --- a/src/java.base/share/classes/java/util/regex/Matcher.java +++ b/src/java.base/share/classes/java/util/regex/Matcher.java @@ -247,8 +247,7 @@ public final class Matcher implements MatchResult { this.text = text; // Allocate state storage - int parentGroupCount = Math.max(parent.capturingGroupCount, 10); - groups = new int[parentGroupCount * 2]; + groups = new int[parent.capturingGroupCount * 2]; locals = new int[parent.localCount]; localsPos = new IntHashSet[parent.localTCNCount]; @@ -422,8 +421,7 @@ public final class Matcher implements MatchResult { namedGroups = null; // Reallocate state storage - int parentGroupCount = Math.max(newPattern.capturingGroupCount, 10); - groups = new int[parentGroupCount * 2]; + groups = new int[newPattern.capturingGroupCount * 2]; locals = new int[newPattern.localCount]; for (int i = 0; i < groups.length; i++) groups[i] = -1; diff --git a/src/java.base/share/classes/java/util/regex/Pattern.java b/src/java.base/share/classes/java/util/regex/Pattern.java index 45c48ddab54..054ad4d9da0 100644 --- a/src/java.base/share/classes/java/util/regex/Pattern.java +++ b/src/java.base/share/classes/java/util/regex/Pattern.java @@ -5187,6 +5187,12 @@ loop: for(int x=0, offset=0; x= matcher.groups.length) { + return false; + } + int j = matcher.groups[groupIndex]; int k = matcher.groups[groupIndex+1]; @@ -5223,6 +5229,12 @@ loop: for(int 
x=0, offset=0; x= matcher.groups.length) { + return false; + } + int j = matcher.groups[groupIndex]; int k = matcher.groups[groupIndex+1]; diff --git a/test/jdk/java/util/regex/RegExTest.java b/test/jdk/java/util/regex/RegExTest.java index 805b8a78d4d..17514521c06 100644 --- a/test/jdk/java/util/regex/RegExTest.java +++ b/test/jdk/java/util/regex/RegExTest.java @@ -2041,6 +2041,58 @@ public class RegExTest { check(pattern, toSupplementaries("abcdefghijkk"), true); } + @Test + public static void ciBackRefTest() { + Pattern pattern = Pattern.compile("(?i)(a*)bc\\1"); + check(pattern, "zzzaabcazzz", true); + + pattern = Pattern.compile("(?i)(a*)bc\\1"); + check(pattern, "zzzaabcaazzz", true); + + pattern = Pattern.compile("(?i)(abc)(def)\\1"); + check(pattern, "abcdefabc", true); + + pattern = Pattern.compile("(?i)(abc)(def)\\3"); + check(pattern, "abcdefabc", false); + + for (int i = 1; i < 10; i++) { + // Make sure backref 1-9 are always accepted + pattern = Pattern.compile("(?i)abcdef\\" + i); + // and fail to match if the target group does not exist + check(pattern, "abcdef", false); + } + + pattern = Pattern.compile("(?i)(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"); + check(pattern, "abcdefghija", false); + check(pattern, "abcdefghija1", true); + + pattern = Pattern.compile("(?i)(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"); + check(pattern, "abcdefghijkk", true); + + pattern = Pattern.compile("(?i)(a)bcdefghij\\11"); + check(pattern, "abcdefghija1", true); + + // Supplementary character tests + pattern = Pattern.compile("(?i)" + toSupplementaries("(a*)bc\\1")); + check(pattern, toSupplementaries("zzzaabcazzz"), true); + + pattern = Pattern.compile("(?i)" + toSupplementaries("(a*)bc\\1")); + check(pattern, toSupplementaries("zzzaabcaazzz"), true); + + pattern = Pattern.compile("(?i)" + toSupplementaries("(abc)(def)\\1")); + check(pattern, toSupplementaries("abcdefabc"), true); + + pattern = Pattern.compile("(?i)" + toSupplementaries("(abc)(def)\\3")); + check(pattern,
toSupplementaries("abcdefabc"), false); + + pattern = Pattern.compile("(?i)" + toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11")); + check(pattern, toSupplementaries("abcdefghija"), false); + check(pattern, toSupplementaries("abcdefghija1"), true); + + pattern = Pattern.compile("(?i)" + toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")); + check(pattern, toSupplementaries("abcdefghijkk"), true); + } + /** * Unicode Technical Report #18, section 2.6 End of Line * There is no empty line to be matched in the sequence \u000D\u000A