8281315: Unicode, (?i) flag and backreference throwing IndexOutOfBounds Exception

Reviewed-by: naoto
This commit is contained in:
Ian Graves 2022-02-22 16:31:57 +00:00
parent 957dae02b1
commit 3cb38678aa
2 changed files with 25 additions and 4 deletions

View File

@ -5060,14 +5060,14 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
int j = matcher.groups[groupIndex];
int k = matcher.groups[groupIndex+1];
int groupSize = k - j;
int groupSizeChars = k - j; //Group size in chars
// If the referenced group didn't match, neither can this
if (j < 0)
return false;
// If there isn't enough input left no match
if (i + groupSize > matcher.to) {
if (i + groupSizeChars > matcher.to) {
matcher.hitEnd = true;
return false;
}
@ -5075,7 +5075,13 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
// Check each new char to make sure it matches what the group
// referenced matched last time around
int x = i;
for (int index=0; index<groupSize; index++) {
// groupCodepoints starts out as the number of chars in the group, which
// is only an upper bound on the number of code points it contains; it is
// reduced by one each time a 2-char code point is seen.
int groupCodepoints = groupSizeChars;
for (int index=0; index<groupCodepoints; index++) {
int c1 = Character.codePointAt(seq, x);
int c2 = Character.codePointAt(seq, j);
if (c1 != c2) {
@ -5093,9 +5099,15 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
}
x += Character.charCount(c1);
j += Character.charCount(c2);
if(c1 >= Character.MIN_SUPPLEMENTARY_CODE_POINT) {
//Group size is guessed in terms of chars, but we need to
//adjust if we spot a 2-char codePoint.
groupCodepoints--;
}
}
return next.match(matcher, i+groupSize, seq);
return next.match(matcher, i+groupSizeChars, seq);
}
boolean study(TreeInfo info) {
info.maxValid = false;

View File

@ -4556,4 +4556,13 @@ public class RegExTest {
Pattern.compile(pattern));
assertTrue(e.getMessage().contains("Bad intersection syntax"));
}
// Regression test for 8281315: with the (?i) flag set, a backreference to a
// group that captured a 2-char (surrogate pair) code point must still be
// found when the input is that same code point, U+1F495, three times over.
@Test
public static void iOOBForCIBackrefs() {
    Pattern repeatedCodePoint = Pattern.compile("(?i)(.)\\1{2,}");
    String threeHearts = "\ud83d\udc95\ud83d\udc95\ud83d\udc95";
    boolean found = repeatedCodePoint.matcher(threeHearts).find();
    assertTrue(found);
}
}