8281315: Unicode, (?i) flag and backreference throwing IndexOutOfBoundsException
Reviewed-by: naoto
This commit is contained in:
parent
957dae02b1
commit
3cb38678aa
@ -5060,14 +5060,14 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
|
|||||||
int j = matcher.groups[groupIndex];
|
int j = matcher.groups[groupIndex];
|
||||||
int k = matcher.groups[groupIndex+1];
|
int k = matcher.groups[groupIndex+1];
|
||||||
|
|
||||||
int groupSize = k - j;
|
int groupSizeChars = k - j; //Group size in chars
|
||||||
|
|
||||||
// If the referenced group didn't match, neither can this
|
// If the referenced group didn't match, neither can this
|
||||||
if (j < 0)
|
if (j < 0)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// If there isn't enough input left no match
|
// If there isn't enough input left no match
|
||||||
if (i + groupSize > matcher.to) {
|
if (i + groupSizeChars > matcher.to) {
|
||||||
matcher.hitEnd = true;
|
matcher.hitEnd = true;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -5075,7 +5075,13 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
|
|||||||
// Check each new char to make sure it matches what the group
|
// Check each new char to make sure it matches what the group
|
||||||
// referenced matched last time around
|
// referenced matched last time around
|
||||||
int x = i;
|
int x = i;
|
||||||
for (int index=0; index<groupSize; index++) {
|
|
||||||
|
// We set groupCodepoints to the number of chars
|
||||||
|
// in the given subsequence, but this is only an upper-bound estimate:
|
||||||
|
// we reduce it by one each time we spot a 2-char (supplementary) code point.
|
||||||
|
int groupCodepoints = groupSizeChars;
|
||||||
|
|
||||||
|
for (int index=0; index<groupCodepoints; index++) {
|
||||||
int c1 = Character.codePointAt(seq, x);
|
int c1 = Character.codePointAt(seq, x);
|
||||||
int c2 = Character.codePointAt(seq, j);
|
int c2 = Character.codePointAt(seq, j);
|
||||||
if (c1 != c2) {
|
if (c1 != c2) {
|
||||||
@ -5093,9 +5099,15 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
|
|||||||
}
|
}
|
||||||
x += Character.charCount(c1);
|
x += Character.charCount(c1);
|
||||||
j += Character.charCount(c2);
|
j += Character.charCount(c2);
|
||||||
|
|
||||||
|
if(c1 >= Character.MIN_SUPPLEMENTARY_CODE_POINT) {
|
||||||
|
//Group size is guessed in terms of chars, but we need to
|
||||||
|
//adjust if we spot a 2-char codePoint.
|
||||||
|
groupCodepoints--;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return next.match(matcher, i+groupSize, seq);
|
return next.match(matcher, i+groupSizeChars, seq);
|
||||||
}
|
}
|
||||||
boolean study(TreeInfo info) {
|
boolean study(TreeInfo info) {
|
||||||
info.maxValid = false;
|
info.maxValid = false;
|
||||||
|
@ -4556,4 +4556,13 @@ public class RegExTest {
|
|||||||
Pattern.compile(pattern));
|
Pattern.compile(pattern));
|
||||||
assertTrue(e.getMessage().contains("Bad intersection syntax"));
|
assertTrue(e.getMessage().contains("Bad intersection syntax"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Regression test for 8281315: a (?i) backreference over supplementary characters must not throw IndexOutOfBoundsException
|
||||||
|
@Test
|
||||||
|
public static void iOOBForCIBackrefs(){
|
||||||
|
String line = "\ud83d\udc95\ud83d\udc95\ud83d\udc95";
|
||||||
|
var pattern2 = Pattern.compile("(?i)(.)\\1{2,}");
|
||||||
|
assertTrue(pattern2.matcher(line).find());
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user