8247546: Pattern matching does not skip correctly over supplementary characters

Reviewed-by: joehw
This commit is contained in:
Naoto Sato 2020-07-29 09:49:43 -07:00
parent 6e32338330
commit 4946a162aa
3 changed files with 36 additions and 8 deletions

View File

@ -1049,9 +1049,10 @@ public final class Pattern
private transient int patternLength;
/**
* If the Start node might possibly match supplementary characters.
* If the Start node might possibly match supplementary or surrogate
* code points.
* It is set to true during compiling if
* (1) There is supplementary char in pattern, or
* (1) There is a supplementary or surrogate code point in the pattern, or
* (2) There is complement node of a "family" CharProperty
*/
private transient boolean hasSupplementary;
@ -2948,8 +2949,10 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
return null;
if (p instanceof BmpCharPredicate)
return new BmpCharProperty((BmpCharPredicate)p);
else
else {
hasSupplementary = true;
return new CharProperty(p);
}
}
/**
@ -5785,18 +5788,18 @@ NEXT: while (i <= last) {
}
/**
* Charactrs within a explicit value range
* Characters within an explicit value range
*/
static CharPredicate Range(int lower, int upper) {
    // The range ends before the first high surrogate.
    boolean belowSurrogates = upper < Character.MIN_HIGH_SURROGATE;
    // The range starts after the last low surrogate and ends before the
    // first supplementary code point. Being above MAX_LOW_SURROGATE already
    // implies being above MAX_HIGH_SURROGATE, so one comparison suffices.
    boolean betweenSurrogatesAndSupplementary =
            lower > Character.MAX_LOW_SURROGATE
            && upper < Character.MIN_SUPPLEMENTARY_CODE_POINT;

    if (belowSurrogates || betweenSurrogatesAndSupplementary) {
        // No surrogate or supplementary code point can fall in the range,
        // so the predicate is typed as BMP-only.
        BmpCharPredicate bmpOnly = ch -> inRange(lower, ch, upper);
        return bmpOnly;
    }
    // The range may touch surrogates or supplementary code points:
    // hand back a plain CharPredicate.
    return ch -> inRange(lower, ch, upper);
}
/**
* Charactrs within a explicit value range in a case insensitive manner.
* Characters within an explicit value range in a case insensitive manner.
*/
static CharPredicate CIRange(int lower, int upper) {
return ch -> inRange(lower, ch, upper) ||

View File

@ -36,7 +36,7 @@
* 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895
* 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
* 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034 8235812
* 8216332 8214245 8237599 8241055
* 8216332 8214245 8237599 8241055 8247546
*
* @library /test/lib
* @library /lib/testlibrary/java/lang

View File

@ -1,5 +1,5 @@
//
// Copyright (c) 1999, 2009, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@ -129,6 +129,31 @@ true \ud800\udc00pqr 0
///\ud800\udc00
///false 0
// unpaired surrogate should match
[\x{d800}-\x{dbff}\x{dc00}-\x{dfff}]
xxx\udca9\ud83dyyy
true \udca9 0
// surrogates in a supplementary character should not match
[\x{d800}-\x{dbff}\x{dc00}-\x{dfff}]
\ud83d\udca9
false 0
// unpaired surrogate should match
[\p{InHIGH_SURROGATES}\p{InLOW_SURROGATES}]
xxx\udca9\ud83dyyy
true \udca9 0
// surrogates that are part of a supplementary character should not match
[\p{InHIGH_SURROGATES}\p{InLOW_SURROGATES}]
\ud83d\udca9
false 0
// low surrogate part of a supplementary character should not match
[\x{dc00}-\x{dfff}]
\ud83d\udca9
false 0
// use of x modifier
\ud800\udc61bc(?x)bl\ud800\udc61h
\ud800\udc61bcbl\ud800\udc61h