8247546: Pattern matching does not skip correctly over supplementary characters
Reviewed-by: joehw
This commit is contained in:
parent
6e32338330
commit
4946a162aa
@ -1049,9 +1049,10 @@ public final class Pattern
|
||||
private transient int patternLength;
|
||||
|
||||
/**
|
||||
* If the Start node might possibly match supplementary characters.
|
||||
* Whether the Start node might possibly match supplementary or surrogate
|
||||
* code points.
|
||||
* It is set to true during compiling if
|
||||
* (1) There is supplementary char in pattern, or
|
||||
* (1) There is a supplementary or surrogate code point in the pattern, or
|
||||
* (2) There is a complement node of a "family" CharProperty
|
||||
*/
|
||||
private transient boolean hasSupplementary;
|
||||
@ -2948,8 +2949,10 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
|
||||
return null;
|
||||
if (p instanceof BmpCharPredicate)
|
||||
return new BmpCharProperty((BmpCharPredicate)p);
|
||||
else
|
||||
else {
|
||||
hasSupplementary = true;
|
||||
return new CharProperty(p);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -5785,18 +5788,18 @@ NEXT: while (i <= last) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Charactrs within a explicit value range
|
||||
* Characters within an explicit value range
|
||||
*/
|
||||
static CharPredicate Range(int lower, int upper) {
|
||||
if (upper < Character.MIN_HIGH_SURROGATE ||
|
||||
lower > Character.MAX_HIGH_SURROGATE &&
|
||||
lower > Character.MAX_LOW_SURROGATE &&
|
||||
upper < Character.MIN_SUPPLEMENTARY_CODE_POINT)
|
||||
return (BmpCharPredicate)(ch -> inRange(lower, ch, upper));
|
||||
return ch -> inRange(lower, ch, upper);
|
||||
}
|
||||
|
||||
/**
|
||||
* Charactrs within a explicit value range in a case insensitive manner.
|
||||
* Characters within an explicit value range in a case-insensitive manner.
|
||||
*/
|
||||
static CharPredicate CIRange(int lower, int upper) {
|
||||
return ch -> inRange(lower, ch, upper) ||
|
||||
|
@ -36,7 +36,7 @@
|
||||
* 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895
|
||||
* 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
|
||||
* 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034 8235812
|
||||
* 8216332 8214245 8237599 8241055
|
||||
* 8216332 8214245 8237599 8241055 8247546
|
||||
*
|
||||
* @library /test/lib
|
||||
* @library /lib/testlibrary/java/lang
|
||||
|
@ -1,5 +1,5 @@
|
||||
//
|
||||
// Copyright (c) 1999, 2009, Oracle and/or its affiliates. All rights reserved.
|
||||
// Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
//
|
||||
// This code is free software; you can redistribute it and/or modify it
|
||||
@ -129,6 +129,31 @@ true \ud800\udc00pqr 0
|
||||
///\ud800\udc00
|
||||
///false 0
|
||||
|
||||
// unpaired surrogate should match
|
||||
[\x{d800}-\x{dbff}\x{dc00}-\x{dfff}]
|
||||
xxx\udca9\ud83dyyy
|
||||
true \udca9 0
|
||||
|
||||
// surrogates in a supplementary character should not match
|
||||
[\x{d800}-\x{dbff}\x{dc00}-\x{dfff}]
|
||||
\ud83d\udca9
|
||||
false 0
|
||||
|
||||
// unpaired surrogate should match
|
||||
[\p{InHIGH_SURROGATES}\p{InLOW_SURROGATES}]
|
||||
xxx\udca9\ud83dyyy
|
||||
true \udca9 0
|
||||
|
||||
// surrogates that are part of a supplementary character should not match
|
||||
[\p{InHIGH_SURROGATES}\p{InLOW_SURROGATES}]
|
||||
\ud83d\udca9
|
||||
false 0
|
||||
|
||||
// a low surrogate that is part of a supplementary character should not match
|
||||
[\x{dc00}-\x{dfff}]
|
||||
\ud83d\udca9
|
||||
false 0
|
||||
|
||||
// use of x modifier
|
||||
\ud800\udc61bc(?x)bl\ud800\udc61h
|
||||
\ud800\udc61bcbl\ud800\udc61h
|
||||
|
Loading…
Reference in New Issue
Block a user