8247546: Pattern matching does not skip correctly over supplementary characters
Reviewed-by: joehw
This commit is contained in:
parent
6e32338330
commit
4946a162aa
@@ -1049,9 +1049,10 @@ public final class Pattern
|
|||||||
private transient int patternLength;
|
private transient int patternLength;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If the Start node might possibly match supplementary characters.
|
* If the Start node might possibly match supplementary or surrogate
|
||||||
|
* code points.
|
||||||
* It is set to true during compiling if
|
* It is set to true during compiling if
|
||||||
* (1) There is supplementary char in pattern, or
|
* (1) There is supplementary or surrogate code point in pattern, or
|
||||||
* (2) There is complement node of a "family" CharProperty
|
* (2) There is complement node of a "family" CharProperty
|
||||||
*/
|
*/
|
||||||
private transient boolean hasSupplementary;
|
private transient boolean hasSupplementary;
|
||||||
@@ -2948,8 +2949,10 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
|
|||||||
return null;
|
return null;
|
||||||
if (p instanceof BmpCharPredicate)
|
if (p instanceof BmpCharPredicate)
|
||||||
return new BmpCharProperty((BmpCharPredicate)p);
|
return new BmpCharProperty((BmpCharPredicate)p);
|
||||||
else
|
else {
|
||||||
|
hasSupplementary = true;
|
||||||
return new CharProperty(p);
|
return new CharProperty(p);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -5785,18 +5788,18 @@ NEXT: while (i <= last) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Charactrs within a explicit value range
|
* Characters within a explicit value range
|
||||||
*/
|
*/
|
||||||
static CharPredicate Range(int lower, int upper) {
|
static CharPredicate Range(int lower, int upper) {
|
||||||
if (upper < Character.MIN_HIGH_SURROGATE ||
|
if (upper < Character.MIN_HIGH_SURROGATE ||
|
||||||
lower > Character.MAX_HIGH_SURROGATE &&
|
lower > Character.MAX_LOW_SURROGATE &&
|
||||||
upper < Character.MIN_SUPPLEMENTARY_CODE_POINT)
|
upper < Character.MIN_SUPPLEMENTARY_CODE_POINT)
|
||||||
return (BmpCharPredicate)(ch -> inRange(lower, ch, upper));
|
return (BmpCharPredicate)(ch -> inRange(lower, ch, upper));
|
||||||
return ch -> inRange(lower, ch, upper);
|
return ch -> inRange(lower, ch, upper);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Charactrs within a explicit value range in a case insensitive manner.
|
* Characters within a explicit value range in a case insensitive manner.
|
||||||
*/
|
*/
|
||||||
static CharPredicate CIRange(int lower, int upper) {
|
static CharPredicate CIRange(int lower, int upper) {
|
||||||
return ch -> inRange(lower, ch, upper) ||
|
return ch -> inRange(lower, ch, upper) ||
|
||||||
|
@@ -36,7 +36,7 @@
|
|||||||
* 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895
|
* 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895
|
||||||
* 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
|
* 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
|
||||||
* 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034 8235812
|
* 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034 8235812
|
||||||
* 8216332 8214245 8237599 8241055
|
* 8216332 8214245 8237599 8241055 8247546
|
||||||
*
|
*
|
||||||
* @library /test/lib
|
* @library /test/lib
|
||||||
* @library /lib/testlibrary/java/lang
|
* @library /lib/testlibrary/java/lang
|
||||||
|
@@ -1,5 +1,5 @@
|
|||||||
//
|
//
|
||||||
// Copyright (c) 1999, 2009, Oracle and/or its affiliates. All rights reserved.
|
// Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||||
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
//
|
//
|
||||||
// This code is free software; you can redistribute it and/or modify it
|
// This code is free software; you can redistribute it and/or modify it
|
||||||
@@ -129,6 +129,31 @@ true \ud800\udc00pqr 0
|
|||||||
///\ud800\udc00
|
///\ud800\udc00
|
||||||
///false 0
|
///false 0
|
||||||
|
|
||||||
|
// unpaired surrogate should match
|
||||||
|
[\x{d800}-\x{dbff}\x{dc00}-\x{dfff}]
|
||||||
|
xxx\udca9\ud83dyyy
|
||||||
|
true \udca9 0
|
||||||
|
|
||||||
|
// surrogates in a supplementary character should not match
|
||||||
|
[\x{d800}-\x{dbff}\x{dc00}-\x{dfff}]
|
||||||
|
\ud83d\udca9
|
||||||
|
false 0
|
||||||
|
|
||||||
|
// unpaired surrogate should match
|
||||||
|
[\p{InHIGH_SURROGATES}\p{InLOW_SURROGATES}]
|
||||||
|
xxx\udca9\ud83dyyy
|
||||||
|
true \udca9 0
|
||||||
|
|
||||||
|
// surrogates part of a supplementary character should not match
|
||||||
|
[\p{InHIGH_SURROGATES}\p{InLOW_SURROGATES}]
|
||||||
|
\ud83d\udca9
|
||||||
|
false 0
|
||||||
|
|
||||||
|
// low surrogate part of a supplementary character should not match
|
||||||
|
[\x{dc00}-\x{dfff}]
|
||||||
|
\ud83d\udca9
|
||||||
|
false 0
|
||||||
|
|
||||||
// use of x modifier
|
// use of x modifier
|
||||||
\ud800\udc61bc(?x)bl\ud800\udc61h
|
\ud800\udc61bc(?x)bl\ud800\udc61h
|
||||||
\ud800\udc61bcbl\ud800\udc61h
|
\ud800\udc61bcbl\ud800\udc61h
|
||||||
|
Loading…
x
Reference in New Issue
Block a user