8305107: Emoji related binary properties in RegEx
Reviewed-by: iris, rriggs, jpai
This commit is contained in:
parent
5919fad1f4
commit
ee3023359c
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -196,6 +196,12 @@ class CharPredicates {
|
||||
case "ALPHABETIC" -> ALPHABETIC();
|
||||
case "ASSIGNED" -> ASSIGNED();
|
||||
case "CONTROL" -> CONTROL();
|
||||
case "EMOJI" -> EMOJI();
|
||||
case "EMOJI_PRESENTATION" -> EMOJI_PRESENTATION();
|
||||
case "EMOJI_MODIFIER" -> EMOJI_MODIFIER();
|
||||
case "EMOJI_MODIFIER_BASE" -> EMOJI_MODIFIER_BASE();
|
||||
case "EMOJI_COMPONENT" -> EMOJI_COMPONENT();
|
||||
case "EXTENDED_PICTOGRAPHIC" -> EXTENDED_PICTOGRAPHIC();
|
||||
case "HEXDIGIT", "HEX_DIGIT" -> HEX_DIGIT();
|
||||
case "IDEOGRAPHIC" -> IDEOGRAPHIC();
|
||||
case "JOINCONTROL", "JOIN_CONTROL" -> JOIN_CONTROL();
|
||||
@ -421,4 +427,27 @@ class CharPredicates {
|
||||
return ch -> ch < 128 && ASCII.isSpace(ch);
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/**
|
||||
* Emoji related binary properties
|
||||
*/
|
||||
static final CharPredicate EMOJI() {
|
||||
return Character::isEmoji;
|
||||
}
|
||||
static final CharPredicate EMOJI_PRESENTATION() {
|
||||
return Character::isEmojiPresentation;
|
||||
}
|
||||
static final CharPredicate EMOJI_MODIFIER() {
|
||||
return Character::isEmojiModifier;
|
||||
}
|
||||
static final CharPredicate EMOJI_MODIFIER_BASE() {
|
||||
return Character::isEmojiModifierBase;
|
||||
}
|
||||
static final CharPredicate EMOJI_COMPONENT() {
|
||||
return Character::isEmojiComponent;
|
||||
}
|
||||
static final CharPredicate EXTENDED_PICTOGRAPHIC() {
|
||||
return Character::isExtendedPictographic;
|
||||
}
|
||||
}
|
||||
|
@ -638,6 +638,12 @@ import jdk.internal.util.regex.Grapheme;
|
||||
* <li> Join_Control
|
||||
* <li> Noncharacter_Code_Point
|
||||
* <li> Assigned
|
||||
* <li> Emoji
|
||||
* <li> Emoji_Presentation
|
||||
* <li> Emoji_Modifier
|
||||
* <li> Emoji_Modifier_Base
|
||||
* <li> Emoji_Component
|
||||
* <li> Extended_Pictographic
|
||||
* </ul>
|
||||
* <p>
|
||||
* The following <b>Predefined Character classes</b> and <b>POSIX character classes</b>
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1999, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -36,7 +36,7 @@
|
||||
* 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
|
||||
* 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034 8235812
|
||||
* 8216332 8214245 8237599 8241055 8247546 8258259 8037397 8269753 8276694
|
||||
* 8280403 8264160 8281315
|
||||
* 8280403 8264160 8281315 8305107
|
||||
* @library /test/lib
|
||||
* @library /lib/testlibrary/java/lang
|
||||
* @build jdk.test.lib.RandomFactory
|
||||
@ -3717,7 +3717,6 @@ public class RegExTest {
|
||||
|
||||
Matcher lower = Pattern.compile("\\p{Lower}").matcher("");
|
||||
Matcher upper = Pattern.compile("\\p{Upper}").matcher("");
|
||||
Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher("");
|
||||
Matcher alpha = Pattern.compile("\\p{Alpha}").matcher("");
|
||||
Matcher digit = Pattern.compile("\\p{Digit}").matcher("");
|
||||
Matcher alnum = Pattern.compile("\\p{Alnum}").matcher("");
|
||||
@ -3728,12 +3727,10 @@ public class RegExTest {
|
||||
Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher("");
|
||||
Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher("");
|
||||
Matcher space = Pattern.compile("\\p{Space}").matcher("");
|
||||
Matcher bound = Pattern.compile("\\b").matcher("");
|
||||
Matcher word = Pattern.compile("\\w++").matcher("");
|
||||
// UNICODE_CHARACTER_CLASS
|
||||
Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
|
||||
Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
|
||||
Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
|
||||
Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
|
||||
Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
|
||||
Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
|
||||
@ -3766,6 +3763,13 @@ public class RegExTest {
|
||||
Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
|
||||
Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
|
||||
Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher("");
|
||||
// Emoji properties
|
||||
Matcher emojiP = Pattern.compile("\\p{IsEmoji}").matcher("");
|
||||
Matcher emojiPP = Pattern.compile("\\p{IsEmoji_Presentation}").matcher("");
|
||||
Matcher emojiMP = Pattern.compile("\\p{IsEmoji_Modifier}").matcher("");
|
||||
Matcher emojiMBP = Pattern.compile("\\p{IsEmoji_Modifier_Base}").matcher("");
|
||||
Matcher emojiCP = Pattern.compile("\\p{IsEmoji_Component}").matcher("");
|
||||
Matcher extPP = Pattern.compile("\\p{IsExtended_Pictographic}").matcher("");
|
||||
// javaMethod
|
||||
Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher("");
|
||||
Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher("");
|
||||
@ -3839,6 +3843,13 @@ public class RegExTest {
|
||||
(Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
|
||||
POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() ||
|
||||
POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches() ||
|
||||
// Emoji properties
|
||||
Character.isEmoji(cp) != emojiP.reset(str).matches() ||
|
||||
Character.isEmojiPresentation(cp) != emojiPP.reset(str).matches() ||
|
||||
Character.isEmojiModifier(cp) != emojiMP.reset(str).matches() ||
|
||||
Character.isEmojiModifierBase(cp)!= emojiMBP.reset(str).matches() ||
|
||||
Character.isEmojiComponent(cp) != emojiCP.reset(str).matches() ||
|
||||
Character.isExtendedPictographic(cp) != extPP.reset(str).matches() ||
|
||||
// gc_C
|
||||
(Character.CONTROL == type || Character.FORMAT == type ||
|
||||
Character.PRIVATE_USE == type || Character.SURROGATE == type ||
|
||||
|
Loading…
x
Reference in New Issue
Block a user