8305107: Emoji related binary properties in RegEx
Reviewed-by: iris, rriggs, jpai
This commit is contained in:
parent
5919fad1f4
commit
ee3023359c
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2011, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -196,6 +196,12 @@ class CharPredicates {
|
|||||||
case "ALPHABETIC" -> ALPHABETIC();
|
case "ALPHABETIC" -> ALPHABETIC();
|
||||||
case "ASSIGNED" -> ASSIGNED();
|
case "ASSIGNED" -> ASSIGNED();
|
||||||
case "CONTROL" -> CONTROL();
|
case "CONTROL" -> CONTROL();
|
||||||
|
case "EMOJI" -> EMOJI();
|
||||||
|
case "EMOJI_PRESENTATION" -> EMOJI_PRESENTATION();
|
||||||
|
case "EMOJI_MODIFIER" -> EMOJI_MODIFIER();
|
||||||
|
case "EMOJI_MODIFIER_BASE" -> EMOJI_MODIFIER_BASE();
|
||||||
|
case "EMOJI_COMPONENT" -> EMOJI_COMPONENT();
|
||||||
|
case "EXTENDED_PICTOGRAPHIC" -> EXTENDED_PICTOGRAPHIC();
|
||||||
case "HEXDIGIT", "HEX_DIGIT" -> HEX_DIGIT();
|
case "HEXDIGIT", "HEX_DIGIT" -> HEX_DIGIT();
|
||||||
case "IDEOGRAPHIC" -> IDEOGRAPHIC();
|
case "IDEOGRAPHIC" -> IDEOGRAPHIC();
|
||||||
case "JOINCONTROL", "JOIN_CONTROL" -> JOIN_CONTROL();
|
case "JOINCONTROL", "JOIN_CONTROL" -> JOIN_CONTROL();
|
||||||
@ -421,4 +427,27 @@ class CharPredicates {
|
|||||||
return ch -> ch < 128 && ASCII.isSpace(ch);
|
return ch -> ch < 128 && ASCII.isSpace(ch);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Emoji-related binary properties
|
||||||
|
*/
|
||||||
|
static final CharPredicate EMOJI() {
|
||||||
|
// Predicate for the "EMOJI" binary property name; delegates to Character.isEmoji.
return Character::isEmoji;
|
||||||
|
}
|
||||||
|
static final CharPredicate EMOJI_PRESENTATION() {
|
||||||
|
// Predicate for the "EMOJI_PRESENTATION" binary property name; delegates to Character.isEmojiPresentation.
return Character::isEmojiPresentation;
|
||||||
|
}
|
||||||
|
static final CharPredicate EMOJI_MODIFIER() {
|
||||||
|
// Predicate for the "EMOJI_MODIFIER" binary property name; delegates to Character.isEmojiModifier.
return Character::isEmojiModifier;
|
||||||
|
}
|
||||||
|
static final CharPredicate EMOJI_MODIFIER_BASE() {
|
||||||
|
// Predicate for the "EMOJI_MODIFIER_BASE" binary property name; delegates to Character.isEmojiModifierBase.
return Character::isEmojiModifierBase;
|
||||||
|
}
|
||||||
|
static final CharPredicate EMOJI_COMPONENT() {
|
||||||
|
// Predicate for the "EMOJI_COMPONENT" binary property name; delegates to Character.isEmojiComponent.
return Character::isEmojiComponent;
|
||||||
|
}
|
||||||
|
static final CharPredicate EXTENDED_PICTOGRAPHIC() {
|
||||||
|
// Predicate for the "EXTENDED_PICTOGRAPHIC" binary property name; delegates to Character.isExtendedPictographic.
return Character::isExtendedPictographic;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -638,6 +638,12 @@ import jdk.internal.util.regex.Grapheme;
|
|||||||
* <li> Join_Control
|
* <li> Join_Control
|
||||||
* <li> Noncharacter_Code_Point
|
* <li> Noncharacter_Code_Point
|
||||||
* <li> Assigned
|
* <li> Assigned
|
||||||
|
* <li> Emoji
|
||||||
|
* <li> Emoji_Presentation
|
||||||
|
* <li> Emoji_Modifier
|
||||||
|
* <li> Emoji_Modifier_Base
|
||||||
|
* <li> Emoji_Component
|
||||||
|
* <li> Extended_Pictographic
|
||||||
* </ul>
|
* </ul>
|
||||||
* <p>
|
* <p>
|
||||||
* The following <b>Predefined Character classes</b> and <b>POSIX character classes</b>
|
* The following <b>Predefined Character classes</b> and <b>POSIX character classes</b>
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 1999, 2022, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -36,7 +36,7 @@
|
|||||||
* 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
|
* 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
|
||||||
* 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034 8235812
|
* 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034 8235812
|
||||||
* 8216332 8214245 8237599 8241055 8247546 8258259 8037397 8269753 8276694
|
* 8216332 8214245 8237599 8241055 8247546 8258259 8037397 8269753 8276694
|
||||||
* 8280403 8264160 8281315
|
* 8280403 8264160 8281315 8305107
|
||||||
* @library /test/lib
|
* @library /test/lib
|
||||||
* @library /lib/testlibrary/java/lang
|
* @library /lib/testlibrary/java/lang
|
||||||
* @build jdk.test.lib.RandomFactory
|
* @build jdk.test.lib.RandomFactory
|
||||||
@ -3717,7 +3717,6 @@ public class RegExTest {
|
|||||||
|
|
||||||
Matcher lower = Pattern.compile("\\p{Lower}").matcher("");
|
Matcher lower = Pattern.compile("\\p{Lower}").matcher("");
|
||||||
Matcher upper = Pattern.compile("\\p{Upper}").matcher("");
|
Matcher upper = Pattern.compile("\\p{Upper}").matcher("");
|
||||||
Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher("");
|
|
||||||
Matcher alpha = Pattern.compile("\\p{Alpha}").matcher("");
|
Matcher alpha = Pattern.compile("\\p{Alpha}").matcher("");
|
||||||
Matcher digit = Pattern.compile("\\p{Digit}").matcher("");
|
Matcher digit = Pattern.compile("\\p{Digit}").matcher("");
|
||||||
Matcher alnum = Pattern.compile("\\p{Alnum}").matcher("");
|
Matcher alnum = Pattern.compile("\\p{Alnum}").matcher("");
|
||||||
@ -3728,12 +3727,10 @@ public class RegExTest {
|
|||||||
Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher("");
|
Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher("");
|
||||||
Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher("");
|
Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher("");
|
||||||
Matcher space = Pattern.compile("\\p{Space}").matcher("");
|
Matcher space = Pattern.compile("\\p{Space}").matcher("");
|
||||||
Matcher bound = Pattern.compile("\\b").matcher("");
|
|
||||||
Matcher word = Pattern.compile("\\w++").matcher("");
|
Matcher word = Pattern.compile("\\w++").matcher("");
|
||||||
// UNICODE_CHARACTER_CLASS
|
// UNICODE_CHARACTER_CLASS
|
||||||
Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
|
Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
|
||||||
Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
|
Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
|
||||||
Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
|
|
||||||
Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
|
Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
|
||||||
Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
|
Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
|
||||||
Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
|
Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
|
||||||
@ -3766,6 +3763,13 @@ public class RegExTest {
|
|||||||
Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
|
Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
|
||||||
Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
|
Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
|
||||||
Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher("");
|
Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher("");
|
||||||
|
// Emoji properties
|
||||||
|
Matcher emojiP = Pattern.compile("\\p{IsEmoji}").matcher("");
|
||||||
|
Matcher emojiPP = Pattern.compile("\\p{IsEmoji_Presentation}").matcher("");
|
||||||
|
Matcher emojiMP = Pattern.compile("\\p{IsEmoji_Modifier}").matcher("");
|
||||||
|
Matcher emojiMBP = Pattern.compile("\\p{IsEmoji_Modifier_Base}").matcher("");
|
||||||
|
Matcher emojiCP = Pattern.compile("\\p{IsEmoji_Component}").matcher("");
|
||||||
|
Matcher extPP = Pattern.compile("\\p{IsExtended_Pictographic}").matcher("");
|
||||||
// javaMethod
|
// javaMethod
|
||||||
Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher("");
|
Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher("");
|
||||||
Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher("");
|
Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher("");
|
||||||
@ -3839,6 +3843,13 @@ public class RegExTest {
|
|||||||
(Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
|
(Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
|
||||||
POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() ||
|
POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() ||
|
||||||
POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches() ||
|
POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches() ||
|
||||||
|
// Emoji properties
|
||||||
|
Character.isEmoji(cp) != emojiP.reset(str).matches() ||
|
||||||
|
Character.isEmojiPresentation(cp) != emojiPP.reset(str).matches() ||
|
||||||
|
Character.isEmojiModifier(cp) != emojiMP.reset(str).matches() ||
|
||||||
|
Character.isEmojiModifierBase(cp)!= emojiMBP.reset(str).matches() ||
|
||||||
|
Character.isEmojiComponent(cp) != emojiCP.reset(str).matches() ||
|
||||||
|
Character.isExtendedPictographic(cp) != extPP.reset(str).matches() ||
|
||||||
// gc_C
|
// gc_C
|
||||||
(Character.CONTROL == type || Character.FORMAT == type ||
|
(Character.CONTROL == type || Character.FORMAT == type ||
|
||||||
Character.PRIVATE_USE == type || Character.SURROGATE == type ||
|
Character.PRIVATE_USE == type || Character.SURROGATE == type ||
|
||||||
|
Loading…
Reference in New Issue
Block a user