From ee3023359caed3be4fe4cd829f04ede99d17ae86 Mon Sep 17 00:00:00 2001 From: Naoto Sato Date: Wed, 5 Apr 2023 16:04:27 +0000 Subject: [PATCH] 8305107: Emoji related binary properties in RegEx Reviewed-by: iris, rriggs, jpai --- .../java/util/regex/CharPredicates.java | 31 ++++++++++++++++++- .../classes/java/util/regex/Pattern.java | 6 ++++ test/jdk/java/util/regex/RegExTest.java | 21 ++++++++++--- 3 files changed, 52 insertions(+), 6 deletions(-) diff --git a/src/java.base/share/classes/java/util/regex/CharPredicates.java b/src/java.base/share/classes/java/util/regex/CharPredicates.java index b650855bcb9..e7b2714d8cf 100644 --- a/src/java.base/share/classes/java/util/regex/CharPredicates.java +++ b/src/java.base/share/classes/java/util/regex/CharPredicates.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -196,6 +196,12 @@ class CharPredicates { case "ALPHABETIC" -> ALPHABETIC(); case "ASSIGNED" -> ASSIGNED(); case "CONTROL" -> CONTROL(); + case "EMOJI" -> EMOJI(); + case "EMOJI_PRESENTATION" -> EMOJI_PRESENTATION(); + case "EMOJI_MODIFIER" -> EMOJI_MODIFIER(); + case "EMOJI_MODIFIER_BASE" -> EMOJI_MODIFIER_BASE(); + case "EMOJI_COMPONENT" -> EMOJI_COMPONENT(); + case "EXTENDED_PICTOGRAPHIC" -> EXTENDED_PICTOGRAPHIC(); case "HEXDIGIT", "HEX_DIGIT" -> HEX_DIGIT(); case "IDEOGRAPHIC" -> IDEOGRAPHIC(); case "JOINCONTROL", "JOIN_CONTROL" -> JOIN_CONTROL(); @@ -421,4 +427,27 @@ class CharPredicates { return ch -> ch < 128 && ASCII.isSpace(ch); } + ///////////////////////////////////////////////////////////////////////////// + + /** + * Emoji related binary properties + */ + static final CharPredicate EMOJI() { + return Character::isEmoji; + } + static final CharPredicate EMOJI_PRESENTATION() { + return Character::isEmojiPresentation; + } + static final CharPredicate EMOJI_MODIFIER() { + return Character::isEmojiModifier; + } + static final CharPredicate EMOJI_MODIFIER_BASE() { + return Character::isEmojiModifierBase; + } + static final CharPredicate EMOJI_COMPONENT() { + return Character::isEmojiComponent; + } + static final CharPredicate EXTENDED_PICTOGRAPHIC() { + return Character::isExtendedPictographic; + } } diff --git a/src/java.base/share/classes/java/util/regex/Pattern.java b/src/java.base/share/classes/java/util/regex/Pattern.java index 3c744783ed8..9fffc2cf07d 100644 --- a/src/java.base/share/classes/java/util/regex/Pattern.java +++ b/src/java.base/share/classes/java/util/regex/Pattern.java @@ -638,6 +638,12 @@ import jdk.internal.util.regex.Grapheme; *
  <li> Join_Control *
  <li> Noncharacter_Code_Point *
  <li> Assigned + *
  <li> Emoji + *
  <li> Emoji_Presentation + *
  <li> Emoji_Modifier + *
  <li> Emoji_Modifier_Base + *
  <li> Emoji_Component + *
  <li> Extended_Pictographic * </ul> * <p>

    * The following Predefined Character classes and POSIX character classes diff --git a/test/jdk/java/util/regex/RegExTest.java b/test/jdk/java/util/regex/RegExTest.java index f43b34a42ba..805b8a78d4d 100644 --- a/test/jdk/java/util/regex/RegExTest.java +++ b/test/jdk/java/util/regex/RegExTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -36,7 +36,7 @@ * 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706 * 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034 8235812 * 8216332 8214245 8237599 8241055 8247546 8258259 8037397 8269753 8276694 - * 8280403 8264160 8281315 + * 8280403 8264160 8281315 8305107 * @library /test/lib * @library /lib/testlibrary/java/lang * @build jdk.test.lib.RandomFactory @@ -3717,7 +3717,6 @@ public class RegExTest { Matcher lower = Pattern.compile("\\p{Lower}").matcher(""); Matcher upper = Pattern.compile("\\p{Upper}").matcher(""); - Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher(""); Matcher alpha = Pattern.compile("\\p{Alpha}").matcher(""); Matcher digit = Pattern.compile("\\p{Digit}").matcher(""); Matcher alnum = Pattern.compile("\\p{Alnum}").matcher(""); @@ -3728,12 +3727,10 @@ public class RegExTest { Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher(""); Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher(""); Matcher space = Pattern.compile("\\p{Space}").matcher(""); - Matcher bound = Pattern.compile("\\b").matcher(""); Matcher word = Pattern.compile("\\w++").matcher(""); // UNICODE_CHARACTER_CLASS Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); - Matcher ASCIIU = 
Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); @@ -3766,6 +3763,13 @@ public class RegExTest { Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher(""); Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher(""); Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher(""); + // Emoji properties + Matcher emojiP = Pattern.compile("\\p{IsEmoji}").matcher(""); + Matcher emojiPP = Pattern.compile("\\p{IsEmoji_Presentation}").matcher(""); + Matcher emojiMP = Pattern.compile("\\p{IsEmoji_Modifier}").matcher(""); + Matcher emojiMBP = Pattern.compile("\\p{IsEmoji_Modifier_Base}").matcher(""); + Matcher emojiCP = Pattern.compile("\\p{IsEmoji_Component}").matcher(""); + Matcher extPP = Pattern.compile("\\p{IsExtended_Pictographic}").matcher(""); // javaMethod Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher(""); Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher(""); @@ -3839,6 +3843,13 @@ public class RegExTest { (Character.UNASSIGNED == type) == definedP.reset(str).matches() || POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() || POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches() || + // Emoji properties + Character.isEmoji(cp) != emojiP.reset(str).matches() || + Character.isEmojiPresentation(cp) != emojiPP.reset(str).matches() || + Character.isEmojiModifier(cp) != emojiMP.reset(str).matches() || + Character.isEmojiModifierBase(cp)!= emojiMBP.reset(str).matches() || + Character.isEmojiComponent(cp) != emojiCP.reset(str).matches() || + Character.isExtendedPictographic(cp) != extPP.reset(str).matches() || // gc_C (Character.CONTROL == type || Character.FORMAT == type || 
Character.PRIVATE_USE == type || Character.SURROGATE == type ||