diff --git a/make/jdk/src/classes/build/tools/generatecharacter/EmojiData.java b/make/jdk/src/classes/build/tools/generatecharacter/EmojiData.java new file mode 100644 index 00000000000..bc2716a4179 --- /dev/null +++ b/make/jdk/src/classes/build/tools/generatecharacter/EmojiData.java @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +package build.tools.generatecharacter; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.AbstractMap; +import java.util.Map; +import java.util.Set; +import java.util.function.Predicate; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.Stream; + +/** + * A class holding emoji character properties + * https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files + */ +class EmojiData { + // Emoji properties map + private final Map<Integer, Long> emojiProps; + + static EmojiData readSpecFile(Path file, int plane) throws IOException { + return new EmojiData(file, plane); + } + + EmojiData(Path file, int plane) throws IOException { + emojiProps = Files.readAllLines(file).stream() + .map(line -> line.split("#", 2)[0]) + .filter(Predicate.not(String::isBlank)) + .map(line -> line.split("[ \t]*;[ \t]*", 2)) + .flatMap(map -> { + var range = map[0].split("\\.\\.", 2); + var start = Integer.valueOf(range[0], 16); + if ((start >> 16) != plane) { + return Stream.empty(); + } else { + return range.length == 1 ? 
+ Stream.of(new AbstractMap.SimpleEntry<>(start, convertType(map[1].trim()))) : + IntStream.rangeClosed(start, Integer.valueOf(range[1], 16)) + .mapToObj(cp -> new AbstractMap.SimpleEntry<>(cp, convertType(map[1].trim()))); + } + }) + .collect(Collectors.toMap(AbstractMap.SimpleEntry::getKey, + AbstractMap.SimpleEntry::getValue, + (v1, v2) -> v1 | v2)); + } + + long properties(int cp) { + return emojiProps.get(cp); + } + + Set<Integer> codepoints() { + return emojiProps.keySet(); + } + + private static long convertType(String type) { + return switch (type) { + case "Emoji" -> GenerateCharacter.maskEmoji; + case "Emoji_Presentation" -> GenerateCharacter.maskEmojiPresentation; + case "Emoji_Modifier" -> GenerateCharacter.maskEmojiModifier; + case "Emoji_Modifier_Base" -> GenerateCharacter.maskEmojiModifierBase; + case "Emoji_Component" -> GenerateCharacter.maskEmojiComponent; + case "Extended_Pictographic" -> GenerateCharacter.maskExtendedPictographic; + default -> throw new InternalError("Unrecognizable Emoji type: " + type); + }; + } +} diff --git a/make/jdk/src/classes/build/tools/generatecharacter/GenerateCharacter.java b/make/jdk/src/classes/build/tools/generatecharacter/GenerateCharacter.java index 3fbb631433c..8d6a703f1c9 100644 --- a/make/jdk/src/classes/build/tools/generatecharacter/GenerateCharacter.java +++ b/make/jdk/src/classes/build/tools/generatecharacter/GenerateCharacter.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -33,6 +33,7 @@ import java.io.PrintWriter; import java.io.BufferedWriter; import java.io.FileWriter; import java.io.File; +import java.nio.file.Paths; import java.util.List; import build.tools.generatecharacter.CharacterName; @@ -74,6 +75,7 @@ public class GenerateCharacter { static String DefaultSpecialCasingFileName = ROOT + "SpecialCasing.txt"; static String DefaultPropListFileName = ROOT + "PropList.txt"; static String DefaultDerivedPropsFileName = ROOT + "DerivedCoreProperties.txt"; + static String DefaultEmojiDataFileName = ROOT + "emoji-data.txt"; static String DefaultJavaTemplateFileName = ROOT + "Character.java.template"; static String DefaultJavaOutputFileName = ROOT + "Character.java"; static String DefaultCTemplateFileName = ROOT + "Character.c.template"; @@ -105,7 +107,7 @@ public class GenerateCharacter { entries are short rather than byte). */ - /* The character properties are currently encoded into A (32 bits) and B (8 bits) + /* The character properties are currently encoded into A (32 bits) and B (16 bits) two parts. 
A: the low 32 bits are defined in the following manner: @@ -160,6 +162,13 @@ public class GenerateCharacter { 1 bit Ideographic property 1 bit ID_Start property 1 bit ID_Continue property + 6 bits for Emoji properties :- + 1 bit for Emoji + 1 bit for Emoji_Presentation + 1 bit for Emoji_Modifier + 1 bit for Emoji_Modifier_Base + 1 bit for Emoji_Component + 1 bit for Extended_Pictographic */ @@ -188,15 +197,21 @@ public class GenerateCharacter { // maskMirrored needs to be long, if up 16-bit private static final long maskMirrored = 0x80000000L; - // bit masks identify the 8-bit property field described above, in B + // bit masks identify the 16-bit property field described above, in B // table - private static final long - maskOtherLowercase = 0x0100000000L, - maskOtherUppercase = 0x0200000000L, - maskOtherAlphabetic = 0x0400000000L, - maskIdeographic = 0x0800000000L, - maskIDStart = 0x1000000000L, - maskIDContinue = 0x2000000000L; + static final long + maskOtherLowercase = 1L << 32, + maskOtherUppercase = 1L << 33, + maskOtherAlphabetic = 1L << 34, + maskIdeographic = 1L << 35, + maskIDStart = 1L << 36, + maskIDContinue = 1L << 37, + maskEmoji = 1L << 38, + maskEmojiPresentation = 1L << 39, + maskEmojiModifier = 1L << 40, + maskEmojiModifierBase = 1L << 41, + maskEmojiComponent = 1L << 42, + maskExtendedPictographic = 1L << 43; // Can compare masked values with these to determine // numeric or lexical types. @@ -304,7 +319,7 @@ public class GenerateCharacter { * @see GenerateCharacter#buildOne */ - static long[] buildMap(UnicodeSpec[] data, SpecialCaseMap[] specialMaps, PropList propList) + static long[] buildMap(UnicodeSpec[] data, SpecialCaseMap[] specialMaps, PropList propList, EmojiData emojiData) { long[] result = new long[bLatin1 ? 
256 : 1 << 16]; int k = 0; @@ -361,6 +376,9 @@ public class GenerateCharacter { addExProp(result, propList, "ID_Start", maskIDStart); addExProp(result, propList, "ID_Continue", maskIDContinue); + // add Emoji properties to the upper 16-bit + addEmojiProps(result, emojiData); + return result; } @@ -583,6 +601,14 @@ public class GenerateCharacter { } } + static void addEmojiProps(long[] map, EmojiData emojiData) { + for (int cp : emojiData.codepoints()) { + var index = cp & 0xFFFF; + if (index < map.length) + map[index] |= emojiData.properties(cp); + } + } + /** * This is the heart of the table compression strategy. The inputs are a map * and a number of bits (size). The map is simply an array of long integer values; @@ -776,6 +802,12 @@ OUTER: for (int i = 0; i < n; i += m) { if (x.equals("maskIdeographic")) return "0x" + hex4(maskIdeographic >> 32); if (x.equals("maskIDStart")) return "0x" + hex4(maskIDStart >> 32); if (x.equals("maskIDContinue")) return "0x" + hex4(maskIDContinue >> 32); + if (x.equals("maskEmoji")) return "0x" + hex4(maskEmoji >> 32); + if (x.equals("maskEmojiPresentation")) return "0x" + hex4(maskEmojiPresentation >> 32); + if (x.equals("maskEmojiModifier")) return "0x" + hex4(maskEmojiModifier >> 32); + if (x.equals("maskEmojiModifierBase")) return "0x" + hex4(maskEmojiModifierBase >> 32); + if (x.equals("maskEmojiComponent")) return "0x" + hex4(maskEmojiComponent >> 32); + if (x.equals("maskExtendedPictographic")) return "0x" + hex4(maskExtendedPictographic >> 32); if (x.equals("valueIgnorable")) return "0x" + hex8(valueIgnorable); if (x.equals("valueJavaUnicodeStart")) return "0x" + hex8(valueJavaUnicodeStart); if (x.equals("valueJavaOnlyStart")) return "0x" + hex8(valueJavaOnlyStart); @@ -952,7 +984,7 @@ OUTER: for (int i = 0; i < n; i += m) { // If we ever need more than 32 bits to represent the character properties, // then a table "B" may be needed as well. 
- genTable(result, "B", tables[n - 1], 32, 8, sizes[n - 1], false, 0, true, true, false); + genTable(result, "B", tables[n - 1], 32, 16, sizes[n - 1], false, 0, true, true, false); totalBytes += ((((tables[n - 1].length * (identifiers ? 2 : 32)) + 31) >> 5) << 2); result.append(commentStart); @@ -1434,6 +1466,42 @@ OUTER: for (int i = 0; i < n; i += m) { result.append(", supradecimal "); result.append((val & maskDigitOffset) >> shiftDigitOffset); } + if ((val & maskOtherLowercase) == maskOtherLowercase) { + result.append(", otherLowercase"); + } + if ((val & maskOtherUppercase) == maskOtherUppercase) { + result.append(", otherUppercase"); + } + if ((val & maskOtherAlphabetic) == maskOtherAlphabetic) { + result.append(", otherAlphabetic"); + } + if ((val & maskIdeographic) == maskIdeographic) { + result.append(", ideographic"); + } + if ((val & maskIDStart) == maskIDStart) { + result.append(", IDStart"); + } + if ((val & maskIDContinue) == maskIDContinue) { + result.append(", IDContinue"); + } + if ((val & maskEmoji) == maskEmoji) { + result.append(", emoji"); + } + if ((val & maskEmojiPresentation) == maskEmojiPresentation) { + result.append(", emojiPresentation"); + } + if ((val & maskEmojiModifier) == maskEmojiModifier) { + result.append(", emojiModifier"); + } + if ((val & maskEmojiModifierBase) == maskEmojiModifierBase) { + result.append(", emojiModifierBase"); + } + if ((val & maskEmojiComponent) == maskEmojiComponent) { + result.append(", emojiComponent"); + } + if ((val & maskExtendedPictographic) == maskExtendedPictographic) { + result.append(", extendedPictographic"); + } } static String[] tableNames = { "X", "Y", "Z", "P", "Q", "R", "S", "T", "U", "V", "W" }; @@ -1512,6 +1580,7 @@ OUTER: for (int i = 0; i < n; i += m) { static String SpecialCasingFileName = null; static String PropListFileName = null; static String DerivedPropsFileName = null; + static String EmojiDataFileName = null; static boolean useCharForByte = false; static int[] sizes; static int 
bins = 0; // liu; if > 0, then perform search @@ -1649,6 +1718,14 @@ OUTER: for (int i = 0; i < n; i += m) { DerivedPropsFileName = args[++j]; } } + else if (args[j].equals("-emojidata")) { + if (j == args.length -1) { + FAIL("File name missing after -emojidata"); + } + else { + EmojiDataFileName = args[++j]; + } + } else if (args[j].equals("-plane")) { if (j == args.length -1) { FAIL("Plane number missing after -plane"); @@ -1717,6 +1794,10 @@ OUTER: for (int i = 0; i < n; i += m) { DerivedPropsFileName = DefaultDerivedPropsFileName; desc.append(" [-derivedprops " + DerivedPropsFileName + ']'); } + if (EmojiDataFileName == null) { + EmojiDataFileName = DefaultEmojiDataFileName; + desc.append(" [-emojidata " + EmojiDataFileName + ']'); + } if (TemplateFileName == null) { TemplateFileName = (Csyntax ? DefaultCTemplateFileName : DefaultJavaTemplateFileName); @@ -1871,11 +1952,12 @@ OUTER: for (int i = 0; i < n; i += m) { specialCaseMaps = SpecialCaseMap.readSpecFile(new File(SpecialCasingFileName), plane); PropList propList = PropList.readSpecFile(new File(PropListFileName), plane); propList.putAll(PropList.readSpecFile(new File(DerivedPropsFileName), plane)); + EmojiData emojiData = EmojiData.readSpecFile(Paths.get(EmojiDataFileName), plane); if (verbose) { System.out.println(data.length + " items read from Unicode spec file " + UnicodeSpecFileName); // liu } - long[] map = buildMap(data, specialCaseMaps, propList); + long[] map = buildMap(data, specialCaseMaps, propList, emojiData); if (verbose) { System.err.println("Completed building of initial map"); } diff --git a/make/jdk/src/classes/build/tools/generateemojidata/GenerateEmojiData.java b/make/jdk/src/classes/build/tools/generateemojidata/GenerateEmojiData.java deleted file mode 100644 index 64154f2ddcb..00000000000 --- a/make/jdk/src/classes/build/tools/generateemojidata/GenerateEmojiData.java +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved. 
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. 
- */ - -package build.tools.generateemojidata; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.nio.file.StandardOpenOption; -import java.util.ArrayList; -import java.util.List; -import java.util.function.Predicate; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -/** - * Generate EmojiData.java - * args[0]: Full path string to the template file - * args[1]: Full path string to the directory that contains "emoji-data.txt" - * args[2]: Full path string to the generated .java file - */ -public class GenerateEmojiData { - public static void main(String[] args) { - try { - final Range[] last = new Range[1]; // last extended pictographic range - last[0] = new Range(0, 0); - - List extPictRanges = Files.lines(Paths.get(args[1], "emoji", "emoji-data.txt")) - .filter(Predicate.not(l -> l.startsWith("#") || l.isBlank())) - .filter(l -> l.contains("; Extended_Pictograph")) - .map(l -> new Range(l.replaceFirst(" .*", ""))) - .sorted() - .collect(ArrayList::new, - (list, r) -> { - // collapsing consecutive pictographic ranges - int lastIndex = list.size() - 1; - if (lastIndex >= 0) { - Range lastRange = list.get(lastIndex); - if (lastRange.last + 1 == r.start) { - list.set(lastIndex, new Range(lastRange.start, r.last)); - return; - } - } - list.add(r); - }, - ArrayList::addAll); - - - // make the code point conditions - // only very few codepoints below 0x2000 are "emojis", so separate them - // out to generate a fast-path check that can be efficiently inlined - String lowExtPictCodePoints = extPictRanges.stream() - .takeWhile(r -> r.last < 0x2000) - .map(r -> rangeToString(r)) - .collect(Collectors.joining(" ||\n", "", ";\n")); - - String highExtPictCodePoints = extPictRanges.stream() - .dropWhile(r -> r.last < 0x2000) - .map(r -> rangeToString(r)) - .collect(Collectors.joining(" ||\n", "", ";\n")); - - // Generate EmojiData.java file - Files.write(Paths.get(args[2]), - 
Files.lines(Paths.get(args[0])) - .flatMap(l -> { - if (l.equals("%%%EXTPICT_LOW%%%")) { - return Stream.of(lowExtPictCodePoints); - } else if (l.equals("%%%EXTPICT_HIGH%%%")) { - return Stream.of(highExtPictCodePoints); - } else { - return Stream.of(l); - } - }) - .collect(Collectors.toList()), - StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); - } catch (IOException e) { - e.printStackTrace(); - } - } - - static String rangeToString(Range r) { - if (r.start == r.last) { - return (" ".repeat(16) + "cp == 0x" + toHexString(r.start)); - } else if (r.start == r.last - 1) { - return " ".repeat(16) + "cp == 0x" + toHexString(r.start) + " ||\n" + - " ".repeat(16) + "cp == 0x" + toHexString(r.last); - } else { - return " ".repeat(15) + "(cp >= 0x" + toHexString(r.start) + - " && cp <= 0x" + toHexString(r.last) + ")"; - } - } - - static int toInt(String hexStr) { - return Integer.parseUnsignedInt(hexStr, 16); - } - - static String toHexString(int cp) { - String ret = Integer.toUnsignedString(cp, 16).toUpperCase(); - if (ret.length() < 4) { - ret = "0".repeat(4 - ret.length()) + ret; - } - return ret; - } - - static class Range implements Comparable { - int start; - int last; - - Range (int start, int last) { - this.start = start; - this.last = last; - } - - Range (String input) { - input = input.replaceFirst("\\s#.*", ""); - start = toInt(input.replaceFirst("[\\s\\.].*", "")); - last = input.contains("..") ? 
- toInt(input.replaceFirst(".*\\.\\.", "") - .replaceFirst(";.*", "").trim()) - : start; - } - - @Override - public String toString() { - return "Start: " + toHexString(start) + ", Last: " + toHexString(last); - } - - @Override - public int compareTo(Range other) { - return Integer.compare(start, other.start); - } - } -} diff --git a/make/modules/java.base/Gensrc.gmk b/make/modules/java.base/Gensrc.gmk index c85347fbf68..5d9a9bbcc22 100644 --- a/make/modules/java.base/Gensrc.gmk +++ b/make/modules/java.base/Gensrc.gmk @@ -1,5 +1,5 @@ # -# Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2011, 2023, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -34,7 +34,6 @@ include gensrc/GensrcBuffer.gmk include gensrc/GensrcExceptions.gmk include gensrc/GensrcVarHandles.gmk include gensrc/GensrcModuleLoaderMap.gmk -include gensrc/GensrcEmojiData.gmk include gensrc/GensrcScopedMemoryAccess.gmk # GensrcLocaleData.gmk does not set TARGETS, so we must choose which targets diff --git a/make/modules/java.base/gensrc/GensrcCharacterData.gmk b/make/modules/java.base/gensrc/GensrcCharacterData.gmk index 115a28309a2..23544c6a4cf 100644 --- a/make/modules/java.base/gensrc/GensrcCharacterData.gmk +++ b/make/modules/java.base/gensrc/GensrcCharacterData.gmk @@ -49,6 +49,7 @@ define SetupCharacterData -specialcasing $(UNICODEDATA)/SpecialCasing.txt \ -proplist $(UNICODEDATA)/PropList.txt \ -derivedprops $(UNICODEDATA)/DerivedCoreProperties.txt \ + -emojidata $(UNICODEDATA)/emoji/emoji-data.txt \ -o $(SUPPORT_OUTPUTDIR)/gensrc/java.base/java/lang/$1.java \ -usecharforbyte $3 diff --git a/make/modules/java.base/gensrc/GensrcEmojiData.gmk b/make/modules/java.base/gensrc/GensrcEmojiData.gmk deleted file mode 100644 index e9b030db679..00000000000 --- a/make/modules/java.base/gensrc/GensrcEmojiData.gmk +++ 
/dev/null @@ -1,43 +0,0 @@ -# -# Copyright (c) 2019, 2022, Oracle and/or its affiliates. All rights reserved. -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -# -# This code is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License version 2 only, as -# published by the Free Software Foundation. Oracle designates this -# particular file as subject to the "Classpath" exception as provided -# by Oracle in the LICENSE file that accompanied this code. -# -# This code is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -# version 2 for more details (a copy is included in the LICENSE file that -# accompanied this code). -# -# You should have received a copy of the GNU General Public License version -# 2 along with this work; if not, write to the Free Software Foundation, -# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -# -# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -# or visit www.oracle.com if you need additional information or have any -# questions. 
-# - -# -# Rules to create $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/util/regex/EmojiData.java -# - -GENSRC_EMOJIDATA := $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/util/regex/EmojiData.java - -EMOJIDATATEMP = $(MODULE_SRC)/share/classes/jdk/internal/util/regex/EmojiData.java.template -UNICODEDATA = $(MODULE_SRC)/share/data/unicodedata - -$(GENSRC_EMOJIDATA): $(BUILD_TOOLS_JDK) $(EMOJIDATATEMP) $(UNICODEDATA)/emoji/emoji-data.txt - $(call LogInfo, Generating $@) - $(call MakeTargetDir) - $(TOOL_GENERATEEMOJIDATA) \ - $(EMOJIDATATEMP) \ - $(UNICODEDATA) \ - $(GENSRC_EMOJIDATA) - -TARGETS += $(GENSRC_EMOJIDATA) diff --git a/src/java.base/share/classes/java/lang/Character.java b/src/java.base/share/classes/java/lang/Character.java index bb995d7e75e..a91f9131e24 100644 --- a/src/java.base/share/classes/java/lang/Character.java +++ b/src/java.base/share/classes/java/lang/Character.java @@ -10781,6 +10781,113 @@ class Character implements java.io.Serializable, Comparable, Constabl return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint); } + /** + * Determines if the specified character (Unicode code point) is an Emoji. + *

+ * A character is considered to be an Emoji if and only if it has the {@code Emoji} + * property, defined in + * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> + * Unicode Emoji (Technical Standard #51)</a>. + * + * @param codePoint the character (Unicode code point) to be tested. + * @return {@code true} if the character is an Emoji; + * {@code false} otherwise. + * @since 21 + */ + public static boolean isEmoji(int codePoint) { + return CharacterData.of(codePoint).isEmoji(codePoint); + } + + /** + * Determines if the specified character (Unicode code point) has the + * Emoji Presentation property by default. + *

+ * A character is considered to have the Emoji Presentation property if and + * only if it has the {@code Emoji_Presentation} property, defined in + * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> + * Unicode Emoji (Technical Standard #51)</a>. + * + * @param codePoint the character (Unicode code point) to be tested. + * @return {@code true} if the character has the Emoji Presentation + * property; {@code false} otherwise. + * @since 21 + */ + public static boolean isEmojiPresentation(int codePoint) { + return CharacterData.of(codePoint).isEmojiPresentation(codePoint); + } + + /** + * Determines if the specified character (Unicode code point) is an + * Emoji Modifier. + *

+ * A character is considered to be an Emoji Modifier if and only if it has + * the {@code Emoji_Modifier} property, defined in + * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> + * Unicode Emoji (Technical Standard #51)</a>. + * + * @param codePoint the character (Unicode code point) to be tested. + * @return {@code true} if the character is an Emoji Modifier; + * {@code false} otherwise. + * @since 21 + */ + public static boolean isEmojiModifier(int codePoint) { + return CharacterData.of(codePoint).isEmojiModifier(codePoint); + } + + /** + * Determines if the specified character (Unicode code point) is an + * Emoji Modifier Base. + *

+ * A character is considered to be an Emoji Modifier Base if and only if it has + * the {@code Emoji_Modifier_Base} property, defined in + * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> + * Unicode Emoji (Technical Standard #51)</a>. + * + * @param codePoint the character (Unicode code point) to be tested. + * @return {@code true} if the character is an Emoji Modifier Base; + * {@code false} otherwise. + * @since 21 + */ + public static boolean isEmojiModifierBase(int codePoint) { + return CharacterData.of(codePoint).isEmojiModifierBase(codePoint); + } + + /** + * Determines if the specified character (Unicode code point) is an + * Emoji Component. + *

+ * A character is considered to be an Emoji Component if and only if it has + * the {@code Emoji_Component} property, defined in + * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> + * Unicode Emoji (Technical Standard #51)</a>. + * + * @param codePoint the character (Unicode code point) to be tested. + * @return {@code true} if the character is an Emoji Component; + * {@code false} otherwise. + * @since 21 + */ + public static boolean isEmojiComponent(int codePoint) { + return CharacterData.of(codePoint).isEmojiComponent(codePoint); + } + + /** + * Determines if the specified character (Unicode code point) is + * an Extended Pictographic. + *

+ * A character is considered to be an Extended Pictographic if and only if it has + * the {@code Extended_Pictographic} property, defined in + * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> + * Unicode Emoji (Technical Standard #51)</a>. + * + * @param codePoint the character (Unicode code point) to be tested. + * @return {@code true} if the character is an Extended Pictographic; + * {@code false} otherwise. + * @since 21 + */ + public static boolean isExtendedPictographic(int codePoint) { + return CharacterData.of(codePoint).isExtendedPictographic(codePoint); + } + /** * Converts the character argument to lowercase using case * mapping information from the UnicodeData file. diff --git a/src/java.base/share/classes/java/lang/CharacterData.java b/src/java.base/share/classes/java/lang/CharacterData.java index 03162039bde..d75853fe9ca 100644 --- a/src/java.base/share/classes/java/lang/CharacterData.java +++ b/src/java.base/share/classes/java/lang/CharacterData.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -38,6 +38,12 @@ abstract class CharacterData { abstract boolean isUnicodeIdentifierStart(int ch); abstract boolean isUnicodeIdentifierPart(int ch); abstract boolean isIdentifierIgnorable(int ch); + abstract boolean isEmoji(int ch); + abstract boolean isEmojiPresentation(int ch); + abstract boolean isEmojiModifier(int ch); + abstract boolean isEmojiModifierBase(int ch); + abstract boolean isEmojiComponent(int ch); + abstract boolean isExtendedPictographic(int ch); abstract int toLowerCase(int ch); abstract int toUpperCase(int ch); abstract int toTitleCase(int ch); diff --git a/src/java.base/share/classes/java/lang/CharacterData00.java.template b/src/java.base/share/classes/java/lang/CharacterData00.java.template index d8e8b0cf8f1..2b16b410e61 100644 --- a/src/java.base/share/classes/java/lang/CharacterData00.java.template +++ b/src/java.base/share/classes/java/lang/CharacterData00.java.template @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -120,6 +120,30 @@ class CharacterData00 extends CharacterData { return ((props & $$maskIdentifierInfo) == $$valueIgnorable); } + boolean isEmoji(int ch) { + return (getPropertiesEx(ch) & $$maskEmoji) != 0; + } + + boolean isEmojiPresentation(int ch) { + return (getPropertiesEx(ch) & $$maskEmojiPresentation) != 0; + } + + boolean isEmojiModifier(int ch) { + return (getPropertiesEx(ch) & $$maskEmojiModifier) != 0; + } + + boolean isEmojiModifierBase(int ch) { + return (getPropertiesEx(ch) & $$maskEmojiModifierBase) != 0; + } + + boolean isEmojiComponent(int ch) { + return (getPropertiesEx(ch) & $$maskEmojiComponent) != 0; + } + + boolean isExtendedPictographic(int ch) { + return (getPropertiesEx(ch) & $$maskExtendedPictographic) != 0; + } + int toLowerCase(int ch) { int mapChar = ch; int val = getProperties(ch); diff --git a/src/java.base/share/classes/java/lang/CharacterData01.java.template b/src/java.base/share/classes/java/lang/CharacterData01.java.template index f7897d9c254..0c25f633391 100644 --- a/src/java.base/share/classes/java/lang/CharacterData01.java.template +++ b/src/java.base/share/classes/java/lang/CharacterData01.java.template @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -117,6 +117,30 @@ class CharacterData01 extends CharacterData { return ((props & $$maskIdentifierInfo) == $$valueIgnorable); } + boolean isEmoji(int ch) { + return (getPropertiesEx(ch) & $$maskEmoji) != 0; + } + + boolean isEmojiPresentation(int ch) { + return (getPropertiesEx(ch) & $$maskEmojiPresentation) != 0; + } + + boolean isEmojiModifier(int ch) { + return (getPropertiesEx(ch) & $$maskEmojiModifier) != 0; + } + + boolean isEmojiModifierBase(int ch) { + return (getPropertiesEx(ch) & $$maskEmojiModifierBase) != 0; + } + + boolean isEmojiComponent(int ch) { + return (getPropertiesEx(ch) & $$maskEmojiComponent) != 0; + } + + boolean isExtendedPictographic(int ch) { + return (getPropertiesEx(ch) & $$maskExtendedPictographic) != 0; + } + int toLowerCase(int ch) { int mapChar = ch; int val = getProperties(ch); diff --git a/src/java.base/share/classes/java/lang/CharacterData02.java.template b/src/java.base/share/classes/java/lang/CharacterData02.java.template index 410f8ec9e99..ff5a1c30189 100644 --- a/src/java.base/share/classes/java/lang/CharacterData02.java.template +++ b/src/java.base/share/classes/java/lang/CharacterData02.java.template @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -116,6 +116,30 @@ class CharacterData02 extends CharacterData { return ((props & $$maskIdentifierInfo) == $$valueIgnorable); } + boolean isEmoji(int ch) { + return (getPropertiesEx(ch) & $$maskEmoji) != 0; + } + + boolean isEmojiPresentation(int ch) { + return (getPropertiesEx(ch) & $$maskEmojiPresentation) != 0; + } + + boolean isEmojiModifier(int ch) { + return (getPropertiesEx(ch) & $$maskEmojiModifier) != 0; + } + + boolean isEmojiModifierBase(int ch) { + return (getPropertiesEx(ch) & $$maskEmojiModifierBase) != 0; + } + + boolean isEmojiComponent(int ch) { + return (getPropertiesEx(ch) & $$maskEmojiComponent) != 0; + } + + boolean isExtendedPictographic(int ch) { + return (getPropertiesEx(ch) & $$maskExtendedPictographic) != 0; + } + int toLowerCase(int ch) { int mapChar = ch; int val = getProperties(ch); diff --git a/src/java.base/share/classes/java/lang/CharacterData03.java.template b/src/java.base/share/classes/java/lang/CharacterData03.java.template index d390a0cfd0e..27d1f4b9a69 100644 --- a/src/java.base/share/classes/java/lang/CharacterData03.java.template +++ b/src/java.base/share/classes/java/lang/CharacterData03.java.template @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -116,6 +116,30 @@ class CharacterData03 extends CharacterData { return ((props & $$maskIdentifierInfo) == $$valueIgnorable); } + boolean isEmoji(int ch) { + return (getPropertiesEx(ch) & $$maskEmoji) != 0; + } + + boolean isEmojiPresentation(int ch) { + return (getPropertiesEx(ch) & $$maskEmojiPresentation) != 0; + } + + boolean isEmojiModifier(int ch) { + return (getPropertiesEx(ch) & $$maskEmojiModifier) != 0; + } + + boolean isEmojiModifierBase(int ch) { + return (getPropertiesEx(ch) & $$maskEmojiModifierBase) != 0; + } + + boolean isEmojiComponent(int ch) { + return (getPropertiesEx(ch) & $$maskEmojiComponent) != 0; + } + + boolean isExtendedPictographic(int ch) { + return (getPropertiesEx(ch) & $$maskExtendedPictographic) != 0; + } + int toLowerCase(int ch) { int mapChar = ch; int val = getProperties(ch); diff --git a/src/java.base/share/classes/java/lang/CharacterData0E.java.template b/src/java.base/share/classes/java/lang/CharacterData0E.java.template index f9ffcd58e16..f2ac14682ac 100644 --- a/src/java.base/share/classes/java/lang/CharacterData0E.java.template +++ b/src/java.base/share/classes/java/lang/CharacterData0E.java.template @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -116,6 +116,30 @@ class CharacterData0E extends CharacterData { return ((props & $$maskIdentifierInfo) == $$valueIgnorable); } + boolean isEmoji(int ch) { + return (getPropertiesEx(ch) & $$maskEmoji) != 0; + } + + boolean isEmojiPresentation(int ch) { + return (getPropertiesEx(ch) & $$maskEmojiPresentation) != 0; + } + + boolean isEmojiModifier(int ch) { + return (getPropertiesEx(ch) & $$maskEmojiModifier) != 0; + } + + boolean isEmojiModifierBase(int ch) { + return (getPropertiesEx(ch) & $$maskEmojiModifierBase) != 0; + } + + boolean isEmojiComponent(int ch) { + return (getPropertiesEx(ch) & $$maskEmojiComponent) != 0; + } + + boolean isExtendedPictographic(int ch) { + return (getPropertiesEx(ch) & $$maskExtendedPictographic) != 0; + } + int toLowerCase(int ch) { int mapChar = ch; int val = getProperties(ch); diff --git a/src/java.base/share/classes/java/lang/CharacterDataLatin1.java.template b/src/java.base/share/classes/java/lang/CharacterDataLatin1.java.template index f2d8fcf37f5..3f162e6cc16 100644 --- a/src/java.base/share/classes/java/lang/CharacterDataLatin1.java.template +++ b/src/java.base/share/classes/java/lang/CharacterDataLatin1.java.template @@ -134,6 +134,30 @@ class CharacterDataLatin1 extends CharacterData { return ((props & $$maskIdentifierInfo) == $$valueIgnorable); } + boolean isEmoji(int ch) { + return (getPropertiesEx(ch) & $$maskEmoji) != 0; + } + + boolean isEmojiPresentation(int ch) { + return (getPropertiesEx(ch) & $$maskEmojiPresentation) != 0; + } + + boolean isEmojiModifier(int ch) { + return (getPropertiesEx(ch) & $$maskEmojiModifier) != 0; + } + + boolean isEmojiModifierBase(int ch) { + return (getPropertiesEx(ch) & $$maskEmojiModifierBase) != 0; + } + + boolean isEmojiComponent(int ch) { + return (getPropertiesEx(ch) & $$maskEmojiComponent) != 0; + } + + boolean isExtendedPictographic(int ch) { + return (getPropertiesEx(ch) & $$maskExtendedPictographic) != 
0; + } + int toLowerCase(int ch) { if (ch < 'A') { // Fast path for low code points return ch; diff --git a/src/java.base/share/classes/java/lang/CharacterDataPrivateUse.java b/src/java.base/share/classes/java/lang/CharacterDataPrivateUse.java index f139f938df1..9e6c446f86a 100644 --- a/src/java.base/share/classes/java/lang/CharacterDataPrivateUse.java +++ b/src/java.base/share/classes/java/lang/CharacterDataPrivateUse.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -60,6 +60,30 @@ class CharacterDataPrivateUse extends CharacterData { return false; } + boolean isEmoji(int ch) { + return false; + } + + boolean isEmojiPresentation(int ch) { + return false; + } + + boolean isEmojiModifier(int ch) { + return false; + } + + boolean isEmojiModifierBase(int ch) { + return false; + } + + boolean isEmojiComponent(int ch) { + return false; + } + + boolean isExtendedPictographic(int ch) { + return false; + } + int toLowerCase(int ch) { return ch; } diff --git a/src/java.base/share/classes/java/lang/CharacterDataUndefined.java b/src/java.base/share/classes/java/lang/CharacterDataUndefined.java index a6d240f11b4..58c74b658b1 100644 --- a/src/java.base/share/classes/java/lang/CharacterDataUndefined.java +++ b/src/java.base/share/classes/java/lang/CharacterDataUndefined.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -58,6 +58,30 @@ class CharacterDataUndefined extends CharacterData { return false; } + boolean isEmoji(int ch) { + return false; + } + + boolean isEmojiPresentation(int ch) { + return false; + } + + boolean isEmojiModifier(int ch) { + return false; + } + + boolean isEmojiModifierBase(int ch) { + return false; + } + + boolean isEmojiComponent(int ch) { + return false; + } + + boolean isExtendedPictographic(int ch) { + return false; + } + int toLowerCase(int ch) { return ch; } diff --git a/src/java.base/share/classes/jdk/internal/util/regex/EmojiData.java.template b/src/java.base/share/classes/jdk/internal/util/regex/EmojiData.java.template deleted file mode 100644 index db6a1d9733a..00000000000 --- a/src/java.base/share/classes/jdk/internal/util/regex/EmojiData.java.template +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2019, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
- * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - -package jdk.internal.util.regex; - -/** - * Holds data contained in the Unicode Technical Standard #51: Unicode - * Emoji. - * - * Currently it is only used for the rule "GB11" in UAX #29 Unicode Text - * Segmentation. - */ -final class EmojiData { - /** - * Returns whether the code point is an extended pictographic or not. - * - * @param cp code point to examine - * @return true if {@code cp} is an extended pictographic - */ - static boolean isExtendedPictographic(int cp) { - if (cp < 0x2000) { - return -%%%EXTPICT_LOW%%% - } else { - return isHigh(cp); - } - } - - private static boolean isHigh(int cp) { - return -%%%EXTPICT_HIGH%%% - } -} diff --git a/src/java.base/share/classes/jdk/internal/util/regex/Grapheme.java b/src/java.base/share/classes/jdk/internal/util/regex/Grapheme.java index 8fc9c1eaac5..68a28537cb3 100644 --- a/src/java.base/share/classes/jdk/internal/util/regex/Grapheme.java +++ b/src/java.base/share/classes/jdk/internal/util/regex/Grapheme.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -176,7 +176,7 @@ public final class Grapheme { return OTHER; } - if (EmojiData.isExtendedPictographic(cp)) { + if (Character.isExtendedPictographic(cp)) { return EXTENDED_PICTOGRAPHIC; } diff --git a/test/jdk/java/lang/Character/TestEmojiProperties.java b/test/jdk/java/lang/Character/TestEmojiProperties.java new file mode 100644 index 00000000000..ce27e252afd --- /dev/null +++ b/test/jdk/java/lang/Character/TestEmojiProperties.java @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. 
All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */
+
+
+/**
+ * @test
+ * @bug 8303018
+ * @summary Check j.l.Character.isEmoji/isEmojiPresentation/isEmojiModifier
+ *     isEmojiModifierBase/isEmojiComponent/isExtendedPictographic
+ * @library /lib/testlibrary/java/lang
+ */
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.util.AbstractMap;
+import java.util.function.Predicate;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+import java.util.stream.Stream;
+
+import static java.lang.Character.MAX_CODE_POINT;
+import static java.lang.Character.MIN_CODE_POINT;
+import static java.lang.Character.isEmoji;
+import static java.lang.Character.isEmojiPresentation;
+import static java.lang.Character.isEmojiModifier;
+import static java.lang.Character.isEmojiModifierBase;
+import static java.lang.Character.isEmojiComponent;
+import static java.lang.Character.isExtendedPictographic;
+
+public class TestEmojiProperties {
+    // Masks representing Emoji properties (16-bit `B` table masks in
+    // CharacterData.java)
+    private static final int EMOJI = 0x0040;
+    private static final int EMOJI_PRESENTATION = 0x0080;
+    private static final int EMOJI_MODIFIER = 0x0100;
+    private static final int EMOJI_MODIFIER_BASE = 0x0200;
+    private static final int EMOJI_COMPONENT = 0x0400;
+    private static final int EXTENDED_PICTOGRAPHIC = 0x0800;
+
+    public static void main(String[] args) throws IOException {  // compares every code point against the emoji data file
+        var emojiProps = Files.readAllLines(UCDFiles.EMOJI_DATA).stream()
+            .map(line -> line.split("#", 2)[0])  // keep only the text before '#'
+            .filter(Predicate.not(String::isBlank))
+            .map(line -> line.split("[ \t]*;[ \t]*", 2))  // [0] = code point or range, [1] = property name
+            .flatMap(map -> {
+                var range = map[0].split("\\.\\.", 2);  // "XXXX" or "XXXX..YYYY" (hex)
+                var start = Integer.valueOf(range[0], 16);
+                return range.length == 1 ?
+                    Stream.of(new AbstractMap.SimpleEntry<>(start, convertType(map[1].trim()))) :
+                    IntStream.rangeClosed(start,
+                        Integer.valueOf(range[1], 16))
+                        .mapToObj(cp -> new AbstractMap.SimpleEntry<>(cp, convertType(map[1].trim())));
+            })  // the merge function below ORs the masks when a code point appears more than once
+            .collect(Collectors.toMap(AbstractMap.SimpleEntry::getKey, AbstractMap.SimpleEntry::getValue, (v1, v2) -> v1 | v2));
+
+        final var fails = new Integer[1];  // one-element array so the lambda below can update the counter
+        fails[0] = 0;
+        IntStream.rangeClosed(MIN_CODE_POINT, MAX_CODE_POINT).forEach(cp -> {
+            var props = emojiProps.getOrDefault(cp, 0L);  // code points absent from the map have no property bits
+            if ((props & EMOJI) != 0 ^ isEmoji(cp)) {
+                System.err.printf("""
+                    isEmoji(0x%x) failed. Returned: %b
+                    """, cp, isEmoji(cp));
+                fails[0] ++;
+            }
+
+            if ((props & EMOJI_PRESENTATION) != 0 ^ isEmojiPresentation(cp)) {
+                System.err.printf("""
+                    isEmojiPresentation(0x%x) failed. Returned: %b
+                    """, cp, isEmojiPresentation(cp));
+                fails[0] ++;
+            }
+
+            if ((props & EMOJI_MODIFIER) != 0 ^ isEmojiModifier(cp)) {
+                System.err.printf("""
+                    isEmojiModifier(0x%x) failed. Returned: %b
+                    """, cp, isEmojiModifier(cp));
+                fails[0] ++;
+            }
+
+            if ((props & EMOJI_MODIFIER_BASE) != 0 ^ isEmojiModifierBase(cp)) {
+                System.err.printf("""
+                    isEmojiModifierBase(0x%x) failed. Returned: %b
+                    """, cp, isEmojiModifierBase(cp));
+                fails[0] ++;
+            }
+
+            if ((props & EMOJI_COMPONENT) != 0 ^ isEmojiComponent(cp)) {
+                System.err.printf("""
+                    isEmojiComponent(0x%x) failed. Returned: %b
+                    """, cp, isEmojiComponent(cp));
+                fails[0] ++;
+            }
+
+            if ((props & EXTENDED_PICTOGRAPHIC) != 0 ^ isExtendedPictographic(cp)) {
+                System.err.printf("""
+                    isExtendedPictographic(0x%x) failed. Returned: %b
+                    """, cp, isExtendedPictographic(cp));
+                fails[0] ++;
+            }
+        });
+        if (fails[0] != 0) {
+            throw new RuntimeException("TestEmojiProperties failed=" + fails[0]);  // report the count, not the array identity
+        }
+    }
+
+    private static long convertType(String type) {  // maps a property name from the data file to its mask constant
+        return switch (type) {
+            case "Emoji" -> EMOJI;
+            case "Emoji_Presentation" -> EMOJI_PRESENTATION;
+            case "Emoji_Modifier" -> EMOJI_MODIFIER;
+            case "Emoji_Modifier_Base" -> EMOJI_MODIFIER_BASE;
+            case "Emoji_Component" -> EMOJI_COMPONENT;
+            case "Extended_Pictographic" -> EXTENDED_PICTOGRAPHIC;
+            default -> throw new InternalError("Unknown emoji property: " + type);
+        };
+    }
+}