8303018: Unicode Emoji Properties
Reviewed-by: prr, erikj, rriggs
This commit is contained in:
parent
bc0ed730f2
commit
f593a6b52e
@ -0,0 +1,93 @@
|
||||
/*
|
||||
* Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package build.tools.generatecharacter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.AbstractMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/**
|
||||
* A class holding emoji character properties
|
||||
* https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files
|
||||
*/
|
||||
class EmojiData {
|
||||
// Emoji properties map
|
||||
private final Map<Integer, Long> emojiProps;
|
||||
|
||||
static EmojiData readSpecFile(Path file, int plane) throws IOException {
|
||||
return new EmojiData(file, plane);
|
||||
}
|
||||
|
||||
EmojiData(Path file, int plane) throws IOException {
|
||||
emojiProps = Files.readAllLines(file).stream()
|
||||
.map(line -> line.split("#", 2)[0])
|
||||
.filter(Predicate.not(String::isBlank))
|
||||
.map(line -> line.split("[ \t]*;[ \t]*", 2))
|
||||
.flatMap(map -> {
|
||||
var range = map[0].split("\\.\\.", 2);
|
||||
var start = Integer.valueOf(range[0], 16);
|
||||
if ((start >> 16) != plane) {
|
||||
return Stream.empty();
|
||||
} else {
|
||||
return range.length == 1 ?
|
||||
Stream.of(new AbstractMap.SimpleEntry<>(start, convertType(map[1].trim()))) :
|
||||
IntStream.rangeClosed(start, Integer.valueOf(range[1], 16))
|
||||
.mapToObj(cp -> new AbstractMap.SimpleEntry<>(cp, convertType(map[1].trim())));
|
||||
}
|
||||
})
|
||||
.collect(Collectors.toMap(AbstractMap.SimpleEntry::getKey,
|
||||
AbstractMap.SimpleEntry::getValue,
|
||||
(v1, v2) -> v1 | v2));
|
||||
}
|
||||
|
||||
long properties(int cp) {
|
||||
return emojiProps.get(cp);
|
||||
}
|
||||
|
||||
Set<Integer> codepoints() {
|
||||
return emojiProps.keySet();
|
||||
}
|
||||
|
||||
private static long convertType(String type) {
|
||||
return switch (type) {
|
||||
case "Emoji" -> GenerateCharacter.maskEmoji;
|
||||
case "Emoji_Presentation" -> GenerateCharacter.maskEmojiPresentation;
|
||||
case "Emoji_Modifier" -> GenerateCharacter.maskEmojiModifier;
|
||||
case "Emoji_Modifier_Base" -> GenerateCharacter.maskEmojiModifierBase;
|
||||
case "Emoji_Component" -> GenerateCharacter.maskEmojiComponent;
|
||||
case "Extended_Pictographic" -> GenerateCharacter.maskExtendedPictographic;
|
||||
default -> throw new InternalError("Unrecognizable Emoji type: " + type);
|
||||
};
|
||||
}
|
||||
}
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2002, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2002, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -33,6 +33,7 @@ import java.io.PrintWriter;
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.FileWriter;
|
||||
import java.io.File;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.List;
|
||||
|
||||
import build.tools.generatecharacter.CharacterName;
|
||||
@ -74,6 +75,7 @@ public class GenerateCharacter {
|
||||
static String DefaultSpecialCasingFileName = ROOT + "SpecialCasing.txt";
|
||||
static String DefaultPropListFileName = ROOT + "PropList.txt";
|
||||
static String DefaultDerivedPropsFileName = ROOT + "DerivedCoreProperties.txt";
|
||||
static String DefaultEmojiDataFileName = ROOT + "emoji-data.txt";
|
||||
static String DefaultJavaTemplateFileName = ROOT + "Character.java.template";
|
||||
static String DefaultJavaOutputFileName = ROOT + "Character.java";
|
||||
static String DefaultCTemplateFileName = ROOT + "Character.c.template";
|
||||
@ -105,7 +107,7 @@ public class GenerateCharacter {
|
||||
entries are short rather than byte).
|
||||
*/
|
||||
|
||||
/* The character properties are currently encoded into A (32 bits) and B (8 bits)
|
||||
/* The character properties are currently encoded into A (32 bits) and B (16 bits)
|
||||
two parts.
|
||||
|
||||
A: the low 32 bits are defined in the following manner:
|
||||
@ -160,6 +162,13 @@ public class GenerateCharacter {
|
||||
1 bit Ideographic property
|
||||
1 bit ID_Start property
|
||||
1 bit ID_Continue property
|
||||
6 bits for Emoji properties :-
|
||||
1 bit for Emoji
|
||||
1 bit for Emoji_Presentation
|
||||
1 bit for Emoji_Modifier
|
||||
1 bit for Emoji_Modifier_Base
|
||||
1 bit for Emoji_Component
|
||||
1 bit for Extended_Pictographic
|
||||
*/
|
||||
|
||||
|
||||
@ -188,15 +197,21 @@ public class GenerateCharacter {
|
||||
// maskMirrored must be declared long: 0x80000000 sets bit 31, which is the sign bit of an int
|
||||
private static final long maskMirrored = 0x80000000L;
|
||||
|
||||
// bit masks identify the 8-bit property field described above, in B
|
||||
// bit masks identify the 16-bit property field described above, in B
|
||||
// table
|
||||
private static final long
|
||||
maskOtherLowercase = 0x0100000000L,
|
||||
maskOtherUppercase = 0x0200000000L,
|
||||
maskOtherAlphabetic = 0x0400000000L,
|
||||
maskIdeographic = 0x0800000000L,
|
||||
maskIDStart = 0x1000000000L,
|
||||
maskIDContinue = 0x2000000000L;
|
||||
static final long
|
||||
maskOtherLowercase = 1L << 32,
|
||||
maskOtherUppercase = 1L << 33,
|
||||
maskOtherAlphabetic = 1L << 34,
|
||||
maskIdeographic = 1L << 35,
|
||||
maskIDStart = 1L << 36,
|
||||
maskIDContinue = 1L << 37,
|
||||
maskEmoji = 1L << 38,
|
||||
maskEmojiPresentation = 1L << 39,
|
||||
maskEmojiModifier = 1L << 40,
|
||||
maskEmojiModifierBase = 1L << 41,
|
||||
maskEmojiComponent = 1L << 42,
|
||||
maskExtendedPictographic = 1L << 43;
|
||||
|
||||
// Can compare masked values with these to determine
|
||||
// numeric or lexical types.
|
||||
@ -304,7 +319,7 @@ public class GenerateCharacter {
|
||||
* @see GenerateCharacter#buildOne
|
||||
*/
|
||||
|
||||
static long[] buildMap(UnicodeSpec[] data, SpecialCaseMap[] specialMaps, PropList propList)
|
||||
static long[] buildMap(UnicodeSpec[] data, SpecialCaseMap[] specialMaps, PropList propList, EmojiData emojiData)
|
||||
{
|
||||
long[] result = new long[bLatin1 ? 256 : 1 << 16];
|
||||
int k = 0;
|
||||
@ -361,6 +376,9 @@ public class GenerateCharacter {
|
||||
addExProp(result, propList, "ID_Start", maskIDStart);
|
||||
addExProp(result, propList, "ID_Continue", maskIDContinue);
|
||||
|
||||
// add Emoji properties to the upper 16 bits
|
||||
addEmojiProps(result, emojiData);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -583,6 +601,14 @@ public class GenerateCharacter {
|
||||
}
|
||||
}
|
||||
|
||||
static void addEmojiProps(long[] map, EmojiData emojiData) {
|
||||
for (int cp : emojiData.codepoints()) {
|
||||
var index = cp & 0xFFFF;
|
||||
if (index < map.length)
|
||||
map[index] |= emojiData.properties(cp);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This is the heart of the table compression strategy. The inputs are a map
|
||||
* and a number of bits (size). The map is simply an array of long integer values;
|
||||
@ -776,6 +802,12 @@ OUTER: for (int i = 0; i < n; i += m) {
|
||||
if (x.equals("maskIdeographic")) return "0x" + hex4(maskIdeographic >> 32);
|
||||
if (x.equals("maskIDStart")) return "0x" + hex4(maskIDStart >> 32);
|
||||
if (x.equals("maskIDContinue")) return "0x" + hex4(maskIDContinue >> 32);
|
||||
if (x.equals("maskEmoji")) return "0x" + hex4(maskEmoji >> 32);
|
||||
if (x.equals("maskEmojiPresentation")) return "0x" + hex4(maskEmojiPresentation >> 32);
|
||||
if (x.equals("maskEmojiModifier")) return "0x" + hex4(maskEmojiModifier >> 32);
|
||||
if (x.equals("maskEmojiModifierBase")) return "0x" + hex4(maskEmojiModifierBase >> 32);
|
||||
if (x.equals("maskEmojiComponent")) return "0x" + hex4(maskEmojiComponent >> 32);
|
||||
if (x.equals("maskExtendedPictographic")) return "0x" + hex4(maskExtendedPictographic >> 32);
|
||||
if (x.equals("valueIgnorable")) return "0x" + hex8(valueIgnorable);
|
||||
if (x.equals("valueJavaUnicodeStart")) return "0x" + hex8(valueJavaUnicodeStart);
|
||||
if (x.equals("valueJavaOnlyStart")) return "0x" + hex8(valueJavaOnlyStart);
|
||||
@ -952,7 +984,7 @@ OUTER: for (int i = 0; i < n; i += m) {
|
||||
|
||||
// If we ever need more than 32 bits to represent the character properties,
|
||||
// then a table "B" may be needed as well.
|
||||
genTable(result, "B", tables[n - 1], 32, 8, sizes[n - 1], false, 0, true, true, false);
|
||||
genTable(result, "B", tables[n - 1], 32, 16, sizes[n - 1], false, 0, true, true, false);
|
||||
|
||||
totalBytes += ((((tables[n - 1].length * (identifiers ? 2 : 32)) + 31) >> 5) << 2);
|
||||
result.append(commentStart);
|
||||
@ -1434,6 +1466,42 @@ OUTER: for (int i = 0; i < n; i += m) {
|
||||
result.append(", supradecimal ");
|
||||
result.append((val & maskDigitOffset) >> shiftDigitOffset);
|
||||
}
|
||||
if ((val & maskOtherLowercase) == maskOtherLowercase) {
|
||||
result.append(", otherLowercase");
|
||||
}
|
||||
if ((val & maskOtherUppercase) == maskOtherUppercase) {
|
||||
result.append(", otherUppercase");
|
||||
}
|
||||
if ((val & maskOtherAlphabetic) == maskOtherAlphabetic) {
|
||||
result.append(", otherAlphabetic");
|
||||
}
|
||||
if ((val & maskIdeographic) == maskIdeographic) {
|
||||
result.append(", ideographic");
|
||||
}
|
||||
if ((val & maskIDStart) == maskIDStart) {
|
||||
result.append(", IDStart");
|
||||
}
|
||||
if ((val & maskIDContinue) == maskIDContinue) {
|
||||
result.append(", IDContinue");
|
||||
}
|
||||
if ((val & maskEmoji) == maskEmoji) {
|
||||
result.append(", emoji");
|
||||
}
|
||||
if ((val & maskEmojiPresentation) == maskEmojiPresentation) {
|
||||
result.append(", emojiPresentation");
|
||||
}
|
||||
if ((val & maskEmojiModifier) == maskEmojiModifier) {
|
||||
result.append(", emojiModifier");
|
||||
}
|
||||
if ((val & maskEmojiModifierBase) == maskEmojiModifierBase) {
|
||||
result.append(", emojiModifierBase");
|
||||
}
|
||||
if ((val & maskEmojiComponent) == maskEmojiComponent) {
|
||||
result.append(", emojiComponent");
|
||||
}
|
||||
if ((val & maskExtendedPictographic) == maskExtendedPictographic) {
|
||||
result.append(", extendedPictographic");
|
||||
}
|
||||
}
|
||||
|
||||
static String[] tableNames = { "X", "Y", "Z", "P", "Q", "R", "S", "T", "U", "V", "W" };
|
||||
@ -1512,6 +1580,7 @@ OUTER: for (int i = 0; i < n; i += m) {
|
||||
static String SpecialCasingFileName = null;
|
||||
static String PropListFileName = null;
|
||||
static String DerivedPropsFileName = null;
|
||||
static String EmojiDataFileName = null;
|
||||
static boolean useCharForByte = false;
|
||||
static int[] sizes;
|
||||
static int bins = 0; // liu; if > 0, then perform search
|
||||
@ -1649,6 +1718,14 @@ OUTER: for (int i = 0; i < n; i += m) {
|
||||
DerivedPropsFileName = args[++j];
|
||||
}
|
||||
}
|
||||
else if (args[j].equals("-emojidata")) {
|
||||
if (j == args.length -1) {
|
||||
FAIL("File name missing after -emojidata");
|
||||
}
|
||||
else {
|
||||
EmojiDataFileName = args[++j];
|
||||
}
|
||||
}
|
||||
else if (args[j].equals("-plane")) {
|
||||
if (j == args.length -1) {
|
||||
FAIL("Plane number missing after -plane");
|
||||
@ -1717,6 +1794,10 @@ OUTER: for (int i = 0; i < n; i += m) {
|
||||
DerivedPropsFileName = DefaultDerivedPropsFileName;
|
||||
desc.append(" [-derivedprops " + DerivedPropsFileName + ']');
|
||||
}
|
||||
if (EmojiDataFileName == null) {
|
||||
EmojiDataFileName = DefaultEmojiDataFileName;
|
||||
desc.append(" [-emojidata " + EmojiDataFileName + ']');
|
||||
}
|
||||
if (TemplateFileName == null) {
|
||||
TemplateFileName = (Csyntax ? DefaultCTemplateFileName
|
||||
: DefaultJavaTemplateFileName);
|
||||
@ -1871,11 +1952,12 @@ OUTER: for (int i = 0; i < n; i += m) {
|
||||
specialCaseMaps = SpecialCaseMap.readSpecFile(new File(SpecialCasingFileName), plane);
|
||||
PropList propList = PropList.readSpecFile(new File(PropListFileName), plane);
|
||||
propList.putAll(PropList.readSpecFile(new File(DerivedPropsFileName), plane));
|
||||
EmojiData emojiData = EmojiData.readSpecFile(Paths.get(EmojiDataFileName), plane);
|
||||
|
||||
if (verbose) {
|
||||
System.out.println(data.length + " items read from Unicode spec file " + UnicodeSpecFileName); // liu
|
||||
}
|
||||
long[] map = buildMap(data, specialCaseMaps, propList);
|
||||
long[] map = buildMap(data, specialCaseMaps, propList, emojiData);
|
||||
if (verbose) {
|
||||
System.err.println("Completed building of initial map");
|
||||
}
|
||||
|
@ -1,155 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package build.tools.generateemojidata;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/**
 * Generate EmojiData.java
 *    args[0]: Full path string to the template file
 *    args[1]: Full path string to the directory that contains "emoji/emoji-data.txt"
 *    args[2]: Full path string to the generated .java file
 */
public class GenerateEmojiData {
    /**
     * Reads the Extended_Pictographic ranges, substitutes them for the
     * {@code %%%EXTPICT_LOW%%%} / {@code %%%EXTPICT_HIGH%%%} template lines and
     * writes the result.
     *
     * @param args see the class description
     * @throws java.io.UncheckedIOException if any file cannot be read or
     *         written, so that a failed generation is not reported as success
     */
    public static void main(String[] args) {
        try {
            List<Range> extPictRanges = readExtPictRanges(args[1]);

            // make the code point conditions
            // only very few codepoints below 0x2000 are "emojis", so separate them
            // out to generate a fast-path check that can be efficiently inlined
            String lowExtPictCodePoints = extPictRanges.stream()
                .takeWhile(r -> r.last < 0x2000)
                .map(GenerateEmojiData::rangeToString)
                .collect(Collectors.joining(" ||\n", "", ";\n"));

            String highExtPictCodePoints = extPictRanges.stream()
                .dropWhile(r -> r.last < 0x2000)
                .map(GenerateEmojiData::rangeToString)
                .collect(Collectors.joining(" ||\n", "", ";\n"));

            // Generate EmojiData.java file; the template stream is closed
            // before the output is written.
            List<String> generated;
            try (Stream<String> template = Files.lines(Paths.get(args[0]))) {
                generated = template
                    .map(l -> {
                        if (l.equals("%%%EXTPICT_LOW%%%")) {
                            return lowExtPictCodePoints;
                        } else if (l.equals("%%%EXTPICT_HIGH%%%")) {
                            return highExtPictCodePoints;
                        } else {
                            return l;
                        }
                    })
                    .collect(Collectors.toList());
            }
            Files.write(Paths.get(args[2]), generated,
                StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
        } catch (IOException e) {
            // Do not swallow: the caller must see that nothing was generated.
            throw new java.io.UncheckedIOException("Failed to generate EmojiData.java", e);
        }
    }

    /**
     * Reads "emoji/emoji-data.txt" below the given directory and returns the
     * Extended_Pictographic ranges sorted by start, with directly adjacent
     * ranges collapsed into one.
     */
    private static List<Range> readExtPictRanges(String unicodeDataDir) throws IOException {
        try (Stream<String> lines =
                 Files.lines(Paths.get(unicodeDataDir, "emoji", "emoji-data.txt"))) {
            return lines
                .filter(Predicate.not(l -> l.startsWith("#") || l.isBlank()))
                .filter(l -> l.contains("; Extended_Pictograph"))
                .map(l -> new Range(l.replaceFirst(" .*", "")))
                .sorted()
                .collect(ArrayList<Range>::new,
                    (list, r) -> {
                        // collapsing consecutive pictographic ranges
                        int lastIndex = list.size() - 1;
                        if (lastIndex >= 0) {
                            Range lastRange = list.get(lastIndex);
                            if (lastRange.last + 1 == r.start) {
                                list.set(lastIndex, new Range(lastRange.start, r.last));
                                return;
                            }
                        }
                        list.add(r);
                    },
                    ArrayList<Range>::addAll);
        }
    }

    /**
     * Renders a range as a Java boolean expression on {@code cp}: a single
     * equality, two equalities joined by {@code ||}, or a bounds check.
     */
    static String rangeToString(Range r) {
        if (r.start == r.last) {
            return (" ".repeat(16) + "cp == 0x" + toHexString(r.start));
        } else if (r.start == r.last - 1) {
            return " ".repeat(16) + "cp == 0x" + toHexString(r.start) + " ||\n" +
                   " ".repeat(16) + "cp == 0x" + toHexString(r.last);
        } else {
            return " ".repeat(15) + "(cp >= 0x" + toHexString(r.start) +
                   " && cp <= 0x" + toHexString(r.last) + ")";
        }
    }

    /** Parses an unsigned hexadecimal string. */
    static int toInt(String hexStr) {
        return Integer.parseUnsignedInt(hexStr, 16);
    }

    /** Formats a code point as upper-case hex, zero-padded to at least 4 digits. */
    static String toHexString(int cp) {
        String ret = Integer.toUnsignedString(cp, 16).toUpperCase();
        if (ret.length() < 4) {
            ret = "0".repeat(4 - ret.length()) + ret;
        }
        return ret;
    }

    /** An inclusive code point range, ordered by its start. */
    static class Range implements Comparable<Range> {
        final int start;
        final int last;

        Range (int start, int last) {
            this.start = start;
            this.last = last;
        }

        /** Parses "XXXX" or "XXXX..YYYY", ignoring anything from " #" or ";" on. */
        Range (String input) {
            input = input.replaceFirst("\\s#.*", "");
            start = toInt(input.replaceFirst("[\\s\\.].*", ""));
            last = input.contains("..") ?
                toInt(input.replaceFirst(".*\\.\\.", "")
                           .replaceFirst(";.*", "").trim())
                : start;
        }

        @Override
        public String toString() {
            return "Start: " + toHexString(start) + ", Last: " + toHexString(last);
        }

        @Override
        public int compareTo(Range other) {
            return Integer.compare(start, other.start);
        }
    }
}
|
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
# Copyright (c) 2011, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
#
|
||||
# This code is free software; you can redistribute it and/or modify it
|
||||
@ -34,7 +34,6 @@ include gensrc/GensrcBuffer.gmk
|
||||
include gensrc/GensrcExceptions.gmk
|
||||
include gensrc/GensrcVarHandles.gmk
|
||||
include gensrc/GensrcModuleLoaderMap.gmk
|
||||
include gensrc/GensrcEmojiData.gmk
|
||||
include gensrc/GensrcScopedMemoryAccess.gmk
|
||||
|
||||
# GensrcLocaleData.gmk does not set TARGETS, so we must choose which targets
|
||||
|
@ -49,6 +49,7 @@ define SetupCharacterData
|
||||
-specialcasing $(UNICODEDATA)/SpecialCasing.txt \
|
||||
-proplist $(UNICODEDATA)/PropList.txt \
|
||||
-derivedprops $(UNICODEDATA)/DerivedCoreProperties.txt \
|
||||
-emojidata $(UNICODEDATA)/emoji/emoji-data.txt \
|
||||
-o $(SUPPORT_OUTPUTDIR)/gensrc/java.base/java/lang/$1.java \
|
||||
-usecharforbyte $3
|
||||
|
||||
|
@ -1,43 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2019, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
#
|
||||
# This code is free software; you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License version 2 only, as
|
||||
# published by the Free Software Foundation. Oracle designates this
|
||||
# particular file as subject to the "Classpath" exception as provided
|
||||
# by Oracle in the LICENSE file that accompanied this code.
|
||||
#
|
||||
# This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
# version 2 for more details (a copy is included in the LICENSE file that
|
||||
# accompanied this code).
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License version
|
||||
# 2 along with this work; if not, write to the Free Software Foundation,
|
||||
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
#
|
||||
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
# or visit www.oracle.com if you need additional information or have any
|
||||
# questions.
|
||||
#
|
||||
|
||||
#
|
||||
# Rules to create $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/util/regex/EmojiData.java
|
||||
#
|
||||
|
||||
GENSRC_EMOJIDATA := $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/util/regex/EmojiData.java
|
||||
|
||||
EMOJIDATATEMP = $(MODULE_SRC)/share/classes/jdk/internal/util/regex/EmojiData.java.template
|
||||
UNICODEDATA = $(MODULE_SRC)/share/data/unicodedata
|
||||
|
||||
$(GENSRC_EMOJIDATA): $(BUILD_TOOLS_JDK) $(EMOJIDATATEMP) $(UNICODEDATA)/emoji/emoji-data.txt
|
||||
$(call LogInfo, Generating $@)
|
||||
$(call MakeTargetDir)
|
||||
$(TOOL_GENERATEEMOJIDATA) \
|
||||
$(EMOJIDATATEMP) \
|
||||
$(UNICODEDATA) \
|
||||
$(GENSRC_EMOJIDATA)
|
||||
|
||||
TARGETS += $(GENSRC_EMOJIDATA)
|
@ -10781,6 +10781,113 @@ class Character implements java.io.Serializable, Comparable<Character>, Constabl
|
||||
return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines if the specified character (Unicode code point) is an Emoji.
|
||||
* <p>
|
||||
* A character is considered to be an Emoji if and only if it has the {@code Emoji}
|
||||
* property, defined in
|
||||
* <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
|
||||
* Unicode Emoji (Technical Standard #51)</a>.
|
||||
*
|
||||
* @param codePoint the character (Unicode code point) to be tested.
|
||||
* @return {@code true} if the character is an Emoji;
|
||||
* {@code false} otherwise.
|
||||
* @since 21
|
||||
*/
|
||||
public static boolean isEmoji(int codePoint) {
|
||||
return CharacterData.of(codePoint).isEmoji(codePoint);
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines if the specified character (Unicode code point) has the
|
||||
* Emoji Presentation property by default.
|
||||
* <p>
|
||||
* A character is considered to have the Emoji Presentation property if and
|
||||
* only if it has the {@code Emoji_Presentation} property, defined in
|
||||
* <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
|
||||
* Unicode Emoji (Technical Standard #51)</a>.
|
||||
*
|
||||
* @param codePoint the character (Unicode code point) to be tested.
|
||||
* @return {@code true} if the character has the Emoji Presentation
|
||||
* property; {@code false} otherwise.
|
||||
* @since 21
|
||||
*/
|
||||
public static boolean isEmojiPresentation(int codePoint) {
|
||||
return CharacterData.of(codePoint).isEmojiPresentation(codePoint);
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines if the specified character (Unicode code point) is an
|
||||
* Emoji Modifier.
|
||||
* <p>
|
||||
* A character is considered to be an Emoji Modifier if and only if it has
|
||||
* the {@code Emoji_Modifier} property, defined in
|
||||
* <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
|
||||
* Unicode Emoji (Technical Standard #51)</a>.
|
||||
*
|
||||
* @param codePoint the character (Unicode code point) to be tested.
|
||||
* @return {@code true} if the character is an Emoji Modifier;
|
||||
* {@code false} otherwise.
|
||||
* @since 21
|
||||
*/
|
||||
public static boolean isEmojiModifier(int codePoint) {
|
||||
return CharacterData.of(codePoint).isEmojiModifier(codePoint);
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines if the specified character (Unicode code point) is an
|
||||
* Emoji Modifier Base.
|
||||
* <p>
|
||||
* A character is considered to be an Emoji Modifier Base if and only if it has
|
||||
* the {@code Emoji_Modifier_Base} property, defined in
|
||||
* <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
|
||||
* Unicode Emoji (Technical Standard #51)</a>.
|
||||
*
|
||||
* @param codePoint the character (Unicode code point) to be tested.
|
||||
* @return {@code true} if the character is an Emoji Modifier Base;
|
||||
* {@code false} otherwise.
|
||||
* @since 21
|
||||
*/
|
||||
public static boolean isEmojiModifierBase(int codePoint) {
|
||||
return CharacterData.of(codePoint).isEmojiModifierBase(codePoint);
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines if the specified character (Unicode code point) is an
|
||||
* Emoji Component.
|
||||
* <p>
|
||||
* A character is considered to be an Emoji Component if and only if it has
|
||||
* the {@code Emoji_Component} property, defined in
|
||||
* <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
|
||||
* Unicode Emoji (Technical Standard #51)</a>.
|
||||
*
|
||||
* @param codePoint the character (Unicode code point) to be tested.
|
||||
* @return {@code true} if the character is an Emoji Component;
|
||||
* {@code false} otherwise.
|
||||
* @since 21
|
||||
*/
|
||||
public static boolean isEmojiComponent(int codePoint) {
|
||||
return CharacterData.of(codePoint).isEmojiComponent(codePoint);
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines if the specified character (Unicode code point) is
|
||||
* an Extended Pictographic.
|
||||
* <p>
|
||||
* A character is considered to be an Extended Pictographic if and only if it has
|
||||
* the {@code Extended_Pictographic} property, defined in
|
||||
* <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
|
||||
* Unicode Emoji (Technical Standard #51)</a>.
|
||||
*
|
||||
* @param codePoint the character (Unicode code point) to be tested.
|
||||
* @return {@code true} if the character is an Extended Pictographic;
|
||||
* {@code false} otherwise.
|
||||
* @since 21
|
||||
*/
|
||||
public static boolean isExtendedPictographic(int codePoint) {
|
||||
return CharacterData.of(codePoint).isExtendedPictographic(codePoint);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts the character argument to lowercase using case
|
||||
* mapping information from the UnicodeData file.
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2006, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -38,6 +38,12 @@ abstract class CharacterData {
|
||||
abstract boolean isUnicodeIdentifierStart(int ch);
|
||||
abstract boolean isUnicodeIdentifierPart(int ch);
|
||||
abstract boolean isIdentifierIgnorable(int ch);
|
||||
abstract boolean isEmoji(int ch);
|
||||
abstract boolean isEmojiPresentation(int ch);
|
||||
abstract boolean isEmojiModifier(int ch);
|
||||
abstract boolean isEmojiModifierBase(int ch);
|
||||
abstract boolean isEmojiComponent(int ch);
|
||||
abstract boolean isExtendedPictographic(int ch);
|
||||
abstract int toLowerCase(int ch);
|
||||
abstract int toUpperCase(int ch);
|
||||
abstract int toTitleCase(int ch);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -120,6 +120,30 @@ class CharacterData00 extends CharacterData {
|
||||
return ((props & $$maskIdentifierInfo) == $$valueIgnorable);
|
||||
}
|
||||
|
||||
boolean isEmoji(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskEmoji) != 0;
|
||||
}
|
||||
|
||||
boolean isEmojiPresentation(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskEmojiPresentation) != 0;
|
||||
}
|
||||
|
||||
boolean isEmojiModifier(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskEmojiModifier) != 0;
|
||||
}
|
||||
|
||||
boolean isEmojiModifierBase(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskEmojiModifierBase) != 0;
|
||||
}
|
||||
|
||||
boolean isEmojiComponent(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskEmojiComponent) != 0;
|
||||
}
|
||||
|
||||
boolean isExtendedPictographic(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskExtendedPictographic) != 0;
|
||||
}
|
||||
|
||||
int toLowerCase(int ch) {
|
||||
int mapChar = ch;
|
||||
int val = getProperties(ch);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -117,6 +117,30 @@ class CharacterData01 extends CharacterData {
|
||||
return ((props & $$maskIdentifierInfo) == $$valueIgnorable);
|
||||
}
|
||||
|
||||
boolean isEmoji(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskEmoji) != 0;
|
||||
}
|
||||
|
||||
boolean isEmojiPresentation(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskEmojiPresentation) != 0;
|
||||
}
|
||||
|
||||
boolean isEmojiModifier(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskEmojiModifier) != 0;
|
||||
}
|
||||
|
||||
boolean isEmojiModifierBase(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskEmojiModifierBase) != 0;
|
||||
}
|
||||
|
||||
boolean isEmojiComponent(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskEmojiComponent) != 0;
|
||||
}
|
||||
|
||||
boolean isExtendedPictographic(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskExtendedPictographic) != 0;
|
||||
}
|
||||
|
||||
int toLowerCase(int ch) {
|
||||
int mapChar = ch;
|
||||
int val = getProperties(ch);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -116,6 +116,30 @@ class CharacterData02 extends CharacterData {
|
||||
return ((props & $$maskIdentifierInfo) == $$valueIgnorable);
|
||||
}
|
||||
|
||||
boolean isEmoji(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskEmoji) != 0;
|
||||
}
|
||||
|
||||
boolean isEmojiPresentation(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskEmojiPresentation) != 0;
|
||||
}
|
||||
|
||||
boolean isEmojiModifier(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskEmojiModifier) != 0;
|
||||
}
|
||||
|
||||
boolean isEmojiModifierBase(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskEmojiModifierBase) != 0;
|
||||
}
|
||||
|
||||
boolean isEmojiComponent(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskEmojiComponent) != 0;
|
||||
}
|
||||
|
||||
boolean isExtendedPictographic(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskExtendedPictographic) != 0;
|
||||
}
|
||||
|
||||
int toLowerCase(int ch) {
|
||||
int mapChar = ch;
|
||||
int val = getProperties(ch);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -116,6 +116,30 @@ class CharacterData03 extends CharacterData {
|
||||
return ((props & $$maskIdentifierInfo) == $$valueIgnorable);
|
||||
}
|
||||
|
||||
boolean isEmoji(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskEmoji) != 0;
|
||||
}
|
||||
|
||||
boolean isEmojiPresentation(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskEmojiPresentation) != 0;
|
||||
}
|
||||
|
||||
boolean isEmojiModifier(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskEmojiModifier) != 0;
|
||||
}
|
||||
|
||||
boolean isEmojiModifierBase(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskEmojiModifierBase) != 0;
|
||||
}
|
||||
|
||||
boolean isEmojiComponent(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskEmojiComponent) != 0;
|
||||
}
|
||||
|
||||
boolean isExtendedPictographic(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskExtendedPictographic) != 0;
|
||||
}
|
||||
|
||||
int toLowerCase(int ch) {
|
||||
int mapChar = ch;
|
||||
int val = getProperties(ch);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -116,6 +116,30 @@ class CharacterData0E extends CharacterData {
|
||||
return ((props & $$maskIdentifierInfo) == $$valueIgnorable);
|
||||
}
|
||||
|
||||
boolean isEmoji(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskEmoji) != 0;
|
||||
}
|
||||
|
||||
boolean isEmojiPresentation(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskEmojiPresentation) != 0;
|
||||
}
|
||||
|
||||
boolean isEmojiModifier(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskEmojiModifier) != 0;
|
||||
}
|
||||
|
||||
boolean isEmojiModifierBase(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskEmojiModifierBase) != 0;
|
||||
}
|
||||
|
||||
boolean isEmojiComponent(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskEmojiComponent) != 0;
|
||||
}
|
||||
|
||||
boolean isExtendedPictographic(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskExtendedPictographic) != 0;
|
||||
}
|
||||
|
||||
int toLowerCase(int ch) {
|
||||
int mapChar = ch;
|
||||
int val = getProperties(ch);
|
||||
|
@ -134,6 +134,30 @@ class CharacterDataLatin1 extends CharacterData {
|
||||
return ((props & $$maskIdentifierInfo) == $$valueIgnorable);
|
||||
}
|
||||
|
||||
boolean isEmoji(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskEmoji) != 0;
|
||||
}
|
||||
|
||||
boolean isEmojiPresentation(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskEmojiPresentation) != 0;
|
||||
}
|
||||
|
||||
boolean isEmojiModifier(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskEmojiModifier) != 0;
|
||||
}
|
||||
|
||||
boolean isEmojiModifierBase(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskEmojiModifierBase) != 0;
|
||||
}
|
||||
|
||||
boolean isEmojiComponent(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskEmojiComponent) != 0;
|
||||
}
|
||||
|
||||
boolean isExtendedPictographic(int ch) {
|
||||
return (getPropertiesEx(ch) & $$maskExtendedPictographic) != 0;
|
||||
}
|
||||
|
||||
int toLowerCase(int ch) {
|
||||
if (ch < 'A') { // Fast path for low code points
|
||||
return ch;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -60,6 +60,30 @@ class CharacterDataPrivateUse extends CharacterData {
|
||||
return false;
|
||||
}
|
||||
|
||||
boolean isEmoji(int ch) {
|
||||
return false;
|
||||
}
|
||||
|
||||
boolean isEmojiPresentation(int ch) {
|
||||
return false;
|
||||
}
|
||||
|
||||
boolean isEmojiModifier(int ch) {
|
||||
return false;
|
||||
}
|
||||
|
||||
boolean isEmojiModifierBase(int ch) {
|
||||
return false;
|
||||
}
|
||||
|
||||
boolean isEmojiComponent(int ch) {
|
||||
return false;
|
||||
}
|
||||
|
||||
boolean isExtendedPictographic(int ch) {
|
||||
return false;
|
||||
}
|
||||
|
||||
int toLowerCase(int ch) {
|
||||
return ch;
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -58,6 +58,30 @@ class CharacterDataUndefined extends CharacterData {
|
||||
return false;
|
||||
}
|
||||
|
||||
boolean isEmoji(int ch) {
|
||||
return false;
|
||||
}
|
||||
|
||||
boolean isEmojiPresentation(int ch) {
|
||||
return false;
|
||||
}
|
||||
|
||||
boolean isEmojiModifier(int ch) {
|
||||
return false;
|
||||
}
|
||||
|
||||
boolean isEmojiModifierBase(int ch) {
|
||||
return false;
|
||||
}
|
||||
|
||||
boolean isEmojiComponent(int ch) {
|
||||
return false;
|
||||
}
|
||||
|
||||
boolean isExtendedPictographic(int ch) {
|
||||
return false;
|
||||
}
|
||||
|
||||
int toLowerCase(int ch) {
|
||||
return ch;
|
||||
}
|
||||
|
@ -1,55 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2019, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package jdk.internal.util.regex;
|
||||
|
||||
/**
|
||||
* Holds data contained in the Unicode Technical Standard #51: Unicode
|
||||
* Emoji.
|
||||
*
|
||||
* Currently it is only used for the rule "GB11" in UAX #29 Unicode Text
|
||||
* Segmentation.
|
||||
*/
|
||||
final class EmojiData {
    /**
     * Tells whether the given code point is an extended pictographic.
     * Code points from 0x2000 upward are delegated to {@link #isHigh(int)};
     * lower ones are answered by the expression on the placeholder line below
     * (NOTE(review): presumably substituted at build time; confirm against the
     * generator).
     *
     * @param cp code point to examine
     * @return true if {@code cp} is an extended pictographic
     */
    static boolean isExtendedPictographic(int cp) {
        if (cp >= 0x2000) {
            return isHigh(cp);
        }
        return
%%%EXTPICT_LOW%%%
    }

    /**
     * Answers the same question for code points the caller routes here
     * (0x2000 and above).
     *
     * @param cp code point to examine
     * @return the value of the expression on the placeholder line
     */
    private static boolean isHigh(int cp) {
        return
%%%EXTPICT_HIGH%%%
    }
}
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2016, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -176,7 +176,7 @@ public final class Grapheme {
|
||||
return OTHER;
|
||||
}
|
||||
|
||||
if (EmojiData.isExtendedPictographic(cp)) {
|
||||
if (Character.isExtendedPictographic(cp)) {
|
||||
return EXTENDED_PICTOGRAPHIC;
|
||||
}
|
||||
|
||||
|
138
test/jdk/java/lang/Character/TestEmojiProperties.java
Normal file
138
test/jdk/java/lang/Character/TestEmojiProperties.java
Normal file
@ -0,0 +1,138 @@
|
||||
/*
|
||||
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @test
|
||||
* @bug 8303018
|
||||
* @summary Check j.l.Character.isEmoji/isEmojiPresentation/isEmojiModifier
|
||||
* isEmojiModifierBase/isEmojiComponent/isExtendedPictographic
|
||||
* @library /lib/testlibrary/java/lang
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.util.AbstractMap;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import static java.lang.Character.MAX_CODE_POINT;
|
||||
import static java.lang.Character.MIN_CODE_POINT;
|
||||
import static java.lang.Character.isEmoji;
|
||||
import static java.lang.Character.isEmojiPresentation;
|
||||
import static java.lang.Character.isEmojiModifier;
|
||||
import static java.lang.Character.isEmojiModifierBase;
|
||||
import static java.lang.Character.isEmojiComponent;
|
||||
import static java.lang.Character.isExtendedPictographic;
|
||||
|
||||
public class TestEmojiProperties {
|
||||
// Masks representing Emoji properties (16-bit `B` table masks in
|
||||
// CharacterData.java)
|
||||
private static final int EMOJI = 0x0040;
|
||||
private static final int EMOJI_PRESENTATION = 0x0080;
|
||||
private static final int EMOJI_MODIFIER = 0x0100;
|
||||
private static final int EMOJI_MODIFIER_BASE = 0x0200;
|
||||
private static final int EMOJI_COMPONENT = 0x0400;
|
||||
private static final int EXTENDED_PICTOGRAPHIC = 0x0800;
|
||||
|
||||
public static void main(String[] args) throws IOException {
|
||||
var emojiProps = Files.readAllLines(UCDFiles.EMOJI_DATA).stream()
|
||||
.map(line -> line.split("#", 2)[0])
|
||||
.filter(Predicate.not(String::isBlank))
|
||||
.map(line -> line.split("[ \t]*;[ \t]*", 2))
|
||||
.flatMap(map -> {
|
||||
var range = map[0].split("\\.\\.", 2);
|
||||
var start = Integer.valueOf(range[0], 16);
|
||||
return range.length == 1 ?
|
||||
Stream.of(new AbstractMap.SimpleEntry<>(start, convertType(map[1].trim()))) :
|
||||
IntStream.rangeClosed(start,
|
||||
Integer.valueOf(range[1], 16))
|
||||
.mapToObj(cp -> new AbstractMap.SimpleEntry<>(cp, convertType(map[1].trim())));
|
||||
})
|
||||
.collect(Collectors.toMap(AbstractMap.SimpleEntry::getKey, AbstractMap.SimpleEntry::getValue, (v1, v2) -> v1 | v2));
|
||||
|
||||
final var fails = new Integer[1];
|
||||
fails[0] = 0;
|
||||
IntStream.rangeClosed(MIN_CODE_POINT, MAX_CODE_POINT).forEach(cp -> {
|
||||
var props = emojiProps.getOrDefault(cp, 0L);
|
||||
if ((props & EMOJI) != 0 ^ isEmoji(cp)) {
|
||||
System.err.printf("""
|
||||
isEmoji(0x%x) failed. Returned: %b
|
||||
""", cp, isEmoji(cp));
|
||||
fails[0] ++;
|
||||
}
|
||||
|
||||
if ((props & EMOJI_PRESENTATION) != 0 ^ isEmojiPresentation(cp)) {
|
||||
System.err.printf("""
|
||||
isEmojiPresentation(0x%x) failed. Returned: %b
|
||||
""", cp, isEmojiPresentation(cp));
|
||||
fails[0] ++;
|
||||
}
|
||||
|
||||
if ((props & EMOJI_MODIFIER) != 0 ^ isEmojiModifier(cp)) {
|
||||
System.err.printf("""
|
||||
isEmojiModifier(0x%x) failed. Returned: %b
|
||||
""", cp, isEmojiModifier(cp));
|
||||
fails[0] ++;
|
||||
}
|
||||
|
||||
if ((props & EMOJI_MODIFIER_BASE) != 0 ^ isEmojiModifierBase(cp)) {
|
||||
System.err.printf("""
|
||||
isEmojiModifierBase(0x%x) failed. Returned: %b
|
||||
""", cp, isEmojiModifierBase(cp));
|
||||
fails[0] ++;
|
||||
}
|
||||
|
||||
if ((props & EMOJI_COMPONENT) != 0 ^ isEmojiComponent(cp)) {
|
||||
System.err.printf("""
|
||||
isEmojiComponent(0x%x) failed. Returned: %b
|
||||
""", cp, isEmojiComponent(cp));
|
||||
fails[0] ++;
|
||||
}
|
||||
|
||||
if ((props & EXTENDED_PICTOGRAPHIC) != 0 ^ isExtendedPictographic(cp)) {
|
||||
System.err.printf("""
|
||||
isExtendedPictographic(0x%x) failed. Returned: %b
|
||||
""", cp, isExtendedPictographic(cp));
|
||||
fails[0] ++;
|
||||
}
|
||||
});
|
||||
if (fails[0] != 0) {
|
||||
throw new RuntimeException("TestEmojiProperties failed=" + fails);
|
||||
}
|
||||
}
|
||||
|
||||
private static long convertType(String type) {
|
||||
return switch (type) {
|
||||
case "Emoji" -> EMOJI;
|
||||
case "Emoji_Presentation" -> EMOJI_PRESENTATION;
|
||||
case "Emoji_Modifier" -> EMOJI_MODIFIER;
|
||||
case "Emoji_Modifier_Base" -> EMOJI_MODIFIER_BASE;
|
||||
case "Emoji_Component" -> EMOJI_COMPONENT;
|
||||
case "Extended_Pictographic" -> EXTENDED_PICTOGRAPHIC;
|
||||
default -> throw new InternalError();
|
||||
};
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user