8296246: Update Unicode Data Files to Version 15.1.0

Reviewed-by: erikj, joehw, srl, rriggs
This commit is contained in:
Naoto Sato 2023-09-20 17:39:57 +00:00
parent a021dbcb9e
commit 7c991cc567
22 changed files with 1511 additions and 224 deletions

View File

@ -76,8 +76,8 @@ TOOL_GENERATECACERTS = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_class
-Dkeystore.pkcs12.macAlgorithm=NONE \
build.tools.generatecacerts.GenerateCacerts
TOOL_GENERATEEMOJIDATA = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
build.tools.generateemojidata.GenerateEmojiData
TOOL_GENERATEEXTRAPROPERTIES = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
build.tools.generateextraproperties.GenerateExtraProperties
TOOL_MAKEZIPREPRODUCIBLE = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
build.tools.makezipreproducible.MakeZipReproducible

View File

@ -64,7 +64,7 @@ public class PropList {
int i, j;
BufferedReader sbfr = new BufferedReader(new FileReader(file));
Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s*;\\s+(\\w+)\\s+#.*").matcher("");
Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s*;\\s+(\\w+)[;\\s].*").matcher("");
String line = null;
int lineNo = 0;
while ((line = sbfr.readLine()) != null) {

View File

@ -0,0 +1,169 @@
/*
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package build.tools.generateextraproperties;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
* Parses extra properties files of UCD, and replaces the placeholders in
* the given template source file with the generated conditions, then emits
* .java files. For example, if the properties file has:
* <blockquote>
* 0009..000D ; Type (; Value)
* 0020 ; Type (; Value)
* 2000..200A ; Type (; Value)
* </blockquote>
* and the template file contains
* <blockquote>
* %%%Type(=Value)%%%
* </blockquote>
* then the generated .java file would have the following in place:
* <blockquote>
* (cp >= 0x0009 && cp <= 0x000D) ||
* cp == 0x0020 ||
* (cp >= 0x2000 && cp <= 0x200A);
* </blockquote>
* Note that those in parentheses in the properties file and the
* template file are optional.
*
* Arguments to this utility:
* args[0]: Full path string to the template file
* args[1]: Full path string to the properties file
* args[2]: Full path string to the generated .java file
* args[3...]: Names of the property to generate the conditions
*/
public class GenerateExtraProperties {
public static void main(String[] args) {
var templateFile = Paths.get(args[0]);
var propertiesFile = Paths.get(args[1]);
var gensrcFile = Paths.get(args[2]);
var propertyNames = Arrays.copyOfRange(args, 3, args.length);
var replacementMap = new HashMap<String, String>();
try {
for (var propertyName: propertyNames) {
var pn = "; " + propertyName.replaceFirst("=", "; ");
List<Range> ranges = Files.lines(propertiesFile)
.filter(Predicate.not(l -> l.startsWith("#") || l.isBlank()))
.filter(l -> l.contains(pn))
.map(l -> new Range(l.replaceFirst(" .*", "")))
.sorted()
.collect(ArrayList<Range>::new,
(list, r) -> {
// collapsing consecutive pictographic ranges
int lastIndex = list.size() - 1;
if (lastIndex >= 0) {
Range lastRange = list.get(lastIndex);
if (lastRange.last + 1 == r.start) {
list.set(lastIndex, new Range(lastRange.start, r.last));
return;
}
}
list.add(r);
},
ArrayList<Range>::addAll);
replacementMap.put("%%%" + propertyName + "%%%",
ranges.stream()
.map(GenerateExtraProperties::rangeToString)
.collect(Collectors.joining(" ||\n", "", ";")));
}
// Generate .java file
Files.write(gensrcFile,
Files.lines(templateFile)
.flatMap(l -> Stream.of(replacementMap.getOrDefault(l.trim(), l)))
.collect(Collectors.toList()),
StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
} catch (IOException e) {
e.printStackTrace();
}
}
static String rangeToString(Range r) {
if (r.start == r.last) {
return (" ".repeat(12) + "cp == 0x" + toHexString(r.start));
} else if (r.start == r.last - 1) {
return " ".repeat(12) + "cp == 0x" + toHexString(r.start) + " ||\n" +
" ".repeat(12) + "cp == 0x" + toHexString(r.last);
} else {
return " ".repeat(11) + "(cp >= 0x" + toHexString(r.start) +
" && cp <= 0x" + toHexString(r.last) + ")";
}
}
static int toInt(String hexStr) {
return Integer.parseUnsignedInt(hexStr, 16);
}
static String toHexString(int cp) {
String ret = Integer.toUnsignedString(cp, 16).toUpperCase();
if (ret.length() < 4) {
ret = "0".repeat(4 - ret.length()) + ret;
}
return ret;
}
static class Range implements Comparable<Range> {
int start;
int last;
Range (int start, int last) {
this.start = start;
this.last = last;
}
Range (String input) {
input = input.replaceFirst("\\s#.*", "");
start = toInt(input.replaceFirst("[\\s\\.].*", ""));
last = input.contains("..") ?
toInt(input.replaceFirst(".*\\.\\.", "")
.replaceFirst(";.*", "").trim())
: start;
}
@Override
public String toString() {
return "Start: " + toHexString(start) + ", Last: " + toHexString(last);
}
@Override
public int compareTo(Range other) {
return Integer.compare(start, other.start);
}
}
}

View File

@ -35,6 +35,7 @@ include gensrc/GensrcExceptions.gmk
include gensrc/GensrcVarHandles.gmk
include gensrc/GensrcModuleLoaderMap.gmk
include gensrc/GensrcScopedMemoryAccess.gmk
include gensrc/GensrcRegex.gmk
# GensrcLocaleData.gmk does not set TARGETS, so we must choose which targets
# to include.

View File

@ -0,0 +1,46 @@
#
# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation. Oracle designates this
# particular file as subject to the "Classpath" exception as provided
# by Oracle in the LICENSE file that accompanied this code.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#
#
# Rules to create java files under
# $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/util/regex/
#
GENSRC_INDICCONJUNCTBREAK := $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/util/regex/IndicConjunctBreak.java
INDICCONJUNCTBREAKTEMP := $(MODULE_SRC)/share/classes/jdk/internal/util/regex/IndicConjunctBreak.java.template
INDICCONJUNCTBREAKPROPS := $(MODULE_SRC)/share/data/unicodedata/DerivedCoreProperties.txt
INDICCONJUNCTBREAKPARAMS := InCB=Linker InCB=Extend InCB=Consonant
$(GENSRC_INDICCONJUNCTBREAK): $(BUILD_TOOLS_JDK) $(INDICCONJUNCTBREAKTEMP) $(INDICCONJUNCTBREAKPROPS)
$(call LogInfo, Generating $@)
$(call MakeTargetDir)
$(TOOL_GENERATEEXTRAPROPERTIES) \
$(INDICCONJUNCTBREAKTEMP) \
$(INDICCONJUNCTBREAKPROPS) \
$(GENSRC_INDICCONJUNCTBREAK) \
$(INDICCONJUNCTBREAKPARAMS)
TARGETS += $(GENSRC_INDICCONJUNCTBREAK)

View File

@ -63,7 +63,7 @@ import static java.lang.constant.ConstantDescs.DEFAULT_NAME;
* from the Unicode Consortium at
* <a href="http://www.unicode.org">http://www.unicode.org</a>.
* <p>
* Character information is based on the Unicode Standard, version 15.0.
* Character information is based on the Unicode Standard, version 15.1.
* <p>
* The Java platform has supported different versions of the Unicode
* Standard over time. Upgrades to newer versions of the Unicode Standard
@ -75,6 +75,8 @@ import static java.lang.constant.ConstantDescs.DEFAULT_NAME;
* <th scope="col">Unicode version</th></tr>
* </thead>
* <tbody>
* <tr><th scope="row" style="text-align:left">Java SE 22</th>
* <td>Unicode 15.1</td></tr>
* <tr><th scope="row" style="text-align:left">Java SE 20</th>
* <td>Unicode 15.0</td></tr>
* <tr><th scope="row" style="text-align:left">Java SE 19</th>
@ -744,7 +746,7 @@ class Character implements java.io.Serializable, Comparable<Character>, Constabl
* It should be adjusted whenever the Unicode Character Database
* is upgraded.
*/
private static final int NUM_ENTITIES = 756;
private static final int NUM_ENTITIES = 759;
private static Map<String, UnicodeBlock> map = HashMap.newHashMap(NUM_ENTITIES);
/**
@ -3611,6 +3613,16 @@ class Character implements java.io.Serializable, Comparable<Character>, Constabl
"CJK UNIFIED IDEOGRAPHS EXTENSION H",
"CJKUNIFIEDIDEOGRAPHSEXTENSIONH");
/**
* Constant for the "CJK Unified Ideographs Extension I" Unicode
* character block.
* @since 22
*/
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I =
new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I",
"CJK UNIFIED IDEOGRAPHS EXTENSION I",
"CJKUNIFIEDIDEOGRAPHSEXTENSIONI");
private static final int[] blockStarts = {
0x0000, // 0000..007F; Basic Latin
0x0080, // 0080..00FF; Latin-1 Supplement
@ -3978,7 +3990,8 @@ class Character implements java.io.Serializable, Comparable<Character>, Constabl
0x2B740, // 2B740..2B81F; CJK Unified Ideographs Extension D
0x2B820, // 2B820..2CEAF; CJK Unified Ideographs Extension E
0x2CEB0, // 2CEB0..2EBEF; CJK Unified Ideographs Extension F
0x2EBF0, // unassigned
0x2EBF0, // 2EBF0..2EE5F; CJK Unified Ideographs Extension I
0x2EE60, // unassigned
0x2F800, // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
0x2FA20, // unassigned
0x30000, // 30000..3134F; CJK Unified Ideographs Extension G
@ -4359,6 +4372,7 @@ class Character implements java.io.Serializable, Comparable<Character>, Constabl
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E,
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F,
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I,
null,
CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
null,
@ -6057,9 +6071,7 @@ class Character implements java.io.Serializable, Comparable<Character>, Constabl
0x2EF4, // 2EF4..2EFF; UNKNOWN
0x2F00, // 2F00..2FD5; HAN
0x2FD6, // 2FD6..2FEF; UNKNOWN
0x2FF0, // 2FF0..2FFB; COMMON
0x2FFC, // 2FFC..2FFF; UNKNOWN
0x3000, // 3000..3004; COMMON
0x2FF0, // 2FF0..3004; COMMON
0x3005, // 3005 ; HAN
0x3006, // 3006 ; COMMON
0x3007, // 3007 ; HAN
@ -6088,7 +6100,8 @@ class Character implements java.io.Serializable, Comparable<Character>, Constabl
0x3190, // 3190..319F; COMMON
0x31A0, // 31A0..31BF; BOPOMOFO
0x31C0, // 31C0..31E3; COMMON
0x31E4, // 31E4..31EF; UNKNOWN
0x31E4, // 31E4..31EE; UNKNOWN
0x31EF, // 31EF ; COMMON
0x31F0, // 31F0..31FF; KATAKANA
0x3200, // 3200..321E; HANGUL
0x321F, // 321F ; UNKNOWN
@ -7028,7 +7041,9 @@ class Character implements java.io.Serializable, Comparable<Character>, Constabl
0x2B820, // 2B820..2CEA1; HAN
0x2CEA2, // 2CEA2..2CEAF; UNKNOWN
0x2CEB0, // 2CEB0..2EBE0; HAN
0x2EBE1, // 2EBE1..2F7FF; UNKNOWN
0x2EBE1, // 2EBE1..2EBEF; UNKNOWN
0x2EBF0, // 2EBF0..2EE5D; HAN
0x2EE5E, // 2EE5E..2F7FF; UNKNOWN
0x2F800, // 2F800..2FA1D; HAN
0x2FA1E, // 2FA1E..2FFFF; UNKNOWN
0x30000, // 30000..3134A; HAN
@ -7717,9 +7732,7 @@ class Character implements java.io.Serializable, Comparable<Character>, Constabl
UNKNOWN, // 2EF4..2EFF
HAN, // 2F00..2FD5
UNKNOWN, // 2FD6..2FEF
COMMON, // 2FF0..2FFB
UNKNOWN, // 2FFC..2FFF
COMMON, // 3000..3004
COMMON, // 2FF0..3004
HAN, // 3005
COMMON, // 3006
HAN, // 3007
@ -7748,7 +7761,8 @@ class Character implements java.io.Serializable, Comparable<Character>, Constabl
COMMON, // 3190..319F
BOPOMOFO, // 31A0..31BF
COMMON, // 31C0..31E3
UNKNOWN, // 31E4..31EF
UNKNOWN, // 31E4..31EE
COMMON, // 31EF
KATAKANA, // 31F0..31FF
HANGUL, // 3200..321E
UNKNOWN, // 321F
@ -8688,7 +8702,9 @@ class Character implements java.io.Serializable, Comparable<Character>, Constabl
HAN, // 2B820..2CEA1
UNKNOWN, // 2CEA2..2CEAF
HAN, // 2CEB0..2EBE0
UNKNOWN, // 2EBE1..2F7FF
UNKNOWN, // 2EBE1..2EBEF
HAN, // 2EBF0..2EE5D
UNKNOWN, // 2EE5E..2F7FF
HAN, // 2F800..2FA1D
UNKNOWN, // 2FA1E..2FFFF
HAN, // 30000..3134A

View File

@ -35,9 +35,9 @@ public final class Grapheme {
* <p>
* See Unicode Standard Annex #29 Unicode Text Segmentation for the specification
* for the extended grapheme cluster boundary rules. The following implementation
* is based on the annex for Unicode version 15.0.
* (http://www.unicode.org/reports/tr29/tr29-40.html)
* is based on the annex for Unicode version 15.1.
*
* @spec http://www.unicode.org/reports/tr29/tr29-43.html
* @param src the {@code CharSequence} to be scanned
* @param off offset to start looking for the next boundary in the src
* @param limit limit offset in the src (exclusive)
@ -56,6 +56,15 @@ public final class Grapheme {
int ch1 = Character.codePointAt(src, ret);
int t1 = getType(ch1);
// GB9c
if (IndicConjunctBreak.isConsonant(ch0)) {
var advance = checkIndicConjunctBreak(src, ret, limit);
if (advance >= 0) {
ret += advance;
continue;
}
}
if (gb11 && t0 == ZWJ && t1 == EXTENDED_PICTOGRAPHIC) {
// continue for gb11
} else if (riCount % 2 == 1 && t0 == RI && t1 == RI) {
@ -70,6 +79,7 @@ public final class Grapheme {
}
riCount += (t1 == RI) ? 1 : 0;
ch0 = ch1;
t0 = t1;
ret += Character.charCount(ch1);
@ -283,4 +293,40 @@ public final class Grapheme {
}
return OTHER;
}
/**
* Checks for a possible GB9c Indic Conjunct Break sequence. If it is
* repetitive, e.g., Consonant1/Linker1/Consonant2/Linker2/Consonant3, only
* the first part of the sequence (Consonant1/Linker1/Consonant2) is
* recognized. The rest is analyzed in the next iteration of the grapheme
* cluster boundary search.
*
* @param src the source char sequence
* @param index the index that points to the starting Linking Consonant
* @param limit limit to the char sequence
* @return the advance in index if the indic conjunct break sequence
* is found, it will be negative if the sequence is not found
*/
private static int checkIndicConjunctBreak(CharSequence src, int index, int limit) {
boolean linkerFound = false;
int advance = 0;
while (index + advance < limit) {
int ch1 = Character.codePointAt(src, index + advance);
advance += Character.charCount(ch1);
if (IndicConjunctBreak.isLinker(ch1)) {
linkerFound = true;
} else if (IndicConjunctBreak.isConsonant(ch1)) {
if (linkerFound) {
return advance;
} else {
break;
}
} else if (!IndicConjunctBreak.isExtend(ch1)) {
break;
}
}
return -1;
}
}

View File

@ -0,0 +1,60 @@
/*
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package jdk.internal.util.regex;
/**
* Helper class for supporting the GB9c rule in Unicode Text Segmentation TR29
*
* <blockquote>
* GB9c Do not break within certain combinations with Indic_Conjunct_Break (InCB)=Linker.
*
* \p{InCB=Consonant} [ \p{InCB=Extend} \p{InCB=Linker} ]* \p{InCB=Linker} [ \p{InCB=Extend} \p{InCB=Linker} ]* x \p{InCB=Consonant}*
* </blockquote>
*
* Code point conditions included in this class are derived from the "Derived Property: Indic_Conjunct_Break"
* section in DerivedCoreProperties.txt of the Unicode Character Database.
*/
final class IndicConjunctBreak {
static boolean isLinker(int cp) {
return
%%%InCB=Linker%%%
}
static boolean isExtend(int cp) {
return
%%%InCB=Extend%%%
}
static boolean isConsonant(int cp) {
// fast check - Devanagari to Malayalam
if (cp < 0x0900 || cp > 0x0D7F) {
return false;
}
return
%%%InCB=Consonant%%%
}
}

View File

@ -1,6 +1,6 @@
# Blocks-15.0.0.txt
# Date: 2022-01-28, 20:58:00 GMT [KW]
# Copyright (c) 2022 Unicode, Inc.
# Blocks-15.1.0.txt
# Date: 2023-07-28, 15:47:20 GMT
# Copyright (c) 2023 Unicode, Inc.
# For terms of use, see https://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
@ -352,6 +352,7 @@ FFF0..FFFF; Specials
2B740..2B81F; CJK Unified Ideographs Extension D
2B820..2CEAF; CJK Unified Ideographs Extension E
2CEB0..2EBEF; CJK Unified Ideographs Extension F
2EBF0..2EE5F; CJK Unified Ideographs Extension I
2F800..2FA1F; CJK Compatibility Ideographs Supplement
30000..3134F; CJK Unified Ideographs Extension G
31350..323AF; CJK Unified Ideographs Extension H

View File

@ -1,6 +1,6 @@
# DerivedCoreProperties-15.0.0.txt
# Date: 2022-08-05, 22:17:05 GMT
# Copyright (c) 2022 Unicode, Inc.
# DerivedCoreProperties-15.1.0.txt
# Date: 2023-08-07, 15:21:24 GMT
# Copyright (c) 2023 Unicode, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
#
@ -1397,11 +1397,12 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG
2B740..2B81D ; Alphabetic # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; Alphabetic # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEB0..2EBE0 ; Alphabetic # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
2EBF0..2EE5D ; Alphabetic # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
2F800..2FA1D ; Alphabetic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
# Total code points: 137765
# Total code points: 138387
# ================================================
@ -6853,11 +6854,12 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
2B740..2B81D ; ID_Start # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; ID_Start # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEB0..2EBE0 ; ID_Start # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
2EBF0..2EE5D ; ID_Start # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
2F800..2FA1D ; ID_Start # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
# Total code points: 136345
# Total code points: 136967
# ================================================
@ -7438,6 +7440,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
1FE0..1FEC ; ID_Continue # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA
1FF2..1FF4 ; ID_Continue # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
1FF6..1FFC ; ID_Continue # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
200C..200D ; ID_Continue # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
203F..2040 ; ID_Continue # Pc [2] UNDERTIE..CHARACTER TIE
2054 ; ID_Continue # Pc INVERTED UNDERTIE
2071 ; ID_Continue # Lm SUPERSCRIPT LATIN SMALL LETTER I
@ -7504,6 +7507,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
309D..309E ; ID_Continue # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK
309F ; ID_Continue # Lo HIRAGANA DIGRAPH YORI
30A1..30FA ; ID_Continue # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO
30FB ; ID_Continue # Po KATAKANA MIDDLE DOT
30FC..30FE ; ID_Continue # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK
30FF ; ID_Continue # Lo KATAKANA DIGRAPH KOTO
3105..312F ; ID_Continue # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN
@ -7683,6 +7687,7 @@ FF10..FF19 ; ID_Continue # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NIN
FF21..FF3A ; ID_Continue # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
FF3F ; ID_Continue # Pc FULLWIDTH LOW LINE
FF41..FF5A ; ID_Continue # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
FF65 ; ID_Continue # Po HALFWIDTH KATAKANA MIDDLE DOT
FF66..FF6F ; ID_Continue # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU
FF70 ; ID_Continue # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
FF71..FF9D ; ID_Continue # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N
@ -8207,12 +8212,13 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
2B740..2B81D ; ID_Continue # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; ID_Continue # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEB0..2EBE0 ; ID_Continue # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
2EBF0..2EE5D ; ID_Continue # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
2F800..2FA1D ; ID_Continue # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
30000..3134A ; ID_Continue # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
# Total code points: 139482
# Total code points: 140108
# ================================================
@ -8962,11 +8968,12 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU
2B740..2B81D ; XID_Start # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; XID_Start # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEB0..2EBE0 ; XID_Start # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
2EBF0..2EE5D ; XID_Start # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
2F800..2FA1D ; XID_Start # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
# Total code points: 136322
# Total code points: 136944
# ================================================
@ -9543,6 +9550,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU
1FE0..1FEC ; XID_Continue # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA
1FF2..1FF4 ; XID_Continue # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
1FF6..1FFC ; XID_Continue # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
200C..200D ; XID_Continue # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
203F..2040 ; XID_Continue # Pc [2] UNDERTIE..CHARACTER TIE
2054 ; XID_Continue # Pc INVERTED UNDERTIE
2071 ; XID_Continue # Lm SUPERSCRIPT LATIN SMALL LETTER I
@ -9608,6 +9616,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU
309D..309E ; XID_Continue # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK
309F ; XID_Continue # Lo HIRAGANA DIGRAPH YORI
30A1..30FA ; XID_Continue # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO
30FB ; XID_Continue # Po KATAKANA MIDDLE DOT
30FC..30FE ; XID_Continue # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK
30FF ; XID_Continue # Lo KATAKANA DIGRAPH KOTO
3105..312F ; XID_Continue # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN
@ -9793,6 +9802,7 @@ FF10..FF19 ; XID_Continue # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NI
FF21..FF3A ; XID_Continue # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
FF3F ; XID_Continue # Pc FULLWIDTH LOW LINE
FF41..FF5A ; XID_Continue # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
FF65 ; XID_Continue # Po HALFWIDTH KATAKANA MIDDLE DOT
FF66..FF6F ; XID_Continue # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU
FF70 ; XID_Continue # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
FF71..FF9D ; XID_Continue # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N
@ -10317,12 +10327,13 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA
2B740..2B81D ; XID_Continue # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; XID_Continue # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEB0..2EBE0 ; XID_Continue # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
2EBF0..2EE5D ; XID_Continue # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
2F800..2FA1D ; XID_Continue # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
30000..3134A ; XID_Continue # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
# Total code points: 139463
# Total code points: 140089
# ================================================
@ -10335,6 +10346,15 @@ E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTO
# - FFF9..FFFB (Interlinear annotation format characters)
# - 13430..13440 (Egyptian hieroglyph format characters)
# - Prepended_Concatenation_Mark (Exceptional format characters that should be visible)
#
# There are currently no stability guarantees for DICP. However, the
# values of DICP interact with the derivation of XID_Continue
# and NFKC_CF, for which there are stability guarantees.
# Maintainers of this property should note that in the
# unlikely case that the DICP value changes for an existing character
# which is also XID_Continue=Yes, then exceptions must be put
# in place to ensure that the NFKC_CF mapping value for that
# existing character does not change.
00AD ; Default_Ignorable_Code_Point # Cf SOFT HYPHEN
034F ; Default_Ignorable_Code_Point # Mn COMBINING GRAPHEME JOINER
@ -11602,7 +11622,7 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE
2E80..2E99 ; Grapheme_Base # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP
2E9B..2EF3 ; Grapheme_Base # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE
2F00..2FD5 ; Grapheme_Base # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE
2FF0..2FFB ; Grapheme_Base # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
2FF0..2FFF ; Grapheme_Base # So [16] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ROTATION
3000 ; Grapheme_Base # Zs IDEOGRAPHIC SPACE
3001..3003 ; Grapheme_Base # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
3004 ; Grapheme_Base # So JAPANESE INDUSTRIAL STANDARD SYMBOL
@ -11657,6 +11677,7 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE
3196..319F ; Grapheme_Base # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK
31A0..31BF ; Grapheme_Base # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH
31C0..31E3 ; Grapheme_Base # So [36] CJK STROKE T..CJK STROKE Q
31EF ; Grapheme_Base # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION
31F0..31FF ; Grapheme_Base # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
3200..321E ; Grapheme_Base # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU
3220..3229 ; Grapheme_Base # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN
@ -12497,11 +12518,12 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME
2B740..2B81D ; Grapheme_Base # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; Grapheme_Base # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEB0..2EBE0 ; Grapheme_Base # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
2EBF0..2EE5D ; Grapheme_Base # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
2F800..2FA1D ; Grapheme_Base # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
# Total code points: 146986
# Total code points: 147613
# ================================================
@ -12572,4 +12594,239 @@ ABED ; Grapheme_Link # Mn MEETEI MAYEK APUN IYEK
# Total code points: 65
# ================================================
# Derived Property: Indic_Conjunct_Break
# Generated from the Grapheme_Cluster_Break, Indic_Syllabic_Category,
# Canonical_Combining_Class, and Script properties as described in UAX #44:
# https://www.unicode.org/reports/tr44/.
# All code points not explicitly listed for Indic_Conjunct_Break
# have the value None.
# @missing: 0000..10FFFF; InCB; None
# ================================================
# Indic_Conjunct_Break=Linker
094D ; InCB; Linker # Mn DEVANAGARI SIGN VIRAMA
09CD ; InCB; Linker # Mn BENGALI SIGN VIRAMA
0ACD ; InCB; Linker # Mn GUJARATI SIGN VIRAMA
0B4D ; InCB; Linker # Mn ORIYA SIGN VIRAMA
0C4D ; InCB; Linker # Mn TELUGU SIGN VIRAMA
0D4D ; InCB; Linker # Mn MALAYALAM SIGN VIRAMA
# Total code points: 6
# ================================================
# Indic_Conjunct_Break=Consonant
0915..0939 ; InCB; Consonant # Lo [37] DEVANAGARI LETTER KA..DEVANAGARI LETTER HA
0958..095F ; InCB; Consonant # Lo [8] DEVANAGARI LETTER QA..DEVANAGARI LETTER YYA
0978..097F ; InCB; Consonant # Lo [8] DEVANAGARI LETTER MARWARI DDA..DEVANAGARI LETTER BBA
0995..09A8 ; InCB; Consonant # Lo [20] BENGALI LETTER KA..BENGALI LETTER NA
09AA..09B0 ; InCB; Consonant # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA
09B2 ; InCB; Consonant # Lo BENGALI LETTER LA
09B6..09B9 ; InCB; Consonant # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA
09DC..09DD ; InCB; Consonant # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA
09DF ; InCB; Consonant # Lo BENGALI LETTER YYA
09F0..09F1 ; InCB; Consonant # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL
0A95..0AA8 ; InCB; Consonant # Lo [20] GUJARATI LETTER KA..GUJARATI LETTER NA
0AAA..0AB0 ; InCB; Consonant # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA
0AB2..0AB3 ; InCB; Consonant # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA
0AB5..0AB9 ; InCB; Consonant # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA
0AF9 ; InCB; Consonant # Lo GUJARATI LETTER ZHA
0B15..0B28 ; InCB; Consonant # Lo [20] ORIYA LETTER KA..ORIYA LETTER NA
0B2A..0B30 ; InCB; Consonant # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA
0B32..0B33 ; InCB; Consonant # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA
0B35..0B39 ; InCB; Consonant # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA
0B5C..0B5D ; InCB; Consonant # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA
0B5F ; InCB; Consonant # Lo ORIYA LETTER YYA
0B71 ; InCB; Consonant # Lo ORIYA LETTER WA
0C15..0C28 ; InCB; Consonant # Lo [20] TELUGU LETTER KA..TELUGU LETTER NA
0C2A..0C39 ; InCB; Consonant # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA
0C58..0C5A ; InCB; Consonant # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA
0D15..0D3A ; InCB; Consonant # Lo [38] MALAYALAM LETTER KA..MALAYALAM LETTER TTTA
# Total code points: 240
# ================================================
# Indic_Conjunct_Break=Extend
0300..034E ; InCB; Extend # Mn [79] COMBINING GRAVE ACCENT..COMBINING UPWARDS ARROW BELOW
0350..036F ; InCB; Extend # Mn [32] COMBINING RIGHT ARROWHEAD ABOVE..COMBINING LATIN SMALL LETTER X
0483..0487 ; InCB; Extend # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE
0591..05BD ; InCB; Extend # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
05BF ; InCB; Extend # Mn HEBREW POINT RAFE
05C1..05C2 ; InCB; Extend # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
05C4..05C5 ; InCB; Extend # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
05C7 ; InCB; Extend # Mn HEBREW POINT QAMATS QATAN
0610..061A ; InCB; Extend # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
064B..065F ; InCB; Extend # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW
0670 ; InCB; Extend # Mn ARABIC LETTER SUPERSCRIPT ALEF
06D6..06DC ; InCB; Extend # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
06DF..06E4 ; InCB; Extend # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
06E7..06E8 ; InCB; Extend # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
06EA..06ED ; InCB; Extend # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
0711 ; InCB; Extend # Mn SYRIAC LETTER SUPERSCRIPT ALAPH
0730..074A ; InCB; Extend # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
07EB..07F3 ; InCB; Extend # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
07FD ; InCB; Extend # Mn NKO DANTAYALAN
0816..0819 ; InCB; Extend # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH
081B..0823 ; InCB; Extend # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
0825..0827 ; InCB; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
0829..082D ; InCB; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
0859..085B ; InCB; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
0898..089F ; InCB; Extend # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
08CA..08E1 ; InCB; Extend # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
08E3..08FF ; InCB; Extend # Mn [29] ARABIC TURNED DAMMA BELOW..ARABIC MARK SIDEWAYS NOON GHUNNA
093C ; InCB; Extend # Mn DEVANAGARI SIGN NUKTA
0951..0954 ; InCB; Extend # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT
09BC ; InCB; Extend # Mn BENGALI SIGN NUKTA
09FE ; InCB; Extend # Mn BENGALI SANDHI MARK
0A3C ; InCB; Extend # Mn GURMUKHI SIGN NUKTA
0ABC ; InCB; Extend # Mn GUJARATI SIGN NUKTA
0B3C ; InCB; Extend # Mn ORIYA SIGN NUKTA
0C3C ; InCB; Extend # Mn TELUGU SIGN NUKTA
0C55..0C56 ; InCB; Extend # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
0CBC ; InCB; Extend # Mn KANNADA SIGN NUKTA
0D3B..0D3C ; InCB; Extend # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA
0E38..0E3A ; InCB; Extend # Mn [3] THAI CHARACTER SARA U..THAI CHARACTER PHINTHU
0E48..0E4B ; InCB; Extend # Mn [4] THAI CHARACTER MAI EK..THAI CHARACTER MAI CHATTAWA
0EB8..0EBA ; InCB; Extend # Mn [3] LAO VOWEL SIGN U..LAO SIGN PALI VIRAMA
0EC8..0ECB ; InCB; Extend # Mn [4] LAO TONE MAI EK..LAO TONE MAI CATAWA
0F18..0F19 ; InCB; Extend # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS
0F35 ; InCB; Extend # Mn TIBETAN MARK NGAS BZUNG NYI ZLA
0F37 ; InCB; Extend # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS
0F39 ; InCB; Extend # Mn TIBETAN MARK TSA -PHRU
0F71..0F72 ; InCB; Extend # Mn [2] TIBETAN VOWEL SIGN AA..TIBETAN VOWEL SIGN I
0F74 ; InCB; Extend # Mn TIBETAN VOWEL SIGN U
0F7A..0F7D ; InCB; Extend # Mn [4] TIBETAN VOWEL SIGN E..TIBETAN VOWEL SIGN OO
0F80 ; InCB; Extend # Mn TIBETAN VOWEL SIGN REVERSED I
0F82..0F84 ; InCB; Extend # Mn [3] TIBETAN SIGN NYI ZLA NAA DA..TIBETAN MARK HALANTA
0F86..0F87 ; InCB; Extend # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
0FC6 ; InCB; Extend # Mn TIBETAN SYMBOL PADMA GDAN
1037 ; InCB; Extend # Mn MYANMAR SIGN DOT BELOW
1039..103A ; InCB; Extend # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT
108D ; InCB; Extend # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE
135D..135F ; InCB; Extend # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK
1714 ; InCB; Extend # Mn TAGALOG SIGN VIRAMA
17D2 ; InCB; Extend # Mn KHMER SIGN COENG
17DD ; InCB; Extend # Mn KHMER SIGN ATTHACAN
18A9 ; InCB; Extend # Mn MONGOLIAN LETTER ALI GALI DAGALGA
1939..193B ; InCB; Extend # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I
1A17..1A18 ; InCB; Extend # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
1A60 ; InCB; Extend # Mn TAI THAM SIGN SAKOT
1A75..1A7C ; InCB; Extend # Mn [8] TAI THAM SIGN TONE-1..TAI THAM SIGN KHUEN-LUE KARAN
1A7F ; InCB; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
1AB0..1ABD ; InCB; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
1ABF..1ACE ; InCB; Extend # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T
1B34 ; InCB; Extend # Mn BALINESE SIGN REREKAN
1B6B..1B73 ; InCB; Extend # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
1BAB ; InCB; Extend # Mn SUNDANESE SIGN VIRAMA
1BE6 ; InCB; Extend # Mn BATAK SIGN TOMPI
1C37 ; InCB; Extend # Mn LEPCHA SIGN NUKTA
1CD0..1CD2 ; InCB; Extend # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
1CD4..1CE0 ; InCB; Extend # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
1CE2..1CE8 ; InCB; Extend # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
1CED ; InCB; Extend # Mn VEDIC SIGN TIRYAK
1CF4 ; InCB; Extend # Mn VEDIC TONE CANDRA ABOVE
1CF8..1CF9 ; InCB; Extend # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
1DC0..1DFF ; InCB; Extend # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
200D ; InCB; Extend # Cf ZERO WIDTH JOINER
20D0..20DC ; InCB; Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
20E1 ; InCB; Extend # Mn COMBINING LEFT RIGHT ARROW ABOVE
20E5..20F0 ; InCB; Extend # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE
2CEF..2CF1 ; InCB; Extend # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS
2D7F ; InCB; Extend # Mn TIFINAGH CONSONANT JOINER
2DE0..2DFF ; InCB; Extend # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
302A..302D ; InCB; Extend # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK
302E..302F ; InCB; Extend # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK
3099..309A ; InCB; Extend # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
A66F ; InCB; Extend # Mn COMBINING CYRILLIC VZMET
A674..A67D ; InCB; Extend # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK
A69E..A69F ; InCB; Extend # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E
A6F0..A6F1 ; InCB; Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
A82C ; InCB; Extend # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA
A8E0..A8F1 ; InCB; Extend # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
A92B..A92D ; InCB; Extend # Mn [3] KAYAH LI TONE PLOPHU..KAYAH LI TONE CALYA PLOPHU
A9B3 ; InCB; Extend # Mn JAVANESE SIGN CECAK TELU
AAB0 ; InCB; Extend # Mn TAI VIET MAI KANG
AAB2..AAB4 ; InCB; Extend # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U
AAB7..AAB8 ; InCB; Extend # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA
AABE..AABF ; InCB; Extend # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK
AAC1 ; InCB; Extend # Mn TAI VIET TONE MAI THO
AAF6 ; InCB; Extend # Mn MEETEI MAYEK VIRAMA
ABED ; InCB; Extend # Mn MEETEI MAYEK APUN IYEK
FB1E ; InCB; Extend # Mn HEBREW POINT JUDEO-SPANISH VARIKA
FE20..FE2F ; InCB; Extend # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF
101FD ; InCB; Extend # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE
102E0 ; InCB; Extend # Mn COPTIC EPACT THOUSANDS MARK
10376..1037A ; InCB; Extend # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII
10A0D ; InCB; Extend # Mn KHAROSHTHI SIGN DOUBLE RING BELOW
10A0F ; InCB; Extend # Mn KHAROSHTHI SIGN VISARGA
10A38..10A3A ; InCB; Extend # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
10A3F ; InCB; Extend # Mn KHAROSHTHI VIRAMA
10AE5..10AE6 ; InCB; Extend # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
10D24..10D27 ; InCB; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
10EAB..10EAC ; InCB; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
10EFD..10EFF ; InCB; Extend # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
10F46..10F50 ; InCB; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
10F82..10F85 ; InCB; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
11070 ; InCB; Extend # Mn BRAHMI SIGN OLD TAMIL VIRAMA
1107F ; InCB; Extend # Mn BRAHMI NUMBER JOINER
110BA ; InCB; Extend # Mn KAITHI SIGN NUKTA
11100..11102 ; InCB; Extend # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA
11133..11134 ; InCB; Extend # Mn [2] CHAKMA VIRAMA..CHAKMA MAAYYAA
11173 ; InCB; Extend # Mn MAHAJANI SIGN NUKTA
111CA ; InCB; Extend # Mn SHARADA SIGN NUKTA
11236 ; InCB; Extend # Mn KHOJKI SIGN NUKTA
112E9..112EA ; InCB; Extend # Mn [2] KHUDAWADI SIGN NUKTA..KHUDAWADI SIGN VIRAMA
1133B..1133C ; InCB; Extend # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA
11366..1136C ; InCB; Extend # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX
11370..11374 ; InCB; Extend # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA
11446 ; InCB; Extend # Mn NEWA SIGN NUKTA
1145E ; InCB; Extend # Mn NEWA SANDHI MARK
114C3 ; InCB; Extend # Mn TIRHUTA SIGN NUKTA
115C0 ; InCB; Extend # Mn SIDDHAM SIGN NUKTA
116B7 ; InCB; Extend # Mn TAKRI SIGN NUKTA
1172B ; InCB; Extend # Mn AHOM SIGN KILLER
1183A ; InCB; Extend # Mn DOGRA SIGN NUKTA
1193E ; InCB; Extend # Mn DIVES AKURU VIRAMA
11943 ; InCB; Extend # Mn DIVES AKURU SIGN NUKTA
11A34 ; InCB; Extend # Mn ZANABAZAR SQUARE SIGN VIRAMA
11A47 ; InCB; Extend # Mn ZANABAZAR SQUARE SUBJOINER
11A99 ; InCB; Extend # Mn SOYOMBO SUBJOINER
11D42 ; InCB; Extend # Mn MASARAM GONDI SIGN NUKTA
11D44..11D45 ; InCB; Extend # Mn [2] MASARAM GONDI SIGN HALANTA..MASARAM GONDI VIRAMA
11D97 ; InCB; Extend # Mn GUNJALA GONDI VIRAMA
11F42 ; InCB; Extend # Mn KAWI CONJOINER
16AF0..16AF4 ; InCB; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE
16B30..16B36 ; InCB; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
1BC9E ; InCB; Extend # Mn DUPLOYAN DOUBLE MARK
1D165 ; InCB; Extend # Mc MUSICAL SYMBOL COMBINING STEM
1D167..1D169 ; InCB; Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
1D16E..1D172 ; InCB; Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5
1D17B..1D182 ; InCB; Extend # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
1D185..1D18B ; InCB; Extend # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
1D1AA..1D1AD ; InCB; Extend # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
1D242..1D244 ; InCB; Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
1E000..1E006 ; InCB; Extend # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
1E008..1E018 ; InCB; Extend # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
1E01B..1E021 ; InCB; Extend # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
1E023..1E024 ; InCB; Extend # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
1E026..1E02A ; InCB; Extend # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
1E08F ; InCB; Extend # Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
1E130..1E136 ; InCB; Extend # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
1E2AE ; InCB; Extend # Mn TOTO SIGN RISING TONE
1E2EC..1E2EF ; InCB; Extend # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI
1E4EC..1E4EF ; InCB; Extend # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH
1E8D0..1E8D6 ; InCB; Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
1E944..1E94A ; InCB; Extend # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
# Total code points: 884
# EOF

View File

@ -1,6 +1,6 @@
# NormalizationTest-15.0.0.txt
# Date: 2022-04-02, 01:29:09 GMT
# Copyright (c) 2022 Unicode, Inc.
# NormalizationTest-15.1.0.txt
# Date: 2023-01-05, 20:34:44 GMT
# Copyright (c) 2023 Unicode, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
#

View File

@ -1,6 +1,6 @@
# PropList-15.0.0.txt
# Date: 2022-08-05, 22:17:16 GMT
# Copyright (c) 2022 Unicode, Inc.
# PropList-15.1.0.txt
# Date: 2023-08-01, 21:56:53 GMT
# Copyright (c) 2023 Unicode, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
#
@ -856,11 +856,12 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM
2B740..2B81D ; Ideographic # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; Ideographic # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEB0..2EBE0 ; Ideographic # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
2EBF0..2EE5D ; Ideographic # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
2F800..2FA1D ; Ideographic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
30000..3134A ; Ideographic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; Ideographic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
# Total code points: 105854
# Total code points: 106476
# ================================================
@ -1241,9 +1242,10 @@ E0020..E007F ; Other_Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG
# ================================================
2FF0..2FF1 ; IDS_Binary_Operator # So [2] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO BELOW
2FF4..2FFB ; IDS_Binary_Operator # So [8] IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
2FF4..2FFD ; IDS_Binary_Operator # So [10] IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND..IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LOWER RIGHT
31EF ; IDS_Binary_Operator # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION
# Total code points: 10
# Total code points: 13
# ================================================
@ -1253,6 +1255,12 @@ E0020..E007F ; Other_Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG
# ================================================
2FFE..2FFF ; IDS_Unary_Operator # So [2] IDEOGRAPHIC DESCRIPTION CHARACTER HORIZONTAL REFLECTION..IDEOGRAPHIC DESCRIPTION CHARACTER ROTATION
# Total code points: 2
# ================================================
2E80..2E99 ; Radical # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP
2E9B..2EF3 ; Radical # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE
2F00..2FD5 ; Radical # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE
@ -1275,10 +1283,11 @@ FA27..FA29 ; Unified_Ideograph # Lo [3] CJK COMPATIBILITY IDEOGRAPH-FA27..C
2B740..2B81D ; Unified_Ideograph # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; Unified_Ideograph # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEB0..2EBE0 ; Unified_Ideograph # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
2EBF0..2EE5D ; Unified_Ideograph # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
30000..3134A ; Unified_Ideograph # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; Unified_Ideograph # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
# Total code points: 97058
# Total code points: 97680
# ================================================
@ -1376,8 +1385,58 @@ AABB..AABC ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL AUE..TAI VIET
0387 ; Other_ID_Continue # Po GREEK ANO TELEIA
1369..1371 ; Other_ID_Continue # No [9] ETHIOPIC DIGIT ONE..ETHIOPIC DIGIT NINE
19DA ; Other_ID_Continue # No NEW TAI LUE THAM DIGIT ONE
200C..200D ; Other_ID_Continue # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
30FB ; Other_ID_Continue # Po KATAKANA MIDDLE DOT
FF65 ; Other_ID_Continue # Po HALFWIDTH KATAKANA MIDDLE DOT
# Total code points: 12
# Total code points: 16
# ================================================
00B2..00B3 ; ID_Compat_Math_Continue # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE
00B9 ; ID_Compat_Math_Continue # No SUPERSCRIPT ONE
2070 ; ID_Compat_Math_Continue # No SUPERSCRIPT ZERO
2074..2079 ; ID_Compat_Math_Continue # No [6] SUPERSCRIPT FOUR..SUPERSCRIPT NINE
207A..207C ; ID_Compat_Math_Continue # Sm [3] SUPERSCRIPT PLUS SIGN..SUPERSCRIPT EQUALS SIGN
207D ; ID_Compat_Math_Continue # Ps SUPERSCRIPT LEFT PARENTHESIS
207E ; ID_Compat_Math_Continue # Pe SUPERSCRIPT RIGHT PARENTHESIS
2080..2089 ; ID_Compat_Math_Continue # No [10] SUBSCRIPT ZERO..SUBSCRIPT NINE
208A..208C ; ID_Compat_Math_Continue # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN
208D ; ID_Compat_Math_Continue # Ps SUBSCRIPT LEFT PARENTHESIS
208E ; ID_Compat_Math_Continue # Pe SUBSCRIPT RIGHT PARENTHESIS
2202 ; ID_Compat_Math_Continue # Sm PARTIAL DIFFERENTIAL
2207 ; ID_Compat_Math_Continue # Sm NABLA
221E ; ID_Compat_Math_Continue # Sm INFINITY
1D6C1 ; ID_Compat_Math_Continue # Sm MATHEMATICAL BOLD NABLA
1D6DB ; ID_Compat_Math_Continue # Sm MATHEMATICAL BOLD PARTIAL DIFFERENTIAL
1D6FB ; ID_Compat_Math_Continue # Sm MATHEMATICAL ITALIC NABLA
1D715 ; ID_Compat_Math_Continue # Sm MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL
1D735 ; ID_Compat_Math_Continue # Sm MATHEMATICAL BOLD ITALIC NABLA
1D74F ; ID_Compat_Math_Continue # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL
1D76F ; ID_Compat_Math_Continue # Sm MATHEMATICAL SANS-SERIF BOLD NABLA
1D789 ; ID_Compat_Math_Continue # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL
1D7A9 ; ID_Compat_Math_Continue # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA
1D7C3 ; ID_Compat_Math_Continue # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL
# Total code points: 43
# ================================================
2202 ; ID_Compat_Math_Start # Sm PARTIAL DIFFERENTIAL
2207 ; ID_Compat_Math_Start # Sm NABLA
221E ; ID_Compat_Math_Start # Sm INFINITY
1D6C1 ; ID_Compat_Math_Start # Sm MATHEMATICAL BOLD NABLA
1D6DB ; ID_Compat_Math_Start # Sm MATHEMATICAL BOLD PARTIAL DIFFERENTIAL
1D6FB ; ID_Compat_Math_Start # Sm MATHEMATICAL ITALIC NABLA
1D715 ; ID_Compat_Math_Start # Sm MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL
1D735 ; ID_Compat_Math_Start # Sm MATHEMATICAL BOLD ITALIC NABLA
1D74F ; ID_Compat_Math_Start # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL
1D76F ; ID_Compat_Math_Start # Sm MATHEMATICAL SANS-SERIF BOLD NABLA
1D789 ; ID_Compat_Math_Start # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL
1D7A9 ; ID_Compat_Math_Start # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA
1D7C3 ; ID_Compat_Math_Start # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL
# Total code points: 13
# ================================================
@ -1398,6 +1457,7 @@ AABB..AABC ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL AUE..TAI VIET
1367..1368 ; Sentence_Terminal # Po [2] ETHIOPIC QUESTION MARK..ETHIOPIC PARAGRAPH SEPARATOR
166E ; Sentence_Terminal # Po CANADIAN SYLLABICS FULL STOP
1735..1736 ; Sentence_Terminal # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION
17D4..17D5 ; Sentence_Terminal # Po [2] KHMER SIGN KHAN..KHMER SIGN BARIYOOSAN
1803 ; Sentence_Terminal # Po MONGOLIAN FULL STOP
1809 ; Sentence_Terminal # Po MONGOLIAN MANCHU FULL STOP
1944..1945 ; Sentence_Terminal # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK
@ -1462,7 +1522,7 @@ FF61 ; Sentence_Terminal # Po HALFWIDTH IDEOGRAPHIC FULL STOP
1BC9F ; Sentence_Terminal # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP
1DA88 ; Sentence_Terminal # Po SIGNWRITING FULL STOP
# Total code points: 154
# Total code points: 156
# ================================================

View File

@ -1,6 +1,6 @@
# PropertyValueAliases-15.0.0.txt
# Date: 2022-08-05, 23:42:17 GMT
# Copyright (c) 2022 Unicode, Inc.
# PropertyValueAliases-15.1.0.txt
# Date: 2023-08-07, 15:21:34 GMT
# Copyright (c) 2023 Unicode, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
#
@ -91,6 +91,7 @@ age; 12.1 ; V12_1
age; 13.0 ; V13_0
age; 14.0 ; V14_0
age; 15.0 ; V15_0
age; 15.1 ; V15_1
age; NA ; Unassigned
# Alphabetic (Alpha)
@ -208,6 +209,7 @@ blk; CJK_Ext_E ; CJK_Unified_Ideographs_Extension_E
blk; CJK_Ext_F ; CJK_Unified_Ideographs_Extension_F
blk; CJK_Ext_G ; CJK_Unified_Ideographs_Extension_G
blk; CJK_Ext_H ; CJK_Unified_Ideographs_Extension_H
blk; CJK_Ext_I ; CJK_Unified_Ideographs_Extension_I
blk; CJK_Radicals_Sup ; CJK_Radicals_Supplement
blk; CJK_Strokes ; CJK_Strokes
blk; CJK_Symbols ; CJK_Symbols_And_Punctuation
@ -817,6 +819,21 @@ IDSB; Y ; Yes ; T
IDST; N ; No ; F ; False
IDST; Y ; Yes ; T ; True
# IDS_Unary_Operator (IDSU)
IDSU; N ; No ; F ; False
IDSU; Y ; Yes ; T ; True
# ID_Compat_Math_Continue (ID_Compat_Math_Continue)
ID_Compat_Math_Continue; N ; No ; F ; False
ID_Compat_Math_Continue; Y ; Yes ; T ; True
# ID_Compat_Math_Start (ID_Compat_Math_Start)
ID_Compat_Math_Start; N ; No ; F ; False
ID_Compat_Math_Start; Y ; Yes ; T ; True
# ID_Continue (IDC)
IDC; N ; No ; F ; False
@ -836,6 +853,13 @@ IDS; Y ; Yes ; T
Ideo; N ; No ; F ; False
Ideo; Y ; Yes ; T ; True
# Indic_Conjunct_Break (InCB)
InCB; Consonant ; Consonant
InCB; Extend ; Extend
InCB; Linker ; Linker
InCB; None ; None
# Indic_Positional_Category (InPC)
InPC; Bottom ; Bottom
@ -1074,7 +1098,10 @@ jt ; U ; Non_Joining
# Line_Break (lb)
lb ; AI ; Ambiguous
lb ; AK ; Aksara
lb ; AL ; Alphabetic
lb ; AP ; Aksara_Prebase
lb ; AS ; Aksara_Start
lb ; B2 ; Break_Both
lb ; BA ; Break_After
lb ; BB ; Break_Before
@ -1112,6 +1139,8 @@ lb ; SA ; Complex_Context
lb ; SG ; Surrogate
lb ; SP ; Space
lb ; SY ; Break_Symbols
lb ; VF ; Virama_Final
lb ; VI ; Virama
lb ; WJ ; Word_Joiner
lb ; XX ; Unknown
lb ; ZW ; ZWSpace
@ -1156,6 +1185,9 @@ NFKC_QC; M ; Maybe
NFKC_QC; N ; No
NFKC_QC; Y ; Yes
# NFKC_Simple_Casefold (NFKC_SCF)
# NFKD_Quick_Check (NFKD_QC)
NFKD_QC; N ; No

View File

@ -1,6 +1,6 @@
# Unicode Character Database
# Date: 2022-09-02
# Copyright (c) 2022 Unicode, Inc.
# Date: 2023-08-28
# Copyright (c) 2023 Unicode, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
#
@ -10,7 +10,7 @@
# UAX #44, "Unicode Character Database"
# UTS #51, "Unicode Emoji"
#
# The UAXes and UTS #51 can be accessed at https://www.unicode.org/versions/Unicode15.0.0/
# The UAXes and UTS #51 can be accessed at https://www.unicode.org/versions/Unicode15.1.0/
This directory contains the final data files
for the Unicode Character Database, for Version 15.0.0 of the Unicode Standard.
This directory contains final data files
for the Unicode Character Database, for Version 15.1.0 of the Unicode Standard.

View File

@ -1,6 +1,6 @@
# Scripts-15.0.0.txt
# Date: 2022-04-26, 23:15:02 GMT
# Copyright (c) 2022 Unicode, Inc.
# Scripts-15.1.0.txt
# Date: 2023-07-28, 16:01:07 GMT
# Copyright (c) 2023 Unicode, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
#
@ -357,7 +357,7 @@
2E5B ; Common # Ps BOTTOM HALF LEFT PARENTHESIS
2E5C ; Common # Pe BOTTOM HALF RIGHT PARENTHESIS
2E5D ; Common # Pd OBLIQUE HYPHEN
2FF0..2FFB ; Common # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
2FF0..2FFF ; Common # So [16] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ROTATION
3000 ; Common # Zs IDEOGRAPHIC SPACE
3001..3003 ; Common # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
3004 ; Common # So JAPANESE INDUSTRIAL STANDARD SYMBOL
@ -399,6 +399,7 @@
3192..3195 ; Common # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK
3196..319F ; Common # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK
31C0..31E3 ; Common # So [36] CJK STROKE T..CJK STROKE Q
31EF ; Common # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION
3220..3229 ; Common # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN
322A..3247 ; Common # So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO
3248..324F ; Common # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE
@ -629,7 +630,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
E0001 ; Common # Cf LANGUAGE TAG
E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG
# Total code points: 8301
# Total code points: 8306
# ================================================
@ -1593,11 +1594,12 @@ FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILI
2B740..2B81D ; Han # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; Han # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEB0..2EBE0 ; Han # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
2EBF0..2EE5D ; Han # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
2F800..2FA1D ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
30000..3134A ; Han # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; Han # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
# Total code points: 98408
# Total code points: 99030
# ================================================

View File

@ -1,6 +1,6 @@
# SpecialCasing-15.0.0.txt
# Date: 2022-02-02, 23:35:52 GMT
# Copyright (c) 2022 Unicode, Inc.
# SpecialCasing-15.1.0.txt
# Date: 2023-01-05, 20:35:03 GMT
# Copyright (c) 2023 Unicode, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
#

View File

@ -11231,6 +11231,10 @@
2FF9;IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM UPPER RIGHT;So;0;ON;;;;;N;;;;;
2FFA;IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LOWER LEFT;So;0;ON;;;;;N;;;;;
2FFB;IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID;So;0;ON;;;;;N;;;;;
2FFC;IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM RIGHT;So;0;ON;;;;;N;;;;;
2FFD;IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LOWER RIGHT;So;0;ON;;;;;N;;;;;
2FFE;IDEOGRAPHIC DESCRIPTION CHARACTER HORIZONTAL REFLECTION;So;0;ON;;;;;N;;;;;
2FFF;IDEOGRAPHIC DESCRIPTION CHARACTER ROTATION;So;0;ON;;;;;N;;;;;
3000;IDEOGRAPHIC SPACE;Zs;0;WS;<wide> 0020;;;;N;;;;;
3001;IDEOGRAPHIC COMMA;Po;0;ON;;;;;N;;;;;
3002;IDEOGRAPHIC FULL STOP;Po;0;ON;;;;;N;IDEOGRAPHIC PERIOD;;;;
@ -11705,6 +11709,7 @@
31E1;CJK STROKE HZZZG;So;0;ON;;;;;N;;;;;
31E2;CJK STROKE PG;So;0;ON;;;;;N;;;;;
31E3;CJK STROKE Q;So;0;ON;;;;;N;;;;;
31EF;IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION;So;0;ON;;;;;N;;;;;
31F0;KATAKANA LETTER SMALL KU;Lo;0;L;;;;;N;;;;;
31F1;KATAKANA LETTER SMALL SI;Lo;0;L;;;;;N;;;;;
31F2;KATAKANA LETTER SMALL SU;Lo;0;L;;;;;N;;;;;
@ -34035,6 +34040,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
2CEA1;<CJK Ideograph Extension E, Last>;Lo;0;L;;;;;N;;;;;
2CEB0;<CJK Ideograph Extension F, First>;Lo;0;L;;;;;N;;;;;
2EBE0;<CJK Ideograph Extension F, Last>;Lo;0;L;;;;;N;;;;;
2EBF0;<CJK Ideograph Extension I, First>;Lo;0;L;;;;;N;;;;;
2EE5D;<CJK Ideograph Extension I, Last>;Lo;0;L;;;;;N;;;;;
2F800;CJK COMPATIBILITY IDEOGRAPH-2F800;Lo;0;L;4E3D;;;;N;;;;;
2F801;CJK COMPATIBILITY IDEOGRAPH-2F801;Lo;0;L;4E38;;;;N;;;;;
2F802;CJK COMPATIBILITY IDEOGRAPH-2F802;Lo;0;L;4E41;;;;N;;;;;

View File

@ -1,6 +1,6 @@
# GraphemeBreakProperty-15.0.0.txt
# Date: 2022-04-27, 17:07:38 GMT
# Copyright (c) 2022 Unicode, Inc.
# GraphemeBreakProperty-15.1.0.txt
# Date: 2023-01-05, 20:34:41 GMT
# Copyright (c) 2023 Unicode, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
#

View File

@ -1,11 +1,11 @@
# emoji-data.txt
# Date: 2022-08-02, 00:26:10 GMT
# Copyright (c) 2022 Unicode, Inc.
# Date: 2023-02-01, 02:22:54 GMT
# Copyright (c) 2023 Unicode, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
#
# Emoji Data for UTS #51
# Used with Emoji Version 15.0 and subsequent minor revisions (if any)
# Used with Emoji Version 15.1 and subsequent minor revisions (if any)
#
# For documentation and usage, see https://www.unicode.org/reports/tr51
#

View File

@ -1,103 +1,104 @@
## The Unicode Standard, Unicode Character Database, Version 15.0.0
## The Unicode Standard, Unicode Character Database, Version 15.1.0
### Unicode Character Database
```
UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
See Terms of Use for definitions of Unicode Inc.'s
Data Files and Software.
NOTICE TO USER: Carefully read the following legal agreement.
BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
TERMS AND CONDITIONS OF THIS AGREEMENT.
IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
THE DATA FILES OR SOFTWARE.
UNICODE LICENSE V3
COPYRIGHT AND PERMISSION NOTICE
Copyright © 1991-2022 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
Copyright © 1991-2023 Unicode, Inc.
Permission is hereby granted, free of charge, to any person obtaining
a copy of the Unicode data files and any associated documentation
(the "Data Files") or Unicode software and any associated documentation
(the "Software") to deal in the Data Files or Software
without restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, and/or sell copies of
the Data Files or Software, and to permit persons to whom the Data Files
or Software are furnished to do so, provided that either
(a) this copyright and permission notice appear with all copies
of the Data Files or Software, or
(b) this copyright and permission notice appear in associated
Documentation.
NOTICE TO USER: Carefully read the following legal agreement. BY
DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR
SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT
DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE.
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT OF THIRD PARTY RIGHTS.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
Permission is hereby granted, free of charge, to any person obtaining a
copy of data files and any associated documentation (the "Data Files") or
software and any associated documentation (the "Software") to deal in the
Data Files or Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, and/or sell
copies of the Data Files or Software, and to permit persons to whom the
Data Files or Software are furnished to do so, provided that either (a)
this copyright and permission notice appear with all copies of the Data
Files or Software, or (b) this copyright and permission notice appear in
associated Documentation.
Except as contained in this notice, the name of a copyright holder
shall not be used in advertising or otherwise to promote the sale,
use or other dealings in these Data Files or Software without prior
written authorization of the copyright holder.
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
THIRD PARTY RIGHTS.
=== http://www.unicode.org/copyright.html content ===
Unicode (R) Copyright and Terms of Use
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA
FILES OR SOFTWARE.
Except as contained in this notice, the name of a copyright holder shall
not be used in advertising or otherwise to promote the sale, use or other
dealings in these Data Files or Software without prior written
authorization of the copyright holder.
------------------
Unicode® Copyright and Terms of Use
For the general privacy policy governing access to this site, see the Unicode Privacy Policy.
Unicode Copyright
Copyright (C) 1991-2022 Unicode, Inc. All rights reserved.
Definitions
Unicode Data Files ("DATA FILES") include all data files under the directories:
https://www.unicode.org/Public/
https://www.unicode.org/reports/
https://www.unicode.org/ivd/data/
A. Unicode Copyright
Copyright © 1991-2023 Unicode, Inc. All rights reserved.
Unicode Data Files do not include PDF online code charts under the directory:
https://www.unicode.org/Public/
B. Definitions
Unicode Data Files ("DATA FILES") include all data files under the directories:
https://www.unicode.org/Public/
https://www.unicode.org/reports/
https://www.unicode.org/ivd/data/
Unicode Software ("SOFTWARE") includes any source code published in the Unicode Standard
or any source code or compiled code under the directories:
https://www.unicode.org/Public/PROGRAMS/
https://www.unicode.org/Public/cldr/
http://site.icu-project.org/download/
Terms of Use
Certain documents and files on this website contain a legend indicating that "Modification is permitted." Any person is hereby authorized, without fee, to modify such documents and files to create derivative works conforming to the Unicode® Standard, subject to Terms and Conditions herein.
Any person is hereby authorized, without fee, to view, use, reproduce, and distribute all documents and files, subject to the Terms and Conditions herein.
Further specifications of rights and restrictions pertaining to the use of the Unicode DATA FILES and SOFTWARE can be found in the Unicode Data Files and Software License.
Each version of the Unicode Standard has further specifications of rights and restrictions of use. For the book editions (Unicode 5.0 and earlier), these are found on the back of the title page.
The Unicode PDF online code charts carry specific restrictions. Those restrictions are incorporated as the first page of each PDF code chart.
All other files, including online documentation of the core specification for Unicode 6.0 and later, are covered under these general Terms of Use.
No license is granted to "mirror" the Unicode website where a fee is charged for access to the "mirror" site.
Modification is not permitted with respect to this document. All copies of this document must be verbatim.
Restricted Rights Legend
Any technical data or software which is licensed to the United States of America, its agencies and/or instrumentalities under this Agreement is commercial technical data or commercial computer software developed exclusively at private expense as defined in FAR 2.101, or DFARS 252.227-7014 (June 1995), as applicable. For technical data, use, duplication, or disclosure by the Government is subject to restrictions as set forth in DFARS 202.227-7015 Technical Data, Commercial and Items (Nov 1995) and this Agreement. For Software, in accordance with FAR 12-212 or DFARS 227-7202, as applicable, use, duplication or disclosure by the Government is subject to the restrictions set forth in this Agreement.
Warranties and Disclaimers
This publication and/or website may include technical or typographical errors or other inaccuracies. Changes are periodically added to the information herein; these changes will be incorporated in new editions of the publication and/or website. Unicode, Inc. may make improvements and/or changes in the product(s) and/or program(s) described in this publication and/or website at any time.
If this file has been purchased on magnetic or optical media from Unicode, Inc. the sole and exclusive remedy for any claim will be exchange of the defective media within ninety (90) days of original purchase.
EXCEPT AS PROVIDED IN SECTION E.2, THIS PUBLICATION AND/OR SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND EITHER EXPRESS, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. UNICODE, INC. AND ITS LICENSORS ASSUME NO RESPONSIBILITY FOR ERRORS OR OMISSIONS IN THIS PUBLICATION AND/OR SOFTWARE OR OTHER DOCUMENTS WHICH ARE REFERENCED BY OR LINKED TO THIS PUBLICATION OR THE UNICODE WEBSITE.
Waiver of Damages
In no event shall Unicode, Inc. or its licensors be liable for any special, incidental, indirect or consequential damages of any kind, or any damages whatsoever, whether or not Unicode, Inc. was advised of the possibility of the damage, including, without limitation, those resulting from the following: loss of use, data or profits, in connection with the use, modification or distribution of this information or its derivatives.
Trademarks & Logos
The Unicode Word Mark and the Unicode Logo are trademarks of Unicode, Inc. “The Unicode Consortium” and “Unicode, Inc.” are trade names of Unicode, Inc. Use of the information and materials found on this website indicates your acknowledgement of Unicode, Inc.s exclusive worldwide rights in the Unicode Word Mark, the Unicode Logo, and the Unicode trade names.
The Unicode Consortium Name and Trademark Usage Policy (“Trademark Policy”) are incorporated herein by reference and you agree to abide by the provisions of the Trademark Policy, which may be changed from time to time in the sole discretion of Unicode, Inc.
All third party trademarks referenced herein are the property of their respective owners.
Miscellaneous
Jurisdiction and Venue. This website is operated from a location in the State of California, United States of America. Unicode, Inc. makes no representation that the materials are appropriate for use in other locations. If you access this website from other locations, you are responsible for compliance with local laws. This Agreement, all use of this website and any claims and damages resulting from use of this website are governed solely by the laws of the State of California without regard to any principles which would apply the laws of a different jurisdiction. The user agrees that any disputes regarding this website shall be resolved solely in the courts located in Santa Clara County, California. The user agrees said courts have personal jurisdiction and agree to waive any right to transfer the dispute to any other forum.
Modification by Unicode, Inc. Unicode, Inc. shall have the right to modify this Agreement at any time by posting it to this website. The user may not assign any part of this Agreement without Unicode, Inc.s prior written consent.
Taxes. The user agrees to pay any taxes arising from access to this website or use of the information herein, except for those based on Unicodes net income.
Severability. If any provision of this Agreement is declared invalid or unenforceable, the remaining provisions of this Agreement shall remain in effect.
Entire Agreement. This Agreement constitutes the entire agreement between the parties.
Unicode Data Files do not include PDF online code charts under the directory:
https://www.unicode.org/Public/
Unicode Software ("SOFTWARE") includes any source code published in the Unicode Standard
or any source code or compiled code under the directories:
https://www.unicode.org/Public/PROGRAMS/
https://www.unicode.org/Public/cldr/
http://site.icu-project.org/download/
C. Terms of Use
1. Certain documents and files on this website contain a legend indicating that "Modification is permitted." Any person is hereby authorized, without fee, to modify such documents and files to create derivative works conforming to the Unicode® Standard, subject to Terms and Conditions herein.
2. Any person is hereby authorized, without fee, to view, use, reproduce, and distribute all documents and files, subject to the Terms and Conditions herein.
3. Further specifications of rights and restrictions pertaining to the use of the Unicode DATA FILES and SOFTWARE can be found in the Unicode Data Files and Software License.
4. Each version of the Unicode Standard has further specifications of rights and restrictions of use. For the book editions (Unicode 5.0 and earlier), these are found on the back of the title page.
5. The Unicode PDF online code charts carry specific restrictions. Those restrictions are incorporated as the first page of each PDF code chart.
6. All other files, including online documentation of the core specification for Unicode 6.0 and later, are covered under these general Terms of Use.
7. No license is granted to "mirror" the Unicode website where a fee is charged for access to the "mirror" site.
8. Modification is not permitted with respect to this document. All copies of this document must be verbatim.
D. Restricted Rights Legend
1. Any technical data or software which is licensed to the United States of America, its agencies and/or instrumentalities under this Agreement is commercial technical data or commercial computer software developed exclusively at private expense as defined in FAR 2.101, or DFARS 252.227-7014 (June 1995), as applicable. For technical data, use, duplication, or disclosure by the Government is subject to restrictions as set forth in DFARS 202.227-7015 Technical Data, Commercial and Items (Nov 1995) and this Agreement. For Software, in accordance with FAR 12-212 or DFARS 227-7202, as applicable, use, duplication or disclosure by the Government is subject to the restrictions set forth in this Agreement.
E. Warranties and Disclaimers
1. This publication and/or website may include technical or typographical errors or other inaccuracies. Changes are periodically added to the information herein; these changes will be incorporated in new editions of the publication and/or website. Unicode, Inc. may make improvements and/or changes in the product(s) and/or program(s) described in this publication and/or website at any time.
2. If this file has been purchased on magnetic or optical media from Unicode, Inc. the sole and exclusive remedy for any claim will be exchange of the defective media within ninety (90) days of original purchase.
3. EXCEPT AS PROVIDED IN SECTION E.2, THIS PUBLICATION AND/OR SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND EITHER EXPRESS, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. UNICODE, INC. AND ITS LICENSORS ASSUME NO RESPONSIBILITY FOR ERRORS OR OMISSIONS IN THIS PUBLICATION AND/OR SOFTWARE OR OTHER DOCUMENTS WHICH ARE REFERENCED BY OR LINKED TO THIS PUBLICATION OR THE UNICODE WEBSITE.
F. Waiver of Damages
1. In no event shall Unicode, Inc. or its licensors be liable for any special, incidental, indirect or consequential damages of any kind, or any damages whatsoever, whether or not Unicode, Inc. was advised of the possibility of the damage, including, without limitation, those resulting from the following: loss of use, data or profits, in connection with the use, modification or distribution of this information or its derivatives.
G. Trademarks & Logos
1. The Unicode Word Mark and the Unicode Logo are trademarks of Unicode, Inc. "The Unicode Consortium" and "Unicode, Inc." are trade names of Unicode, Inc. Use of the information and materials found on this website indicates your acknowledgement of Unicode, Inc.'s exclusive worldwide rights in the Unicode Word Mark, the Unicode Logo, and the Unicode trade names.
2. The Unicode Consortium Name and Trademark Usage Policy ("Trademark Policy") are incorporated herein by reference and you agree to abide by the provisions of the Trademark Policy, which may be changed from time to time in the sole discretion of Unicode, Inc.
All third party trademarks referenced herein are the property of their respective owners.
H. Miscellaneous
1. Jurisdiction and Venue. This website is operated from a location in the State of California, United States of America. Unicode, Inc. makes no representation that the materials are appropriate for use in other locations. If you access this website from other locations, you are responsible for compliance with local laws. This Agreement, all use of this website and any claims and damages resulting from use of this website are governed solely by the laws of the State of California without regard to any principles which would apply the laws of a different jurisdiction. The user agrees that any disputes regarding this website shall be resolved solely in the courts located in Santa Clara County, California. The user agrees said courts have personal jurisdiction and agree to waive any right to transfer the dispute to any other forum.
2. Modification by Unicode, Inc. Unicode, Inc. shall have the right to modify this Agreement at any time by posting it to this website. The user may not assign any part of this Agreement without Unicode, Inc.'s prior written consent.
3. Taxes. The user agrees to pay any taxes arising from access to this website or use of the information herein, except for those based on Unicode's net income.
4. Severability. If any provision of this Agreement is declared invalid or unenforceable, the remaining provisions of this Agreement shall remain in effect.
5. Entire Agreement. This Agreement constitutes the entire agreement between the parties.
```

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -23,7 +23,7 @@
/*
* @test
* @bug 8202771 8221431 8229831
* @bug 8202771 8221431 8229831 8296246
* @summary Check j.l.Character.isDigit/isLetter/isLetterOrDigit/isSpaceChar
* /isWhitespace/isTitleCase/isISOControl/isIdentifierIgnorable
* /isJavaIdentifierStart/isJavaIdentifierPart/isUnicodeIdentifierStart
@ -292,7 +292,11 @@ public class CharPropTest {
return codePoint == 0x00B7 ||
codePoint == 0x0387 ||
(codePoint >= 0x1369 && codePoint <= 0x1371) ||
codePoint == 0x19DA;
codePoint == 0x19DA ||
codePoint == 0x200C ||
codePoint == 0x200D ||
codePoint == 0x30FB ||
codePoint == 0xFF65;
}
private static void printDiff(int codePoint, String method, boolean actual, boolean expected) {