8296246: Update Unicode Data Files to Version 15.1.0
Reviewed-by: erikj, joehw, srl, rriggs
This commit is contained in:
parent
a021dbcb9e
commit
7c991cc567
@ -76,8 +76,8 @@ TOOL_GENERATECACERTS = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_class
|
||||
-Dkeystore.pkcs12.macAlgorithm=NONE \
|
||||
build.tools.generatecacerts.GenerateCacerts
|
||||
|
||||
TOOL_GENERATEEMOJIDATA = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
|
||||
build.tools.generateemojidata.GenerateEmojiData
|
||||
TOOL_GENERATEEXTRAPROPERTIES = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
|
||||
build.tools.generateextraproperties.GenerateExtraProperties
|
||||
|
||||
TOOL_MAKEZIPREPRODUCIBLE = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
|
||||
build.tools.makezipreproducible.MakeZipReproducible
|
||||
|
@ -64,7 +64,7 @@ public class PropList {
|
||||
|
||||
int i, j;
|
||||
BufferedReader sbfr = new BufferedReader(new FileReader(file));
|
||||
Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s*;\\s+(\\w+)\\s+#.*").matcher("");
|
||||
Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s*;\\s+(\\w+)[;\\s].*").matcher("");
|
||||
String line = null;
|
||||
int lineNo = 0;
|
||||
while ((line = sbfr.readLine()) != null) {
|
||||
|
@ -0,0 +1,169 @@
|
||||
/*
|
||||
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
package build.tools.generateextraproperties;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/**
 * Parses extra properties files of UCD, and replaces the placeholders in
 * the given template source file with the generated conditions, then emits
 * .java files. For example, if the properties file has:
 * <pre>{@code
 *  0009..000D    ; Type (; Value)
 *  0020          ; Type (; Value)
 *  2000..200A    ; Type (; Value)
 * }</pre>
 * and the template file contains
 * <pre>{@code
 *  %%%Type(=Value)%%%
 * }</pre>
 * then the generated .java file would have the following in place:
 * <pre>{@code
 *  (cp >= 0x0009 && cp <= 0x000D) ||
 *   cp == 0x0020 ||
 *  (cp >= 0x2000 && cp <= 0x200A);
 * }</pre>
 * Note that those in parentheses in the properties file and the
 * template file are optional.
 *
 * Arguments to this utility:
 *   args[0]: Full path string to the template file
 *   args[1]: Full path string to the properties file
 *   args[2]: Full path string to the generated .java file
 *   args[3...]: Names of the properties to generate the conditions for,
 *               each in the form {@code Type} or {@code Type=Value}
 */
public class GenerateExtraProperties {
    /**
     * Entry point of the tool. See the class description for the meaning
     * of the arguments.
     *
     * @param args template path, properties path, output path, followed by
     *             zero or more property names
     * @throws java.io.UncheckedIOException if any of the files cannot be
     *         read or the output cannot be written; the failure is
     *         propagated so that the process ends with a non-zero status
     *         instead of silently producing no output
     */
    public static void main(String[] args) {
        var templateFile = Paths.get(args[0]);
        var propertiesFile = Paths.get(args[1]);
        var gensrcFile = Paths.get(args[2]);
        var propertyNames = Arrays.copyOfRange(args, 3, args.length);
        var replacementMap = new HashMap<String, String>();

        try {
            for (var propertyName : propertyNames) {
                List<Range> ranges = readRanges(propertiesFile, propertyName);

                // The placeholder is the property name as given on the
                // command line, wrapped in "%%%".
                replacementMap.put("%%%" + propertyName + "%%%",
                    ranges.stream()
                        .map(GenerateExtraProperties::rangeToString)
                        .collect(Collectors.joining(" ||\n", "", ";")));
            }

            // Generate .java file. A template line is replaced only when,
            // ignoring surrounding whitespace, it consists of exactly one
            // placeholder; every other line is copied through unchanged.
            List<String> generated;
            try (Stream<String> templateLines = Files.lines(templateFile)) {
                generated = templateLines
                    .map(l -> replacementMap.getOrDefault(l.trim(), l))
                    .collect(Collectors.toList());
            }
            Files.write(gensrcFile, generated,
                StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
        } catch (IOException e) {
            // Do not swallow the failure: a build step that could not
            // produce its output must not look successful.
            throw new java.io.UncheckedIOException(e);
        }
    }

    /**
     * Reads the properties file and returns the code point ranges that are
     * listed for the given property, sorted by start and with directly
     * adjacent ranges merged into one.
     *
     * @param propertiesFile the UCD properties file to scan
     * @param propertyName property name, either "Type" or "Type=Value"
     * @return the sorted, merged ranges; empty if no line matches
     * @throws IOException if the properties file cannot be opened
     */
    private static List<Range> readRanges(java.nio.file.Path propertiesFile,
                                          String propertyName) throws IOException {
        // "Type=Value" on the command line appears as "; Type; Value" in the file
        var pn = "; " + propertyName.replaceFirst("=", "; ");

        // Files.lines keeps the file open until the stream is closed
        try (Stream<String> propertyLines = Files.lines(propertiesFile)) {
            List<Range> ranges = propertyLines
                .filter(Predicate.not(l -> l.startsWith("#") || l.isBlank()))
                .filter(l -> l.contains(pn))
                // keep only the leading code point (range) field
                .map(l -> new Range(l.replaceFirst(" .*", "")))
                .sorted()
                .collect(ArrayList<Range>::new,
                    (list, r) -> {
                        // collapse a range that directly follows the previous one
                        int lastIndex = list.size() - 1;
                        if (lastIndex >= 0) {
                            Range lastRange = list.get(lastIndex);
                            if (lastRange.last + 1 == r.start) {
                                list.set(lastIndex, new Range(lastRange.start, r.last));
                                return;
                            }
                        }
                        list.add(r);
                    },
                    ArrayList<Range>::addAll);
            return ranges;
        }
    }

    /**
     * Renders a range as a Java boolean expression on the variable
     * {@code cp}, indented for placement in the generated source.
     * A single code point becomes an equality test, a range of exactly two
     * code points becomes two equality tests joined with {@code ||}, and
     * anything longer becomes a parenthesized bounds check.
     *
     * @param r the range to render
     * @return the expression text, without a trailing operator
     */
    static String rangeToString(Range r) {
        if (r.start == r.last) {
            return (" ".repeat(12) + "cp == 0x" + toHexString(r.start));
        } else if (r.start == r.last - 1) {
            return " ".repeat(12) + "cp == 0x" + toHexString(r.start) + " ||\n" +
                   " ".repeat(12) + "cp == 0x" + toHexString(r.last);
        } else {
            // one space less so that the opening parenthesis lines up
            return " ".repeat(11) + "(cp >= 0x" + toHexString(r.start) +
                   " && cp <= 0x" + toHexString(r.last) + ")";
        }
    }

    /**
     * Parses a hexadecimal string into an int.
     *
     * @param hexStr hexadecimal digits without a prefix
     * @return the parsed value
     * @throws NumberFormatException if the string is not valid hexadecimal
     */
    static int toInt(String hexStr) {
        return Integer.parseUnsignedInt(hexStr, 16);
    }

    /**
     * Formats a code point as upper-case hexadecimal, left-padded with
     * zeros to at least four digits.
     *
     * @param cp the code point
     * @return the hexadecimal text, without a prefix
     */
    static String toHexString(int cp) {
        String ret = Integer.toUnsignedString(cp, 16).toUpperCase();
        if (ret.length() < 4) {
            ret = "0".repeat(4 - ret.length()) + ret;
        }
        return ret;
    }

    /**
     * An inclusive range of code points, ordered by its start.
     */
    static class Range implements Comparable<Range> {
        int start;
        int last;

        Range (int start, int last) {
            this.start = start;
            this.last = last;
        }

        /**
         * Parses the leading field of a properties file line, which is
         * either a single code point ("0020") or a range ("0009..000D").
         * A trailing "# comment" and any fields after ';' are ignored.
         */
        Range (String input) {
            input = input.replaceFirst("\\s#.*", "");
            start = toInt(input.replaceFirst("[\\s\\.].*", ""));
            last = input.contains("..") ?
                toInt(input.replaceFirst(".*\\.\\.", "")
                           .replaceFirst(";.*", "").trim())
                : start;
        }

        @Override
        public String toString() {
            return "Start: " + toHexString(start) + ", Last: " + toHexString(last);
        }

        @Override
        public int compareTo(Range other) {
            return Integer.compare(start, other.start);
        }
    }
}
|
@ -35,6 +35,7 @@ include gensrc/GensrcExceptions.gmk
|
||||
include gensrc/GensrcVarHandles.gmk
|
||||
include gensrc/GensrcModuleLoaderMap.gmk
|
||||
include gensrc/GensrcScopedMemoryAccess.gmk
|
||||
include gensrc/GensrcRegex.gmk
|
||||
|
||||
# GensrcLocaleData.gmk does not set TARGETS, so we must choose which targets
|
||||
# to include.
|
||||
|
46
make/modules/java.base/gensrc/GensrcRegex.gmk
Normal file
46
make/modules/java.base/gensrc/GensrcRegex.gmk
Normal file
@ -0,0 +1,46 @@
|
||||
#
|
||||
# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
#
|
||||
# This code is free software; you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License version 2 only, as
|
||||
# published by the Free Software Foundation. Oracle designates this
|
||||
# particular file as subject to the "Classpath" exception as provided
|
||||
# by Oracle in the LICENSE file that accompanied this code.
|
||||
#
|
||||
# This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
# version 2 for more details (a copy is included in the LICENSE file that
|
||||
# accompanied this code).
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License version
|
||||
# 2 along with this work; if not, write to the Free Software Foundation,
|
||||
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
#
|
||||
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
# or visit www.oracle.com if you need additional information or have any
|
||||
# questions.
|
||||
#
|
||||
|
||||
#
|
||||
# Rules to create java files under
|
||||
# $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/util/regex/
|
||||
#
|
||||
|
||||
# Path of the generated IndicConjunctBreak.java source file (the rule's target).
GENSRC_INDICCONJUNCTBREAK := $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/util/regex/IndicConjunctBreak.java
|
||||
|
||||
# Template source containing the %%%...%%% placeholders to be filled in.
INDICCONJUNCTBREAKTEMP := $(MODULE_SRC)/share/classes/jdk/internal/util/regex/IndicConjunctBreak.java.template
|
||||
# Unicode data file from which the code point ranges are read.
INDICCONJUNCTBREAKPROPS := $(MODULE_SRC)/share/data/unicodedata/DerivedCoreProperties.txt
|
||||
# Property names passed to the tool; each one names a placeholder in the template.
INDICCONJUNCTBREAKPARAMS := InCB=Linker InCB=Extend InCB=Consonant
|
||||
|
||||
# Regenerate the source whenever the build tools, the template or the data
# file change. The tool arguments are, in order: template, properties file,
# output file, then the property names.
$(GENSRC_INDICCONJUNCTBREAK): $(BUILD_TOOLS_JDK) $(INDICCONJUNCTBREAKTEMP) $(INDICCONJUNCTBREAKPROPS)
|
||||
$(call LogInfo, Generating $@)
|
||||
$(call MakeTargetDir)
|
||||
$(TOOL_GENERATEEXTRAPROPERTIES) \
|
||||
$(INDICCONJUNCTBREAKTEMP) \
|
||||
$(INDICCONJUNCTBREAKPROPS) \
|
||||
$(GENSRC_INDICCONJUNCTBREAK) \
|
||||
$(INDICCONJUNCTBREAKPARAMS)
|
||||
|
||||
# Add the generated file to the list of targets for this gensrc makefile.
TARGETS += $(GENSRC_INDICCONJUNCTBREAK)
|
@ -63,7 +63,7 @@ import static java.lang.constant.ConstantDescs.DEFAULT_NAME;
|
||||
* from the Unicode Consortium at
|
||||
* <a href="http://www.unicode.org">http://www.unicode.org</a>.
|
||||
* <p>
|
||||
* Character information is based on the Unicode Standard, version 15.0.
|
||||
* Character information is based on the Unicode Standard, version 15.1.
|
||||
* <p>
|
||||
* The Java platform has supported different versions of the Unicode
|
||||
* Standard over time. Upgrades to newer versions of the Unicode Standard
|
||||
@ -75,6 +75,8 @@ import static java.lang.constant.ConstantDescs.DEFAULT_NAME;
|
||||
* <th scope="col">Unicode version</th></tr>
|
||||
* </thead>
|
||||
* <tbody>
|
||||
* <tr><th scope="row" style="text-align:left">Java SE 22</th>
|
||||
* <td>Unicode 15.1</td></tr>
|
||||
* <tr><th scope="row" style="text-align:left">Java SE 20</th>
|
||||
* <td>Unicode 15.0</td></tr>
|
||||
* <tr><th scope="row" style="text-align:left">Java SE 19</th>
|
||||
@ -744,7 +746,7 @@ class Character implements java.io.Serializable, Comparable<Character>, Constabl
|
||||
* It should be adjusted whenever the Unicode Character Database
|
||||
* is upgraded.
|
||||
*/
|
||||
private static final int NUM_ENTITIES = 756;
|
||||
private static final int NUM_ENTITIES = 759;
|
||||
private static Map<String, UnicodeBlock> map = HashMap.newHashMap(NUM_ENTITIES);
|
||||
|
||||
/**
|
||||
@ -3611,6 +3613,16 @@ class Character implements java.io.Serializable, Comparable<Character>, Constabl
|
||||
"CJK UNIFIED IDEOGRAPHS EXTENSION H",
|
||||
"CJKUNIFIEDIDEOGRAPHSEXTENSIONH");
|
||||
|
||||
/**
|
||||
* Constant for the "CJK Unified Ideographs Extension I" Unicode
|
||||
* character block.
|
||||
* @since 22
|
||||
*/
|
||||
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I =
|
||||
new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I",
|
||||
"CJK UNIFIED IDEOGRAPHS EXTENSION I",
|
||||
"CJKUNIFIEDIDEOGRAPHSEXTENSIONI");
|
||||
|
||||
private static final int[] blockStarts = {
|
||||
0x0000, // 0000..007F; Basic Latin
|
||||
0x0080, // 0080..00FF; Latin-1 Supplement
|
||||
@ -3978,7 +3990,8 @@ class Character implements java.io.Serializable, Comparable<Character>, Constabl
|
||||
0x2B740, // 2B740..2B81F; CJK Unified Ideographs Extension D
|
||||
0x2B820, // 2B820..2CEAF; CJK Unified Ideographs Extension E
|
||||
0x2CEB0, // 2CEB0..2EBEF; CJK Unified Ideographs Extension F
|
||||
0x2EBF0, // unassigned
|
||||
0x2EBF0, // 2EBF0..2EE5F; CJK Unified Ideographs Extension I
|
||||
0x2EE60, // unassigned
|
||||
0x2F800, // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
|
||||
0x2FA20, // unassigned
|
||||
0x30000, // 30000..3134F; CJK Unified Ideographs Extension G
|
||||
@ -4359,6 +4372,7 @@ class Character implements java.io.Serializable, Comparable<Character>, Constabl
|
||||
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
|
||||
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E,
|
||||
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F,
|
||||
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I,
|
||||
null,
|
||||
CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
|
||||
null,
|
||||
@ -6057,9 +6071,7 @@ class Character implements java.io.Serializable, Comparable<Character>, Constabl
|
||||
0x2EF4, // 2EF4..2EFF; UNKNOWN
|
||||
0x2F00, // 2F00..2FD5; HAN
|
||||
0x2FD6, // 2FD6..2FEF; UNKNOWN
|
||||
0x2FF0, // 2FF0..2FFB; COMMON
|
||||
0x2FFC, // 2FFC..2FFF; UNKNOWN
|
||||
0x3000, // 3000..3004; COMMON
|
||||
0x2FF0, // 2FF0..3004; COMMON
|
||||
0x3005, // 3005 ; HAN
|
||||
0x3006, // 3006 ; COMMON
|
||||
0x3007, // 3007 ; HAN
|
||||
@ -6088,7 +6100,8 @@ class Character implements java.io.Serializable, Comparable<Character>, Constabl
|
||||
0x3190, // 3190..319F; COMMON
|
||||
0x31A0, // 31A0..31BF; BOPOMOFO
|
||||
0x31C0, // 31C0..31E3; COMMON
|
||||
0x31E4, // 31E4..31EF; UNKNOWN
|
||||
0x31E4, // 31E4..31EE; UNKNOWN
|
||||
0x31EF, // 31EF ; COMMON
|
||||
0x31F0, // 31F0..31FF; KATAKANA
|
||||
0x3200, // 3200..321E; HANGUL
|
||||
0x321F, // 321F ; UNKNOWN
|
||||
@ -7028,7 +7041,9 @@ class Character implements java.io.Serializable, Comparable<Character>, Constabl
|
||||
0x2B820, // 2B820..2CEA1; HAN
|
||||
0x2CEA2, // 2CEA2..2CEAF; UNKNOWN
|
||||
0x2CEB0, // 2CEB0..2EBE0; HAN
|
||||
0x2EBE1, // 2EBE1..2F7FF; UNKNOWN
|
||||
0x2EBE1, // 2EBE1..2EBEF; UNKNOWN
|
||||
0x2EBF0, // 2EBF0..2EE5D; HAN
|
||||
0x2EE5E, // 2EE5E..2F7FF; UNKNOWN
|
||||
0x2F800, // 2F800..2FA1D; HAN
|
||||
0x2FA1E, // 2FA1E..2FFFF; UNKNOWN
|
||||
0x30000, // 30000..3134A; HAN
|
||||
@ -7717,9 +7732,7 @@ class Character implements java.io.Serializable, Comparable<Character>, Constabl
|
||||
UNKNOWN, // 2EF4..2EFF
|
||||
HAN, // 2F00..2FD5
|
||||
UNKNOWN, // 2FD6..2FEF
|
||||
COMMON, // 2FF0..2FFB
|
||||
UNKNOWN, // 2FFC..2FFF
|
||||
COMMON, // 3000..3004
|
||||
COMMON, // 2FF0..3004
|
||||
HAN, // 3005
|
||||
COMMON, // 3006
|
||||
HAN, // 3007
|
||||
@ -7748,7 +7761,8 @@ class Character implements java.io.Serializable, Comparable<Character>, Constabl
|
||||
COMMON, // 3190..319F
|
||||
BOPOMOFO, // 31A0..31BF
|
||||
COMMON, // 31C0..31E3
|
||||
UNKNOWN, // 31E4..31EF
|
||||
UNKNOWN, // 31E4..31EE
|
||||
COMMON, // 31EF
|
||||
KATAKANA, // 31F0..31FF
|
||||
HANGUL, // 3200..321E
|
||||
UNKNOWN, // 321F
|
||||
@ -8688,7 +8702,9 @@ class Character implements java.io.Serializable, Comparable<Character>, Constabl
|
||||
HAN, // 2B820..2CEA1
|
||||
UNKNOWN, // 2CEA2..2CEAF
|
||||
HAN, // 2CEB0..2EBE0
|
||||
UNKNOWN, // 2EBE1..2F7FF
|
||||
UNKNOWN, // 2EBE1..2EBEF
|
||||
HAN, // 2EBF0..2EE5D
|
||||
UNKNOWN, // 2EE5E..2F7FF
|
||||
HAN, // 2F800..2FA1D
|
||||
UNKNOWN, // 2FA1E..2FFFF
|
||||
HAN, // 30000..3134A
|
||||
|
@ -35,9 +35,9 @@ public final class Grapheme {
|
||||
* <p>
|
||||
* See Unicode Standard Annex #29 Unicode Text Segmentation for the specification
|
||||
* for the extended grapheme cluster boundary rules. The following implementation
|
||||
* is based on the annex for Unicode version 15.0.
|
||||
* (http://www.unicode.org/reports/tr29/tr29-40.html)
|
||||
* is based on the annex for Unicode version 15.1.
|
||||
*
|
||||
* @spec http://www.unicode.org/reports/tr29/tr29-43.html
|
||||
* @param src the {@code CharSequence} to be scanned
|
||||
* @param off offset to start looking for the next boundary in the src
|
||||
* @param limit limit offset in the src (exclusive)
|
||||
@ -56,6 +56,15 @@ public final class Grapheme {
|
||||
int ch1 = Character.codePointAt(src, ret);
|
||||
int t1 = getType(ch1);
|
||||
|
||||
// GB9c
|
||||
if (IndicConjunctBreak.isConsonant(ch0)) {
|
||||
var advance = checkIndicConjunctBreak(src, ret, limit);
|
||||
if (advance >= 0) {
|
||||
ret += advance;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (gb11 && t0 == ZWJ && t1 == EXTENDED_PICTOGRAPHIC) {
|
||||
// continue for gb11
|
||||
} else if (riCount % 2 == 1 && t0 == RI && t1 == RI) {
|
||||
@ -70,6 +79,7 @@ public final class Grapheme {
|
||||
}
|
||||
|
||||
riCount += (t1 == RI) ? 1 : 0;
|
||||
ch0 = ch1;
|
||||
t0 = t1;
|
||||
|
||||
ret += Character.charCount(ch1);
|
||||
@ -283,4 +293,40 @@ public final class Grapheme {
|
||||
}
|
||||
return OTHER;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks for a possible GB9c Indic Conjunct Break sequence. If it is
|
||||
* repetitive, e.g., Consonant1/Linker1/Consonant2/Linker2/Consonant3, only
|
||||
* the first part of the sequence (Consonant1/Linker1/Consonant2) is
|
||||
* recognized. The rest is analyzed in the next iteration of the grapheme
|
||||
* cluster boundary search.
|
||||
*
|
||||
* @param src the source char sequence
|
||||
* @param index the index that points to the starting Linking Consonant
|
||||
* @param limit limit to the char sequence
|
||||
* @return the advance in index if the indic conjunct break sequence
|
||||
* is found, it will be negative if the sequence is not found
|
||||
*/
|
||||
private static int checkIndicConjunctBreak(CharSequence src, int index, int limit) {
|
||||
boolean linkerFound = false;
|
||||
int advance = 0;
|
||||
|
||||
while (index + advance < limit) {
|
||||
int ch1 = Character.codePointAt(src, index + advance);
|
||||
advance += Character.charCount(ch1);
|
||||
|
||||
if (IndicConjunctBreak.isLinker(ch1)) {
|
||||
linkerFound = true;
|
||||
} else if (IndicConjunctBreak.isConsonant(ch1)) {
|
||||
if (linkerFound) {
|
||||
return advance;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} else if (!IndicConjunctBreak.isExtend(ch1)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,60 @@
|
||||
/*
|
||||
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package jdk.internal.util.regex;
|
||||
|
||||
/**
|
||||
* Helper class for supporting the GB9c rule in Unicode Text Segmentation TR29
|
||||
*
|
||||
* <blockquote>
|
||||
* GB9c Do not break within certain combinations with Indic_Conjunct_Break (InCB)=Linker.
|
||||
*
|
||||
* \p{InCB=Consonant} [ \p{InCB=Extend} \p{InCB=Linker} ]* \p{InCB=Linker} [ \p{InCB=Extend} \p{InCB=Linker} ]* x \p{InCB=Consonant}*
|
||||
* </blockquote>
|
||||
*
|
||||
* Code point conditions included in this class are derived from the "Derived Property: Indic_Conjunct_Break"
|
||||
* section in DerivedCoreProperties.txt of the Unicode Character Database.
|
||||
*/
|
||||
// Template note: every line consisting solely of a %%%InCB=...%%% placeholder
// is replaced at build time (by the GenerateExtraProperties build tool) with
// a boolean expression on "cp" that ends with ';'. Keep each placeholder on
// a line of its own, otherwise it is not substituted.
final class IndicConjunctBreak {
|
||||
// Tells whether the code point is listed with InCB=Linker.
static boolean isLinker(int cp) {
|
||||
return
|
||||
%%%InCB=Linker%%%
|
||||
}
|
||||
|
||||
// Tells whether the code point is listed with InCB=Extend.
static boolean isExtend(int cp) {
|
||||
return
|
||||
%%%InCB=Extend%%%
|
||||
}
|
||||
|
||||
// Tells whether the code point is listed with InCB=Consonant.
static boolean isConsonant(int cp) {
|
||||
// fast check - Devanagari to Malayalam
// Anything outside U+0900..U+0D7F is rejected before the generated
// condition is evaluated.
// NOTE(review): this bound is hard-coded; if a future data file lists
// InCB=Consonant code points outside this range they would be reported
// as non-consonants - re-check when the Unicode data is updated.
|
||||
if (cp < 0x0900 || cp > 0x0D7F) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return
|
||||
%%%InCB=Consonant%%%
|
||||
}
|
||||
}
|
@ -1,6 +1,6 @@
|
||||
# Blocks-15.0.0.txt
|
||||
# Date: 2022-01-28, 20:58:00 GMT [KW]
|
||||
# Copyright (c) 2022 Unicode, Inc.
|
||||
# Blocks-15.1.0.txt
|
||||
# Date: 2023-07-28, 15:47:20 GMT
|
||||
# Copyright (c) 2023 Unicode, Inc.
|
||||
# For terms of use, see https://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
# Unicode Character Database
|
||||
@ -352,6 +352,7 @@ FFF0..FFFF; Specials
|
||||
2B740..2B81F; CJK Unified Ideographs Extension D
|
||||
2B820..2CEAF; CJK Unified Ideographs Extension E
|
||||
2CEB0..2EBEF; CJK Unified Ideographs Extension F
|
||||
2EBF0..2EE5F; CJK Unified Ideographs Extension I
|
||||
2F800..2FA1F; CJK Compatibility Ideographs Supplement
|
||||
30000..3134F; CJK Unified Ideographs Extension G
|
||||
31350..323AF; CJK Unified Ideographs Extension H
|
||||
|
@ -1,6 +1,6 @@
|
||||
# DerivedCoreProperties-15.0.0.txt
|
||||
# Date: 2022-08-05, 22:17:05 GMT
|
||||
# Copyright (c) 2022 Unicode, Inc.
|
||||
# DerivedCoreProperties-15.1.0.txt
|
||||
# Date: 2023-08-07, 15:21:24 GMT
|
||||
# Copyright (c) 2023 Unicode, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see https://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
@ -1397,11 +1397,12 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG
|
||||
2B740..2B81D ; Alphabetic # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
|
||||
2B820..2CEA1 ; Alphabetic # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
|
||||
2CEB0..2EBE0 ; Alphabetic # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
|
||||
2EBF0..2EE5D ; Alphabetic # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
|
||||
2F800..2FA1D ; Alphabetic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
|
||||
30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
|
||||
31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
|
||||
|
||||
# Total code points: 137765
|
||||
# Total code points: 138387
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -6853,11 +6854,12 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
|
||||
2B740..2B81D ; ID_Start # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
|
||||
2B820..2CEA1 ; ID_Start # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
|
||||
2CEB0..2EBE0 ; ID_Start # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
|
||||
2EBF0..2EE5D ; ID_Start # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
|
||||
2F800..2FA1D ; ID_Start # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
|
||||
30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
|
||||
31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
|
||||
|
||||
# Total code points: 136345
|
||||
# Total code points: 136967
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -7438,6 +7440,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
|
||||
1FE0..1FEC ; ID_Continue # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA
|
||||
1FF2..1FF4 ; ID_Continue # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
|
||||
1FF6..1FFC ; ID_Continue # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
|
||||
200C..200D ; ID_Continue # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
|
||||
203F..2040 ; ID_Continue # Pc [2] UNDERTIE..CHARACTER TIE
|
||||
2054 ; ID_Continue # Pc INVERTED UNDERTIE
|
||||
2071 ; ID_Continue # Lm SUPERSCRIPT LATIN SMALL LETTER I
|
||||
@ -7504,6 +7507,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
|
||||
309D..309E ; ID_Continue # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK
|
||||
309F ; ID_Continue # Lo HIRAGANA DIGRAPH YORI
|
||||
30A1..30FA ; ID_Continue # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO
|
||||
30FB ; ID_Continue # Po KATAKANA MIDDLE DOT
|
||||
30FC..30FE ; ID_Continue # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK
|
||||
30FF ; ID_Continue # Lo KATAKANA DIGRAPH KOTO
|
||||
3105..312F ; ID_Continue # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN
|
||||
@ -7683,6 +7687,7 @@ FF10..FF19 ; ID_Continue # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NIN
|
||||
FF21..FF3A ; ID_Continue # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
|
||||
FF3F ; ID_Continue # Pc FULLWIDTH LOW LINE
|
||||
FF41..FF5A ; ID_Continue # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
|
||||
FF65 ; ID_Continue # Po HALFWIDTH KATAKANA MIDDLE DOT
|
||||
FF66..FF6F ; ID_Continue # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU
|
||||
FF70 ; ID_Continue # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
FF71..FF9D ; ID_Continue # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N
|
||||
@ -8207,12 +8212,13 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
|
||||
2B740..2B81D ; ID_Continue # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
|
||||
2B820..2CEA1 ; ID_Continue # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
|
||||
2CEB0..2EBE0 ; ID_Continue # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
|
||||
2EBF0..2EE5D ; ID_Continue # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
|
||||
2F800..2FA1D ; ID_Continue # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
|
||||
30000..3134A ; ID_Continue # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
|
||||
31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
|
||||
E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
|
||||
|
||||
# Total code points: 139482
|
||||
# Total code points: 140108
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -8962,11 +8968,12 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU
|
||||
2B740..2B81D ; XID_Start # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
|
||||
2B820..2CEA1 ; XID_Start # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
|
||||
2CEB0..2EBE0 ; XID_Start # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
|
||||
2EBF0..2EE5D ; XID_Start # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
|
||||
2F800..2FA1D ; XID_Start # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
|
||||
30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
|
||||
31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
|
||||
|
||||
# Total code points: 136322
|
||||
# Total code points: 136944
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -9543,6 +9550,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU
|
||||
1FE0..1FEC ; XID_Continue # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA
|
||||
1FF2..1FF4 ; XID_Continue # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
|
||||
1FF6..1FFC ; XID_Continue # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
|
||||
200C..200D ; XID_Continue # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
|
||||
203F..2040 ; XID_Continue # Pc [2] UNDERTIE..CHARACTER TIE
|
||||
2054 ; XID_Continue # Pc INVERTED UNDERTIE
|
||||
2071 ; XID_Continue # Lm SUPERSCRIPT LATIN SMALL LETTER I
|
||||
@ -9608,6 +9616,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU
|
||||
309D..309E ; XID_Continue # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK
|
||||
309F ; XID_Continue # Lo HIRAGANA DIGRAPH YORI
|
||||
30A1..30FA ; XID_Continue # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO
|
||||
30FB ; XID_Continue # Po KATAKANA MIDDLE DOT
|
||||
30FC..30FE ; XID_Continue # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK
|
||||
30FF ; XID_Continue # Lo KATAKANA DIGRAPH KOTO
|
||||
3105..312F ; XID_Continue # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN
|
||||
@ -9793,6 +9802,7 @@ FF10..FF19 ; XID_Continue # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NI
|
||||
FF21..FF3A ; XID_Continue # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
|
||||
FF3F ; XID_Continue # Pc FULLWIDTH LOW LINE
|
||||
FF41..FF5A ; XID_Continue # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
|
||||
FF65 ; XID_Continue # Po HALFWIDTH KATAKANA MIDDLE DOT
|
||||
FF66..FF6F ; XID_Continue # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU
|
||||
FF70 ; XID_Continue # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
FF71..FF9D ; XID_Continue # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N
|
||||
@ -10317,12 +10327,13 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA
|
||||
2B740..2B81D ; XID_Continue # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
|
||||
2B820..2CEA1 ; XID_Continue # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
|
||||
2CEB0..2EBE0 ; XID_Continue # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
|
||||
2EBF0..2EE5D ; XID_Continue # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
|
||||
2F800..2FA1D ; XID_Continue # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
|
||||
30000..3134A ; XID_Continue # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
|
||||
31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
|
||||
E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
|
||||
|
||||
# Total code points: 139463
|
||||
# Total code points: 140089
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -10335,6 +10346,15 @@ E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTO
|
||||
# - FFF9..FFFB (Interlinear annotation format characters)
|
||||
# - 13430..13440 (Egyptian hieroglyph format characters)
|
||||
# - Prepended_Concatenation_Mark (Exceptional format characters that should be visible)
|
||||
#
|
||||
# There are currently no stability guarantees for DICP. However, the
|
||||
# values of DICP interact with the derivation of XID_Continue
|
||||
# and NFKC_CF, for which there are stability guarantees.
|
||||
# Maintainers of this property should note that in the
|
||||
# unlikely case that the DICP value changes for an existing character
|
||||
# which is also XID_Continue=Yes, then exceptions must be put
|
||||
# in place to ensure that the NFKC_CF mapping value for that
|
||||
# existing character does not change.
|
||||
|
||||
00AD ; Default_Ignorable_Code_Point # Cf SOFT HYPHEN
|
||||
034F ; Default_Ignorable_Code_Point # Mn COMBINING GRAPHEME JOINER
|
||||
@ -11602,7 +11622,7 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE
|
||||
2E80..2E99 ; Grapheme_Base # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP
|
||||
2E9B..2EF3 ; Grapheme_Base # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE
|
||||
2F00..2FD5 ; Grapheme_Base # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE
|
||||
2FF0..2FFB ; Grapheme_Base # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
|
||||
2FF0..2FFF ; Grapheme_Base # So [16] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ROTATION
|
||||
3000 ; Grapheme_Base # Zs IDEOGRAPHIC SPACE
|
||||
3001..3003 ; Grapheme_Base # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
|
||||
3004 ; Grapheme_Base # So JAPANESE INDUSTRIAL STANDARD SYMBOL
|
||||
@ -11657,6 +11677,7 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE
|
||||
3196..319F ; Grapheme_Base # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK
|
||||
31A0..31BF ; Grapheme_Base # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH
|
||||
31C0..31E3 ; Grapheme_Base # So [36] CJK STROKE T..CJK STROKE Q
|
||||
31EF ; Grapheme_Base # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION
|
||||
31F0..31FF ; Grapheme_Base # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
|
||||
3200..321E ; Grapheme_Base # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU
|
||||
3220..3229 ; Grapheme_Base # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN
|
||||
@ -12497,11 +12518,12 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME
|
||||
2B740..2B81D ; Grapheme_Base # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
|
||||
2B820..2CEA1 ; Grapheme_Base # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
|
||||
2CEB0..2EBE0 ; Grapheme_Base # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
|
||||
2EBF0..2EE5D ; Grapheme_Base # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
|
||||
2F800..2FA1D ; Grapheme_Base # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
|
||||
30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
|
||||
31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
|
||||
|
||||
# Total code points: 146986
|
||||
# Total code points: 147613
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -12572,4 +12594,239 @@ ABED ; Grapheme_Link # Mn MEETEI MAYEK APUN IYEK
|
||||
|
||||
# Total code points: 65
|
||||
|
||||
# ================================================
|
||||
|
||||
# Derived Property: Indic_Conjunct_Break
|
||||
# Generated from the Grapheme_Cluster_Break, Indic_Syllabic_Category,
|
||||
# Canonical_Combining_Class, and Script properties as described in UAX #44:
|
||||
# https://www.unicode.org/reports/tr44/.
|
||||
|
||||
# All code points not explicitly listed for Indic_Conjunct_Break
|
||||
# have the value None.
|
||||
|
||||
# @missing: 0000..10FFFF; InCB; None
|
||||
|
||||
# ================================================
|
||||
|
||||
# Indic_Conjunct_Break=Linker
|
||||
|
||||
094D ; InCB; Linker # Mn DEVANAGARI SIGN VIRAMA
|
||||
09CD ; InCB; Linker # Mn BENGALI SIGN VIRAMA
|
||||
0ACD ; InCB; Linker # Mn GUJARATI SIGN VIRAMA
|
||||
0B4D ; InCB; Linker # Mn ORIYA SIGN VIRAMA
|
||||
0C4D ; InCB; Linker # Mn TELUGU SIGN VIRAMA
|
||||
0D4D ; InCB; Linker # Mn MALAYALAM SIGN VIRAMA
|
||||
|
||||
# Total code points: 6
|
||||
|
||||
# ================================================
|
||||
|
||||
# Indic_Conjunct_Break=Consonant
|
||||
|
||||
0915..0939 ; InCB; Consonant # Lo [37] DEVANAGARI LETTER KA..DEVANAGARI LETTER HA
|
||||
0958..095F ; InCB; Consonant # Lo [8] DEVANAGARI LETTER QA..DEVANAGARI LETTER YYA
|
||||
0978..097F ; InCB; Consonant # Lo [8] DEVANAGARI LETTER MARWARI DDA..DEVANAGARI LETTER BBA
|
||||
0995..09A8 ; InCB; Consonant # Lo [20] BENGALI LETTER KA..BENGALI LETTER NA
|
||||
09AA..09B0 ; InCB; Consonant # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA
|
||||
09B2 ; InCB; Consonant # Lo BENGALI LETTER LA
|
||||
09B6..09B9 ; InCB; Consonant # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA
|
||||
09DC..09DD ; InCB; Consonant # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA
|
||||
09DF ; InCB; Consonant # Lo BENGALI LETTER YYA
|
||||
09F0..09F1 ; InCB; Consonant # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL
|
||||
0A95..0AA8 ; InCB; Consonant # Lo [20] GUJARATI LETTER KA..GUJARATI LETTER NA
|
||||
0AAA..0AB0 ; InCB; Consonant # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA
|
||||
0AB2..0AB3 ; InCB; Consonant # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA
|
||||
0AB5..0AB9 ; InCB; Consonant # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA
|
||||
0AF9 ; InCB; Consonant # Lo GUJARATI LETTER ZHA
|
||||
0B15..0B28 ; InCB; Consonant # Lo [20] ORIYA LETTER KA..ORIYA LETTER NA
|
||||
0B2A..0B30 ; InCB; Consonant # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA
|
||||
0B32..0B33 ; InCB; Consonant # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA
|
||||
0B35..0B39 ; InCB; Consonant # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA
|
||||
0B5C..0B5D ; InCB; Consonant # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA
|
||||
0B5F ; InCB; Consonant # Lo ORIYA LETTER YYA
|
||||
0B71 ; InCB; Consonant # Lo ORIYA LETTER WA
|
||||
0C15..0C28 ; InCB; Consonant # Lo [20] TELUGU LETTER KA..TELUGU LETTER NA
|
||||
0C2A..0C39 ; InCB; Consonant # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA
|
||||
0C58..0C5A ; InCB; Consonant # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA
|
||||
0D15..0D3A ; InCB; Consonant # Lo [38] MALAYALAM LETTER KA..MALAYALAM LETTER TTTA
|
||||
|
||||
# Total code points: 240
|
||||
|
||||
# ================================================
|
||||
|
||||
# Indic_Conjunct_Break=Extend
|
||||
|
||||
0300..034E ; InCB; Extend # Mn [79] COMBINING GRAVE ACCENT..COMBINING UPWARDS ARROW BELOW
|
||||
0350..036F ; InCB; Extend # Mn [32] COMBINING RIGHT ARROWHEAD ABOVE..COMBINING LATIN SMALL LETTER X
|
||||
0483..0487 ; InCB; Extend # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE
|
||||
0591..05BD ; InCB; Extend # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
|
||||
05BF ; InCB; Extend # Mn HEBREW POINT RAFE
|
||||
05C1..05C2 ; InCB; Extend # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
|
||||
05C4..05C5 ; InCB; Extend # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
|
||||
05C7 ; InCB; Extend # Mn HEBREW POINT QAMATS QATAN
|
||||
0610..061A ; InCB; Extend # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
|
||||
064B..065F ; InCB; Extend # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW
|
||||
0670 ; InCB; Extend # Mn ARABIC LETTER SUPERSCRIPT ALEF
|
||||
06D6..06DC ; InCB; Extend # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
|
||||
06DF..06E4 ; InCB; Extend # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
|
||||
06E7..06E8 ; InCB; Extend # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
|
||||
06EA..06ED ; InCB; Extend # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
|
||||
0711 ; InCB; Extend # Mn SYRIAC LETTER SUPERSCRIPT ALAPH
|
||||
0730..074A ; InCB; Extend # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
|
||||
07EB..07F3 ; InCB; Extend # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
|
||||
07FD ; InCB; Extend # Mn NKO DANTAYALAN
|
||||
0816..0819 ; InCB; Extend # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH
|
||||
081B..0823 ; InCB; Extend # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
|
||||
0825..0827 ; InCB; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
|
||||
0829..082D ; InCB; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
|
||||
0859..085B ; InCB; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
|
||||
0898..089F ; InCB; Extend # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
|
||||
08CA..08E1 ; InCB; Extend # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
|
||||
08E3..08FF ; InCB; Extend # Mn [29] ARABIC TURNED DAMMA BELOW..ARABIC MARK SIDEWAYS NOON GHUNNA
|
||||
093C ; InCB; Extend # Mn DEVANAGARI SIGN NUKTA
|
||||
0951..0954 ; InCB; Extend # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT
|
||||
09BC ; InCB; Extend # Mn BENGALI SIGN NUKTA
|
||||
09FE ; InCB; Extend # Mn BENGALI SANDHI MARK
|
||||
0A3C ; InCB; Extend # Mn GURMUKHI SIGN NUKTA
|
||||
0ABC ; InCB; Extend # Mn GUJARATI SIGN NUKTA
|
||||
0B3C ; InCB; Extend # Mn ORIYA SIGN NUKTA
|
||||
0C3C ; InCB; Extend # Mn TELUGU SIGN NUKTA
|
||||
0C55..0C56 ; InCB; Extend # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
|
||||
0CBC ; InCB; Extend # Mn KANNADA SIGN NUKTA
|
||||
0D3B..0D3C ; InCB; Extend # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA
|
||||
0E38..0E3A ; InCB; Extend # Mn [3] THAI CHARACTER SARA U..THAI CHARACTER PHINTHU
|
||||
0E48..0E4B ; InCB; Extend # Mn [4] THAI CHARACTER MAI EK..THAI CHARACTER MAI CHATTAWA
|
||||
0EB8..0EBA ; InCB; Extend # Mn [3] LAO VOWEL SIGN U..LAO SIGN PALI VIRAMA
|
||||
0EC8..0ECB ; InCB; Extend # Mn [4] LAO TONE MAI EK..LAO TONE MAI CATAWA
|
||||
0F18..0F19 ; InCB; Extend # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS
|
||||
0F35 ; InCB; Extend # Mn TIBETAN MARK NGAS BZUNG NYI ZLA
|
||||
0F37 ; InCB; Extend # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS
|
||||
0F39 ; InCB; Extend # Mn TIBETAN MARK TSA -PHRU
|
||||
0F71..0F72 ; InCB; Extend # Mn [2] TIBETAN VOWEL SIGN AA..TIBETAN VOWEL SIGN I
|
||||
0F74 ; InCB; Extend # Mn TIBETAN VOWEL SIGN U
|
||||
0F7A..0F7D ; InCB; Extend # Mn [4] TIBETAN VOWEL SIGN E..TIBETAN VOWEL SIGN OO
|
||||
0F80 ; InCB; Extend # Mn TIBETAN VOWEL SIGN REVERSED I
|
||||
0F82..0F84 ; InCB; Extend # Mn [3] TIBETAN SIGN NYI ZLA NAA DA..TIBETAN MARK HALANTA
|
||||
0F86..0F87 ; InCB; Extend # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
|
||||
0FC6 ; InCB; Extend # Mn TIBETAN SYMBOL PADMA GDAN
|
||||
1037 ; InCB; Extend # Mn MYANMAR SIGN DOT BELOW
|
||||
1039..103A ; InCB; Extend # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT
|
||||
108D ; InCB; Extend # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE
|
||||
135D..135F ; InCB; Extend # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK
|
||||
1714 ; InCB; Extend # Mn TAGALOG SIGN VIRAMA
|
||||
17D2 ; InCB; Extend # Mn KHMER SIGN COENG
|
||||
17DD ; InCB; Extend # Mn KHMER SIGN ATTHACAN
|
||||
18A9 ; InCB; Extend # Mn MONGOLIAN LETTER ALI GALI DAGALGA
|
||||
1939..193B ; InCB; Extend # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I
|
||||
1A17..1A18 ; InCB; Extend # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
|
||||
1A60 ; InCB; Extend # Mn TAI THAM SIGN SAKOT
|
||||
1A75..1A7C ; InCB; Extend # Mn [8] TAI THAM SIGN TONE-1..TAI THAM SIGN KHUEN-LUE KARAN
|
||||
1A7F ; InCB; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
|
||||
1AB0..1ABD ; InCB; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
|
||||
1ABF..1ACE ; InCB; Extend # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T
|
||||
1B34 ; InCB; Extend # Mn BALINESE SIGN REREKAN
|
||||
1B6B..1B73 ; InCB; Extend # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
|
||||
1BAB ; InCB; Extend # Mn SUNDANESE SIGN VIRAMA
|
||||
1BE6 ; InCB; Extend # Mn BATAK SIGN TOMPI
|
||||
1C37 ; InCB; Extend # Mn LEPCHA SIGN NUKTA
|
||||
1CD0..1CD2 ; InCB; Extend # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
|
||||
1CD4..1CE0 ; InCB; Extend # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
|
||||
1CE2..1CE8 ; InCB; Extend # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
|
||||
1CED ; InCB; Extend # Mn VEDIC SIGN TIRYAK
|
||||
1CF4 ; InCB; Extend # Mn VEDIC TONE CANDRA ABOVE
|
||||
1CF8..1CF9 ; InCB; Extend # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
|
||||
1DC0..1DFF ; InCB; Extend # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
|
||||
200D ; InCB; Extend # Cf ZERO WIDTH JOINER
|
||||
20D0..20DC ; InCB; Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
|
||||
20E1 ; InCB; Extend # Mn COMBINING LEFT RIGHT ARROW ABOVE
|
||||
20E5..20F0 ; InCB; Extend # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE
|
||||
2CEF..2CF1 ; InCB; Extend # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS
|
||||
2D7F ; InCB; Extend # Mn TIFINAGH CONSONANT JOINER
|
||||
2DE0..2DFF ; InCB; Extend # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
|
||||
302A..302D ; InCB; Extend # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK
|
||||
302E..302F ; InCB; Extend # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK
|
||||
3099..309A ; InCB; Extend # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
|
||||
A66F ; InCB; Extend # Mn COMBINING CYRILLIC VZMET
|
||||
A674..A67D ; InCB; Extend # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK
|
||||
A69E..A69F ; InCB; Extend # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E
|
||||
A6F0..A6F1 ; InCB; Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
|
||||
A82C ; InCB; Extend # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA
|
||||
A8E0..A8F1 ; InCB; Extend # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
|
||||
A92B..A92D ; InCB; Extend # Mn [3] KAYAH LI TONE PLOPHU..KAYAH LI TONE CALYA PLOPHU
|
||||
A9B3 ; InCB; Extend # Mn JAVANESE SIGN CECAK TELU
|
||||
AAB0 ; InCB; Extend # Mn TAI VIET MAI KANG
|
||||
AAB2..AAB4 ; InCB; Extend # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U
|
||||
AAB7..AAB8 ; InCB; Extend # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA
|
||||
AABE..AABF ; InCB; Extend # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK
|
||||
AAC1 ; InCB; Extend # Mn TAI VIET TONE MAI THO
|
||||
AAF6 ; InCB; Extend # Mn MEETEI MAYEK VIRAMA
|
||||
ABED ; InCB; Extend # Mn MEETEI MAYEK APUN IYEK
|
||||
FB1E ; InCB; Extend # Mn HEBREW POINT JUDEO-SPANISH VARIKA
|
||||
FE20..FE2F ; InCB; Extend # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF
|
||||
101FD ; InCB; Extend # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE
|
||||
102E0 ; InCB; Extend # Mn COPTIC EPACT THOUSANDS MARK
|
||||
10376..1037A ; InCB; Extend # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII
|
||||
10A0D ; InCB; Extend # Mn KHAROSHTHI SIGN DOUBLE RING BELOW
|
||||
10A0F ; InCB; Extend # Mn KHAROSHTHI SIGN VISARGA
|
||||
10A38..10A3A ; InCB; Extend # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
|
||||
10A3F ; InCB; Extend # Mn KHAROSHTHI VIRAMA
|
||||
10AE5..10AE6 ; InCB; Extend # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
|
||||
10D24..10D27 ; InCB; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
|
||||
10EAB..10EAC ; InCB; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
|
||||
10EFD..10EFF ; InCB; Extend # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
|
||||
10F46..10F50 ; InCB; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
|
||||
10F82..10F85 ; InCB; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
|
||||
11070 ; InCB; Extend # Mn BRAHMI SIGN OLD TAMIL VIRAMA
|
||||
1107F ; InCB; Extend # Mn BRAHMI NUMBER JOINER
|
||||
110BA ; InCB; Extend # Mn KAITHI SIGN NUKTA
|
||||
11100..11102 ; InCB; Extend # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA
|
||||
11133..11134 ; InCB; Extend # Mn [2] CHAKMA VIRAMA..CHAKMA MAAYYAA
|
||||
11173 ; InCB; Extend # Mn MAHAJANI SIGN NUKTA
|
||||
111CA ; InCB; Extend # Mn SHARADA SIGN NUKTA
|
||||
11236 ; InCB; Extend # Mn KHOJKI SIGN NUKTA
|
||||
112E9..112EA ; InCB; Extend # Mn [2] KHUDAWADI SIGN NUKTA..KHUDAWADI SIGN VIRAMA
|
||||
1133B..1133C ; InCB; Extend # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA
|
||||
11366..1136C ; InCB; Extend # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX
|
||||
11370..11374 ; InCB; Extend # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA
|
||||
11446 ; InCB; Extend # Mn NEWA SIGN NUKTA
|
||||
1145E ; InCB; Extend # Mn NEWA SANDHI MARK
|
||||
114C3 ; InCB; Extend # Mn TIRHUTA SIGN NUKTA
|
||||
115C0 ; InCB; Extend # Mn SIDDHAM SIGN NUKTA
|
||||
116B7 ; InCB; Extend # Mn TAKRI SIGN NUKTA
|
||||
1172B ; InCB; Extend # Mn AHOM SIGN KILLER
|
||||
1183A ; InCB; Extend # Mn DOGRA SIGN NUKTA
|
||||
1193E ; InCB; Extend # Mn DIVES AKURU VIRAMA
|
||||
11943 ; InCB; Extend # Mn DIVES AKURU SIGN NUKTA
|
||||
11A34 ; InCB; Extend # Mn ZANABAZAR SQUARE SIGN VIRAMA
|
||||
11A47 ; InCB; Extend # Mn ZANABAZAR SQUARE SUBJOINER
|
||||
11A99 ; InCB; Extend # Mn SOYOMBO SUBJOINER
|
||||
11D42 ; InCB; Extend # Mn MASARAM GONDI SIGN NUKTA
|
||||
11D44..11D45 ; InCB; Extend # Mn [2] MASARAM GONDI SIGN HALANTA..MASARAM GONDI VIRAMA
|
||||
11D97 ; InCB; Extend # Mn GUNJALA GONDI VIRAMA
|
||||
11F42 ; InCB; Extend # Mn KAWI CONJOINER
|
||||
16AF0..16AF4 ; InCB; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE
|
||||
16B30..16B36 ; InCB; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
|
||||
1BC9E ; InCB; Extend # Mn DUPLOYAN DOUBLE MARK
|
||||
1D165 ; InCB; Extend # Mc MUSICAL SYMBOL COMBINING STEM
|
||||
1D167..1D169 ; InCB; Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
|
||||
1D16E..1D172 ; InCB; Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5
|
||||
1D17B..1D182 ; InCB; Extend # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
|
||||
1D185..1D18B ; InCB; Extend # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
|
||||
1D1AA..1D1AD ; InCB; Extend # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
|
||||
1D242..1D244 ; InCB; Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
|
||||
1E000..1E006 ; InCB; Extend # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
|
||||
1E008..1E018 ; InCB; Extend # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
|
||||
1E01B..1E021 ; InCB; Extend # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
|
||||
1E023..1E024 ; InCB; Extend # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
|
||||
1E026..1E02A ; InCB; Extend # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
|
||||
1E08F ; InCB; Extend # Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
1E130..1E136 ; InCB; Extend # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
|
||||
1E2AE ; InCB; Extend # Mn TOTO SIGN RISING TONE
|
||||
1E2EC..1E2EF ; InCB; Extend # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI
|
||||
1E4EC..1E4EF ; InCB; Extend # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH
|
||||
1E8D0..1E8D6 ; InCB; Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
|
||||
1E944..1E94A ; InCB; Extend # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
|
||||
|
||||
# Total code points: 884
|
||||
|
||||
# EOF
|
||||
|
@ -1,6 +1,6 @@
|
||||
# NormalizationTest-15.0.0.txt
|
||||
# Date: 2022-04-02, 01:29:09 GMT
|
||||
# Copyright (c) 2022 Unicode, Inc.
|
||||
# NormalizationTest-15.1.0.txt
|
||||
# Date: 2023-01-05, 20:34:44 GMT
|
||||
# Copyright (c) 2023 Unicode, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see https://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
|
@ -1,6 +1,6 @@
|
||||
# PropList-15.0.0.txt
|
||||
# Date: 2022-08-05, 22:17:16 GMT
|
||||
# Copyright (c) 2022 Unicode, Inc.
|
||||
# PropList-15.1.0.txt
|
||||
# Date: 2023-08-01, 21:56:53 GMT
|
||||
# Copyright (c) 2023 Unicode, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see https://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
@ -856,11 +856,12 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM
|
||||
2B740..2B81D ; Ideographic # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
|
||||
2B820..2CEA1 ; Ideographic # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
|
||||
2CEB0..2EBE0 ; Ideographic # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
|
||||
2EBF0..2EE5D ; Ideographic # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
|
||||
2F800..2FA1D ; Ideographic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
|
||||
30000..3134A ; Ideographic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
|
||||
31350..323AF ; Ideographic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
|
||||
|
||||
# Total code points: 105854
|
||||
# Total code points: 106476
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1241,9 +1242,10 @@ E0020..E007F ; Other_Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG
|
||||
# ================================================
|
||||
|
||||
2FF0..2FF1 ; IDS_Binary_Operator # So [2] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO BELOW
|
||||
2FF4..2FFB ; IDS_Binary_Operator # So [8] IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
|
||||
2FF4..2FFD ; IDS_Binary_Operator # So [10] IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND..IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LOWER RIGHT
|
||||
31EF ; IDS_Binary_Operator # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION
|
||||
|
||||
# Total code points: 10
|
||||
# Total code points: 13
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1253,6 +1255,12 @@ E0020..E007F ; Other_Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG
|
||||
|
||||
# ================================================
|
||||
|
||||
2FFE..2FFF ; IDS_Unary_Operator # So [2] IDEOGRAPHIC DESCRIPTION CHARACTER HORIZONTAL REFLECTION..IDEOGRAPHIC DESCRIPTION CHARACTER ROTATION
|
||||
|
||||
# Total code points: 2
|
||||
|
||||
# ================================================
|
||||
|
||||
2E80..2E99 ; Radical # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP
|
||||
2E9B..2EF3 ; Radical # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE
|
||||
2F00..2FD5 ; Radical # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE
|
||||
@ -1275,10 +1283,11 @@ FA27..FA29 ; Unified_Ideograph # Lo [3] CJK COMPATIBILITY IDEOGRAPH-FA27..C
|
||||
2B740..2B81D ; Unified_Ideograph # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
|
||||
2B820..2CEA1 ; Unified_Ideograph # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
|
||||
2CEB0..2EBE0 ; Unified_Ideograph # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
|
||||
2EBF0..2EE5D ; Unified_Ideograph # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
|
||||
30000..3134A ; Unified_Ideograph # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
|
||||
31350..323AF ; Unified_Ideograph # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
|
||||
|
||||
# Total code points: 97058
|
||||
# Total code points: 97680
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1376,8 +1385,58 @@ AABB..AABC ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL AUE..TAI VIET
|
||||
0387 ; Other_ID_Continue # Po GREEK ANO TELEIA
|
||||
1369..1371 ; Other_ID_Continue # No [9] ETHIOPIC DIGIT ONE..ETHIOPIC DIGIT NINE
|
||||
19DA ; Other_ID_Continue # No NEW TAI LUE THAM DIGIT ONE
|
||||
200C..200D ; Other_ID_Continue # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
|
||||
30FB ; Other_ID_Continue # Po KATAKANA MIDDLE DOT
|
||||
FF65 ; Other_ID_Continue # Po HALFWIDTH KATAKANA MIDDLE DOT
|
||||
|
||||
# Total code points: 12
|
||||
# Total code points: 16
|
||||
|
||||
# ================================================
|
||||
|
||||
00B2..00B3 ; ID_Compat_Math_Continue # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE
|
||||
00B9 ; ID_Compat_Math_Continue # No SUPERSCRIPT ONE
|
||||
2070 ; ID_Compat_Math_Continue # No SUPERSCRIPT ZERO
|
||||
2074..2079 ; ID_Compat_Math_Continue # No [6] SUPERSCRIPT FOUR..SUPERSCRIPT NINE
|
||||
207A..207C ; ID_Compat_Math_Continue # Sm [3] SUPERSCRIPT PLUS SIGN..SUPERSCRIPT EQUALS SIGN
|
||||
207D ; ID_Compat_Math_Continue # Ps SUPERSCRIPT LEFT PARENTHESIS
|
||||
207E ; ID_Compat_Math_Continue # Pe SUPERSCRIPT RIGHT PARENTHESIS
|
||||
2080..2089 ; ID_Compat_Math_Continue # No [10] SUBSCRIPT ZERO..SUBSCRIPT NINE
|
||||
208A..208C ; ID_Compat_Math_Continue # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN
|
||||
208D ; ID_Compat_Math_Continue # Ps SUBSCRIPT LEFT PARENTHESIS
|
||||
208E ; ID_Compat_Math_Continue # Pe SUBSCRIPT RIGHT PARENTHESIS
|
||||
2202 ; ID_Compat_Math_Continue # Sm PARTIAL DIFFERENTIAL
|
||||
2207 ; ID_Compat_Math_Continue # Sm NABLA
|
||||
221E ; ID_Compat_Math_Continue # Sm INFINITY
|
||||
1D6C1 ; ID_Compat_Math_Continue # Sm MATHEMATICAL BOLD NABLA
|
||||
1D6DB ; ID_Compat_Math_Continue # Sm MATHEMATICAL BOLD PARTIAL DIFFERENTIAL
|
||||
1D6FB ; ID_Compat_Math_Continue # Sm MATHEMATICAL ITALIC NABLA
|
||||
1D715 ; ID_Compat_Math_Continue # Sm MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL
|
||||
1D735 ; ID_Compat_Math_Continue # Sm MATHEMATICAL BOLD ITALIC NABLA
|
||||
1D74F ; ID_Compat_Math_Continue # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL
|
||||
1D76F ; ID_Compat_Math_Continue # Sm MATHEMATICAL SANS-SERIF BOLD NABLA
|
||||
1D789 ; ID_Compat_Math_Continue # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL
|
||||
1D7A9 ; ID_Compat_Math_Continue # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA
|
||||
1D7C3 ; ID_Compat_Math_Continue # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL
|
||||
|
||||
# Total code points: 43
|
||||
|
||||
# ================================================
|
||||
|
||||
2202 ; ID_Compat_Math_Start # Sm PARTIAL DIFFERENTIAL
|
||||
2207 ; ID_Compat_Math_Start # Sm NABLA
|
||||
221E ; ID_Compat_Math_Start # Sm INFINITY
|
||||
1D6C1 ; ID_Compat_Math_Start # Sm MATHEMATICAL BOLD NABLA
|
||||
1D6DB ; ID_Compat_Math_Start # Sm MATHEMATICAL BOLD PARTIAL DIFFERENTIAL
|
||||
1D6FB ; ID_Compat_Math_Start # Sm MATHEMATICAL ITALIC NABLA
|
||||
1D715 ; ID_Compat_Math_Start # Sm MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL
|
||||
1D735 ; ID_Compat_Math_Start # Sm MATHEMATICAL BOLD ITALIC NABLA
|
||||
1D74F ; ID_Compat_Math_Start # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL
|
||||
1D76F ; ID_Compat_Math_Start # Sm MATHEMATICAL SANS-SERIF BOLD NABLA
|
||||
1D789 ; ID_Compat_Math_Start # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL
|
||||
1D7A9 ; ID_Compat_Math_Start # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA
|
||||
1D7C3 ; ID_Compat_Math_Start # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL
|
||||
|
||||
# Total code points: 13
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1398,6 +1457,7 @@ AABB..AABC ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL AUE..TAI VIET
|
||||
1367..1368 ; Sentence_Terminal # Po [2] ETHIOPIC QUESTION MARK..ETHIOPIC PARAGRAPH SEPARATOR
|
||||
166E ; Sentence_Terminal # Po CANADIAN SYLLABICS FULL STOP
|
||||
1735..1736 ; Sentence_Terminal # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION
|
||||
17D4..17D5 ; Sentence_Terminal # Po [2] KHMER SIGN KHAN..KHMER SIGN BARIYOOSAN
|
||||
1803 ; Sentence_Terminal # Po MONGOLIAN FULL STOP
|
||||
1809 ; Sentence_Terminal # Po MONGOLIAN MANCHU FULL STOP
|
||||
1944..1945 ; Sentence_Terminal # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK
|
||||
@ -1462,7 +1522,7 @@ FF61 ; Sentence_Terminal # Po HALFWIDTH IDEOGRAPHIC FULL STOP
|
||||
1BC9F ; Sentence_Terminal # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP
|
||||
1DA88 ; Sentence_Terminal # Po SIGNWRITING FULL STOP
|
||||
|
||||
# Total code points: 154
|
||||
# Total code points: 156
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
# PropertyValueAliases-15.0.0.txt
|
||||
# Date: 2022-08-05, 23:42:17 GMT
|
||||
# Copyright (c) 2022 Unicode, Inc.
|
||||
# PropertyValueAliases-15.1.0.txt
|
||||
# Date: 2023-08-07, 15:21:34 GMT
|
||||
# Copyright (c) 2023 Unicode, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see https://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
@ -91,6 +91,7 @@ age; 12.1 ; V12_1
|
||||
age; 13.0 ; V13_0
|
||||
age; 14.0 ; V14_0
|
||||
age; 15.0 ; V15_0
|
||||
age; 15.1 ; V15_1
|
||||
age; NA ; Unassigned
|
||||
|
||||
# Alphabetic (Alpha)
|
||||
@ -208,6 +209,7 @@ blk; CJK_Ext_E ; CJK_Unified_Ideographs_Extension_E
|
||||
blk; CJK_Ext_F ; CJK_Unified_Ideographs_Extension_F
|
||||
blk; CJK_Ext_G ; CJK_Unified_Ideographs_Extension_G
|
||||
blk; CJK_Ext_H ; CJK_Unified_Ideographs_Extension_H
|
||||
blk; CJK_Ext_I ; CJK_Unified_Ideographs_Extension_I
|
||||
blk; CJK_Radicals_Sup ; CJK_Radicals_Supplement
|
||||
blk; CJK_Strokes ; CJK_Strokes
|
||||
blk; CJK_Symbols ; CJK_Symbols_And_Punctuation
|
||||
@ -817,6 +819,21 @@ IDSB; Y ; Yes ; T
|
||||
IDST; N ; No ; F ; False
|
||||
IDST; Y ; Yes ; T ; True
|
||||
|
||||
# IDS_Unary_Operator (IDSU)
|
||||
|
||||
IDSU; N ; No ; F ; False
|
||||
IDSU; Y ; Yes ; T ; True
|
||||
|
||||
# ID_Compat_Math_Continue (ID_Compat_Math_Continue)
|
||||
|
||||
ID_Compat_Math_Continue; N ; No ; F ; False
|
||||
ID_Compat_Math_Continue; Y ; Yes ; T ; True
|
||||
|
||||
# ID_Compat_Math_Start (ID_Compat_Math_Start)
|
||||
|
||||
ID_Compat_Math_Start; N ; No ; F ; False
|
||||
ID_Compat_Math_Start; Y ; Yes ; T ; True
|
||||
|
||||
# ID_Continue (IDC)
|
||||
|
||||
IDC; N ; No ; F ; False
|
||||
@ -836,6 +853,13 @@ IDS; Y ; Yes ; T
|
||||
Ideo; N ; No ; F ; False
|
||||
Ideo; Y ; Yes ; T ; True
|
||||
|
||||
# Indic_Conjunct_Break (InCB)
|
||||
|
||||
InCB; Consonant ; Consonant
|
||||
InCB; Extend ; Extend
|
||||
InCB; Linker ; Linker
|
||||
InCB; None ; None
|
||||
|
||||
# Indic_Positional_Category (InPC)
|
||||
|
||||
InPC; Bottom ; Bottom
|
||||
@ -1074,7 +1098,10 @@ jt ; U ; Non_Joining
|
||||
# Line_Break (lb)
|
||||
|
||||
lb ; AI ; Ambiguous
|
||||
lb ; AK ; Aksara
|
||||
lb ; AL ; Alphabetic
|
||||
lb ; AP ; Aksara_Prebase
|
||||
lb ; AS ; Aksara_Start
|
||||
lb ; B2 ; Break_Both
|
||||
lb ; BA ; Break_After
|
||||
lb ; BB ; Break_Before
|
||||
@ -1112,6 +1139,8 @@ lb ; SA ; Complex_Context
|
||||
lb ; SG ; Surrogate
|
||||
lb ; SP ; Space
|
||||
lb ; SY ; Break_Symbols
|
||||
lb ; VF ; Virama_Final
|
||||
lb ; VI ; Virama
|
||||
lb ; WJ ; Word_Joiner
|
||||
lb ; XX ; Unknown
|
||||
lb ; ZW ; ZWSpace
|
||||
@ -1156,6 +1185,9 @@ NFKC_QC; M ; Maybe
|
||||
NFKC_QC; N ; No
|
||||
NFKC_QC; Y ; Yes
|
||||
|
||||
# NFKC_Simple_Casefold (NFKC_SCF)
|
||||
|
||||
|
||||
# NFKD_Quick_Check (NFKD_QC)
|
||||
|
||||
NFKD_QC; N ; No
|
||||
|
@ -1,6 +1,6 @@
|
||||
# Unicode Character Database
|
||||
# Date: 2022-09-02
|
||||
# Copyright (c) 2022 Unicode, Inc.
|
||||
# Date: 2023-08-28
|
||||
# Copyright (c) 2023 Unicode, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see https://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
@ -10,7 +10,7 @@
|
||||
# UAX #44, "Unicode Character Database"
|
||||
# UTS #51, "Unicode Emoji"
|
||||
#
|
||||
# The UAXes and UTS #51 can be accessed at https://www.unicode.org/versions/Unicode15.0.0/
|
||||
# The UAXes and UTS #51 can be accessed at https://www.unicode.org/versions/Unicode15.1.0/
|
||||
|
||||
This directory contains the final data files
|
||||
for the Unicode Character Database, for Version 15.0.0 of the Unicode Standard.
|
||||
This directory contains final data files
|
||||
for the Unicode Character Database, for Version 15.1.0 of the Unicode Standard.
|
||||
|
@ -1,6 +1,6 @@
|
||||
# Scripts-15.0.0.txt
|
||||
# Date: 2022-04-26, 23:15:02 GMT
|
||||
# Copyright (c) 2022 Unicode, Inc.
|
||||
# Scripts-15.1.0.txt
|
||||
# Date: 2023-07-28, 16:01:07 GMT
|
||||
# Copyright (c) 2023 Unicode, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see https://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
@ -357,7 +357,7 @@
|
||||
2E5B ; Common # Ps BOTTOM HALF LEFT PARENTHESIS
|
||||
2E5C ; Common # Pe BOTTOM HALF RIGHT PARENTHESIS
|
||||
2E5D ; Common # Pd OBLIQUE HYPHEN
|
||||
2FF0..2FFB ; Common # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
|
||||
2FF0..2FFF ; Common # So [16] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ROTATION
|
||||
3000 ; Common # Zs IDEOGRAPHIC SPACE
|
||||
3001..3003 ; Common # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
|
||||
3004 ; Common # So JAPANESE INDUSTRIAL STANDARD SYMBOL
|
||||
@ -399,6 +399,7 @@
|
||||
3192..3195 ; Common # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK
|
||||
3196..319F ; Common # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK
|
||||
31C0..31E3 ; Common # So [36] CJK STROKE T..CJK STROKE Q
|
||||
31EF ; Common # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION
|
||||
3220..3229 ; Common # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN
|
||||
322A..3247 ; Common # So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO
|
||||
3248..324F ; Common # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE
|
||||
@ -629,7 +630,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
|
||||
E0001 ; Common # Cf LANGUAGE TAG
|
||||
E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG
|
||||
|
||||
# Total code points: 8301
|
||||
# Total code points: 8306
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1593,11 +1594,12 @@ FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILI
|
||||
2B740..2B81D ; Han # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
|
||||
2B820..2CEA1 ; Han # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
|
||||
2CEB0..2EBE0 ; Han # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
|
||||
2EBF0..2EE5D ; Han # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
|
||||
2F800..2FA1D ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
|
||||
30000..3134A ; Han # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
|
||||
31350..323AF ; Han # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
|
||||
|
||||
# Total code points: 98408
|
||||
# Total code points: 99030
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
# SpecialCasing-15.0.0.txt
|
||||
# Date: 2022-02-02, 23:35:52 GMT
|
||||
# Copyright (c) 2022 Unicode, Inc.
|
||||
# SpecialCasing-15.1.0.txt
|
||||
# Date: 2023-01-05, 20:35:03 GMT
|
||||
# Copyright (c) 2023 Unicode, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see https://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
|
@ -11231,6 +11231,10 @@
|
||||
2FF9;IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM UPPER RIGHT;So;0;ON;;;;;N;;;;;
|
||||
2FFA;IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LOWER LEFT;So;0;ON;;;;;N;;;;;
|
||||
2FFB;IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID;So;0;ON;;;;;N;;;;;
|
||||
2FFC;IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM RIGHT;So;0;ON;;;;;N;;;;;
|
||||
2FFD;IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LOWER RIGHT;So;0;ON;;;;;N;;;;;
|
||||
2FFE;IDEOGRAPHIC DESCRIPTION CHARACTER HORIZONTAL REFLECTION;So;0;ON;;;;;N;;;;;
|
||||
2FFF;IDEOGRAPHIC DESCRIPTION CHARACTER ROTATION;So;0;ON;;;;;N;;;;;
|
||||
3000;IDEOGRAPHIC SPACE;Zs;0;WS;<wide> 0020;;;;N;;;;;
|
||||
3001;IDEOGRAPHIC COMMA;Po;0;ON;;;;;N;;;;;
|
||||
3002;IDEOGRAPHIC FULL STOP;Po;0;ON;;;;;N;IDEOGRAPHIC PERIOD;;;;
|
||||
@ -11705,6 +11709,7 @@
|
||||
31E1;CJK STROKE HZZZG;So;0;ON;;;;;N;;;;;
|
||||
31E2;CJK STROKE PG;So;0;ON;;;;;N;;;;;
|
||||
31E3;CJK STROKE Q;So;0;ON;;;;;N;;;;;
|
||||
31EF;IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION;So;0;ON;;;;;N;;;;;
|
||||
31F0;KATAKANA LETTER SMALL KU;Lo;0;L;;;;;N;;;;;
|
||||
31F1;KATAKANA LETTER SMALL SI;Lo;0;L;;;;;N;;;;;
|
||||
31F2;KATAKANA LETTER SMALL SU;Lo;0;L;;;;;N;;;;;
|
||||
@ -34035,6 +34040,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
|
||||
2CEA1;<CJK Ideograph Extension E, Last>;Lo;0;L;;;;;N;;;;;
|
||||
2CEB0;<CJK Ideograph Extension F, First>;Lo;0;L;;;;;N;;;;;
|
||||
2EBE0;<CJK Ideograph Extension F, Last>;Lo;0;L;;;;;N;;;;;
|
||||
2EBF0;<CJK Ideograph Extension I, First>;Lo;0;L;;;;;N;;;;;
|
||||
2EE5D;<CJK Ideograph Extension I, Last>;Lo;0;L;;;;;N;;;;;
|
||||
2F800;CJK COMPATIBILITY IDEOGRAPH-2F800;Lo;0;L;4E3D;;;;N;;;;;
|
||||
2F801;CJK COMPATIBILITY IDEOGRAPH-2F801;Lo;0;L;4E38;;;;N;;;;;
|
||||
2F802;CJK COMPATIBILITY IDEOGRAPH-2F802;Lo;0;L;4E41;;;;N;;;;;
|
||||
|
@ -1,6 +1,6 @@
|
||||
# GraphemeBreakProperty-15.0.0.txt
|
||||
# Date: 2022-04-27, 17:07:38 GMT
|
||||
# Copyright (c) 2022 Unicode, Inc.
|
||||
# GraphemeBreakProperty-15.1.0.txt
|
||||
# Date: 2023-01-05, 20:34:41 GMT
|
||||
# Copyright (c) 2023 Unicode, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see https://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,11 +1,11 @@
|
||||
# emoji-data.txt
|
||||
# Date: 2022-08-02, 00:26:10 GMT
|
||||
# Copyright (c) 2022 Unicode, Inc.
|
||||
# Date: 2023-02-01, 02:22:54 GMT
|
||||
# Copyright (c) 2023 Unicode, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see https://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
# Emoji Data for UTS #51
|
||||
# Used with Emoji Version 15.0 and subsequent minor revisions (if any)
|
||||
# Used with Emoji Version 15.1 and subsequent minor revisions (if any)
|
||||
#
|
||||
# For documentation and usage, see https://www.unicode.org/reports/tr51
|
||||
#
|
||||
|
@ -1,103 +1,104 @@
|
||||
## The Unicode Standard, Unicode Character Database, Version 15.0.0
|
||||
## The Unicode Standard, Unicode Character Database, Version 15.1.0
|
||||
|
||||
### Unicode Character Database
|
||||
```
|
||||
|
||||
UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
|
||||
|
||||
See Terms of Use for definitions of Unicode Inc.'s
|
||||
Data Files and Software.
|
||||
|
||||
NOTICE TO USER: Carefully read the following legal agreement.
|
||||
BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
|
||||
DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
|
||||
YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
|
||||
TERMS AND CONDITIONS OF THIS AGREEMENT.
|
||||
IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
|
||||
THE DATA FILES OR SOFTWARE.
|
||||
UNICODE LICENSE V3
|
||||
|
||||
COPYRIGHT AND PERMISSION NOTICE
|
||||
|
||||
Copyright © 1991-2022 Unicode, Inc. All rights reserved.
|
||||
Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
|
||||
Copyright © 1991-2023 Unicode, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of the Unicode data files and any associated documentation
|
||||
(the "Data Files") or Unicode software and any associated documentation
|
||||
(the "Software") to deal in the Data Files or Software
|
||||
without restriction, including without limitation the rights to use,
|
||||
copy, modify, merge, publish, distribute, and/or sell copies of
|
||||
the Data Files or Software, and to permit persons to whom the Data Files
|
||||
or Software are furnished to do so, provided that either
|
||||
(a) this copyright and permission notice appear with all copies
|
||||
of the Data Files or Software, or
|
||||
(b) this copyright and permission notice appear in associated
|
||||
Documentation.
|
||||
NOTICE TO USER: Carefully read the following legal agreement. BY
|
||||
DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR
|
||||
SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
|
||||
TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT
|
||||
DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE.
|
||||
|
||||
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
|
||||
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT OF THIRD PARTY RIGHTS.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
|
||||
NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
|
||||
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
|
||||
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
|
||||
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of data files and any associated documentation (the "Data Files") or
|
||||
software and any associated documentation (the "Software") to deal in the
|
||||
Data Files or Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, and/or sell
|
||||
copies of the Data Files or Software, and to permit persons to whom the
|
||||
Data Files or Software are furnished to do so, provided that either (a)
|
||||
this copyright and permission notice appear with all copies of the Data
|
||||
Files or Software, or (b) this copyright and permission notice appear in
|
||||
associated Documentation.
|
||||
|
||||
Except as contained in this notice, the name of a copyright holder
|
||||
shall not be used in advertising or otherwise to promote the sale,
|
||||
use or other dealings in these Data Files or Software without prior
|
||||
written authorization of the copyright holder.
|
||||
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
|
||||
KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
|
||||
THIRD PARTY RIGHTS.
|
||||
|
||||
=== http://www.unicode.org/copyright.html content ===
|
||||
Unicode (R) Copyright and Terms of Use
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
|
||||
BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
|
||||
OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
|
||||
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA
|
||||
FILES OR SOFTWARE.
|
||||
|
||||
Except as contained in this notice, the name of a copyright holder shall
|
||||
not be used in advertising or otherwise to promote the sale, use or other
|
||||
dealings in these Data Files or Software without prior written
|
||||
authorization of the copyright holder.
|
||||
|
||||
|
||||
------------------
|
||||
|
||||
Unicode® Copyright and Terms of Use
|
||||
For the general privacy policy governing access to this site, see the Unicode Privacy Policy.
|
||||
|
||||
Unicode Copyright
|
||||
Copyright (C) 1991-2022 Unicode, Inc. All rights reserved.
|
||||
Definitions
|
||||
Unicode Data Files ("DATA FILES") include all data files under the directories:
|
||||
https://www.unicode.org/Public/
|
||||
https://www.unicode.org/reports/
|
||||
https://www.unicode.org/ivd/data/
|
||||
A. Unicode Copyright
|
||||
Copyright © 1991-2023 Unicode, Inc. All rights reserved.
|
||||
|
||||
Unicode Data Files do not include PDF online code charts under the directory:
|
||||
https://www.unicode.org/Public/
|
||||
B. Definitions
|
||||
Unicode Data Files ("DATA FILES") include all data files under the directories:
|
||||
https://www.unicode.org/Public/
|
||||
https://www.unicode.org/reports/
|
||||
https://www.unicode.org/ivd/data/
|
||||
|
||||
Unicode Software ("SOFTWARE") includes any source code published in the Unicode Standard
|
||||
or any source code or compiled code under the directories:
|
||||
https://www.unicode.org/Public/PROGRAMS/
|
||||
https://www.unicode.org/Public/cldr/
|
||||
http://site.icu-project.org/download/
|
||||
Terms of Use
|
||||
Certain documents and files on this website contain a legend indicating that "Modification is permitted." Any person is hereby authorized, without fee, to modify such documents and files to create derivative works conforming to the Unicode® Standard, subject to Terms and Conditions herein.
|
||||
Any person is hereby authorized, without fee, to view, use, reproduce, and distribute all documents and files, subject to the Terms and Conditions herein.
|
||||
Further specifications of rights and restrictions pertaining to the use of the Unicode DATA FILES and SOFTWARE can be found in the Unicode Data Files and Software License.
|
||||
Each version of the Unicode Standard has further specifications of rights and restrictions of use. For the book editions (Unicode 5.0 and earlier), these are found on the back of the title page.
|
||||
The Unicode PDF online code charts carry specific restrictions. Those restrictions are incorporated as the first page of each PDF code chart.
|
||||
All other files, including online documentation of the core specification for Unicode 6.0 and later, are covered under these general Terms of Use.
|
||||
No license is granted to "mirror" the Unicode website where a fee is charged for access to the "mirror" site.
|
||||
Modification is not permitted with respect to this document. All copies of this document must be verbatim.
|
||||
Restricted Rights Legend
|
||||
Any technical data or software which is licensed to the United States of America, its agencies and/or instrumentalities under this Agreement is commercial technical data or commercial computer software developed exclusively at private expense as defined in FAR 2.101, or DFARS 252.227-7014 (June 1995), as applicable. For technical data, use, duplication, or disclosure by the Government is subject to restrictions as set forth in DFARS 202.227-7015 Technical Data, Commercial and Items (Nov 1995) and this Agreement. For Software, in accordance with FAR 12-212 or DFARS 227-7202, as applicable, use, duplication or disclosure by the Government is subject to the restrictions set forth in this Agreement.
|
||||
Warranties and Disclaimers
|
||||
This publication and/or website may include technical or typographical errors or other inaccuracies. Changes are periodically added to the information herein; these changes will be incorporated in new editions of the publication and/or website. Unicode, Inc. may make improvements and/or changes in the product(s) and/or program(s) described in this publication and/or website at any time.
|
||||
If this file has been purchased on magnetic or optical media from Unicode, Inc. the sole and exclusive remedy for any claim will be exchange of the defective media within ninety (90) days of original purchase.
|
||||
EXCEPT AS PROVIDED IN SECTION E.2, THIS PUBLICATION AND/OR SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND EITHER EXPRESS, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. UNICODE, INC. AND ITS LICENSORS ASSUME NO RESPONSIBILITY FOR ERRORS OR OMISSIONS IN THIS PUBLICATION AND/OR SOFTWARE OR OTHER DOCUMENTS WHICH ARE REFERENCED BY OR LINKED TO THIS PUBLICATION OR THE UNICODE WEBSITE.
|
||||
Waiver of Damages
|
||||
In no event shall Unicode, Inc. or its licensors be liable for any special, incidental, indirect or consequential damages of any kind, or any damages whatsoever, whether or not Unicode, Inc. was advised of the possibility of the damage, including, without limitation, those resulting from the following: loss of use, data or profits, in connection with the use, modification or distribution of this information or its derivatives.
|
||||
Trademarks & Logos
|
||||
The Unicode Word Mark and the Unicode Logo are trademarks of Unicode, Inc. “The Unicode Consortium” and “Unicode, Inc.” are trade names of Unicode, Inc. Use of the information and materials found on this website indicates your acknowledgement of Unicode, Inc.’s exclusive worldwide rights in the Unicode Word Mark, the Unicode Logo, and the Unicode trade names.
|
||||
The Unicode Consortium Name and Trademark Usage Policy (“Trademark Policy”) are incorporated herein by reference and you agree to abide by the provisions of the Trademark Policy, which may be changed from time to time in the sole discretion of Unicode, Inc.
|
||||
All third party trademarks referenced herein are the property of their respective owners.
|
||||
Miscellaneous
|
||||
Jurisdiction and Venue. This website is operated from a location in the State of California, United States of America. Unicode, Inc. makes no representation that the materials are appropriate for use in other locations. If you access this website from other locations, you are responsible for compliance with local laws. This Agreement, all use of this website and any claims and damages resulting from use of this website are governed solely by the laws of the State of California without regard to any principles which would apply the laws of a different jurisdiction. The user agrees that any disputes regarding this website shall be resolved solely in the courts located in Santa Clara County, California. The user agrees said courts have personal jurisdiction and agree to waive any right to transfer the dispute to any other forum.
|
||||
Modification by Unicode, Inc. Unicode, Inc. shall have the right to modify this Agreement at any time by posting it to this website. The user may not assign any part of this Agreement without Unicode, Inc.’s prior written consent.
|
||||
Taxes. The user agrees to pay any taxes arising from access to this website or use of the information herein, except for those based on Unicode’s net income.
|
||||
Severability. If any provision of this Agreement is declared invalid or unenforceable, the remaining provisions of this Agreement shall remain in effect.
|
||||
Entire Agreement. This Agreement constitutes the entire agreement between the parties.
|
||||
Unicode Data Files do not include PDF online code charts under the directory:
|
||||
https://www.unicode.org/Public/
|
||||
|
||||
Unicode Software ("SOFTWARE") includes any source code published in the Unicode Standard
|
||||
or any source code or compiled code under the directories:
|
||||
https://www.unicode.org/Public/PROGRAMS/
|
||||
https://www.unicode.org/Public/cldr/
|
||||
http://site.icu-project.org/download/
|
||||
|
||||
C. Terms of Use
|
||||
1. Certain documents and files on this website contain a legend indicating that "Modification is permitted." Any person is hereby authorized, without fee, to modify such documents and files to create derivative works conforming to the Unicode® Standard, subject to Terms and Conditions herein.
|
||||
2. Any person is hereby authorized, without fee, to view, use, reproduce, and distribute all documents and files, subject to the Terms and Conditions herein.
|
||||
3. Further specifications of rights and restrictions pertaining to the use of the Unicode DATA FILES and SOFTWARE can be found in the Unicode Data Files and Software License.
|
||||
4. Each version of the Unicode Standard has further specifications of rights and restrictions of use. For the book editions (Unicode 5.0 and earlier), these are found on the back of the title page.
|
||||
5. The Unicode PDF online code charts carry specific restrictions. Those restrictions are incorporated as the first page of each PDF code chart.
|
||||
6. All other files, including online documentation of the core specification for Unicode 6.0 and later, are covered under these general Terms of Use.
|
||||
7. No license is granted to "mirror" the Unicode website where a fee is charged for access to the "mirror" site.
|
||||
8. Modification is not permitted with respect to this document. All copies of this document must be verbatim.
|
||||
|
||||
D. Restricted Rights Legend
|
||||
1. Any technical data or software which is licensed to the United States of America, its agencies and/or instrumentalities under this Agreement is commercial technical data or commercial computer software developed exclusively at private expense as defined in FAR 2.101, or DFARS 252.227-7014 (June 1995), as applicable. For technical data, use, duplication, or disclosure by the Government is subject to restrictions as set forth in DFARS 202.227-7015 Technical Data, Commercial and Items (Nov 1995) and this Agreement. For Software, in accordance with FAR 12-212 or DFARS 227-7202, as applicable, use, duplication or disclosure by the Government is subject to the restrictions set forth in this Agreement.
|
||||
|
||||
E. Warranties and Disclaimers
|
||||
1. This publication and/or website may include technical or typographical errors or other inaccuracies. Changes are periodically added to the information herein; these changes will be incorporated in new editions of the publication and/or website. Unicode, Inc. may make improvements and/or changes in the product(s) and/or program(s) described in this publication and/or website at any time.
|
||||
2. If this file has been purchased on magnetic or optical media from Unicode, Inc. the sole and exclusive remedy for any claim will be exchange of the defective media within ninety (90) days of original purchase.
|
||||
3. EXCEPT AS PROVIDED IN SECTION E.2, THIS PUBLICATION AND/OR SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND EITHER EXPRESS, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. UNICODE, INC. AND ITS LICENSORS ASSUME NO RESPONSIBILITY FOR ERRORS OR OMISSIONS IN THIS PUBLICATION AND/OR SOFTWARE OR OTHER DOCUMENTS WHICH ARE REFERENCED BY OR LINKED TO THIS PUBLICATION OR THE UNICODE WEBSITE.
|
||||
|
||||
F. Waiver of Damages
|
||||
1. In no event shall Unicode, Inc. or its licensors be liable for any special, incidental, indirect or consequential damages of any kind, or any damages whatsoever, whether or not Unicode, Inc. was advised of the possibility of the damage, including, without limitation, those resulting from the following: loss of use, data or profits, in connection with the use, modification or distribution of this information or its derivatives.
|
||||
|
||||
G. Trademarks & Logos
|
||||
1. The Unicode Word Mark and the Unicode Logo are trademarks of Unicode, Inc. "The Unicode Consortium" and "Unicode, Inc." are trade names of Unicode, Inc. Use of the information and materials found on this website indicates your acknowledgement of Unicode, Inc.'s exclusive worldwide rights in the Unicode Word Mark, the Unicode Logo, and the Unicode trade names.
|
||||
2. The Unicode Consortium Name and Trademark Usage Policy ("Trademark Policy") are incorporated herein by reference and you agree to abide by the provisions of the Trademark Policy, which may be changed from time to time in the sole discretion of Unicode, Inc.
|
||||
All third party trademarks referenced herein are the property of their respective owners.
|
||||
|
||||
H. Miscellaneous
|
||||
1. Jurisdiction and Venue. This website is operated from a location in the State of California, United States of America. Unicode, Inc. makes no representation that the materials are appropriate for use in other locations. If you access this website from other locations, you are responsible for compliance with local laws. This Agreement, all use of this website and any claims and damages resulting from use of this website are governed solely by the laws of the State of California without regard to any principles which would apply the laws of a different jurisdiction. The user agrees that any disputes regarding this website shall be resolved solely in the courts located in Santa Clara County, California. The user agrees said courts have personal jurisdiction and agree to waive any right to transfer the dispute to any other forum.
|
||||
2. Modification by Unicode, Inc. Unicode, Inc. shall have the right to modify this Agreement at any time by posting it to this website. The user may not assign any part of this Agreement without Unicode, Inc.'s prior written consent.
|
||||
3. Taxes. The user agrees to pay any taxes arising from access to this website or use of the information herein, except for those based on Unicode's net income.
|
||||
4. Severability. If any provision of this Agreement is declared invalid or unenforceable, the remaining provisions of this Agreement shall remain in effect.
|
||||
5. Entire Agreement. This Agreement constitutes the entire agreement between the parties.
|
||||
|
||||
```
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -23,7 +23,7 @@
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @bug 8202771 8221431 8229831
|
||||
* @bug 8202771 8221431 8229831 8296246
|
||||
* @summary Check j.l.Character.isDigit/isLetter/isLetterOrDigit/isSpaceChar
|
||||
* /isWhitespace/isTitleCase/isISOControl/isIdentifierIgnorable
|
||||
* /isJavaIdentifierStart/isJavaIdentifierPart/isUnicodeIdentifierStart
|
||||
@ -292,7 +292,11 @@ public class CharPropTest {
|
||||
return codePoint == 0x00B7 ||
|
||||
codePoint == 0x0387 ||
|
||||
(codePoint >= 0x1369 && codePoint <= 0x1371) ||
|
||||
codePoint == 0x19DA;
|
||||
codePoint == 0x19DA ||
|
||||
codePoint == 0x200C ||
|
||||
codePoint == 0x200D ||
|
||||
codePoint == 0x30FB ||
|
||||
codePoint == 0xFF65;
|
||||
}
|
||||
|
||||
private static void printDiff(int codePoint, String method, boolean actual, boolean expected) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user