8221431: Support for Unicode 12.1

Reviewed-by: erikj, rriggs
This commit is contained in:
Naoto Sato 2019-05-23 12:21:21 -07:00
parent e4f31b1cd7
commit 93fabcdc5a
66 changed files with 60279 additions and 38178 deletions

View File

@ -39,7 +39,7 @@ $(eval $(call IncludeCustomExtension, CompileJavaModules.gmk))
# Module specific build settings
java.base_ADD_JAVAC_FLAGS += -Xdoclint:all/protected,-reference,-accessibility '-Xdoclint/package:java.*,javax.*' -XDstringConcat=inline
java.base_COPY += .icu .dat .spp content-types.properties hijrah-config-islamic-umalqura.properties
java.base_COPY += .icu .dat .spp .nrm content-types.properties hijrah-config-islamic-umalqura.properties
java.base_CLEAN += intrinsic.properties
java.base_EXCLUDE_FILES += \

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@ -73,6 +73,9 @@ TOOL_BLACKLISTED_CERTS = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_cla
TOOL_MAKEJAVASECURITY = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
build.tools.makejavasecurity.MakeJavaSecurity
TOOL_GENERATEEMOJIDATA = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
build.tools.generateemojidata.GenerateEmojiData
# TODO: There are references to the jdwpgen.jar in jdk/make/netbeans/jdwpgen/build.xml
# and nbproject/project.properties in the same dir. Needs to be looked at.

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -365,6 +365,8 @@ class CharacterData00 extends CharacterData {
case 0xA7B1: mapChar = 0x0287; break;
case 0xA7B2: mapChar = 0x029D; break;
case 0xA7B3: mapChar = 0xAB53; break;
case 0xA7C5: mapChar = 0x0282; break;
case 0xA7C6: mapChar = 0x1D8E; break;
// default mapChar is already set, so no
// need to redo it here.
// default : mapChar = ch;
@ -400,6 +402,7 @@ class CharacterData00 extends CharacterData {
case 0x026C: mapChar = 0xA7AD; break;
case 0x0271: mapChar = 0x2C6E; break;
case 0x027D: mapChar = 0x2C64; break;
case 0x0282: mapChar = 0xA7C5; break;
case 0x0287: mapChar = 0xA7B1; break;
case 0x029D: mapChar = 0xA7B2; break;
case 0x029E: mapChar = 0xA7B0; break;
@ -460,6 +463,7 @@ class CharacterData00 extends CharacterData {
case 0x1C88: mapChar = 0xA64A; break;
case 0x1D79: mapChar = 0xA77D; break;
case 0x1D7D: mapChar = 0x2C63; break;
case 0x1D8E: mapChar = 0xA7C6; break;
case 0x1F80: mapChar = 0x1F88; break;
case 0x1F81: mapChar = 0x1F89; break;
case 0x1F82: mapChar = 0x1F8A; break;
@ -854,6 +858,7 @@ class CharacterData00 extends CharacterData {
case 0x026C: mapChar = 0xA7AD; break;
case 0x0271: mapChar = 0x2C6E; break;
case 0x027D: mapChar = 0x2C64; break;
case 0x0282: mapChar = 0xA7C5; break;
case 0x0287: mapChar = 0xA7B1; break;
case 0x029D: mapChar = 0xA7B2; break;
case 0x029E: mapChar = 0xA7B0; break;
@ -914,6 +919,7 @@ class CharacterData00 extends CharacterData {
case 0x1C88: mapChar = 0xA64A; break;
case 0x1D79: mapChar = 0xA77D; break;
case 0x1D7D: mapChar = 0x2C63; break;
case 0x1D8E: mapChar = 0xA7C6; break;
case 0x1FBE: mapChar = 0x0399; break;
case 0x2C65: mapChar = 0x023A; break;
case 0x2C66: mapChar = 0x023E; break;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -433,22 +433,59 @@ class CharacterData01 extends CharacterData {
case 0X1EC92: retval = 7000; break; // INDIC SIYAQ NUMBER SEVEN THOUSAND
case 0X1EC93: retval = 8000; break; // INDIC SIYAQ NUMBER EIGHT THOUSAND
case 0X1EC94: retval = 9000; break; // INDIC SIYAQ NUMBER NINE THOUSAND
case 0X1EC95: retval = 10000; break; // INDIC SIYAQ NUMBER TEN THOUSAND
case 0X1EC96: retval = 20000; break; // INDIC SIYAQ NUMBER TWENTY THOUSAND
case 0X1EC97: retval = 30000; break; // INDIC SIYAQ NUMBER THIRTY THOUSAND
case 0X1EC98: retval = 40000; break; // INDIC SIYAQ NUMBER FORTY THOUSAND
case 0X1EC99: retval = 50000; break; // INDIC SIYAQ NUMBER FIFTY THOUSAND
case 0X1EC9A: retval = 60000; break; // INDIC SIYAQ NUMBER SIXTY THOUSAND
case 0X1EC9B: retval = 70000; break; // INDIC SIYAQ NUMBER SEVENTY THOUSAND
case 0X1EC9C: retval = 80000; break; // INDIC SIYAQ NUMBER EIGHTY THOUSAND
case 0X1EC9D: retval = 90000; break; // INDIC SIYAQ NUMBER NINETY THOUSAND
case 0X1EC9E: retval = 100000; break; // INDIC SIYAQ NUMBER LAKH
case 0X1EC9F: retval = 200000; break; // INDIC SIYAQ NUMBER LAKHAN
case 0X1ECA0: retval = 100000; break; // INDIC SIYAQ LAKH MARK
case 0X1ECA1: retval = 10000000; break; // INDIC SIYAQ NUMBER KAROR
case 0X1ECA2: retval = 20000000; break; // INDIC SIYAQ NUMBER KARORAN
case 0X1ECB3: retval = 10000; break; // INDIC SIYAQ NUMBER ALTERNATE TEN THOUSAND
case 0X1ECB4: retval = 100000; break; // INDIC SIYAQ NUMBER ALTERNATE LAKH MARK
case 0X1EC95: retval = 10000; break; // INDIC SIYAQ NUMBER TEN THOUSAND
case 0X1EC96: retval = 20000; break; // INDIC SIYAQ NUMBER TWENTY THOUSAND
case 0X1EC97: retval = 30000; break; // INDIC SIYAQ NUMBER THIRTY THOUSAND
case 0X1EC98: retval = 40000; break; // INDIC SIYAQ NUMBER FORTY THOUSAND
case 0X1EC99: retval = 50000; break; // INDIC SIYAQ NUMBER FIFTY THOUSAND
case 0X1EC9A: retval = 60000; break; // INDIC SIYAQ NUMBER SIXTY THOUSAND
case 0X1EC9B: retval = 70000; break; // INDIC SIYAQ NUMBER SEVENTY THOUSAND
case 0X1EC9C: retval = 80000; break; // INDIC SIYAQ NUMBER EIGHTY THOUSAND
case 0X1EC9D: retval = 90000; break; // INDIC SIYAQ NUMBER NINETY THOUSAND
case 0X1EC9E: retval = 100000; break; // INDIC SIYAQ NUMBER LAKH
case 0X1EC9F: retval = 200000; break; // INDIC SIYAQ NUMBER LAKHAN
case 0X1ECA0: retval = 100000; break; // INDIC SIYAQ LAKH MARK
case 0X1ECA1: retval = 10000000; break;// INDIC SIYAQ NUMBER KAROR
case 0X1ECA2: retval = 20000000; break;// INDIC SIYAQ NUMBER KARORAN
case 0X1ECB3: retval = 10000; break; // INDIC SIYAQ NUMBER ALTERNATE TEN THOUSAND
case 0X1ECB4: retval = 100000; break; // INDIC SIYAQ NUMBER ALTERNATE LAKH MARK
case 0X1ED0D: retval = 40; break; // OTTOMAN SIYAQ NUMBER FORTY
case 0X1ED0E: retval = 50; break; // OTTOMAN SIYAQ NUMBER FIFTY
case 0X1ED0F: retval = 60; break; // OTTOMAN SIYAQ NUMBER SIXTY
case 0X1ED10: retval = 70; break; // OTTOMAN SIYAQ NUMBER SEVENTY
case 0X1ED11: retval = 80; break; // OTTOMAN SIYAQ NUMBER EIGHTY
case 0X1ED12: retval = 90; break; // OTTOMAN SIYAQ NUMBER NINETY
case 0X1ED13: retval = 100; break; // OTTOMAN SIYAQ NUMBER ONE HUNDRED
case 0X1ED14: retval = 200; break; // OTTOMAN SIYAQ NUMBER TWO HUNDRED
case 0X1ED15: retval = 300; break; // OTTOMAN SIYAQ NUMBER THREE HUNDRED
case 0X1ED16: retval = 400; break; // OTTOMAN SIYAQ NUMBER FOUR HUNDRED
case 0X1ED17: retval = 500; break; // OTTOMAN SIYAQ NUMBER FIVE HUNDRED
case 0X1ED18: retval = 600; break; // OTTOMAN SIYAQ NUMBER SIX HUNDRED
case 0X1ED19: retval = 700; break; // OTTOMAN SIYAQ NUMBER SEVEN HUNDRED
case 0X1ED1A: retval = 800; break; // OTTOMAN SIYAQ NUMBER EIGHT HUNDRED
case 0X1ED1B: retval = 900; break; // OTTOMAN SIYAQ NUMBER NINE HUNDRED
case 0X1ED1C: retval = 1000; break; // OTTOMAN SIYAQ NUMBER ONE THOUSAND
case 0X1ED1D: retval = 2000; break; // OTTOMAN SIYAQ NUMBER TWO THOUSAND
case 0X1ED1E: retval = 3000; break; // OTTOMAN SIYAQ NUMBER THREE THOUSAND
case 0X1ED1F: retval = 4000; break; // OTTOMAN SIYAQ NUMBER FOUR THOUSAND
case 0X1ED20: retval = 5000; break; // OTTOMAN SIYAQ NUMBER FIVE THOUSAND
case 0X1ED21: retval = 6000; break; // OTTOMAN SIYAQ NUMBER SIX THOUSAND
case 0X1ED22: retval = 7000; break; // OTTOMAN SIYAQ NUMBER SEVEN THOUSAND
case 0X1ED23: retval = 8000; break; // OTTOMAN SIYAQ NUMBER EIGHT THOUSAND
case 0X1ED24: retval = 9000; break; // OTTOMAN SIYAQ NUMBER NINE THOUSAND
case 0X1ED25: retval = 10000; break; // OTTOMAN SIYAQ NUMBER TEN THOUSAND
case 0X1ED26: retval = 20000; break; // OTTOMAN SIYAQ NUMBER TWENTY THOUSAND
case 0X1ED27: retval = 30000; break; // OTTOMAN SIYAQ NUMBER THIRTY THOUSAND
case 0X1ED28: retval = 40000; break; // OTTOMAN SIYAQ NUMBER FORTY THOUSAND
case 0X1ED29: retval = 50000; break; // OTTOMAN SIYAQ NUMBER FIFTY THOUSAND
case 0X1ED2A: retval = 60000; break; // OTTOMAN SIYAQ NUMBER SIXTY THOUSAND
case 0X1ED2B: retval = 70000; break; // OTTOMAN SIYAQ NUMBER SEVENTY THOUSAND
case 0X1ED2C: retval = 80000; break; // OTTOMAN SIYAQ NUMBER EIGHTY THOUSAND
case 0X1ED2D: retval = 90000; break; // OTTOMAN SIYAQ NUMBER NINETY THOUSAND
case 0X1ED38: retval = 400; break; // OTTOMAN SIYAQ ALTERNATE NUMBER FOUR HUNDRED
case 0X1ED39: retval = 600; break; // OTTOMAN SIYAQ ALTERNATE NUMBER SIX HUNDRED
case 0X1ED3A: retval = 2000; break; // OTTOMAN SIYAQ ALTERNATE NUMBER TWO THOUSAND
case 0X1ED3B: retval = 10000; break; // OTTOMAN SIYAQ ALTERNATE NUMBER TEN THOUSAND
default: retval = -2; break;
}

View File

@ -1,6 +1,6 @@
# Blocks-11.0.0.txt
# Date: 2017-10-16, 24:39:00 GMT [KW]
# Copyright (c) 2018 Unicode, Inc.
# Blocks-12.1.0.txt
# Date: 2019-03-08, 23:59:00 GMT [KW]
# Copyright (c) 2019 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
@ -239,6 +239,7 @@ FFF0..FFFF; Specials
10E60..10E7F; Rumi Numeral Symbols
10F00..10F2F; Old Sogdian
10F30..10F6F; Sogdian
10FE0..10FFF; Elymaic
11000..1107F; Brahmi
11080..110CF; Kaithi
110D0..110FF; Sora Sompeng
@ -259,6 +260,7 @@ FFF0..FFFF; Specials
11700..1173F; Ahom
11800..1184F; Dogra
118A0..118FF; Warang Citi
119A0..119FF; Nandinagari
11A00..11A4F; Zanabazar Square
11A50..11AAF; Soyombo
11AC0..11AFF; Pau Cin Hau
@ -267,10 +269,12 @@ FFF0..FFFF; Specials
11D00..11D5F; Masaram Gondi
11D60..11DAF; Gunjala Gondi
11EE0..11EFF; Makasar
11FC0..11FFF; Tamil Supplement
12000..123FF; Cuneiform
12400..1247F; Cuneiform Numbers and Punctuation
12480..1254F; Early Dynastic Cuneiform
13000..1342F; Egyptian Hieroglyphs
13430..1343F; Egyptian Hieroglyph Format Controls
14400..1467F; Anatolian Hieroglyphs
16800..16A3F; Bamum Supplement
16A40..16A6F; Mro
@ -283,6 +287,7 @@ FFF0..FFFF; Specials
18800..18AFF; Tangut Components
1B000..1B0FF; Kana Supplement
1B100..1B12F; Kana Extended-A
1B130..1B16F; Small Kana Extension
1B170..1B2FF; Nushu
1BC00..1BC9F; Duployan
1BCA0..1BCAF; Shorthand Format Controls
@ -295,9 +300,12 @@ FFF0..FFFF; Specials
1D400..1D7FF; Mathematical Alphanumeric Symbols
1D800..1DAAF; Sutton SignWriting
1E000..1E02F; Glagolitic Supplement
1E100..1E14F; Nyiakeng Puachue Hmong
1E2C0..1E2FF; Wancho
1E800..1E8DF; Mende Kikakui
1E900..1E95F; Adlam
1EC70..1ECBF; Indic Siyaq Numbers
1ED00..1ED4F; Ottoman Siyaq Numbers
1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
1F000..1F02F; Mahjong Tiles
1F030..1F09F; Domino Tiles
@ -313,6 +321,7 @@ FFF0..FFFF; Specials
1F800..1F8FF; Supplemental Arrows-C
1F900..1F9FF; Supplemental Symbols and Pictographs
1FA00..1FA6F; Chess Symbols
1FA70..1FAFF; Symbols and Pictographs Extended-A
20000..2A6DF; CJK Unified Ideographs Extension B
2A700..2B73F; CJK Unified Ideographs Extension C
2B740..2B81F; CJK Unified Ideographs Extension D

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
# PropList-11.0.0.txt
# Date: 2018-03-15, 04:28:35 GMT
# Copyright (c) 2018 Unicode, Inc.
# PropList-12.1.0.txt
# Date: 2019-03-10, 10:53:16 GMT
# Copyright (c) 2019 Unicode, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
@ -138,7 +138,7 @@ FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET
0F0D..0F12 ; Terminal_Punctuation # Po [6] TIBETAN MARK SHAD..TIBETAN MARK RGYA GRAM SHAD
104A..104B ; Terminal_Punctuation # Po [2] MYANMAR SIGN LITTLE SECTION..MYANMAR SIGN SECTION
1361..1368 ; Terminal_Punctuation # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR
166D..166E ; Terminal_Punctuation # Po [2] CANADIAN SYLLABICS CHI SIGN..CANADIAN SYLLABICS FULL STOP
166E ; Terminal_Punctuation # Po CANADIAN SYLLABICS FULL STOP
16EB..16ED ; Terminal_Punctuation # Po [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION
1735..1736 ; Terminal_Punctuation # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION
17D4..17D6 ; Terminal_Punctuation # Po [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH
@ -157,7 +157,7 @@ FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET
2E3C ; Terminal_Punctuation # Po STENOGRAPHIC FULL STOP
2E41 ; Terminal_Punctuation # Po REVERSED COMMA
2E4C ; Terminal_Punctuation # Po MEDIEVAL COMMA
2E4E ; Terminal_Punctuation # Po PUNCTUS ELEVATUS MARK
2E4E..2E4F ; Terminal_Punctuation # Po [2] PUNCTUS ELEVATUS MARK..CORNISH VERSE DIVIDER
3001..3002 ; Terminal_Punctuation # Po [2] IDEOGRAPHIC COMMA..IDEOGRAPHIC FULL STOP
A4FE..A4FF ; Terminal_Punctuation # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP
A60D..A60F ; Terminal_Punctuation # Po [3] VAI COMMA..VAI QUESTION MARK
@ -553,15 +553,17 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
1056..1057 ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR
1058..1059 ; Other_Alphabetic # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL
105E..1060 ; Other_Alphabetic # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA
1062 ; Other_Alphabetic # Mc MYANMAR VOWEL SIGN SGAW KAREN EU
1067..1068 ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR VOWEL SIGN WESTERN PWO KAREN UE
1062..1064 ; Other_Alphabetic # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO
1067..106D ; Other_Alphabetic # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5
1071..1074 ; Other_Alphabetic # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE
1082 ; Other_Alphabetic # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA
1083..1084 ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E
1085..1086 ; Other_Alphabetic # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y
109C ; Other_Alphabetic # Mc MYANMAR VOWEL SIGN AITON A
1087..108C ; Other_Alphabetic # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3
108D ; Other_Alphabetic # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE
108F ; Other_Alphabetic # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5
109A..109C ; Other_Alphabetic # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A
109D ; Other_Alphabetic # Mn MYANMAR VOWEL SIGN AITON AI
135F ; Other_Alphabetic # Mn ETHIOPIC COMBINING GEMINATION MARK
1712..1713 ; Other_Alphabetic # Mn [2] TAGALOG VOWEL SIGN I..TAGALOG VOWEL SIGN U
1732..1733 ; Other_Alphabetic # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U
1752..1753 ; Other_Alphabetic # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U
@ -618,18 +620,21 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
1C24..1C2B ; Other_Alphabetic # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU
1C2C..1C33 ; Other_Alphabetic # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T
1C34..1C35 ; Other_Alphabetic # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG
1CF2..1CF3 ; Other_Alphabetic # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA
1C36 ; Other_Alphabetic # Mn LEPCHA SIGN RAN
1DE7..1DF4 ; Other_Alphabetic # Mn [14] COMBINING LATIN SMALL LETTER ALPHA..COMBINING LATIN SMALL LETTER U WITH DIAERESIS
24B6..24E9 ; Other_Alphabetic # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z
2DE0..2DFF ; Other_Alphabetic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
A674..A67B ; Other_Alphabetic # Mn [8] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC LETTER OMEGA
A69E..A69F ; Other_Alphabetic # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E
A802 ; Other_Alphabetic # Mn SYLOTI NAGRI SIGN DVISVARA
A80B ; Other_Alphabetic # Mn SYLOTI NAGRI SIGN ANUSVARA
A823..A824 ; Other_Alphabetic # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I
A825..A826 ; Other_Alphabetic # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
A827 ; Other_Alphabetic # Mc SYLOTI NAGRI VOWEL SIGN OO
A880..A881 ; Other_Alphabetic # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA
A8B4..A8C3 ; Other_Alphabetic # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU
A8C5 ; Other_Alphabetic # Mn SAURASHTRA SIGN CANDRABINDU
A8FF ; Other_Alphabetic # Mn DEVANAGARI VOWEL SIGN AY
A926..A92A ; Other_Alphabetic # Mn [5] KAYAH LI VOWEL UE..KAYAH LI VOWEL O
A947..A951 ; Other_Alphabetic # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R
A952 ; Other_Alphabetic # Mc REJANG CONSONANT SIGN H
@ -638,8 +643,9 @@ A983 ; Other_Alphabetic # Mc JAVANESE SIGN WIGNYAN
A9B4..A9B5 ; Other_Alphabetic # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG
A9B6..A9B9 ; Other_Alphabetic # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT
A9BA..A9BB ; Other_Alphabetic # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE
A9BC ; Other_Alphabetic # Mn JAVANESE VOWEL SIGN PEPET
A9BD..A9BF ; Other_Alphabetic # Mc [3] JAVANESE CONSONANT SIGN KERET..JAVANESE CONSONANT SIGN CAKRA
A9BC..A9BD ; Other_Alphabetic # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET
A9BE..A9BF ; Other_Alphabetic # Mc [2] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE CONSONANT SIGN CAKRA
A9E5 ; Other_Alphabetic # Mn MYANMAR SIGN SHAN SAW
AA29..AA2E ; Other_Alphabetic # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE
AA2F..AA30 ; Other_Alphabetic # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI
AA31..AA32 ; Other_Alphabetic # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE
@ -648,6 +654,9 @@ AA35..AA36 ; Other_Alphabetic # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONA
AA43 ; Other_Alphabetic # Mn CHAM CONSONANT SIGN FINAL NG
AA4C ; Other_Alphabetic # Mn CHAM CONSONANT SIGN FINAL M
AA4D ; Other_Alphabetic # Mc CHAM CONSONANT SIGN FINAL H
AA7B ; Other_Alphabetic # Mc MYANMAR SIGN PAO KAREN TONE
AA7C ; Other_Alphabetic # Mn MYANMAR SIGN TAI LAING TONE-2
AA7D ; Other_Alphabetic # Mc MYANMAR SIGN TAI LAING TONE-5
AAB0 ; Other_Alphabetic # Mn TAI VIET MAI KANG
AAB2..AAB4 ; Other_Alphabetic # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U
AAB7..AAB8 ; Other_Alphabetic # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA
@ -740,6 +749,11 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
1182C..1182E ; Other_Alphabetic # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II
1182F..11837 ; Other_Alphabetic # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA
11838 ; Other_Alphabetic # Mc DOGRA SIGN VISARGA
119D1..119D3 ; Other_Alphabetic # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II
119D4..119D7 ; Other_Alphabetic # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR
119DA..119DB ; Other_Alphabetic # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI
119DC..119DF ; Other_Alphabetic # Mc [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA
119E4 ; Other_Alphabetic # Mc NANDINAGARI VOWEL SIGN PRISHTHAMATRA E
11A01..11A0A ; Other_Alphabetic # Mn [10] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL LENGTH MARK
11A35..11A38 ; Other_Alphabetic # Mn [4] ZANABAZAR SQUARE SIGN CANDRABINDU..ZANABAZAR SQUARE SIGN ANUSVARA
11A39 ; Other_Alphabetic # Mc ZANABAZAR SQUARE SIGN VISARGA
@ -773,8 +787,9 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
11D96 ; Other_Alphabetic # Mc GUNJALA GONDI SIGN VISARGA
11EF3..11EF4 ; Other_Alphabetic # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U
11EF5..11EF6 ; Other_Alphabetic # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O
16B30..16B36 ; Other_Alphabetic # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
16F51..16F7E ; Other_Alphabetic # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG
16F4F ; Other_Alphabetic # Mn MIAO SIGN CONSONANT MODIFIER BAR
16F51..16F87 ; Other_Alphabetic # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI
16F8F..16F92 ; Other_Alphabetic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
1BC9E ; Other_Alphabetic # Mn DUPLOYAN DOUBLE MARK
1E000..1E006 ; Other_Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
1E008..1E018 ; Other_Alphabetic # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
@ -786,7 +801,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z
# Total code points: 1334
# Total code points: 1377
# ================================================
@ -798,7 +813,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
4E00..9FEF ; Ideographic # Lo [20976] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FEF
F900..FA6D ; Ideographic # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D
FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
17000..187F1 ; Ideographic # Lo [6130] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F1
17000..187F7 ; Ideographic # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7
18800..18AF2 ; Ideographic # Lo [755] TANGUT COMPONENT-001..TANGUT COMPONENT-755
1B170..1B2FB ; Ideographic # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB
20000..2A6D6 ; Ideographic # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
@ -808,7 +823,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM
2CEB0..2EBE0 ; Ideographic # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
2F800..2FA1D ; Ideographic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
# Total code points: 96184
# Total code points: 96190
# ================================================
@ -876,6 +891,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM
0DCA ; Diacritic # Mn SINHALA SIGN AL-LAKUNA
0E47..0E4C ; Diacritic # Mn [6] THAI CHARACTER MAITAIKHU..THAI CHARACTER THANTHAKHAT
0E4E ; Diacritic # Mn THAI CHARACTER YAMAKKAN
0EBA ; Diacritic # Mn LAO SIGN PALI VIRAMA
0EC8..0ECC ; Diacritic # Mn [5] LAO TONE MAI EK..LAO CANCELLATION MARK
0F18..0F19 ; Diacritic # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS
0F35 ; Diacritic # Mn TIBETAN MARK NGAS BZUNG NYI ZLA
@ -887,10 +903,13 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM
0FC6 ; Diacritic # Mn TIBETAN SYMBOL PADMA GDAN
1037 ; Diacritic # Mn MYANMAR SIGN DOT BELOW
1039..103A ; Diacritic # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT
1063..1064 ; Diacritic # Mc [2] MYANMAR TONE MARK SGAW KAREN HATHI..MYANMAR TONE MARK SGAW KAREN KE PHO
1069..106D ; Diacritic # Mc [5] MYANMAR SIGN WESTERN PWO KAREN TONE-1..MYANMAR SIGN WESTERN PWO KAREN TONE-5
1087..108C ; Diacritic # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3
108D ; Diacritic # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE
108F ; Diacritic # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5
109A..109B ; Diacritic # Mc [2] MYANMAR SIGN KHAMTI TONE-1..MYANMAR SIGN KHAMTI TONE-3
135D..135F ; Diacritic # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK
17C9..17D3 ; Diacritic # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
17DD ; Diacritic # Mn KHMER SIGN ATTHACAN
1939..193B ; Diacritic # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I
@ -935,9 +954,11 @@ A67C..A67D ; Diacritic # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILL
A67F ; Diacritic # Lm CYRILLIC PAYEROK
A69C..A69D ; Diacritic # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN
A6F0..A6F1 ; Diacritic # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
A700..A716 ; Diacritic # Sk [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR
A717..A71F ; Diacritic # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK
A720..A721 ; Diacritic # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE
A788 ; Diacritic # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT
A789..A78A ; Diacritic # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN
A7F8..A7F9 ; Diacritic # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
A8C4 ; Diacritic # Mn SAURASHTRA SIGN VIRAMA
A8E0..A8F1 ; Diacritic # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
@ -992,6 +1013,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON
116B7 ; Diacritic # Mn TAKRI SIGN NUKTA
1172B ; Diacritic # Mn AHOM SIGN KILLER
11839..1183A ; Diacritic # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA
119E0 ; Diacritic # Mn NANDINAGARI SIGN VIRAMA
11A34 ; Diacritic # Mn ZANABAZAR SQUARE SIGN VIRAMA
11A47 ; Diacritic # Mn ZANABAZAR SQUARE SUBJOINER
11A99 ; Diacritic # Mn SOYOMBO SUBJOINER
@ -1000,6 +1022,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON
11D44..11D45 ; Diacritic # Mn [2] MASARAM GONDI SIGN HALANTA..MASARAM GONDI VIRAMA
11D97 ; Diacritic # Mn GUNJALA GONDI VIRAMA
16AF0..16AF4 ; Diacritic # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE
16B30..16B36 ; Diacritic # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
16F8F..16F92 ; Diacritic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
16F93..16F9F ; Diacritic # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8
1D167..1D169 ; Diacritic # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
@ -1007,11 +1030,13 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON
1D17B..1D182 ; Diacritic # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
1D185..1D18B ; Diacritic # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
1D1AA..1D1AD ; Diacritic # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
1E130..1E136 ; Diacritic # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
1E2EC..1E2EF ; Diacritic # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI
1E8D0..1E8D6 ; Diacritic # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK
1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA
# Total code points: 818
# Total code points: 873
# ================================================
@ -1043,9 +1068,11 @@ FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND
11A98 ; Extender # Mn SOYOMBO GEMINATION MARK
16B42..16B43 ; Extender # Lm [2] PAHAWH HMONG SIGN VOS NRUA..PAHAWH HMONG SIGN IB YAM
16FE0..16FE1 ; Extender # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK
16FE3 ; Extender # Lm OLD CHINESE ITERATION MARK
1E13C..1E13D ; Extender # Lm [2] NYIAKENG PUACHUE HMONG SIGN XW XW..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER
1E944..1E946 ; Extender # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK
# Total code points: 44
# Total code points: 47
# ================================================
@ -1119,6 +1146,7 @@ FFFFE..FFFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-FFFFE>..<noncha
0D57 ; Other_Grapheme_Extend # Mc MALAYALAM AU LENGTH MARK
0DCF ; Other_Grapheme_Extend # Mc SINHALA VOWEL SIGN AELA-PILLA
0DDF ; Other_Grapheme_Extend # Mc SINHALA VOWEL SIGN GAYANUKITTA
1B35 ; Other_Grapheme_Extend # Mc BALINESE VOWEL SIGN TEDUNG
200C ; Other_Grapheme_Extend # Cf ZERO WIDTH NON-JOINER
302E..302F ; Other_Grapheme_Extend # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK
FF9E..FF9F ; Other_Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
@ -1131,7 +1159,7 @@ FF9E..FF9F ; Other_Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND
1D16E..1D172 ; Other_Grapheme_Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5
E0020..E007F ; Other_Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG
# Total code points: 125
# Total code points: 126
# ================================================
@ -1547,10 +1575,7 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
2B74..2B75 ; Pattern_Syntax # Cn [2] <reserved-2B74>..<reserved-2B75>
2B76..2B95 ; Pattern_Syntax # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW
2B96..2B97 ; Pattern_Syntax # Cn [2] <reserved-2B96>..<reserved-2B97>
2B98..2BC8 ; Pattern_Syntax # So [49] THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD..BLACK MEDIUM RIGHT-POINTING TRIANGLE CENTRED
2BC9 ; Pattern_Syntax # Cn <reserved-2BC9>
2BCA..2BFE ; Pattern_Syntax # So [53] TOP HALF BLACK CIRCLE..REVERSED RIGHT ANGLE
2BFF ; Pattern_Syntax # Cn <reserved-2BFF>
2B98..2BFF ; Pattern_Syntax # So [104] THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD..HELLSCHREIBER PAUSE SYMBOL
2E00..2E01 ; Pattern_Syntax # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER
2E02 ; Pattern_Syntax # Pi LEFT SUBSTITUTION BRACKET
2E03 ; Pattern_Syntax # Pf RIGHT SUBSTITUTION BRACKET
@ -1588,8 +1613,8 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
2E40 ; Pattern_Syntax # Pd DOUBLE HYPHEN
2E41 ; Pattern_Syntax # Po REVERSED COMMA
2E42 ; Pattern_Syntax # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK
2E43..2E4E ; Pattern_Syntax # Po [12] DASH WITH LEFT UPTURN..PUNCTUS ELEVATUS MARK
2E4F..2E7F ; Pattern_Syntax # Cn [49] <reserved-2E4F>..<reserved-2E7F>
2E43..2E4F ; Pattern_Syntax # Po [13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER
2E50..2E7F ; Pattern_Syntax # Cn [48] <reserved-2E50>..<reserved-2E7F>
3001..3003 ; Pattern_Syntax # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
3008 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET
3009 ; Pattern_Syntax # Pe RIGHT ANGLE BRACKET

View File

@ -1,6 +1,6 @@
# PropertyValueAliases-11.0.0.txt
# Date: 2018-05-20, 09:03:12 GMT
# Copyright (c) 2018 Unicode, Inc.
# PropertyValueAliases-12.1.0.txt
# Date: 2019-03-10, 10:53:18 GMT
# Copyright (c) 2019 Unicode, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
@ -22,12 +22,15 @@
# First Field: The first field describes the property for which that
# property value name is used.
#
# Second Field: The second field is an abbreviated name.
# Second Field: The second field is the short name for the property value.
# It is typically an abbreviation, but in a number of cases it is simply
# a duplicate of the "long name" in the third field.
#
# Third Field: The third field is a long name.
# Third Field: The third field is the long name for the property value,
# typically the formal name used in documentation about the property value.
#
# In the case of ccc, there are 4 fields. The second field is numeric, third
# is abbreviated, and fourth is long.
# In the case of Canonical_Combining_Class (ccc), there are 4 fields:
# The second field is numeric, the third is the short name, and the fourth is the long name.
#
# The above are the preferred aliases. Other aliases may be listed in additional fields.
#
@ -51,7 +54,8 @@
#
# The combination of property value and property name is, however, unique.
#
# For more information, see UTS #18: Unicode Regular Expressions
# For more information, see UAX #44, Unicode Character Database, and
# UTS #18, Unicode Regular Expressions.
# ================================================
@ -82,6 +86,8 @@ age; 8.0 ; V8_0
age; 9.0 ; V9_0
age; 10.0 ; V10_0
age; 11.0 ; V11_0
age; 12.0 ; V12_0
age; 12.1 ; V12_1
age; NA ; Unassigned
# Alphabetic (Alpha)
@ -224,8 +230,10 @@ blk; Dogra ; Dogra
blk; Domino ; Domino_Tiles
blk; Duployan ; Duployan
blk; Early_Dynastic_Cuneiform ; Early_Dynastic_Cuneiform
blk; Egyptian_Hieroglyph_Format_Controls; Egyptian_Hieroglyph_Format_Controls
blk; Egyptian_Hieroglyphs ; Egyptian_Hieroglyphs
blk; Elbasan ; Elbasan
blk; Elymaic ; Elymaic
blk; Emoticons ; Emoticons
blk; Enclosed_Alphanum ; Enclosed_Alphanumerics
blk; Enclosed_Alphanum_Sup ; Enclosed_Alphanumeric_Supplement
@ -339,12 +347,14 @@ blk; Myanmar ; Myanmar
blk; Myanmar_Ext_A ; Myanmar_Extended_A
blk; Myanmar_Ext_B ; Myanmar_Extended_B
blk; Nabataean ; Nabataean
blk; Nandinagari ; Nandinagari
blk; NB ; No_Block
blk; New_Tai_Lue ; New_Tai_Lue
blk; Newa ; Newa
blk; NKo ; NKo
blk; Number_Forms ; Number_Forms
blk; Nushu ; Nushu
blk; Nyiakeng_Puachue_Hmong ; Nyiakeng_Puachue_Hmong
blk; OCR ; Optical_Character_Recognition
blk; Ogham ; Ogham
blk; Ol_Chiki ; Ol_Chiki
@ -360,6 +370,7 @@ blk; Oriya ; Oriya
blk; Ornamental_Dingbats ; Ornamental_Dingbats
blk; Osage ; Osage
blk; Osmanya ; Osmanya
blk; Ottoman_Siyaq_Numbers ; Ottoman_Siyaq_Numbers
blk; Pahawh_Hmong ; Pahawh_Hmong
blk; Palmyrene ; Palmyrene
blk; Pau_Cin_Hau ; Pau_Cin_Hau
@ -384,6 +395,7 @@ blk; Siddham ; Siddham
blk; Sinhala ; Sinhala
blk; Sinhala_Archaic_Numbers ; Sinhala_Archaic_Numbers
blk; Small_Forms ; Small_Form_Variants
blk; Small_Kana_Ext ; Small_Kana_Extension
blk; Sogdian ; Sogdian
blk; Sora_Sompeng ; Sora_Sompeng
blk; Soyombo ; Soyombo
@ -401,6 +413,7 @@ blk; Sup_Symbols_And_Pictographs ; Supplemental_Symbols_And_Pictographs
blk; Super_And_Sub ; Superscripts_And_Subscripts
blk; Sutton_SignWriting ; Sutton_SignWriting
blk; Syloti_Nagri ; Syloti_Nagri
blk; Symbols_And_Pictographs_Ext_A ; Symbols_And_Pictographs_Extended_A
blk; Syriac ; Syriac
blk; Syriac_Sup ; Syriac_Supplement
blk; Tagalog ; Tagalog
@ -412,6 +425,7 @@ blk; Tai_Viet ; Tai_Viet
blk; Tai_Xuan_Jing ; Tai_Xuan_Jing_Symbols
blk; Takri ; Takri
blk; Tamil ; Tamil
blk; Tamil_Sup ; Tamil_Supplement
blk; Tangut ; Tangut
blk; Tangut_Components ; Tangut_Components
blk; Telugu ; Telugu
@ -429,6 +443,7 @@ blk; Vedic_Ext ; Vedic_Extensions
blk; Vertical_Forms ; Vertical_Forms
blk; VS ; Variation_Selectors
blk; VS_Sup ; Variation_Selectors_Supplement
blk; Wancho ; Wancho
blk; Warang_Citi ; Warang_Citi
blk; Yi_Radicals ; Yi_Radicals
blk; Yi_Syllables ; Yi_Syllables
@ -1213,6 +1228,7 @@ sc ; Dsrt ; Deseret
sc ; Dupl ; Duployan
sc ; Egyp ; Egyptian_Hieroglyphs
sc ; Elba ; Elbasan
sc ; Elym ; Elymaic
sc ; Ethi ; Ethiopic
sc ; Geor ; Georgian
sc ; Glag ; Glagolitic
@ -1231,6 +1247,7 @@ sc ; Hebr ; Hebrew
sc ; Hira ; Hiragana
sc ; Hluw ; Anatolian_Hieroglyphs
sc ; Hmng ; Pahawh_Hmong
sc ; Hmnp ; Nyiakeng_Puachue_Hmong
sc ; Hrkt ; Katakana_Or_Hiragana
sc ; Hung ; Old_Hungarian
sc ; Ital ; Old_Italic
@ -1268,6 +1285,7 @@ sc ; Mroo ; Mro
sc ; Mtei ; Meetei_Mayek
sc ; Mult ; Multani
sc ; Mymr ; Myanmar
sc ; Nand ; Nandinagari
sc ; Narb ; Old_North_Arabian
sc ; Nbat ; Nabataean
sc ; Newa ; Newa
@ -1324,6 +1342,7 @@ sc ; Tirh ; Tirhuta
sc ; Ugar ; Ugaritic
sc ; Vaii ; Vai
sc ; Wara ; Warang_Citi
sc ; Wcho ; Wancho
sc ; Xpeo ; Old_Persian
sc ; Xsux ; Cuneiform
sc ; Yiii ; Yi

View File

@ -1,6 +1,6 @@
# Scripts-11.0.0.txt
# Date: 2018-02-21, 05:34:31 GMT
# Copyright (c) 2018 Unicode, Inc.
# Scripts-12.1.0.txt
# Date: 2019-04-01, 09:10:42 GMT
# Copyright (c) 2019 Unicode, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
@ -108,10 +108,10 @@
1CD3 ; Common # Po VEDIC SIGN NIHSHVASA
1CE1 ; Common # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA
1CE9..1CEC ; Common # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL
1CEE..1CF1 ; Common # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA
1CF2..1CF3 ; Common # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA
1CEE..1CF3 ; Common # Lo [6] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ROTATED ARDHAVISARGA
1CF5..1CF6 ; Common # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA
1CF7 ; Common # Mc VEDIC SIGN ATIKRAMA
1CFA ; Common # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA
2000..200A ; Common # Zs [11] EN QUAD..HAIR SPACE
200B ; Common # Cf ZERO WIDTH SPACE
200E..200F ; Common # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
@ -308,8 +308,7 @@
2B47..2B4C ; Common # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR
2B4D..2B73 ; Common # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR
2B76..2B95 ; Common # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW
2B98..2BC8 ; Common # So [49] THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD..BLACK MEDIUM RIGHT-POINTING TRIANGLE CENTRED
2BCA..2BFE ; Common # So [53] TOP HALF BLACK CIRCLE..REVERSED RIGHT ANGLE
2B98..2BFF ; Common # So [104] THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD..HELLSCHREIBER PAUSE SYMBOL
2E00..2E01 ; Common # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER
2E02 ; Common # Pi LEFT SUBSTITUTION BRACKET
2E03 ; Common # Pf RIGHT SUBSTITUTION BRACKET
@ -347,7 +346,7 @@
2E40 ; Common # Pd DOUBLE HYPHEN
2E41 ; Common # Po REVERSED COMMA
2E42 ; Common # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK
2E43..2E4E ; Common # Po [12] DASH WITH LEFT UPTURN..PUNCTUS ELEVATUS MARK
2E43..2E4F ; Common # Po [13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER
2FF0..2FFB ; Common # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
3000 ; Common # Zs IDEOGRAPHIC SPACE
3001..3003 ; Common # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
@ -400,6 +399,7 @@
328A..32B0 ; Common # So [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT
32B1..32BF ; Common # No [15] CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY
32C0..32CF ; Common # So [16] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..LIMITED LIABILITY SIGN
32FF ; Common # So SQUARE ERA NAME REIWA
3358..33FF ; Common # So [168] IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR ZERO..SQUARE GAL
4DC0..4DFF ; Common # So [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION
A700..A716 ; Common # Sk [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR
@ -509,6 +509,8 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
10190..1019B ; Common # So [12] ROMAN SEXTANS SIGN..ROMAN CENTURIAL SIGN
101D0..101FC ; Common # So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND
102E1..102FB ; Common # No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED
16FE2 ; Common # Po OLD CHINESE HOOK MARK
16FE3 ; Common # Lm OLD CHINESE ITERATION MARK
1BCA0..1BCA3 ; Common # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP
1D000..1D0F5 ; Common # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO
1D100..1D126 ; Common # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2
@ -569,6 +571,9 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
1ECAD..1ECAF ; Common # No [3] INDIC SIYAQ FRACTION ONE QUARTER..INDIC SIYAQ FRACTION THREE QUARTERS
1ECB0 ; Common # Sc INDIC SIYAQ RUPEE MARK
1ECB1..1ECB4 ; Common # No [4] INDIC SIYAQ NUMBER ALTERNATE ONE..INDIC SIYAQ ALTERNATE LAKH MARK
1ED01..1ED2D ; Common # No [45] OTTOMAN SIYAQ NUMBER ONE..OTTOMAN SIYAQ NUMBER NINETY THOUSAND
1ED2E ; Common # So OTTOMAN SIYAQ MARRATAN
1ED2F..1ED3D ; Common # No [15] OTTOMAN SIYAQ ALTERNATE NUMBER TWO..OTTOMAN SIYAQ FRACTION ONE SIXTH
1F000..1F02B ; Common # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK
1F030..1F093 ; Common # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06
1F0A0..1F0AE ; Common # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES
@ -576,7 +581,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
1F0C1..1F0CF ; Common # So [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER
1F0D1..1F0F5 ; Common # So [37] PLAYING CARD ACE OF CLUBS..PLAYING CARD TRUMP-21
1F100..1F10C ; Common # No [13] DIGIT ZERO FULL STOP..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO
1F110..1F16B ; Common # So [92] PARENTHESIZED LATIN CAPITAL LETTER A..RAISED MD SIGN
1F110..1F16C ; Common # So [93] PARENTHESIZED LATIN CAPITAL LETTER A..RAISED MR SIGN
1F170..1F1AC ; Common # So [61] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VOD
1F1E6..1F1FF ; Common # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z
1F201..1F202 ; Common # So [2] SQUARED KATAKANA KOKO..SQUARED KATAKANA SA
@ -586,30 +591,33 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
1F260..1F265 ; Common # So [6] ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI
1F300..1F3FA ; Common # So [251] CYCLONE..AMPHORA
1F3FB..1F3FF ; Common # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6
1F400..1F6D4 ; Common # So [725] RAT..PAGODA
1F400..1F6D5 ; Common # So [726] RAT..HINDU TEMPLE
1F6E0..1F6EC ; Common # So [13] HAMMER AND WRENCH..AIRPLANE ARRIVING
1F6F0..1F6F9 ; Common # So [10] SATELLITE..SKATEBOARD
1F6F0..1F6FA ; Common # So [11] SATELLITE..AUTO RICKSHAW
1F700..1F773 ; Common # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE
1F780..1F7D8 ; Common # So [89] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..NEGATIVE CIRCLED SQUARE
1F7E0..1F7EB ; Common # So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE
1F800..1F80B ; Common # So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD
1F810..1F847 ; Common # So [56] LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD..DOWNWARDS HEAVY ARROW
1F850..1F859 ; Common # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW
1F860..1F887 ; Common # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
1F890..1F8AD ; Common # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS
1F900..1F90B ; Common # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT
1F910..1F93E ; Common # So [47] ZIPPER-MOUTH FACE..HANDBALL
1F940..1F970 ; Common # So [49] WILTED FLOWER..SMILING FACE WITH SMILING EYES AND THREE HEARTS
1F90D..1F971 ; Common # So [101] WHITE HEART..YAWNING FACE
1F973..1F976 ; Common # So [4] FACE WITH PARTY HORN AND PARTY HAT..FREEZING FACE
1F97A ; Common # So FACE WITH PLEADING EYES
1F97C..1F9A2 ; Common # So [39] LAB COAT..SWAN
1F9B0..1F9B9 ; Common # So [10] EMOJI COMPONENT RED HAIR..SUPERVILLAIN
1F9C0..1F9C2 ; Common # So [3] CHEESE WEDGE..SALT SHAKER
1F9D0..1F9FF ; Common # So [48] FACE WITH MONOCLE..NAZAR AMULET
1F97A..1F9A2 ; Common # So [41] FACE WITH PLEADING EYES..SWAN
1F9A5..1F9AA ; Common # So [6] SLOTH..OYSTER
1F9AE..1F9CA ; Common # So [29] GUIDE DOG..ICE CUBE
1F9CD..1FA53 ; Common # So [135] STANDING PERSON..BLACK CHESS KNIGHT-BISHOP
1FA60..1FA6D ; Common # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER
1FA70..1FA73 ; Common # So [4] BALLET SHOES..SHORTS
1FA78..1FA7A ; Common # So [3] DROP OF BLOOD..STETHOSCOPE
1FA80..1FA82 ; Common # So [3] YO-YO..PARACHUTE
1FA90..1FA95 ; Common # So [6] RINGED PLANET..BANJO
E0001 ; Common # Cf LANGUAGE TAG
E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG
# Total code points: 7591
# Total code points: 7805
# ================================================
@ -652,7 +660,8 @@ A770 ; Latin # Lm MODIFIER LETTER US
A771..A787 ; Latin # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T
A78B..A78E ; Latin # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
A78F ; Latin # Lo LATIN LETTER SINOLOGICAL DOT
A790..A7B9 ; Latin # L& [42] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER U WITH STROKE
A790..A7BF ; Latin # L& [48] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER GLOTTAL U
A7C2..A7C6 ; Latin # L& [5] LATIN CAPITAL LETTER ANGLICANA W..LATIN CAPITAL LETTER Z WITH PALATAL HOOK
A7F7 ; Latin # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I
A7F8..A7F9 ; Latin # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
A7FA ; Latin # L& LATIN LETTER SMALL CAPITAL TURNED M
@ -660,11 +669,12 @@ A7FB..A7FF ; Latin # Lo [5] LATIN EPIGRAPHIC LETTER REVERSED F..LATIN EPIGR
AB30..AB5A ; Latin # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG
AB5C..AB5F ; Latin # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK
AB60..AB64 ; Latin # L& [5] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER INVERTED ALPHA
AB66..AB67 ; Latin # L& [2] LATIN SMALL LETTER DZ DIGRAPH WITH RETROFLEX HOOK..LATIN SMALL LETTER TS DIGRAPH WITH RETROFLEX HOOK
FB00..FB06 ; Latin # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
FF21..FF3A ; Latin # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
# Total code points: 1353
# Total code points: 1366
# ================================================
@ -914,7 +924,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA
094D ; Devanagari # Mn DEVANAGARI SIGN VIRAMA
094E..094F ; Devanagari # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW
0950 ; Devanagari # Lo DEVANAGARI OM
0953..0957 ; Devanagari # Mn [5] DEVANAGARI GRAVE ACCENT..DEVANAGARI VOWEL SIGN UUE
0955..0957 ; Devanagari # Mn [3] DEVANAGARI VOWEL SIGN CANDRA LONG E..DEVANAGARI VOWEL SIGN UUE
0958..0961 ; Devanagari # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL
0962..0963 ; Devanagari # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
0966..096F ; Devanagari # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
@ -929,7 +939,7 @@ A8FC ; Devanagari # Po DEVANAGARI SIGN SIDDHAM
A8FD..A8FE ; Devanagari # Lo [2] DEVANAGARI JAIN OM..DEVANAGARI LETTER AY
A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY
# Total code points: 156
# Total code points: 154
# ================================================
@ -1079,8 +1089,13 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY
0BF3..0BF8 ; Tamil # So [6] TAMIL DAY SIGN..TAMIL AS ABOVE SIGN
0BF9 ; Tamil # Sc TAMIL RUPEE SIGN
0BFA ; Tamil # So TAMIL NUMBER SIGN
11FC0..11FD4 ; Tamil # No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH
11FD5..11FDC ; Tamil # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI
11FDD..11FE0 ; Tamil # Sc [4] TAMIL SIGN KAACU..TAMIL SIGN VARAAKAN
11FE1..11FF1 ; Tamil # So [17] TAMIL SIGN PAARAM..TAMIL SIGN VAKAIYARAA
11FFF ; Tamil # Po TAMIL PUNCTUATION END OF TEXT
# Total code points: 72
# Total code points: 123
# ================================================
@ -1101,10 +1116,11 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY
0C60..0C61 ; Telugu # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
0C62..0C63 ; Telugu # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL
0C66..0C6F ; Telugu # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE
0C77 ; Telugu # Po TELUGU SIGN SIDDHAM
0C78..0C7E ; Telugu # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR
0C7F ; Telugu # So TELUGU SIGN TUUMU
# Total code points: 97
# Total code points: 98
# ================================================
@ -1202,20 +1218,13 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY
0E81..0E82 ; Lao # Lo [2] LAO LETTER KO..LAO LETTER KHO SUNG
0E84 ; Lao # Lo LAO LETTER KHO TAM
0E87..0E88 ; Lao # Lo [2] LAO LETTER NGO..LAO LETTER CO
0E8A ; Lao # Lo LAO LETTER SO TAM
0E8D ; Lao # Lo LAO LETTER NYO
0E94..0E97 ; Lao # Lo [4] LAO LETTER DO..LAO LETTER THO TAM
0E99..0E9F ; Lao # Lo [7] LAO LETTER NO..LAO LETTER FO SUNG
0EA1..0EA3 ; Lao # Lo [3] LAO LETTER MO..LAO LETTER LO LING
0E86..0E8A ; Lao # Lo [5] LAO LETTER PALI GHA..LAO LETTER SO TAM
0E8C..0EA3 ; Lao # Lo [24] LAO LETTER PALI JHA..LAO LETTER LO LING
0EA5 ; Lao # Lo LAO LETTER LO LOOT
0EA7 ; Lao # Lo LAO LETTER WO
0EAA..0EAB ; Lao # Lo [2] LAO LETTER SO SUNG..LAO LETTER HO SUNG
0EAD..0EB0 ; Lao # Lo [4] LAO LETTER O..LAO VOWEL SIGN A
0EA7..0EB0 ; Lao # Lo [10] LAO LETTER WO..LAO VOWEL SIGN A
0EB1 ; Lao # Mn LAO VOWEL SIGN MAI KAN
0EB2..0EB3 ; Lao # Lo [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM
0EB4..0EB9 ; Lao # Mn [6] LAO VOWEL SIGN I..LAO VOWEL SIGN UU
0EBB..0EBC ; Lao # Mn [2] LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO
0EB4..0EBC ; Lao # Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO
0EBD ; Lao # Lo LAO SEMIVOWEL SIGN NYO
0EC0..0EC4 ; Lao # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI
0EC6 ; Lao # Lm LAO KO LA
@ -1223,7 +1232,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY
0ED0..0ED9 ; Lao # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE
0EDC..0EDF ; Lao # Lo [4] LAO HO NO..LAO LETTER KHMU NYO
# Total code points: 67
# Total code points: 82
# ================================================
@ -1409,7 +1418,8 @@ AB70..ABBF ; Cherokee # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETT
1400 ; Canadian_Aboriginal # Pd CANADIAN SYLLABICS HYPHEN
1401..166C ; Canadian_Aboriginal # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA
166D..166E ; Canadian_Aboriginal # Po [2] CANADIAN SYLLABICS CHI SIGN..CANADIAN SYLLABICS FULL STOP
166D ; Canadian_Aboriginal # So CANADIAN SYLLABICS CHI SIGN
166E ; Canadian_Aboriginal # Po CANADIAN SYLLABICS FULL STOP
166F..167F ; Canadian_Aboriginal # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W
18B0..18F5 ; Canadian_Aboriginal # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S
@ -1481,9 +1491,10 @@ AB70..ABBF ; Cherokee # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETT
309D..309E ; Hiragana # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK
309F ; Hiragana # Lo HIRAGANA DIGRAPH YORI
1B001..1B11E ; Hiragana # Lo [286] HIRAGANA LETTER ARCHAIC YE..HENTAIGANA LETTER N-MU-MO-2
1B150..1B152 ; Hiragana # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO
1F200 ; Hiragana # So SQUARE HIRAGANA HOKA
# Total code points: 376
# Total code points: 379
# ================================================
@ -1496,8 +1507,9 @@ AB70..ABBF ; Cherokee # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETT
FF66..FF6F ; Katakana # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU
FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N
1B000 ; Katakana # Lo KATAKANA LETTER ARCHAIC E
1B164..1B167 ; Katakana # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N
# Total code points: 300
# Total code points: 304
# ================================================
@ -1568,7 +1580,7 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE
0485..0486 ; Inherited # Mn [2] COMBINING CYRILLIC DASIA PNEUMATA..COMBINING CYRILLIC PSILI PNEUMATA
064B..0655 ; Inherited # Mn [11] ARABIC FATHATAN..ARABIC HAMZA BELOW
0670 ; Inherited # Mn ARABIC LETTER SUPERSCRIPT ALEF
0951..0952 ; Inherited # Mn [2] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI STRESS SIGN ANUDATTA
0951..0954 ; Inherited # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT
1AB0..1ABD ; Inherited # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
1ABE ; Inherited # Me COMBINING PARENTHESES OVERLAY
1CD0..1CD2 ; Inherited # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
@ -1598,7 +1610,7 @@ FE20..FE2D ; Inherited # Mn [14] COMBINING LIGATURE LEFT HALF..COMBINING CON
1D1AA..1D1AD ; Inherited # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
# Total code points: 569
# Total code points: 571
# ================================================
@ -2028,8 +2040,9 @@ AADE..AADF ; Tai_Viet # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI
# ================================================
13000..1342E ; Egyptian_Hieroglyphs # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032
13430..13438 ; Egyptian_Hieroglyphs # Cf [9] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END SEGMENT
# Total code points: 1071
# Total code points: 1080
# ================================================
@ -2072,8 +2085,8 @@ A9B3 ; Javanese # Mn JAVANESE SIGN CECAK TELU
A9B4..A9B5 ; Javanese # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG
A9B6..A9B9 ; Javanese # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT
A9BA..A9BB ; Javanese # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE
A9BC ; Javanese # Mn JAVANESE VOWEL SIGN PEPET
A9BD..A9C0 ; Javanese # Mc [4] JAVANESE CONSONANT SIGN KERET..JAVANESE PANGKON
A9BC..A9BD ; Javanese # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET
A9BE..A9C0 ; Javanese # Mc [3] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE PANGKON
A9C1..A9CD ; Javanese # Po [13] JAVANESE LEFT RERENGGAN..JAVANESE TURNED PADA PISELEH
A9D0..A9D9 ; Javanese # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE
A9DE..A9DF ; Javanese # Po [2] JAVANESE PADA TIRTA TUMETES..JAVANESE PADA ISEN-ISEN
@ -2225,13 +2238,14 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
# ================================================
16F00..16F44 ; Miao # Lo [69] MIAO LETTER PA..MIAO LETTER HHA
16F00..16F4A ; Miao # Lo [75] MIAO LETTER PA..MIAO LETTER RTE
16F4F ; Miao # Mn MIAO SIGN CONSONANT MODIFIER BAR
16F50 ; Miao # Lo MIAO LETTER NASALIZATION
16F51..16F7E ; Miao # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG
16F51..16F87 ; Miao # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI
16F8F..16F92 ; Miao # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
16F93..16F9F ; Miao # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8
# Total code points: 133
# Total code points: 149
# ================================================
@ -2270,9 +2284,10 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
116B0..116B5 ; Takri # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU
116B6 ; Takri # Mc TAKRI SIGN VIRAMA
116B7 ; Takri # Mn TAKRI SIGN NUKTA
116B8 ; Takri # Lo TAKRI LETTER ARCHAIC KHA
116C0..116C9 ; Takri # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
# Total code points: 66
# Total code points: 67
# ================================================
@ -2587,10 +2602,11 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
1E900..1E943 ; Adlam # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA
1E944..1E94A ; Adlam # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
1E94B ; Adlam # Lm ADLAM NASALIZATION MARK
1E950..1E959 ; Adlam # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
1E95E..1E95F ; Adlam # Po [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK
# Total code points: 87
# Total code points: 88
# ================================================
@ -2637,8 +2653,9 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
1145B ; Newa # Po NEWA PLACEHOLDER MARK
1145D ; Newa # Po NEWA INSERTION SIGN
1145E ; Newa # Mn NEWA SANDHI MARK
1145F ; Newa # Lo NEWA LETTER VEDIC ANUSVARA
# Total code points: 93
# Total code points: 94
# ================================================
@ -2650,10 +2667,10 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
# ================================================
16FE0 ; Tangut # Lm TANGUT ITERATION MARK
17000..187F1 ; Tangut # Lo [6130] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F1
17000..187F7 ; Tangut # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7
18800..18AF2 ; Tangut # Lo [755] TANGUT COMPONENT-001..TANGUT COMPONENT-755
# Total code points: 6886
# Total code points: 6892
# ================================================
@ -2683,8 +2700,7 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
11A51..11A56 ; Soyombo # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE
11A57..11A58 ; Soyombo # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU
11A59..11A5B ; Soyombo # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK
11A5C..11A83 ; Soyombo # Lo [40] SOYOMBO LETTER KA..SOYOMBO LETTER KSSA
11A86..11A89 ; Soyombo # Lo [4] SOYOMBO CLUSTER-INITIAL LETTER RA..SOYOMBO CLUSTER-INITIAL LETTER SA
11A5C..11A89 ; Soyombo # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA
11A8A..11A96 ; Soyombo # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA
11A97 ; Soyombo # Mc SOYOMBO SIGN VISARGA
11A98..11A99 ; Soyombo # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER
@ -2692,7 +2708,7 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
11A9D ; Soyombo # Lo SOYOMBO MARK PLUTA
11A9E..11AA2 ; Soyombo # Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2
# Total code points: 81
# Total code points: 83
# ================================================
@ -2777,4 +2793,46 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
# Total code points: 40
# ================================================
10FE0..10FF6 ; Elymaic # Lo [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH
# Total code points: 23
# ================================================
119A0..119A7 ; Nandinagari # Lo [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR
119AA..119D0 ; Nandinagari # Lo [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA
119D1..119D3 ; Nandinagari # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II
119D4..119D7 ; Nandinagari # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR
119DA..119DB ; Nandinagari # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI
119DC..119DF ; Nandinagari # Mc [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA
119E0 ; Nandinagari # Mn NANDINAGARI SIGN VIRAMA
119E1 ; Nandinagari # Lo NANDINAGARI SIGN AVAGRAHA
119E2 ; Nandinagari # Po NANDINAGARI SIGN SIDDHAM
119E3 ; Nandinagari # Lo NANDINAGARI HEADSTROKE
119E4 ; Nandinagari # Mc NANDINAGARI VOWEL SIGN PRISHTHAMATRA E
# Total code points: 65
# ================================================
1E100..1E12C ; Nyiakeng_Puachue_Hmong # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W
1E130..1E136 ; Nyiakeng_Puachue_Hmong # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
1E137..1E13D ; Nyiakeng_Puachue_Hmong # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER
1E140..1E149 ; Nyiakeng_Puachue_Hmong # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE
1E14E ; Nyiakeng_Puachue_Hmong # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ
1E14F ; Nyiakeng_Puachue_Hmong # So NYIAKENG PUACHUE HMONG CIRCLED CA
# Total code points: 71
# ================================================
1E2C0..1E2EB ; Wancho # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH
1E2EC..1E2EF ; Wancho # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI
1E2F0..1E2F9 ; Wancho # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE
1E2FF ; Wancho # Sc WANCHO NGUN SIGN
# Total code points: 59
# EOF

View File

@ -1,6 +1,6 @@
# SpecialCasing-11.0.0.txt
# Date: 2018-02-22, 06:16:47 GMT
# Copyright (c) 2018 Unicode, Inc.
# SpecialCasing-12.1.0.txt
# Date: 2019-03-10, 10:53:28 GMT
# Copyright (c) 2019 Unicode, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#

View File

@ -640,7 +640,7 @@
027F;LATIN SMALL LETTER REVERSED R WITH FISHHOOK;Ll;0;L;;;;;N;LATIN SMALL LETTER REVERSED FISHHOOK R;;;;
0280;LATIN LETTER SMALL CAPITAL R;Ll;0;L;;;;;N;;;01A6;;01A6
0281;LATIN LETTER SMALL CAPITAL INVERTED R;Ll;0;L;;;;;N;;;;;
0282;LATIN SMALL LETTER S WITH HOOK;Ll;0;L;;;;;N;LATIN SMALL LETTER S HOOK;;;;
0282;LATIN SMALL LETTER S WITH HOOK;Ll;0;L;;;;;N;LATIN SMALL LETTER S HOOK;;A7C5;;A7C5
0283;LATIN SMALL LETTER ESH;Ll;0;L;;;;;N;;;01A9;;01A9
0284;LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK;Ll;0;L;;;;;N;LATIN SMALL LETTER DOTLESS J BAR HOOK;;;;
0285;LATIN SMALL LETTER SQUAT REVERSED ESH;Ll;0;L;;;;;N;;;;;
@ -2809,6 +2809,7 @@
0C6D;TELUGU DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;;
0C6E;TELUGU DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;;
0C6F;TELUGU DIGIT NINE;Nd;0;L;;9;9;9;N;;;;;
0C77;TELUGU SIGN SIDDHAM;Po;0;L;;;;;N;;;;;
0C78;TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR;No;0;ON;;;;0;N;;;;;
0C79;TELUGU FRACTION DIGIT ONE FOR ODD POWERS OF FOUR;No;0;ON;;;;1;N;;;;;
0C7A;TELUGU FRACTION DIGIT TWO FOR ODD POWERS OF FOUR;No;0;ON;;;;2;N;;;;;
@ -3203,14 +3204,24 @@
0E81;LAO LETTER KO;Lo;0;L;;;;;N;;;;;
0E82;LAO LETTER KHO SUNG;Lo;0;L;;;;;N;;;;;
0E84;LAO LETTER KHO TAM;Lo;0;L;;;;;N;;;;;
0E86;LAO LETTER PALI GHA;Lo;0;L;;;;;N;;;;;
0E87;LAO LETTER NGO;Lo;0;L;;;;;N;;;;;
0E88;LAO LETTER CO;Lo;0;L;;;;;N;;;;;
0E89;LAO LETTER PALI CHA;Lo;0;L;;;;;N;;;;;
0E8A;LAO LETTER SO TAM;Lo;0;L;;;;;N;;;;;
0E8C;LAO LETTER PALI JHA;Lo;0;L;;;;;N;;;;;
0E8D;LAO LETTER NYO;Lo;0;L;;;;;N;;;;;
0E8E;LAO LETTER PALI NYA;Lo;0;L;;;;;N;;;;;
0E8F;LAO LETTER PALI TTA;Lo;0;L;;;;;N;;;;;
0E90;LAO LETTER PALI TTHA;Lo;0;L;;;;;N;;;;;
0E91;LAO LETTER PALI DDA;Lo;0;L;;;;;N;;;;;
0E92;LAO LETTER PALI DDHA;Lo;0;L;;;;;N;;;;;
0E93;LAO LETTER PALI NNA;Lo;0;L;;;;;N;;;;;
0E94;LAO LETTER DO;Lo;0;L;;;;;N;;;;;
0E95;LAO LETTER TO;Lo;0;L;;;;;N;;;;;
0E96;LAO LETTER THO SUNG;Lo;0;L;;;;;N;;;;;
0E97;LAO LETTER THO TAM;Lo;0;L;;;;;N;;;;;
0E98;LAO LETTER PALI DHA;Lo;0;L;;;;;N;;;;;
0E99;LAO LETTER NO;Lo;0;L;;;;;N;;;;;
0E9A;LAO LETTER BO;Lo;0;L;;;;;N;;;;;
0E9B;LAO LETTER PO;Lo;0;L;;;;;N;;;;;
@ -3218,13 +3229,17 @@
0E9D;LAO LETTER FO TAM;Lo;0;L;;;;;N;;;;;
0E9E;LAO LETTER PHO TAM;Lo;0;L;;;;;N;;;;;
0E9F;LAO LETTER FO SUNG;Lo;0;L;;;;;N;;;;;
0EA0;LAO LETTER PALI BHA;Lo;0;L;;;;;N;;;;;
0EA1;LAO LETTER MO;Lo;0;L;;;;;N;;;;;
0EA2;LAO LETTER YO;Lo;0;L;;;;;N;;;;;
0EA3;LAO LETTER LO LING;Lo;0;L;;;;;N;;;;;
0EA5;LAO LETTER LO LOOT;Lo;0;L;;;;;N;;;;;
0EA7;LAO LETTER WO;Lo;0;L;;;;;N;;;;;
0EA8;LAO LETTER SANSKRIT SHA;Lo;0;L;;;;;N;;;;;
0EA9;LAO LETTER SANSKRIT SSA;Lo;0;L;;;;;N;;;;;
0EAA;LAO LETTER SO SUNG;Lo;0;L;;;;;N;;;;;
0EAB;LAO LETTER HO SUNG;Lo;0;L;;;;;N;;;;;
0EAC;LAO LETTER PALI LLA;Lo;0;L;;;;;N;;;;;
0EAD;LAO LETTER O;Lo;0;L;;;;;N;;;;;
0EAE;LAO LETTER HO TAM;Lo;0;L;;;;;N;;;;;
0EAF;LAO ELLIPSIS;Lo;0;L;;;;;N;;;;;
@ -3238,6 +3253,7 @@
0EB7;LAO VOWEL SIGN YY;Mn;0;NSM;;;;;N;;;;;
0EB8;LAO VOWEL SIGN U;Mn;118;NSM;;;;;N;;;;;
0EB9;LAO VOWEL SIGN UU;Mn;118;NSM;;;;;N;;;;;
0EBA;LAO SIGN PALI VIRAMA;Mn;9;NSM;;;;;N;;;;;
0EBB;LAO VOWEL SIGN MAI KON;Mn;0;NSM;;;;;N;;;;;
0EBC;LAO SEMIVOWEL SIGN LO;Mn;0;NSM;;;;;N;;;;;
0EBD;LAO SEMIVOWEL SIGN NYO;Lo;0;L;;;;;N;;;;;
@ -5079,7 +5095,7 @@
166A;CANADIAN SYLLABICS CARRIER TTSEE;Lo;0;L;;;;;N;;;;;
166B;CANADIAN SYLLABICS CARRIER TTSI;Lo;0;L;;;;;N;;;;;
166C;CANADIAN SYLLABICS CARRIER TTSA;Lo;0;L;;;;;N;;;;;
166D;CANADIAN SYLLABICS CHI SIGN;Po;0;L;;;;;N;;;;;
166D;CANADIAN SYLLABICS CHI SIGN;So;0;L;;;;;N;;;;;
166E;CANADIAN SYLLABICS FULL STOP;Po;0;L;;;;;N;;;;;
166F;CANADIAN SYLLABICS QAI;Lo;0;L;;;;;N;;;;;
1670;CANADIAN SYLLABICS NGAI;Lo;0;L;;;;;N;;;;;
@ -6488,14 +6504,15 @@
1CEF;VEDIC SIGN LONG ANUSVARA;Lo;0;L;;;;;N;;;;;
1CF0;VEDIC SIGN RTHANG LONG ANUSVARA;Lo;0;L;;;;;N;;;;;
1CF1;VEDIC SIGN ANUSVARA UBHAYATO MUKHA;Lo;0;L;;;;;N;;;;;
1CF2;VEDIC SIGN ARDHAVISARGA;Mc;0;L;;;;;N;;;;;
1CF3;VEDIC SIGN ROTATED ARDHAVISARGA;Mc;0;L;;;;;N;;;;;
1CF2;VEDIC SIGN ARDHAVISARGA;Lo;0;L;;;;;N;;;;;
1CF3;VEDIC SIGN ROTATED ARDHAVISARGA;Lo;0;L;;;;;N;;;;;
1CF4;VEDIC TONE CANDRA ABOVE;Mn;230;NSM;;;;;N;;;;;
1CF5;VEDIC SIGN JIHVAMULIYA;Lo;0;L;;;;;N;;;;;
1CF6;VEDIC SIGN UPADHMANIYA;Lo;0;L;;;;;N;;;;;
1CF7;VEDIC SIGN ATIKRAMA;Mc;0;L;;;;;N;;;;;
1CF8;VEDIC TONE RING ABOVE;Mn;230;NSM;;;;;N;;;;;
1CF9;VEDIC TONE DOUBLE RING ABOVE;Mn;230;NSM;;;;;N;;;;;
1CFA;VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA;Lo;0;L;;;;;N;;;;;
1D00;LATIN LETTER SMALL CAPITAL A;Ll;0;L;;;;;N;;;;;
1D01;LATIN LETTER SMALL CAPITAL AE;Ll;0;L;;;;;N;;;;;
1D02;LATIN SMALL LETTER TURNED AE;Ll;0;L;;;;;N;;;;;
@ -6638,7 +6655,7 @@
1D8B;LATIN SMALL LETTER ESH WITH PALATAL HOOK;Ll;0;L;;;;;N;;;;;
1D8C;LATIN SMALL LETTER V WITH PALATAL HOOK;Ll;0;L;;;;;N;;;;;
1D8D;LATIN SMALL LETTER X WITH PALATAL HOOK;Ll;0;L;;;;;N;;;;;
1D8E;LATIN SMALL LETTER Z WITH PALATAL HOOK;Ll;0;L;;;;;N;;;;;
1D8E;LATIN SMALL LETTER Z WITH PALATAL HOOK;Ll;0;L;;;;;N;;;A7C6;;A7C6
1D8F;LATIN SMALL LETTER A WITH RETROFLEX HOOK;Ll;0;L;;;;;N;;;;;
1D90;LATIN SMALL LETTER ALPHA WITH RETROFLEX HOOK;Ll;0;L;;;;;N;;;;;
1D91;LATIN SMALL LETTER D WITH HOOK AND TAIL;Ll;0;L;;;;;N;;;;;
@ -10165,6 +10182,7 @@
2BC6;BLACK MEDIUM DOWN-POINTING TRIANGLE CENTRED;So;0;ON;;;;;N;;;;;
2BC7;BLACK MEDIUM LEFT-POINTING TRIANGLE CENTRED;So;0;ON;;;;;N;;;;;
2BC8;BLACK MEDIUM RIGHT-POINTING TRIANGLE CENTRED;So;0;ON;;;;;N;;;;;
2BC9;NEPTUNE FORM TWO;So;0;ON;;;;;N;;;;;
2BCA;TOP HALF BLACK CIRCLE;So;0;ON;;;;;N;;;;;
2BCB;BOTTOM HALF BLACK CIRCLE;So;0;ON;;;;;N;;;;;
2BCC;LIGHT FOUR POINTED BLACK CUSP;So;0;ON;;;;;N;;;;;
@ -10218,6 +10236,7 @@
2BFC;DOUBLED SYMBOL;So;0;ON;;;;;N;;;;;
2BFD;PASSED SYMBOL;So;0;ON;;;;;N;;;;;
2BFE;REVERSED RIGHT ANGLE;So;0;ON;;;;;Y;;;;;
2BFF;HELLSCHREIBER PAUSE SYMBOL;So;0;ON;;;;;N;;;;;
2C00;GLAGOLITIC CAPITAL LETTER AZU;Lu;0;L;;;;;N;;;;2C30;
2C01;GLAGOLITIC CAPITAL LETTER BUKY;Lu;0;L;;;;;N;;;;2C31;
2C02;GLAGOLITIC CAPITAL LETTER VEDE;Lu;0;L;;;;;N;;;;2C32;
@ -10756,6 +10775,7 @@
2E4C;MEDIEVAL COMMA;Po;0;ON;;;;;N;;;;;
2E4D;PARAGRAPHUS MARK;Po;0;ON;;;;;N;;;;;
2E4E;PUNCTUS ELEVATUS MARK;Po;0;ON;;;;;N;;;;;
2E4F;CORNISH VERSE DIVIDER;Po;0;ON;;;;;N;;;;;
2E80;CJK RADICAL REPEAT;So;0;ON;;;;;N;;;;;
2E81;CJK RADICAL CLIFF;So;0;ON;;;;;N;;;;;
2E82;CJK RADICAL SECOND ONE;So;0;ON;;;;;N;;;;;
@ -11836,7 +11856,7 @@
32FC;CIRCLED KATAKANA WI;So;0;L;<circle> 30F0;;;;N;;;;;
32FD;CIRCLED KATAKANA WE;So;0;L;<circle> 30F1;;;;N;;;;;
32FE;CIRCLED KATAKANA WO;So;0;L;<circle> 30F2;;;;N;;;;;
32FF;SQUARE ERA NAME REIWA;So;0;L;<square> 4EE4 548C;;;;N;SQUARED TWO IDEOGRAPHS ERA NAME REIWA;;;;
32FF;SQUARE ERA NAME REIWA;So;0;L;<square> 4EE4 548C;;;;N;;;;;
3300;SQUARE APAATO;So;0;L;<square> 30A2 30D1 30FC 30C8;;;;N;SQUARED APAATO;;;;
3301;SQUARE ARUHUA;So;0;L;<square> 30A2 30EB 30D5 30A1;;;;N;SQUARED ARUHUA;;;;
3302;SQUARE ANPEA;So;0;L;<square> 30A2 30F3 30DA 30A2;;;;N;SQUARED ANPEA;;;;
@ -14061,7 +14081,7 @@ A790;LATIN CAPITAL LETTER N WITH DESCENDER;Lu;0;L;;;;;N;;;;A791;
A791;LATIN SMALL LETTER N WITH DESCENDER;Ll;0;L;;;;;N;;;A790;;A790
A792;LATIN CAPITAL LETTER C WITH BAR;Lu;0;L;;;;;N;;;;A793;
A793;LATIN SMALL LETTER C WITH BAR;Ll;0;L;;;;;N;;;A792;;A792
A794;LATIN SMALL LETTER C WITH PALATAL HOOK;Ll;0;L;;;;;N;;;;;
A794;LATIN SMALL LETTER C WITH PALATAL HOOK;Ll;0;L;;;;;N;;;A7C4;;A7C4
A795;LATIN SMALL LETTER H WITH PALATAL HOOK;Ll;0;L;;;;;N;;;;;
A796;LATIN CAPITAL LETTER B WITH FLOURISH;Lu;0;L;;;;;N;;;;A797;
A797;LATIN SMALL LETTER B WITH FLOURISH;Ll;0;L;;;;;N;;;A796;;A796
@ -14099,6 +14119,17 @@ A7B6;LATIN CAPITAL LETTER OMEGA;Lu;0;L;;;;;N;;;;A7B7;
A7B7;LATIN SMALL LETTER OMEGA;Ll;0;L;;;;;N;;;A7B6;;A7B6
A7B8;LATIN CAPITAL LETTER U WITH STROKE;Lu;0;L;;;;;N;;;;A7B9;
A7B9;LATIN SMALL LETTER U WITH STROKE;Ll;0;L;;;;;N;;;A7B8;;A7B8
A7BA;LATIN CAPITAL LETTER GLOTTAL A;Lu;0;L;;;;;N;;;;A7BB;
A7BB;LATIN SMALL LETTER GLOTTAL A;Ll;0;L;;;;;N;;;A7BA;;A7BA
A7BC;LATIN CAPITAL LETTER GLOTTAL I;Lu;0;L;;;;;N;;;;A7BD;
A7BD;LATIN SMALL LETTER GLOTTAL I;Ll;0;L;;;;;N;;;A7BC;;A7BC
A7BE;LATIN CAPITAL LETTER GLOTTAL U;Lu;0;L;;;;;N;;;;A7BF;
A7BF;LATIN SMALL LETTER GLOTTAL U;Ll;0;L;;;;;N;;;A7BE;;A7BE
A7C2;LATIN CAPITAL LETTER ANGLICANA W;Lu;0;L;;;;;N;;;;A7C3;
A7C3;LATIN SMALL LETTER ANGLICANA W;Ll;0;L;;;;;N;;;A7C2;;A7C2
A7C4;LATIN CAPITAL LETTER C WITH PALATAL HOOK;Lu;0;L;;;;;N;;;;A794;
A7C5;LATIN CAPITAL LETTER S WITH HOOK;Lu;0;L;;;;;N;;;;0282;
A7C6;LATIN CAPITAL LETTER Z WITH PALATAL HOOK;Lu;0;L;;;;;N;;;;1D8E;
A7F7;LATIN EPIGRAPHIC LETTER SIDEWAYS I;Lo;0;L;;;;;N;;;;;
A7F8;MODIFIER LETTER CAPITAL H WITH STROKE;Lm;0;L;<super> 0126;;;;N;;;;;
A7F9;MODIFIER LETTER SMALL LIGATURE OE;Lm;0;L;<super> 0153;;;;N;;;;;
@ -14507,7 +14538,7 @@ A9B9;JAVANESE VOWEL SIGN SUKU MENDUT;Mn;0;NSM;;;;;N;;;;;
A9BA;JAVANESE VOWEL SIGN TALING;Mc;0;L;;;;;N;;;;;
A9BB;JAVANESE VOWEL SIGN DIRGA MURE;Mc;0;L;;;;;N;;;;;
A9BC;JAVANESE VOWEL SIGN PEPET;Mn;0;NSM;;;;;N;;;;;
A9BD;JAVANESE CONSONANT SIGN KERET;Mc;0;L;;;;;N;;;;;
A9BD;JAVANESE CONSONANT SIGN KERET;Mn;0;NSM;;;;;N;;;;;
A9BE;JAVANESE CONSONANT SIGN PENGKAL;Mc;0;L;;;;;N;;;;;
A9BF;JAVANESE CONSONANT SIGN CAKRA;Mc;0;L;;;;;N;;;;;
A9C0;JAVANESE PANGKON;Mc;9;L;;;;;N;;;;;
@ -14864,6 +14895,8 @@ AB62;LATIN SMALL LETTER OPEN OE;Ll;0;L;;;;;N;;;;;
AB63;LATIN SMALL LETTER UO;Ll;0;L;;;;;N;;;;;
AB64;LATIN SMALL LETTER INVERTED ALPHA;Ll;0;L;;;;;N;;;;;
AB65;GREEK LETTER SMALL CAPITAL OMEGA;Ll;0;L;;;;;N;;;;;
AB66;LATIN SMALL LETTER DZ DIGRAPH WITH RETROFLEX HOOK;Ll;0;L;;;;;N;;;;;
AB67;LATIN SMALL LETTER TS DIGRAPH WITH RETROFLEX HOOK;Ll;0;L;;;;;N;;;;;
AB70;CHEROKEE SMALL LETTER A;Ll;0;L;;;;;N;;;13A0;;13A0
AB71;CHEROKEE SMALL LETTER E;Ll;0;L;;;;;N;;;13A1;;13A1
AB72;CHEROKEE SMALL LETTER I;Ll;0;L;;;;;N;;;13A2;;13A2
@ -19106,6 +19139,29 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
10F57;SOGDIAN PUNCTUATION CIRCLE WITH DOT;Po;0;AL;;;;;N;;;;;
10F58;SOGDIAN PUNCTUATION TWO CIRCLES WITH DOTS;Po;0;AL;;;;;N;;;;;
10F59;SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT;Po;0;AL;;;;;N;;;;;
10FE0;ELYMAIC LETTER ALEPH;Lo;0;R;;;;;N;;;;;
10FE1;ELYMAIC LETTER BETH;Lo;0;R;;;;;N;;;;;
10FE2;ELYMAIC LETTER GIMEL;Lo;0;R;;;;;N;;;;;
10FE3;ELYMAIC LETTER DALETH;Lo;0;R;;;;;N;;;;;
10FE4;ELYMAIC LETTER HE;Lo;0;R;;;;;N;;;;;
10FE5;ELYMAIC LETTER WAW;Lo;0;R;;;;;N;;;;;
10FE6;ELYMAIC LETTER ZAYIN;Lo;0;R;;;;;N;;;;;
10FE7;ELYMAIC LETTER HETH;Lo;0;R;;;;;N;;;;;
10FE8;ELYMAIC LETTER TETH;Lo;0;R;;;;;N;;;;;
10FE9;ELYMAIC LETTER YODH;Lo;0;R;;;;;N;;;;;
10FEA;ELYMAIC LETTER KAPH;Lo;0;R;;;;;N;;;;;
10FEB;ELYMAIC LETTER LAMEDH;Lo;0;R;;;;;N;;;;;
10FEC;ELYMAIC LETTER MEM;Lo;0;R;;;;;N;;;;;
10FED;ELYMAIC LETTER NUN;Lo;0;R;;;;;N;;;;;
10FEE;ELYMAIC LETTER SAMEKH;Lo;0;R;;;;;N;;;;;
10FEF;ELYMAIC LETTER AYIN;Lo;0;R;;;;;N;;;;;
10FF0;ELYMAIC LETTER PE;Lo;0;R;;;;;N;;;;;
10FF1;ELYMAIC LETTER SADHE;Lo;0;R;;;;;N;;;;;
10FF2;ELYMAIC LETTER QOPH;Lo;0;R;;;;;N;;;;;
10FF3;ELYMAIC LETTER RESH;Lo;0;R;;;;;N;;;;;
10FF4;ELYMAIC LETTER SHIN;Lo;0;R;;;;;N;;;;;
10FF5;ELYMAIC LETTER TAW;Lo;0;R;;;;;N;;;;;
10FF6;ELYMAIC LIGATURE ZAYIN-YODH;Lo;0;R;;;;;N;;;;;
11000;BRAHMI SIGN CANDRABINDU;Mc;0;L;;;;;N;;;;;
11001;BRAHMI SIGN ANUSVARA;Mn;0;NSM;;;;;N;;;;;
11002;BRAHMI SIGN VISARGA;Mc;0;L;;;;;N;;;;;
@ -19888,6 +19944,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
1145B;NEWA PLACEHOLDER MARK;Po;0;L;;;;;N;;;;;
1145D;NEWA INSERTION SIGN;Po;0;L;;;;;N;;;;;
1145E;NEWA SANDHI MARK;Mn;230;NSM;;;;;N;;;;;
1145F;NEWA LETTER VEDIC ANUSVARA;Lo;0;L;;;;;N;;;;;
11480;TIRHUTA ANJI;Lo;0;L;;;;;N;;;;;
11481;TIRHUTA LETTER A;Lo;0;L;;;;;N;;;;;
11482;TIRHUTA LETTER AA;Lo;0;L;;;;;N;;;;;
@ -20210,6 +20267,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
116B5;TAKRI VOWEL SIGN AU;Mn;0;NSM;;;;;N;;;;;
116B6;TAKRI SIGN VIRAMA;Mc;9;L;;;;;N;;;;;
116B7;TAKRI SIGN NUKTA;Mn;7;NSM;;;;;N;;;;;
116B8;TAKRI LETTER ARCHAIC KHA;Lo;0;L;;;;;N;;;;;
116C0;TAKRI DIGIT ZERO;Nd;0;L;;0;0;0;N;;;;;
116C1;TAKRI DIGIT ONE;Nd;0;L;;1;1;1;N;;;;;
116C2;TAKRI DIGIT TWO;Nd;0;L;;2;2;2;N;;;;;
@ -20422,6 +20480,71 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
118F1;WARANG CITI NUMBER EIGHTY;No;0;L;;;;80;N;;;;;
118F2;WARANG CITI NUMBER NINETY;No;0;L;;;;90;N;;;;;
118FF;WARANG CITI OM;Lo;0;L;;;;;N;;;;;
119A0;NANDINAGARI LETTER A;Lo;0;L;;;;;N;;;;;
119A1;NANDINAGARI LETTER AA;Lo;0;L;;;;;N;;;;;
119A2;NANDINAGARI LETTER I;Lo;0;L;;;;;N;;;;;
119A3;NANDINAGARI LETTER II;Lo;0;L;;;;;N;;;;;
119A4;NANDINAGARI LETTER U;Lo;0;L;;;;;N;;;;;
119A5;NANDINAGARI LETTER UU;Lo;0;L;;;;;N;;;;;
119A6;NANDINAGARI LETTER VOCALIC R;Lo;0;L;;;;;N;;;;;
119A7;NANDINAGARI LETTER VOCALIC RR;Lo;0;L;;;;;N;;;;;
119AA;NANDINAGARI LETTER E;Lo;0;L;;;;;N;;;;;
119AB;NANDINAGARI LETTER AI;Lo;0;L;;;;;N;;;;;
119AC;NANDINAGARI LETTER O;Lo;0;L;;;;;N;;;;;
119AD;NANDINAGARI LETTER AU;Lo;0;L;;;;;N;;;;;
119AE;NANDINAGARI LETTER KA;Lo;0;L;;;;;N;;;;;
119AF;NANDINAGARI LETTER KHA;Lo;0;L;;;;;N;;;;;
119B0;NANDINAGARI LETTER GA;Lo;0;L;;;;;N;;;;;
119B1;NANDINAGARI LETTER GHA;Lo;0;L;;;;;N;;;;;
119B2;NANDINAGARI LETTER NGA;Lo;0;L;;;;;N;;;;;
119B3;NANDINAGARI LETTER CA;Lo;0;L;;;;;N;;;;;
119B4;NANDINAGARI LETTER CHA;Lo;0;L;;;;;N;;;;;
119B5;NANDINAGARI LETTER JA;Lo;0;L;;;;;N;;;;;
119B6;NANDINAGARI LETTER JHA;Lo;0;L;;;;;N;;;;;
119B7;NANDINAGARI LETTER NYA;Lo;0;L;;;;;N;;;;;
119B8;NANDINAGARI LETTER TTA;Lo;0;L;;;;;N;;;;;
119B9;NANDINAGARI LETTER TTHA;Lo;0;L;;;;;N;;;;;
119BA;NANDINAGARI LETTER DDA;Lo;0;L;;;;;N;;;;;
119BB;NANDINAGARI LETTER DDHA;Lo;0;L;;;;;N;;;;;
119BC;NANDINAGARI LETTER NNA;Lo;0;L;;;;;N;;;;;
119BD;NANDINAGARI LETTER TA;Lo;0;L;;;;;N;;;;;
119BE;NANDINAGARI LETTER THA;Lo;0;L;;;;;N;;;;;
119BF;NANDINAGARI LETTER DA;Lo;0;L;;;;;N;;;;;
119C0;NANDINAGARI LETTER DHA;Lo;0;L;;;;;N;;;;;
119C1;NANDINAGARI LETTER NA;Lo;0;L;;;;;N;;;;;
119C2;NANDINAGARI LETTER PA;Lo;0;L;;;;;N;;;;;
119C3;NANDINAGARI LETTER PHA;Lo;0;L;;;;;N;;;;;
119C4;NANDINAGARI LETTER BA;Lo;0;L;;;;;N;;;;;
119C5;NANDINAGARI LETTER BHA;Lo;0;L;;;;;N;;;;;
119C6;NANDINAGARI LETTER MA;Lo;0;L;;;;;N;;;;;
119C7;NANDINAGARI LETTER YA;Lo;0;L;;;;;N;;;;;
119C8;NANDINAGARI LETTER RA;Lo;0;L;;;;;N;;;;;
119C9;NANDINAGARI LETTER LA;Lo;0;L;;;;;N;;;;;
119CA;NANDINAGARI LETTER VA;Lo;0;L;;;;;N;;;;;
119CB;NANDINAGARI LETTER SHA;Lo;0;L;;;;;N;;;;;
119CC;NANDINAGARI LETTER SSA;Lo;0;L;;;;;N;;;;;
119CD;NANDINAGARI LETTER SA;Lo;0;L;;;;;N;;;;;
119CE;NANDINAGARI LETTER HA;Lo;0;L;;;;;N;;;;;
119CF;NANDINAGARI LETTER LLA;Lo;0;L;;;;;N;;;;;
119D0;NANDINAGARI LETTER RRA;Lo;0;L;;;;;N;;;;;
119D1;NANDINAGARI VOWEL SIGN AA;Mc;0;L;;;;;N;;;;;
119D2;NANDINAGARI VOWEL SIGN I;Mc;0;L;;;;;N;;;;;
119D3;NANDINAGARI VOWEL SIGN II;Mc;0;L;;;;;N;;;;;
119D4;NANDINAGARI VOWEL SIGN U;Mn;0;NSM;;;;;N;;;;;
119D5;NANDINAGARI VOWEL SIGN UU;Mn;0;NSM;;;;;N;;;;;
119D6;NANDINAGARI VOWEL SIGN VOCALIC R;Mn;0;NSM;;;;;N;;;;;
119D7;NANDINAGARI VOWEL SIGN VOCALIC RR;Mn;0;NSM;;;;;N;;;;;
119DA;NANDINAGARI VOWEL SIGN E;Mn;0;NSM;;;;;N;;;;;
119DB;NANDINAGARI VOWEL SIGN AI;Mn;0;NSM;;;;;N;;;;;
119DC;NANDINAGARI VOWEL SIGN O;Mc;0;L;;;;;N;;;;;
119DD;NANDINAGARI VOWEL SIGN AU;Mc;0;L;;;;;N;;;;;
119DE;NANDINAGARI SIGN ANUSVARA;Mc;0;L;;;;;N;;;;;
119DF;NANDINAGARI SIGN VISARGA;Mc;0;L;;;;;N;;;;;
119E0;NANDINAGARI SIGN VIRAMA;Mn;9;NSM;;;;;N;;;;;
119E1;NANDINAGARI SIGN AVAGRAHA;Lo;0;L;;;;;N;;;;;
119E2;NANDINAGARI SIGN SIDDHAM;Po;0;L;;;;;N;;;;;
119E3;NANDINAGARI HEADSTROKE;Lo;0;L;;;;;N;;;;;
119E4;NANDINAGARI VOWEL SIGN PRISHTHAMATRA E;Mc;0;L;;;;;N;;;;;
11A00;ZANABAZAR SQUARE LETTER A;Lo;0;L;;;;;N;;;;;
11A01;ZANABAZAR SQUARE VOWEL SIGN I;Mn;0;NSM;;;;;N;;;;;
11A02;ZANABAZAR SQUARE VOWEL SIGN UE;Mn;0;NSM;;;;;N;;;;;
@ -20546,6 +20669,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
11A81;SOYOMBO LETTER SA;Lo;0;L;;;;;N;;;;;
11A82;SOYOMBO LETTER HA;Lo;0;L;;;;;N;;;;;
11A83;SOYOMBO LETTER KSSA;Lo;0;L;;;;;N;;;;;
11A84;SOYOMBO SIGN JIHVAMULIYA;Lo;0;L;;;;;N;;;;;
11A85;SOYOMBO SIGN UPADHMANIYA;Lo;0;L;;;;;N;;;;;
11A86;SOYOMBO CLUSTER-INITIAL LETTER RA;Lo;0;L;;;;;N;;;;;
11A87;SOYOMBO CLUSTER-INITIAL LETTER LA;Lo;0;L;;;;;N;;;;;
11A88;SOYOMBO CLUSTER-INITIAL LETTER SHA;Lo;0;L;;;;;N;;;;;
@ -20960,6 +21085,57 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
11EF6;MAKASAR VOWEL SIGN O;Mc;0;L;;;;;N;;;;;
11EF7;MAKASAR PASSIMBANG;Po;0;L;;;;;N;;;;;
11EF8;MAKASAR END OF SECTION;Po;0;L;;;;;N;;;;;
11FC0;TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH;No;0;L;;;;1/320;N;;;;;
11FC1;TAMIL FRACTION ONE ONE-HUNDRED-AND-SIXTIETH;No;0;L;;;;1/160;N;;;;;
11FC2;TAMIL FRACTION ONE EIGHTIETH;No;0;L;;;;1/80;N;;;;;
11FC3;TAMIL FRACTION ONE SIXTY-FOURTH;No;0;L;;;;1/64;N;;;;;
11FC4;TAMIL FRACTION ONE FORTIETH;No;0;L;;;;1/40;N;;;;;
11FC5;TAMIL FRACTION ONE THIRTY-SECOND;No;0;L;;;;1/32;N;;;;;
11FC6;TAMIL FRACTION THREE EIGHTIETHS;No;0;L;;;;3/80;N;;;;;
11FC7;TAMIL FRACTION THREE SIXTY-FOURTHS;No;0;L;;;;3/64;N;;;;;
11FC8;TAMIL FRACTION ONE TWENTIETH;No;0;L;;;;1/20;N;;;;;
11FC9;TAMIL FRACTION ONE SIXTEENTH-1;No;0;L;;;;1/16;N;;;;;
11FCA;TAMIL FRACTION ONE SIXTEENTH-2;No;0;L;;;;1/16;N;;;;;
11FCB;TAMIL FRACTION ONE TENTH;No;0;L;;;;1/10;N;;;;;
11FCC;TAMIL FRACTION ONE EIGHTH;No;0;L;;;;1/8;N;;;;;
11FCD;TAMIL FRACTION THREE TWENTIETHS;No;0;L;;;;3/20;N;;;;;
11FCE;TAMIL FRACTION THREE SIXTEENTHS;No;0;L;;;;3/16;N;;;;;
11FCF;TAMIL FRACTION ONE FIFTH;No;0;L;;;;1/5;N;;;;;
11FD0;TAMIL FRACTION ONE QUARTER;No;0;L;;;;1/4;N;;;;;
11FD1;TAMIL FRACTION ONE HALF-1;No;0;L;;;;1/2;N;;;;;
11FD2;TAMIL FRACTION ONE HALF-2;No;0;L;;;;1/2;N;;;;;
11FD3;TAMIL FRACTION THREE QUARTERS;No;0;L;;;;3/4;N;;;;;
11FD4;TAMIL FRACTION DOWNSCALING FACTOR KIIZH;No;0;L;;;;1/320;N;;;;;
11FD5;TAMIL SIGN NEL;So;0;ON;;;;;N;;;;;
11FD6;TAMIL SIGN CEVITU;So;0;ON;;;;;N;;;;;
11FD7;TAMIL SIGN AAZHAAKKU;So;0;ON;;;;;N;;;;;
11FD8;TAMIL SIGN UZHAKKU;So;0;ON;;;;;N;;;;;
11FD9;TAMIL SIGN MUUVUZHAKKU;So;0;ON;;;;;N;;;;;
11FDA;TAMIL SIGN KURUNI;So;0;ON;;;;;N;;;;;
11FDB;TAMIL SIGN PATHAKKU;So;0;ON;;;;;N;;;;;
11FDC;TAMIL SIGN MUKKURUNI;So;0;ON;;;;;N;;;;;
11FDD;TAMIL SIGN KAACU;Sc;0;ET;;;;;N;;;;;
11FDE;TAMIL SIGN PANAM;Sc;0;ET;;;;;N;;;;;
11FDF;TAMIL SIGN PON;Sc;0;ET;;;;;N;;;;;
11FE0;TAMIL SIGN VARAAKAN;Sc;0;ET;;;;;N;;;;;
11FE1;TAMIL SIGN PAARAM;So;0;ON;;;;;N;;;;;
11FE2;TAMIL SIGN KUZHI;So;0;ON;;;;;N;;;;;
11FE3;TAMIL SIGN VELI;So;0;ON;;;;;N;;;;;
11FE4;TAMIL WET CULTIVATION SIGN;So;0;ON;;;;;N;;;;;
11FE5;TAMIL DRY CULTIVATION SIGN;So;0;ON;;;;;N;;;;;
11FE6;TAMIL LAND SIGN;So;0;ON;;;;;N;;;;;
11FE7;TAMIL SALT PAN SIGN;So;0;ON;;;;;N;;;;;
11FE8;TAMIL TRADITIONAL CREDIT SIGN;So;0;ON;;;;;N;;;;;
11FE9;TAMIL TRADITIONAL NUMBER SIGN;So;0;ON;;;;;N;;;;;
11FEA;TAMIL CURRENT SIGN;So;0;ON;;;;;N;;;;;
11FEB;TAMIL AND ODD SIGN;So;0;ON;;;;;N;;;;;
11FEC;TAMIL SPENT SIGN;So;0;ON;;;;;N;;;;;
11FED;TAMIL TOTAL SIGN;So;0;ON;;;;;N;;;;;
11FEE;TAMIL IN POSSESSION SIGN;So;0;ON;;;;;N;;;;;
11FEF;TAMIL STARTING FROM SIGN;So;0;ON;;;;;N;;;;;
11FF0;TAMIL SIGN MUTHALIYA;So;0;ON;;;;;N;;;;;
11FF1;TAMIL SIGN VAKAIYARAA;So;0;ON;;;;;N;;;;;
11FFF;TAMIL PUNCTUATION END OF TEXT;Po;0;L;;;;;N;;;;;
12000;CUNEIFORM SIGN A;Lo;0;L;;;;;N;;;;;
12001;CUNEIFORM SIGN A TIMES A;Lo;0;L;;;;;N;;;;;
12002;CUNEIFORM SIGN A TIMES BAD;Lo;0;L;;;;;N;;;;;
@ -23265,6 +23441,15 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
1342C;EGYPTIAN HIEROGLYPH AA030;Lo;0;L;;;;;N;;;;;
1342D;EGYPTIAN HIEROGLYPH AA031;Lo;0;L;;;;;N;;;;;
1342E;EGYPTIAN HIEROGLYPH AA032;Lo;0;L;;;;;N;;;;;
13430;EGYPTIAN HIEROGLYPH VERTICAL JOINER;Cf;0;L;;;;;N;;;;;
13431;EGYPTIAN HIEROGLYPH HORIZONTAL JOINER;Cf;0;L;;;;;N;;;;;
13432;EGYPTIAN HIEROGLYPH INSERT AT TOP START;Cf;0;L;;;;;N;;;;;
13433;EGYPTIAN HIEROGLYPH INSERT AT BOTTOM START;Cf;0;L;;;;;N;;;;;
13434;EGYPTIAN HIEROGLYPH INSERT AT TOP END;Cf;0;L;;;;;N;;;;;
13435;EGYPTIAN HIEROGLYPH INSERT AT BOTTOM END;Cf;0;L;;;;;N;;;;;
13436;EGYPTIAN HIEROGLYPH OVERLAY MIDDLE;Cf;0;L;;;;;N;;;;;
13437;EGYPTIAN HIEROGLYPH BEGIN SEGMENT;Cf;0;L;;;;;N;;;;;
13438;EGYPTIAN HIEROGLYPH END SEGMENT;Cf;0;L;;;;;N;;;;;
14400;ANATOLIAN HIEROGLYPH A001;Lo;0;L;;;;;N;;;;;
14401;ANATOLIAN HIEROGLYPH A002;Lo;0;L;;;;;N;;;;;
14402;ANATOLIAN HIEROGLYPH A003;Lo;0;L;;;;;N;;;;;
@ -24783,6 +24968,13 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
16F42;MIAO LETTER WA;Lo;0;L;;;;;N;;;;;
16F43;MIAO LETTER AH;Lo;0;L;;;;;N;;;;;
16F44;MIAO LETTER HHA;Lo;0;L;;;;;N;;;;;
16F45;MIAO LETTER BRI;Lo;0;L;;;;;N;;;;;
16F46;MIAO LETTER SYI;Lo;0;L;;;;;N;;;;;
16F47;MIAO LETTER DZYI;Lo;0;L;;;;;N;;;;;
16F48;MIAO LETTER TE;Lo;0;L;;;;;N;;;;;
16F49;MIAO LETTER TSE;Lo;0;L;;;;;N;;;;;
16F4A;MIAO LETTER RTE;Lo;0;L;;;;;N;;;;;
16F4F;MIAO SIGN CONSONANT MODIFIER BAR;Mn;0;NSM;;;;;N;;;;;
16F50;MIAO LETTER NASALIZATION;Lo;0;L;;;;;N;;;;;
16F51;MIAO SIGN ASPIRATION;Mc;0;L;;;;;N;;;;;
16F52;MIAO SIGN REFORMED VOICING;Mc;0;L;;;;;N;;;;;
@ -24830,6 +25022,15 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
16F7C;MIAO VOWEL SIGN OU;Mc;0;L;;;;;N;;;;;
16F7D;MIAO VOWEL SIGN N;Mc;0;L;;;;;N;;;;;
16F7E;MIAO VOWEL SIGN NG;Mc;0;L;;;;;N;;;;;
16F7F;MIAO VOWEL SIGN UOG;Mc;0;L;;;;;N;;;;;
16F80;MIAO VOWEL SIGN YUI;Mc;0;L;;;;;N;;;;;
16F81;MIAO VOWEL SIGN OG;Mc;0;L;;;;;N;;;;;
16F82;MIAO VOWEL SIGN OER;Mc;0;L;;;;;N;;;;;
16F83;MIAO VOWEL SIGN VW;Mc;0;L;;;;;N;;;;;
16F84;MIAO VOWEL SIGN IG;Mc;0;L;;;;;N;;;;;
16F85;MIAO VOWEL SIGN EA;Mc;0;L;;;;;N;;;;;
16F86;MIAO VOWEL SIGN IONG;Mc;0;L;;;;;N;;;;;
16F87;MIAO VOWEL SIGN UI;Mc;0;L;;;;;N;;;;;
16F8F;MIAO TONE RIGHT;Mn;0;NSM;;;;;N;;;;;
16F90;MIAO TONE TOP RIGHT;Mn;0;NSM;;;;;N;;;;;
16F91;MIAO TONE ABOVE;Mn;0;NSM;;;;;N;;;;;
@ -24849,8 +25050,10 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
16F9F;MIAO LETTER REFORMED TONE-8;Lm;0;L;;;;;N;;;;;
16FE0;TANGUT ITERATION MARK;Lm;0;L;;;;;N;;;;;
16FE1;NUSHU ITERATION MARK;Lm;0;L;;;;;N;;;;;
16FE2;OLD CHINESE HOOK MARK;Po;0;ON;;;;;N;;;;;
16FE3;OLD CHINESE ITERATION MARK;Lm;0;L;;;;;N;;;;;
17000;<Tangut Ideograph, First>;Lo;0;L;;;;;N;;;;;
187F1;<Tangut Ideograph, Last>;Lo;0;L;;;;;N;;;;;
187F7;<Tangut Ideograph, Last>;Lo;0;L;;;;;N;;;;;
18800;TANGUT COMPONENT-001;Lo;0;L;;;;;N;;;;;
18801;TANGUT COMPONENT-002;Lo;0;L;;;;;N;;;;;
18802;TANGUT COMPONENT-003;Lo;0;L;;;;;N;;;;;
@ -25893,6 +26096,13 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
1B11C;HENTAIGANA LETTER WO-7;Lo;0;L;;;;;N;;;;;
1B11D;HENTAIGANA LETTER N-MU-MO-1;Lo;0;L;;;;;N;;;;;
1B11E;HENTAIGANA LETTER N-MU-MO-2;Lo;0;L;;;;;N;;;;;
1B150;HIRAGANA LETTER SMALL WI;Lo;0;L;;;;;N;;;;;
1B151;HIRAGANA LETTER SMALL WE;Lo;0;L;;;;;N;;;;;
1B152;HIRAGANA LETTER SMALL WO;Lo;0;L;;;;;N;;;;;
1B164;KATAKANA LETTER SMALL WI;Lo;0;L;;;;;N;;;;;
1B165;KATAKANA LETTER SMALL WE;Lo;0;L;;;;;N;;;;;
1B166;KATAKANA LETTER SMALL WO;Lo;0;L;;;;;N;;;;;
1B167;KATAKANA LETTER SMALL N;Lo;0;L;;;;;N;;;;;
1B170;NUSHU CHARACTER-1B170;Lo;0;L;;;;;N;;;;;
1B171;NUSHU CHARACTER-1B171;Lo;0;L;;;;;N;;;;;
1B172;NUSHU CHARACTER-1B172;Lo;0;L;;;;;N;;;;;
@ -28821,6 +29031,136 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
1E028;COMBINING GLAGOLITIC LETTER BIG YUS;Mn;230;NSM;;;;;N;;;;;
1E029;COMBINING GLAGOLITIC LETTER IOTATED BIG YUS;Mn;230;NSM;;;;;N;;;;;
1E02A;COMBINING GLAGOLITIC LETTER FITA;Mn;230;NSM;;;;;N;;;;;
1E100;NYIAKENG PUACHUE HMONG LETTER MA;Lo;0;L;;;;;N;;;;;
1E101;NYIAKENG PUACHUE HMONG LETTER TSA;Lo;0;L;;;;;N;;;;;
1E102;NYIAKENG PUACHUE HMONG LETTER NTA;Lo;0;L;;;;;N;;;;;
1E103;NYIAKENG PUACHUE HMONG LETTER TA;Lo;0;L;;;;;N;;;;;
1E104;NYIAKENG PUACHUE HMONG LETTER HA;Lo;0;L;;;;;N;;;;;
1E105;NYIAKENG PUACHUE HMONG LETTER NA;Lo;0;L;;;;;N;;;;;
1E106;NYIAKENG PUACHUE HMONG LETTER XA;Lo;0;L;;;;;N;;;;;
1E107;NYIAKENG PUACHUE HMONG LETTER NKA;Lo;0;L;;;;;N;;;;;
1E108;NYIAKENG PUACHUE HMONG LETTER CA;Lo;0;L;;;;;N;;;;;
1E109;NYIAKENG PUACHUE HMONG LETTER LA;Lo;0;L;;;;;N;;;;;
1E10A;NYIAKENG PUACHUE HMONG LETTER SA;Lo;0;L;;;;;N;;;;;
1E10B;NYIAKENG PUACHUE HMONG LETTER ZA;Lo;0;L;;;;;N;;;;;
1E10C;NYIAKENG PUACHUE HMONG LETTER NCA;Lo;0;L;;;;;N;;;;;
1E10D;NYIAKENG PUACHUE HMONG LETTER NTSA;Lo;0;L;;;;;N;;;;;
1E10E;NYIAKENG PUACHUE HMONG LETTER KA;Lo;0;L;;;;;N;;;;;
1E10F;NYIAKENG PUACHUE HMONG LETTER DA;Lo;0;L;;;;;N;;;;;
1E110;NYIAKENG PUACHUE HMONG LETTER NYA;Lo;0;L;;;;;N;;;;;
1E111;NYIAKENG PUACHUE HMONG LETTER NRA;Lo;0;L;;;;;N;;;;;
1E112;NYIAKENG PUACHUE HMONG LETTER VA;Lo;0;L;;;;;N;;;;;
1E113;NYIAKENG PUACHUE HMONG LETTER NTXA;Lo;0;L;;;;;N;;;;;
1E114;NYIAKENG PUACHUE HMONG LETTER TXA;Lo;0;L;;;;;N;;;;;
1E115;NYIAKENG PUACHUE HMONG LETTER FA;Lo;0;L;;;;;N;;;;;
1E116;NYIAKENG PUACHUE HMONG LETTER RA;Lo;0;L;;;;;N;;;;;
1E117;NYIAKENG PUACHUE HMONG LETTER QA;Lo;0;L;;;;;N;;;;;
1E118;NYIAKENG PUACHUE HMONG LETTER YA;Lo;0;L;;;;;N;;;;;
1E119;NYIAKENG PUACHUE HMONG LETTER NQA;Lo;0;L;;;;;N;;;;;
1E11A;NYIAKENG PUACHUE HMONG LETTER PA;Lo;0;L;;;;;N;;;;;
1E11B;NYIAKENG PUACHUE HMONG LETTER XYA;Lo;0;L;;;;;N;;;;;
1E11C;NYIAKENG PUACHUE HMONG LETTER NPA;Lo;0;L;;;;;N;;;;;
1E11D;NYIAKENG PUACHUE HMONG LETTER DLA;Lo;0;L;;;;;N;;;;;
1E11E;NYIAKENG PUACHUE HMONG LETTER NPLA;Lo;0;L;;;;;N;;;;;
1E11F;NYIAKENG PUACHUE HMONG LETTER HAH;Lo;0;L;;;;;N;;;;;
1E120;NYIAKENG PUACHUE HMONG LETTER MLA;Lo;0;L;;;;;N;;;;;
1E121;NYIAKENG PUACHUE HMONG LETTER PLA;Lo;0;L;;;;;N;;;;;
1E122;NYIAKENG PUACHUE HMONG LETTER GA;Lo;0;L;;;;;N;;;;;
1E123;NYIAKENG PUACHUE HMONG LETTER RRA;Lo;0;L;;;;;N;;;;;
1E124;NYIAKENG PUACHUE HMONG LETTER A;Lo;0;L;;;;;N;;;;;
1E125;NYIAKENG PUACHUE HMONG LETTER AA;Lo;0;L;;;;;N;;;;;
1E126;NYIAKENG PUACHUE HMONG LETTER I;Lo;0;L;;;;;N;;;;;
1E127;NYIAKENG PUACHUE HMONG LETTER U;Lo;0;L;;;;;N;;;;;
1E128;NYIAKENG PUACHUE HMONG LETTER O;Lo;0;L;;;;;N;;;;;
1E129;NYIAKENG PUACHUE HMONG LETTER OO;Lo;0;L;;;;;N;;;;;
1E12A;NYIAKENG PUACHUE HMONG LETTER E;Lo;0;L;;;;;N;;;;;
1E12B;NYIAKENG PUACHUE HMONG LETTER EE;Lo;0;L;;;;;N;;;;;
1E12C;NYIAKENG PUACHUE HMONG LETTER W;Lo;0;L;;;;;N;;;;;
1E130;NYIAKENG PUACHUE HMONG TONE-B;Mn;230;NSM;;;;;N;;;;;
1E131;NYIAKENG PUACHUE HMONG TONE-M;Mn;230;NSM;;;;;N;;;;;
1E132;NYIAKENG PUACHUE HMONG TONE-J;Mn;230;NSM;;;;;N;;;;;
1E133;NYIAKENG PUACHUE HMONG TONE-V;Mn;230;NSM;;;;;N;;;;;
1E134;NYIAKENG PUACHUE HMONG TONE-S;Mn;230;NSM;;;;;N;;;;;
1E135;NYIAKENG PUACHUE HMONG TONE-G;Mn;230;NSM;;;;;N;;;;;
1E136;NYIAKENG PUACHUE HMONG TONE-D;Mn;230;NSM;;;;;N;;;;;
1E137;NYIAKENG PUACHUE HMONG SIGN FOR PERSON;Lm;0;L;;;;;N;;;;;
1E138;NYIAKENG PUACHUE HMONG SIGN FOR THING;Lm;0;L;;;;;N;;;;;
1E139;NYIAKENG PUACHUE HMONG SIGN FOR LOCATION;Lm;0;L;;;;;N;;;;;
1E13A;NYIAKENG PUACHUE HMONG SIGN FOR ANIMAL;Lm;0;L;;;;;N;;;;;
1E13B;NYIAKENG PUACHUE HMONG SIGN FOR INVERTEBRATE;Lm;0;L;;;;;N;;;;;
1E13C;NYIAKENG PUACHUE HMONG SIGN XW XW;Lm;0;L;;;;;N;;;;;
1E13D;NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER;Lm;0;L;;;;;N;;;;;
1E140;NYIAKENG PUACHUE HMONG DIGIT ZERO;Nd;0;L;;0;0;0;N;;;;;
1E141;NYIAKENG PUACHUE HMONG DIGIT ONE;Nd;0;L;;1;1;1;N;;;;;
1E142;NYIAKENG PUACHUE HMONG DIGIT TWO;Nd;0;L;;2;2;2;N;;;;;
1E143;NYIAKENG PUACHUE HMONG DIGIT THREE;Nd;0;L;;3;3;3;N;;;;;
1E144;NYIAKENG PUACHUE HMONG DIGIT FOUR;Nd;0;L;;4;4;4;N;;;;;
1E145;NYIAKENG PUACHUE HMONG DIGIT FIVE;Nd;0;L;;5;5;5;N;;;;;
1E146;NYIAKENG PUACHUE HMONG DIGIT SIX;Nd;0;L;;6;6;6;N;;;;;
1E147;NYIAKENG PUACHUE HMONG DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;;
1E148;NYIAKENG PUACHUE HMONG DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;;
1E149;NYIAKENG PUACHUE HMONG DIGIT NINE;Nd;0;L;;9;9;9;N;;;;;
1E14E;NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ;Lo;0;L;;;;;N;;;;;
1E14F;NYIAKENG PUACHUE HMONG CIRCLED CA;So;0;L;;;;;N;;;;;
1E2C0;WANCHO LETTER AA;Lo;0;L;;;;;N;;;;;
1E2C1;WANCHO LETTER A;Lo;0;L;;;;;N;;;;;
1E2C2;WANCHO LETTER BA;Lo;0;L;;;;;N;;;;;
1E2C3;WANCHO LETTER CA;Lo;0;L;;;;;N;;;;;
1E2C4;WANCHO LETTER DA;Lo;0;L;;;;;N;;;;;
1E2C5;WANCHO LETTER GA;Lo;0;L;;;;;N;;;;;
1E2C6;WANCHO LETTER YA;Lo;0;L;;;;;N;;;;;
1E2C7;WANCHO LETTER PHA;Lo;0;L;;;;;N;;;;;
1E2C8;WANCHO LETTER LA;Lo;0;L;;;;;N;;;;;
1E2C9;WANCHO LETTER NA;Lo;0;L;;;;;N;;;;;
1E2CA;WANCHO LETTER PA;Lo;0;L;;;;;N;;;;;
1E2CB;WANCHO LETTER TA;Lo;0;L;;;;;N;;;;;
1E2CC;WANCHO LETTER THA;Lo;0;L;;;;;N;;;;;
1E2CD;WANCHO LETTER FA;Lo;0;L;;;;;N;;;;;
1E2CE;WANCHO LETTER SA;Lo;0;L;;;;;N;;;;;
1E2CF;WANCHO LETTER SHA;Lo;0;L;;;;;N;;;;;
1E2D0;WANCHO LETTER JA;Lo;0;L;;;;;N;;;;;
1E2D1;WANCHO LETTER ZA;Lo;0;L;;;;;N;;;;;
1E2D2;WANCHO LETTER WA;Lo;0;L;;;;;N;;;;;
1E2D3;WANCHO LETTER VA;Lo;0;L;;;;;N;;;;;
1E2D4;WANCHO LETTER KA;Lo;0;L;;;;;N;;;;;
1E2D5;WANCHO LETTER O;Lo;0;L;;;;;N;;;;;
1E2D6;WANCHO LETTER AU;Lo;0;L;;;;;N;;;;;
1E2D7;WANCHO LETTER RA;Lo;0;L;;;;;N;;;;;
1E2D8;WANCHO LETTER MA;Lo;0;L;;;;;N;;;;;
1E2D9;WANCHO LETTER KHA;Lo;0;L;;;;;N;;;;;
1E2DA;WANCHO LETTER HA;Lo;0;L;;;;;N;;;;;
1E2DB;WANCHO LETTER E;Lo;0;L;;;;;N;;;;;
1E2DC;WANCHO LETTER I;Lo;0;L;;;;;N;;;;;
1E2DD;WANCHO LETTER NGA;Lo;0;L;;;;;N;;;;;
1E2DE;WANCHO LETTER U;Lo;0;L;;;;;N;;;;;
1E2DF;WANCHO LETTER LLHA;Lo;0;L;;;;;N;;;;;
1E2E0;WANCHO LETTER TSA;Lo;0;L;;;;;N;;;;;
1E2E1;WANCHO LETTER TRA;Lo;0;L;;;;;N;;;;;
1E2E2;WANCHO LETTER ONG;Lo;0;L;;;;;N;;;;;
1E2E3;WANCHO LETTER AANG;Lo;0;L;;;;;N;;;;;
1E2E4;WANCHO LETTER ANG;Lo;0;L;;;;;N;;;;;
1E2E5;WANCHO LETTER ING;Lo;0;L;;;;;N;;;;;
1E2E6;WANCHO LETTER ON;Lo;0;L;;;;;N;;;;;
1E2E7;WANCHO LETTER EN;Lo;0;L;;;;;N;;;;;
1E2E8;WANCHO LETTER AAN;Lo;0;L;;;;;N;;;;;
1E2E9;WANCHO LETTER NYA;Lo;0;L;;;;;N;;;;;
1E2EA;WANCHO LETTER UEN;Lo;0;L;;;;;N;;;;;
1E2EB;WANCHO LETTER YIH;Lo;0;L;;;;;N;;;;;
1E2EC;WANCHO TONE TUP;Mn;230;NSM;;;;;N;;;;;
1E2ED;WANCHO TONE TUPNI;Mn;230;NSM;;;;;N;;;;;
1E2EE;WANCHO TONE KOI;Mn;230;NSM;;;;;N;;;;;
1E2EF;WANCHO TONE KOINI;Mn;230;NSM;;;;;N;;;;;
1E2F0;WANCHO DIGIT ZERO;Nd;0;L;;0;0;0;N;;;;;
1E2F1;WANCHO DIGIT ONE;Nd;0;L;;1;1;1;N;;;;;
1E2F2;WANCHO DIGIT TWO;Nd;0;L;;2;2;2;N;;;;;
1E2F3;WANCHO DIGIT THREE;Nd;0;L;;3;3;3;N;;;;;
1E2F4;WANCHO DIGIT FOUR;Nd;0;L;;4;4;4;N;;;;;
1E2F5;WANCHO DIGIT FIVE;Nd;0;L;;5;5;5;N;;;;;
1E2F6;WANCHO DIGIT SIX;Nd;0;L;;6;6;6;N;;;;;
1E2F7;WANCHO DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;;
1E2F8;WANCHO DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;;
1E2F9;WANCHO DIGIT NINE;Nd;0;L;;9;9;9;N;;;;;
1E2FF;WANCHO NGUN SIGN;Sc;0;ET;;;;;N;;;;;
1E800;MENDE KIKAKUI SYLLABLE M001 KI;Lo;0;R;;;;;N;;;;;
1E801;MENDE KIKAKUI SYLLABLE M002 KA;Lo;0;R;;;;;N;;;;;
1E802;MENDE KIKAKUI SYLLABLE M003 KU;Lo;0;R;;;;;N;;;;;
@ -29109,6 +29449,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
1E948;ADLAM CONSONANT MODIFIER;Mn;230;NSM;;;;;N;;;;;
1E949;ADLAM GEMINATE CONSONANT MODIFIER;Mn;230;NSM;;;;;N;;;;;
1E94A;ADLAM NUKTA;Mn;7;NSM;;;;;N;;;;;
1E94B;ADLAM NASALIZATION MARK;Lm;0;R;;;;;N;;;;;
1E950;ADLAM DIGIT ZERO;Nd;0;R;;0;0;0;N;;;;;
1E951;ADLAM DIGIT ONE;Nd;0;R;;1;1;1;N;;;;;
1E952;ADLAM DIGIT TWO;Nd;0;R;;2;2;2;N;;;;;
@ -29189,6 +29530,67 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
1ECB2;INDIC SIYAQ NUMBER ALTERNATE TWO;No;0;AL;;;;2;N;;;;;
1ECB3;INDIC SIYAQ NUMBER ALTERNATE TEN THOUSAND;No;0;AL;;;;10000;N;;;;;
1ECB4;INDIC SIYAQ ALTERNATE LAKH MARK;No;0;AL;;;;100000;N;;;;;
1ED01;OTTOMAN SIYAQ NUMBER ONE;No;0;AL;;;;1;N;;;;;
1ED02;OTTOMAN SIYAQ NUMBER TWO;No;0;AL;;;;2;N;;;;;
1ED03;OTTOMAN SIYAQ NUMBER THREE;No;0;AL;;;;3;N;;;;;
1ED04;OTTOMAN SIYAQ NUMBER FOUR;No;0;AL;;;;4;N;;;;;
1ED05;OTTOMAN SIYAQ NUMBER FIVE;No;0;AL;;;;5;N;;;;;
1ED06;OTTOMAN SIYAQ NUMBER SIX;No;0;AL;;;;6;N;;;;;
1ED07;OTTOMAN SIYAQ NUMBER SEVEN;No;0;AL;;;;7;N;;;;;
1ED08;OTTOMAN SIYAQ NUMBER EIGHT;No;0;AL;;;;8;N;;;;;
1ED09;OTTOMAN SIYAQ NUMBER NINE;No;0;AL;;;;9;N;;;;;
1ED0A;OTTOMAN SIYAQ NUMBER TEN;No;0;AL;;;;10;N;;;;;
1ED0B;OTTOMAN SIYAQ NUMBER TWENTY;No;0;AL;;;;20;N;;;;;
1ED0C;OTTOMAN SIYAQ NUMBER THIRTY;No;0;AL;;;;30;N;;;;;
1ED0D;OTTOMAN SIYAQ NUMBER FORTY;No;0;AL;;;;40;N;;;;;
1ED0E;OTTOMAN SIYAQ NUMBER FIFTY;No;0;AL;;;;50;N;;;;;
1ED0F;OTTOMAN SIYAQ NUMBER SIXTY;No;0;AL;;;;60;N;;;;;
1ED10;OTTOMAN SIYAQ NUMBER SEVENTY;No;0;AL;;;;70;N;;;;;
1ED11;OTTOMAN SIYAQ NUMBER EIGHTY;No;0;AL;;;;80;N;;;;;
1ED12;OTTOMAN SIYAQ NUMBER NINETY;No;0;AL;;;;90;N;;;;;
1ED13;OTTOMAN SIYAQ NUMBER ONE HUNDRED;No;0;AL;;;;100;N;;;;;
1ED14;OTTOMAN SIYAQ NUMBER TWO HUNDRED;No;0;AL;;;;200;N;;;;;
1ED15;OTTOMAN SIYAQ NUMBER THREE HUNDRED;No;0;AL;;;;300;N;;;;;
1ED16;OTTOMAN SIYAQ NUMBER FOUR HUNDRED;No;0;AL;;;;400;N;;;;;
1ED17;OTTOMAN SIYAQ NUMBER FIVE HUNDRED;No;0;AL;;;;500;N;;;;;
1ED18;OTTOMAN SIYAQ NUMBER SIX HUNDRED;No;0;AL;;;;600;N;;;;;
1ED19;OTTOMAN SIYAQ NUMBER SEVEN HUNDRED;No;0;AL;;;;700;N;;;;;
1ED1A;OTTOMAN SIYAQ NUMBER EIGHT HUNDRED;No;0;AL;;;;800;N;;;;;
1ED1B;OTTOMAN SIYAQ NUMBER NINE HUNDRED;No;0;AL;;;;900;N;;;;;
1ED1C;OTTOMAN SIYAQ NUMBER ONE THOUSAND;No;0;AL;;;;1000;N;;;;;
1ED1D;OTTOMAN SIYAQ NUMBER TWO THOUSAND;No;0;AL;;;;2000;N;;;;;
1ED1E;OTTOMAN SIYAQ NUMBER THREE THOUSAND;No;0;AL;;;;3000;N;;;;;
1ED1F;OTTOMAN SIYAQ NUMBER FOUR THOUSAND;No;0;AL;;;;4000;N;;;;;
1ED20;OTTOMAN SIYAQ NUMBER FIVE THOUSAND;No;0;AL;;;;5000;N;;;;;
1ED21;OTTOMAN SIYAQ NUMBER SIX THOUSAND;No;0;AL;;;;6000;N;;;;;
1ED22;OTTOMAN SIYAQ NUMBER SEVEN THOUSAND;No;0;AL;;;;7000;N;;;;;
1ED23;OTTOMAN SIYAQ NUMBER EIGHT THOUSAND;No;0;AL;;;;8000;N;;;;;
1ED24;OTTOMAN SIYAQ NUMBER NINE THOUSAND;No;0;AL;;;;9000;N;;;;;
1ED25;OTTOMAN SIYAQ NUMBER TEN THOUSAND;No;0;AL;;;;10000;N;;;;;
1ED26;OTTOMAN SIYAQ NUMBER TWENTY THOUSAND;No;0;AL;;;;20000;N;;;;;
1ED27;OTTOMAN SIYAQ NUMBER THIRTY THOUSAND;No;0;AL;;;;30000;N;;;;;
1ED28;OTTOMAN SIYAQ NUMBER FORTY THOUSAND;No;0;AL;;;;40000;N;;;;;
1ED29;OTTOMAN SIYAQ NUMBER FIFTY THOUSAND;No;0;AL;;;;50000;N;;;;;
1ED2A;OTTOMAN SIYAQ NUMBER SIXTY THOUSAND;No;0;AL;;;;60000;N;;;;;
1ED2B;OTTOMAN SIYAQ NUMBER SEVENTY THOUSAND;No;0;AL;;;;70000;N;;;;;
1ED2C;OTTOMAN SIYAQ NUMBER EIGHTY THOUSAND;No;0;AL;;;;80000;N;;;;;
1ED2D;OTTOMAN SIYAQ NUMBER NINETY THOUSAND;No;0;AL;;;;90000;N;;;;;
1ED2E;OTTOMAN SIYAQ MARRATAN;So;0;AL;;;;;N;;;;;
1ED2F;OTTOMAN SIYAQ ALTERNATE NUMBER TWO;No;0;AL;;;;2;N;;;;;
1ED30;OTTOMAN SIYAQ ALTERNATE NUMBER THREE;No;0;AL;;;;3;N;;;;;
1ED31;OTTOMAN SIYAQ ALTERNATE NUMBER FOUR;No;0;AL;;;;4;N;;;;;
1ED32;OTTOMAN SIYAQ ALTERNATE NUMBER FIVE;No;0;AL;;;;5;N;;;;;
1ED33;OTTOMAN SIYAQ ALTERNATE NUMBER SIX;No;0;AL;;;;6;N;;;;;
1ED34;OTTOMAN SIYAQ ALTERNATE NUMBER SEVEN;No;0;AL;;;;7;N;;;;;
1ED35;OTTOMAN SIYAQ ALTERNATE NUMBER EIGHT;No;0;AL;;;;8;N;;;;;
1ED36;OTTOMAN SIYAQ ALTERNATE NUMBER NINE;No;0;AL;;;;9;N;;;;;
1ED37;OTTOMAN SIYAQ ALTERNATE NUMBER TEN;No;0;AL;;;;10;N;;;;;
1ED38;OTTOMAN SIYAQ ALTERNATE NUMBER FOUR HUNDRED;No;0;AL;;;;400;N;;;;;
1ED39;OTTOMAN SIYAQ ALTERNATE NUMBER SIX HUNDRED;No;0;AL;;;;600;N;;;;;
1ED3A;OTTOMAN SIYAQ ALTERNATE NUMBER TWO THOUSAND;No;0;AL;;;;2000;N;;;;;
1ED3B;OTTOMAN SIYAQ ALTERNATE NUMBER TEN THOUSAND;No;0;AL;;;;10000;N;;;;;
1ED3C;OTTOMAN SIYAQ FRACTION ONE HALF;No;0;AL;;;;1/2;N;;;;;
1ED3D;OTTOMAN SIYAQ FRACTION ONE SIXTH;No;0;AL;;;;1/6;N;;;;;
1EE00;ARABIC MATHEMATICAL ALEF;Lo;0;AL;<font> 0627;;;;N;;;;;
1EE01;ARABIC MATHEMATICAL BEH;Lo;0;AL;<font> 0628;;;;N;;;;;
1EE02;ARABIC MATHEMATICAL JEEM;Lo;0;AL;<font> 062C;;;;N;;;;;
@ -29663,6 +30065,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
1F169;NEGATIVE CIRCLED LATIN CAPITAL LETTER Z;So;0;L;;;;;N;;;;;
1F16A;RAISED MC SIGN;So;0;ON;<super> 004D 0043;;;;N;;;;;
1F16B;RAISED MD SIGN;So;0;ON;<super> 004D 0044;;;;N;;;;;
1F16C;RAISED MR SIGN;So;0;ON;<super> 004D 0052;;;;N;;;;;
1F170;NEGATIVE SQUARED LATIN CAPITAL LETTER A;So;0;L;;;;;N;;;;;
1F171;NEGATIVE SQUARED LATIN CAPITAL LETTER B;So;0;L;;;;;N;;;;;
1F172;NEGATIVE SQUARED LATIN CAPITAL LETTER C;So;0;L;;;;;N;;;;;
@ -30795,6 +31198,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
1F6D2;SHOPPING TROLLEY;So;0;ON;;;;;N;;;;;
1F6D3;STUPA;So;0;ON;;;;;N;;;;;
1F6D4;PAGODA;So;0;ON;;;;;N;;;;;
1F6D5;HINDU TEMPLE;So;0;ON;;;;;N;;;;;
1F6E0;HAMMER AND WRENCH;So;0;ON;;;;;N;;;;;
1F6E1;SHIELD;So;0;ON;;;;;N;;;;;
1F6E2;OIL DRUM;So;0;ON;;;;;N;;;;;
@ -30818,6 +31222,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
1F6F7;SLED;So;0;ON;;;;;N;;;;;
1F6F8;FLYING SAUCER;So;0;ON;;;;;N;;;;;
1F6F9;SKATEBOARD;So;0;ON;;;;;N;;;;;
1F6FA;AUTO RICKSHAW;So;0;ON;;;;;N;;;;;
1F700;ALCHEMICAL SYMBOL FOR QUINTESSENCE;So;0;ON;;;;;N;;;;;
1F701;ALCHEMICAL SYMBOL FOR AIR;So;0;ON;;;;;N;;;;;
1F702;ALCHEMICAL SYMBOL FOR FIRE;So;0;ON;;;;;N;;;;;
@ -31023,6 +31428,18 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
1F7D6;NEGATIVE CIRCLED TRIANGLE;So;0;ON;;;;;N;;;;;
1F7D7;CIRCLED SQUARE;So;0;ON;;;;;N;;;;;
1F7D8;NEGATIVE CIRCLED SQUARE;So;0;ON;;;;;N;;;;;
1F7E0;LARGE ORANGE CIRCLE;So;0;ON;;;;;N;;;;;
1F7E1;LARGE YELLOW CIRCLE;So;0;ON;;;;;N;;;;;
1F7E2;LARGE GREEN CIRCLE;So;0;ON;;;;;N;;;;;
1F7E3;LARGE PURPLE CIRCLE;So;0;ON;;;;;N;;;;;
1F7E4;LARGE BROWN CIRCLE;So;0;ON;;;;;N;;;;;
1F7E5;LARGE RED SQUARE;So;0;ON;;;;;N;;;;;
1F7E6;LARGE BLUE SQUARE;So;0;ON;;;;;N;;;;;
1F7E7;LARGE ORANGE SQUARE;So;0;ON;;;;;N;;;;;
1F7E8;LARGE YELLOW SQUARE;So;0;ON;;;;;N;;;;;
1F7E9;LARGE GREEN SQUARE;So;0;ON;;;;;N;;;;;
1F7EA;LARGE PURPLE SQUARE;So;0;ON;;;;;N;;;;;
1F7EB;LARGE BROWN SQUARE;So;0;ON;;;;;N;;;;;
1F800;LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD;So;0;ON;;;;;N;;;;;
1F801;UPWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD;So;0;ON;;;;;N;;;;;
1F802;RIGHTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD;So;0;ON;;;;;N;;;;;
@ -31183,6 +31600,9 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
1F909;DOWNWARD FACING NOTCHED HOOK;So;0;ON;;;;;N;;;;;
1F90A;DOWNWARD FACING HOOK WITH DOT;So;0;ON;;;;;N;;;;;
1F90B;DOWNWARD FACING NOTCHED HOOK WITH DOT;So;0;ON;;;;;N;;;;;
1F90D;WHITE HEART;So;0;ON;;;;;N;;;;;
1F90E;BROWN HEART;So;0;ON;;;;;N;;;;;
1F90F;PINCHING HAND;So;0;ON;;;;;N;;;;;
1F910;ZIPPER-MOUTH FACE;So;0;ON;;;;;N;;;;;
1F911;MONEY-MOUTH FACE;So;0;ON;;;;;N;;;;;
1F912;FACE WITH THERMOMETER;So;0;ON;;;;;N;;;;;
@ -31230,6 +31650,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
1F93C;WRESTLERS;So;0;ON;;;;;N;;;;;
1F93D;WATER POLO;So;0;ON;;;;;N;;;;;
1F93E;HANDBALL;So;0;ON;;;;;N;;;;;
1F93F;DIVING MASK;So;0;ON;;;;;N;;;;;
1F940;WILTED FLOWER;So;0;ON;;;;;N;;;;;
1F941;DRUM WITH DRUMSTICKS;So;0;ON;;;;;N;;;;;
1F942;CLINKING GLASSES;So;0;ON;;;;;N;;;;;
@ -31279,11 +31700,13 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
1F96E;MOON CAKE;So;0;ON;;;;;N;;;;;
1F96F;BAGEL;So;0;ON;;;;;N;;;;;
1F970;SMILING FACE WITH SMILING EYES AND THREE HEARTS;So;0;ON;;;;;N;;;;;
1F971;YAWNING FACE;So;0;ON;;;;;N;;;;;
1F973;FACE WITH PARTY HORN AND PARTY HAT;So;0;ON;;;;;N;;;;;
1F974;FACE WITH UNEVEN EYES AND WAVY MOUTH;So;0;ON;;;;;N;;;;;
1F975;OVERHEATED FACE;So;0;ON;;;;;N;;;;;
1F976;FREEZING FACE;So;0;ON;;;;;N;;;;;
1F97A;FACE WITH PLEADING EYES;So;0;ON;;;;;N;;;;;
1F97B;SARI;So;0;ON;;;;;N;;;;;
1F97C;LAB COAT;So;0;ON;;;;;N;;;;;
1F97D;GOGGLES;So;0;ON;;;;;N;;;;;
1F97E;HIKING BOOT;So;0;ON;;;;;N;;;;;
@ -31323,6 +31746,14 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
1F9A0;MICROBE;So;0;ON;;;;;N;;;;;
1F9A1;BADGER;So;0;ON;;;;;N;;;;;
1F9A2;SWAN;So;0;ON;;;;;N;;;;;
1F9A5;SLOTH;So;0;ON;;;;;N;;;;;
1F9A6;OTTER;So;0;ON;;;;;N;;;;;
1F9A7;ORANGUTAN;So;0;ON;;;;;N;;;;;
1F9A8;SKUNK;So;0;ON;;;;;N;;;;;
1F9A9;FLAMINGO;So;0;ON;;;;;N;;;;;
1F9AA;OYSTER;So;0;ON;;;;;N;;;;;
1F9AE;GUIDE DOG;So;0;ON;;;;;N;;;;;
1F9AF;PROBING CANE;So;0;ON;;;;;N;;;;;
1F9B0;EMOJI COMPONENT RED HAIR;So;0;ON;;;;;N;;;;;
1F9B1;EMOJI COMPONENT CURLY HAIR;So;0;ON;;;;;N;;;;;
1F9B2;EMOJI COMPONENT BALD;So;0;ON;;;;;N;;;;;
@ -31333,9 +31764,26 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
1F9B7;TOOTH;So;0;ON;;;;;N;;;;;
1F9B8;SUPERHERO;So;0;ON;;;;;N;;;;;
1F9B9;SUPERVILLAIN;So;0;ON;;;;;N;;;;;
1F9BA;SAFETY VEST;So;0;ON;;;;;N;;;;;
1F9BB;EAR WITH HEARING AID;So;0;ON;;;;;N;;;;;
1F9BC;MOTORIZED WHEELCHAIR;So;0;ON;;;;;N;;;;;
1F9BD;MANUAL WHEELCHAIR;So;0;ON;;;;;N;;;;;
1F9BE;MECHANICAL ARM;So;0;ON;;;;;N;;;;;
1F9BF;MECHANICAL LEG;So;0;ON;;;;;N;;;;;
1F9C0;CHEESE WEDGE;So;0;ON;;;;;N;;;;;
1F9C1;CUPCAKE;So;0;ON;;;;;N;;;;;
1F9C2;SALT SHAKER;So;0;ON;;;;;N;;;;;
1F9C3;BEVERAGE BOX;So;0;ON;;;;;N;;;;;
1F9C4;GARLIC;So;0;ON;;;;;N;;;;;
1F9C5;ONION;So;0;ON;;;;;N;;;;;
1F9C6;FALAFEL;So;0;ON;;;;;N;;;;;
1F9C7;WAFFLE;So;0;ON;;;;;N;;;;;
1F9C8;BUTTER;So;0;ON;;;;;N;;;;;
1F9C9;MATE DRINK;So;0;ON;;;;;N;;;;;
1F9CA;ICE CUBE;So;0;ON;;;;;N;;;;;
1F9CD;STANDING PERSON;So;0;ON;;;;;N;;;;;
1F9CE;KNEELING PERSON;So;0;ON;;;;;N;;;;;
1F9CF;DEAF PERSON;So;0;ON;;;;;N;;;;;
1F9D0;FACE WITH MONOCLE;So;0;ON;;;;;N;;;;;
1F9D1;ADULT;So;0;ON;;;;;N;;;;;
1F9D2;CHILD;So;0;ON;;;;;N;;;;;
@ -31384,6 +31832,90 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
1F9FD;SPONGE;So;0;ON;;;;;N;;;;;
1F9FE;RECEIPT;So;0;ON;;;;;N;;;;;
1F9FF;NAZAR AMULET;So;0;ON;;;;;N;;;;;
1FA00;NEUTRAL CHESS KING;So;0;ON;;;;;N;;;;;
1FA01;NEUTRAL CHESS QUEEN;So;0;ON;;;;;N;;;;;
1FA02;NEUTRAL CHESS ROOK;So;0;ON;;;;;N;;;;;
1FA03;NEUTRAL CHESS BISHOP;So;0;ON;;;;;N;;;;;
1FA04;NEUTRAL CHESS KNIGHT;So;0;ON;;;;;N;;;;;
1FA05;NEUTRAL CHESS PAWN;So;0;ON;;;;;N;;;;;
1FA06;WHITE CHESS KNIGHT ROTATED FORTY-FIVE DEGREES;So;0;ON;;;;;N;;;;;
1FA07;BLACK CHESS KNIGHT ROTATED FORTY-FIVE DEGREES;So;0;ON;;;;;N;;;;;
1FA08;NEUTRAL CHESS KNIGHT ROTATED FORTY-FIVE DEGREES;So;0;ON;;;;;N;;;;;
1FA09;WHITE CHESS KING ROTATED NINETY DEGREES;So;0;ON;;;;;N;;;;;
1FA0A;WHITE CHESS QUEEN ROTATED NINETY DEGREES;So;0;ON;;;;;N;;;;;
1FA0B;WHITE CHESS ROOK ROTATED NINETY DEGREES;So;0;ON;;;;;N;;;;;
1FA0C;WHITE CHESS BISHOP ROTATED NINETY DEGREES;So;0;ON;;;;;N;;;;;
1FA0D;WHITE CHESS KNIGHT ROTATED NINETY DEGREES;So;0;ON;;;;;N;;;;;
1FA0E;WHITE CHESS PAWN ROTATED NINETY DEGREES;So;0;ON;;;;;N;;;;;
1FA0F;BLACK CHESS KING ROTATED NINETY DEGREES;So;0;ON;;;;;N;;;;;
1FA10;BLACK CHESS QUEEN ROTATED NINETY DEGREES;So;0;ON;;;;;N;;;;;
1FA11;BLACK CHESS ROOK ROTATED NINETY DEGREES;So;0;ON;;;;;N;;;;;
1FA12;BLACK CHESS BISHOP ROTATED NINETY DEGREES;So;0;ON;;;;;N;;;;;
1FA13;BLACK CHESS KNIGHT ROTATED NINETY DEGREES;So;0;ON;;;;;N;;;;;
1FA14;BLACK CHESS PAWN ROTATED NINETY DEGREES;So;0;ON;;;;;N;;;;;
1FA15;NEUTRAL CHESS KING ROTATED NINETY DEGREES;So;0;ON;;;;;N;;;;;
1FA16;NEUTRAL CHESS QUEEN ROTATED NINETY DEGREES;So;0;ON;;;;;N;;;;;
1FA17;NEUTRAL CHESS ROOK ROTATED NINETY DEGREES;So;0;ON;;;;;N;;;;;
1FA18;NEUTRAL CHESS BISHOP ROTATED NINETY DEGREES;So;0;ON;;;;;N;;;;;
1FA19;NEUTRAL CHESS KNIGHT ROTATED NINETY DEGREES;So;0;ON;;;;;N;;;;;
1FA1A;NEUTRAL CHESS PAWN ROTATED NINETY DEGREES;So;0;ON;;;;;N;;;;;
1FA1B;WHITE CHESS KNIGHT ROTATED ONE HUNDRED THIRTY-FIVE DEGREES;So;0;ON;;;;;N;;;;;
1FA1C;BLACK CHESS KNIGHT ROTATED ONE HUNDRED THIRTY-FIVE DEGREES;So;0;ON;;;;;N;;;;;
1FA1D;NEUTRAL CHESS KNIGHT ROTATED ONE HUNDRED THIRTY-FIVE DEGREES;So;0;ON;;;;;N;;;;;
1FA1E;WHITE CHESS TURNED KING;So;0;ON;;;;;N;;;;;
1FA1F;WHITE CHESS TURNED QUEEN;So;0;ON;;;;;N;;;;;
1FA20;WHITE CHESS TURNED ROOK;So;0;ON;;;;;N;;;;;
1FA21;WHITE CHESS TURNED BISHOP;So;0;ON;;;;;N;;;;;
1FA22;WHITE CHESS TURNED KNIGHT;So;0;ON;;;;;N;;;;;
1FA23;WHITE CHESS TURNED PAWN;So;0;ON;;;;;N;;;;;
1FA24;BLACK CHESS TURNED KING;So;0;ON;;;;;N;;;;;
1FA25;BLACK CHESS TURNED QUEEN;So;0;ON;;;;;N;;;;;
1FA26;BLACK CHESS TURNED ROOK;So;0;ON;;;;;N;;;;;
1FA27;BLACK CHESS TURNED BISHOP;So;0;ON;;;;;N;;;;;
1FA28;BLACK CHESS TURNED KNIGHT;So;0;ON;;;;;N;;;;;
1FA29;BLACK CHESS TURNED PAWN;So;0;ON;;;;;N;;;;;
1FA2A;NEUTRAL CHESS TURNED KING;So;0;ON;;;;;N;;;;;
1FA2B;NEUTRAL CHESS TURNED QUEEN;So;0;ON;;;;;N;;;;;
1FA2C;NEUTRAL CHESS TURNED ROOK;So;0;ON;;;;;N;;;;;
1FA2D;NEUTRAL CHESS TURNED BISHOP;So;0;ON;;;;;N;;;;;
1FA2E;NEUTRAL CHESS TURNED KNIGHT;So;0;ON;;;;;N;;;;;
1FA2F;NEUTRAL CHESS TURNED PAWN;So;0;ON;;;;;N;;;;;
1FA30;WHITE CHESS KNIGHT ROTATED TWO HUNDRED TWENTY-FIVE DEGREES;So;0;ON;;;;;N;;;;;
1FA31;BLACK CHESS KNIGHT ROTATED TWO HUNDRED TWENTY-FIVE DEGREES;So;0;ON;;;;;N;;;;;
1FA32;NEUTRAL CHESS KNIGHT ROTATED TWO HUNDRED TWENTY-FIVE DEGREES;So;0;ON;;;;;N;;;;;
1FA33;WHITE CHESS KING ROTATED TWO HUNDRED SEVENTY DEGREES;So;0;ON;;;;;N;;;;;
1FA34;WHITE CHESS QUEEN ROTATED TWO HUNDRED SEVENTY DEGREES;So;0;ON;;;;;N;;;;;
1FA35;WHITE CHESS ROOK ROTATED TWO HUNDRED SEVENTY DEGREES;So;0;ON;;;;;N;;;;;
1FA36;WHITE CHESS BISHOP ROTATED TWO HUNDRED SEVENTY DEGREES;So;0;ON;;;;;N;;;;;
1FA37;WHITE CHESS KNIGHT ROTATED TWO HUNDRED SEVENTY DEGREES;So;0;ON;;;;;N;;;;;
1FA38;WHITE CHESS PAWN ROTATED TWO HUNDRED SEVENTY DEGREES;So;0;ON;;;;;N;;;;;
1FA39;BLACK CHESS KING ROTATED TWO HUNDRED SEVENTY DEGREES;So;0;ON;;;;;N;;;;;
1FA3A;BLACK CHESS QUEEN ROTATED TWO HUNDRED SEVENTY DEGREES;So;0;ON;;;;;N;;;;;
1FA3B;BLACK CHESS ROOK ROTATED TWO HUNDRED SEVENTY DEGREES;So;0;ON;;;;;N;;;;;
1FA3C;BLACK CHESS BISHOP ROTATED TWO HUNDRED SEVENTY DEGREES;So;0;ON;;;;;N;;;;;
1FA3D;BLACK CHESS KNIGHT ROTATED TWO HUNDRED SEVENTY DEGREES;So;0;ON;;;;;N;;;;;
1FA3E;BLACK CHESS PAWN ROTATED TWO HUNDRED SEVENTY DEGREES;So;0;ON;;;;;N;;;;;
1FA3F;NEUTRAL CHESS KING ROTATED TWO HUNDRED SEVENTY DEGREES;So;0;ON;;;;;N;;;;;
1FA40;NEUTRAL CHESS QUEEN ROTATED TWO HUNDRED SEVENTY DEGREES;So;0;ON;;;;;N;;;;;
1FA41;NEUTRAL CHESS ROOK ROTATED TWO HUNDRED SEVENTY DEGREES;So;0;ON;;;;;N;;;;;
1FA42;NEUTRAL CHESS BISHOP ROTATED TWO HUNDRED SEVENTY DEGREES;So;0;ON;;;;;N;;;;;
1FA43;NEUTRAL CHESS KNIGHT ROTATED TWO HUNDRED SEVENTY DEGREES;So;0;ON;;;;;N;;;;;
1FA44;NEUTRAL CHESS PAWN ROTATED TWO HUNDRED SEVENTY DEGREES;So;0;ON;;;;;N;;;;;
1FA45;WHITE CHESS KNIGHT ROTATED THREE HUNDRED FIFTEEN DEGREES;So;0;ON;;;;;N;;;;;
1FA46;BLACK CHESS KNIGHT ROTATED THREE HUNDRED FIFTEEN DEGREES;So;0;ON;;;;;N;;;;;
1FA47;NEUTRAL CHESS KNIGHT ROTATED THREE HUNDRED FIFTEEN DEGREES;So;0;ON;;;;;N;;;;;
1FA48;WHITE CHESS EQUIHOPPER;So;0;ON;;;;;N;;;;;
1FA49;BLACK CHESS EQUIHOPPER;So;0;ON;;;;;N;;;;;
1FA4A;NEUTRAL CHESS EQUIHOPPER;So;0;ON;;;;;N;;;;;
1FA4B;WHITE CHESS EQUIHOPPER ROTATED NINETY DEGREES;So;0;ON;;;;;N;;;;;
1FA4C;BLACK CHESS EQUIHOPPER ROTATED NINETY DEGREES;So;0;ON;;;;;N;;;;;
1FA4D;NEUTRAL CHESS EQUIHOPPER ROTATED NINETY DEGREES;So;0;ON;;;;;N;;;;;
1FA4E;WHITE CHESS KNIGHT-QUEEN;So;0;ON;;;;;N;;;;;
1FA4F;WHITE CHESS KNIGHT-ROOK;So;0;ON;;;;;N;;;;;
1FA50;WHITE CHESS KNIGHT-BISHOP;So;0;ON;;;;;N;;;;;
1FA51;BLACK CHESS KNIGHT-QUEEN;So;0;ON;;;;;N;;;;;
1FA52;BLACK CHESS KNIGHT-ROOK;So;0;ON;;;;;N;;;;;
1FA53;BLACK CHESS KNIGHT-BISHOP;So;0;ON;;;;;N;;;;;
1FA60;XIANGQI RED GENERAL;So;0;ON;;;;;N;;;;;
1FA61;XIANGQI RED MANDARIN;So;0;ON;;;;;N;;;;;
1FA62;XIANGQI RED ELEPHANT;So;0;ON;;;;;N;;;;;
@ -31398,6 +31930,22 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
1FA6B;XIANGQI BLACK CHARIOT;So;0;ON;;;;;N;;;;;
1FA6C;XIANGQI BLACK CANNON;So;0;ON;;;;;N;;;;;
1FA6D;XIANGQI BLACK SOLDIER;So;0;ON;;;;;N;;;;;
1FA70;BALLET SHOES;So;0;ON;;;;;N;;;;;
1FA71;ONE-PIECE SWIMSUIT;So;0;ON;;;;;N;;;;;
1FA72;BRIEFS;So;0;ON;;;;;N;;;;;
1FA73;SHORTS;So;0;ON;;;;;N;;;;;
1FA78;DROP OF BLOOD;So;0;ON;;;;;N;;;;;
1FA79;ADHESIVE BANDAGE;So;0;ON;;;;;N;;;;;
1FA7A;STETHOSCOPE;So;0;ON;;;;;N;;;;;
1FA80;YO-YO;So;0;ON;;;;;N;;;;;
1FA81;KITE;So;0;ON;;;;;N;;;;;
1FA82;PARACHUTE;So;0;ON;;;;;N;;;;;
1FA90;RINGED PLANET;So;0;ON;;;;;N;;;;;
1FA91;CHAIR;So;0;ON;;;;;N;;;;;
1FA92;RAZOR;So;0;ON;;;;;N;;;;;
1FA93;AXE;So;0;ON;;;;;N;;;;;
1FA94;DIYA LAMP;So;0;ON;;;;;N;;;;;
1FA95;BANJO;So;0;ON;;;;;N;;;;;
20000;<CJK Ideograph Extension B, First>;Lo;0;L;;;;;N;;;;;
2A6D6;<CJK Ideograph Extension B, Last>;Lo;0;L;;;;;N;;;;;
2A700;<CJK Ideograph Extension C, First>;Lo;0;L;;;;;N;;;;;

View File

@ -1 +1 @@
11.0.0
12.1.0

View File

@ -1,10 +1,11 @@
# GraphemeBreakProperty-8.0.0.txt
# Date: 2015-02-13, 13:47:14 GMT [MD]
# GraphemeBreakProperty-12.1.0.txt
# Date: 2019-03-10, 10:53:12 GMT
# Copyright (c) 2019 Unicode, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# Copyright (c) 1991-2015 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
# For documentation, see http://www.unicode.org/reports/tr44/
# ================================================
@ -17,6 +18,22 @@
# ================================================
0600..0605 ; Prepend # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE
06DD ; Prepend # Cf ARABIC END OF AYAH
070F ; Prepend # Cf SYRIAC ABBREVIATION MARK
08E2 ; Prepend # Cf ARABIC DISPUTED END OF AYAH
0D4E ; Prepend # Lo MALAYALAM LETTER DOT REPH
110BD ; Prepend # Cf KAITHI NUMBER SIGN
110CD ; Prepend # Cf KAITHI NUMBER SIGN ABOVE
111C2..111C3 ; Prepend # Lo [2] SHARADA SIGN JIHVAMULIYA..SHARADA SIGN UPADHMANIYA
11A3A ; Prepend # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA
11A84..11A89 ; Prepend # Lo [6] SOYOMBO SIGN JIHVAMULIYA..SOYOMBO CLUSTER-INITIAL LETTER SA
11D46 ; Prepend # Lo MASARAM GONDI REPHA
# Total code points: 22
# ================================================
000D ; CR # Cc <control-000D>
# Total code points: 1
@ -34,10 +51,7 @@
000E..001F ; Control # Cc [18] <control-000E>..<control-001F>
007F..009F ; Control # Cc [33] <control-007F>..<control-009F>
00AD ; Control # Cf SOFT HYPHEN
0600..0605 ; Control # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE
061C ; Control # Cf ARABIC LETTER MARK
06DD ; Control # Cf ARABIC END OF AYAH
070F ; Control # Cf SYRIAC ABBREVIATION MARK
180E ; Control # Cf MONGOLIAN VOWEL SEPARATOR
200B ; Control # Cf ZERO WIDTH SPACE
200E..200F ; Control # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
@ -47,21 +61,19 @@
2060..2064 ; Control # Cf [5] WORD JOINER..INVISIBLE PLUS
2065 ; Control # Cn <reserved-2065>
2066..206F ; Control # Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES
D800..DFFF ; Control # Cs [2048] <surrogate-D800>..<surrogate-DFFF>
FEFF ; Control # Cf ZERO WIDTH NO-BREAK SPACE
FFF0..FFF8 ; Control # Cn [9] <reserved-FFF0>..<reserved-FFF8>
FFF9..FFFB ; Control # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR
110BD ; Control # Cf KAITHI NUMBER SIGN
13430..13438 ; Control # Cf [9] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END SEGMENT
1BCA0..1BCA3 ; Control # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP
1D173..1D17A ; Control # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
E0000 ; Control # Cn <reserved-E0000>
E0001 ; Control # Cf LANGUAGE TAG
E0002..E001F ; Control # Cn [30] <reserved-E0002>..<reserved-E001F>
E0020..E007F ; Control # Cf [96] TAG SPACE..CANCEL TAG
E0080..E00FF ; Control # Cn [128] <reserved-E0080>..<reserved-E00FF>
E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
# Total code points: 6030
# Total code points: 3886
# ================================================
@ -84,11 +96,13 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
0730..074A ; Extend # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
07A6..07B0 ; Extend # Mn [11] THAANA ABAFILI..THAANA SUKUN
07EB..07F3 ; Extend # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
07FD ; Extend # Mn NKO DANTAYALAN
0816..0819 ; Extend # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH
081B..0823 ; Extend # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
0859..085B ; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
08D3..08E1 ; Extend # Mn [15] ARABIC SMALL LOW WAW..ARABIC SMALL HIGH SIGN SAFHA
08E3..0902 ; Extend # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA
093A ; Extend # Mn DEVANAGARI VOWEL SIGN OE
093C ; Extend # Mn DEVANAGARI SIGN NUKTA
@ -103,6 +117,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
09CD ; Extend # Mn BENGALI SIGN VIRAMA
09D7 ; Extend # Mc BENGALI AU LENGTH MARK
09E2..09E3 ; Extend # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL
09FE ; Extend # Mn BENGALI SANDHI MARK
0A01..0A02 ; Extend # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI
0A3C ; Extend # Mn GURMUKHI SIGN NUKTA
0A41..0A42 ; Extend # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU
@ -117,6 +132,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
0AC7..0AC8 ; Extend # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI
0ACD ; Extend # Mn GUJARATI SIGN VIRAMA
0AE2..0AE3 ; Extend # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL
0AFA..0AFF ; Extend # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE
0B01 ; Extend # Mn ORIYA SIGN CANDRABINDU
0B3C ; Extend # Mn ORIYA SIGN NUKTA
0B3E ; Extend # Mc ORIYA VOWEL SIGN AA
@ -132,6 +148,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
0BCD ; Extend # Mn TAMIL SIGN VIRAMA
0BD7 ; Extend # Mc TAMIL AU LENGTH MARK
0C00 ; Extend # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE
0C04 ; Extend # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE
0C3E..0C40 ; Extend # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
0C46..0C48 ; Extend # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI
0C4A..0C4D ; Extend # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
@ -145,7 +162,8 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
0CCC..0CCD ; Extend # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
0CD5..0CD6 ; Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
0CE2..0CE3 ; Extend # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
0D01 ; Extend # Mn MALAYALAM SIGN CANDRABINDU
0D00..0D01 ; Extend # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
0D3B..0D3C ; Extend # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA
0D3E ; Extend # Mc MALAYALAM VOWEL SIGN AA
0D41..0D44 ; Extend # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
0D4D ; Extend # Mn MALAYALAM SIGN VIRAMA
@ -160,8 +178,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
0E34..0E3A ; Extend # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU
0E47..0E4E ; Extend # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN
0EB1 ; Extend # Mn LAO VOWEL SIGN MAI KAN
0EB4..0EB9 ; Extend # Mn [6] LAO VOWEL SIGN I..LAO VOWEL SIGN UU
0EBB..0EBC ; Extend # Mn [2] LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO
0EB4..0EBC ; Extend # Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO
0EC8..0ECD ; Extend # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA
0F18..0F19 ; Extend # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS
0F35 ; Extend # Mn TIBETAN MARK NGAS BZUNG NYI ZLA
@ -195,6 +212,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
17C9..17D3 ; Extend # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
17DD ; Extend # Mn KHMER SIGN ATTHACAN
180B..180D ; Extend # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
1885..1886 ; Extend # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
18A9 ; Extend # Mn MONGOLIAN LETTER ALI GALI DAGALGA
1920..1922 ; Extend # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U
1927..1928 ; Extend # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O
@ -213,6 +231,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY
1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
1B34 ; Extend # Mn BALINESE SIGN REREKAN
1B35 ; Extend # Mc BALINESE VOWEL SIGN TEDUNG
1B36..1B3A ; Extend # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA
1B3C ; Extend # Mn BALINESE VOWEL SIGN LA LENGA
1B42 ; Extend # Mn BALINESE VOWEL SIGN PEPET
@ -233,9 +252,9 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
1CED ; Extend # Mn VEDIC SIGN TIRYAK
1CF4 ; Extend # Mn VEDIC TONE CANDRA ABOVE
1CF8..1CF9 ; Extend # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
1DC0..1DF5 ; Extend # Mn [54] COMBINING DOTTED GRAVE ACCENT..COMBINING UP TACK ABOVE
1DFC..1DFF ; Extend # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
200C..200D ; Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
1DC0..1DF9 ; Extend # Mn [58] COMBINING DOTTED GRAVE ACCENT..COMBINING WIDE INVERTED BRIDGE BELOW
1DFB..1DFF ; Extend # Mn [5] COMBINING DELETION MARK..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
200C ; Extend # Cf ZERO WIDTH NON-JOINER
20D0..20DC ; Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
20DD..20E0 ; Extend # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
20E1 ; Extend # Mn COMBINING LEFT RIGHT ARROW ABOVE
@ -256,14 +275,15 @@ A802 ; Extend # Mn SYLOTI NAGRI SIGN DVISVARA
A806 ; Extend # Mn SYLOTI NAGRI SIGN HASANTA
A80B ; Extend # Mn SYLOTI NAGRI SIGN ANUSVARA
A825..A826 ; Extend # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
A8C4 ; Extend # Mn SAURASHTRA SIGN VIRAMA
A8C4..A8C5 ; Extend # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU
A8E0..A8F1 ; Extend # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
A8FF ; Extend # Mn DEVANAGARI VOWEL SIGN AY
A926..A92D ; Extend # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU
A947..A951 ; Extend # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R
A980..A982 ; Extend # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR
A9B3 ; Extend # Mn JAVANESE SIGN CECAK TELU
A9B6..A9B9 ; Extend # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT
A9BC ; Extend # Mn JAVANESE VOWEL SIGN PEPET
A9BC..A9BD ; Extend # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET
A9E5 ; Extend # Mn MYANMAR SIGN SHAN SAW
AA29..AA2E ; Extend # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE
AA31..AA32 ; Extend # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE
@ -294,6 +314,8 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
10A38..10A3A ; Extend # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
10A3F ; Extend # Mn KHAROSHTHI VIRAMA
10AE5..10AE6 ; Extend # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
11001 ; Extend # Mn BRAHMI SIGN ANUSVARA
11038..11046 ; Extend # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA
1107F..11081 ; Extend # Mn [3] BRAHMI NUMBER JOINER..KAITHI SIGN ANUSVARA
@ -305,19 +327,24 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
11173 ; Extend # Mn MAHAJANI SIGN NUKTA
11180..11181 ; Extend # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA
111B6..111BE ; Extend # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O
111CA..111CC ; Extend # Mn [3] SHARADA SIGN NUKTA..SHARADA EXTRA SHORT VOWEL MARK
111C9..111CC ; Extend # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK
1122F..11231 ; Extend # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI
11234 ; Extend # Mn KHOJKI SIGN ANUSVARA
11236..11237 ; Extend # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA
1123E ; Extend # Mn KHOJKI SIGN SUKUN
112DF ; Extend # Mn KHUDAWADI SIGN ANUSVARA
112E3..112EA ; Extend # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA
11300..11301 ; Extend # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU
1133C ; Extend # Mn GRANTHA SIGN NUKTA
1133B..1133C ; Extend # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA
1133E ; Extend # Mc GRANTHA VOWEL SIGN AA
11340 ; Extend # Mn GRANTHA VOWEL SIGN II
11357 ; Extend # Mc GRANTHA AU LENGTH MARK
11366..1136C ; Extend # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX
11370..11374 ; Extend # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA
11438..1143F ; Extend # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI
11442..11444 ; Extend # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA
11446 ; Extend # Mn NEWA SIGN NUKTA
1145E ; Extend # Mn NEWA SANDHI MARK
114B0 ; Extend # Mc TIRHUTA VOWEL SIGN AA
114B3..114B8 ; Extend # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL
114BA ; Extend # Mn TIRHUTA VOWEL SIGN SHORT E
@ -339,8 +366,38 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
1171D..1171F ; Extend # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA
11722..11725 ; Extend # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU
11727..1172B ; Extend # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER
1182F..11837 ; Extend # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA
11839..1183A ; Extend # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA
119D4..119D7 ; Extend # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR
119DA..119DB ; Extend # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI
119E0 ; Extend # Mn NANDINAGARI SIGN VIRAMA
11A01..11A0A ; Extend # Mn [10] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL LENGTH MARK
11A33..11A38 ; Extend # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA
11A3B..11A3E ; Extend # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA
11A47 ; Extend # Mn ZANABAZAR SQUARE SUBJOINER
11A51..11A56 ; Extend # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE
11A59..11A5B ; Extend # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK
11A8A..11A96 ; Extend # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA
11A98..11A99 ; Extend # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER
11C30..11C36 ; Extend # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L
11C38..11C3D ; Extend # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA
11C3F ; Extend # Mn BHAIKSUKI SIGN VIRAMA
11C92..11CA7 ; Extend # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA
11CAA..11CB0 ; Extend # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA
11CB2..11CB3 ; Extend # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E
11CB5..11CB6 ; Extend # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU
11D31..11D36 ; Extend # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R
11D3A ; Extend # Mn MASARAM GONDI VOWEL SIGN E
11D3C..11D3D ; Extend # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O
11D3F..11D45 ; Extend # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA
11D47 ; Extend # Mn MASARAM GONDI RA-KARA
11D90..11D91 ; Extend # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI
11D95 ; Extend # Mn GUNJALA GONDI SIGN ANUSVARA
11D97 ; Extend # Mn GUNJALA GONDI VIRAMA
11EF3..11EF4 ; Extend # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U
16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE
16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
16F4F ; Extend # Mn MIAO SIGN CONSONANT MODIFIER BAR
16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
1BC9D..1BC9E ; Extend # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
1D165 ; Extend # Mc MUSICAL SYMBOL COMBINING STEM
@ -356,10 +413,20 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
1DA84 ; Extend # Mn SIGNWRITING LOCATION HEAD NECK
1DA9B..1DA9F ; Extend # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6
1DAA1..1DAAF ; Extend # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16
1E000..1E006 ; Extend # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
1E008..1E018 ; Extend # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
1E01B..1E021 ; Extend # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
1E023..1E024 ; Extend # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
1E026..1E02A ; Extend # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
1E130..1E136 ; Extend # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
1E2EC..1E2EF ; Extend # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI
1E8D0..1E8D6 ; Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
1E944..1E94A ; Extend # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
1F3FB..1F3FF ; Extend # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6
E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG
E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
# Total code points: 1610
# Total code points: 1970
# ================================================
@ -428,7 +495,6 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
1A57 ; SpacingMark # Mc TAI THAM CONSONANT SIGN LA TANG LAI
1A6D..1A72 ; SpacingMark # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI
1B04 ; SpacingMark # Mc BALINESE SIGN BISAH
1B35 ; SpacingMark # Mc BALINESE VOWEL SIGN TEDUNG
1B3B ; SpacingMark # Mc BALINESE VOWEL SIGN RA REPA TEDUNG
1B3D..1B41 ; SpacingMark # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG
1B43..1B44 ; SpacingMark # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG
@ -443,7 +509,7 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
1C24..1C2B ; SpacingMark # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU
1C34..1C35 ; SpacingMark # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG
1CE1 ; SpacingMark # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA
1CF2..1CF3 ; SpacingMark # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA
1CF7 ; SpacingMark # Mc VEDIC SIGN ATIKRAMA
A823..A824 ; SpacingMark # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I
A827 ; SpacingMark # Mc SYLOTI NAGRI VOWEL SIGN OO
A880..A881 ; SpacingMark # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA
@ -452,7 +518,7 @@ A952..A953 ; SpacingMark # Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA
A983 ; SpacingMark # Mc JAVANESE SIGN WIGNYAN
A9B4..A9B5 ; SpacingMark # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG
A9BA..A9BB ; SpacingMark # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE
A9BD..A9C0 ; SpacingMark # Mc [4] JAVANESE CONSONANT SIGN KERET..JAVANESE PANGKON
A9BE..A9C0 ; SpacingMark # Mc [3] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE PANGKON
AA2F..AA30 ; SpacingMark # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI
AA33..AA34 ; SpacingMark # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA
AA4D ; SpacingMark # Mc CHAM CONSONANT SIGN FINAL H
@ -469,6 +535,7 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK
110B0..110B2 ; SpacingMark # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II
110B7..110B8 ; SpacingMark # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU
1112C ; SpacingMark # Mc CHAKMA VOWEL SIGN E
11145..11146 ; SpacingMark # Mc [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI
11182 ; SpacingMark # Mc SHARADA SIGN VISARGA
111B3..111B5 ; SpacingMark # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II
111BF..111C0 ; SpacingMark # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA
@ -482,6 +549,9 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK
11347..11348 ; SpacingMark # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI
1134B..1134D ; SpacingMark # Mc [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA
11362..11363 ; SpacingMark # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL
11435..11437 ; SpacingMark # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II
11440..11441 ; SpacingMark # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU
11445 ; SpacingMark # Mc NEWA SIGN VISARGA
114B1..114B2 ; SpacingMark # Mc [2] TIRHUTA VOWEL SIGN I..TIRHUTA VOWEL SIGN II
114B9 ; SpacingMark # Mc TIRHUTA VOWEL SIGN E
114BB..114BC ; SpacingMark # Mc [2] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN O
@ -498,11 +568,28 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK
116B6 ; SpacingMark # Mc TAKRI SIGN VIRAMA
11720..11721 ; SpacingMark # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA
11726 ; SpacingMark # Mc AHOM VOWEL SIGN E
16F51..16F7E ; SpacingMark # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG
1182C..1182E ; SpacingMark # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II
11838 ; SpacingMark # Mc DOGRA SIGN VISARGA
119D1..119D3 ; SpacingMark # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II
119DC..119DF ; SpacingMark # Mc [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA
119E4 ; SpacingMark # Mc NANDINAGARI VOWEL SIGN PRISHTHAMATRA E
11A39 ; SpacingMark # Mc ZANABAZAR SQUARE SIGN VISARGA
11A57..11A58 ; SpacingMark # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU
11A97 ; SpacingMark # Mc SOYOMBO SIGN VISARGA
11C2F ; SpacingMark # Mc BHAIKSUKI VOWEL SIGN AA
11C3E ; SpacingMark # Mc BHAIKSUKI SIGN VISARGA
11CA9 ; SpacingMark # Mc MARCHEN SUBJOINED LETTER YA
11CB1 ; SpacingMark # Mc MARCHEN VOWEL SIGN I
11CB4 ; SpacingMark # Mc MARCHEN VOWEL SIGN O
11D8A..11D8E ; SpacingMark # Mc [5] GUNJALA GONDI VOWEL SIGN AA..GUNJALA GONDI VOWEL SIGN UU
11D93..11D94 ; SpacingMark # Mc [2] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI VOWEL SIGN AU
11D96 ; SpacingMark # Mc GUNJALA GONDI SIGN VISARGA
11EF5..11EF6 ; SpacingMark # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O
16F51..16F87 ; SpacingMark # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI
1D166 ; SpacingMark # Mc MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
1D16D ; SpacingMark # Mc MUSICAL SYMBOL COMBINING AUGMENTATION DOT
# Total code points: 330
# Total code points: 375
# ================================================
@ -1333,4 +1420,10 @@ D789..D7A3 ; LVT # Lo [27] HANGUL SYLLABLE HIG..HANGUL SYLLABLE HIH
# Total code points: 10773
# ================================================
200D ; ZWJ # Cf ZERO WIDTH JOINER
# Total code points: 1
# EOF

View File

@ -0,0 +1,630 @@
# GraphemeBreakTest-12.1.0.txt
# Date: 2019-03-10, 10:53:12 GMT
# Copyright (c) 2019 Unicode, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Default Grapheme_Cluster_Break Test
#
# Format:
# <string> (# <comment>)?
# <string> contains hex Unicode code points, with
# ÷ wherever there is a break opportunity, and
# × wherever there is not.
# <comment> the format can change, but currently it shows:
# - the sample character name
# - (x) the Grapheme_Cluster_Break property value for the sample character
# - [x] the rule that determines whether there is a break or not,
# as listed in the Rules section of GraphemeBreakTest.html
#
# These samples may be extended or changed in the future.
#
÷ 0020 ÷ 0020 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0020 × 0308 ÷ 0020 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0020 ÷ 000D ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0020 × 0308 ÷ 000D ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0020 ÷ 000A ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0020 × 0308 ÷ 000A ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0020 ÷ 0001 ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0020 × 0308 ÷ 0001 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0020 × 034F ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 0020 × 0308 × 034F ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 0020 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0020 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0020 ÷ 0600 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0020 × 0308 ÷ 0600 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0020 × 0903 ÷ # ÷ [0.2] SPACE (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0020 × 0308 × 0903 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0020 ÷ 1100 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0020 × 0308 ÷ 1100 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0020 ÷ 1160 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0020 × 0308 ÷ 1160 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0020 ÷ 11A8 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0020 × 0308 ÷ 11A8 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0020 ÷ AC00 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0020 × 0308 ÷ AC00 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0020 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0020 × 0308 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0020 ÷ 231A ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0020 × 0308 ÷ 231A ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0020 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0020 × 0308 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0020 × 200D ÷ # ÷ [0.2] SPACE (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0020 × 0308 × 200D ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0020 ÷ 0378 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0020 × 0308 ÷ 0378 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 000D ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] SPACE (Other) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 000D ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 000D × 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 000D ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 000D ÷ 034F ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 000D ÷ 0308 × 034F ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 000D ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 000D ÷ 0600 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 0600 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 000D ÷ 0903 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 000D ÷ 0308 × 0903 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 000D ÷ 1100 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 000D ÷ 1160 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 000D ÷ 11A8 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 000D ÷ AC00 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 000D ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 000D ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 000D ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 000D ÷ 231A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] WATCH (ExtPict) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 000D ÷ 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 000D ÷ 0308 × 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 000D ÷ 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 000D ÷ 0308 × 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 000D ÷ 0378 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 000A ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] SPACE (Other) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 000A ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 000A ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 000A ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 000A ÷ 034F ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 000A ÷ 0308 × 034F ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 000A ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 000A ÷ 0600 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 0600 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 000A ÷ 0903 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 000A ÷ 0308 × 0903 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 000A ÷ 1100 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 000A ÷ 1160 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 000A ÷ 11A8 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 000A ÷ AC00 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 000A ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 000A ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 000A ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 000A ÷ 231A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] WATCH (ExtPict) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 000A ÷ 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 000A ÷ 0308 × 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 000A ÷ 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 000A ÷ 0308 × 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 000A ÷ 0378 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0001 ÷ 0020 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] SPACE (Other) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0001 ÷ 000D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0001 ÷ 000A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0001 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0001 ÷ 034F ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 0001 ÷ 0308 × 034F ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 0001 ÷ 1F1E6 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0001 ÷ 0600 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 0600 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0001 ÷ 0903 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0001 ÷ 0308 × 0903 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0001 ÷ 1100 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0001 ÷ 1160 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0001 ÷ 11A8 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0001 ÷ AC00 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0001 ÷ AC01 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0001 ÷ 231A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] WATCH (ExtPict) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0001 ÷ 0300 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0001 ÷ 0308 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0001 ÷ 200D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0001 ÷ 0308 × 200D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0001 ÷ 0378 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 034F ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 034F × 0308 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 034F ÷ 000D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 034F × 0308 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 034F ÷ 000A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 034F × 0308 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 034F ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 034F × 0308 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 034F × 034F ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 034F × 0308 × 034F ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 034F ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 034F × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 034F ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 034F × 0308 ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 034F × 0903 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 034F × 0308 × 0903 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 034F ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 034F × 0308 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 034F ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 034F × 0308 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 034F ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 034F × 0308 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 034F ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 034F × 0308 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 034F ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 034F × 0308 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 034F ÷ 231A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 034F × 0308 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 034F × 0300 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 034F × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 034F × 200D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 034F × 0308 × 200D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 034F ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 034F × 0308 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 1F1E6 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1F1E6 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 1F1E6 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 1F1E6 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1F1E6 × 034F ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 1F1E6 × 0308 × 034F ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 1F1E6 × 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [12.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 1F1E6 ÷ 0600 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 0600 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 1F1E6 × 0903 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1F1E6 × 0308 × 0903 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1F1E6 ÷ 1100 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 1100 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1F1E6 ÷ 1160 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 1160 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 1F1E6 ÷ 11A8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 11A8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 1F1E6 ÷ AC00 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ AC00 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1F1E6 ÷ AC01 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ AC01 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1F1E6 ÷ 231A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 231A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 1F1E6 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 1F1E6 × 0308 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 1F1E6 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 1F1E6 × 0308 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 1F1E6 ÷ 0378 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 0378 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0600 × 0020 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] SPACE (Other) ÷ [0.3]
÷ 0600 × 0308 ÷ 0020 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0600 ÷ 000D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0600 × 0308 ÷ 000D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0600 ÷ 000A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0600 × 0308 ÷ 000A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0600 ÷ 0001 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0600 × 0308 ÷ 0001 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0600 × 034F ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 0600 × 0308 × 034F ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 0600 × 1F1E6 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0600 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0600 × 0600 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0600 × 0308 ÷ 0600 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0600 × 0903 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0600 × 0308 × 0903 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0600 × 1100 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0600 × 0308 ÷ 1100 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0600 × 1160 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0600 × 0308 ÷ 1160 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0600 × 11A8 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0600 × 0308 ÷ 11A8 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0600 × AC00 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0600 × 0308 ÷ AC00 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0600 × AC01 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0600 × 0308 ÷ AC01 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0600 × 231A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] WATCH (ExtPict) ÷ [0.3]
÷ 0600 × 0308 ÷ 231A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0600 × 0300 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0600 × 0308 × 0300 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0600 × 200D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0600 × 0308 × 200D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0600 × 0378 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] <reserved-0378> (Other) ÷ [0.3]
÷ 0600 × 0308 ÷ 0378 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0903 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0903 × 0308 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0903 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0903 × 0308 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0903 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0903 × 0308 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0903 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0903 × 0308 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0903 × 034F ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 0903 × 0308 × 034F ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 0903 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0903 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0903 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0903 × 0308 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0903 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0903 × 0308 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0903 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0903 × 0308 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0903 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0903 × 0308 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0903 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0903 × 0308 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0903 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0903 × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0903 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0903 × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0903 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0903 × 0308 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0903 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0903 × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0903 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0903 × 0308 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0903 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0903 × 0308 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 1100 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1100 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1100 ÷ 000D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 1100 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 1100 ÷ 000A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 1100 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 1100 ÷ 0001 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1100 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1100 × 034F ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 1100 × 0308 × 034F ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 1100 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 1100 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 1100 ÷ 0600 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 1100 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 1100 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1100 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1100 × 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1100 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1100 × 1160 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 1100 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 1100 ÷ 11A8 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 1100 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 1100 × AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1100 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1100 × AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1100 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1100 ÷ 231A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 1100 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 1100 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 1100 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 1100 × 200D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 1100 × 0308 × 200D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 1100 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 1100 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 1160 ÷ 0020 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1160 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1160 ÷ 000D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 1160 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 1160 ÷ 000A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 1160 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 1160 ÷ 0001 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1160 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1160 × 034F ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 1160 × 0308 × 034F ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 1160 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 1160 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 1160 ÷ 0600 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 1160 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 1160 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1160 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1160 ÷ 1100 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1160 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1160 × 1160 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 1160 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 1160 × 11A8 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 1160 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 1160 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1160 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1160 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1160 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1160 ÷ 231A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 1160 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 1160 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 1160 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 1160 × 200D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 1160 × 0308 × 200D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 1160 ÷ 0378 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 1160 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 11A8 ÷ 0020 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 11A8 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 11A8 ÷ 000D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 11A8 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 11A8 ÷ 000A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 11A8 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 11A8 ÷ 0001 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 11A8 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 11A8 × 034F ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 11A8 × 0308 × 034F ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 11A8 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 11A8 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 11A8 ÷ 0600 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 11A8 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 11A8 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 11A8 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 11A8 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 11A8 ÷ 1160 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 11A8 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 11A8 × 11A8 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 11A8 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 11A8 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 11A8 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 11A8 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 11A8 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 11A8 ÷ 231A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 11A8 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 11A8 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 11A8 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 11A8 × 200D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 11A8 × 0308 × 200D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 11A8 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 11A8 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ AC00 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ AC00 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ AC00 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ AC00 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ AC00 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ AC00 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ AC00 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ AC00 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ AC00 × 034F ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ AC00 × 0308 × 034F ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ AC00 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ AC00 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ AC00 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ AC00 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ AC00 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ AC00 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ AC00 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ AC00 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ AC00 × 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ AC00 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ AC00 × 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ AC00 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ AC00 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ AC00 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ AC00 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ AC00 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ AC00 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ AC00 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ AC00 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ AC00 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ AC00 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ AC00 × 0308 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ AC00 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ AC00 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ AC01 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ AC01 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ AC01 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ AC01 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ AC01 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ AC01 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ AC01 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ AC01 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ AC01 × 034F ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ AC01 × 0308 × 034F ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ AC01 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ AC01 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ AC01 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ AC01 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ AC01 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ AC01 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ AC01 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ AC01 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ AC01 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ AC01 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ AC01 × 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ AC01 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ AC01 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ AC01 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ AC01 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ AC01 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ AC01 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ AC01 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ AC01 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ AC01 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ AC01 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ AC01 × 0308 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ AC01 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ AC01 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 231A ÷ 0020 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 231A × 0308 ÷ 0020 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 231A ÷ 000D ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 231A × 0308 ÷ 000D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 231A ÷ 000A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 231A × 0308 ÷ 000A ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 231A ÷ 0001 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 231A × 0308 ÷ 0001 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 231A × 034F ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 231A × 0308 × 034F ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 231A ÷ 1F1E6 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 231A × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 231A ÷ 0600 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 231A × 0308 ÷ 0600 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 231A × 0903 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 231A × 0308 × 0903 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 231A ÷ 1100 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 231A × 0308 ÷ 1100 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 231A ÷ 1160 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 231A × 0308 ÷ 1160 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 231A ÷ 11A8 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 231A × 0308 ÷ 11A8 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 231A ÷ AC00 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 231A × 0308 ÷ AC00 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 231A ÷ AC01 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 231A × 0308 ÷ AC01 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 231A ÷ 231A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 231A × 0308 ÷ 231A ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 231A × 0300 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 231A × 0308 × 0300 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 231A × 200D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 231A × 0308 × 200D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 231A ÷ 0378 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 231A × 0308 ÷ 0378 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0300 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0300 × 0308 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0300 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0300 × 0308 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0300 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0300 × 0308 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0300 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0300 × 0308 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0300 × 034F ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 0300 × 0308 × 034F ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 0300 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0300 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0300 ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0300 × 0308 ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0300 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0300 × 0308 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0300 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0300 × 0308 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0300 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0300 × 0308 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0300 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0300 × 0308 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0300 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0300 × 0308 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0300 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0300 × 0308 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0300 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0300 × 0308 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0300 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0300 × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0300 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0300 × 0308 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0300 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0300 × 0308 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 200D ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 200D × 0308 ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 200D ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 200D × 0308 ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 200D ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 200D × 0308 ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 200D ÷ 0001 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 200D × 0308 ÷ 0001 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 200D × 034F ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 200D × 0308 × 034F ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 200D ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 200D × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 200D ÷ 0600 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 200D × 0308 ÷ 0600 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 200D × 0903 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 200D × 0308 × 0903 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 200D ÷ 1100 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 200D × 0308 ÷ 1100 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 200D ÷ 1160 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 200D × 0308 ÷ 1160 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 200D ÷ 11A8 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 200D × 0308 ÷ 11A8 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 200D ÷ AC00 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 200D × 0308 ÷ AC00 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 200D ÷ AC01 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 200D × 0308 ÷ AC01 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 200D ÷ 231A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 200D × 0308 ÷ 231A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 200D × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 200D × 0308 × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 200D × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 200D × 0308 × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 200D ÷ 0378 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 200D × 0308 ÷ 0378 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0378 ÷ 0020 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0378 × 0308 ÷ 0020 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0378 ÷ 000D ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0378 × 0308 ÷ 000D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0378 ÷ 000A ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0378 × 0308 ÷ 000A ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0378 ÷ 0001 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0378 × 0308 ÷ 0001 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0378 × 034F ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 0378 × 0308 × 034F ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
÷ 0378 ÷ 1F1E6 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0378 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
÷ 0378 ÷ 0600 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0378 × 0308 ÷ 0600 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
÷ 0378 × 0903 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0378 × 0308 × 0903 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0378 ÷ 1100 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0378 × 0308 ÷ 1100 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0378 ÷ 1160 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0378 × 0308 ÷ 1160 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0378 ÷ 11A8 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0378 × 0308 ÷ 11A8 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0378 ÷ AC00 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0378 × 0308 ÷ AC00 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0378 ÷ AC01 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0378 × 0308 ÷ AC01 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0378 ÷ 231A ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0378 × 0308 ÷ 231A ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0378 × 0300 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0378 × 0308 × 0300 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0378 × 200D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0378 × 0308 × 200D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0378 ÷ 0378 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0378 × 0308 ÷ 0378 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 000D × 000A ÷ 0061 ÷ 000A ÷ 0308 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [4.0] LATIN SMALL LETTER A (Other) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [0.3]
÷ 0061 × 0308 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [0.3]
÷ 0020 × 200D ÷ 0646 ÷ # ÷ [0.2] SPACE (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] ARABIC LETTER NOON (Other) ÷ [0.3]
÷ 0646 × 200D ÷ 0020 ÷ # ÷ [0.2] ARABIC LETTER NOON (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1100 × 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ AC00 × 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ AC01 × 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1F1E6 × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [12.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
÷ 0061 ÷ 1F1E6 × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
÷ 0061 ÷ 1F1E6 × 1F1E7 × 200D ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
÷ 0061 ÷ 1F1E6 × 200D ÷ 1F1E7 × 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
÷ 0061 ÷ 1F1E6 × 1F1E7 ÷ 1F1E8 × 1F1E9 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER D (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
÷ 0061 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0061 × 0308 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
÷ 0061 × 0903 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
÷ 0061 ÷ 0600 × 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) × [9.2] LATIN SMALL LETTER B (Other) ÷ [0.3]
÷ 1F476 × 1F3FF ÷ 1F476 ÷ # ÷ [0.2] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) ÷ [0.3]
÷ 0061 × 1F3FF ÷ 1F476 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) ÷ [0.3]
÷ 0061 × 1F3FF ÷ 1F476 × 200D × 1F6D1 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
÷ 1F476 × 1F3FF × 0308 × 200D × 1F476 × 1F3FF ÷ # ÷ [0.2] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [0.3]
÷ 1F6D1 × 200D × 1F6D1 ÷ # ÷ [0.2] OCTAGONAL SIGN (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
÷ 0061 × 200D ÷ 1F6D1 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
÷ 2701 × 200D × 2701 ÷ # ÷ [0.2] UPPER BLADE SCISSORS (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] UPPER BLADE SCISSORS (Other) ÷ [0.3]
÷ 0061 × 200D ÷ 2701 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] UPPER BLADE SCISSORS (Other) ÷ [0.3]
#
# Lines: 602
#
# EOF

View File

@ -0,0 +1,769 @@
# emoji-data.txt
# Date: 2019-01-15, 12:10:05 GMT
# Copyright (c) 2019 Unicode, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Emoji Data for UTS #51
# Version: 12.0
#
# For documentation and usage, see http://www.unicode.org/reports/tr51
#
# Format:
# <codepoint(s)> ; <property> # <comments>
# Note: there is no guarantee as to the structure of whitespace or comments
#
# Characters and sequences are listed in code point order. Users should be shown a more natural order.
# See the CLDR collation order for Emoji.
# ================================================
# All omitted code points have Emoji=No
# @missing: 0000..10FFFF ; Emoji ; No
0023 ; Emoji # 1.1 [1] (#) number sign
002A ; Emoji # 1.1 [1] (*) asterisk
0030..0039 ; Emoji # 1.1 [10] (0..9) digit zero..digit nine
00A9 ; Emoji # 1.1 [1] (©️) copyright
00AE ; Emoji # 1.1 [1] (®️) registered
203C ; Emoji # 1.1 [1] (‼️) double exclamation mark
2049 ; Emoji # 3.0 [1] (⁉️) exclamation question mark
2122 ; Emoji # 1.1 [1] (™️) trade mark
2139 ; Emoji # 3.0 [1] () information
2194..2199 ; Emoji # 1.1 [6] (↔️..↙️) left-right arrow..down-left arrow
21A9..21AA ; Emoji # 1.1 [2] (↩️..↪️) right arrow curving left..left arrow curving right
231A..231B ; Emoji # 1.1 [2] (⌚..⌛) watch..hourglass done
2328 ; Emoji # 1.1 [1] (⌨️) keyboard
23CF ; Emoji # 4.0 [1] (⏏️) eject button
23E9..23F3 ; Emoji # 6.0 [11] (⏩..⏳) fast-forward button..hourglass not done
23F8..23FA ; Emoji # 7.0 [3] (⏸️..⏺️) pause button..record button
24C2 ; Emoji # 1.1 [1] (Ⓜ️) circled M
25AA..25AB ; Emoji # 1.1 [2] (▪️..▫️) black small square..white small square
25B6 ; Emoji # 1.1 [1] (▶️) play button
25C0 ; Emoji # 1.1 [1] (◀️) reverse button
25FB..25FE ; Emoji # 3.2 [4] (◻️..◾) white medium square..black medium-small square
2600..2604 ; Emoji # 1.1 [5] (☀️..☄️) sun..comet
260E ; Emoji # 1.1 [1] (☎️) telephone
2611 ; Emoji # 1.1 [1] (☑️) check box with check
2614..2615 ; Emoji # 4.0 [2] (☔..☕) umbrella with rain drops..hot beverage
2618 ; Emoji # 4.1 [1] (☘️) shamrock
261D ; Emoji # 1.1 [1] (☝️) index pointing up
2620 ; Emoji # 1.1 [1] (☠️) skull and crossbones
2622..2623 ; Emoji # 1.1 [2] (☢️..☣️) radioactive..biohazard
2626 ; Emoji # 1.1 [1] (☦️) orthodox cross
262A ; Emoji # 1.1 [1] (☪️) star and crescent
262E..262F ; Emoji # 1.1 [2] (☮️..☯️) peace symbol..yin yang
2638..263A ; Emoji # 1.1 [3] (☸️..☺️) wheel of dharma..smiling face
2640 ; Emoji # 1.1 [1] (♀️) female sign
2642 ; Emoji # 1.1 [1] (♂️) male sign
2648..2653 ; Emoji # 1.1 [12] (♈..♓) Aries..Pisces
265F..2660 ; Emoji # 1.1 [2] (♟️..♠️) chess pawn..spade suit
2663 ; Emoji # 1.1 [1] (♣️) club suit
2665..2666 ; Emoji # 1.1 [2] (♥️..♦️) heart suit..diamond suit
2668 ; Emoji # 1.1 [1] (♨️) hot springs
267B ; Emoji # 3.2 [1] (♻️) recycling symbol
267E..267F ; Emoji # 4.1 [2] (♾️..♿) infinity..wheelchair symbol
2692..2697 ; Emoji # 4.1 [6] (⚒️..⚗️) hammer and pick..alembic
2699 ; Emoji # 4.1 [1] (⚙️) gear
269B..269C ; Emoji # 4.1 [2] (⚛️..⚜️) atom symbol..fleur-de-lis
26A0..26A1 ; Emoji # 4.0 [2] (⚠️..⚡) warning..high voltage
26AA..26AB ; Emoji # 4.1 [2] (⚪..⚫) white circle..black circle
26B0..26B1 ; Emoji # 4.1 [2] (⚰️..⚱️) coffin..funeral urn
26BD..26BE ; Emoji # 5.2 [2] (⚽..⚾) soccer ball..baseball
26C4..26C5 ; Emoji # 5.2 [2] (⛄..⛅) snowman without snow..sun behind cloud
26C8 ; Emoji # 5.2 [1] (⛈️) cloud with lightning and rain
26CE ; Emoji # 6.0 [1] (⛎) Ophiuchus
26CF ; Emoji # 5.2 [1] (⛏️) pick
26D1 ; Emoji # 5.2 [1] (⛑️) rescue workers helmet
26D3..26D4 ; Emoji # 5.2 [2] (⛓️..⛔) chains..no entry
26E9..26EA ; Emoji # 5.2 [2] (⛩️..⛪) shinto shrine..church
26F0..26F5 ; Emoji # 5.2 [6] (⛰️..⛵) mountain..sailboat
26F7..26FA ; Emoji # 5.2 [4] (⛷️..⛺) skier..tent
26FD ; Emoji # 5.2 [1] (⛽) fuel pump
2702 ; Emoji # 1.1 [1] (✂️) scissors
2705 ; Emoji # 6.0 [1] (✅) check mark button
2708..2709 ; Emoji # 1.1 [2] (✈️..✉️) airplane..envelope
270A..270B ; Emoji # 6.0 [2] (✊..✋) raised fist..raised hand
270C..270D ; Emoji # 1.1 [2] (✌️..✍️) victory hand..writing hand
270F ; Emoji # 1.1 [1] (✏️) pencil
2712 ; Emoji # 1.1 [1] (✒️) black nib
2714 ; Emoji # 1.1 [1] (✔️) check mark
2716 ; Emoji # 1.1 [1] (✖️) multiplication sign
271D ; Emoji # 1.1 [1] (✝️) latin cross
2721 ; Emoji # 1.1 [1] (✡️) star of David
2728 ; Emoji # 6.0 [1] (✨) sparkles
2733..2734 ; Emoji # 1.1 [2] (✳️..✴️) eight-spoked asterisk..eight-pointed star
2744 ; Emoji # 1.1 [1] (❄️) snowflake
2747 ; Emoji # 1.1 [1] (❇️) sparkle
274C ; Emoji # 6.0 [1] (❌) cross mark
274E ; Emoji # 6.0 [1] (❎) cross mark button
2753..2755 ; Emoji # 6.0 [3] (❓..❕) question mark..white exclamation mark
2757 ; Emoji # 5.2 [1] (❗) exclamation mark
2763..2764 ; Emoji # 1.1 [2] (❣️..❤️) heart exclamation..red heart
2795..2797 ; Emoji # 6.0 [3] (..➗) plus sign..division sign
27A1 ; Emoji # 1.1 [1] (➡️) right arrow
27B0 ; Emoji # 6.0 [1] (➰) curly loop
27BF ; Emoji # 6.0 [1] (➿) double curly loop
2934..2935 ; Emoji # 3.2 [2] (⤴️..⤵️) right arrow curving up..right arrow curving down
2B05..2B07 ; Emoji # 4.0 [3] (⬅️..⬇️) left arrow..down arrow
2B1B..2B1C ; Emoji # 5.1 [2] (⬛..⬜) black large square..white large square
2B50 ; Emoji # 5.1 [1] (⭐) star
2B55 ; Emoji # 5.2 [1] (⭕) hollow red circle
3030 ; Emoji # 1.1 [1] (〰️) wavy dash
303D ; Emoji # 3.2 [1] (〽️) part alternation mark
3297 ; Emoji # 1.1 [1] (㊗️) Japanese “congratulations” button
3299 ; Emoji # 1.1 [1] (㊙️) Japanese “secret” button
1F004 ; Emoji # 5.1 [1] (🀄) mahjong red dragon
1F0CF ; Emoji # 6.0 [1] (🃏) joker
1F170..1F171 ; Emoji # 6.0 [2] (🅰️..🅱️) A button (blood type)..B button (blood type)
1F17E ; Emoji # 6.0 [1] (🅾️) O button (blood type)
1F17F ; Emoji # 5.2 [1] (🅿️) P button
1F18E ; Emoji # 6.0 [1] (🆎) AB button (blood type)
1F191..1F19A ; Emoji # 6.0 [10] (🆑..🆚) CL button..VS button
1F1E6..1F1FF ; Emoji # 6.0 [26] (🇦..🇿) regional indicator symbol letter a..regional indicator symbol letter z
1F201..1F202 ; Emoji # 6.0 [2] (🈁..🈂️) Japanese “here” button..Japanese “service charge” button
1F21A ; Emoji # 5.2 [1] (🈚) Japanese “free of charge” button
1F22F ; Emoji # 5.2 [1] (🈯) Japanese “reserved” button
1F232..1F23A ; Emoji # 6.0 [9] (🈲..🈺) Japanese “prohibited” button..Japanese “open for business” button
1F250..1F251 ; Emoji # 6.0 [2] (🉐..🉑) Japanese “bargain” button..Japanese “acceptable” button
1F300..1F320 ; Emoji # 6.0 [33] (🌀..🌠) cyclone..shooting star
1F321 ; Emoji # 7.0 [1] (🌡️) thermometer
1F324..1F32C ; Emoji # 7.0 [9] (🌤️..🌬️) sun behind small cloud..wind face
1F32D..1F32F ; Emoji # 8.0 [3] (🌭..🌯) hot dog..burrito
1F330..1F335 ; Emoji # 6.0 [6] (🌰..🌵) chestnut..cactus
1F336 ; Emoji # 7.0 [1] (🌶️) hot pepper
1F337..1F37C ; Emoji # 6.0 [70] (🌷..🍼) tulip..baby bottle
1F37D ; Emoji # 7.0 [1] (🍽️) fork and knife with plate
1F37E..1F37F ; Emoji # 8.0 [2] (🍾..🍿) bottle with popping cork..popcorn
1F380..1F393 ; Emoji # 6.0 [20] (🎀..🎓) ribbon..graduation cap
1F396..1F397 ; Emoji # 7.0 [2] (🎖️..🎗️) military medal..reminder ribbon
1F399..1F39B ; Emoji # 7.0 [3] (🎙️..🎛️) studio microphone..control knobs
1F39E..1F39F ; Emoji # 7.0 [2] (🎞️..🎟️) film frames..admission tickets
1F3A0..1F3C4 ; Emoji # 6.0 [37] (🎠..🏄) carousel horse..person surfing
1F3C5 ; Emoji # 7.0 [1] (🏅) sports medal
1F3C6..1F3CA ; Emoji # 6.0 [5] (🏆..🏊) trophy..person swimming
1F3CB..1F3CE ; Emoji # 7.0 [4] (🏋️..🏎️) person lifting weights..racing car
1F3CF..1F3D3 ; Emoji # 8.0 [5] (🏏..🏓) cricket game..ping pong
1F3D4..1F3DF ; Emoji # 7.0 [12] (🏔️..🏟️) snow-capped mountain..stadium
1F3E0..1F3F0 ; Emoji # 6.0 [17] (🏠..🏰) house..castle
1F3F3..1F3F5 ; Emoji # 7.0 [3] (🏳️..🏵️) white flag..rosette
1F3F7 ; Emoji # 7.0 [1] (🏷️) label
1F3F8..1F3FF ; Emoji # 8.0 [8] (🏸..🏿) badminton..dark skin tone
1F400..1F43E ; Emoji # 6.0 [63] (🐀..🐾) rat..paw prints
1F43F ; Emoji # 7.0 [1] (🐿️) chipmunk
1F440 ; Emoji # 6.0 [1] (👀) eyes
1F441 ; Emoji # 7.0 [1] (👁️) eye
1F442..1F4F7 ; Emoji # 6.0[182] (👂..📷) ear..camera
1F4F8 ; Emoji # 7.0 [1] (📸) camera with flash
1F4F9..1F4FC ; Emoji # 6.0 [4] (📹..📼) video camera..videocassette
1F4FD ; Emoji # 7.0 [1] (📽️) film projector
1F4FF ; Emoji # 8.0 [1] (📿) prayer beads
1F500..1F53D ; Emoji # 6.0 [62] (🔀..🔽) shuffle tracks button..downwards button
1F549..1F54A ; Emoji # 7.0 [2] (🕉️..🕊️) om..dove
1F54B..1F54E ; Emoji # 8.0 [4] (🕋..🕎) kaaba..menorah
1F550..1F567 ; Emoji # 6.0 [24] (🕐..🕧) one oclock..twelve-thirty
1F56F..1F570 ; Emoji # 7.0 [2] (🕯️..🕰️) candle..mantelpiece clock
1F573..1F579 ; Emoji # 7.0 [7] (🕳️..🕹️) hole..joystick
1F57A ; Emoji # 9.0 [1] (🕺) man dancing
1F587 ; Emoji # 7.0 [1] (🖇️) linked paperclips
1F58A..1F58D ; Emoji # 7.0 [4] (🖊️..🖍️) pen..crayon
1F590 ; Emoji # 7.0 [1] (🖐️) hand with fingers splayed
1F595..1F596 ; Emoji # 7.0 [2] (🖕..🖖) middle finger..vulcan salute
1F5A4 ; Emoji # 9.0 [1] (🖤) black heart
1F5A5 ; Emoji # 7.0 [1] (🖥️) desktop computer
1F5A8 ; Emoji # 7.0 [1] (🖨️) printer
1F5B1..1F5B2 ; Emoji # 7.0 [2] (🖱️..🖲️) computer mouse..trackball
1F5BC ; Emoji # 7.0 [1] (🖼️) framed picture
1F5C2..1F5C4 ; Emoji # 7.0 [3] (🗂️..🗄️) card index dividers..file cabinet
1F5D1..1F5D3 ; Emoji # 7.0 [3] (🗑️..🗓️) wastebasket..spiral calendar
1F5DC..1F5DE ; Emoji # 7.0 [3] (🗜️..🗞️) clamp..rolled-up newspaper
1F5E1 ; Emoji # 7.0 [1] (🗡️) dagger
1F5E3 ; Emoji # 7.0 [1] (🗣️) speaking head
1F5E8 ; Emoji # 7.0 [1] (🗨️) left speech bubble
1F5EF ; Emoji # 7.0 [1] (🗯️) right anger bubble
1F5F3 ; Emoji # 7.0 [1] (🗳️) ballot box with ballot
1F5FA ; Emoji # 7.0 [1] (🗺️) world map
1F5FB..1F5FF ; Emoji # 6.0 [5] (🗻..🗿) mount fuji..moai
1F600 ; Emoji # 6.1 [1] (😀) grinning face
1F601..1F610 ; Emoji # 6.0 [16] (😁..😐) beaming face with smiling eyes..neutral face
1F611 ; Emoji # 6.1 [1] (😑) expressionless face
1F612..1F614 ; Emoji # 6.0 [3] (😒..😔) unamused face..pensive face
1F615 ; Emoji # 6.1 [1] (😕) confused face
1F616 ; Emoji # 6.0 [1] (😖) confounded face
1F617 ; Emoji # 6.1 [1] (😗) kissing face
1F618 ; Emoji # 6.0 [1] (😘) face blowing a kiss
1F619 ; Emoji # 6.1 [1] (😙) kissing face with smiling eyes
1F61A ; Emoji # 6.0 [1] (😚) kissing face with closed eyes
1F61B ; Emoji # 6.1 [1] (😛) face with tongue
1F61C..1F61E ; Emoji # 6.0 [3] (😜..😞) winking face with tongue..disappointed face
1F61F ; Emoji # 6.1 [1] (😟) worried face
1F620..1F625 ; Emoji # 6.0 [6] (😠..😥) angry face..sad but relieved face
1F626..1F627 ; Emoji # 6.1 [2] (😦..😧) frowning face with open mouth..anguished face
1F628..1F62B ; Emoji # 6.0 [4] (😨..😫) fearful face..tired face
1F62C ; Emoji # 6.1 [1] (😬) grimacing face
1F62D ; Emoji # 6.0 [1] (😭) loudly crying face
1F62E..1F62F ; Emoji # 6.1 [2] (😮..😯) face with open mouth..hushed face
1F630..1F633 ; Emoji # 6.0 [4] (😰..😳) anxious face with sweat..flushed face
1F634 ; Emoji # 6.1 [1] (😴) sleeping face
1F635..1F640 ; Emoji # 6.0 [12] (😵..🙀) dizzy face..weary cat
1F641..1F642 ; Emoji # 7.0 [2] (🙁..🙂) slightly frowning face..slightly smiling face
1F643..1F644 ; Emoji # 8.0 [2] (🙃..🙄) upside-down face..face with rolling eyes
1F645..1F64F ; Emoji # 6.0 [11] (🙅..🙏) person gesturing NO..folded hands
1F680..1F6C5 ; Emoji # 6.0 [70] (🚀..🛅) rocket..left luggage
1F6CB..1F6CF ; Emoji # 7.0 [5] (🛋️..🛏️) couch and lamp..bed
1F6D0 ; Emoji # 8.0 [1] (🛐) place of worship
1F6D1..1F6D2 ; Emoji # 9.0 [2] (🛑..🛒) stop sign..shopping cart
1F6D5 ; Emoji # 12.0 [1] (🛕) hindu temple
1F6E0..1F6E5 ; Emoji # 7.0 [6] (🛠️..🛥️) hammer and wrench..motor boat
1F6E9 ; Emoji # 7.0 [1] (🛩️) small airplane
1F6EB..1F6EC ; Emoji # 7.0 [2] (🛫..🛬) airplane departure..airplane arrival
1F6F0 ; Emoji # 7.0 [1] (🛰️) satellite
1F6F3 ; Emoji # 7.0 [1] (🛳️) passenger ship
1F6F4..1F6F6 ; Emoji # 9.0 [3] (🛴..🛶) kick scooter..canoe
1F6F7..1F6F8 ; Emoji # 10.0 [2] (🛷..🛸) sled..flying saucer
1F6F9 ; Emoji # 11.0 [1] (🛹) skateboard
1F6FA ; Emoji # 12.0 [1] (🛺) auto rickshaw
1F7E0..1F7EB ; Emoji # 12.0 [12] (🟠..🟫) orange circle..brown square
1F90D..1F90F ; Emoji # 12.0 [3] (🤍..🤏) white heart..pinching hand
1F910..1F918 ; Emoji # 8.0 [9] (🤐..🤘) zipper-mouth face..sign of the horns
1F919..1F91E ; Emoji # 9.0 [6] (🤙..🤞) call me hand..crossed fingers
1F91F ; Emoji # 10.0 [1] (🤟) love-you gesture
1F920..1F927 ; Emoji # 9.0 [8] (🤠..🤧) cowboy hat face..sneezing face
1F928..1F92F ; Emoji # 10.0 [8] (🤨..🤯) face with raised eyebrow..exploding head
1F930 ; Emoji # 9.0 [1] (🤰) pregnant woman
1F931..1F932 ; Emoji # 10.0 [2] (🤱..🤲) breast-feeding..palms up together
1F933..1F93A ; Emoji # 9.0 [8] (🤳..🤺) selfie..person fencing
1F93C..1F93E ; Emoji # 9.0 [3] (🤼..🤾) people wrestling..person playing handball
1F93F ; Emoji # 12.0 [1] (🤿) diving mask
1F940..1F945 ; Emoji # 9.0 [6] (🥀..🥅) wilted flower..goal net
1F947..1F94B ; Emoji # 9.0 [5] (🥇..🥋) 1st place medal..martial arts uniform
1F94C ; Emoji # 10.0 [1] (🥌) curling stone
1F94D..1F94F ; Emoji # 11.0 [3] (🥍..🥏) lacrosse..flying disc
1F950..1F95E ; Emoji # 9.0 [15] (🥐..🥞) croissant..pancakes
1F95F..1F96B ; Emoji # 10.0 [13] (🥟..🥫) dumpling..canned food
1F96C..1F970 ; Emoji # 11.0 [5] (🥬..🥰) leafy green..smiling face with hearts
1F971 ; Emoji # 12.0 [1] (🥱) yawning face
1F973..1F976 ; Emoji # 11.0 [4] (🥳..🥶) partying face..cold face
1F97A ; Emoji # 11.0 [1] (🥺) pleading face
1F97B ; Emoji # 12.0 [1] (🥻) sari
1F97C..1F97F ; Emoji # 11.0 [4] (🥼..🥿) lab coat..flat shoe
1F980..1F984 ; Emoji # 8.0 [5] (🦀..🦄) crab..unicorn
1F985..1F991 ; Emoji # 9.0 [13] (🦅..🦑) eagle..squid
1F992..1F997 ; Emoji # 10.0 [6] (🦒..🦗) giraffe..cricket
1F998..1F9A2 ; Emoji # 11.0 [11] (🦘..🦢) kangaroo..swan
1F9A5..1F9AA ; Emoji # 12.0 [6] (🦥..🦪) sloth..oyster
1F9AE..1F9AF ; Emoji # 12.0 [2] (🦮..🦯) guide dog..probing cane
1F9B0..1F9B9 ; Emoji # 11.0 [10] (🦰..🦹) red hair..supervillain
1F9BA..1F9BF ; Emoji # 12.0 [6] (🦺..🦿) safety vest..mechanical leg
1F9C0 ; Emoji # 8.0 [1] (🧀) cheese wedge
1F9C1..1F9C2 ; Emoji # 11.0 [2] (🧁..🧂) cupcake..salt
1F9C3..1F9CA ; Emoji # 12.0 [8] (🧃..🧊) beverage box..ice cube
1F9CD..1F9CF ; Emoji # 12.0 [3] (🧍..🧏) person standing..deaf person
1F9D0..1F9E6 ; Emoji # 10.0 [23] (🧐..🧦) face with monocle..socks
1F9E7..1F9FF ; Emoji # 11.0 [25] (🧧..🧿) red envelope..nazar amulet
1FA70..1FA73 ; Emoji # 12.0 [4] (🩰..🩳) ballet shoes..shorts
1FA78..1FA7A ; Emoji # 12.0 [3] (🩸..🩺) drop of blood..stethoscope
1FA80..1FA82 ; Emoji # 12.0 [3] (🪀..🪂) yo-yo..parachute
1FA90..1FA95 ; Emoji # 12.0 [6] (🪐..🪕) ringed planet..banjo
# Total elements: 1311
# ================================================
# All omitted code points have Emoji_Presentation=No
# @missing: 0000..10FFFF ; Emoji_Presentation ; No
231A..231B ; Emoji_Presentation # 1.1 [2] (⌚..⌛) watch..hourglass done
23E9..23EC ; Emoji_Presentation # 6.0 [4] (⏩..⏬) fast-forward button..fast down button
23F0 ; Emoji_Presentation # 6.0 [1] (⏰) alarm clock
23F3 ; Emoji_Presentation # 6.0 [1] (⏳) hourglass not done
25FD..25FE ; Emoji_Presentation # 3.2 [2] (◽..◾) white medium-small square..black medium-small square
2614..2615 ; Emoji_Presentation # 4.0 [2] (☔..☕) umbrella with rain drops..hot beverage
2648..2653 ; Emoji_Presentation # 1.1 [12] (♈..♓) Aries..Pisces
267F ; Emoji_Presentation # 4.1 [1] (♿) wheelchair symbol
2693 ; Emoji_Presentation # 4.1 [1] (⚓) anchor
26A1 ; Emoji_Presentation # 4.0 [1] (⚡) high voltage
26AA..26AB ; Emoji_Presentation # 4.1 [2] (⚪..⚫) white circle..black circle
26BD..26BE ; Emoji_Presentation # 5.2 [2] (⚽..⚾) soccer ball..baseball
26C4..26C5 ; Emoji_Presentation # 5.2 [2] (⛄..⛅) snowman without snow..sun behind cloud
26CE ; Emoji_Presentation # 6.0 [1] (⛎) Ophiuchus
26D4 ; Emoji_Presentation # 5.2 [1] (⛔) no entry
26EA ; Emoji_Presentation # 5.2 [1] (⛪) church
26F2..26F3 ; Emoji_Presentation # 5.2 [2] (⛲..⛳) fountain..flag in hole
26F5 ; Emoji_Presentation # 5.2 [1] (⛵) sailboat
26FA ; Emoji_Presentation # 5.2 [1] (⛺) tent
26FD ; Emoji_Presentation # 5.2 [1] (⛽) fuel pump
2705 ; Emoji_Presentation # 6.0 [1] (✅) check mark button
270A..270B ; Emoji_Presentation # 6.0 [2] (✊..✋) raised fist..raised hand
2728 ; Emoji_Presentation # 6.0 [1] (✨) sparkles
274C ; Emoji_Presentation # 6.0 [1] (❌) cross mark
274E ; Emoji_Presentation # 6.0 [1] (❎) cross mark button
2753..2755 ; Emoji_Presentation # 6.0 [3] (❓..❕) question mark..white exclamation mark
2757 ; Emoji_Presentation # 5.2 [1] (❗) exclamation mark
2795..2797 ; Emoji_Presentation # 6.0 [3] (..➗) plus sign..division sign
27B0 ; Emoji_Presentation # 6.0 [1] (➰) curly loop
27BF ; Emoji_Presentation # 6.0 [1] (➿) double curly loop
2B1B..2B1C ; Emoji_Presentation # 5.1 [2] (⬛..⬜) black large square..white large square
2B50 ; Emoji_Presentation # 5.1 [1] (⭐) star
2B55 ; Emoji_Presentation # 5.2 [1] (⭕) hollow red circle
1F004 ; Emoji_Presentation # 5.1 [1] (🀄) mahjong red dragon
1F0CF ; Emoji_Presentation # 6.0 [1] (🃏) joker
1F18E ; Emoji_Presentation # 6.0 [1] (🆎) AB button (blood type)
1F191..1F19A ; Emoji_Presentation # 6.0 [10] (🆑..🆚) CL button..VS button
1F1E6..1F1FF ; Emoji_Presentation # 6.0 [26] (🇦..🇿) regional indicator symbol letter a..regional indicator symbol letter z
1F201 ; Emoji_Presentation # 6.0 [1] (🈁) Japanese “here” button
1F21A ; Emoji_Presentation # 5.2 [1] (🈚) Japanese “free of charge” button
1F22F ; Emoji_Presentation # 5.2 [1] (🈯) Japanese “reserved” button
1F232..1F236 ; Emoji_Presentation # 6.0 [5] (🈲..🈶) Japanese “prohibited” button..Japanese “not free of charge” button
1F238..1F23A ; Emoji_Presentation # 6.0 [3] (🈸..🈺) Japanese “application” button..Japanese “open for business” button
1F250..1F251 ; Emoji_Presentation # 6.0 [2] (🉐..🉑) Japanese “bargain” button..Japanese “acceptable” button
1F300..1F320 ; Emoji_Presentation # 6.0 [33] (🌀..🌠) cyclone..shooting star
1F32D..1F32F ; Emoji_Presentation # 8.0 [3] (🌭..🌯) hot dog..burrito
1F330..1F335 ; Emoji_Presentation # 6.0 [6] (🌰..🌵) chestnut..cactus
1F337..1F37C ; Emoji_Presentation # 6.0 [70] (🌷..🍼) tulip..baby bottle
1F37E..1F37F ; Emoji_Presentation # 8.0 [2] (🍾..🍿) bottle with popping cork..popcorn
1F380..1F393 ; Emoji_Presentation # 6.0 [20] (🎀..🎓) ribbon..graduation cap
1F3A0..1F3C4 ; Emoji_Presentation # 6.0 [37] (🎠..🏄) carousel horse..person surfing
1F3C5 ; Emoji_Presentation # 7.0 [1] (🏅) sports medal
1F3C6..1F3CA ; Emoji_Presentation # 6.0 [5] (🏆..🏊) trophy..person swimming
1F3CF..1F3D3 ; Emoji_Presentation # 8.0 [5] (🏏..🏓) cricket game..ping pong
1F3E0..1F3F0 ; Emoji_Presentation # 6.0 [17] (🏠..🏰) house..castle
1F3F4 ; Emoji_Presentation # 7.0 [1] (🏴) black flag
1F3F8..1F3FF ; Emoji_Presentation # 8.0 [8] (🏸..🏿) badminton..dark skin tone
1F400..1F43E ; Emoji_Presentation # 6.0 [63] (🐀..🐾) rat..paw prints
1F440 ; Emoji_Presentation # 6.0 [1] (👀) eyes
1F442..1F4F7 ; Emoji_Presentation # 6.0[182] (👂..📷) ear..camera
1F4F8 ; Emoji_Presentation # 7.0 [1] (📸) camera with flash
1F4F9..1F4FC ; Emoji_Presentation # 6.0 [4] (📹..📼) video camera..videocassette
1F4FF ; Emoji_Presentation # 8.0 [1] (📿) prayer beads
1F500..1F53D ; Emoji_Presentation # 6.0 [62] (🔀..🔽) shuffle tracks button..downwards button
1F54B..1F54E ; Emoji_Presentation # 8.0 [4] (🕋..🕎) kaaba..menorah
1F550..1F567 ; Emoji_Presentation # 6.0 [24] (🕐..🕧) one o’clock..twelve-thirty
1F57A ; Emoji_Presentation # 9.0 [1] (🕺) man dancing
1F595..1F596 ; Emoji_Presentation # 7.0 [2] (🖕..🖖) middle finger..vulcan salute
1F5A4 ; Emoji_Presentation # 9.0 [1] (🖤) black heart
1F5FB..1F5FF ; Emoji_Presentation # 6.0 [5] (🗻..🗿) mount fuji..moai
1F600 ; Emoji_Presentation # 6.1 [1] (😀) grinning face
1F601..1F610 ; Emoji_Presentation # 6.0 [16] (😁..😐) beaming face with smiling eyes..neutral face
1F611 ; Emoji_Presentation # 6.1 [1] (😑) expressionless face
1F612..1F614 ; Emoji_Presentation # 6.0 [3] (😒..😔) unamused face..pensive face
1F615 ; Emoji_Presentation # 6.1 [1] (😕) confused face
1F616 ; Emoji_Presentation # 6.0 [1] (😖) confounded face
1F617 ; Emoji_Presentation # 6.1 [1] (😗) kissing face
1F618 ; Emoji_Presentation # 6.0 [1] (😘) face blowing a kiss
1F619 ; Emoji_Presentation # 6.1 [1] (😙) kissing face with smiling eyes
1F61A ; Emoji_Presentation # 6.0 [1] (😚) kissing face with closed eyes
1F61B ; Emoji_Presentation # 6.1 [1] (😛) face with tongue
1F61C..1F61E ; Emoji_Presentation # 6.0 [3] (😜..😞) winking face with tongue..disappointed face
1F61F ; Emoji_Presentation # 6.1 [1] (😟) worried face
1F620..1F625 ; Emoji_Presentation # 6.0 [6] (😠..😥) angry face..sad but relieved face
1F626..1F627 ; Emoji_Presentation # 6.1 [2] (😦..😧) frowning face with open mouth..anguished face
1F628..1F62B ; Emoji_Presentation # 6.0 [4] (😨..😫) fearful face..tired face
1F62C ; Emoji_Presentation # 6.1 [1] (😬) grimacing face
1F62D ; Emoji_Presentation # 6.0 [1] (😭) loudly crying face
1F62E..1F62F ; Emoji_Presentation # 6.1 [2] (😮..😯) face with open mouth..hushed face
1F630..1F633 ; Emoji_Presentation # 6.0 [4] (😰..😳) anxious face with sweat..flushed face
1F634 ; Emoji_Presentation # 6.1 [1] (😴) sleeping face
1F635..1F640 ; Emoji_Presentation # 6.0 [12] (😵..🙀) dizzy face..weary cat
1F641..1F642 ; Emoji_Presentation # 7.0 [2] (🙁..🙂) slightly frowning face..slightly smiling face
1F643..1F644 ; Emoji_Presentation # 8.0 [2] (🙃..🙄) upside-down face..face with rolling eyes
1F645..1F64F ; Emoji_Presentation # 6.0 [11] (🙅..🙏) person gesturing NO..folded hands
1F680..1F6C5 ; Emoji_Presentation # 6.0 [70] (🚀..🛅) rocket..left luggage
1F6CC ; Emoji_Presentation # 7.0 [1] (🛌) person in bed
1F6D0 ; Emoji_Presentation # 8.0 [1] (🛐) place of worship
1F6D1..1F6D2 ; Emoji_Presentation # 9.0 [2] (🛑..🛒) stop sign..shopping cart
1F6D5 ; Emoji_Presentation # 12.0 [1] (🛕) hindu temple
1F6EB..1F6EC ; Emoji_Presentation # 7.0 [2] (🛫..🛬) airplane departure..airplane arrival
1F6F4..1F6F6 ; Emoji_Presentation # 9.0 [3] (🛴..🛶) kick scooter..canoe
1F6F7..1F6F8 ; Emoji_Presentation # 10.0 [2] (🛷..🛸) sled..flying saucer
1F6F9 ; Emoji_Presentation # 11.0 [1] (🛹) skateboard
1F6FA ; Emoji_Presentation # 12.0 [1] (🛺) auto rickshaw
1F7E0..1F7EB ; Emoji_Presentation # 12.0 [12] (🟠..🟫) orange circle..brown square
1F90D..1F90F ; Emoji_Presentation # 12.0 [3] (🤍..🤏) white heart..pinching hand
1F910..1F918 ; Emoji_Presentation # 8.0 [9] (🤐..🤘) zipper-mouth face..sign of the horns
1F919..1F91E ; Emoji_Presentation # 9.0 [6] (🤙..🤞) call me hand..crossed fingers
1F91F ; Emoji_Presentation # 10.0 [1] (🤟) love-you gesture
1F920..1F927 ; Emoji_Presentation # 9.0 [8] (🤠..🤧) cowboy hat face..sneezing face
1F928..1F92F ; Emoji_Presentation # 10.0 [8] (🤨..🤯) face with raised eyebrow..exploding head
1F930 ; Emoji_Presentation # 9.0 [1] (🤰) pregnant woman
1F931..1F932 ; Emoji_Presentation # 10.0 [2] (🤱..🤲) breast-feeding..palms up together
1F933..1F93A ; Emoji_Presentation # 9.0 [8] (🤳..🤺) selfie..person fencing
1F93C..1F93E ; Emoji_Presentation # 9.0 [3] (🤼..🤾) people wrestling..person playing handball
1F93F ; Emoji_Presentation # 12.0 [1] (🤿) diving mask
1F940..1F945 ; Emoji_Presentation # 9.0 [6] (🥀..🥅) wilted flower..goal net
1F947..1F94B ; Emoji_Presentation # 9.0 [5] (🥇..🥋) 1st place medal..martial arts uniform
1F94C ; Emoji_Presentation # 10.0 [1] (🥌) curling stone
1F94D..1F94F ; Emoji_Presentation # 11.0 [3] (🥍..🥏) lacrosse..flying disc
1F950..1F95E ; Emoji_Presentation # 9.0 [15] (🥐..🥞) croissant..pancakes
1F95F..1F96B ; Emoji_Presentation # 10.0 [13] (🥟..🥫) dumpling..canned food
1F96C..1F970 ; Emoji_Presentation # 11.0 [5] (🥬..🥰) leafy green..smiling face with hearts
1F971 ; Emoji_Presentation # 12.0 [1] (🥱) yawning face
1F973..1F976 ; Emoji_Presentation # 11.0 [4] (🥳..🥶) partying face..cold face
1F97A ; Emoji_Presentation # 11.0 [1] (🥺) pleading face
1F97B ; Emoji_Presentation # 12.0 [1] (🥻) sari
1F97C..1F97F ; Emoji_Presentation # 11.0 [4] (🥼..🥿) lab coat..flat shoe
1F980..1F984 ; Emoji_Presentation # 8.0 [5] (🦀..🦄) crab..unicorn
1F985..1F991 ; Emoji_Presentation # 9.0 [13] (🦅..🦑) eagle..squid
1F992..1F997 ; Emoji_Presentation # 10.0 [6] (🦒..🦗) giraffe..cricket
1F998..1F9A2 ; Emoji_Presentation # 11.0 [11] (🦘..🦢) kangaroo..swan
1F9A5..1F9AA ; Emoji_Presentation # 12.0 [6] (🦥..🦪) sloth..oyster
1F9AE..1F9AF ; Emoji_Presentation # 12.0 [2] (🦮..🦯) guide dog..probing cane
1F9B0..1F9B9 ; Emoji_Presentation # 11.0 [10] (🦰..🦹) red hair..supervillain
1F9BA..1F9BF ; Emoji_Presentation # 12.0 [6] (🦺..🦿) safety vest..mechanical leg
1F9C0 ; Emoji_Presentation # 8.0 [1] (🧀) cheese wedge
1F9C1..1F9C2 ; Emoji_Presentation # 11.0 [2] (🧁..🧂) cupcake..salt
1F9C3..1F9CA ; Emoji_Presentation # 12.0 [8] (🧃..🧊) beverage box..ice cube
1F9CD..1F9CF ; Emoji_Presentation # 12.0 [3] (🧍..🧏) person standing..deaf person
1F9D0..1F9E6 ; Emoji_Presentation # 10.0 [23] (🧐..🧦) face with monocle..socks
1F9E7..1F9FF ; Emoji_Presentation # 11.0 [25] (🧧..🧿) red envelope..nazar amulet
1FA70..1FA73 ; Emoji_Presentation # 12.0 [4] (🩰..🩳) ballet shoes..shorts
1FA78..1FA7A ; Emoji_Presentation # 12.0 [3] (🩸..🩺) drop of blood..stethoscope
1FA80..1FA82 ; Emoji_Presentation # 12.0 [3] (🪀..🪂) yo-yo..parachute
1FA90..1FA95 ; Emoji_Presentation # 12.0 [6] (🪐..🪕) ringed planet..banjo
# Total elements: 1093
# ================================================
# All omitted code points have Emoji_Modifier=No
# @missing: 0000..10FFFF ; Emoji_Modifier ; No
1F3FB..1F3FF ; Emoji_Modifier # 8.0 [5] (🏻..🏿) light skin tone..dark skin tone
# Total elements: 5
# ================================================
# All omitted code points have Emoji_Modifier_Base=No
# @missing: 0000..10FFFF ; Emoji_Modifier_Base ; No
261D ; Emoji_Modifier_Base # 1.1 [1] (☝️) index pointing up
26F9 ; Emoji_Modifier_Base # 5.2 [1] (⛹️) person bouncing ball
270A..270B ; Emoji_Modifier_Base # 6.0 [2] (✊..✋) raised fist..raised hand
270C..270D ; Emoji_Modifier_Base # 1.1 [2] (✌️..✍️) victory hand..writing hand
1F385 ; Emoji_Modifier_Base # 6.0 [1] (🎅) Santa Claus
1F3C2..1F3C4 ; Emoji_Modifier_Base # 6.0 [3] (🏂..🏄) snowboarder..person surfing
1F3C7 ; Emoji_Modifier_Base # 6.0 [1] (🏇) horse racing
1F3CA ; Emoji_Modifier_Base # 6.0 [1] (🏊) person swimming
1F3CB..1F3CC ; Emoji_Modifier_Base # 7.0 [2] (🏋️..🏌️) person lifting weights..person golfing
1F442..1F443 ; Emoji_Modifier_Base # 6.0 [2] (👂..👃) ear..nose
1F446..1F450 ; Emoji_Modifier_Base # 6.0 [11] (👆..👐) backhand index pointing up..open hands
1F466..1F478 ; Emoji_Modifier_Base # 6.0 [19] (👦..👸) boy..princess
1F47C ; Emoji_Modifier_Base # 6.0 [1] (👼) baby angel
1F481..1F483 ; Emoji_Modifier_Base # 6.0 [3] (💁..💃) person tipping hand..woman dancing
1F485..1F487 ; Emoji_Modifier_Base # 6.0 [3] (💅..💇) nail polish..person getting haircut
1F48F ; Emoji_Modifier_Base # 6.0 [1] (💏) kiss
1F491 ; Emoji_Modifier_Base # 6.0 [1] (💑) couple with heart
1F4AA ; Emoji_Modifier_Base # 6.0 [1] (💪) flexed biceps
1F574..1F575 ; Emoji_Modifier_Base # 7.0 [2] (🕴️..🕵️) man in suit levitating..detective
1F57A ; Emoji_Modifier_Base # 9.0 [1] (🕺) man dancing
1F590 ; Emoji_Modifier_Base # 7.0 [1] (🖐️) hand with fingers splayed
1F595..1F596 ; Emoji_Modifier_Base # 7.0 [2] (🖕..🖖) middle finger..vulcan salute
1F645..1F647 ; Emoji_Modifier_Base # 6.0 [3] (🙅..🙇) person gesturing NO..person bowing
1F64B..1F64F ; Emoji_Modifier_Base # 6.0 [5] (🙋..🙏) person raising hand..folded hands
1F6A3 ; Emoji_Modifier_Base # 6.0 [1] (🚣) person rowing boat
1F6B4..1F6B6 ; Emoji_Modifier_Base # 6.0 [3] (🚴..🚶) person biking..person walking
1F6C0 ; Emoji_Modifier_Base # 6.0 [1] (🛀) person taking bath
1F6CC ; Emoji_Modifier_Base # 7.0 [1] (🛌) person in bed
1F90F ; Emoji_Modifier_Base # 12.0 [1] (🤏) pinching hand
1F918 ; Emoji_Modifier_Base # 8.0 [1] (🤘) sign of the horns
1F919..1F91E ; Emoji_Modifier_Base # 9.0 [6] (🤙..🤞) call me hand..crossed fingers
1F91F ; Emoji_Modifier_Base # 10.0 [1] (🤟) love-you gesture
1F926 ; Emoji_Modifier_Base # 9.0 [1] (🤦) person facepalming
1F930 ; Emoji_Modifier_Base # 9.0 [1] (🤰) pregnant woman
1F931..1F932 ; Emoji_Modifier_Base # 10.0 [2] (🤱..🤲) breast-feeding..palms up together
1F933..1F939 ; Emoji_Modifier_Base # 9.0 [7] (🤳..🤹) selfie..person juggling
1F93C..1F93E ; Emoji_Modifier_Base # 9.0 [3] (🤼..🤾) people wrestling..person playing handball
1F9B5..1F9B6 ; Emoji_Modifier_Base # 11.0 [2] (🦵..🦶) leg..foot
1F9B8..1F9B9 ; Emoji_Modifier_Base # 11.0 [2] (🦸..🦹) superhero..supervillain
1F9BB ; Emoji_Modifier_Base # 12.0 [1] (🦻) ear with hearing aid
1F9CD..1F9CF ; Emoji_Modifier_Base # 12.0 [3] (🧍..🧏) person standing..deaf person
1F9D1..1F9DD ; Emoji_Modifier_Base # 10.0 [13] (🧑..🧝) person..elf
# Total elements: 120
# ================================================
# All omitted code points have Emoji_Component=No
# @missing: 0000..10FFFF ; Emoji_Component ; No
0023 ; Emoji_Component # 1.1 [1] (#) number sign
002A ; Emoji_Component # 1.1 [1] (*) asterisk
0030..0039 ; Emoji_Component # 1.1 [10] (0..9) digit zero..digit nine
200D ; Emoji_Component # 1.1 [1] () zero width joiner
20E3 ; Emoji_Component # 3.0 [1] (⃣) combining enclosing keycap
FE0F ; Emoji_Component # 3.2 [1] () VARIATION SELECTOR-16
1F1E6..1F1FF ; Emoji_Component # 6.0 [26] (🇦..🇿) regional indicator symbol letter a..regional indicator symbol letter z
1F3FB..1F3FF ; Emoji_Component # 8.0 [5] (🏻..🏿) light skin tone..dark skin tone
1F9B0..1F9B3 ; Emoji_Component # 11.0 [4] (🦰..🦳) red hair..white hair
E0020..E007F ; Emoji_Component # 3.1 [96] (󠀠..󠁿) tag space..cancel tag
# Total elements: 146
# ================================================
# All omitted code points have Extended_Pictographic=No
# @missing: 0000..10FFFF ; Extended_Pictographic ; No
00A9 ; Extended_Pictographic# 1.1 [1] (©️) copyright
00AE ; Extended_Pictographic# 1.1 [1] (®️) registered
203C ; Extended_Pictographic# 1.1 [1] (‼️) double exclamation mark
2049 ; Extended_Pictographic# 3.0 [1] (⁉️) exclamation question mark
2122 ; Extended_Pictographic# 1.1 [1] (™️) trade mark
2139 ; Extended_Pictographic# 3.0 [1] (ℹ️) information
2194..2199 ; Extended_Pictographic# 1.1 [6] (↔️..↙️) left-right arrow..down-left arrow
21A9..21AA ; Extended_Pictographic# 1.1 [2] (↩️..↪️) right arrow curving left..left arrow curving right
231A..231B ; Extended_Pictographic# 1.1 [2] (⌚..⌛) watch..hourglass done
2328 ; Extended_Pictographic# 1.1 [1] (⌨️) keyboard
2388 ; Extended_Pictographic# 3.0 [1] (⎈) HELM SYMBOL
23CF ; Extended_Pictographic# 4.0 [1] (⏏️) eject button
23E9..23F3 ; Extended_Pictographic# 6.0 [11] (⏩..⏳) fast-forward button..hourglass not done
23F8..23FA ; Extended_Pictographic# 7.0 [3] (⏸️..⏺️) pause button..record button
24C2 ; Extended_Pictographic# 1.1 [1] (Ⓜ️) circled M
25AA..25AB ; Extended_Pictographic# 1.1 [2] (▪️..▫️) black small square..white small square
25B6 ; Extended_Pictographic# 1.1 [1] (▶️) play button
25C0 ; Extended_Pictographic# 1.1 [1] (◀️) reverse button
25FB..25FE ; Extended_Pictographic# 3.2 [4] (◻️..◾) white medium square..black medium-small square
2600..2605 ; Extended_Pictographic# 1.1 [6] (☀️..★) sun..BLACK STAR
2607..2612 ; Extended_Pictographic# 1.1 [12] (☇..☒) LIGHTNING..BALLOT BOX WITH X
2614..2615 ; Extended_Pictographic# 4.0 [2] (☔..☕) umbrella with rain drops..hot beverage
2616..2617 ; Extended_Pictographic# 3.2 [2] (☖..☗) WHITE SHOGI PIECE..BLACK SHOGI PIECE
2618 ; Extended_Pictographic# 4.1 [1] (☘️) shamrock
2619 ; Extended_Pictographic# 3.0 [1] (☙) REVERSED ROTATED FLORAL HEART BULLET
261A..266F ; Extended_Pictographic# 1.1 [86] (☚..♯) BLACK LEFT POINTING INDEX..MUSIC SHARP SIGN
2670..2671 ; Extended_Pictographic# 3.0 [2] (♰..♱) WEST SYRIAC CROSS..EAST SYRIAC CROSS
2672..267D ; Extended_Pictographic# 3.2 [12] (♲..♽) UNIVERSAL RECYCLING SYMBOL..PARTIALLY-RECYCLED PAPER SYMBOL
267E..267F ; Extended_Pictographic# 4.1 [2] (♾️..♿) infinity..wheelchair symbol
2680..2685 ; Extended_Pictographic# 3.2 [6] (⚀..⚅) DIE FACE-1..DIE FACE-6
2690..2691 ; Extended_Pictographic# 4.0 [2] (⚐..⚑) WHITE FLAG..BLACK FLAG
2692..269C ; Extended_Pictographic# 4.1 [11] (⚒️..⚜️) hammer and pick..fleur-de-lis
269D ; Extended_Pictographic# 5.1 [1] (⚝) OUTLINED WHITE STAR
269E..269F ; Extended_Pictographic# 5.2 [2] (⚞..⚟) THREE LINES CONVERGING RIGHT..THREE LINES CONVERGING LEFT
26A0..26A1 ; Extended_Pictographic# 4.0 [2] (⚠️..⚡) warning..high voltage
26A2..26B1 ; Extended_Pictographic# 4.1 [16] (⚢..⚱️) DOUBLED FEMALE SIGN..funeral urn
26B2 ; Extended_Pictographic# 5.0 [1] (⚲) NEUTER
26B3..26BC ; Extended_Pictographic# 5.1 [10] (⚳..⚼) CERES..SESQUIQUADRATE
26BD..26BF ; Extended_Pictographic# 5.2 [3] (⚽..⚿) soccer ball..SQUARED KEY
26C0..26C3 ; Extended_Pictographic# 5.1 [4] (⛀..⛃) WHITE DRAUGHTS MAN..BLACK DRAUGHTS KING
26C4..26CD ; Extended_Pictographic# 5.2 [10] (⛄..⛍) snowman without snow..DISABLED CAR
26CE ; Extended_Pictographic# 6.0 [1] (⛎) Ophiuchus
26CF..26E1 ; Extended_Pictographic# 5.2 [19] (⛏️..⛡) pick..RESTRICTED LEFT ENTRY-2
26E2 ; Extended_Pictographic# 6.0 [1] (⛢) ASTRONOMICAL SYMBOL FOR URANUS
26E3 ; Extended_Pictographic# 5.2 [1] (⛣) HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE
26E4..26E7 ; Extended_Pictographic# 6.0 [4] (⛤..⛧) PENTAGRAM..INVERTED PENTAGRAM
26E8..26FF ; Extended_Pictographic# 5.2 [24] (⛨..⛿) BLACK CROSS ON SHIELD..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
2700 ; Extended_Pictographic# 7.0 [1] (✀) BLACK SAFETY SCISSORS
2701..2704 ; Extended_Pictographic# 1.1 [4] (✁..✄) UPPER BLADE SCISSORS..WHITE SCISSORS
2705 ; Extended_Pictographic# 6.0 [1] (✅) check mark button
2708..2709 ; Extended_Pictographic# 1.1 [2] (✈️..✉️) airplane..envelope
270A..270B ; Extended_Pictographic# 6.0 [2] (✊..✋) raised fist..raised hand
270C..2712 ; Extended_Pictographic# 1.1 [7] (✌️..✒️) victory hand..black nib
2714 ; Extended_Pictographic# 1.1 [1] (✔️) check mark
2716 ; Extended_Pictographic# 1.1 [1] (✖️) multiplication sign
271D ; Extended_Pictographic# 1.1 [1] (✝️) latin cross
2721 ; Extended_Pictographic# 1.1 [1] (✡️) star of David
2728 ; Extended_Pictographic# 6.0 [1] (✨) sparkles
2733..2734 ; Extended_Pictographic# 1.1 [2] (✳️..✴️) eight-spoked asterisk..eight-pointed star
2744 ; Extended_Pictographic# 1.1 [1] (❄️) snowflake
2747 ; Extended_Pictographic# 1.1 [1] (❇️) sparkle
274C ; Extended_Pictographic# 6.0 [1] (❌) cross mark
274E ; Extended_Pictographic# 6.0 [1] (❎) cross mark button
2753..2755 ; Extended_Pictographic# 6.0 [3] (❓..❕) question mark..white exclamation mark
2757 ; Extended_Pictographic# 5.2 [1] (❗) exclamation mark
2763..2767 ; Extended_Pictographic# 1.1 [5] (❣️..❧) heart exclamation..ROTATED FLORAL HEART BULLET
2795..2797 ; Extended_Pictographic# 6.0 [3] (➕..➗) plus sign..division sign
27A1 ; Extended_Pictographic# 1.1 [1] (➡️) right arrow
27B0 ; Extended_Pictographic# 6.0 [1] (➰) curly loop
27BF ; Extended_Pictographic# 6.0 [1] (➿) double curly loop
2934..2935 ; Extended_Pictographic# 3.2 [2] (⤴️..⤵️) right arrow curving up..right arrow curving down
2B05..2B07 ; Extended_Pictographic# 4.0 [3] (⬅️..⬇️) left arrow..down arrow
2B1B..2B1C ; Extended_Pictographic# 5.1 [2] (⬛..⬜) black large square..white large square
2B50 ; Extended_Pictographic# 5.1 [1] (⭐) star
2B55 ; Extended_Pictographic# 5.2 [1] (⭕) hollow red circle
3030 ; Extended_Pictographic# 1.1 [1] (〰️) wavy dash
303D ; Extended_Pictographic# 3.2 [1] (〽️) part alternation mark
3297 ; Extended_Pictographic# 1.1 [1] (㊗️) Japanese “congratulations” button
3299 ; Extended_Pictographic# 1.1 [1] (㊙️) Japanese “secret” button
1F000..1F02B ; Extended_Pictographic# 5.1 [44] (🀀..🀫) MAHJONG TILE EAST WIND..MAHJONG TILE BACK
1F02C..1F02F ; Extended_Pictographic# NA [4] (🀬..🀯) <reserved-1F02C>..<reserved-1F02F>
1F030..1F093 ; Extended_Pictographic# 5.1[100] (🀰..🂓) DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06
1F094..1F09F ; Extended_Pictographic# NA [12] (🂔..🂟) <reserved-1F094>..<reserved-1F09F>
1F0A0..1F0AE ; Extended_Pictographic# 6.0 [15] (🂠..🂮) PLAYING CARD BACK..PLAYING CARD KING OF SPADES
1F0AF..1F0B0 ; Extended_Pictographic# NA [2] (🂯..🂰) <reserved-1F0AF>..<reserved-1F0B0>
1F0B1..1F0BE ; Extended_Pictographic# 6.0 [14] (🂱..🂾) PLAYING CARD ACE OF HEARTS..PLAYING CARD KING OF HEARTS
1F0BF ; Extended_Pictographic# 7.0 [1] (🂿) PLAYING CARD RED JOKER
1F0C0 ; Extended_Pictographic# NA [1] (🃀) <reserved-1F0C0>
1F0C1..1F0CF ; Extended_Pictographic# 6.0 [15] (🃁..🃏) PLAYING CARD ACE OF DIAMONDS..joker
1F0D0 ; Extended_Pictographic# NA [1] (🃐) <reserved-1F0D0>
1F0D1..1F0DF ; Extended_Pictographic# 6.0 [15] (🃑..🃟) PLAYING CARD ACE OF CLUBS..PLAYING CARD WHITE JOKER
1F0E0..1F0F5 ; Extended_Pictographic# 7.0 [22] (🃠..🃵) PLAYING CARD FOOL..PLAYING CARD TRUMP-21
1F0F6..1F0FF ; Extended_Pictographic# NA [10] (🃶..🃿) <reserved-1F0F6>..<reserved-1F0FF>
1F10D..1F10F ; Extended_Pictographic# NA [3] (🄍..🄏) <reserved-1F10D>..<reserved-1F10F>
1F12F ; Extended_Pictographic# 11.0 [1] (🄯) COPYLEFT SYMBOL
1F16C ; Extended_Pictographic# 12.0 [1] (🅬) RAISED MR SIGN
1F16D..1F16F ; Extended_Pictographic# NA [3] (🅭..🅯) <reserved-1F16D>..<reserved-1F16F>
1F170..1F171 ; Extended_Pictographic# 6.0 [2] (🅰️..🅱️) A button (blood type)..B button (blood type)
1F17E ; Extended_Pictographic# 6.0 [1] (🅾️) O button (blood type)
1F17F ; Extended_Pictographic# 5.2 [1] (🅿️) P button
1F18E ; Extended_Pictographic# 6.0 [1] (🆎) AB button (blood type)
1F191..1F19A ; Extended_Pictographic# 6.0 [10] (🆑..🆚) CL button..VS button
1F1AD..1F1E5 ; Extended_Pictographic# NA [57] (🆭..🇥) <reserved-1F1AD>..<reserved-1F1E5>
1F201..1F202 ; Extended_Pictographic# 6.0 [2] (🈁..🈂️) Japanese “here” button..Japanese “service charge” button
1F203..1F20F ; Extended_Pictographic# NA [13] (🈃..🈏) <reserved-1F203>..<reserved-1F20F>
1F21A ; Extended_Pictographic# 5.2 [1] (🈚) Japanese “free of charge” button
1F22F ; Extended_Pictographic# 5.2 [1] (🈯) Japanese “reserved” button
1F232..1F23A ; Extended_Pictographic# 6.0 [9] (🈲..🈺) Japanese “prohibited” button..Japanese “open for business” button
1F23C..1F23F ; Extended_Pictographic# NA [4] (🈼..🈿) <reserved-1F23C>..<reserved-1F23F>
1F249..1F24F ; Extended_Pictographic# NA [7] (🉉..🉏) <reserved-1F249>..<reserved-1F24F>
1F250..1F251 ; Extended_Pictographic# 6.0 [2] (🉐..🉑) Japanese “bargain” button..Japanese “acceptable” button
1F252..1F25F ; Extended_Pictographic# NA [14] (🉒..🉟) <reserved-1F252>..<reserved-1F25F>
1F260..1F265 ; Extended_Pictographic# 10.0 [6] (🉠..🉥) ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI
1F266..1F2FF ; Extended_Pictographic# NA[154] (🉦..🋿) <reserved-1F266>..<reserved-1F2FF>
1F300..1F320 ; Extended_Pictographic# 6.0 [33] (🌀..🌠) cyclone..shooting star
1F321..1F32C ; Extended_Pictographic# 7.0 [12] (🌡️..🌬️) thermometer..wind face
1F32D..1F32F ; Extended_Pictographic# 8.0 [3] (🌭..🌯) hot dog..burrito
1F330..1F335 ; Extended_Pictographic# 6.0 [6] (🌰..🌵) chestnut..cactus
1F336 ; Extended_Pictographic# 7.0 [1] (🌶️) hot pepper
1F337..1F37C ; Extended_Pictographic# 6.0 [70] (🌷..🍼) tulip..baby bottle
1F37D ; Extended_Pictographic# 7.0 [1] (🍽️) fork and knife with plate
1F37E..1F37F ; Extended_Pictographic# 8.0 [2] (🍾..🍿) bottle with popping cork..popcorn
1F380..1F393 ; Extended_Pictographic# 6.0 [20] (🎀..🎓) ribbon..graduation cap
1F394..1F39F ; Extended_Pictographic# 7.0 [12] (🎔..🎟️) HEART WITH TIP ON THE LEFT..admission tickets
1F3A0..1F3C4 ; Extended_Pictographic# 6.0 [37] (🎠..🏄) carousel horse..person surfing
1F3C5 ; Extended_Pictographic# 7.0 [1] (🏅) sports medal
1F3C6..1F3CA ; Extended_Pictographic# 6.0 [5] (🏆..🏊) trophy..person swimming
1F3CB..1F3CE ; Extended_Pictographic# 7.0 [4] (🏋️..🏎️) person lifting weights..racing car
1F3CF..1F3D3 ; Extended_Pictographic# 8.0 [5] (🏏..🏓) cricket game..ping pong
1F3D4..1F3DF ; Extended_Pictographic# 7.0 [12] (🏔️..🏟️) snow-capped mountain..stadium
1F3E0..1F3F0 ; Extended_Pictographic# 6.0 [17] (🏠..🏰) house..castle
1F3F1..1F3F7 ; Extended_Pictographic# 7.0 [7] (🏱..🏷️) WHITE PENNANT..label
1F3F8..1F3FA ; Extended_Pictographic# 8.0 [3] (🏸..🏺) badminton..amphora
1F400..1F43E ; Extended_Pictographic# 6.0 [63] (🐀..🐾) rat..paw prints
1F43F ; Extended_Pictographic# 7.0 [1] (🐿️) chipmunk
1F440 ; Extended_Pictographic# 6.0 [1] (👀) eyes
1F441 ; Extended_Pictographic# 7.0 [1] (👁️) eye
1F442..1F4F7 ; Extended_Pictographic# 6.0[182] (👂..📷) ear..camera
1F4F8 ; Extended_Pictographic# 7.0 [1] (📸) camera with flash
1F4F9..1F4FC ; Extended_Pictographic# 6.0 [4] (📹..📼) video camera..videocassette
1F4FD..1F4FE ; Extended_Pictographic# 7.0 [2] (📽️..📾) film projector..PORTABLE STEREO
1F4FF ; Extended_Pictographic# 8.0 [1] (📿) prayer beads
1F500..1F53D ; Extended_Pictographic# 6.0 [62] (🔀..🔽) shuffle tracks button..downwards button
1F546..1F54A ; Extended_Pictographic# 7.0 [5] (🕆..🕊️) WHITE LATIN CROSS..dove
1F54B..1F54F ; Extended_Pictographic# 8.0 [5] (🕋..🕏) kaaba..BOWL OF HYGIEIA
1F550..1F567 ; Extended_Pictographic# 6.0 [24] (🕐..🕧) one o’clock..twelve-thirty
1F568..1F579 ; Extended_Pictographic# 7.0 [18] (🕨..🕹️) RIGHT SPEAKER..joystick
1F57A ; Extended_Pictographic# 9.0 [1] (🕺) man dancing
1F57B..1F5A3 ; Extended_Pictographic# 7.0 [41] (🕻..🖣) LEFT HAND TELEPHONE RECEIVER..BLACK DOWN POINTING BACKHAND INDEX
1F5A4 ; Extended_Pictographic# 9.0 [1] (🖤) black heart
1F5A5..1F5FA ; Extended_Pictographic# 7.0 [86] (🖥️..🗺️) desktop computer..world map
1F5FB..1F5FF ; Extended_Pictographic# 6.0 [5] (🗻..🗿) mount fuji..moai
1F600 ; Extended_Pictographic# 6.1 [1] (😀) grinning face
1F601..1F610 ; Extended_Pictographic# 6.0 [16] (😁..😐) beaming face with smiling eyes..neutral face
1F611 ; Extended_Pictographic# 6.1 [1] (😑) expressionless face
1F612..1F614 ; Extended_Pictographic# 6.0 [3] (😒..😔) unamused face..pensive face
1F615 ; Extended_Pictographic# 6.1 [1] (😕) confused face
1F616 ; Extended_Pictographic# 6.0 [1] (😖) confounded face
1F617 ; Extended_Pictographic# 6.1 [1] (😗) kissing face
1F618 ; Extended_Pictographic# 6.0 [1] (😘) face blowing a kiss
1F619 ; Extended_Pictographic# 6.1 [1] (😙) kissing face with smiling eyes
1F61A ; Extended_Pictographic# 6.0 [1] (😚) kissing face with closed eyes
1F61B ; Extended_Pictographic# 6.1 [1] (😛) face with tongue
1F61C..1F61E ; Extended_Pictographic# 6.0 [3] (😜..😞) winking face with tongue..disappointed face
1F61F ; Extended_Pictographic# 6.1 [1] (😟) worried face
1F620..1F625 ; Extended_Pictographic# 6.0 [6] (😠..😥) angry face..sad but relieved face
1F626..1F627 ; Extended_Pictographic# 6.1 [2] (😦..😧) frowning face with open mouth..anguished face
1F628..1F62B ; Extended_Pictographic# 6.0 [4] (😨..😫) fearful face..tired face
1F62C ; Extended_Pictographic# 6.1 [1] (😬) grimacing face
1F62D ; Extended_Pictographic# 6.0 [1] (😭) loudly crying face
1F62E..1F62F ; Extended_Pictographic# 6.1 [2] (😮..😯) face with open mouth..hushed face
1F630..1F633 ; Extended_Pictographic# 6.0 [4] (😰..😳) anxious face with sweat..flushed face
1F634 ; Extended_Pictographic# 6.1 [1] (😴) sleeping face
1F635..1F640 ; Extended_Pictographic# 6.0 [12] (😵..🙀) dizzy face..weary cat
1F641..1F642 ; Extended_Pictographic# 7.0 [2] (🙁..🙂) slightly frowning face..slightly smiling face
1F643..1F644 ; Extended_Pictographic# 8.0 [2] (🙃..🙄) upside-down face..face with rolling eyes
1F645..1F64F ; Extended_Pictographic# 6.0 [11] (🙅..🙏) person gesturing NO..folded hands
1F680..1F6C5 ; Extended_Pictographic# 6.0 [70] (🚀..🛅) rocket..left luggage
1F6C6..1F6CF ; Extended_Pictographic# 7.0 [10] (🛆..🛏️) TRIANGLE WITH ROUNDED CORNERS..bed
1F6D0 ; Extended_Pictographic# 8.0 [1] (🛐) place of worship
1F6D1..1F6D2 ; Extended_Pictographic# 9.0 [2] (🛑..🛒) stop sign..shopping cart
1F6D3..1F6D4 ; Extended_Pictographic# 10.0 [2] (🛓..🛔) STUPA..PAGODA
1F6D5 ; Extended_Pictographic# 12.0 [1] (🛕) hindu temple
1F6D6..1F6DF ; Extended_Pictographic# NA [10] (🛖..🛟) <reserved-1F6D6>..<reserved-1F6DF>
1F6E0..1F6EC ; Extended_Pictographic# 7.0 [13] (🛠️..🛬) hammer and wrench..airplane arrival
1F6ED..1F6EF ; Extended_Pictographic# NA [3] (🛭..🛯) <reserved-1F6ED>..<reserved-1F6EF>
1F6F0..1F6F3 ; Extended_Pictographic# 7.0 [4] (🛰️..🛳️) satellite..passenger ship
1F6F4..1F6F6 ; Extended_Pictographic# 9.0 [3] (🛴..🛶) kick scooter..canoe
1F6F7..1F6F8 ; Extended_Pictographic# 10.0 [2] (🛷..🛸) sled..flying saucer
1F6F9 ; Extended_Pictographic# 11.0 [1] (🛹) skateboard
1F6FA ; Extended_Pictographic# 12.0 [1] (🛺) auto rickshaw
1F6FB..1F6FF ; Extended_Pictographic# NA [5] (🛻..🛿) <reserved-1F6FB>..<reserved-1F6FF>
1F774..1F77F ; Extended_Pictographic# NA [12] (🝴..🝿) <reserved-1F774>..<reserved-1F77F>
1F7D5..1F7D8 ; Extended_Pictographic# 11.0 [4] (🟕..🟘) CIRCLED TRIANGLE..NEGATIVE CIRCLED SQUARE
1F7D9..1F7DF ; Extended_Pictographic# NA [7] (🟙..🟟) <reserved-1F7D9>..<reserved-1F7DF>
1F7E0..1F7EB ; Extended_Pictographic# 12.0 [12] (🟠..🟫) orange circle..brown square
1F7EC..1F7FF ; Extended_Pictographic# NA [20] (🟬..🟿) <reserved-1F7EC>..<reserved-1F7FF>
1F80C..1F80F ; Extended_Pictographic# NA [4] (🠌..🠏) <reserved-1F80C>..<reserved-1F80F>
1F848..1F84F ; Extended_Pictographic# NA [8] (🡈..🡏) <reserved-1F848>..<reserved-1F84F>
1F85A..1F85F ; Extended_Pictographic# NA [6] (🡚..🡟) <reserved-1F85A>..<reserved-1F85F>
1F888..1F88F ; Extended_Pictographic# NA [8] (🢈..🢏) <reserved-1F888>..<reserved-1F88F>
1F8AE..1F8FF ; Extended_Pictographic# NA [82] (🢮..🣿) <reserved-1F8AE>..<reserved-1F8FF>
1F90C ; Extended_Pictographic# NA [1] (🤌) <reserved-1F90C>
1F90D..1F90F ; Extended_Pictographic# 12.0 [3] (🤍..🤏) white heart..pinching hand
1F910..1F918 ; Extended_Pictographic# 8.0 [9] (🤐..🤘) zipper-mouth face..sign of the horns
1F919..1F91E ; Extended_Pictographic# 9.0 [6] (🤙..🤞) call me hand..crossed fingers
1F91F ; Extended_Pictographic# 10.0 [1] (🤟) love-you gesture
1F920..1F927 ; Extended_Pictographic# 9.0 [8] (🤠..🤧) cowboy hat face..sneezing face
1F928..1F92F ; Extended_Pictographic# 10.0 [8] (🤨..🤯) face with raised eyebrow..exploding head
1F930 ; Extended_Pictographic# 9.0 [1] (🤰) pregnant woman
1F931..1F932 ; Extended_Pictographic# 10.0 [2] (🤱..🤲) breast-feeding..palms up together
1F933..1F93A ; Extended_Pictographic# 9.0 [8] (🤳..🤺) selfie..person fencing
1F93C..1F93E ; Extended_Pictographic# 9.0 [3] (🤼..🤾) people wrestling..person playing handball
1F93F ; Extended_Pictographic# 12.0 [1] (🤿) diving mask
1F940..1F945 ; Extended_Pictographic# 9.0 [6] (🥀..🥅) wilted flower..goal net
1F947..1F94B ; Extended_Pictographic# 9.0 [5] (🥇..🥋) 1st place medal..martial arts uniform
1F94C ; Extended_Pictographic# 10.0 [1] (🥌) curling stone
1F94D..1F94F ; Extended_Pictographic# 11.0 [3] (🥍..🥏) lacrosse..flying disc
1F950..1F95E ; Extended_Pictographic# 9.0 [15] (🥐..🥞) croissant..pancakes
1F95F..1F96B ; Extended_Pictographic# 10.0 [13] (🥟..🥫) dumpling..canned food
1F96C..1F970 ; Extended_Pictographic# 11.0 [5] (🥬..🥰) leafy green..smiling face with hearts
1F971 ; Extended_Pictographic# 12.0 [1] (🥱) yawning face
1F972 ; Extended_Pictographic# NA [1] (🥲) <reserved-1F972>
1F973..1F976 ; Extended_Pictographic# 11.0 [4] (🥳..🥶) partying face..cold face
1F977..1F979 ; Extended_Pictographic# NA [3] (🥷..🥹) <reserved-1F977>..<reserved-1F979>
1F97A ; Extended_Pictographic# 11.0 [1] (🥺) pleading face
1F97B ; Extended_Pictographic# 12.0 [1] (🥻) sari
1F97C..1F97F ; Extended_Pictographic# 11.0 [4] (🥼..🥿) lab coat..flat shoe
1F980..1F984 ; Extended_Pictographic# 8.0 [5] (🦀..🦄) crab..unicorn
1F985..1F991 ; Extended_Pictographic# 9.0 [13] (🦅..🦑) eagle..squid
1F992..1F997 ; Extended_Pictographic# 10.0 [6] (🦒..🦗) giraffe..cricket
1F998..1F9A2 ; Extended_Pictographic# 11.0 [11] (🦘..🦢) kangaroo..swan
1F9A3..1F9A4 ; Extended_Pictographic# NA [2] (🦣..🦤) <reserved-1F9A3>..<reserved-1F9A4>
1F9A5..1F9AA ; Extended_Pictographic# 12.0 [6] (🦥..🦪) sloth..oyster
1F9AB..1F9AD ; Extended_Pictographic# NA [3] (🦫..🦭) <reserved-1F9AB>..<reserved-1F9AD>
1F9AE..1F9AF ; Extended_Pictographic# 12.0 [2] (🦮..🦯) guide dog..probing cane
1F9B0..1F9B9 ; Extended_Pictographic# 11.0 [10] (🦰..🦹) red hair..supervillain
1F9BA..1F9BF ; Extended_Pictographic# 12.0 [6] (🦺..🦿) safety vest..mechanical leg
1F9C0 ; Extended_Pictographic# 8.0 [1] (🧀) cheese wedge
1F9C1..1F9C2 ; Extended_Pictographic# 11.0 [2] (🧁..🧂) cupcake..salt
1F9C3..1F9CA ; Extended_Pictographic# 12.0 [8] (🧃..🧊) beverage box..ice cube
1F9CB..1F9CC ; Extended_Pictographic# NA [2] (🧋..🧌) <reserved-1F9CB>..<reserved-1F9CC>
1F9CD..1F9CF ; Extended_Pictographic# 12.0 [3] (🧍..🧏) person standing..deaf person
1F9D0..1F9E6 ; Extended_Pictographic# 10.0 [23] (🧐..🧦) face with monocle..socks
1F9E7..1F9FF ; Extended_Pictographic# 11.0 [25] (🧧..🧿) red envelope..nazar amulet
1FA00..1FA53 ; Extended_Pictographic# 12.0 [84] (🨀..🩓) NEUTRAL CHESS KING..BLACK CHESS KNIGHT-BISHOP
1FA54..1FA5F ; Extended_Pictographic# NA [12] (🩔..🩟) <reserved-1FA54>..<reserved-1FA5F>
1FA60..1FA6D ; Extended_Pictographic# 11.0 [14] (🩠..🩭) XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER
1FA6E..1FA6F ; Extended_Pictographic# NA [2] (🩮..🩯) <reserved-1FA6E>..<reserved-1FA6F>
1FA70..1FA73 ; Extended_Pictographic# 12.0 [4] (🩰..🩳) ballet shoes..shorts
1FA74..1FA77 ; Extended_Pictographic# NA [4] (🩴..🩷) <reserved-1FA74>..<reserved-1FA77>
1FA78..1FA7A ; Extended_Pictographic# 12.0 [3] (🩸..🩺) drop of blood..stethoscope
1FA7B..1FA7F ; Extended_Pictographic# NA [5] (🩻..🩿) <reserved-1FA7B>..<reserved-1FA7F>
1FA80..1FA82 ; Extended_Pictographic# 12.0 [3] (🪀..🪂) yo-yo..parachute
1FA83..1FA8F ; Extended_Pictographic# NA [13] (🪃..🪏) <reserved-1FA83>..<reserved-1FA8F>
1FA90..1FA95 ; Extended_Pictographic# 12.0 [6] (🪐..🪕) ringed planet..banjo
1FA96..1FFFD ; Extended_Pictographic# NA[1384] (🪖..🿽) <reserved-1FA96>..<reserved-1FFFD>
# Total elements: 3793
#EOF

View File

@ -34,6 +34,7 @@ include GensrcBuffer.gmk
include GensrcExceptions.gmk
include GensrcVarHandles.gmk
include GensrcModuleLoaderMap.gmk
include GensrcEmojiData.gmk
################################################################################

View File

@ -0,0 +1,44 @@
#
# Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation. Oracle designates this
# particular file as subject to the "Classpath" exception as provided
# by Oracle in the LICENSE file that accompanied this code.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#
#
# Rules to create $(SUPPORT_OUTPUTDIR)/gensrc/java.base/java/util/regex/EmojiData.java
#
# Path of the generated source file produced by this rule.
GENSRC_EMOJIDATA := $(SUPPORT_OUTPUTDIR)/gensrc/java.base/java/util/regex/EmojiData.java
# Template file passed to the generator as its first argument.
EMOJIDATATEMP = $(TOPDIR)/src/java.base/share/classes/java/util/regex/EmojiData.java.template
# Directory that contains emoji-data.txt; passed to the generator as its second argument.
UNICODEDATA = $(TOPDIR)/make/data/unicodedata
# Regenerate EmojiData.java whenever the build tools, the template, or
# emoji-data.txt change. The generator receives, in order: the template,
# the Unicode data directory, and the output file.
$(GENSRC_EMOJIDATA): $(BUILD_TOOLS_JDK) $(EMOJIDATATEMP) $(UNICODEDATA)/emoji-data.txt
$(call LogInfo, Generating $@)
$(call MakeTargetDir)
$(TOOL_GENERATEEMOJIDATA) \
$(EMOJIDATATEMP) \
$(UNICODEDATA) \
$(GENSRC_EMOJIDATA)
# Register the generated file in GENSRC_JAVA_BASE
# (presumably the list of java.base gensrc targets -- confirm against the including makefile).
GENSRC_JAVA_BASE += $(GENSRC_EMOJIDATA)

View File

@ -0,0 +1,142 @@
/*
* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package build.tools.generateemojidata;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
 * Generate EmojiData.java
 *    args[0]: Full path string to the template file
 *    args[1]: Full path string to the directory that contains "emoji-data.txt"
 *    args[2]: Full path string to the generated .java file
 *
 * <p>The tool collects every {@code Extended_Pictographic} entry of
 * "emoji-data.txt", merges adjacent ranges, renders them as a chain of
 * {@code cp} comparisons and writes the template to the output file with the
 * placeholder line replaced by that chain.
 *
 * <p>Any I/O failure is rethrown as an unchecked exception so that the JVM
 * exits with a non-zero status and the build stops instead of silently
 * continuing without a freshly generated file.
 */
public class GenerateEmojiData {

    /** Template line (matched as a whole line) that is replaced by the generated conditions. */
    private static final String PLACEHOLDER = "%%%EXTPICT%%%";

    /** Leading indentation of every generated condition line. */
    private static final String INDENT = " ".repeat(12);

    /**
     * Entry point of the generator.
     *
     * @param args template path, data directory, output path (see class comment)
     * @throws IllegalArgumentException if fewer than three arguments are given
     * @throws java.io.UncheckedIOException if reading the inputs or writing the output fails
     */
    public static void main(String[] args) {
        if (args.length < 3) {
            throw new IllegalArgumentException(
                "Usage: GenerateEmojiData <template> <unicodedata dir> <output file>");
        }
        try {
            List<Range> extPictRanges = readExtendedPictographicRanges(args[1]);
            String extPictCodePoints = toCodePointConditions(extPictRanges);

            // Generate EmojiData.java file
            List<String> generated;
            // Files.lines keeps the file open until the stream is closed.
            try (Stream<String> template = Files.lines(Paths.get(args[0]))) {
                generated = template
                    .map(l -> l.equals(PLACEHOLDER) ? extPictCodePoints : l)
                    .collect(Collectors.toList());
            }
            Files.write(Paths.get(args[2]), generated,
                StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
        } catch (IOException e) {
            // Fail the build step instead of only logging the problem.
            throw new java.io.UncheckedIOException(e);
        }
    }

    /**
     * Reads "emoji-data.txt" from the given directory and returns the
     * Extended_Pictographic ranges sorted by start, with directly adjacent
     * ranges collapsed into one.
     */
    private static List<Range> readExtendedPictographicRanges(String dataDir)
            throws IOException {
        try (Stream<String> lines = Files.lines(Paths.get(dataDir, "emoji-data.txt"))) {
            return lines
                .filter(Predicate.not(l -> l.startsWith("#") || l.isBlank()))
                .filter(l -> l.contains("; Extended_Pictograph"))
                // keep only the leading "XXXX" or "XXXX..YYYY" token
                .map(l -> new Range(l.replaceFirst(" .*", "")))
                .sorted()
                .collect(ArrayList<Range>::new,
                    (list, r) -> {
                        // collapsing consecutive pictographic ranges
                        int lastIndex = list.size() - 1;
                        if (lastIndex >= 0) {
                            Range lastRange = list.get(lastIndex);
                            if (lastRange.last + 1 == r.start) {
                                list.set(lastIndex, new Range(lastRange.start, r.last));
                                return;
                            }
                        }
                        list.add(r);
                    },
                    ArrayList<Range>::addAll);
        }
    }

    /**
     * Renders the ranges as the body of a boolean expression over {@code cp}:
     * one or two equality tests for ranges of one or two code points, and a
     * parenthesized bounds check otherwise. Terms are joined with
     * {@code " ||\n"} and the result ends with {@code ";\n"}.
     */
    private static String toCodePointConditions(List<Range> ranges) {
        return ranges.stream()
            .map(r -> {
                if (r.start == r.last) {
                    return INDENT + "cp == 0x" + toHexString(r.start);
                } else if (r.start == r.last - 1) {
                    return INDENT + "cp == 0x" + toHexString(r.start) + " ||\n" +
                           INDENT + "cp == 0x" + toHexString(r.last);
                } else {
                    // one column less so that the code point lines up after "("
                    return " ".repeat(11) + "(cp >= 0x" + toHexString(r.start) +
                           " && cp <= 0x" + toHexString(r.last) + ")";
                }
            })
            .collect(Collectors.joining(" ||\n")) + ";\n";
    }

    /** Parses an unsigned hexadecimal string into an int. */
    static int toInt(String hexStr) {
        return Integer.parseUnsignedInt(hexStr, 16);
    }

    /** Formats a code point as upper-case hex, left-padded with zeros to at least 4 digits. */
    static String toHexString(int cp) {
        String ret = Integer.toUnsignedString(cp, 16).toUpperCase();
        if (ret.length() < 4) {
            ret = "0".repeat(4 - ret.length()) + ret;
        }
        return ret;
    }

    /** An inclusive range of code points, ordered by its start. */
    static class Range implements Comparable<Range> {
        int start;
        int last;

        Range (int start, int last) {
            this.start = start;
            this.last = last;
        }

        /**
         * Parses either a single code point ("XXXX") or a range
         * ("XXXX..YYYY"); anything from " #" or ";" onwards is ignored.
         */
        Range (String input) {
            input = input.replaceFirst("\\s#.*", "");
            start = toInt(input.replaceFirst("[\\s\\.].*", ""));
            last = input.contains("..") ?
                toInt(input.replaceFirst(".*\\.\\.", "")
                           .replaceFirst(";.*", "").trim())
                : start;
        }

        @Override
        public String toString() {
            return "Start: " + toHexString(start) + ", Last: " + toHexString(last);
        }

        @Override
        public int compareTo(Range other) {
            return Integer.compare(start, other.start);
        }
    }
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,46 @@
/*
* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package java.util.regex;
/**
* Holds data contained in the Unicode Technical Standard #51: Unicode
* Emoji.
*
* Currently it is only used for the rule "GB11" in UAX #29 Unicode Text
* Segmentation.
*
* This source is a template: at build time the GenerateEmojiData tool copies
* it line by line and replaces the line consisting solely of the
* %%%EXTPICT%%% marker with a chain of code point comparisons derived from
* the Extended_Pictographic entries of emoji-data.txt. The marker must
* therefore stay alone on its line.
*/
final class EmojiData {
/**
* Returns whether the code point is an extended pictographic or not.
*
* @param cp code point to examine
* @return true if {@code cp} is an extended pictographic
*/
static boolean isExtendedPictographic(int cp) {
return
%%%EXTPICT%%%
}
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -25,17 +25,56 @@
package java.util.regex;
import java.util.Objects;
final class Grapheme {
/**
* Determines if there is an extended grapheme cluster boundary between two
* continuing characters {@code cp1} and {@code cp2}.
* Look for the next extended grapheme cluster boundary in a CharSequence. It assumes
* the start of the char sequence is a boundary.
* <p>
* See Unicode Standard Annex #29 Unicode Text Segmentation for the specification
* for the extended grapheme cluster boundary rules
* for the extended grapheme cluster boundary rules. The following implementation
* is based on version 12.0 of the annex.
* (http://www.unicode.org/reports/tr29/tr29-35.html)
*
* @param src the {@code CharSequence} to be scanned
* @param off offset to start looking for the next boundary in the src
* @param limit limit offset in the src (exclusive)
* @return the next possible boundary
*/
static boolean isBoundary(int cp1, int cp2) {
return rules[getType(cp1)][getType(cp2)];
static int nextBoundary(CharSequence src, int off, int limit) {
// Rejects off/limit values that do not describe a valid sub-range of src.
Objects.checkFromToIndex(off, limit, src.length());
// The scan always starts at index 0 of src (not at off), so that the
// GB11/GB12 state below reflects the code points that precede off.
// NOTE(review): each call therefore walks src from its beginning;
// callers that invoke this repeatedly with growing off pay for that.
int ch0 = Character.codePointAt(src, 0);
// ret is the index of the code point following ch0, i.e. the boundary candidate.
int ret = Character.charCount(ch0);
int ch1;
// indicates whether gb11 or gb12 is underway
// gb11: the current cluster began with an Extended_Pictographic code point.
boolean gb11 = EmojiData.isExtendedPictographic(ch0);
// riCount: number of RI code points seen since the last boundary.
int riCount = getType(ch0) == RI ? 1 : 0;
while (ret < limit) {
ch1 = Character.codePointAt(src, ret);
int t0 = getType(ch0);
int t1 = getType(ch1);
if (gb11 && t0 == ZWJ && t1 == EXTENDED_PICTOGRAPHIC) {
// ZWJ x Extended_Pictographic inside an emoji sequence: no break here,
// and the pending gb11 state is consumed.
gb11 = false;
} else if (riCount % 2 == 1 && t0 == RI && t1 == RI) {
// continue for gb12
// (an odd number of RIs so far means ch1 completes a pair with ch0)
} else if (rules[t0][t1]) {
// The pairwise table reports a boundary between ch0 and ch1.
if (ret > off) {
// First boundary strictly after off: this is the result.
break;
} else {
// Boundary at or before off: a new cluster starts at ch1,
// so restart the gb11/gb12 tracking from there.
gb11 = EmojiData.isExtendedPictographic(ch1);
riCount = 0;
}
}
// Advance by one code point; ch1 becomes the left-hand side of the next pair.
riCount += getType(ch1) == RI ? 1 : 0;
ch0 = ch1;
ret += Character.charCount(ch1);
}
return ret;
}
// types
@ -44,22 +83,24 @@ final class Grapheme {
private static final int LF = 2;
private static final int CONTROL = 3;
private static final int EXTEND = 4;
private static final int RI = 5;
private static final int PREPEND = 6;
private static final int SPACINGMARK = 7;
private static final int L = 8;
private static final int V = 9;
private static final int T = 10;
private static final int LV = 11;
private static final int LVT = 12;
private static final int ZWJ = 5;
private static final int RI = 6;
private static final int PREPEND = 7;
private static final int SPACINGMARK = 8;
private static final int L = 9;
private static final int V = 10;
private static final int T = 11;
private static final int LV = 12;
private static final int LVT = 13;
private static final int EXTENDED_PICTOGRAPHIC = 14;
private static final int FIRST_TYPE = 0;
private static final int LAST_TYPE = 12;
private static final int LAST_TYPE = 14;
private static boolean[][] rules;
static {
rules = new boolean[LAST_TYPE + 1][LAST_TYPE + 1];
// default, any + any
// GB 999 Any + Any -> default
for (int i = FIRST_TYPE; i <= LAST_TYPE; i++)
for (int j = FIRST_TYPE; j <= LAST_TYPE; j++)
rules[i][j] = true;
@ -76,13 +117,12 @@ final class Grapheme {
// GB 8 (LVT | T) x T
rules[LVT][T] = false;
rules[T][T] = false;
// GB 8a RI x RI
rules[RI][RI] = false;
// GB 9 x Extend
// GB 9 x (Extend|ZWJ)
// GB 9a x Spacing Mark
// GB 9b Prepend x
for (int i = FIRST_TYPE; i <= LAST_TYPE; i++) {
rules[i][EXTEND] = false;
rules[i][ZWJ] = false;
rules[i][SPACINGMARK] = false;
rules[PREPEND][i] = false;
}
@ -95,7 +135,9 @@ final class Grapheme {
}
// GB 3 CR x LF
rules[CR][LF] = false;
// GB 10 Any + Any -> default
// GB 11 Extended_Pictographic x (Extend|ZWJ)
rules[EXTENDED_PICTOGRAPHIC][EXTEND] = false;
rules[EXTENDED_PICTOGRAPHIC][ZWJ] = false;
}
// Hangul syllables
@ -123,6 +165,10 @@ final class Grapheme {
@SuppressWarnings("fallthrough")
private static int getType(int cp) {
if (EmojiData.isExtendedPictographic(cp)) {
return EXTENDED_PICTOGRAPHIC;
}
int type = Character.getType(cp);
switch(type) {
case Character.CONTROL:
@ -131,29 +177,36 @@ final class Grapheme {
if (cp == 0x000A)
return LF;
return CONTROL;
case Character.UNASSIGNED:
case Character.UNASSIGNED:
// NOTE: #tr29 lists "Unassigned and Default_Ignorable_Code_Point" as Control
// but GraphemeBreakTest.txt lists u+0378/reserved-0378 as "Other"
// so type it as "Other" to make the test happy
if (cp == 0x0378)
return OTHER;
if (cp == 0x0378)
return OTHER;
case Character.LINE_SEPARATOR:
case Character.PARAGRAPH_SEPARATOR:
case Character.SURROGATE:
return CONTROL;
case Character.FORMAT:
if (cp == 0x200C || cp == 0x200D)
if (cp == 0x200C ||
cp >= 0xE0020 && cp <= 0xE007F)
return EXTEND;
if (cp == 0x200D)
return ZWJ;
if (cp >= 0x0600 && cp <= 0x0605 ||
cp == 0x06DD || cp == 0x070F || cp == 0x08E2 ||
cp == 0x110BD || cp == 0x110CD)
return PREPEND;
return CONTROL;
case Character.NON_SPACING_MARK:
case Character.ENCLOSING_MARK:
// NOTE:
// #tr29 "plus a few General_Category = Spacing_Mark needed for
// canonical equivalence."
// but for "extended grapheme clusters" support, there is no
// need actually to diff "extend" and "spackmark" given GB9, GB9a
return EXTEND;
// NOTE:
// #tr29 "plus a few General_Category = Spacing_Mark needed for
// canonical equivalence."
// but for "extended grapheme clusters" support, there is no
// need actually to diff "extend" and "spacingmark" given GB9, GB9a
return EXTEND;
case Character.COMBINING_SPACING_MARK:
if (isExcludedSpacingMark(cp))
return OTHER;
@ -167,9 +220,11 @@ final class Grapheme {
return RI;
return OTHER;
case Character.MODIFIER_LETTER:
case Character.MODIFIER_SYMBOL:
// WARNING:
// not mentioned in #tr29 but listed in GraphemeBreakProperty.txt
if (cp == 0xFF9E || cp == 0xFF9F)
if (cp == 0xFF9E || cp == 0xFF9F ||
cp >= 0x1F3FB && cp <= 0x1F3FF)
return EXTEND;
return OTHER;
case Character.OTHER_LETTER:
@ -199,6 +254,22 @@ final class Grapheme {
return V;
if (cp >= 0xD7CB && cp <= 0xD7FB)
return T;
// Prepend
switch (cp) {
case 0x0D4E:
case 0x111C2:
case 0x111C3:
case 0x11A3A:
case 0x11A84:
case 0x11A85:
case 0x11A86:
case 0x11A87:
case 0x11A88:
case 0x11A89:
case 0x11D46:
return PREPEND;
}
}
return OTHER;
}

View File

@ -540,7 +540,7 @@ import jdk.internal.util.ArraysSupport;
* <p> This class is in conformance with Level 1 of <a
* href="http://www.unicode.org/reports/tr18/"><i>Unicode Technical
* Standard #18: Unicode Regular Expression</i></a>, plus RL2.1
* Canonical Equivalents.
* Canonical Equivalents and RL2.2 Extended Grapheme Clusters.
* <p>
* <b>Unicode escape sequences</b> such as <code>&#92;u2014</code> in Java source code
* are processed as described in section 3.3 of
@ -1501,15 +1501,8 @@ public final class Pattern
off++;
continue;
}
int j = off + Character.charCount(ch0);
int j = Grapheme.nextBoundary(src, off, limit);
int ch1;
while (j < limit) {
ch1 = src.codePointAt(j);
if (Grapheme.isBoundary(ch0, ch1))
break;
ch0 = ch1;
j += Character.charCount(ch1);
}
String seq = src.substring(off, j);
String nfd = Normalizer.normalize(seq, Normalizer.Form.NFD);
off = j;
@ -3975,14 +3968,7 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
if (i < matcher.to) {
int ch0 = Character.codePointAt(seq, i);
int n = Character.charCount(ch0);
int j = i + n;
while (j < matcher.to) {
int ch1 = Character.codePointAt(seq, j);
if (Grapheme.isBoundary(ch0, ch1))
break;
ch0 = ch1;
j += Character.charCount(ch1);
}
int j = Grapheme.nextBoundary(seq, i, matcher.to);
if (i + n == j) { // single, assume nfc cp
if (predicate.is(ch0))
return next.match(matcher, j, seq);
@ -4021,15 +4007,7 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
static class XGrapheme extends Node {
boolean match(Matcher matcher, int i, CharSequence seq) {
if (i < matcher.to) {
int ch0 = Character.codePointAt(seq, i);
i += Character.charCount(ch0);
while (i < matcher.to) {
int ch1 = Character.codePointAt(seq, i);
if (Grapheme.isBoundary(ch0, ch1))
break;
ch0 = ch1;
i += Character.charCount(ch1);
}
i = Grapheme.nextBoundary(seq, i, matcher.to);
return next.match(matcher, i, seq);
}
matcher.hitEnd = true;
@ -4059,8 +4037,9 @@ loop: for(int x=0, offset=0; x<nCodePoints; x++, offset+=len) {
}
if (i < endIndex) {
if (Character.isSurrogatePair(seq.charAt(i-1), seq.charAt(i)) ||
!Grapheme.isBoundary(Character.codePointBefore(seq, i),
Character.codePointAt(seq, i))) {
Grapheme.nextBoundary(seq,
i - Character.charCount(Character.codePointBefore(seq, i)),
i + Character.charCount(Character.codePointAt(seq, i))) > i) {
return false;
}
} else {

View File

@ -0,0 +1,501 @@
/*
* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
// (c) 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
// created: 2018may10 Markus W. Scherer
package sun.text.normalizer;
import java.util.Iterator;
import java.util.NoSuchElementException;
/**
 * An abstract mapping from Unicode code points (U+0000..U+10FFFF) to int values.
 * Note that this type is not a java.util.Map.
 *
 * @draft ICU 63
 * @provisional This API might change or be removed in a future release.
 */
public abstract class CodePointMap implements Iterable<CodePointMap.Range> {
    /**
     * Controls how getRange() reports same-value ranges that overlap the
     * surrogate code points. NORMAL is the right choice for most users.
     *
     * @see #getRange
     * @draft ICU 63
     * @provisional This API might change or be removed in a future release.
     */
    public enum RangeOption {
        /**
         * Same-value ranges are reported exactly as stored in the map.
         * This is the option most users want.
         *
         * @draft ICU 63
         * @provisional This API might change or be removed in a future release.
         */
        NORMAL,
        /**
         * Same-value ranges are reported as stored in the map, except that the
         * lead surrogates (U+D800..U+DBFF) are reported with the surrogateValue
         * given to getRange() as a separate argument. The surrogateValue is not
         * passed through filter().
         * See {@link Character#isHighSurrogate}.
         *
         * <p>Prefer NORMAL unless this behavior is specifically needed.
         *
         * <p>Useful for maps that assign surrogate code *units* special values
         * (optimized UTF-16 string processing, or special error behavior for
         * unpaired surrogates) where those values must not be attributed to the
         * lead surrogate code *points*.
         *
         * @draft ICU 63
         * @provisional This API might change or be removed in a future release.
         */
        FIXED_LEAD_SURROGATES,
        /**
         * Same-value ranges are reported as stored in the map, except that all
         * surrogates (U+D800..U+DFFF) are reported with the surrogateValue
         * given to getRange() as a separate argument. The surrogateValue is not
         * passed through filter().
         * See {@link Character#isSurrogate}.
         *
         * <p>Prefer NORMAL unless this behavior is specifically needed.
         *
         * <p>Useful for maps that assign surrogate code *units* special values
         * (optimized UTF-16 string processing, or special error behavior for
         * unpaired surrogates) where those values must not be attributed to the
         * lead surrogate code *points*.
         *
         * @draft ICU 63
         * @provisional This API might change or be removed in a future release.
         */
        FIXED_ALL_SURROGATES
    }

    /**
     * Callback that transforms a map value; getRange() calls it when one is
     * supplied and reports the transformed value.
     *
     * <p>Typical uses: masking out value bits, filtering for one of several
     * values, or deriving a value index from the stored value.
     *
     * @see #getRange
     * @see #iterator
     * @draft ICU 63
     * @provisional This API might change or be removed in a future release.
     */
    public interface ValueFilter {
        /**
         * Transforms a map value.
         *
         * @param value map value
         * @return modified value
         * @draft ICU 63
         * @provisional This API might change or be removed in a future release.
         */
        int apply(int value);
    }

    /**
     * Result holder for range iteration: every code point from start to end
     * (inclusive) maps to the same value. That value may have been transformed
     * by {@link ValueFilter#apply(int)}, or may be the surrogateValue when a
     * RangeOption other than "normal" was used.
     *
     * @see #getRange
     * @see #iterator
     * @draft ICU 63
     * @provisional This API might change or be removed in a future release.
     */
    public static final class Range {
        private int start;
        private int end;
        private int value;

        /**
         * Creates a range with start and end set to -1 and value set to 0.
         *
         * @draft ICU 63
         * @provisional This API might change or be removed in a future release.
         */
        public Range() {
            this.start = -1;
            this.end = -1;
            this.value = 0;
        }

        /**
         * @return the start code point
         * @draft ICU 63
         * @provisional This API might change or be removed in a future release.
         */
        public int getStart() {
            return start;
        }

        /**
         * @return the (inclusive) end code point
         * @draft ICU 63
         * @provisional This API might change or be removed in a future release.
         */
        public int getEnd() {
            return end;
        }

        /**
         * @return the range value
         * @draft ICU 63
         * @provisional This API might change or be removed in a future release.
         */
        public int getValue() {
            return value;
        }

        /**
         * Overwrites the range. With {@link #iterator()}, iteration resumes
         * after the end set here.
         *
         * @param start new start code point
         * @param end new end code point
         * @param value new value
         * @draft ICU 63
         * @provisional This API might change or be removed in a future release.
         */
        public void set(int start, int end, int value) {
            this.start = start;
            this.end = end;
            this.value = value;
        }
    }

    /** Iterator backing {@link #iterator()}; reuses a single Range instance. */
    private final class RangeIterator implements Iterator<Range> {
        private Range range = new Range();

        @Override
        public boolean hasNext() {
            // More ranges remain while the previous end is below U+10FFFF
            // (-1 is the state before the first call to next()).
            final int previousEnd = range.end;
            return previousEnd >= -1 && previousEnd < 0x10ffff;
        }

        @Override
        public Range next() {
            if (!getRange(range.end + 1, null, range)) {
                throw new NoSuchElementException();
            }
            return range;
        }

        @Override
        public final void remove() {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Walks the code points of a string and looks up the map value of each.
     * This is not a java.util.Iterator.
     *
     * <pre>
     * void onString(CodePointMap map, CharSequence s, int start) {
     *     CodePointMap.StringIterator iter = map.stringIterator(s, start);
     *     while (iter.next()) {
     *         int end = iter.getIndex();  // code point from between start and end
     *         useValue(s, start, end, iter.getCodePoint(), iter.getValue());
     *         start = end;
     *     }
     * }
     * </pre>
     *
     * <p>This class is not intended for public subclassing.
     *
     * @draft ICU 63
     * @provisional This API might change or be removed in a future release.
     */
    public class StringIterator {
        /**
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        protected CharSequence s;
        /**
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        protected int sIndex;
        /**
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        protected int c;
        /**
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        protected int value;

        /**
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        protected StringIterator(CharSequence s, int sIndex) {
            this.s = s;
            this.sIndex = sIndex;
            this.c = -1;
            this.value = 0;
        }

        /**
         * Points the iterator at a new string and/or a new string index.
         *
         * @param s string to iterate over
         * @param sIndex string index where the iteration will start
         * @draft ICU 63
         * @provisional This API might change or be removed in a future release.
         */
        public void reset(CharSequence s, int sIndex) {
            this.s = s;
            this.sIndex = sIndex;
            this.c = -1;
            this.value = 0;
        }

        /**
         * Reads the code point at the string index, moves the index past it,
         * and fetches its value from the map.
         * For an unpaired surrogate an implementation-defined error value is set.
         *
         * @return true if the string index was not yet at the end of the string;
         *         otherwise the iterator did not advance
         * @draft ICU 63
         * @provisional This API might change or be removed in a future release.
         */
        public boolean next() {
            if (sIndex < s.length()) {
                c = Character.codePointAt(s, sIndex);
                sIndex += Character.charCount(c);
                value = get(c);
                return true;
            }
            return false;
        }

        /**
         * Reads the code point before the string index, moves the index back
         * to its start, and fetches its value from the map.
         * For an unpaired surrogate an implementation-defined error value is set.
         *
         * @return true if the string index was not yet at the start of the string;
         *         otherwise the iterator did not advance
         * @draft ICU 63
         * @provisional This API might change or be removed in a future release.
         */
        public boolean previous() {
            if (sIndex > 0) {
                c = Character.codePointBefore(s, sIndex);
                sIndex -= Character.charCount(c);
                value = get(c);
                return true;
            }
            return false;
        }

        /**
         * @return the string index
         * @draft ICU 63
         * @provisional This API might change or be removed in a future release.
         */
        public final int getIndex() {
            return sIndex;
        }

        /**
         * @return the code point
         * @draft ICU 63
         * @provisional This API might change or be removed in a future release.
         */
        public final int getCodePoint() {
            return c;
        }

        /**
         * @return the map value,
         *         or an implementation-defined error value if
         *         the code point is an unpaired surrogate
         * @draft ICU 63
         * @provisional This API might change or be removed in a future release.
         */
        public final int getValue() {
            return value;
        }
    }

    /**
     * Protected no-args constructor.
     *
     * @draft ICU 63
     * @provisional This API might change or be removed in a future release.
     */
    protected CodePointMap() {
    }

    /**
     * Looks up the value stored for a code point, with range checking.
     * An implementation-defined error value is returned when c is outside 0..U+10FFFF.
     *
     * @param c the code point
     * @return the map value,
     *         or an implementation-defined error value if
     *         the code point is not in the range 0..U+10FFFF
     * @draft ICU 63
     * @provisional This API might change or be removed in a future release.
     */
    public abstract int get(int c);

    /**
     * Fills the range object with the same-value range that begins at start.
     * The reported start always equals the start argument (even when earlier
     * code points share the value); the reported end is the last code point
     * such that everything from start up to it has the same value.
     * Returns false if start is not 0..U+10FFFF.
     * This allows efficient iteration over all same-value ranges of a map.
     * (Normally faster than get()ting every code point, but possibly much
     * slower than a data structure that stores ranges directly.)
     *
     * <p>When the {@link ValueFilter} argument is non-null, values are passed
     * through it and the range ends where the filtered value changes.
     * With a null filter the stored values are used unchanged.
     *
     * <p>Example:
     * <pre>
     * int start = 0;
     * CodePointMap.Range range = new CodePointMap.Range();
     * while (map.getRange(start, null, range)) {
     *     int end = range.getEnd();
     *     int value = range.getValue();
     *     // Work with the range start..end and its value.
     *     start = end + 1;
     * }
     * </pre>
     *
     * @param start range start
     * @param filter an object that may modify the map data value,
     *     or null if the values from the map are to be used unmodified
     * @param range the range object that will be set to the code point range and value
     * @return true if start is 0..U+10FFFF; otherwise no new range is fetched
     * @draft ICU 63
     * @provisional This API might change or be removed in a future release.
     */
    public abstract boolean getRange(int start, ValueFilter filter, Range range);

    /**
     * Fills the range object with the same-value range that begins at start.
     * The reported start always equals the start argument (even when earlier
     * code points share the value); the reported end is the last code point
     * such that everything from start up to it has the same value.
     * Returns false if start is not 0..U+10FFFF.
     *
     * <p>Works like the simpler {@link #getRange(int, ValueFilter, Range)} but,
     * depending on the option, adjusts the range where it overlaps surrogate
     * code points.
     *
     * @param start range start
     * @param option defines whether surrogates are treated normally,
     *               or as having the surrogateValue; usually {@link RangeOption#NORMAL}
     * @param surrogateValue value for surrogates; ignored if option=={@link RangeOption#NORMAL}
     * @param filter an object that may modify the map data value,
     *     or null if the values from the map are to be used unmodified
     * @param range the range object that will be set to the code point range and value
     * @return true if start is 0..U+10FFFF; otherwise no new range is fetched
     * @draft ICU 63
     * @provisional This API might change or be removed in a future release.
     */
    public boolean getRange(int start, RangeOption option, int surrogateValue,
            ValueFilter filter, Range range) {
        assert option != null;
        if (!getRange(start, filter, range)) {
            return false;
        }
        if (option == RangeOption.NORMAL) {
            return true;
        }

        final int lastBeforeSurrogates = 0xd7ff;
        final int lastFixedSurrogate =
                (option == RangeOption.FIXED_ALL_SURROGATES) ? 0xdfff : 0xdbff;
        final int storedEnd = range.end;
        if (storedEnd < lastBeforeSurrogates || start > lastFixedSurrogate) {
            // No contact with the fixed surrogate block.
            return true;
        }

        // The range overlaps with surrogates, or ends just before the first one.
        if (range.value != surrogateValue) {
            if (start <= lastBeforeSurrogates) {
                // Non-surrValue range ends before surrValue surrogates.
                range.end = lastBeforeSurrogates;
                return true;
            }
            // Start is a surrogate with a non-surrValue code *unit* value.
            // Report a surrValue code *point* range instead.
            range.value = surrogateValue;
            if (storedEnd > lastFixedSurrogate) {
                // Surrogate range ends before non-surrValue rest of range.
                range.end = lastFixedSurrogate;
                return true;
            }
        } else if (storedEnd >= lastFixedSurrogate) {
            // Surrogates followed by a non-surrValue range,
            // or surrogates are part of a larger surrValue range.
            return true;
        }

        // Try to merge the surrValue surrogate range with the range that
        // immediately follows the surrogate block.
        final boolean mergedWithFollowing =
                getRange(lastFixedSurrogate + 1, filter, range)
                        && range.value == surrogateValue;
        range.start = start;
        if (!mergedWithFollowing) {
            range.end = lastFixedSurrogate;
            range.value = surrogateValue;
        }
        return true;
    }

    /**
     * Convenience iterator over the map's same-value code point ranges;
     * equivalent to looping with {@link #getRange(int, ValueFilter, Range)}
     * and no filter. Adjacent ranges have different map values.
     *
     * <p>The iterator always returns the same Range object.
     *
     * @return a Range iterator
     * @draft ICU 63
     * @provisional This API might change or be removed in a future release.
     */
    @Override
    public Iterator<Range> iterator() {
        return new RangeIterator();
    }

    /**
     * Creates an iterator (not a java.util.Iterator) over the code points of
     * a string that fetches the map value of each.
     *
     * @param s string to iterate over
     * @param sIndex string index where the iteration will start
     * @return the iterator
     * @draft ICU 63
     * @provisional This API might change or be removed in a future release.
     */
    public StringIterator stringIterator(CharSequence s, int sIndex) {
        return new StringIterator(s, sIndex);
    }
}

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -119,10 +119,7 @@ public final class ICUBinary {
} else if (capacity < 0x4000) {
capacity *= 2; // Grow faster until we reach 16kB.
}
// TODO Java 6 replace new byte[] and arraycopy(): bytes = Arrays.copyOf(bytes, capacity);
byte[] newBytes = new byte[capacity];
System.arraycopy(bytes, 0, newBytes, 0, length);
bytes = newBytes;
bytes = Arrays.copyOf(bytes, capacity);
bytes[length++] = (byte) nextByte;
}
}
@ -264,6 +261,36 @@ public final class ICUBinary {
}
}
/**
 * Copies {@code length} bytes from the buffer's current position into a
 * newly allocated array (advancing the buffer by that amount), then hands
 * any positive {@code additionalSkipLength} to {@code skipBytes}.
 *
 * @param bytes buffer to read from
 * @param length number of bytes to copy
 * @param additionalSkipLength extra byte count passed to {@code skipBytes}
 *        after the copy; ignored here unless it is positive
 * @return a new array holding the bytes that were read
 */
public static byte[] getBytes(ByteBuffer bytes, int length, int additionalSkipLength) {
    final byte[] result = new byte[length];
    // Relative bulk get: fills the whole array and moves the buffer position.
    bytes.get(result, 0, result.length);
    if (additionalSkipLength <= 0) {
        return result;
    }
    skipBytes(bytes, additionalSkipLength);
    return result;
}
/**
 * Reads {@code length} chars, starting at the buffer's current position,
 * as a String, then asks {@code skipBytes} to move past them plus
 * {@code additionalSkipLength} further bytes.
 *
 * @param bytes buffer to read from
 * @param length number of chars (two bytes each) in the string
 * @param additionalSkipLength extra byte count added to the skip request
 * @return the decoded string
 */
public static String getString(ByteBuffer bytes, int length, int additionalSkipLength) {
    // The char view does not move the byte buffer's own position,
    // so the consumed bytes are skipped explicitly below.
    final String text = bytes.asCharBuffer().subSequence(0, length).toString();
    final int toSkip = length * 2 + additionalSkipLength;
    skipBytes(bytes, toSkip);
    return text;
}
/**
 * Reads {@code length} chars, starting at the buffer's current position,
 * into a newly allocated array, then asks {@code skipBytes} to move past
 * them plus {@code additionalSkipLength} further bytes.
 *
 * @param bytes buffer to read from
 * @param length number of chars (two bytes each) to read
 * @param additionalSkipLength extra byte count added to the skip request
 * @return a new array holding the chars that were read
 */
public static char[] getChars(ByteBuffer bytes, int length, int additionalSkipLength) {
    final char[] result = new char[length];
    // Reading through the char view leaves the byte buffer's position untouched.
    bytes.asCharBuffer().get(result, 0, result.length);
    final int toSkip = length * 2 + additionalSkipLength;
    skipBytes(bytes, toSkip);
    return result;
}
/**
 * Reads {@code length} ints, starting at the buffer's current position,
 * into a newly allocated array, then asks {@code skipBytes} to move past
 * them plus {@code additionalSkipLength} further bytes.
 *
 * @param bytes buffer to read from
 * @param length number of ints (four bytes each) to read
 * @param additionalSkipLength extra byte count added to the skip request
 * @return a new array holding the ints that were read
 */
public static int[] getInts(ByteBuffer bytes, int length, int additionalSkipLength) {
    final int[] result = new int[length];
    // Reading through the int view leaves the byte buffer's position untouched.
    bytes.asIntBuffer().get(result, 0, result.length);
    final int toSkip = length * 4 + additionalSkipLength;
    skipBytes(bytes, toSkip);
    return result;
}
/**
* Returns a VersionInfo for the bytes in the compact version integer.
*/

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -265,7 +265,7 @@ final class Norm2AllModes {
private static final class Norm2AllModesSingleton {
private Norm2AllModesSingleton(String name) {
try {
String DATA_FILE_NAME = "/sun/text/resources/" + name + ".icu";
String DATA_FILE_NAME = "/sun/text/resources/" + name + ".nrm";
NormalizerImpl impl=new NormalizerImpl().load(DATA_FILE_NAME);
allModes=new Norm2AllModes(impl);
} catch (RuntimeException e) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2009, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2009, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -61,7 +61,7 @@ public final class NormalizerImpl {
return 0<=c && c<HANGUL_COUNT && c%JAMO_T_COUNT==0;
}
/**
/**
* Decomposes c, which must be a Hangul syllable, into buffer
* and returns the length of the decomposition (2 or 3).
*/
@ -145,8 +145,7 @@ public final class NormalizerImpl {
insert(c, cc);
}
}
// s must be in NFD, otherwise change the implementation.
public void append(CharSequence s, int start, int limit,
public void append(CharSequence s, int start, int limit, boolean isNFD,
int leadCC, int trailCC) {
if(start==limit) {
return;
@ -167,8 +166,11 @@ public final class NormalizerImpl {
c=Character.codePointAt(s, start);
start+=Character.charCount(c);
if(start<limit) {
// s must be in NFD, otherwise we need to use getCC().
leadCC=getCCFromYesOrMaybe(impl.getNorm16(c));
if (isNFD) {
leadCC = getCCFromYesOrMaybe(impl.getNorm16(c));
} else {
leadCC = impl.getCC(impl.getNorm16(c));
}
} else {
leadCC=trailCC;
}
@ -310,6 +312,12 @@ public final class NormalizerImpl {
// TODO: Propose widening UTF16 methods that take char to take int.
// TODO: Propose widening UTF16 methods that take String to take CharSequence.
public static final class UTF16Plus {
/**
 * Tests whether the given value is a lead surrogate, i.e. lies in
 * U+d800..U+dbff inclusive.
 * @param c code unit or code point
 * @return true if c is in the lead surrogate range, false otherwise
 */
public static boolean isLeadSurrogate(int c) {
    // Same set of values as masking off the low 10 bits and comparing with 0xd800.
    return 0xd800 <= c && c <= 0xdbff;
}
/**
* Assuming c is a surrogate code point (UTF16.isSurrogate(c)),
* is it a lead surrogate?
@ -350,7 +358,7 @@ public final class NormalizerImpl {
private static final class IsAcceptable implements ICUBinary.Authenticate {
public boolean isDataVersionAcceptable(byte version[]) {
return version[0]==3;
return version[0]==4;
}
}
private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
@ -387,8 +395,9 @@ public final class NormalizerImpl {
// Read the normTrie.
int offset=inIndexes[IX_NORM_TRIE_OFFSET];
int nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
normTrie=Trie2_16.createFromSerialized(bytes);
int trieLength=normTrie.getSerializedLength();
int triePosition = bytes.position();
normTrie = CodePointTrie.Fast16.fromBinary(bytes);
int trieLength = bytes.position() - triePosition;
if(trieLength>(nextOffset-offset)) {
throw new InternalError("Normalizer2 data: not enough bytes for normTrie");
}
@ -398,13 +407,8 @@ public final class NormalizerImpl {
offset=nextOffset;
nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
int numChars=(nextOffset-offset)/2;
char[] chars;
if(numChars!=0) {
chars=new char[numChars];
for(int i=0; i<numChars; ++i) {
chars[i]=bytes.getChar();
}
maybeYesCompositions=new String(chars);
maybeYesCompositions=ICUBinary.getString(bytes, numChars, 0);
extraData=maybeYesCompositions.substring((MIN_NORMAL_MAYBE_YES-minMaybeYes)>>OFFSET_SHIFT);
}
@ -422,8 +426,12 @@ public final class NormalizerImpl {
return load(ICUBinary.getRequiredData(name));
}
public int getNorm16(int c) { return normTrie.get(c); }
// Returns INERT for any c in the lead surrogate range, otherwise the trie value.
// For lead surrogates the trie holds data for the code *unit*,
// whereas a surrogate code *point* is inert.
public int getNorm16(int c) {
    if (UTF16Plus.isLeadSurrogate(c)) {
        return INERT;
    }
    return normTrie.get(c);
}
// Returns the value stored in normTrie for c, with no special-casing.
public int getRawNorm16(int c) {
    final int norm16 = normTrie.get(c);
    return norm16;
}
// True when norm16 lies in the half-open interval [limitNoNo, minMaybeYes).
public boolean isAlgorithmicNoNo(int norm16) {
    return norm16 >= limitNoNo && norm16 < minMaybeYes;
}
// True when norm16 lies in the half-open interval [minNoNo, minMaybeYes).
public boolean isCompNo(int norm16) {
    if (norm16 < minNoNo) {
        return false;
    }
    return norm16 < minMaybeYes;
}
// True when norm16 lies outside the half-open interval [minYesNo, minMaybeYes).
public boolean isDecompYes(int norm16) {
    final boolean insideGap = minYesNo <= norm16 && norm16 < minMaybeYes;
    return !insideGap;
}
@ -486,7 +494,7 @@ public final class NormalizerImpl {
}
// Maps to an isCompYesAndZeroCC.
c=mapAlgorithmic(c, norm16);
norm16=getNorm16(c);
norm16=getRawNorm16(c);
}
}
if(norm16<=minYesNo || isHangulLVT(norm16)) {
@ -519,7 +527,7 @@ public final class NormalizerImpl {
// Maps to an isCompYesAndZeroCC.
decomp=c=mapAlgorithmic(c, norm16);
// The mapping might decompose further.
norm16 = getNorm16(c);
norm16 = getRawNorm16(c);
}
if (norm16 < minYesNo) {
if(decomp<0) {
@ -641,27 +649,23 @@ public final class NormalizerImpl {
// count code units below the minimum or with irrelevant data for the quick check
for(prevSrc=src; src!=limit;) {
if( (c=s.charAt(src))<minNoCP ||
isMostDecompYesAndZeroCC(norm16=normTrie.getFromU16SingleLead((char)c))
isMostDecompYesAndZeroCC(norm16=normTrie.bmpGet(c))
) {
++src;
} else if(!UTF16.isSurrogate((char)c)) {
} else if(!UTF16Plus.isLeadSurrogate(c)) {
break;
} else {
char c2;
if(UTF16Plus.isSurrogateLead(c)) {
if((src+1)!=limit && Character.isLowSurrogate(c2=s.charAt(src+1))) {
c=Character.toCodePoint((char)c, c2);
if ((src + 1) != limit && Character.isLowSurrogate(c2 = s.charAt(src + 1))) {
c = Character.toCodePoint((char)c, c2);
norm16 = normTrie.suppGet(c);
if (isMostDecompYesAndZeroCC(norm16)) {
src += 2;
} else {
break;
}
} else /* trail surrogate */ {
if(prevSrc<src && Character.isHighSurrogate(c2=s.charAt(src-1))) {
--src;
c=Character.toCodePoint(c2, (char)c);
}
}
if(isMostDecompYesAndZeroCC(norm16=getNorm16(c))) {
src+=Character.charCount(c);
} else {
break;
++src; // unpaired lead surrogate: inert
}
}
}
@ -721,7 +725,7 @@ public final class NormalizerImpl {
c=Character.codePointAt(s, src);
cc=getCC(getNorm16(c));
};
buffer.append(s, 0, src, firstCC, prevCC);
buffer.append(s, 0, src, false, firstCC, prevCC);
buffer.append(s, src, limit);
}
@ -749,28 +753,22 @@ public final class NormalizerImpl {
return true;
}
if( (c=s.charAt(src))<minNoMaybeCP ||
isCompYesAndZeroCC(norm16=normTrie.getFromU16SingleLead((char)c))
isCompYesAndZeroCC(norm16=normTrie.bmpGet(c))
) {
++src;
} else {
prevSrc = src++;
if(!UTF16.isSurrogate((char)c)) {
if (!UTF16Plus.isLeadSurrogate(c)) {
break;
} else {
char c2;
if(UTF16Plus.isSurrogateLead(c)) {
if(src!=limit && Character.isLowSurrogate(c2=s.charAt(src))) {
++src;
c=Character.toCodePoint((char)c, c2);
if (src != limit && Character.isLowSurrogate(c2 = s.charAt(src))) {
++src;
c = Character.toCodePoint((char)c, c2);
norm16 = normTrie.suppGet(c);
if (!isCompYesAndZeroCC(norm16)) {
break;
}
} else /* trail surrogate */ {
if(prevBoundary<prevSrc && Character.isHighSurrogate(c2=s.charAt(prevSrc-1))) {
--prevSrc;
c=Character.toCodePoint(c2, (char)c);
}
}
if(!isCompYesAndZeroCC(norm16=getNorm16(c))) {
break;
}
}
}
@ -991,28 +989,22 @@ public final class NormalizerImpl {
return (src<<1)|qcResult; // "yes" or "maybe"
}
if( (c=s.charAt(src))<minNoMaybeCP ||
isCompYesAndZeroCC(norm16=normTrie.getFromU16SingleLead((char)c))
isCompYesAndZeroCC(norm16=normTrie.bmpGet(c))
) {
++src;
} else {
prevSrc = src++;
if(!UTF16.isSurrogate((char)c)) {
if (!UTF16Plus.isLeadSurrogate(c)) {
break;
} else {
char c2;
if(UTF16Plus.isSurrogateLead(c)) {
if(src!=limit && Character.isLowSurrogate(c2=s.charAt(src))) {
++src;
c=Character.toCodePoint((char)c, c2);
if (src != limit && Character.isLowSurrogate(c2 = s.charAt(src))) {
++src;
c = Character.toCodePoint((char)c, c2);
norm16 = normTrie.suppGet(c);
if (!isCompYesAndZeroCC(norm16)) {
break;
}
} else /* trail surrogate */ {
if(prevBoundary<prevSrc && Character.isHighSurrogate(c2=s.charAt(prevSrc-1))) {
--prevSrc;
c=Character.toCodePoint(c2, (char)c);
}
}
if(!isCompYesAndZeroCC(norm16=getNorm16(c))) {
break;
}
}
}
@ -1134,17 +1126,10 @@ public final class NormalizerImpl {
prevFCD16=0;
++src;
} else {
if(UTF16.isSurrogate((char)c)) {
if (UTF16Plus.isLeadSurrogate(c)) {
char c2;
if(UTF16Plus.isSurrogateLead(c)) {
if((src+1)!=limit && Character.isLowSurrogate(c2=s.charAt(src+1))) {
c=Character.toCodePoint((char)c, c2);
}
} else /* trail surrogate */ {
if(prevSrc<src && Character.isHighSurrogate(c2=s.charAt(src-1))) {
--src;
c=Character.toCodePoint(c2, (char)c);
}
if ((src + 1) != limit && Character.isLowSurrogate(c2 = s.charAt(src + 1))) {
c = Character.toCodePoint((char)c, c2);
}
}
if((fcd16=getFCD16FromNormData(c))<=0xff) {
@ -1430,7 +1415,7 @@ public final class NormalizerImpl {
}
// Maps to an isCompYesAndZeroCC.
c=mapAlgorithmic(c, norm16);
norm16=getNorm16(c);
norm16=getRawNorm16(c);
}
if (norm16 < minYesNo) {
// c does not decompose
@ -1451,7 +1436,7 @@ public final class NormalizerImpl {
leadCC=0;
}
++mapping; // skip over the firstUnit
buffer.append(extraData, mapping, mapping+length, leadCC, trailCC);
buffer.append(extraData, mapping, mapping+length, true, leadCC, trailCC);
}
}
@ -1643,7 +1628,7 @@ public final class NormalizerImpl {
// Is the composite a starter that combines forward?
if((compositeAndFwd&1)!=0) {
compositionsList=
getCompositionsListForComposite(getNorm16(composite));
getCompositionsListForComposite(getRawNorm16(composite));
} else {
compositionsList=-1;
}
@ -2196,9 +2181,8 @@ public final class NormalizerImpl {
private int centerNoNoDelta;
private int minMaybeYes;
private Trie2_16 normTrie;
private CodePointTrie.Fast16 normTrie;
private String maybeYesCompositions;
private String extraData; // mappings and/or compositions for yesYes, yesNo & noNo characters
private byte[] smallFCD; // [0x100] one bit per 32 BMP code points, set if any FCD!=0
}
}

View File

@ -1,38 +1,11 @@
## International Components for Unicode (ICU4J) v62.1
## International Components for Unicode (ICU4J) v64.2
### ICU4J License
```
COPYRIGHT AND PERMISSION NOTICE (ICU 58 and later)
UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
Unicode Data Files include all data files under the directories
http://www.unicode.org/Public/, http://www.unicode.org/reports/,
http://www.unicode.org/cldr/data/,
http://source.icu-project.org/repos/icu/, and
http://www.unicode.org/utility/trac/browser/.
Unicode Data Files do not include PDF online code charts under the
directory http://www.unicode.org/Public/.
Software includes any source code published in the Unicode Standard
or under the directories
http://www.unicode.org/Public/, http://www.unicode.org/reports/,
http://www.unicode.org/cldr/data/,
http://source.icu-project.org/repos/icu/, and
http://www.unicode.org/utility/trac/browser/.
NOTICE TO USER: Carefully read the following legal agreement.
BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
TERMS AND CONDITIONS OF THIS AGREEMENT.
IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
THE DATA FILES OR SOFTWARE.
COPYRIGHT AND PERMISSION NOTICE
Copyright © 1991-2018 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in http://www.unicode.org/copyright.html.
Copyright © 1991-2019 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
Permission is hereby granted, free of charge, to any person obtaining
a copy of the Unicode data files and any associated documentation
@ -63,4 +36,354 @@ shall not be used in advertising or otherwise to promote the sale,
use or other dealings in these Data Files or Software without prior
written authorization of the copyright holder.
```
---------------------
Third-Party Software Licenses
This section contains third-party software notices and/or additional
terms for licensed third-party software components included within ICU
libraries.
1. ICU License - ICU 1.8.1 to ICU 57.1
COPYRIGHT AND PERMISSION NOTICE
Copyright (c) 1995-2016 International Business Machines Corporation and others
All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, and/or sell copies of the Software, and to permit persons
to whom the Software is furnished to do so, provided that the above
copyright notice(s) and this permission notice appear in all copies of
the Software and that both the above copyright notice(s) and this
permission notice appear in supporting documentation.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY
SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER
RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
Except as contained in this notice, the name of a copyright holder
shall not be used in advertising or otherwise to promote the sale, use
or other dealings in this Software without prior written authorization
of the copyright holder.
All trademarks and registered trademarks mentioned herein are the
property of their respective owners.
2. Chinese/Japanese Word Break Dictionary Data (cjdict.txt)
# The Google Chrome software developed by Google is licensed under
# the BSD license. Other software included in this distribution is
# provided under other licenses, as set forth below.
#
# The BSD License
# http://opensource.org/licenses/bsd-license.php
# Copyright (C) 2006-2008, Google Inc.
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided with
# the distribution.
# Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
# The word list in cjdict.txt are generated by combining three word lists
# listed below with further processing for compound word breaking. The
# frequency is generated with an iterative training against Google web
# corpora.
#
# * Libtabe (Chinese)
# - https://sourceforge.net/project/?group_id=1519
# - Its license terms and conditions are shown below.
#
# * IPADIC (Japanese)
# - http://chasen.aist-nara.ac.jp/chasen/distribution.html
# - Its license terms and conditions are shown below.
#
# ---------COPYING.libtabe ---- BEGIN--------------------
#
# /*
# * Copyright (c) 1999 TaBE Project.
# * Copyright (c) 1999 Pai-Hsiang Hsiao.
# * All rights reserved.
# *
# * Redistribution and use in source and binary forms, with or without
# * modification, are permitted provided that the following conditions
# * are met:
# *
# * . Redistributions of source code must retain the above copyright
# * notice, this list of conditions and the following disclaimer.
# * . Redistributions in binary form must reproduce the above copyright
# * notice, this list of conditions and the following disclaimer in
# * the documentation and/or other materials provided with the
# * distribution.
# * . Neither the name of the TaBE Project nor the names of its
# * contributors may be used to endorse or promote products derived
# * from this software without specific prior written permission.
# *
# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# * OF THE POSSIBILITY OF SUCH DAMAGE.
# */
#
# /*
# * Copyright (c) 1999 Computer Systems and Communication Lab,
# * Institute of Information Science, Academia
# * Sinica. All rights reserved.
# *
# * Redistribution and use in source and binary forms, with or without
# * modification, are permitted provided that the following conditions
# * are met:
# *
# * . Redistributions of source code must retain the above copyright
# * notice, this list of conditions and the following disclaimer.
# * . Redistributions in binary form must reproduce the above copyright
# * notice, this list of conditions and the following disclaimer in
# * the documentation and/or other materials provided with the
# * distribution.
# * . Neither the name of the Computer Systems and Communication Lab
# * nor the names of its contributors may be used to endorse or
# * promote products derived from this software without specific
# * prior written permission.
# *
# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# * OF THE POSSIBILITY OF SUCH DAMAGE.
# */
#
# Copyright 1996 Chih-Hao Tsai @ Beckman Institute,
# University of Illinois
# c-tsai4@uiuc.edu http://casper.beckman.uiuc.edu/~c-tsai4
#
# ---------------COPYING.libtabe-----END--------------------------------
#
#
# ---------------COPYING.ipadic-----BEGIN-------------------------------
#
# Copyright 2000, 2001, 2002, 2003 Nara Institute of Science
# and Technology. All Rights Reserved.
#
# Use, reproduction, and distribution of this software is permitted.
# Any copy of this software, whether in its original form or modified,
# must include both the above copyright notice and the following
# paragraphs.
#
# Nara Institute of Science and Technology (NAIST),
# the copyright holders, disclaims all warranties with regard to this
# software, including all implied warranties of merchantability and
# fitness, in no event shall NAIST be liable for
# any special, indirect or consequential damages or any damages
# whatsoever resulting from loss of use, data or profits, whether in an
# action of contract, negligence or other tortuous action, arising out
# of or in connection with the use or performance of this software.
#
# A large portion of the dictionary entries
# originate from ICOT Free Software. The following conditions for ICOT
# Free Software applies to the current dictionary as well.
#
# Each User may also freely distribute the Program, whether in its
# original form or modified, to any third party or parties, PROVIDED
# that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear
# on, or be attached to, the Program, which is distributed substantially
# in the same form as set out herein and that such intended
# distribution, if actually made, will neither violate or otherwise
# contravene any of the laws and regulations of the countries having
# jurisdiction over the User or the intended distribution itself.
#
# NO WARRANTY
#
# The program was produced on an experimental basis in the course of the
# research and development conducted during the project and is provided
# to users as so produced on an experimental basis. Accordingly, the
# program is provided without any warranty whatsoever, whether express,
# implied, statutory or otherwise. The term "warranty" used herein
# includes, but is not limited to, any warranty of the quality,
# performance, merchantability and fitness for a particular purpose of
# the program and the nonexistence of any infringement or violation of
# any right of any third party.
#
# Each user of the program will agree and understand, and be deemed to
# have agreed and understood, that there is no warranty whatsoever for
# the program and, accordingly, the entire risk arising from or
# otherwise connected with the program is assumed by the user.
#
# Therefore, neither ICOT, the copyright holder, or any other
# organization that participated in or was otherwise related to the
# development of the program and their respective officials, directors,
# officers and other employees shall be held liable for any and all
# damages, including, without limitation, general, special, incidental
# and consequential damages, arising out of or otherwise in connection
# with the use or inability to use the program or any product, material
# or result produced or otherwise obtained by using the program,
# regardless of whether they have been advised of, or otherwise had
# knowledge of, the possibility of such damages at any time during the
# project or thereafter. Each user will be deemed to have agreed to the
# foregoing by his or her commencement of use of the program. The term
# "use" as used herein includes, but is not limited to, the use,
# modification, copying and distribution of the program and the
# production of secondary products from the program.
#
# In the case where the program, whether in its original form or
# modified, was distributed or delivered to or received by a user from
# any person, organization or entity other than ICOT, unless it makes or
# grants independently of ICOT any specific warranty to the user in
# writing, such person, organization or entity, will also be exempted
# from and not be held liable to the user for any such damages as noted
# above as far as the program is concerned.
#
# ---------------COPYING.ipadic-----END----------------------------------
3. Lao Word Break Dictionary Data (laodict.txt)
# Copyright (c) 2013 International Business Machines Corporation
# and others. All Rights Reserved.
#
# Project: http://code.google.com/p/lao-dictionary/
# Dictionary: http://lao-dictionary.googlecode.com/git/Lao-Dictionary.txt
# License: http://lao-dictionary.googlecode.com/git/Lao-Dictionary-LICENSE.txt
# (copied below)
#
# This file is derived from the above dictionary, with slight
# modifications.
# ----------------------------------------------------------------------
# Copyright (C) 2013 Brian Eugene Wilson, Robert Martin Campbell.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification,
# are permitted provided that the following conditions are met:
#
#
# Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer. Redistributions in
# binary form must reproduce the above copyright notice, this list of
# conditions and the following disclaimer in the documentation and/or
# other materials provided with the distribution.
#
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# OF THE POSSIBILITY OF SUCH DAMAGE.
# --------------------------------------------------------------------------
4. Burmese Word Break Dictionary Data (burmesedict.txt)
# Copyright (c) 2014 International Business Machines Corporation
# and others. All Rights Reserved.
#
# This list is part of a project hosted at:
# github.com/kanyawtech/myanmar-karen-word-lists
#
# --------------------------------------------------------------------------
# Copyright (c) 2013, LeRoy Benjamin Sharon
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met: Redistributions of source code must retain the above
# copyright notice, this list of conditions and the following
# disclaimer. Redistributions in binary form must reproduce the
# above copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided
# with the distribution.
#
# Neither the name Myanmar Karen Word Lists, nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
# THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
# --------------------------------------------------------------------------
5. Time Zone Database
ICU uses the public domain data and code derived from Time Zone
Database for its time zone support. The ownership of the TZ database
is explained in BCP 175: Procedure for Maintaining the Time Zone
Database section 7.
# 7. Database Ownership
#
# The TZ database itself is not an IETF Contribution or an IETF
# document. Rather it is a pre-existing and regularly updated work
# that is in the public domain, and is intended to remain in the
# public domain. Therefore, BCPs 78 [RFC5378] and 79 [RFC3979] do
# not apply to the TZ Database or contributions that individuals make
# to it. Should any claims be made and substantiated against the TZ
# Database, the organization that is providing the IANA
# Considerations defined in this RFC, under the memorandum of
# understanding with the IETF, currently ICANN, may act in accordance
# with all competent court orders. No ownership claims will be made
# by ICANN or the IETF Trust on the database or the code. Any person
# making a contribution to the database or code waives all rights to
# future claims in that contribution or in the TZ Database.

View File

@ -1,25 +1,11 @@
## The Unicode Standard, Unicode Character Database, Version 11.0.0
## The Unicode Standard, Unicode Character Database, Version 12.1.0
### Unicode Character Database
```
UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
Unicode Data Files include all data files under the directories
http://www.unicode.org/Public/, http://www.unicode.org/reports/,
http://www.unicode.org/cldr/data/,
http://source.icu-project.org/repos/icu/, and
http://www.unicode.org/utility/trac/browser/.
Unicode Data Files do not include PDF online code charts under the
directory http://www.unicode.org/Public/.
Software includes any source code published in the Unicode Standard
or under the directories
http://www.unicode.org/Public/, http://www.unicode.org/reports/,
http://www.unicode.org/cldr/data/,
http://source.icu-project.org/repos/icu/, and
http://www.unicode.org/utility/trac/browser/.
See Terms of Use for definitions of Unicode Inc.'s
Data Files and Software.
NOTICE TO USER: Carefully read the following legal agreement.
BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
@ -31,8 +17,8 @@ THE DATA FILES OR SOFTWARE.
COPYRIGHT AND PERMISSION NOTICE
Copyright © 1991-2018 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in http://www.unicode.org/copyright.html.
Copyright © 1991-2019 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
Permission is hereby granted, free of charge, to any person obtaining
a copy of the Unicode data files and any associated documentation
@ -62,5 +48,3 @@ Except as contained in this notice, the name of a copyright holder
shall not be used in advertising or otherwise to promote the sale,
use or other dealings in these Data Files or Software without prior
written authorization of the copyright holder.
```

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -23,17 +23,16 @@
/*
* @test
* @bug 8202771
* @bug 8202771 8221431
* @summary Check j.l.Character.isDigit/isLetter/isLetterOrDigit/isSpaceChar
* /isWhitespace/isTitleCase/isISOControl/isIdentifierIgnorable
* /isJavaIdentifierStart/isJavaIdentifierPart/isUnicodeIdentifierStart
* /isUnicodeIdentifierPart
* @library /lib/testlibrary/java/lang
* @run main CharPropTest
*/
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.stream.Stream;
public class CharPropTest {
@ -42,9 +41,7 @@ public class CharPropTest {
private static boolean isRange = false;
public static void main(String[] args) throws Exception {
Path path = Paths.get(System.getProperty("test.src", "."),
"UnicodeData.txt");
try (Stream<String> lines = Files.lines(path)) {
try (Stream<String> lines = Files.lines(UCDFiles.UNICODE_DATA)) {
lines.map(String::trim)
.filter(line -> line.length() != 0 && line.charAt(0) != '#')
.forEach(line -> handleOneLine(line));

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2011, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -24,8 +24,9 @@
/**
* @test
* @bug 7037261 7070436 7198195 8032446 8072600
* @bug 7037261 7070436 7198195 8032446 8072600 8221431
* @summary Check j.l.Character.isLowerCase/isUpperCase/isAlphabetic/isIdeographic
* @library /lib/testlibrary/java/lang
*/
import java.util.regex.*;
@ -36,7 +37,7 @@ import static java.lang.Character.*;
public class CheckProp {
public static void main(String[] args) throws IOException {
File fPropList = new File(System.getProperty("test.src", "."), "PropList.txt");
File fPropList = UCDFiles.PROP_LIST.toFile();
int i, j;
BufferedReader sbfr = new BufferedReader(new FileReader(fPropList));
Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s*;\\s+(\\w+)\\s+#.*").matcher("");

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2010, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -23,8 +23,9 @@
/**
* @test
* @bug 6945564 6959267 7033561 7070436 7198195 8032446 8072600
* @bug 6945564 6959267 7033561 7070436 7198195 8032446 8072600 8221431
* @summary Check that the j.l.Character.UnicodeScript
* @library /lib/testlibrary/java/lang
*/
import java.io.*;
@ -38,8 +39,8 @@ public class CheckScript {
File fScripts;
File fAliases;
if (args.length == 0) {
fScripts = new File(System.getProperty("test.src", "."), "Scripts.txt");
fAliases = new File(System.getProperty("test.src", "."), "PropertyValueAliases.txt");
fScripts = UCDFiles.SCRIPTS.toFile();
fAliases = UCDFiles.PROPERTY_VALUE_ALIASES.toFile();
} else if (args.length == 2) {
fScripts = new File(args[0]);
fAliases = new File(args[1]);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -25,11 +25,12 @@
/**
* @test
* @bug 4114080 6565620 6959267 7070436 7198195 8032446 8072600
* @bug 4114080 6565620 6959267 7070436 7198195 8032446 8072600 8221431
* @summary Make sure the attributes of Unicode characters, as
* returned by the Character API, are as expected. Do this by
* comparing them to a baseline file together with a list of
* known diffs.
* @library /lib/testlibrary/java/lang
* @build UnicodeSpec CharCheck
* @run main CheckUnicode
* @author Alan Liu
@ -41,40 +42,18 @@ import java.io.*;
public class CheckUnicode {
public static void main(String args[]) throws Exception {
// 1. Check that the dumped property files for planes 0, 1, 2, 3, 14, 15, and 16
// are the same as in the current Character properties.
int[] planes = {0, 1, 2, 3, 14, 15, 16};
String[] fileNames = {"charprop00.bin", "charprop01.bin", "charprop02.bin", "charprop03.bin",
"charprop0E.bin", "charprop0F.bin", "charprop10.bin" };
// Read in the Unicode 4.0 data
for (int x=0; x < planes.length && x < fileNames.length; ++x) {
File unicodeProp = new File(System.getProperty("test.src", "."), fileNames[x]);
ObjectInputStream ois = new ObjectInputStream(new FileInputStream(unicodeProp));
// Find differences -- should be none
int diffs = CharCheck.load(planes[x], ois);
if (diffs != 0) {
throw new RuntimeException("Bug 4114080 - Unicode properties have changed " +
"in an unexpected way");
}
}
// 2. Check that the current 4.0 spec file is handled by the current
// 1. Check that the current 12.1 spec file is handled by the current
// version of Character.
File unicodeSpec = new File(System.getProperty("test.src", "."), "UnicodeData.txt");
for (int x=0; x<planes.length; ++x) {
int diffs = CharCheck.check(planes[x], unicodeSpec);
if (diffs != 0) {
throw new RuntimeException("Bug 4114080 - Unicode properties have changed " +
"in an unexpected way");
}
}
File unicodeSpec = UCDFiles.UNICODE_DATA.toFile();
for (int x = 0; x < 16; ++x) {
int diffs = CharCheck.check(x, unicodeSpec);
if (diffs != 0) {
throw new RuntimeException("Unicode properties have changed " +
"in an unexpected way");
}
}
// 3. Check that Java identifiers are recognized correctly.
// 2. Check that Java identifiers are recognized correctly.
// test a few characters that are good id starts
char[] idStartChar = {'$', '\u20AC', 'a', 'A', 'z', 'Z', '_', '\u0E3F',
'\u1004', '\u10A0', '\u3400', '\u4E00', '\uAC00' };
@ -104,8 +83,5 @@ public class CheckUnicode {
"should not be start characters.");
}
}
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,281 +0,0 @@
# SpecialCasing-11.0.0.txt
# Date: 2018-02-22, 06:16:47 GMT
# Copyright (c) 2018 Unicode, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Special Casing
#
# This file is a supplement to the UnicodeData.txt file. It does not define any
# properties, but rather provides additional information about the casing of
# Unicode characters, for situations when casing incurs a change in string length
# or is dependent on context or locale. For compatibility, the UnicodeData.txt
# file only contains simple case mappings for characters where they are one-to-one
# and independent of context and language. The data in this file, combined with
# the simple case mappings in UnicodeData.txt, defines the full case mappings
# Lowercase_Mapping (lc), Titlecase_Mapping (tc), and Uppercase_Mapping (uc).
#
# Note that the preferred mechanism for defining tailored casing operations is
# the Unicode Common Locale Data Repository (CLDR). For more information, see the
# discussion of case mappings and case algorithms in the Unicode Standard.
#
# All code points not listed in this file that do not have a simple case mappings
# in UnicodeData.txt map to themselves.
# ================================================================================
# Format
# ================================================================================
# The entries in this file are in the following machine-readable format:
#
# <code>; <lower>; <title>; <upper>; (<condition_list>;)? # <comment>
#
# <code>, <lower>, <title>, and <upper> provide the respective full case mappings
# of <code>, expressed as character values in hex. If there is more than one character,
# they are separated by spaces. Other than as used to separate elements, spaces are
# to be ignored.
#
# The <condition_list> is optional. Where present, it consists of one or more language IDs
# or casing contexts, separated by spaces. In these conditions:
# - A condition list overrides the normal behavior if all of the listed conditions are true.
# - The casing context is always the context of the characters in the original string,
# NOT in the resulting string.
# - Case distinctions in the condition list are not significant.
# - Conditions preceded by "Not_" represent the negation of the condition.
# The condition list is not represented in the UCD as a formal property.
#
# A language ID is defined by BCP 47, with '-' and '_' treated equivalently.
#
# A casing context for a character is defined by Section 3.13 Default Case Algorithms
# of The Unicode Standard.
#
# Parsers of this file must be prepared to deal with future additions to this format:
# * Additional contexts
# * Additional fields
# ================================================================================
# ================================================================================
# Unconditional mappings
# ================================================================================
# The German es-zed is special--the normal mapping is to SS.
# Note: the titlecase should never occur in practice. It is equal to titlecase(uppercase(<es-zed>))
00DF; 00DF; 0053 0073; 0053 0053; # LATIN SMALL LETTER SHARP S
# Preserve canonical equivalence for I with dot. Turkic is handled below.
0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
# Ligatures
FB00; FB00; 0046 0066; 0046 0046; # LATIN SMALL LIGATURE FF
FB01; FB01; 0046 0069; 0046 0049; # LATIN SMALL LIGATURE FI
FB02; FB02; 0046 006C; 0046 004C; # LATIN SMALL LIGATURE FL
FB03; FB03; 0046 0066 0069; 0046 0046 0049; # LATIN SMALL LIGATURE FFI
FB04; FB04; 0046 0066 006C; 0046 0046 004C; # LATIN SMALL LIGATURE FFL
FB05; FB05; 0053 0074; 0053 0054; # LATIN SMALL LIGATURE LONG S T
FB06; FB06; 0053 0074; 0053 0054; # LATIN SMALL LIGATURE ST
0587; 0587; 0535 0582; 0535 0552; # ARMENIAN SMALL LIGATURE ECH YIWN
FB13; FB13; 0544 0576; 0544 0546; # ARMENIAN SMALL LIGATURE MEN NOW
FB14; FB14; 0544 0565; 0544 0535; # ARMENIAN SMALL LIGATURE MEN ECH
FB15; FB15; 0544 056B; 0544 053B; # ARMENIAN SMALL LIGATURE MEN INI
FB16; FB16; 054E 0576; 054E 0546; # ARMENIAN SMALL LIGATURE VEW NOW
FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH
# No corresponding uppercase precomposed character
0149; 0149; 02BC 004E; 02BC 004E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
0390; 0390; 0399 0308 0301; 0399 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
03B0; 03B0; 03A5 0308 0301; 03A5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
01F0; 01F0; 004A 030C; 004A 030C; # LATIN SMALL LETTER J WITH CARON
1E96; 1E96; 0048 0331; 0048 0331; # LATIN SMALL LETTER H WITH LINE BELOW
1E97; 1E97; 0054 0308; 0054 0308; # LATIN SMALL LETTER T WITH DIAERESIS
1E98; 1E98; 0057 030A; 0057 030A; # LATIN SMALL LETTER W WITH RING ABOVE
1E99; 1E99; 0059 030A; 0059 030A; # LATIN SMALL LETTER Y WITH RING ABOVE
1E9A; 1E9A; 0041 02BE; 0041 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING
1F50; 1F50; 03A5 0313; 03A5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI
1F52; 1F52; 03A5 0313 0300; 03A5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
1F54; 1F54; 03A5 0313 0301; 03A5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
1F56; 1F56; 03A5 0313 0342; 03A5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
1FB6; 1FB6; 0391 0342; 0391 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI
1FC6; 1FC6; 0397 0342; 0397 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI
1FD2; 1FD2; 0399 0308 0300; 0399 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
1FD3; 1FD3; 0399 0308 0301; 0399 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
1FD6; 1FD6; 0399 0342; 0399 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI
1FD7; 1FD7; 0399 0308 0342; 0399 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
1FE2; 1FE2; 03A5 0308 0300; 03A5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
1FE3; 1FE3; 03A5 0308 0301; 03A5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
1FE4; 1FE4; 03A1 0313; 03A1 0313; # GREEK SMALL LETTER RHO WITH PSILI
1FE6; 1FE6; 03A5 0342; 03A5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI
1FE7; 1FE7; 03A5 0308 0342; 03A5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
1FF6; 1FF6; 03A9 0342; 03A9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI
# IMPORTANT-when iota-subscript (0345) is uppercased or titlecased,
# the result will be incorrect unless the iota-subscript is moved to the end
# of any sequence of combining marks. Otherwise, the accents will go on the capital iota.
# This process can be achieved by first transforming the text to NFC before casing.
# E.g. <alpha><iota_subscript><acute> is uppercased to <ALPHA><acute><IOTA>
# The following cases are already in the UnicodeData.txt file, so are only commented here.
# 0345; 0345; 0399; 0399; # COMBINING GREEK YPOGEGRAMMENI
# All letters with YPOGEGRAMMENI (iota-subscript) or PROSGEGRAMMENI (iota adscript)
# have special uppercases.
# Note: characters with PROSGEGRAMMENI are actually titlecase, not uppercase!
1F80; 1F80; 1F88; 1F08 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
1F81; 1F81; 1F89; 1F09 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
1F82; 1F82; 1F8A; 1F0A 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI
1F83; 1F83; 1F8B; 1F0B 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI
1F84; 1F84; 1F8C; 1F0C 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI
1F85; 1F85; 1F8D; 1F0D 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI
1F86; 1F86; 1F8E; 1F0E 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
1F87; 1F87; 1F8F; 1F0F 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
1F88; 1F80; 1F88; 1F08 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
1F89; 1F81; 1F89; 1F09 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
1F8A; 1F82; 1F8A; 1F0A 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1F8B; 1F83; 1F8B; 1F0B 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1F8C; 1F84; 1F8C; 1F0C 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1F8D; 1F85; 1F8D; 1F0D 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1F8E; 1F86; 1F8E; 1F0E 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1F8F; 1F87; 1F8F; 1F0F 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1F90; 1F90; 1F98; 1F28 0399; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
1F91; 1F91; 1F99; 1F29 0399; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
1F92; 1F92; 1F9A; 1F2A 0399; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI
1F93; 1F93; 1F9B; 1F2B 0399; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI
1F94; 1F94; 1F9C; 1F2C 0399; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI
1F95; 1F95; 1F9D; 1F2D 0399; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI
1F96; 1F96; 1F9E; 1F2E 0399; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
1F97; 1F97; 1F9F; 1F2F 0399; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
1F98; 1F90; 1F98; 1F28 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
1F99; 1F91; 1F99; 1F29 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
1F9A; 1F92; 1F9A; 1F2A 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1F9B; 1F93; 1F9B; 1F2B 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1F9C; 1F94; 1F9C; 1F2C 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1F9D; 1F95; 1F9D; 1F2D 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1F9E; 1F96; 1F9E; 1F2E 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1F9F; 1F97; 1F9F; 1F2F 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1FA0; 1FA0; 1FA8; 1F68 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
1FA1; 1FA1; 1FA9; 1F69 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
1FA2; 1FA2; 1FAA; 1F6A 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
1FA3; 1FA3; 1FAB; 1F6B 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI
1FA4; 1FA4; 1FAC; 1F6C 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI
1FA5; 1FA5; 1FAD; 1F6D 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI
1FA6; 1FA6; 1FAE; 1F6E 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
1FA7; 1FA7; 1FAF; 1F6F 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
1FA8; 1FA0; 1FA8; 1F68 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
1FA9; 1FA1; 1FA9; 1F69 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
1FAA; 1FA2; 1FAA; 1F6A 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1FAB; 1FA3; 1FAB; 1F6B 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1FAC; 1FA4; 1FAC; 1F6C 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1FAD; 1FA5; 1FAD; 1F6D 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1FAE; 1FA6; 1FAE; 1F6E 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1FAF; 1FA7; 1FAF; 1F6F 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1FB3; 1FB3; 1FBC; 0391 0399; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
1FBC; 1FB3; 1FBC; 0391 0399; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
1FC3; 1FC3; 1FCC; 0397 0399; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
1FCC; 1FC3; 1FCC; 0397 0399; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
1FF3; 1FF3; 1FFC; 03A9 0399; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
1FFC; 1FF3; 1FFC; 03A9 0399; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
# Some characters with YPOGEGRAMMENI also have no corresponding titlecases
1FB2; 1FB2; 1FBA 0345; 1FBA 0399; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
1FB4; 1FB4; 0386 0345; 0386 0399; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
1FC2; 1FC2; 1FCA 0345; 1FCA 0399; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
1FC4; 1FC4; 0389 0345; 0389 0399; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
1FF2; 1FF2; 1FFA 0345; 1FFA 0399; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
1FF4; 1FF4; 038F 0345; 038F 0399; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
1FB7; 1FB7; 0391 0342 0345; 0391 0342 0399; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
1FC7; 1FC7; 0397 0342 0345; 0397 0342 0399; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
1FF7; 1FF7; 03A9 0342 0345; 03A9 0342 0399; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
# ================================================================================
# Conditional Mappings
# The remainder of this file provides conditional casing data used to produce
# full case mappings.
# ================================================================================
# Language-Insensitive Mappings
# These are characters whose full case mappings do not depend on language, but do
# depend on context (which characters come before or after). For more information
# see the header of this file and the Unicode Standard.
# ================================================================================
# Special case for final form of sigma
03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
# Note: the following cases for non-final are already in the UnicodeData.txt file.
# 03A3; 03C3; 03A3; 03A3; # GREEK CAPITAL LETTER SIGMA
# 03C3; 03C3; 03A3; 03A3; # GREEK SMALL LETTER SIGMA
# 03C2; 03C2; 03A3; 03A3; # GREEK SMALL LETTER FINAL SIGMA
# Note: the following cases are not included, since they would case-fold in lowercasing
# 03C3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK SMALL LETTER SIGMA
# 03C2; 03C3; 03A3; 03A3; Not_Final_Sigma; # GREEK SMALL LETTER FINAL SIGMA
# ================================================================================
# Language-Sensitive Mappings
# These are characters whose full case mappings depend on language and perhaps also
# context (which characters come before or after). For more information
# see the header of this file and the Unicode Standard.
# ================================================================================
# Lithuanian
# Lithuanian retains the dot in a lowercase i when followed by accents.
# Remove DOT ABOVE after "i" with upper or titlecase
0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
# Introduce an explicit dot above when lowercasing capital I's and J's
# whenever there are more accents above.
# (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
# ================================================================================
# Turkish and Azeri
# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
# The following rules handle those cases.
0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE
0130; 0069; 0130; 0130; az; # LATIN CAPITAL LETTER I WITH DOT ABOVE
# When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
# This matches the behavior of the canonically equivalent I-dot_above
0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
# When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I
# When uppercasing, i turns into a dotted capital I
0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I
# Note: the following case is already in the UnicodeData.txt file.
# 0131; 0131; 0049; 0049; tr; # LATIN SMALL LETTER DOTLESS I
# EOF

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2007, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2007, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -26,7 +26,10 @@
/*
* @test
* @bug 4830803 4886934 6565620 6959267 7070436 7198195 8032446 8072600 8202771
* @summary Check that the UnicodeBlock forName() method works as expected and block ranges are correct for all Unicode characters.
* 8221431
* @summary Check that the UnicodeBlock forName() method works as expected
* and block ranges are correct for all Unicode characters.
* @library /lib/testlibrary/java/lang
* @run main CheckBlocks
* @author John O'Conner
*/
@ -276,8 +279,7 @@ public class CheckBlocks {
public static HashSet<Block> blocks = new HashSet<>();
private static void generateBlockList() throws Exception {
File blockData = new File(System.getProperty("test.src", "."),
"Blocks.txt");
File blockData = UCDFiles.BLOCKS.toFile();
try (BufferedReader f = new BufferedReader(new FileReader(blockData))) {
String line;
while ((line = f.readLine()) != null) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, 2018 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015, 2019 Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -23,7 +23,7 @@
/**
* @test
* @bug 8080535 8191410 8215194
* @bug 8080535 8191410 8215194 8221431
* @summary Expected size of Character.UnicodeBlock.map is not optimal
* @library /test/lib
* @modules java.base/java.lang:open
@ -46,13 +46,14 @@ import jdk.test.lib.util.OptimalCapacity;
// be 638 entries in Character.UnicodeBlock.map.
//
// As of Unicode 11, 667 entries are expected.
// As of Unicode 12.1, 676 entries are expected.
//
// Initialization of the map and this test will have to be adjusted
// accordingly then.
//
// Note that HashMap's implementation aligns the initial capacity to
// a power of two size, so it will end up 1024 (and thus succeed) in
// cases, such as 638 and 667.
// cases, such as 638, 667, and 676.
public class OptimalMapSize {
public static void main(String[] args) throws Throwable {
@ -61,7 +62,7 @@ public class OptimalMapSize {
Field f = Character.UnicodeBlock.class.getDeclaredField("NUM_ENTITIES");
f.setAccessible(true);
int num_entities = f.getInt(null);
assert num_entities == 667;
assert num_entities == 676;
int initialCapacity = (int)(num_entities / 0.75f + 1.0f);
OptimalCapacity.ofHashMap(Character.UnicodeBlock.class,

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -25,8 +25,9 @@
/*
* @test
* @bug 4397357 6565620 6959267 8032446 8072600
* @bug 4397357 6565620 6959267 8032446 8072600 8221431
* @summary Confirm normal case mappings are handled correctly.
* @library /lib/testlibrary/java/lang
* @run main/timeout=200 UnicodeCasingTest
*/
@ -60,8 +61,7 @@ public class UnicodeCasingTest {
BufferedReader in = null;
try {
File file = new File(System.getProperty("test.src", "."),
"UnicodeData.txt");
File file = UCDFiles.UNICODE_DATA.toFile();
int locale_num = locales.size();
for (int l = 0; l < locale_num; l++) {

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -26,7 +26,9 @@
/*
* @test
* @bug 4397357 6565620 6959267 7070436 7198195 8041791 8032446 8072600
* 8221431
* @summary Confirm special case mappings are handled correctly.
* @library /lib/testlibrary/java/lang
*/
import java.io.BufferedReader;
@ -137,8 +139,7 @@ public class SpecialCasingTest {
} else {
specificLocale = false;
}
in = Files.newBufferedReader(Paths.get(System.getProperty("test.src.path"), "..", "/Character/SpecialCasing.txt")
.toRealPath());
in = Files.newBufferedReader(UCDFiles.SPECIAL_CASING.toRealPath());
String line;
while ((line = in.readLine()) != null) {
if (line.length() == 0 || line.charAt(0) == '#') {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -25,8 +25,9 @@
/*
* @test
* @bug 4397357 6565620 6959267 7070436 7198195 8032446 8072600
* @bug 4397357 6565620 6959267 7070436 7198195 8032446 8072600 8221431
* @summary Confirm normal case mappings are handled correctly.
* @library /lib/testlibrary/java/lang
* @run main/timeout=200 UnicodeCasingTest
*/
@ -70,8 +71,7 @@ public class UnicodeCasingTest {
// First, we create exclude lists of characters whose mappings exist
// in SpecialCasing.txt and mapping rules in UnicodeData.txt aren't
// applicable.
in = Files.newBufferedReader(Paths.get(System.getProperty("test.src.path"), "..", "/Character/SpecialCasing.txt")
.toRealPath());
in = Files.newBufferedReader(UCDFiles.SPECIAL_CASING.toRealPath());
String line;
while ((line = in.readLine()) != null) {
if (line.length() == 0 || line.charAt(0) == '#') {
@ -88,8 +88,7 @@ public class UnicodeCasingTest {
defaultLang = locale.getLanguage();
// System.out.println("Testing on " + locale + " locale....");
System.err.println("Testing on " + locale + " locale....");
in = Files.newBufferedReader(Paths.get(System.getProperty("test.src.path"), "..", "/Character/UnicodeData.txt")
.toRealPath());
in = Files.newBufferedReader(UCDFiles.UNICODE_DATA.toRealPath());
while ((line = in.readLine()) != null) {
if (line.length() == 0 || line.charAt(0) == '#') {
continue;

View File

@ -0,0 +1,529 @@
/*
* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @bug 4221795 6565620 6959267 7070436 7198195 8032446 8221431
* @summary Confirm Normalizer's fundamental behavior
* @library /lib/testlibrary/java/lang
* @modules java.base/sun.text java.base/sun.text.normalizer
* @compile -XDignore.symbol.file ConformanceTest.java
* @run main/timeout=3000 ConformanceTest
*/
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.lang.reflect.Method;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.util.BitSet;
import java.util.StringTokenizer;
import sun.text.normalizer.NormalizerBase;
import sun.text.normalizer.NormalizerImpl;
/*
* Conformance test for java.text.Normalizer and sun.text.Normalizer.
*/
public class ConformanceTest {
//
// Options to be used with sun.text.Normalizer
//
/*
* Default Unicode 3.2.0 normalization. (Provided for IDNA/StringPrep)
*
* - Without Corrigendum 4 fix
* (Different from ICU4J 3.2's Normalizer.)
* - Without Public Review Issue #29 fix
* (Different from ICU4J 3.2's Normalizer.)
*/
private static final int UNICODE_3_2_0 = sun.text.Normalizer.UNICODE_3_2;
/*
* Original Unicode 3.2.0 normalization. (Provided for testing only)
*
* - With Corrigendum 4 fix
* - With Public Review Issue #29 fix
*/
private static final int UNICODE_3_2_0_ORIGINAL =
NormalizerBase.UNICODE_3_2;
/*
* Default normalization. In JDK 6,
* - Unicode 4.0.0
* - With Corrigendum 4 fix
* - Without Public Review Issue #29 fix
*
* In JDK 7,
* - Unicode 5.1.0
* (Different from ICU4J 3.2's Normalizer.)
* - With Corrigendum 4 fix
* - With Public Review Issue #29 fix
*
* In JDK 8,
* - Unicode 6.1.0
* - With Corrigendum 4 fix
* - With Public Review Issue #29 fix
*
* When we support Unicode 4.1.0 or later, we need to do normalization
* with Public Review Issue #29 fix. For more details of PRI #29, see
* http://unicode.org/review/pr-29.html .
*/
private static final int UNICODE_LATEST = NormalizerBase.UNICODE_LATEST;
//
// Conformance test datafiles
//
/*
* Conformance test datafile for Unicode 3.2.0 with Corrigendum4
* corrections.
* This testdata is for sun.text.Normalize(UNICODE_3_2)
*
* This is NOT an original Conformance test data. Some inconvenient test
* cases are commented out. About corrigendum 4, please refer
* http://www.unicode.org/review/resolved-pri.html#pri29
*
*/
static final String DATA_3_2_0_CORRIGENDUM =
"NormalizationTest-3.2.0.Corrigendum4.txt";
/*
* Conformance test datafile for Unicode 3.2.0 without Corrigendum4
* corrections. This is the original Conformance test data.
*
* This testdata is for sun.text.Normalize(UNICODE_3_2_IDNA)
*/
static final String DATA_3_2_0 = "NormalizationTest-3.2.0.txt";
/*
* Conformance test datafile for the latest Unicode which is supported
* by J2SE.
* Unicode 4.0.0 is the latest version in JDK 5.0 and JDK 6. Unicode 5.1.0
* in JDK 7, and 6.1.0 in JDK 8. This Unicode can be used via both
* java.text.Normalizer and sun.text.Normalizer.
*
* This testdata is for sun.text.Normalize(UNICODE_LATEST)
*/
static final String DATA_LATEST = "NormalizationTest-Latest.txt";
/*
* Conformance test datafile in ICU4J 3.2.
*/
static final String DATA_ICU = "ICUNormalizationTest.txt";
/*
* Decoder
*/
static final CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder();
/*
* List to pick up characters which are not listed in Part1
*/
static BitSet charList = new BitSet(Character.MAX_CODE_POINT+1);
/*
* Shortcuts
*/
private static final java.text.Normalizer.Form NFC =
java.text.Normalizer.Form.NFC;
private static final java.text.Normalizer.Form NFD =
java.text.Normalizer.Form.NFD;
private static final java.text.Normalizer.Form NFKC =
java.text.Normalizer.Form.NFKC;
private static final java.text.Normalizer.Form NFKD =
java.text.Normalizer.Form.NFKD;
static final java.text.Normalizer.Form[] forms = {NFC, NFD, NFKC, NFKD};
static TestNormalizer normalizer;
/*
 * Program entry point: creates a ConformanceTest instance and runs the
 * whole suite on it. The command-line arguments are not used.
 */
public static void main(String[] args) throws Exception {
    new ConformanceTest().test();
}
/*
 * Runs every conformance pass in a fixed order.
 *
 * The static "normalizer" field selects which implementation the static
 * test helpers call, so it is reassigned before each group of passes.
 * This is an instance method because the normalizer wrappers are
 * (non-static) inner classes of ConformanceTest.
 */
void test() throws Exception {
// Pass 1: java.text.Normalizer against the latest Unicode data.
normalizer = new testJavaNormalizer();
test(DATA_LATEST, UNICODE_LATEST);
// Passes 2-4: sun.text.Normalizer against the Unicode 3.2.0 data, the
// latest Unicode data and the ICU data file.
normalizer = new testSunNormalizer();
test(DATA_3_2_0_CORRIGENDUM, UNICODE_3_2_0);
test(DATA_LATEST, UNICODE_LATEST);
test(DATA_ICU, UNICODE_LATEST);
/* Unconformity test */
// test(DATA_3_2_0, UNICODE_LATEST);
// test(DATA_LATEST, UNICODE_3_2_0);
}
/*
 * Main routine of conformance test.
 *
 * Reads the given conformance data file line by line, skipping comment
 * lines (starting with '#') and "@Part" section headers, and runs the
 * NFC/NFD/NFKC/NFKD checks on every data line. If the file contains a
 * "@Part1 " section, every code point that was not listed as the first
 * column of a Part1 line is afterwards checked to be unchanged by all
 * normalization forms.
 *
 * filename   - name of the data file; DATA_LATEST is resolved through
 *              UCDFiles, any other name relative to the "test.src"
 *              system property (default ".")
 * unicodeVer - option value handed to the normalizer under test
 */
private static void test(String filename, int unicodeVer) throws Exception {
    File f = filename.equals(DATA_LATEST) ?
        UCDFiles.NORMALIZATION_TEST.toFile() :
        new File(System.getProperty("test.src", "."), filename);

    boolean part1testExists = false;

    // try-with-resources: a failing check throws out of the loop, and the
    // streams must still be closed in that case.
    try (FileInputStream fis = new FileInputStream(f);
         BufferedReader in =
             new BufferedReader(new InputStreamReader(fis, decoder))) {

        System.out.println("\nStart testing for " + normalizer.name +
            " with " + filename + " for options: " +
            (((unicodeVer & NormalizerBase.UNICODE_3_2) != 0) ?
                "Unicode 3.2.0" : "the latest Unicode"));

        int lineNo = 0;
        String text;
        boolean part1test = false;
        String[] columns = new String[6];

        while ((text = in.readLine()) != null) {
            lineNo++;

            // An empty line carries no data; charAt(0) below would throw
            // StringIndexOutOfBoundsException on it.
            if (text.isEmpty()) {
                continue;
            }

            char c = text.charAt(0);
            if (c == '#') {
                continue;
            } else if (c == '@') {
                if (text.startsWith("@Part")) {
                    System.out.println("# Testing data in " + text);

                    if (text.startsWith("@Part1 ")) {
                        part1test = true;
                        part1testExists = true;
                    } else {
                        part1test = false;
                    }

                    continue;
                }
            }

            prepareColumns(columns, text, filename, lineNo, part1test);

            testNFC(columns, unicodeVer, filename, lineNo);
            testNFD(columns, unicodeVer, filename, lineNo);
            testNFKC(columns, unicodeVer, filename, lineNo);
            testNFKD(columns, unicodeVer, filename, lineNo);
        }
    }

    if (part1testExists) {
        System.out.println("# Testing characters which are not listed in Part1");
        testRemainingChars(filename, unicodeVer);
    }
}
/*
 * Test for NFC
 *
 * c2 == NFC(c1) == NFC(c2) == NFC(c3)
 * c4 == NFC(c4) == NFC(c5)
 *
 * c[1]..c[5] are the five data columns filled in by prepareColumns().
 * Each call below names the expected column first, then the inclusive
 * range of source columns that must normalize to it.
 */
private static void testNFC(String[] c, int unicodeVer,
String file, int line) throws Exception {
test(2, c, 1, 3, NFC, unicodeVer, file, line);
test(4, c, 4, 5, NFC, unicodeVer, file, line);
}
/*
 * Test for NFD
 *
 * c3 == NFD(c1) == NFD(c2) == NFD(c3)
 * c5 == NFD(c4) == NFD(c5)
 *
 * c[1]..c[5] are the five data columns filled in by prepareColumns().
 * Each call below names the expected column first, then the inclusive
 * range of source columns that must normalize to it.
 */
private static void testNFD(String[] c, int unicodeVer,
String file, int line) throws Exception {
test(3, c, 1, 3, NFD, unicodeVer, file, line);
test(5, c, 4, 5, NFD, unicodeVer, file, line);
}
/*
 * Test for NFKC
 *
 * c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
 *
 * All five columns (c[1]..c[5]) must normalize to column 4.
 */
private static void testNFKC(String[] c, int unicodeVer,
String file, int line) throws Exception {
test(4, c, 1, 5, NFKC, unicodeVer, file, line);
}
/*
 * Test for NFKD
 *
 * c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
 *
 * All five columns (c[1]..c[5]) must normalize to column 5.
 */
private static void testNFKD(String[] c, int unicodeVer,
String file, int line) throws Exception {
test(5, c, 1, 5, NFKD, unicodeVer, file, line);
}
/*
 * Test for characters which aren't listed in Part1
 *
 * X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)
 *
 * Walks every code point; those whose bit is not set in charList must be
 * returned unchanged by normalize() and reported as normalized by
 * isNormalized(), for each of the four forms. Failures are reported
 * with line number -1 because they do not come from a data line.
 */
private static void testRemainingChars(String file,
                                       int unicodeVer) throws Exception {
    for (int cp = Character.MIN_CODE_POINT;
         cp <= Character.MAX_CODE_POINT;
         cp++) {
        if (charList.get(cp)) {
            // Listed in Part1: already covered by the data-driven checks.
            continue;
        }

        String single = new String(Character.toChars(cp));
        for (java.text.Normalizer.Form form : forms) {
            String normalized = normalizer.normalize(single, form, unicodeVer);
            if (!single.equals(normalized)) {
                error(form, single, single, normalized, file, -1);
            }

            boolean reportedNormalized =
                normalizer.isNormalized(single, form, unicodeVer);
            if (!reportedNormalized) {
                error(form, single, file, -1);
            }
        }
    }
}
/*
 * Test normalize() and isNormalized()
 *
 * For every source column c[FROM]..c[TO] (inclusive):
 *  - normalize(source) must equal the expected column c[col];
 *  - isNormalized(source) must be true exactly when the source already
 *    equals c[col]. A string that equals its own normalized form is
 *    normalized, and vice versa.
 */
private static void test(int col, String[] c,
                         int FROM, int TO,
                         java.text.Normalizer.Form form, int unicodeVer,
                         String file, int line) throws Exception {
    int idx = FROM;
    while (idx <= TO) {
        String source = c[idx];
        String expected = c[col];

        String actual = normalizer.normalize(source, form, unicodeVer);
        if (!expected.equals(actual)) {
            error(form, source, expected, actual, file, line);
        }

        boolean shouldBeNormalized = expected.equals(source);
        boolean reported = normalizer.isNormalized(source, form, unicodeVer);
        if (reported != shouldBeNormalized) {
            error(form, source, file, line);
        }

        idx++;
    }
}
/*
 * Generate an array of String from a line of conformance datafile.
 *
 * Anything from the first '#' on is dropped as a comment. The rest is
 * split on ';' and the first five fields, each a space-separated list of
 * hexadecimal code points, are decoded into cols[1]..cols[5] (cols[0] is
 * not touched). When part1test is true, the first code point of cols[1]
 * is recorded in charList.
 *
 * Throws RuntimeException when the line has fewer than five fields, and
 * NumberFormatException when a field is not valid hexadecimal.
 */
private static void prepareColumns(String[] cols, String text,
                                   String file, int line,
                                   boolean part1test) throws Exception {
    int index = text.indexOf('#');
    if (index != -1) {
        text = text.substring(0, index);
    }

    StringTokenizer st = new StringTokenizer(text, ";");
    int tokenCount = st.countTokens();
    if (tokenCount < 5) {
        // Five fields are what the loop below consumes, so that is the
        // requirement the message states.
        throw new RuntimeException("# of tokens in datafile should be at least 5, but got: " + tokenCount + " at line " + line + " in " + file);
    }

    // Local, single-threaded buffer: no need for a synchronized StringBuffer.
    StringBuilder sb = new StringBuilder();
    for (int i = 1; i <= 5; i++) {
        StringTokenizer tst = new StringTokenizer(st.nextToken(), " ");

        while (tst.hasMoreTokens()) {
            int code = Integer.parseInt(tst.nextToken(), 16);
            sb.appendCodePoint(code);
        }

        cols[i] = sb.toString();
        sb.setLength(0);
    }

    if (part1test) {
        charList.set(cols[1].codePointAt(0));
    }
}
/*
 * Reports that normalize() didn't return the expected value: prints the
 * source, the expected and the actual strings (as hex) to System.err and
 * then aborts the run by throwing a RuntimeException.
 */
private static void error(java.text.Normalizer.Form form,
                          String from, String to, String got,
                          String file, int line) throws Exception {
    String formName = form.toString();
    StringBuilder msg = new StringBuilder("-\t");
    msg.append(formName).append(": normalize(").append(toHexString(from));
    msg.append(") doesn't equal <").append(toHexString(to));
    msg.append("> at line ").append(line).append(" in ").append(file);
    msg.append(". Got [").append(toHexString(got)).append("]");
    System.err.println(msg.toString());
    throw new RuntimeException("Normalization(" + formName + ") failed");
}
/*
 * Reports that isNormalized() didn't return the expected value: prints
 * the offending string (as hex) to System.err and then aborts the run by
 * throwing a RuntimeException.
 */
private static void error(java.text.Normalizer.Form form, String s,
                          String file, int line) throws Exception {
    String formName = form.toString();
    String msg = "\t" + formName + ": isNormalized(" + toHexString(s)
        + ") returned the wrong value at line " + line
        + " in " + file;
    System.err.println(msg);
    throw new RuntimeException("Normalization(" + formName + ") failed");
}
/*
 * (For debugging)
 * Prints a message to System.out when normalize() returned the expected
 * value.
 */
private static void okay(java.text.Normalizer.Form form,
                         String from, String to, String got,
                         String file, int line) {
    StringBuilder msg = new StringBuilder("\t");
    msg.append(form.toString()).append(": normalize(").append(toHexString(from));
    msg.append(") equals <").append(toHexString(to));
    msg.append("> at line ").append(line).append(" in ").append(file);
    msg.append(". Got [").append(toHexString(got)).append("]");
    System.out.println(msg.toString());
}
/*
 * (For debugging)
 * Prints a message to System.out when isNormalized() returned the
 * expected value.
 */
private static void okay(java.text.Normalizer.Form form, String s,
                         String file, int line) {
    String msg = "\t" + form.toString() + ": isNormalized(" + toHexString(s)
        + ") returned the correct value at line " + line
        + " in " + file;
    System.out.println(msg);
}
/*
 * Returns a space-delimited hex String: a leading space, then the
 * lower-case hex value of every UTF-16 code unit of s, each followed by
 * one space. Supplementary characters therefore appear as two values.
 */
private static String toHexString(String s) {
    StringBuilder hex = new StringBuilder(" ");
    for (char unit : s.toCharArray()) {
        hex.append(Integer.toHexString(unit)).append(' ');
    }
    return hex.toString();
}
/*
 * Abstract class to call each Normalizer in java.text or sun.text.
 *
 * Gives the test routines one common signature for both APIs: every
 * call carries an "option" argument, which an implementation is free to
 * ignore.
 */
private abstract class TestNormalizer {
// Display name of the wrapped normalizer; printed in the progress output.
String name;
TestNormalizer(String str) {
name = str;
}
String getNormalizerName() {
return name;
}
// Normalizes cs to the given form.
abstract String normalize(CharSequence cs,
java.text.Normalizer.Form form,
int option);
// Tells whether cs is already normalized in the given form.
abstract boolean isNormalized(CharSequence cs,
java.text.Normalizer.Form form,
int option);
}
/*
* For java.text.Normalizer
* - normalize(CharSequence, Normalizer.Form)
* - isNormalized(CharSequence, Normalizer.Form)
*/
private class testJavaNormalizer extends TestNormalizer {
testJavaNormalizer() {
super("java.text.Normalizer");
}
String normalize(CharSequence cs,
java.text.Normalizer.Form form,
int option) {
return java.text.Normalizer.normalize(cs, form);
}
boolean isNormalized(CharSequence cs,
java.text.Normalizer.Form form,
int option) {
return java.text.Normalizer.isNormalized(cs, form);
}
}
/*
* For sun.text.Normalizer
* - normalize(CharSequence, Normalizer.Form, int)
* - isNormalized(CharSequence, Normalizer.Form, int)
*/
private class testSunNormalizer extends TestNormalizer {
testSunNormalizer() {
super("sun.text.Normalizer");
}
String normalize(CharSequence cs,
java.text.Normalizer.Form form,
int option) {
return sun.text.Normalizer.normalize(cs, form, option);
}
boolean isNormalized(CharSequence cs,
java.text.Normalizer.Form form,
int option) {
return sun.text.Normalizer.isNormalized(cs, form, option);
}
}
}

View File

@ -0,0 +1,447 @@
/*
* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* test
* bug 4221795
* summary Confirm *.icu data using ICU4J Normalizer
*/
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.util.BitSet;
import java.util.StringTokenizer;
import com.ibm.icu.text.Normalizer;
import com.ibm.icu.impl.NormalizerImpl;
/**
* This is not a test program but a data validation utility.
* Two datafiles for Normalizer, unorm.icu and uprops.icu under
* sun/text/resources, are generated using generators in ICU4C 3.2 on a
* BIG-ENDIAN machine. Before using them with java.text.Normalizer and
* sun.text.Normalizer, you may want to check these test datafile's validation.
* You can test datafiles using Normalizer in ICU4J 3.2. Download ICU4J 3.2 and
* run this test program with -cp <ICU4J 3.2>.
*/
public class DataValidationTest {
//
// Options to be used with com.ibm.icu.text.Normalizer
//
/*
* Default Unicode 3.2.0 normalization.
*
* - With Corrigendum 4 fix
* (Different from Mustang's Normalizer.)
* - With Public Review Issue #29 fix
* (Different from Mustang's Normalizer.)
*/
private static final int UNICODE_3_2_0 = Normalizer.UNICODE_3_2;
/*
* *Incomplete* Unicode 3.2.0 normalization for IDNA/StringPrep.
*
* - With Corrigendum 4 fix
* - Without Public Review Issue #29 fix
*
* ICU4J's Normalizer itself doesn't support normalization for Unicode 3.2.0
* without Corrigendum 4 fix, which is necessary for IDNA/StringPrep. It is
* done in StringPrep. Therefore, we don't test the normalization in this
* test program. We merely test normalization for Unicode 3.2.0 without
* Public Review Issue #29 fix with this test program.
*/
private static final int UNICODE_3_2_0_BEFORE_PRI_29 =
Normalizer.UNICODE_3_2 |
NormalizerImpl.BEFORE_PRI_29;
/*
* Default normalization.
*
* - Unicode 4.0.1
* (Different from Mustang's Normalizer.)
* - With Corrigendum 4 fix
* - With Public Review Issue #29 fix
* (Different from Mustang's Normalizer.)
*
* Because Public Review Issue #29 is fixed in Unicode 4.1.0. I think that
* ICU4J 3.2 should not support it. But it actually supports PRI #29 fix
* as default....
*/
private static final int UNICODE_LATEST = 0x00;
/*
* Normalization without Public Review Issue #29 fix.
*
* - Unicode 4.0.1
* - Without Corrigendum 4 fix
* - Without Public Review Issue #29 fix
*/
static final int UNICODE_LATEST_BEFORE_PRI_29 =
NormalizerImpl.BEFORE_PRI_29;
//
// Conformance test datafiles
//
/*
* Conformance test datafile for normalization for Unicode 3.2.0 with
* Corrigendum 4 corrections. This is NOT an original Conformance test
* data. Some inconvenient test cases are commented out.
* About corrigendum 4, please refer
* http://www.unicode.org/versions/corrigendum4.html
*
* ICU4J 3.2's Normalizer itself doesn't support normalization for Unicode
* 3.2.0 without Corrigendum 4 corrections. StringPrep helps it. So, we
* don't test the normalization with this test program.
*/
static final String DATA_3_2_0 = "NormalizationTest-3.2.0.Corrigendum4.txt";
/*
* Conformance test datafile for the latest Unicode which is supported
* by J2SE.
*/
static final String DATA_LATEST = "NormalizationTest-Latest.txt";
/*
* Decoder
*/
static final CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder();
/*
* List to pick up characters which are not listed in Part1
*/
static BitSet charList = new BitSet(Character.MAX_CODE_POINT+1);
/*
* Shortcuts
*/
static final Normalizer.Mode NFC = com.ibm.icu.text.Normalizer.NFC;
static final Normalizer.Mode NFD = com.ibm.icu.text.Normalizer.NFD;
static final Normalizer.Mode NFKC = com.ibm.icu.text.Normalizer.NFKC;
static final Normalizer.Mode NFKD = com.ibm.icu.text.Normalizer.NFKD;
static final Normalizer.Mode[] modes = {NFC, NFD, NFKC, NFKD};
/*
 * Runs the validation passes in order: the Unicode 3.2.0 data file with
 * and without the BEFORE_PRI_29 option, then the latest data file with
 * the default options. The command-line arguments are not used.
 */
public static void main(String[] args) throws Exception {
test(DATA_3_2_0, UNICODE_3_2_0);
test(DATA_3_2_0, UNICODE_3_2_0_BEFORE_PRI_29);
test(DATA_LATEST, UNICODE_LATEST);
// This test started failing since ICU4J 3.6.
// test(DATA_LATEST, UNICODE_LATEST_BEFORE_PRI_29);
/* Unconformity test */
// test(DATA_3_2_0, UNICODE_LATEST);
// test(DATA_LATEST, UNICODE_3_2);
}
/*
 * Main routine of the data validation.
 *
 * Reads the given conformance data file line by line, skipping comment
 * lines (starting with '#') and "@Part" section headers, and runs the
 * NFC/NFD/NFKC/NFKD checks on every data line. When unicodeVer is
 * UNICODE_LATEST, every code point that was not recorded from a Part1
 * line is afterwards checked to be unchanged by all normalization modes.
 *
 * filename   - path of the data file, opened as given
 * unicodeVer - option bits handed to the ICU4J Normalizer
 */
private static void test(String filename, int unicodeVer) throws Exception {
    // try-with-resources: the streams must be closed even if a check or
    // the line parser throws.
    try (FileInputStream fis = new FileInputStream(filename);
         BufferedReader in =
             new BufferedReader(new InputStreamReader(fis, decoder))) {

        // The BEFORE_PRI_29 bit selects the behavior *before* the
        // Public Review Issue #29 fix, i.e. "without" the fix.
        System.out.println("\nStart testing with " + filename +
            " for options: " +
            (((unicodeVer & Normalizer.UNICODE_3_2) != 0) ?
                "Unicode 3.2.0" : "the latest Unicode") + ", " +
            (((unicodeVer & NormalizerImpl.BEFORE_PRI_29) != 0) ?
                "without" : "with") + " PRI #29 fix");

        int lineNo = 0;
        String text;
        String[] columns = new String[6];
        boolean part1test = false;

        while ((text = in.readLine()) != null) {
            lineNo++;

            // An empty line carries no data; charAt(0) below would throw
            // StringIndexOutOfBoundsException on it.
            if (text.isEmpty()) {
                continue;
            }

            char c = text.charAt(0);
            if (c == '#') {
                continue;
            } else if (c == '@') {
                if (text.startsWith("@Part")) {
                    System.out.println("# Testing data in " + text);

                    if (text.startsWith("@Part1 ")) {
                        part1test = true;
                    } else {
                        part1test = false;
                    }

                    continue;
                }
            }

            prepareColumns(columns, text, filename, lineNo, part1test);

            testNFC(columns, unicodeVer, filename, lineNo);
            testNFD(columns, unicodeVer, filename, lineNo);
            testNFKC(columns, unicodeVer, filename, lineNo);
            testNFKD(columns, unicodeVer, filename, lineNo);
        }
    }

    if (unicodeVer == UNICODE_LATEST) {
        System.out.println("# Testing characters which are not listed in Part1");
        testRemainingChars(filename, unicodeVer);
    }
}
/*
 * Test for NFC
 *
 * c2 == NFC(c1) == NFC(c2) == NFC(c3)
 * c4 == NFC(c4) == NFC(c5)
 *
 * c[1]..c[5] are the five data columns filled in by prepareColumns().
 * Each call below names the expected column first, then the inclusive
 * range of source columns that must normalize to it.
 */
private static void testNFC(String[] c, int unicodeVer,
String file, int line) throws Exception {
test(2, c, 1, 3, NFC, unicodeVer, file, line);
test(4, c, 4, 5, NFC, unicodeVer, file, line);
}
/*
 * Test for NFD
 *
 * c3 == NFD(c1) == NFD(c2) == NFD(c3)
 * c5 == NFD(c4) == NFD(c5)
 *
 * c[1]..c[5] are the five data columns filled in by prepareColumns().
 * Each call below names the expected column first, then the inclusive
 * range of source columns that must normalize to it.
 */
private static void testNFD(String[] c, int unicodeVer,
String file, int line) throws Exception {
test(3, c, 1, 3, NFD, unicodeVer, file, line);
test(5, c, 4, 5, NFD, unicodeVer, file, line);
}
/*
 * Test for NFKC
 *
 * c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
 *
 * All five columns (c[1]..c[5]) must normalize to column 4.
 */
private static void testNFKC(String[] c, int unicodeVer,
String file, int line) throws Exception {
test(4, c, 1, 5, NFKC, unicodeVer, file, line);
}
/*
 * Test for NFKD
 *
 * c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
 *
 * All five columns (c[1]..c[5]) must normalize to column 5.
 */
private static void testNFKD(String[] c, int unicodeVer,
String file, int line) throws Exception {
test(5, c, 1, 5, NFKD, unicodeVer, file, line);
}
/*
 * Test for characters which aren't listed in Part1
 *
 * X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)
 *
 * Walks every code point; those whose bit is not set in charList must be
 * returned unchanged by normalize() and reported as normalized by
 * isNormalized(), for each of the four modes. Failures are reported
 * with line number -1 because they do not come from a data line.
 */
private static void testRemainingChars(String file,
                                       int unicodeVer) throws Exception {
    for (int cp = Character.MIN_CODE_POINT;
         cp <= Character.MAX_CODE_POINT;
         cp++) {
        if (charList.get(cp)) {
            // Recorded from a Part1 line: covered by the data-driven checks.
            continue;
        }

        String single = new String(Character.toChars(cp));
        for (Normalizer.Mode mode : modes) {
            String normalized = Normalizer.normalize(single, mode, unicodeVer);
            if (!single.equals(normalized)) {
                error(mode, single, single, normalized, file, -1);
            }

            boolean reportedNormalized =
                Normalizer.isNormalized(single, mode, unicodeVer);
            if (!reportedNormalized) {
                error(mode, single, file, -1);
            }
        }
    }
}
/*
 * Test normalize() and isNormalized()
 *
 * For every source column c[FROM]..c[TO] (inclusive):
 *  - normalize(source) must equal the expected column c[col];
 *  - isNormalized(source) must be true exactly when the source already
 *    equals c[col]. A string that equals its own normalized form is
 *    normalized, and vice versa.
 */
private static void test(int col, String[] c,
                         int FROM, int TO,
                         Normalizer.Mode mode, int unicodeVer,
                         String file, int line) throws Exception {
    int idx = FROM;
    while (idx <= TO) {
        String source = c[idx];
        String expected = c[col];

        String actual = Normalizer.normalize(source, mode, unicodeVer);
        if (!expected.equals(actual)) {
            error(mode, source, expected, actual, file, line);
        }

        boolean shouldBeNormalized = expected.equals(source);
        boolean reported = Normalizer.isNormalized(source, mode, unicodeVer);
        if (reported != shouldBeNormalized) {
            error(mode, source, file, line);
        }

        idx++;
    }
}
/*
 * Generate an array of String from a line of conformance datafile.
 *
 * Anything from the first '#' on is dropped as a comment. The rest is
 * split on ';' and the first five fields, each a space-separated list of
 * hexadecimal code points, are decoded into col[1]..col[5] (col[0] is
 * not touched). When part1test is true, the first code point of col[1]
 * is recorded in charList.
 *
 * Throws RuntimeException when the line has fewer than five fields, and
 * NumberFormatException when a field is not valid hexadecimal.
 */
private static void prepareColumns(String[] col, String text,
                                   String file, int line,
                                   boolean part1test) throws Exception {
    int index = text.indexOf('#');
    if (index != -1) {
        text = text.substring(0, index);
    }

    StringTokenizer st = new StringTokenizer(text, ";");
    int tokenCount = st.countTokens();
    if (tokenCount < 5) {
        // Five fields are what the loop below consumes, so that is the
        // requirement the message states.
        throw new RuntimeException("# of tokens in datafile should be at least 5, but got: " + tokenCount + " at line " + line + " in " + file);
    }

    // Local, single-threaded buffer: no need for a synchronized StringBuffer.
    StringBuilder sb = new StringBuilder();
    for (int i = 1; i <= 5; i++) {
        StringTokenizer tst = new StringTokenizer(st.nextToken(), " ");

        while (tst.hasMoreTokens()) {
            int code = Integer.parseInt(tst.nextToken(), 16);
            sb.appendCodePoint(code);
        }

        col[i] = sb.toString();
        sb.setLength(0);
    }

    if (part1test) {
        charList.set(col[1].codePointAt(0));
    }
}
/*
 * Show an error message when normalize() didn't return the expected value.
 * Only prints to System.err; the exception is deliberately left
 * commented out so that a run reports every mismatch instead of stopping
 * at the first one.
 */
private static void error(Normalizer.Mode mode,
                          String from, String to, String got,
                          String file, int line) throws Exception {
    StringBuilder msg = new StringBuilder("\t");
    msg.append(toString(mode)).append(": normalize(").append(toHexString(from));
    msg.append(") doesn't equal <").append(toHexString(to));
    msg.append("> at line ").append(line).append(" in ").append(file);
    msg.append(". Got <").append(toHexString(got)).append(">.");
    System.err.println(msg.toString());
    // throw new RuntimeException("Normalization(" + toString(mode) + ") failed");
}
/*
 * Show an error message when isNormalized() didn't return the expected
 * value. Only prints to System.err; the exception is deliberately left
 * commented out so that a run reports every mismatch instead of stopping
 * at the first one.
 */
private static void error(Normalizer.Mode mode, String orig,
                          String file, int line) throws Exception {
    String msg = "\t" + toString(mode) + ": isNormalized(" + toHexString(orig)
        + ") returned the wrong value at line " + line
        + " in " + file + ".";
    System.err.println(msg);
    // throw new RuntimeException("Normalization(" + toString(mode) +") failed");
}
/*
 * (For debugging)
 * Prints a message to System.out when normalize() returned the expected
 * value.
 */
private static void okay(Normalizer.Mode mode,
                         String from, String to, String got,
                         String file, int line) {
    StringBuilder msg = new StringBuilder("\t");
    msg.append(toString(mode)).append(": normalize(").append(toHexString(from));
    msg.append(") equals <").append(toHexString(to));
    msg.append("> at line ").append(line).append(" in ").append(file);
    msg.append(". Got <").append(toHexString(got)).append(">.");
    System.out.println(msg.toString());
}
/*
 * (For debugging)
 * Prints a message to System.out when isNormalized() returned the
 * expected value.
 */
private static void okay(Normalizer.Mode mode, String orig,
                         String file, int line) {
    String msg = "\t" + toString(mode) + ": isNormalized(" + toHexString(orig)
        + ") returned the correct value at line " + line
        + " in " + file + ".";
    System.out.println(msg);
}
/*
 * Returns a space-delimited hex String: a leading space, then the
 * lower-case hex value of every UTF-16 code unit of s, each followed by
 * one space. Supplementary characters therefore appear as two values.
 */
private static String toHexString(String s) {
    StringBuilder hex = new StringBuilder(" ");
    for (char unit : s.toCharArray()) {
        hex.append(Integer.toHexString(unit)).append(' ');
    }
    return hex.toString();
}
/*
 * Returns the name of Normalizer.Mode: "NFC", "NFD", "NFKC" or "NFKD"
 * for the corresponding Normalizer constant (compared by identity), and
 * "unknown" for anything else.
 */
private static String toString(Normalizer.Mode mode) {
    return (mode == Normalizer.NFC) ? "NFC"
         : (mode == Normalizer.NFD) ? "NFD"
         : (mode == Normalizer.NFKC) ? "NFKC"
         : (mode == Normalizer.NFKD) ? "NFKD"
         : "unknown";
}
}

View File

@ -0,0 +1,610 @@
/*
* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @bug 4221795 8032446
* @summary Confirm Normalizer's fundamental behavior. Imported from ICU4J 3.2's
* src/com/ibm/icu/dev/test and modified.
* @modules java.base/sun.text java.base/sun.text.normalizer
* @library /java/text/testlib
* @compile -XDignore.symbol.file ICUBasicTest.java
* @run main/timeout=30 ICUBasicTest
*/
/*
*******************************************************************************
* Copyright (C) 1996-2004, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
import sun.text.Normalizer;
import sun.text.normalizer.NormalizerBase;
import sun.text.normalizer.NormalizerImpl;
import static java.text.Normalizer.Form.*;
import static sun.text.normalizer.NormalizerBase.Mode.*;
public class ICUBasicTest extends IntlTest {
/*
 * Program entry point: hands the command-line arguments to run() on a
 * new ICUBasicTest instance.
 */
public static void main(String[] args) throws Exception {
    ICUBasicTest suite = new ICUBasicTest();
    suite.run(args);
}
/*
* Normalization modes
*/
private static final NormalizerBase.Mode NFCmode = NormalizerBase.NFC;
private static final NormalizerBase.Mode NFDmode = NormalizerBase.NFD;
private static final NormalizerBase.Mode NFKCmode = NormalizerBase.NFKC;
private static final NormalizerBase.Mode NFKDmode = NormalizerBase.NFKD;
private static final NormalizerBase.Mode NONEmode = NormalizerBase.NONE;
/*
* Normalization options
*/
/* Normal Unicode versions */
private static final int UNICODE_3_2_0 = Normalizer.UNICODE_3_2;
private static final int UNICODE_LATEST = NormalizerBase.UNICODE_LATEST;
/*
 * Special cases for UAX #15 bug
 * see Unicode Public Review Issue #29
 * at http://www.unicode.org/review/resolved-pri.html#pri29
 *
 * Note:
 * PRI #29 is supported in Unicode 4.1.0. Therefore, expected results are
 * different for earlier Unicode versions.
 *
 * Every case is normalized to NFC with its own option value and compared
 * with the expected string. A mismatch is reported through errln(); a
 * match is logged only when "verbose" is set.
 */
public void TestComposition() {
    final TestCompositionCase cases[] = new TestCompositionCase[] {
        new TestCompositionCase(NFC, UNICODE_3_2_0,
            "\u1100\u0300\u1161\u0327",
            "\u1100\u0300\u1161\u0327"),
        new TestCompositionCase(NFC, UNICODE_LATEST,
            "\u1100\u0300\u1161\u0327",
            "\u1100\u0300\u1161\u0327"),
        new TestCompositionCase(NFC, UNICODE_3_2_0,
            "\u1100\u0300\u1161\u0327\u11a8",
            "\u1100\u0300\u1161\u0327\u11a8"),
        new TestCompositionCase(NFC, UNICODE_LATEST,
            "\u1100\u0300\u1161\u0327\u11a8",
            "\u1100\u0300\u1161\u0327\u11a8"),
        new TestCompositionCase(NFC, UNICODE_3_2_0,
            "\uac00\u0300\u0327\u11a8",
            "\uac00\u0327\u0300\u11a8"),
        new TestCompositionCase(NFC, UNICODE_LATEST,
            "\uac00\u0300\u0327\u11a8",
            "\uac00\u0327\u0300\u11a8"),
        new TestCompositionCase(NFC, UNICODE_3_2_0,
            "\u0b47\u0300\u0b3e",
            "\u0b47\u0300\u0b3e"),
        new TestCompositionCase(NFC, UNICODE_LATEST,
            "\u0b47\u0300\u0b3e",
            "\u0b47\u0300\u0b3e"),
    };

    for (int caseNo = 0; caseNo < cases.length; caseNo++) {
        TestCompositionCase tc = cases[caseNo];
        String actual = Normalizer.normalize(tc.input, tc.form, tc.options);
        boolean matches = actual.equals(tc.expect);

        if (!matches) {
            errln("unexpected result for case " + caseNo + ". Expected="
                  + tc.expect + ", Actual=" + actual);
        } else if (verbose) {
            logln("expected result for case " + caseNo + ". Expected="
                  + tc.expect + ", Actual=" + actual);
        }
    }
}
/*
 * One row of TestComposition() data: the normalization form, the option
 * value passed to the normalizer, the input string and the expected
 * result.
 */
private static final class TestCompositionCase {
    public java.text.Normalizer.Form form;
    public int options;
    public String input;
    public String expect;

    TestCompositionCase(java.text.Normalizer.Form form,
                        int options,
                        String input,
                        String expect) {
        this.input = input;
        this.expect = expect;
        this.form = form;
        this.options = options;
    }
}
/*
 * Added in order to detect a regression.
 *
 * Normalizes a fixed sequence of five combining marks with NFD and
 * compares the result with the expected seven-character sequence.
 */
public void TestCombiningMarks() {
    String src = "\u0f71\u0f72\u0f73\u0f74\u0f75";
    String expected = "\u0F71\u0F71\u0F71\u0F72\u0F72\u0F74\u0F74";

    String result = NormalizerBase.normalize(src, NFD);
    if (expected.equals(result)) {
        return;
    }
    errln("Reordering of combining marks failed. Expected: " +
          toHexString(expected) + " Got: "+ toHexString(result));
}
/*
 * Added in order to detect a regression.
 *
 * NFC must leave this fixed Bengali sequence unchanged.
 */
public void TestBengali() throws Exception {
    String input = "\u09bc\u09be\u09cd\u09be";
    String composed = NormalizerBase.normalize(input, NFC);

    boolean unchanged = input.equals(composed);
    if (!unchanged) {
        errln("ERROR in NFC of string");
    }
}
/*
 * Added in order to detect a regression.
 */
/**
 * Test for a problem found by Verisign. Problem is that
 * characters at the start of a string are not put in canonical
 * order correctly by compose() if there is no starter.
 *
 * Each input is normalized with NFD and with NFC; both results must
 * equal the expected output at the same index.
 */
public void TestVerisign() throws Exception {
    String[] inputs = {
        "\u05b8\u05b9\u05b1\u0591\u05c3\u05b0\u05ac\u059f",
        "\u0592\u05b7\u05bc\u05a5\u05b0\u05c0\u05c4\u05ad"
    };
    String[] outputs = {
        "\u05b1\u05b8\u05b9\u0591\u05c3\u05b0\u05ac\u059f",
        "\u05b0\u05b7\u05bc\u05a5\u0592\u05c0\u05ad\u05c4"
    };

    int idx = 0;
    while (idx < inputs.length) {
        String source = inputs[idx];
        String expected = outputs[idx];

        String decomposed = NormalizerBase.normalize(source, NFD);
        if (!decomposed.equals(expected)) {
            errln("FAIL input: " + toHexString(source) + "\n" +
                  " decompose: " + toHexString(decomposed) + "\n" +
                  " expected: " + toHexString(expected));
        }

        String composed = NormalizerBase.normalize(source, NFC);
        if (!composed.equals(expected)) {
            errln("FAIL input: " + toHexString(source) + "\n" +
                  " compose: " + toHexString(composed) + "\n" +
                  " expected: " + toHexString(expected));
        }

        idx++;
    }
}
/**
 * Test for a problem that showed up just before ICU 1.6 release
 * having to do with combining characters with an index of zero.
 * Such characters do not participate in any canonical
 * decompositions. However, having an index of zero means that
 * they all share one typeMask[] entry, that is, they all have to
 * map to the same canonical class, which is not the case, in
 * reality.
 *
 * The data table is read three entries at a time: the source, the
 * expected NFKC result of the source, and the expected NFD result of
 * that NFKC output.
 */
public void TestZeroIndex() throws Exception {
    String[] DATA = {
        // Expect col1 x COMPOSE_COMPAT => col2
        // Expect col2 x DECOMP => col3
        "A\u0316\u0300", "\u00C0\u0316", "A\u0316\u0300",
        "A\u0300\u0316", "\u00C0\u0316", "A\u0316\u0300",
        "A\u0327\u0300", "\u00C0\u0327", "A\u0327\u0300",
        "c\u0321\u0327", "c\u0321\u0327", "c\u0321\u0327",
        "c\u0327\u0321", "\u00E7\u0321", "c\u0327\u0321",
    };

    for (int row = 0; row < DATA.length; row += 3) {
        String source = DATA[row];
        String composed = NormalizerBase.normalize(source, NFKC);
        String expectedComposed = DATA[row + 1];

        if (!composed.equals(expectedComposed)) {
            errln("FAIL: " + toHexString(source) + " x COMPOSE_COMPAT => " +
                  toHexString(composed) + ", expect " + toHexString(expectedComposed));
        } else {
            logln("Ok: " + toHexString(source) + " x COMPOSE_COMPAT => " +
                  toHexString(composed));
        }

        // The second step starts from the actual NFKC output, not from the
        // expected one.
        String decomposed = NormalizerBase.normalize(composed, NFD);
        String expectedDecomposed = DATA[row + 2];

        if (!decomposed.equals(expectedDecomposed)) {
            errln("FAIL: " + toHexString(composed) + " x DECOMP => " +
                  toHexString(decomposed) + ", expect " + toHexString(expectedDecomposed));
        } else {
            logln("Ok: " + toHexString(composed) + " x DECOMP => " +
                  toHexString(decomposed));
        }
    }
}
/**
 * Make sure characters in the CompositionExclusion.txt list do not get
 * composed to.
 *
 * Every listed character (or every code point of a listed range) is
 * handed to checkCompositionExclusion_320, which reports a failure when
 * the character survives a decompose/compose round trip.
 */
public void TestCompositionExclusion() throws Exception {
    // This list is generated from CompositionExclusion.txt.
    // Update whenever the normalizer tables are updated. All listed
    // characters are tested, including those that can be derived from
    // the Unicode DB.
    /*
     * kyuka's note:
     *   Original data seemed to be based on Unicode 3.0.0 (the initial
     *   Composition Exclusions list) and seemed to have some mistakes.
     *   Updated in order to correct mistakes and to support Unicode 4.0.0.
     *   And, this table can be used also for Unicode 3.2.0.
     */
    // Entry layout: a one-element row names a single character; a
    // two-element row names the first and last character of an
    // inclusive code point range.
    String[][] EXCLUDED_UNICODE_3_2_0 = {
        {"\u0340"},
        {"\u0341"},
        {"\u0343"},
        {"\u0344"},
        {"\u0374"},
        {"\u037E"},
        {"\u0387"},
        {"\u0958"},
        {"\u0959", "\u095F"},
        {"\u09DC"},
        {"\u09DD"},
        {"\u09DF"},
        {"\u0A33"},
        {"\u0A36"},
        {"\u0A59", "\u0A5B"},
        {"\u0A5E"},
        {"\u0B5C"},
        {"\u0B5D"},
        {"\u0F43"},
        {"\u0F4D"},
        {"\u0F52"},
        {"\u0F57"},
        {"\u0F5C"},
        {"\u0F69"},
        {"\u0F73"},
        {"\u0F75"},
        {"\u0F76"},
        {"\u0F78"},
        {"\u0F81"},
        {"\u0F93"},
        {"\u0F9D"},
        {"\u0FA2"},
        {"\u0FA7"},
        {"\u0FAC"},
        {"\u0FB9"},
        {"\u1F71"},
        {"\u1F73"},
        {"\u1F75"},
        {"\u1F77"},
        {"\u1F79"},
        {"\u1F7B"},
        {"\u1F7D"},
        {"\u1FBB"},
        {"\u1FBE"},
        {"\u1FC9"},
        {"\u1FCB"},
        {"\u1FD3"},
        {"\u1FDB"},
        {"\u1FE3"},
        {"\u1FEB"},
        {"\u1FEE"},
        {"\u1FEF"},
        {"\u1FF9"},
        {"\u1FFB"},
        {"\u1FFD"},
        {"\u2000"},
        {"\u2001"},
        {"\u2126"},
        {"\u212A"},
        {"\u212B"},
        {"\u2329"},
        {"\u232A"},
        {"\u2ADC"},
        {"\uF900", "\uFA0D"},
        {"\uFA10"},
        {"\uFA12"},
        {"\uFA15", "\uFA1E"},
        {"\uFA20"},
        {"\uFA22"},
        {"\uFA25"},
        {"\uFA26"},
        {"\uFA2A", "\uFA2D"},
        {"\uFA30", "\uFA6A"},
        {"\uFB1D"},
        {"\uFB1F"},
        {"\uFB2A", "\uFB36"},
        {"\uFB38", "\uFB3C"},
        {"\uFB3E"},
        {"\uFB40"},
        {"\uFB41"},
        {"\uFB43"},
        {"\uFB44"},
        {"\uFB46", "\uFB4E"},
        {"\uD834\uDD5E", "\uD834\uDD64"},
        {"\uD834\uDDBB", "\uD834\uDDC0"},
        {"\uD87E\uDC00", "\uD87E\uDE1D"}
    };

    for (String[] entry : EXCLUDED_UNICODE_3_2_0) {
        if (entry.length == 1) {
            checkCompositionExclusion_320(entry[0]);
        } else {
            // Range bounds are read as code points so that surrogate
            // pairs in the table denote supplementary characters.
            int from = Character.codePointAt(entry[0], 0);
            int to = Character.codePointAt(entry[1], 0);
            for (int cp = from; cp <= to; cp++) {
                checkCompositionExclusion_320(String.valueOf(Character.toChars(cp)));
            }
        }
    }
}
private void checkCompositionExclusion_320(String s) throws Exception {
    // Verifies that s is NOT reproduced by an NFKD -> NFC round trip,
    // first with the default normalization data and then with the
    // Normalizer.UNICODE_3_2 option. A round trip that yields the
    // original string is reported through errln; successes are logged
    // only when verbose is set.
    final String original = String.valueOf(s);

    // Round trip without an explicit Unicode version option.
    String decomposed = NormalizerBase.normalize(original, NFKD);
    String recomposed = NormalizerBase.normalize(decomposed, NFC);
    boolean roundTripped = recomposed.equals(original);
    if (roundTripped || verbose) {
        String trace = toHexString(original) + " x DECOMP_COMPAT => " +
                       toHexString(decomposed) + " x COMPOSE => " +
                       toHexString(recomposed) + " for the latest Unicode";
        if (roundTripped) {
            errln("FAIL: " + trace);
        } else {
            logln("Ok: " + trace);
        }
    }

    // Same round trip with the Unicode 3.2 option.
    decomposed = NormalizerBase.normalize(original, NFKD, Normalizer.UNICODE_3_2);
    recomposed = NormalizerBase.normalize(decomposed, NFC, Normalizer.UNICODE_3_2);
    roundTripped = recomposed.equals(original);
    if (roundTripped || verbose) {
        String trace = toHexString(original) + " x DECOMP_COMPAT => " +
                       toHexString(decomposed) + " x COMPOSE => " +
                       toHexString(recomposed) + " for Unicode 3.2.0";
        if (roundTripped) {
            errln("FAIL: " + trace);
        } else {
            logln("Ok: " + trace);
        }
    }
}
public void TestTibetan() throws Exception {
    // Checks U+0F77 and the three-character sequence it expands to under
    // compatibility decomposition, across all four normalization forms.
    final String vocalicSign = "\u0f77";
    final String expanded = "\u0fb2\u0f71\u0f80";

    // Row layout: input, expected NFD result, expected NFKD result.
    final String[][] decompositionCases = {
        { vocalicSign, vocalicSign, expanded }
    };
    // Row layout: input, expected NFC result, expected NFKC result.
    final String[][] compositionCases = {
        { expanded, expanded, expanded }
    };

    final int canonicalColumn = 1;
    final int compatibilityColumn = 2;
    staticTest(NFD, decompositionCases, canonicalColumn);
    staticTest(NFKD, decompositionCases, compatibilityColumn);
    staticTest(NFC, compositionCases, canonicalColumn);
    staticTest(NFKC, compositionCases, compatibilityColumn);
}
public void TestExplodingBase() throws Exception {
    // Characters involved:
    //   U+017F  Latin small letter long s
    //   U+0307  combining dot above
    //   U+1E61  Latin small letter s with dot above
    //   U+1E9B  Latin small letter long s with dot above
    //
    // Both tables use the row layout: input, decomposed, composed.

    // Canonical forms keep the long s as it is.
    final String[][] canonicalCases = {
        { "Tschu\u017f", "Tschu\u017f", "Tschu\u017f" },
        { "Tschu\u1e9b", "Tschu\u017f\u0307", "Tschu\u1e9b" },
    };
    // Compatibility forms replace the long s with a plain "s".
    final String[][] compatibilityCases = {
        { "\u017f", "s", "s" },
        { "\u1e9b", "s\u0307", "\u1e61" },
    };

    final int decomposedColumn = 1;
    final int composedColumn = 2;
    staticTest(NFD, canonicalCases, decomposedColumn);
    staticTest(NFC, canonicalCases, composedColumn);
    staticTest(NFKD, compatibilityCases, decomposedColumn);
    staticTest(NFKC, compatibilityCases, composedColumn);
}
// Canonical normalization cases shared by TestNFD and TestNFC.
// Each row holds three strings: the input, its expected canonical
// decomposition (column 1) and its expected canonical composition
// (column 2). The comment above a row describes the row's input.
private String[][] canonTests = {
// Input Decomposed Composed
{ "cat", "cat", "cat" },
{ "\u00e0ardvark", "a\u0300ardvark", "\u00e0ardvark", },
// D-dot_above
{ "\u1e0a", "D\u0307", "\u1e0a" },
// D dot_above
{ "D\u0307", "D\u0307", "\u1e0a" },
// D-dot_below dot_above
{ "\u1e0c\u0307", "D\u0323\u0307", "\u1e0c\u0307" },
// D-dot_above dot_below
{ "\u1e0a\u0323", "D\u0323\u0307", "\u1e0c\u0307" },
// D dot_above dot_below (the expected outputs place dot_below first)
{ "D\u0307\u0323", "D\u0323\u0307", "\u1e0c\u0307" },
// D-cedilla dot_above dot_below
{ "\u1e10\u0307\u0323", "D\u0327\u0323\u0307", "\u1e10\u0323\u0307"},
// D dot_above ogonek dot_below
{ "D\u0307\u0328\u0323","D\u0328\u0323\u0307", "\u1e0c\u0328\u0307"},
// E-macron-grave
{ "\u1E14", "E\u0304\u0300", "\u1E14" },
// E-macron + grave
{ "\u0112\u0300", "E\u0304\u0300", "\u1E14" },
// E-grave + macron
{ "\u00c8\u0304", "E\u0300\u0304", "\u00c8\u0304" },
// angstrom_sign
{ "\u212b", "A\u030a", "\u00c5" },
// A-ring
{ "\u00c5", "A\u030a", "\u00c5" },
// The ffi ligature (U+FB03) is left intact by the canonical forms.
{ "\u00c4ffin", "A\u0308ffin", "\u00c4ffin" },
{ "\u00c4\uFB03n", "A\u0308\uFB03n", "\u00c4\uFB03n" },
//updated with 3.0
{ "\u00fdffin", "y\u0301ffin", "\u00fdffin" },
{ "\u00fd\uFB03n", "y\u0301\uFB03n", "\u00fd\uFB03n" },
// The Roman numeral character (U+2163) is left intact by the canonical forms.
{ "Henry IV", "Henry IV", "Henry IV" },
{ "Henry \u2163", "Henry \u2163", "Henry \u2163" },
// ga(Zenkaku-Katakana)
{ "\u30AC", "\u30AB\u3099", "\u30AC" },
// ka(Zenkaku-Katakana) + ten(Zenkaku)
{ "\u30AB\u3099", "\u30AB\u3099", "\u30AC" },
// ka(Hankaku-Katakana) + ten(Hankaku-Katakana)
{ "\uFF76\uFF9E", "\uFF76\uFF9E", "\uFF76\uFF9E" },
// ka(Zenkaku-Katakana) + ten(Hankaku)
{ "\u30AB\uFF9E", "\u30AB\uFF9E", "\u30AB\uFF9E" },
// ka(Hankaku-Katakana) + ten(Zenkaku)
{ "\uFF76\u3099", "\uFF76\u3099", "\uFF76\u3099" },
// Out-of-order combining marks after A: reordered on decomposition.
{ "A\u0300\u0316", "A\u0316\u0300", "\u00C0\u0316" },
// Supplementary characters (written as surrogate pairs) whose expected
// composed form is identical to the decomposed form.
{ "\ud834\udd5e\ud834\udd57\ud834\udd65\ud834\udd5e",
"\ud834\udd57\ud834\udd65\ud834\udd57\ud834\udd65\ud834\udd57\ud834\udd65",
"\ud834\udd57\ud834\udd65\ud834\udd57\ud834\udd65\ud834\udd57\ud834\udd65" },
};
// Compatibility normalization cases shared by TestNFKD and TestNFKC.
// Each row holds three strings: the input, its expected compatibility
// decomposition (column 1) and its expected compatibility composition
// (column 2).
private String[][] compatTests = {
// Input Decomposed Composed
{ "cat", "cat", "cat" },
// Alef-Lamed vs. Alef, Lamed
{ "\uFB4f", "\u05D0\u05DC", "\u05D0\u05DC", },
{ "\u00C4ffin", "A\u0308ffin", "\u00C4ffin" },
// ffi ligature -> f + f + i
{ "\u00C4\uFB03n", "A\u0308ffin", "\u00C4ffin" },
//updated for 3.0
{ "\u00fdffin", "y\u0301ffin", "\u00fdffin" },
// ffi ligature -> f + f + i
{ "\u00fd\uFB03n", "y\u0301ffin", "\u00fdffin" },
// Roman numeral four (U+2163) -> I + V
{ "Henry IV", "Henry IV", "Henry IV" },
{ "Henry \u2163", "Henry IV", "Henry IV" },
// ga(Zenkaku-Katakana)
{ "\u30AC", "\u30AB\u3099", "\u30AC" },
// ka(Zenkaku-Katakana) + ten(Zenkaku)
{ "\u30AB\u3099", "\u30AB\u3099", "\u30AC" },
// ka(Hankaku-Katakana) + ten(Zenkaku)
{ "\uFF76\u3099", "\u30AB\u3099", "\u30AC" },
/* The next two cases were broken in Unicode 2.1.2 but are fixed in 2.1.5 and later. */
// ka(Hankaku-Katakana) + ten(Hankaku)
{ "\uFF76\uFF9E", "\u30AB\u3099", "\u30AC" },
// ka(Zenkaku-Katakana) + ten(Hankaku)
{ "\u30AB\uFF9E", "\u30AB\u3099", "\u30AC" },
};
public void TestNFD() throws Exception {
    // Canonical decomposition: expected results are in the "Decomposed"
    // column of canonTests.
    final int decomposedColumn = 1;
    staticTest(NFD, canonTests, decomposedColumn);
}
public void TestNFC() throws Exception {
    // Canonical composition: expected results are in the "Composed"
    // column of canonTests.
    final int composedColumn = 2;
    staticTest(NFC, canonTests, composedColumn);
}
public void TestNFKD() throws Exception {
    // Compatibility decomposition: expected results are in the
    // "Decomposed" column of compatTests.
    final int decomposedColumn = 1;
    staticTest(NFKD, compatTests, decomposedColumn);
}
public void TestNFKC() throws Exception {
    // Compatibility composition: expected results are in the "Composed"
    // column of compatTests.
    final int composedColumn = 2;
    staticTest(NFKC, compatTests, composedColumn);
}
private void staticTest(java.text.Normalizer.Form form,
                        String[][] tests,
                        int outCol) throws Exception {
    // Normalizes column 0 of every row with java.text.Normalizer using
    // the given form and compares the result with column outCol of the
    // same row. Each input is logged; each mismatch is reported through
    // errln together with the row index.
    for (int caseIndex = 0; caseIndex < tests.length; caseIndex++) {
        final String[] row = tests[caseIndex];
        final String input = row[0];
        logln("Normalizing '" + input + "' (" + toHexString(input) + ")" );

        final String expect = row[outCol];
        final String output = java.text.Normalizer.normalize(input, form);
        if (output.equals(expect)) {
            continue;
        }
        errln("FAIL: case " + caseIndex
              + " expected '" + expect + "' (" + toHexString(expect) + ")"
              + " but got '" + output + "' (" + toHexString(output) + ")"
        );
    }
}
// With canonical decomposition, Hangul syllables should get decomposed
// into Jamo, but Jamo characters should not be decomposed into
// conjoining Jamo.
//
// Used by TestHangulCompose (column 2) and TestHangulDecomp (column 1).
// Each row holds: input, expected decomposed form, expected composed form.
private String[][] hangulCanon = {
// Input Decomposed Composed
{ "\ud4db", "\u1111\u1171\u11b6", "\ud4db" },
{ "\u1111\u1171\u11b6", "\u1111\u1171\u11b6", "\ud4db" },
};
public void TestHangulCompose() throws Exception {
    // NFC over the Hangul table; expected results are in the "Composed"
    // column of hangulCanon.
    final int composedColumn = 2;
    logln("Canonical composition...");
    staticTest(NFC, hangulCanon, composedColumn);
}
public void TestHangulDecomp() throws Exception {
    // NFD over the Hangul table; expected results are in the
    // "Decomposed" column of hangulCanon.
    final int decomposedColumn = 1;
    logln("Canonical decomposition...");
    staticTest(NFD, hangulCanon, decomposedColumn);
}
}

View File

@ -0,0 +1,11 @@
#
#
#
# Conformance test data which was added by Markus Scherer of IBM.
# According to him, "more interesting conformance test cases, not in the
# unicode.org NormalizationTest.txt".
# Found in ConformanceTest.java in
# src/com/ibm/icu/dev/test/normalizer/ConformanceTest.java of ICU4J 3.2.
@Part0 # Other (ICU4J test data)
0061 0332 0308;00E4 0332;0061 0332 0308;00E4 0332;0061 0332 0308; # Markus 0
0061 0301 0F73;00E1 0F71 0F72;0061 0F71 0F72 0301;00E1 0F71 0F72;0061 0F71 0F72 0301; # Markus 1

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,62 @@
/*
* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @bug 8221431
* @summary Tests decomposition of Japanese square era characters.
* @run testng/othervm SquareEraCharacterTest
*/
import static org.testng.Assert.assertEquals;
import java.text.Normalizer;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
/**
 * Verifies that each Japanese square era character is expanded by NFKD
 * normalization into the two ideographs that spell the era name.
 */
@Test
public class SquareEraCharacterTest {

    /**
     * Supplies pairs of (square era character, expected NFKD result).
     */
    @DataProvider
    Object[][] squareEras() {
        final Object[][] eras = {
            // square era character, expected decomposed string
            {'\u337e', "\u660e\u6cbb"},     // Meiji
            {'\u337d', "\u5927\u6b63"},     // Taisho
            {'\u337c', "\u662d\u548c"},     // Showa
            {'\u337b', "\u5e73\u6210"},     // Heisei
            {'\u32ff', "\u4ee4\u548c"},     // Reiwa
        };
        return eras;
    }

    /**
     * Normalizes a single square era character with NFKD and compares the
     * result with the expected two-character string. The failure message
     * names the character via Character.getName.
     */
    @Test(dataProvider="squareEras")
    public void test_normalize(char squareChar, String expected) {
        final String input = Character.toString(squareChar);
        final String decomposed = Normalizer.normalize(input, Normalizer.Form.NFKD);
        assertEquals(
            decomposed,
            expected,
            "decomposing " + Character.getName(squareChar) + ".");
    }
}

View File

@ -1,429 +0,0 @@
# GraphemeBreakTest-8.0.0.txt
# Date: 2015-02-13, 13:47:15 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2015 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Default Grapheme Break Test
#
# Format:
# <string> (# <comment>)?
# <string> contains hex Unicode code points, with
# ÷ wherever there is a break opportunity, and
# × wherever there is not.
# <comment> the format can change, but currently it shows:
# - the sample character name
# - (x) the Grapheme_Cluster_Break property value for the sample character
# - [x] the rule that determines whether there is a break or not
#
# These samples may be extended or changed in the future.
#
÷ 0020 ÷ 0020 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0020 × 0308 ÷ 0020 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0020 ÷ 000D ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0020 × 0308 ÷ 000D ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0020 ÷ 000A ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0020 × 0308 ÷ 000A ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0020 ÷ 0001 ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0020 × 0308 ÷ 0001 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0020 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0020 × 0308 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0020 × 0903 ÷ # ÷ [0.2] SPACE (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0020 × 0308 × 0903 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0020 ÷ 1100 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0020 × 0308 ÷ 1100 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0020 ÷ 1160 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0020 × 0308 ÷ 1160 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0020 ÷ 11A8 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0020 × 0308 ÷ 11A8 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0020 ÷ AC00 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0020 × 0308 ÷ AC00 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0020 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0020 × 0308 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0020 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0020 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0020 ÷ 0378 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0020 × 0308 ÷ 0378 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0020 ÷ D800 ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 0020 × 0308 ÷ D800 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 000D ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] SPACE (Other) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 000D ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 000D × 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 000D ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 000D ÷ 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 000D ÷ 0308 × 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 000D ÷ 0903 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 000D ÷ 0308 × 0903 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 000D ÷ 1100 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 000D ÷ 1160 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 000D ÷ 11A8 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 000D ÷ AC00 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 000D ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 000D ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 000D ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 000D ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 000D ÷ 0378 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 000D ÷ D800 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 000D ÷ 0308 ÷ D800 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 000A ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] SPACE (Other) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 000A ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 000A ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 000A ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 000A ÷ 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 000A ÷ 0308 × 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 000A ÷ 0903 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 000A ÷ 0308 × 0903 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 000A ÷ 1100 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 000A ÷ 1160 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 000A ÷ 11A8 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 000A ÷ AC00 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 000A ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 000A ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 000A ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 000A ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 000A ÷ 0378 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 000A ÷ D800 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 000A ÷ 0308 ÷ D800 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 0001 ÷ 0020 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] SPACE (Other) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0001 ÷ 000D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0001 ÷ 000A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0001 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0001 ÷ 0300 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0001 ÷ 0308 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0001 ÷ 0903 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0001 ÷ 0308 × 0903 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0001 ÷ 1100 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0001 ÷ 1160 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0001 ÷ 11A8 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0001 ÷ AC00 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0001 ÷ AC01 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0001 ÷ 1F1E6 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0001 ÷ 0378 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0001 ÷ D800 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ D800 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 0300 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0300 × 0308 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0300 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0300 × 0308 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0300 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0300 × 0308 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0300 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0300 × 0308 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0300 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0300 × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0300 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0300 × 0308 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0300 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0300 × 0308 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0300 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0300 × 0308 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0300 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0300 × 0308 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0300 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0300 × 0308 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0300 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0300 × 0308 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0300 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0300 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0300 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0300 × 0308 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0300 ÷ D800 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 0300 × 0308 ÷ D800 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 0903 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0903 × 0308 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0903 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0903 × 0308 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0903 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0903 × 0308 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0903 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0903 × 0308 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0903 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0903 × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0903 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0903 × 0308 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0903 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0903 × 0308 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0903 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0903 × 0308 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0903 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0903 × 0308 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0903 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0903 × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0903 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0903 × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0903 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0903 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0903 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0903 × 0308 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0903 ÷ D800 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 0903 × 0308 ÷ D800 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 1100 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1100 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1100 ÷ 000D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 1100 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 1100 ÷ 000A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 1100 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 1100 ÷ 0001 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1100 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1100 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 1100 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 1100 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1100 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1100 × 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1100 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1100 × 1160 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 1100 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 1100 ÷ 11A8 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 1100 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 1100 × AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1100 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1100 × AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1100 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1100 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 1100 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 1100 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 1100 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 1100 ÷ D800 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 1100 × 0308 ÷ D800 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 1160 ÷ 0020 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1160 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1160 ÷ 000D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 1160 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 1160 ÷ 000A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 1160 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 1160 ÷ 0001 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1160 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1160 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 1160 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 1160 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1160 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1160 ÷ 1100 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1160 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1160 × 1160 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 1160 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 1160 × 11A8 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 1160 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 1160 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1160 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1160 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1160 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1160 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 1160 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 1160 ÷ 0378 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 1160 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 1160 ÷ D800 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 1160 × 0308 ÷ D800 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 11A8 ÷ 0020 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 11A8 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 11A8 ÷ 000D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 11A8 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 11A8 ÷ 000A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 11A8 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 11A8 ÷ 0001 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 11A8 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 11A8 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 11A8 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 11A8 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 11A8 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 11A8 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 11A8 ÷ 1160 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 11A8 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 11A8 × 11A8 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 11A8 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 11A8 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 11A8 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 11A8 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 11A8 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 11A8 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 11A8 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 11A8 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 11A8 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 11A8 ÷ D800 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 11A8 × 0308 ÷ D800 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ AC00 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ AC00 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ AC00 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ AC00 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ AC00 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ AC00 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ AC00 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ AC00 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ AC00 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ AC00 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ AC00 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ AC00 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ AC00 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ AC00 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ AC00 × 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ AC00 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ AC00 × 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ AC00 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ AC00 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ AC00 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ AC00 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ AC00 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ AC00 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ AC00 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ AC00 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ AC00 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ AC00 ÷ D800 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ AC00 × 0308 ÷ D800 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ AC01 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ AC01 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ AC01 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ AC01 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ AC01 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ AC01 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ AC01 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ AC01 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ AC01 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ AC01 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ AC01 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ AC01 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ AC01 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ AC01 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ AC01 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ AC01 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ AC01 × 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ AC01 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ AC01 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ AC01 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ AC01 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ AC01 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ AC01 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ AC01 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ AC01 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ AC01 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ AC01 ÷ D800 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ AC01 × 0308 ÷ D800 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 1F1E6 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1F1E6 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 1F1E6 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 1F1E6 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1F1E6 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 1F1E6 × 0308 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 1F1E6 × 0903 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1F1E6 × 0308 × 0903 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1F1E6 ÷ 1100 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 1100 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1F1E6 ÷ 1160 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 1160 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 1F1E6 ÷ 11A8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 11A8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 1F1E6 ÷ AC00 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ AC00 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1F1E6 ÷ AC01 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ AC01 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1F1E6 × 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 1F1E6 ÷ 0378 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 0378 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 1F1E6 ÷ D800 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ D800 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 0378 ÷ 0020 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0378 × 0308 ÷ 0020 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0378 ÷ 000D ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0378 × 0308 ÷ 000D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0378 ÷ 000A ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0378 × 0308 ÷ 000A ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0378 ÷ 0001 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0378 × 0308 ÷ 0001 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0378 × 0300 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0378 × 0308 × 0300 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0378 × 0903 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0378 × 0308 × 0903 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0378 ÷ 1100 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0378 × 0308 ÷ 1100 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0378 ÷ 1160 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0378 × 0308 ÷ 1160 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0378 ÷ 11A8 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0378 × 0308 ÷ 11A8 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0378 ÷ AC00 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0378 × 0308 ÷ AC00 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0378 ÷ AC01 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0378 × 0308 ÷ AC01 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0378 ÷ 1F1E6 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0378 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ 0378 ÷ 0378 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0378 × 0308 ÷ 0378 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0378 ÷ D800 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 0378 × 0308 ÷ D800 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ D800 ÷ 0020 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] SPACE (Other) ÷ [0.3]
÷ D800 ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ D800 ÷ 000D ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ D800 ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ D800 ÷ 000A ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ D800 ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ D800 ÷ 0001 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3]
÷ D800 ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ D800 ÷ 0300 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ D800 ÷ 0308 × 0300 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ D800 ÷ 0903 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ D800 ÷ 0308 × 0903 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ D800 ÷ 1100 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ D800 ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ D800 ÷ 1160 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ D800 ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ D800 ÷ 11A8 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ D800 ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ D800 ÷ AC00 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ D800 ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ D800 ÷ AC01 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ D800 ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ D800 ÷ 1F1E6 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ D800 ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
÷ D800 ÷ 0378 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
÷ D800 ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ D800 ÷ D800 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] <surrogate-D800> (Control) ÷ [0.3]
÷ D800 ÷ 0308 ÷ D800 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3]
÷ 0061 ÷ 1F1E6 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
÷ 1F1F7 × 1F1FA ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER R (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER U (Regional_Indicator) ÷ [0.3]
÷ 1F1F7 × 1F1FA × 1F1F8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER R (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER U (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER S (Regional_Indicator) ÷ [0.3]
÷ 1F1F7 × 1F1FA × 1F1F8 × 1F1EA ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER R (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER U (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER S (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER E (Regional_Indicator) ÷ [0.3]
÷ 1F1F7 × 1F1FA ÷ 200B ÷ 1F1F8 × 1F1EA ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER R (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER U (Regional_Indicator) ÷ [5.0] ZERO WIDTH SPACE (Control) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER S (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER E (Regional_Indicator) ÷ [0.3]
÷ 1F1E6 × 1F1E7 × 1F1E8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER B (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER C (Regional_Indicator) ÷ [0.3]
÷ 1F1E6 × 200D ÷ 1F1E7 × 1F1E8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [9.0] ZERO WIDTH JOINER (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER B (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER C (Regional_Indicator) ÷ [0.3]
÷ 1F1E6 × 1F1E7 × 200D ÷ 1F1E8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) × [8.1] REGIONAL INDICATOR SYMBOL LETTER B (Regional_Indicator) × [9.0] ZERO WIDTH JOINER (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (Regional_Indicator) ÷ [0.3]
÷ 0020 × 200D ÷ 0646 ÷ # ÷ [0.2] SPACE (Other) × [9.0] ZERO WIDTH JOINER (Extend) ÷ [999.0] ARABIC LETTER NOON (Other) ÷ [0.3]
÷ 0646 × 200D ÷ 0020 ÷ # ÷ [0.2] ARABIC LETTER NOON (Other) × [9.0] ZERO WIDTH JOINER (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
#
# Lines: 402
#
# EOF

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -23,8 +23,9 @@
/*
* @test
* @bug 7071819
* @bug 7071819 8221431
* @summary tests Unicode Extended Grapheme support
* @library /lib/testlibrary/java/lang
* @run main GraphemeTest
*/
@ -41,15 +42,14 @@ import java.util.regex.Matcher;
public class GraphemeTest {
public static void main(String[] args) throws Throwable {
testProps(Paths.get(System.getProperty("test.src", "."),
"GraphemeBreakProperty.txt"));
testBreak(Paths.get(System.getProperty("test.src", "."),
"GraphemeBreakTest.txt"));
testProps(UCDFiles.GRAPHEME_BREAK_PROPERTY);
testProps(UCDFiles.EMOJI_DATA);
}
private static void testProps(Path path) throws IOException {
Files.lines(path)
.filter( ln -> ln.length() != 0 && !ln.startsWith("#") )
.map( ln -> ln.replaceFirst("#.*", "") )
.filter( ln -> ln.length() != 0 )
.forEach(ln -> {
String[] strs = ln.split("\\s+");
int off = strs[0].indexOf("..");
@ -62,6 +62,11 @@ public class GraphemeTest {
cp0 = cp1 = Integer.parseInt(strs[0], 16);
}
for (int cp = cp0; cp <= cp1; cp++) {
// Ignore Emoji* for now (only interested in Extended_Pictographic)
if (expected.startsWith("Emoji")) {
continue;
}
// NOTE:
// #tr29 "plus a few General_Category = Spacing_Mark needed for
// canonical equivalence."
@ -81,68 +86,39 @@ public class GraphemeTest {
});
}
/**
 * Validates each sample line of a grapheme break test file against the
 * pairwise {@code rules} table.
 *
 * Blank lines and lines starting with '#' are skipped. Every remaining line
 * is reduced to a sequence of hexadecimal code points separated by break
 * markers (U+00F7 DIVISION SIGN or U+00D7 MULTIPLICATION SIGN). For each
 * adjacent pair of code points, {@code rules[getType(prev)][getType(cur)]}
 * must be {@code true} exactly when the marker between them is the
 * division sign.
 *
 * @param path the test data file to read
 * @throws IOException if the file cannot be opened
 * @throws RuntimeException if a pair of code points disagrees with the
 *         {@code rules} table; the message includes the offending line
 */
private static void testBreak(Path path) throws IOException {
    // Files.lines holds the underlying file open until the stream is closed,
    // so scope it with try-with-resources. The fully qualified Stream name
    // avoids requiring an additional import in this file.
    try (java.util.stream.Stream<String> lines = Files.lines(path)) {
        lines.filter( ln -> ln.length() != 0 && !ln.startsWith("#") )
             .forEach(ln -> {
                 // Strip whitespace and the trailing "# ..." commentary.
                 // NOTE(review): the bracket alternative looks malformed (stray ']'
                 // inside it); it is left untouched here. For lines shaped like
                 // the samples in the data file, the "#.*" alternative already
                 // removes all parenthesised/bracketed commentary -- confirm
                 // before changing the pattern.
                 String str = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", "");
                 String[] cstrs = str.split("\u00f7|\u00d7");
                 int prevCp = -1;
                 char prevBk = '\u00f7';
                 int offBk = 0;
                 for (String cstr : cstrs) {
                     if (cstr.length() == 0) // first empty str
                         continue;
                     int cp = Integer.parseInt(cstr, 16);
                     if (prevCp != -1) {
                         // test against the rules directly
                         int prevType = getType(prevCp);
                         int curType = getType(cp);
                         boolean rule = rules[prevType][curType];
                         if (rule != (prevBk == '\u00f7')) {
                             throw new RuntimeException(String.format(
                                 "NG %x[%d] %x[%d] -> %b [%s]%n",
                                 prevCp, prevType, cp, curType,
                                 rule,
                                 ln));
                         }
                     }
                     prevCp = cp;
                     // Advance past this code point and the marker in front of it,
                     // landing on the marker that follows the code point.
                     offBk += (cstr.length() + 1);
                     prevBk = str.charAt(offBk);
                 }
             });
    }
}
private static final String[] types = {
"Other", "CR", "LF", "Control", "Extend", "Regional_Indicator",
"Other", "CR", "LF", "Control", "Extend", "ZWJ", "Regional_Indicator",
"Prepend", "SpacingMark",
"L", "V", "T", "LV", "LVT" };
"L", "V", "T", "LV", "LVT",
"Extended_Pictographic" };
///////////////////////////////////////////////////////////////////////////////////////////////////////////////
// from java.util.regex.Grapheme.java
// types
private static final int OTHER = 0;
private static final int CR = 1;
private static final int LF = 2;
private static final int CONTROL = 3;
private static final int EXTEND = 4;
private static final int RI = 5;
private static final int PREPEND = 6;
private static final int SPACINGMARK = 7;
private static final int L = 8;
private static final int V = 9;
private static final int T = 10;
private static final int LV = 11;
private static final int LVT = 12;
private static final int ZWJ = 5;
private static final int RI = 6;
private static final int PREPEND = 7;
private static final int SPACINGMARK = 8;
private static final int L = 9;
private static final int V = 10;
private static final int T = 11;
private static final int LV = 12;
private static final int LVT = 13;
private static final int EXTENDED_PICTOGRAPHIC = 14;
private static final int FIRST_TYPE = 0;
private static final int LAST_TYPE = 12;
private static final int LAST_TYPE = 14;
private static boolean[][] rules;
static {
rules = new boolean[LAST_TYPE + 1][LAST_TYPE + 1];
// default, any + any
// GB 999 Any + Any -> default
for (int i = FIRST_TYPE; i <= LAST_TYPE; i++)
for (int j = FIRST_TYPE; j <= LAST_TYPE; j++)
rules[i][j] = true;
@ -159,13 +135,12 @@ public class GraphemeTest {
// GB 8 (LVT | T) x T
rules[LVT][T] = false;
rules[T][T] = false;
// GB 8a RI x RI
rules[RI][RI] = false;
// GB 9 x Extend
// GB 9 x (Extend|ZWJ)
// GB 9a x Spacing Mark
// GB 9b Prepend x
for (int i = FIRST_TYPE; i <= LAST_TYPE; i++) {
rules[i][EXTEND] = false;
rules[i][ZWJ] = false;
rules[i][SPACINGMARK] = false;
rules[PREPEND][i] = false;
}
@ -178,7 +153,9 @@ public class GraphemeTest {
}
// GB 3 CR x LF
rules[CR][LF] = false;
// GB 10 Any + Any -> default
// GB 11 Extended_Pictographic x (Extend|ZWJ)
rules[EXTENDED_PICTOGRAPHIC][EXTEND] = false;
rules[EXTENDED_PICTOGRAPHIC][ZWJ] = false;
}
// Hangul syllables
@ -204,7 +181,12 @@ public class GraphemeTest {
cp == 0xAA7B || cp == 0xAA7D;
}
@SuppressWarnings("fallthrough")
private static int getType(int cp) {
if (isExtendedPictographic(cp)) {
return EXTENDED_PICTOGRAPHIC;
}
int type = Character.getType(cp);
switch(type) {
case Character.CONTROL:
@ -213,28 +195,36 @@ public class GraphemeTest {
if (cp == 0x000A)
return LF;
return CONTROL;
case Character.UNASSIGNED:
case Character.UNASSIGNED:
// NOTE: #tr29 lists "Unassigned and Default_Ignorable_Code_Point" as Control
// but GraphemeBreakTest.txt lists u+0378/reserved-0378 as "Other"
// so type it as "Other" to make the test happy
if (cp == 0x0378)
return OTHER;
if (cp == 0x0378)
return OTHER;
case Character.LINE_SEPARATOR:
case Character.PARAGRAPH_SEPARATOR:
case Character.SURROGATE:
return CONTROL;
case Character.FORMAT:
if (cp == 0x200C || cp == 0x200D)
if (cp == 0x200C ||
cp >= 0xE0020 && cp <= 0xE007F)
return EXTEND;
if (cp == 0x200D)
return ZWJ;
if (cp >= 0x0600 && cp <= 0x0605 ||
cp == 0x06DD || cp == 0x070F || cp == 0x08E2 ||
cp == 0x110BD || cp == 0x110CD)
return PREPEND;
return CONTROL;
case Character.NON_SPACING_MARK:
case Character.ENCLOSING_MARK:
// NOTE:
// #tr29 "plus a few General_Category = Spacing_Mark needed for
// canonical equivalence."
// but for "extended grapheme clusters" support, there is no
// need actually to diff "extend" and "spacingmark" given GB9, GB9a
return EXTEND;
// NOTE:
// #tr29 "plus a few General_Category = Spacing_Mark needed for
// canonical equivalence."
// but for "extended grapheme clusters" support, there is no
// need actually to diff "extend" and "spacingmark" given GB9, GB9a
return EXTEND;
case Character.COMBINING_SPACING_MARK:
if (isExcludedSpacingMark(cp))
return OTHER;
@ -248,9 +238,11 @@ public class GraphemeTest {
return RI;
return OTHER;
case Character.MODIFIER_LETTER:
case Character.MODIFIER_SYMBOL:
// WARNING:
// not mentioned in #tr29 but listed in GraphemeBreakProperty.txt
if (cp == 0xFF9E || cp == 0xFF9F)
if (cp == 0xFF9E || cp == 0xFF9F ||
cp >= 0x1F3FB && cp <= 0x1F3FF)
return EXTEND;
return OTHER;
case Character.OTHER_LETTER:
@ -280,7 +272,113 @@ public class GraphemeTest {
return V;
if (cp >= 0xD7CB && cp <= 0xD7FB)
return T;
// Prepend
switch (cp) {
case 0x0D4E:
case 0x111C2:
case 0x111C3:
case 0x11A3A:
case 0x11A84:
case 0x11A85:
case 0x11A86:
case 0x11A87:
case 0x11A88:
case 0x11A89:
case 0x11D46:
return PREPEND;
}
}
return OTHER;
}
// from generated java.util.regex.EmojiData.java
/**
 * Inclusive {first, last} code point ranges for which
 * {@link #isExtendedPictographic(int)} answers {@code true}. A single code
 * point is stored as a range whose two ends are equal.
 */
private static final int[][] EXT_PICT_RANGES = {
    {0x00A9, 0x00A9},
    {0x00AE, 0x00AE},
    {0x203C, 0x203C},
    {0x2049, 0x2049},
    {0x2122, 0x2122},
    {0x2139, 0x2139},
    {0x2194, 0x2199},
    {0x21A9, 0x21AA},
    {0x231A, 0x231B},
    {0x2328, 0x2328},
    {0x2388, 0x2388},
    {0x23CF, 0x23CF},
    {0x23E9, 0x23F3},
    {0x23F8, 0x23FA},
    {0x24C2, 0x24C2},
    {0x25AA, 0x25AB},
    {0x25B6, 0x25B6},
    {0x25C0, 0x25C0},
    {0x25FB, 0x25FE},
    {0x2600, 0x2605},
    {0x2607, 0x2612},
    {0x2614, 0x2685},
    {0x2690, 0x2705},
    {0x2708, 0x2712},
    {0x2714, 0x2714},
    {0x2716, 0x2716},
    {0x271D, 0x271D},
    {0x2721, 0x2721},
    {0x2728, 0x2728},
    {0x2733, 0x2734},
    {0x2744, 0x2744},
    {0x2747, 0x2747},
    {0x274C, 0x274C},
    {0x274E, 0x274E},
    {0x2753, 0x2755},
    {0x2757, 0x2757},
    {0x2763, 0x2767},
    {0x2795, 0x2797},
    {0x27A1, 0x27A1},
    {0x27B0, 0x27B0},
    {0x27BF, 0x27BF},
    {0x2934, 0x2935},
    {0x2B05, 0x2B07},
    {0x2B1B, 0x2B1C},
    {0x2B50, 0x2B50},
    {0x2B55, 0x2B55},
    {0x3030, 0x3030},
    {0x303D, 0x303D},
    {0x3297, 0x3297},
    {0x3299, 0x3299},
    {0x1F000, 0x1F0FF},
    {0x1F10D, 0x1F10F},
    {0x1F12F, 0x1F12F},
    {0x1F16C, 0x1F171},
    {0x1F17E, 0x1F17F},
    {0x1F18E, 0x1F18E},
    {0x1F191, 0x1F19A},
    {0x1F1AD, 0x1F1E5},
    {0x1F201, 0x1F20F},
    {0x1F21A, 0x1F21A},
    {0x1F22F, 0x1F22F},
    {0x1F232, 0x1F23A},
    {0x1F23C, 0x1F23F},
    {0x1F249, 0x1F3FA},
    {0x1F400, 0x1F53D},
    {0x1F546, 0x1F64F},
    {0x1F680, 0x1F6FF},
    {0x1F774, 0x1F77F},
    {0x1F7D5, 0x1F7FF},
    {0x1F80C, 0x1F80F},
    {0x1F848, 0x1F84F},
    {0x1F85A, 0x1F85F},
    {0x1F888, 0x1F88F},
    {0x1F8AE, 0x1F8FF},
    {0x1F90C, 0x1F93A},
    {0x1F93C, 0x1F945},
    {0x1F947, 0x1FFFD},
};

/**
 * Tells whether the given code point belongs to the hard-coded
 * Extended_Pictographic set used by this test.
 *
 * @param cp the code point to test
 * @return {@code true} if {@code cp} lies within one of the listed ranges,
 *         {@code false} otherwise
 */
static boolean isExtendedPictographic(int cp) {
    for (int[] range : EXT_PICT_RANGES) {
        if (cp >= range[0] && cp <= range[1]) {
            return true;
        }
    }
    return false;
}
}

View File

@ -35,9 +35,10 @@
* 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819
* 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895
* 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
* 8194667 8197462 8184692
* 8194667 8197462 8184692 8221431
*
* @library /test/lib
* @library /lib/testlibrary/java/lang
* @build jdk.test.lib.RandomFactory
* @run main RegExTest
* @key randomness
@ -4755,8 +4756,7 @@ public class RegExTest {
}
private static void grapheme() throws Exception {
Files.lines(Paths.get(System.getProperty("test.src", "."),
"GraphemeBreakTest.txt"))
Files.lines(UCDFiles.GRAPHEME_BREAK_TEST)
.filter( ln -> ln.length() != 0 && !ln.startsWith("#") )
.forEach( ln -> {
ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", "");

View File

@ -0,0 +1,57 @@
/*
* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/**
* Holds the file paths to the Unicode Character Database source files.
* Paths to the source files in the "make" directory are relative, and
* subject to change due to future repository structure re-org.
*/
import java.nio.file.Path;
import java.nio.file.Paths;
/**
 * Paths to the Unicode Character Database source files used by tests.
 *
 * <p>Every path is resolved against {@link #UCD_DIR}, which is derived from
 * the {@code test.root} system property. The constants are {@code final} so
 * that one test cannot redirect the files seen by another.
 */
public class UCDFiles {
    /** UCD directory: {@code <test.root>/../../make/data/unicodedata}. */
    public static final Path UCD_DIR = ucdDir();

    public static final Path BLOCKS =
        UCD_DIR.resolve("Blocks.txt");
    public static final Path GRAPHEME_BREAK_PROPERTY =
        UCD_DIR.resolve("auxiliary").resolve("GraphemeBreakProperty.txt");
    public static final Path GRAPHEME_BREAK_TEST =
        UCD_DIR.resolve("auxiliary").resolve("GraphemeBreakTest.txt");
    public static final Path NORMALIZATION_TEST =
        UCD_DIR.resolve("NormalizationTest.txt");
    public static final Path PROP_LIST =
        UCD_DIR.resolve("PropList.txt");
    public static final Path PROPERTY_VALUE_ALIASES =
        UCD_DIR.resolve("PropertyValueAliases.txt");
    public static final Path SCRIPTS =
        UCD_DIR.resolve("Scripts.txt");
    public static final Path SPECIAL_CASING =
        UCD_DIR.resolve("SpecialCasing.txt");
    public static final Path UNICODE_DATA =
        UCD_DIR.resolve("UnicodeData.txt");
    public static final Path EMOJI_DATA =
        UCD_DIR.resolve("emoji-data.txt");

    /**
     * Builds the UCD directory path from the {@code test.root} system
     * property.
     *
     * @return the path {@code <test.root>/../../make/data/unicodedata}
     * @throws IllegalStateException if {@code test.root} is not set
     */
    private static Path ucdDir() {
        String testRoot = System.getProperty("test.root");
        if (testRoot == null) {
            // Fail with an explicit message rather than an anonymous
            // NullPointerException raised from inside Paths.get.
            throw new IllegalStateException(
                "System property \"test.root\" is not set");
        }
        return Paths.get(testRoot, "..", "..", "make", "data", "unicodedata");
    }
}