8268081: Upgrade Unicode Data Files to 14.0.0

Reviewed-by: joehw, iris, lancea
This commit is contained in:
Naoto Sato 2022-01-12 19:17:18 +00:00
parent ddddec7d74
commit 0a094d7c28
38 changed files with 3333 additions and 1081 deletions

View File

@ -1,6 +1,6 @@
# Blocks-13.0.0.txt
# Date: 2019-07-10, 19:06:00 GMT [KW]
# Copyright (c) 2019 Unicode, Inc.
# Blocks-14.0.0.txt
# Date: 2021-01-22, 23:29:00 GMT [KW]
# Copyright (c) 2021 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
@ -52,6 +52,7 @@
0800..083F; Samaritan
0840..085F; Mandaic
0860..086F; Syriac Supplement
0870..089F; Arabic Extended-B
08A0..08FF; Arabic Extended-A
0900..097F; Devanagari
0980..09FF; Bengali
@ -215,7 +216,9 @@ FFF0..FFFF; Specials
104B0..104FF; Osage
10500..1052F; Elbasan
10530..1056F; Caucasian Albanian
10570..105BF; Vithkuqi
10600..1077F; Linear A
10780..107BF; Latin Extended-F
10800..1083F; Cypriot Syllabary
10840..1085F; Imperial Aramaic
10860..1087F; Palmyrene
@ -240,6 +243,7 @@ FFF0..FFFF; Specials
10E80..10EBF; Yezidi
10F00..10F2F; Old Sogdian
10F30..10F6F; Sogdian
10F70..10FAF; Old Uyghur
10FB0..10FDF; Chorasmian
10FE0..10FFF; Elymaic
11000..1107F; Brahmi
@ -259,13 +263,14 @@ FFF0..FFFF; Specials
11600..1165F; Modi
11660..1167F; Mongolian Supplement
11680..116CF; Takri
11700..1173F; Ahom
11700..1174F; Ahom
11800..1184F; Dogra
118A0..118FF; Warang Citi
11900..1195F; Dives Akuru
119A0..119FF; Nandinagari
11A00..11A4F; Zanabazar Square
11A50..11AAF; Soyombo
11AB0..11ABF; Unified Canadian Aboriginal Syllabics Extended-A
11AC0..11AFF; Pau Cin Hau
11C00..11C6F; Bhaiksuki
11C70..11CBF; Marchen
@ -277,11 +282,13 @@ FFF0..FFFF; Specials
12000..123FF; Cuneiform
12400..1247F; Cuneiform Numbers and Punctuation
12480..1254F; Early Dynastic Cuneiform
12F90..12FFF; Cypro-Minoan
13000..1342F; Egyptian Hieroglyphs
13430..1343F; Egyptian Hieroglyph Format Controls
14400..1467F; Anatolian Hieroglyphs
16800..16A3F; Bamum Supplement
16A40..16A6F; Mro
16A70..16ACF; Tangsa
16AD0..16AFF; Bassa Vah
16B00..16B8F; Pahawh Hmong
16E40..16E9F; Medefaidrin
@ -290,13 +297,15 @@ FFF0..FFFF; Specials
17000..187FF; Tangut
18800..18AFF; Tangut Components
18B00..18CFF; Khitan Small Script
18D00..18D8F; Tangut Supplement
18D00..18D7F; Tangut Supplement
1AFF0..1AFFF; Kana Extended-B
1B000..1B0FF; Kana Supplement
1B100..1B12F; Kana Extended-A
1B130..1B16F; Small Kana Extension
1B170..1B2FF; Nushu
1BC00..1BC9F; Duployan
1BCA0..1BCAF; Shorthand Format Controls
1CF00..1CFCF; Znamenny Musical Notation
1D000..1D0FF; Byzantine Musical Symbols
1D100..1D1FF; Musical Symbols
1D200..1D24F; Ancient Greek Musical Notation
@ -305,9 +314,12 @@ FFF0..FFFF; Specials
1D360..1D37F; Counting Rod Numerals
1D400..1D7FF; Mathematical Alphanumeric Symbols
1D800..1DAAF; Sutton SignWriting
1DF00..1DFFF; Latin Extended-G
1E000..1E02F; Glagolitic Supplement
1E100..1E14F; Nyiakeng Puachue Hmong
1E290..1E2BF; Toto
1E2C0..1E2FF; Wancho
1E7E0..1E7FF; Ethiopic Extended-B
1E800..1E8DF; Mende Kikakui
1E900..1E95F; Adlam
1EC70..1ECBF; Indic Siyaq Numbers

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
# PropList-13.0.0.txt
# Date: 2019-11-27, 03:13:28 GMT
# Copyright (c) 2019 Unicode, Inc.
# PropList-14.0.0.txt
# Date: 2021-08-12, 23:13:05 GMT
# Copyright (c) 2021 Unicode, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
@ -54,6 +54,7 @@
2E1A ; Dash # Pd HYPHEN WITH DIAERESIS
2E3A..2E3B ; Dash # Pd [2] TWO-EM DASH..THREE-EM DASH
2E40 ; Dash # Pd DOUBLE HYPHEN
2E5D ; Dash # Pd OBLIQUE HYPHEN
301C ; Dash # Pd WAVE DASH
3030 ; Dash # Pd WAVY DASH
30A0 ; Dash # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN
@ -63,7 +64,7 @@ FE63 ; Dash # Pd SMALL HYPHEN-MINUS
FF0D ; Dash # Pd FULLWIDTH HYPHEN-MINUS
10EAD ; Dash # Pd YEZIDI HYPHENATION MARK
# Total code points: 29
# Total code points: 30
# ================================================
@ -126,7 +127,7 @@ FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET
05C3 ; Terminal_Punctuation # Po HEBREW PUNCTUATION SOF PASUQ
060C ; Terminal_Punctuation # Po ARABIC COMMA
061B ; Terminal_Punctuation # Po ARABIC SEMICOLON
061E..061F ; Terminal_Punctuation # Po [2] ARABIC TRIPLE DOT PUNCTUATION MARK..ARABIC QUESTION MARK
061D..061F ; Terminal_Punctuation # Po [3] ARABIC END OF TEXT MARK..ARABIC QUESTION MARK
06D4 ; Terminal_Punctuation # Po ARABIC FULL STOP
0700..070A ; Terminal_Punctuation # Po [11] SYRIAC END OF PARAGRAPH..SYRIAC CONTRACTION
070C ; Terminal_Punctuation # Po SYRIAC HARKLEAN METOBELUS
@ -150,6 +151,7 @@ FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET
1AA8..1AAB ; Terminal_Punctuation # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU
1B5A..1B5B ; Terminal_Punctuation # Po [2] BALINESE PANTI..BALINESE PAMADA
1B5D..1B5F ; Terminal_Punctuation # Po [3] BALINESE CARIK PAMUNGKAH..BALINESE CARIK PAREREN
1B7D..1B7E ; Terminal_Punctuation # Po [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG
1C3B..1C3F ; Terminal_Punctuation # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK
1C7E..1C7F ; Terminal_Punctuation # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
203C..203D ; Terminal_Punctuation # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG
@ -159,6 +161,7 @@ FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET
2E41 ; Terminal_Punctuation # Po REVERSED COMMA
2E4C ; Terminal_Punctuation # Po MEDIEVAL COMMA
2E4E..2E4F ; Terminal_Punctuation # Po [2] PUNCTUS ELEVATUS MARK..CORNISH VERSE DIVIDER
2E53..2E54 ; Terminal_Punctuation # Po [2] MEDIEVAL EXCLAMATION MARK..MEDIEVAL QUESTION MARK
3001..3002 ; Terminal_Punctuation # Po [2] IDEOGRAPHIC COMMA..IDEOGRAPHIC FULL STOP
A4FE..A4FF ; Terminal_Punctuation # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP
A60D..A60F ; Terminal_Punctuation # Po [3] VAI COMMA..VAI QUESTION MARK
@ -189,6 +192,7 @@ FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA
10B3A..10B3F ; Terminal_Punctuation # Po [6] TINY TWO DOTS OVER ONE DOT PUNCTUATION..LARGE ONE RING OVER TWO RINGS PUNCTUATION
10B99..10B9C ; Terminal_Punctuation # Po [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT
10F55..10F59 ; Terminal_Punctuation # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT
10F86..10F89 ; Terminal_Punctuation # Po [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS
11047..1104D ; Terminal_Punctuation # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS
110BE..110C1 ; Terminal_Punctuation # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA
11141..11143 ; Terminal_Punctuation # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK
@ -220,7 +224,7 @@ FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA
1BC9F ; Terminal_Punctuation # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP
1DA87..1DA8A ; Terminal_Punctuation # Po [4] SIGNWRITING COMMA..SIGNWRITING COLON
# Total code points: 267
# Total code points: 276
# ================================================
@ -600,6 +604,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
1A6D..1A72 ; Other_Alphabetic # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI
1A73..1A74 ; Other_Alphabetic # Mn [2] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN MAI KANG
1ABF..1AC0 ; Other_Alphabetic # Mn [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW
1ACC..1ACE ; Other_Alphabetic # Mn [3] COMBINING LATIN SMALL LETTER INSULAR G..COMBINING LATIN SMALL LETTER INSULAR T
1B00..1B03 ; Other_Alphabetic # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
1B04 ; Other_Alphabetic # Mc BALINESE SIGN BISAH
1B35 ; Other_Alphabetic # Mc BALINESE VOWEL SIGN TEDUNG
@ -686,10 +691,12 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
11001 ; Other_Alphabetic # Mn BRAHMI SIGN ANUSVARA
11002 ; Other_Alphabetic # Mc BRAHMI SIGN VISARGA
11038..11045 ; Other_Alphabetic # Mn [14] BRAHMI VOWEL SIGN AA..BRAHMI VOWEL SIGN AU
11073..11074 ; Other_Alphabetic # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O
11082 ; Other_Alphabetic # Mc KAITHI SIGN VISARGA
110B0..110B2 ; Other_Alphabetic # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II
110B3..110B6 ; Other_Alphabetic # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI
110B7..110B8 ; Other_Alphabetic # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU
110C2 ; Other_Alphabetic # Mn KAITHI VOWEL SIGN VOCALIC R
11100..11102 ; Other_Alphabetic # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA
11127..1112B ; Other_Alphabetic # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU
1112C ; Other_Alphabetic # Mc CHAKMA VOWEL SIGN E
@ -815,7 +822,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z
# Total code points: 1398
# Total code points: 1404
# ================================================
@ -824,7 +831,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
3021..3029 ; Ideographic # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE
3038..303A ; Ideographic # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY
3400..4DBF ; Ideographic # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF
4E00..9FFC ; Ideographic # Lo [20989] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFC
4E00..9FFF ; Ideographic # Lo [20992] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFF
F900..FA6D ; Ideographic # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D
FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
16FE4 ; Ideographic # Mn KHITAN SMALL SCRIPT FILLER
@ -832,15 +839,15 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM
18800..18CD5 ; Ideographic # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5
18D00..18D08 ; Ideographic # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08
1B170..1B2FB ; Ideographic # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB
20000..2A6DD ; Ideographic # Lo [42718] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DD
2A700..2B734 ; Ideographic # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
20000..2A6DF ; Ideographic # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF
2A700..2B738 ; Ideographic # Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738
2B740..2B81D ; Ideographic # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; Ideographic # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEB0..2EBE0 ; Ideographic # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
2F800..2FA1D ; Ideographic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
30000..3134A ; Ideographic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
# Total code points: 101652
# Total code points: 101661
# ================================================
@ -885,6 +892,9 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM
07EB..07F3 ; Diacritic # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
07F4..07F5 ; Diacritic # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE
0818..0819 ; Diacritic # Mn [2] SAMARITAN MARK OCCLUSION..SAMARITAN MARK DAGESH
0898..089F ; Diacritic # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
08C9 ; Diacritic # Lm ARABIC SMALL FARSI YEH
08CA..08D2 ; Diacritic # Mn [9] ARABIC SMALL HIGH FARSI YEH..ARABIC LARGE ROUND DOT INSIDE CIRCLE BELOW
08E3..08FE ; Diacritic # Mn [28] ARABIC TURNED DAMMA BELOW..ARABIC DAMMA WITH DOT
093C ; Diacritic # Mn DEVANAGARI SIGN NUKTA
094D ; Diacritic # Mn DEVANAGARI SIGN VIRAMA
@ -901,6 +911,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM
0B4D ; Diacritic # Mn ORIYA SIGN VIRAMA
0B55 ; Diacritic # Mn ORIYA SIGN OVERLINE
0BCD ; Diacritic # Mn TAMIL SIGN VIRAMA
0C3C ; Diacritic # Mn TELUGU SIGN NUKTA
0C4D ; Diacritic # Mn TELUGU SIGN VIRAMA
0CBC ; Diacritic # Mn KANNADA SIGN NUKTA
0CCD ; Diacritic # Mn KANNADA SIGN VIRAMA
@ -928,12 +939,16 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM
108F ; Diacritic # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5
109A..109B ; Diacritic # Mc [2] MYANMAR SIGN KHAMTI TONE-1..MYANMAR SIGN KHAMTI TONE-3
135D..135F ; Diacritic # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK
1714 ; Diacritic # Mn TAGALOG SIGN VIRAMA
1715 ; Diacritic # Mc TAGALOG SIGN PAMUDPOD
17C9..17D3 ; Diacritic # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
17DD ; Diacritic # Mn KHMER SIGN ATTHACAN
1939..193B ; Diacritic # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I
1A75..1A7C ; Diacritic # Mn [8] TAI THAM SIGN TONE-1..TAI THAM SIGN KHUEN-LUE KARAN
1A7F ; Diacritic # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
1AB0..1ABD ; Diacritic # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
1ABE ; Diacritic # Me COMBINING PARENTHESES OVERLAY
1AC1..1ACB ; Diacritic # Mn [11] COMBINING LEFT PARENTHESIS ABOVE LEFT..COMBINING TRIPLE ACUTE ACCENT
1B34 ; Diacritic # Mn BALINESE SIGN REREKAN
1B44 ; Diacritic # Mc BALINESE ADEG ADEG
1B6B..1B73 ; Diacritic # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
@ -952,8 +967,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM
1CF8..1CF9 ; Diacritic # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
1D2C..1D6A ; Diacritic # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI
1DC4..1DCF ; Diacritic # Mn [12] COMBINING MACRON-ACUTE..COMBINING ZIGZAG BELOW
1DF5..1DF9 ; Diacritic # Mn [5] COMBINING UP TACK ABOVE..COMBINING WIDE INVERTED BRIDGE BELOW
1DFD..1DFF ; Diacritic # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
1DF5..1DFF ; Diacritic # Mn [11] COMBINING UP TACK ABOVE..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
1FBD ; Diacritic # Sk GREEK KORONIS
1FBF..1FC1 ; Diacritic # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI
1FCD..1FCF ; Diacritic # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI
@ -1008,10 +1022,16 @@ FF70 ; Diacritic # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND
FF9E..FF9F ; Diacritic # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
FFE3 ; Diacritic # Sk FULLWIDTH MACRON
102E0 ; Diacritic # Mn COPTIC EPACT THOUSANDS MARK
10780..10785 ; Diacritic # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK
10787..107B0 ; Diacritic # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK
107B2..107BA ; Diacritic # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL
10AE5..10AE6 ; Diacritic # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
10D22..10D23 ; Diacritic # Lo [2] HANIFI ROHINGYA MARK SAKIN..HANIFI ROHINGYA MARK NA KHONNA
10D24..10D27 ; Diacritic # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
10F46..10F50 ; Diacritic # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
10F82..10F85 ; Diacritic # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
11046 ; Diacritic # Mn BRAHMI VIRAMA
11070 ; Diacritic # Mn BRAHMI SIGN OLD TAMIL VIRAMA
110B9..110BA ; Diacritic # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA
11133..11134 ; Diacritic # Mn [2] CHAKMA VIRAMA..CHAKMA MAAYYAA
11173 ; Diacritic # Mn MAHAJANI SIGN NUKTA
@ -1049,18 +1069,24 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON
16F8F..16F92 ; Diacritic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
16F93..16F9F ; Diacritic # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8
16FF0..16FF1 ; Diacritic # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
1AFF0..1AFF3 ; Diacritic # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5
1AFF5..1AFFB ; Diacritic # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5
1AFFD..1AFFE ; Diacritic # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8
1CF00..1CF2D ; Diacritic # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT
1CF30..1CF46 ; Diacritic # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG
1D167..1D169 ; Diacritic # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
1D16D..1D172 ; Diacritic # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5
1D17B..1D182 ; Diacritic # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
1D185..1D18B ; Diacritic # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
1D1AA..1D1AD ; Diacritic # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
1E130..1E136 ; Diacritic # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
1E2AE ; Diacritic # Mn TOTO SIGN RISING TONE
1E2EC..1E2EF ; Diacritic # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI
1E8D0..1E8D6 ; Diacritic # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK
1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA
# Total code points: 882
# Total code points: 1064
# ================================================
@ -1088,6 +1114,7 @@ AA70 ; Extender # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION
AADD ; Extender # Lm TAI VIET SYMBOL SAM
AAF3..AAF4 ; Extender # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK
FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
10781..10782 ; Extender # Lm [2] MODIFIER LETTER SUPERSCRIPT TRIANGULAR COLON..MODIFIER LETTER SUPERSCRIPT HALF TRIANGULAR COLON
1135D ; Extender # Lo GRANTHA SIGN PLUTA
115C6..115C8 ; Extender # Po [3] SIDDHAM REPETITION MARK-1..SIDDHAM REPETITION MARK-3
11A98 ; Extender # Mn SOYOMBO GEMINATION MARK
@ -1097,7 +1124,7 @@ FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND
1E13C..1E13D ; Extender # Lm [2] NYIAKENG PUACHUE HMONG SIGN XW XW..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER
1E944..1E946 ; Extender # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK
# Total code points: 48
# Total code points: 50
# ================================================
@ -1121,8 +1148,12 @@ A69C..A69D ; Other_Lowercase # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..M
A770 ; Other_Lowercase # Lm MODIFIER LETTER US
A7F8..A7F9 ; Other_Lowercase # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
AB5C..AB5F ; Other_Lowercase # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK
10780 ; Other_Lowercase # Lm MODIFIER LETTER SMALL CAPITAL AA
10783..10785 ; Other_Lowercase # Lm [3] MODIFIER LETTER SMALL AE..MODIFIER LETTER SMALL B WITH HOOK
10787..107B0 ; Other_Lowercase # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK
107B2..107BA ; Other_Lowercase # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL
# Total code points: 189
# Total code points: 244
# ================================================
@ -1211,7 +1242,7 @@ E0020..E007F ; Other_Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG
# ================================================
3400..4DBF ; Unified_Ideograph # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF
4E00..9FFC ; Unified_Ideograph # Lo [20989] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFC
4E00..9FFF ; Unified_Ideograph # Lo [20992] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFF
FA0E..FA0F ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA0E..CJK COMPATIBILITY IDEOGRAPH-FA0F
FA11 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA11
FA13..FA14 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA13..CJK COMPATIBILITY IDEOGRAPH-FA14
@ -1219,14 +1250,14 @@ FA1F ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA1F
FA21 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA21
FA23..FA24 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA23..CJK COMPATIBILITY IDEOGRAPH-FA24
FA27..FA29 ; Unified_Ideograph # Lo [3] CJK COMPATIBILITY IDEOGRAPH-FA27..CJK COMPATIBILITY IDEOGRAPH-FA29
20000..2A6DD ; Unified_Ideograph # Lo [42718] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DD
2A700..2B734 ; Unified_Ideograph # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
20000..2A6DF ; Unified_Ideograph # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF
2A700..2B738 ; Unified_Ideograph # Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738
2B740..2B81D ; Unified_Ideograph # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; Unified_Ideograph # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEB0..2EBE0 ; Unified_Ideograph # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
30000..3134A ; Unified_Ideograph # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
# Total code points: 92856
# Total code points: 92865
# ================================================
@ -1291,8 +1322,9 @@ E0001 ; Deprecated # Cf LANGUAGE TAG
1D62A..1D62B ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF ITALIC SMALL I..MATHEMATICAL SANS-SERIF ITALIC SMALL J
1D65E..1D65F ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL I..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL J
1D692..1D693 ; Soft_Dotted # L& [2] MATHEMATICAL MONOSPACE SMALL I..MATHEMATICAL MONOSPACE SMALL J
1DF1A ; Soft_Dotted # L& LATIN SMALL LETTER I WITH STROKE AND RETROFLEX HOOK
# Total code points: 46
# Total code points: 47
# ================================================
@ -1330,7 +1362,7 @@ AABB..AABC ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL AUE..TAI VIET
002E ; Sentence_Terminal # Po FULL STOP
003F ; Sentence_Terminal # Po QUESTION MARK
0589 ; Sentence_Terminal # Po ARMENIAN FULL STOP
061E..061F ; Sentence_Terminal # Po [2] ARABIC TRIPLE DOT PUNCTUATION MARK..ARABIC QUESTION MARK
061D..061F ; Sentence_Terminal # Po [3] ARABIC END OF TEXT MARK..ARABIC QUESTION MARK
06D4 ; Sentence_Terminal # Po ARABIC FULL STOP
0700..0702 ; Sentence_Terminal # Po [3] SYRIAC END OF PARAGRAPH..SYRIAC SUBLINEAR FULL STOP
07F9 ; Sentence_Terminal # Po NKO EXCLAMATION MARK
@ -1349,12 +1381,14 @@ AABB..AABC ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL AUE..TAI VIET
1AA8..1AAB ; Sentence_Terminal # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU
1B5A..1B5B ; Sentence_Terminal # Po [2] BALINESE PANTI..BALINESE PAMADA
1B5E..1B5F ; Sentence_Terminal # Po [2] BALINESE CARIK SIKI..BALINESE CARIK PAREREN
1B7D..1B7E ; Sentence_Terminal # Po [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG
1C3B..1C3C ; Sentence_Terminal # Po [2] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION NYET THYOOM TA-ROL
1C7E..1C7F ; Sentence_Terminal # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
203C..203D ; Sentence_Terminal # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG
2047..2049 ; Sentence_Terminal # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK
2E2E ; Sentence_Terminal # Po REVERSED QUESTION MARK
2E3C ; Sentence_Terminal # Po STENOGRAPHIC FULL STOP
2E53..2E54 ; Sentence_Terminal # Po [2] MEDIEVAL EXCLAMATION MARK..MEDIEVAL QUESTION MARK
3002 ; Sentence_Terminal # Po IDEOGRAPHIC FULL STOP
A4FF ; Sentence_Terminal # Po LISU PUNCTUATION FULL STOP
A60E..A60F ; Sentence_Terminal # Po [2] VAI FULL STOP..VAI QUESTION MARK
@ -1375,6 +1409,7 @@ FF1F ; Sentence_Terminal # Po FULLWIDTH QUESTION MARK
FF61 ; Sentence_Terminal # Po HALFWIDTH IDEOGRAPHIC FULL STOP
10A56..10A57 ; Sentence_Terminal # Po [2] KHAROSHTHI PUNCTUATION DANDA..KHAROSHTHI PUNCTUATION DOUBLE DANDA
10F55..10F59 ; Sentence_Terminal # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT
10F86..10F89 ; Sentence_Terminal # Po [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS
11047..11048 ; Sentence_Terminal # Po [2] BRAHMI DANDA..BRAHMI DOUBLE DANDA
110BE..110C1 ; Sentence_Terminal # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA
11141..11143 ; Sentence_Terminal # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK
@ -1403,15 +1438,16 @@ FF61 ; Sentence_Terminal # Po HALFWIDTH IDEOGRAPHIC FULL STOP
1BC9F ; Sentence_Terminal # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP
1DA88 ; Sentence_Terminal # Po SIGNWRITING FULL STOP
# Total code points: 143
# Total code points: 152
# ================================================
180B..180D ; Variation_Selector # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
180F ; Variation_Selector # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR
FE00..FE0F ; Variation_Selector # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
# Total code points: 259
# Total code points: 260
# ================================================
@ -1644,8 +1680,17 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
2E42 ; Pattern_Syntax # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK
2E43..2E4F ; Pattern_Syntax # Po [13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER
2E50..2E51 ; Pattern_Syntax # So [2] CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR
2E52 ; Pattern_Syntax # Po TIRONIAN SIGN CAPITAL ET
2E53..2E7F ; Pattern_Syntax # Cn [45] <reserved-2E53>..<reserved-2E7F>
2E52..2E54 ; Pattern_Syntax # Po [3] TIRONIAN SIGN CAPITAL ET..MEDIEVAL QUESTION MARK
2E55 ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH STROKE
2E56 ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH STROKE
2E57 ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH DOUBLE STROKE
2E58 ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH DOUBLE STROKE
2E59 ; Pattern_Syntax # Ps TOP HALF LEFT PARENTHESIS
2E5A ; Pattern_Syntax # Pe TOP HALF RIGHT PARENTHESIS
2E5B ; Pattern_Syntax # Ps BOTTOM HALF LEFT PARENTHESIS
2E5C ; Pattern_Syntax # Pe BOTTOM HALF RIGHT PARENTHESIS
2E5D ; Pattern_Syntax # Pd OBLIQUE HYPHEN
2E5E..2E7F ; Pattern_Syntax # Cn [34] <reserved-2E5E>..<reserved-2E7F>
3001..3003 ; Pattern_Syntax # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
3008 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET
3009 ; Pattern_Syntax # Pe RIGHT ANGLE BRACKET
@ -1682,11 +1727,12 @@ FE45..FE46 ; Pattern_Syntax # Po [2] SESAME DOT..WHITE SESAME DOT
0600..0605 ; Prepended_Concatenation_Mark # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE
06DD ; Prepended_Concatenation_Mark # Cf ARABIC END OF AYAH
070F ; Prepended_Concatenation_Mark # Cf SYRIAC ABBREVIATION MARK
0890..0891 ; Prepended_Concatenation_Mark # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE
08E2 ; Prepended_Concatenation_Mark # Cf ARABIC DISPUTED END OF AYAH
110BD ; Prepended_Concatenation_Mark # Cf KAITHI NUMBER SIGN
110CD ; Prepended_Concatenation_Mark # Cf KAITHI NUMBER SIGN ABOVE
# Total code points: 11
# Total code points: 13
# ================================================

View File

@ -1,6 +1,6 @@
# PropertyValueAliases-13.0.0.txt
# Date: 2019-11-13, 21:52:10 GMT
# Copyright (c) 2019 Unicode, Inc.
# PropertyValueAliases-14.0.0.txt
# Date: 2021-05-10, 21:08:53 GMT
# Copyright (c) 2021 Unicode, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
@ -89,6 +89,7 @@ age; 11.0 ; V11_0
age; 12.0 ; V12_0
age; 12.1 ; V12_1
age; 13.0 ; V13_0
age; 14.0 ; V14_0
age; NA ; Unassigned
# Alphabetic (Alpha)
@ -160,6 +161,7 @@ blk; Ancient_Greek_Numbers ; Ancient_Greek_Numbers
blk; Ancient_Symbols ; Ancient_Symbols
blk; Arabic ; Arabic
blk; Arabic_Ext_A ; Arabic_Extended_A
blk; Arabic_Ext_B ; Arabic_Extended_B
blk; Arabic_Math ; Arabic_Mathematical_Alphabetic_Symbols
blk; Arabic_PF_A ; Arabic_Presentation_Forms_A ; Arabic_Presentation_Forms-A
blk; Arabic_PF_B ; Arabic_Presentation_Forms_B
@ -216,6 +218,7 @@ blk; Cuneiform ; Cuneiform
blk; Cuneiform_Numbers ; Cuneiform_Numbers_And_Punctuation
blk; Currency_Symbols ; Currency_Symbols
blk; Cypriot_Syllabary ; Cypriot_Syllabary
blk; Cypro_Minoan ; Cypro_Minoan
blk; Cyrillic ; Cyrillic
blk; Cyrillic_Ext_A ; Cyrillic_Extended_A
blk; Cyrillic_Ext_B ; Cyrillic_Extended_B
@ -246,6 +249,7 @@ blk; Enclosed_Ideographic_Sup ; Enclosed_Ideographic_Supplement
blk; Ethiopic ; Ethiopic
blk; Ethiopic_Ext ; Ethiopic_Extended
blk; Ethiopic_Ext_A ; Ethiopic_Extended_A
blk; Ethiopic_Ext_B ; Ethiopic_Extended_B
blk; Ethiopic_Sup ; Ethiopic_Supplement
blk; Geometric_Shapes ; Geometric_Shapes
blk; Geometric_Shapes_Ext ; Geometric_Shapes_Extended
@ -285,6 +289,7 @@ blk; Jamo_Ext_B ; Hangul_Jamo_Extended_B
blk; Javanese ; Javanese
blk; Kaithi ; Kaithi
blk; Kana_Ext_A ; Kana_Extended_A
blk; Kana_Ext_B ; Kana_Extended_B
blk; Kana_Sup ; Kana_Supplement
blk; Kanbun ; Kanbun
blk; Kangxi ; Kangxi_Radicals
@ -306,6 +311,8 @@ blk; Latin_Ext_B ; Latin_Extended_B
blk; Latin_Ext_C ; Latin_Extended_C
blk; Latin_Ext_D ; Latin_Extended_D
blk; Latin_Ext_E ; Latin_Extended_E
blk; Latin_Ext_F ; Latin_Extended_F
blk; Latin_Ext_G ; Latin_Extended_G
blk; Lepcha ; Lepcha
blk; Letterlike_Symbols ; Letterlike_Symbols
blk; Limbu ; Limbu
@ -372,6 +379,7 @@ blk; Old_Persian ; Old_Persian
blk; Old_Sogdian ; Old_Sogdian
blk; Old_South_Arabian ; Old_South_Arabian
blk; Old_Turkic ; Old_Turkic
blk; Old_Uyghur ; Old_Uyghur
blk; Oriya ; Oriya
blk; Ornamental_Dingbats ; Ornamental_Dingbats
blk; Osage ; Osage
@ -433,6 +441,7 @@ blk; Tai_Xuan_Jing ; Tai_Xuan_Jing_Symbols
blk; Takri ; Takri
blk; Tamil ; Tamil
blk; Tamil_Sup ; Tamil_Supplement
blk; Tangsa ; Tangsa
blk; Tangut ; Tangut
blk; Tangut_Components ; Tangut_Components
blk; Tangut_Sup ; Tangut_Supplement
@ -442,13 +451,16 @@ blk; Thai ; Thai
blk; Tibetan ; Tibetan
blk; Tifinagh ; Tifinagh
blk; Tirhuta ; Tirhuta
blk; Toto ; Toto
blk; Transport_And_Map ; Transport_And_Map_Symbols
blk; UCAS ; Unified_Canadian_Aboriginal_Syllabics; Canadian_Syllabics
blk; UCAS_Ext ; Unified_Canadian_Aboriginal_Syllabics_Extended
blk; UCAS_Ext_A ; Unified_Canadian_Aboriginal_Syllabics_Extended_A
blk; Ugaritic ; Ugaritic
blk; Vai ; Vai
blk; Vedic_Ext ; Vedic_Extensions
blk; Vertical_Forms ; Vertical_Forms
blk; Vithkuqi ; Vithkuqi
blk; VS ; Variation_Selectors
blk; VS_Sup ; Variation_Selectors_Supplement
blk; Wancho ; Wancho
@ -458,6 +470,7 @@ blk; Yi_Radicals ; Yi_Radicals
blk; Yi_Syllables ; Yi_Syllables
blk; Yijing ; Yijing_Hexagram_Symbols
blk; Zanabazar_Square ; Zanabazar_Square
blk; Znamenny_Music ; Znamenny_Musical_Notation
# Canonical_Combining_Class (ccc)
@ -1032,6 +1045,8 @@ jg ; Taw ; Taw
jg ; Teh_Marbuta ; Teh_Marbuta
jg ; Teh_Marbuta_Goal ; Hamza_On_Heh_Goal
jg ; Teth ; Teth
jg ; Thin_Yeh ; Thin_Yeh
jg ; Vertical_Tail ; Vertical_Tail
jg ; Waw ; Waw
jg ; Yeh ; Yeh
jg ; Yeh_Barree ; Yeh_Barree
@ -1262,6 +1277,7 @@ sc ; Cham ; Cham
sc ; Cher ; Cherokee
sc ; Chrs ; Chorasmian
sc ; Copt ; Coptic ; Qaac
sc ; Cpmn ; Cypro_Minoan
sc ; Cprt ; Cypriot
sc ; Cyrl ; Cyrillic
sc ; Deva ; Devanagari
@ -1341,6 +1357,7 @@ sc ; Orkh ; Old_Turkic
sc ; Orya ; Oriya
sc ; Osge ; Osage
sc ; Osma ; Osmanya
sc ; Ougr ; Old_Uyghur
sc ; Palm ; Palmyrene
sc ; Pauc ; Pau_Cin_Hau
sc ; Perm ; Old_Permic
@ -1383,8 +1400,11 @@ sc ; Thaa ; Thaana
sc ; Thai ; Thai
sc ; Tibt ; Tibetan
sc ; Tirh ; Tirhuta
sc ; Tnsa ; Tangsa
sc ; Toto ; Toto
sc ; Ugar ; Ugaritic
sc ; Vaii ; Vai
sc ; Vith ; Vithkuqi
sc ; Wara ; Warang_Citi
sc ; Wcho ; Wancho
sc ; Xpeo ; Old_Persian

View File

@ -0,0 +1,16 @@
# Unicode Character Database
# Date: 2021-09-10, 17:22:00 GMT [KW]
# Copyright (c) 2021 Unicode, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
#
# For documentation, see the following:
# NamesList.html
# UAX #38, "Unicode Han Database (Unihan)"
# UAX #44, "Unicode Character Database"
# UTS #51, "Unicode Emoji"
#
# The UAXes and UTS #51 can be accessed at https://www.unicode.org/versions/Unicode14.0.0/
This directory contains the final data files
for the Unicode Character Database, for Version 14.0.0 of the Unicode Standard.

View File

@ -1,16 +1,16 @@
# Scripts-13.0.0.txt
# Date: 2020-01-22, 00:07:43 GMT
# Copyright (c) 2020 Unicode, Inc.
# Scripts-14.0.0.txt
# Date: 2021-07-10, 00:35:31 GMT
# Copyright (c) 2021 Unicode, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# For documentation, see http://www.unicode.org/reports/tr44/
# For more information, see:
# UAX #24, Unicode Script Property: http://www.unicode.org/reports/tr24/
# UAX #24, Unicode Script Property: https://www.unicode.org/reports/tr24/
# Especially the sections:
# http://www.unicode.org/reports/tr24/#Assignment_Script_Values
# http://www.unicode.org/reports/tr24/#Assignment_ScriptX_Values
# https://www.unicode.org/reports/tr24/#Assignment_Script_Values
# https://www.unicode.org/reports/tr24/#Assignment_ScriptX_Values
#
# ================================================
@ -154,7 +154,7 @@
208A..208C ; Common # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN
208D ; Common # Ps SUBSCRIPT LEFT PARENTHESIS
208E ; Common # Pe SUBSCRIPT RIGHT PARENTHESIS
20A0..20BF ; Common # Sc [32] EURO-CURRENCY SIGN..BITCOIN SIGN
20A0..20C0 ; Common # Sc [33] EURO-CURRENCY SIGN..SOM SIGN
2100..2101 ; Common # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT
2102 ; Common # L& DOUBLE-STRUCK CAPITAL C
2103..2106 ; Common # So [4] DEGREE CELSIUS..CADA UNA
@ -347,7 +347,16 @@
2E42 ; Common # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK
2E43..2E4F ; Common # Po [13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER
2E50..2E51 ; Common # So [2] CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR
2E52 ; Common # Po TIRONIAN SIGN CAPITAL ET
2E52..2E54 ; Common # Po [3] TIRONIAN SIGN CAPITAL ET..MEDIEVAL QUESTION MARK
2E55 ; Common # Ps LEFT SQUARE BRACKET WITH STROKE
2E56 ; Common # Pe RIGHT SQUARE BRACKET WITH STROKE
2E57 ; Common # Ps LEFT SQUARE BRACKET WITH DOUBLE STROKE
2E58 ; Common # Pe RIGHT SQUARE BRACKET WITH DOUBLE STROKE
2E59 ; Common # Ps TOP HALF LEFT PARENTHESIS
2E5A ; Common # Pe TOP HALF RIGHT PARENTHESIS
2E5B ; Common # Ps BOTTOM HALF LEFT PARENTHESIS
2E5C ; Common # Pe BOTTOM HALF RIGHT PARENTHESIS
2E5D ; Common # Pd OBLIQUE HYPHEN
2FF0..2FFB ; Common # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
3000 ; Common # Zs IDEOGRAPHIC SPACE
3001..3003 ; Common # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
@ -511,9 +520,8 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
10190..1019C ; Common # So [13] ROMAN SEXTANS SIGN..ASCIA SYMBOL
101D0..101FC ; Common # So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND
102E1..102FB ; Common # No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED
16FE2 ; Common # Po OLD CHINESE HOOK MARK
16FE3 ; Common # Lm OLD CHINESE ITERATION MARK
1BCA0..1BCA3 ; Common # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP
1CF50..1CFC3 ; Common # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK
1D000..1D0F5 ; Common # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO
1D100..1D126 ; Common # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2
1D129..1D164 ; Common # So [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
@ -523,7 +531,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
1D173..1D17A ; Common # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
1D183..1D184 ; Common # So [2] MUSICAL SYMBOL ARPEGGIATO UP..MUSICAL SYMBOL ARPEGGIATO DOWN
1D18C..1D1A9 ; Common # So [30] MUSICAL SYMBOL RINFORZANDO..MUSICAL SYMBOL DEGREE SLASH
1D1AE..1D1E8 ; Common # So [59] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KIEVAN FLAT SIGN
1D1AE..1D1EA ; Common # So [61] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KORON
1D2E0..1D2F3 ; Common # No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN
1D300..1D356 ; Common # So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING
1D360..1D378 ; Common # No [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE
@ -593,35 +601,36 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
1F300..1F3FA ; Common # So [251] CYCLONE..AMPHORA
1F3FB..1F3FF ; Common # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6
1F400..1F6D7 ; Common # So [728] RAT..ELEVATOR
1F6E0..1F6EC ; Common # So [13] HAMMER AND WRENCH..AIRPLANE ARRIVING
1F6DD..1F6EC ; Common # So [16] PLAYGROUND SLIDE..AIRPLANE ARRIVING
1F6F0..1F6FC ; Common # So [13] SATELLITE..ROLLER SKATE
1F700..1F773 ; Common # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE
1F780..1F7D8 ; Common # So [89] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..NEGATIVE CIRCLED SQUARE
1F7E0..1F7EB ; Common # So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE
1F7F0 ; Common # So HEAVY EQUALS SIGN
1F800..1F80B ; Common # So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD
1F810..1F847 ; Common # So [56] LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD..DOWNWARDS HEAVY ARROW
1F850..1F859 ; Common # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW
1F860..1F887 ; Common # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
1F890..1F8AD ; Common # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS
1F8B0..1F8B1 ; Common # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST
1F900..1F978 ; Common # So [121] CIRCLED CROSS FORMEE WITH FOUR DOTS..DISGUISED FACE
1F97A..1F9CB ; Common # So [82] FACE WITH PLEADING EYES..BUBBLE TEA
1F9CD..1FA53 ; Common # So [135] STANDING PERSON..BLACK CHESS KNIGHT-BISHOP
1F900..1FA53 ; Common # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP
1FA60..1FA6D ; Common # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER
1FA70..1FA74 ; Common # So [5] BALLET SHOES..THONG SANDAL
1FA78..1FA7A ; Common # So [3] DROP OF BLOOD..STETHOSCOPE
1FA78..1FA7C ; Common # So [5] DROP OF BLOOD..CRUTCH
1FA80..1FA86 ; Common # So [7] YO-YO..NESTING DOLLS
1FA90..1FAA8 ; Common # So [25] RINGED PLANET..ROCK
1FAB0..1FAB6 ; Common # So [7] FLY..FEATHER
1FAC0..1FAC2 ; Common # So [3] ANATOMICAL HEART..PEOPLE HUGGING
1FAD0..1FAD6 ; Common # So [7] BLUEBERRIES..TEAPOT
1FA90..1FAAC ; Common # So [29] RINGED PLANET..HAMSA
1FAB0..1FABA ; Common # So [11] FLY..NEST WITH EGGS
1FAC0..1FAC5 ; Common # So [6] ANATOMICAL HEART..PERSON WITH CROWN
1FAD0..1FAD9 ; Common # So [10] BLUEBERRIES..JAR
1FAE0..1FAE7 ; Common # So [8] MELTING FACE..BUBBLES
1FAF0..1FAF6 ; Common # So [7] HAND WITH INDEX FINGER AND THUMB CROSSED..HEART HANDS
1FB00..1FB92 ; Common # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK
1FB94..1FBCA ; Common # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON
1FBF0..1FBF9 ; Common # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
E0001 ; Common # Cf LANGUAGE TAG
E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG
# Total code points: 8087
# Total code points: 8252
# ================================================
@ -664,8 +673,11 @@ A770 ; Latin # Lm MODIFIER LETTER US
A771..A787 ; Latin # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T
A78B..A78E ; Latin # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
A78F ; Latin # Lo LATIN LETTER SINOLOGICAL DOT
A790..A7BF ; Latin # L& [48] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER GLOTTAL U
A7C2..A7CA ; Latin # L& [9] LATIN CAPITAL LETTER ANGLICANA W..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
A790..A7CA ; Latin # L& [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
A7D0..A7D1 ; Latin # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G
A7D3 ; Latin # L& LATIN SMALL LETTER DOUBLE THORN
A7D5..A7D9 ; Latin # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S
A7F2..A7F4 ; Latin # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q
A7F5..A7F6 ; Latin # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H
A7F7 ; Latin # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I
A7F8..A7F9 ; Latin # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
@ -679,8 +691,14 @@ AB69 ; Latin # Lm MODIFIER LETTER SMALL TURNED W
FB00..FB06 ; Latin # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
FF21..FF3A ; Latin # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
10780..10785 ; Latin # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK
10787..107B0 ; Latin # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK
107B2..107BA ; Latin # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL
1DF00..1DF09 ; Latin # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK
1DF0A ; Latin # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK
1DF0B..1DF1E ; Latin # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL
# Total code points: 1374
# Total code points: 1475
# ================================================
@ -820,7 +838,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
060E..060F ; Arabic # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA
0610..061A ; Arabic # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
061C ; Arabic # Cf ARABIC LETTER MARK
061E ; Arabic # Po ARABIC TRIPLE DOT PUNCTUATION MARK
061D..061E ; Arabic # Po [2] ARABIC END OF TEXT MARK..ARABIC TRIPLE DOT PUNCTUATION MARK
0620..063F ; Arabic # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
0641..064A ; Arabic # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH
0656..065F ; Arabic # Mn [10] ARABIC SUBSCRIPT ALEF..ARABIC WAVY HAMZA BELOW
@ -843,18 +861,25 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
06FD..06FE ; Arabic # So [2] ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN
06FF ; Arabic # Lo ARABIC LETTER HEH WITH INVERTED V
0750..077F ; Arabic # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE
08A0..08B4 ; Arabic # Lo [21] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER KAF WITH DOT BELOW
08B6..08C7 ; Arabic # Lo [18] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER LAM WITH SMALL ARABIC LETTER TAH ABOVE
08D3..08E1 ; Arabic # Mn [15] ARABIC SMALL LOW WAW..ARABIC SMALL HIGH SIGN SAFHA
0870..0887 ; Arabic # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT
0888 ; Arabic # Sk ARABIC RAISED ROUND DOT
0889..088E ; Arabic # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL
0890..0891 ; Arabic # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE
0898..089F ; Arabic # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
08A0..08C8 ; Arabic # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF
08C9 ; Arabic # Lm ARABIC SMALL FARSI YEH
08CA..08E1 ; Arabic # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
08E3..08FF ; Arabic # Mn [29] ARABIC TURNED DAMMA BELOW..ARABIC MARK SIDEWAYS NOON GHUNNA
FB50..FBB1 ; Arabic # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
FBB2..FBC1 ; Arabic # Sk [16] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL SMALL TAH BELOW
FBB2..FBC2 ; Arabic # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE
FBD3..FD3D ; Arabic # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM
FD40..FD4F ; Arabic # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH
FD50..FD8F ; Arabic # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
FD92..FDC7 ; Arabic # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
FDCF ; Arabic # So ARABIC LIGATURE SALAAMUHU ALAYNAA
FDF0..FDFB ; Arabic # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU
FDFC ; Arabic # Sc RIAL SIGN
FDFD ; Arabic # So ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM
FDFD..FDFF ; Arabic # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL
FE70..FE74 ; Arabic # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM
FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM
10E60..10E7E ; Arabic # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS
@ -893,7 +918,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA
1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL
# Total code points: 1291
# Total code points: 1365
# ================================================
@ -1113,6 +1138,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY
0C0E..0C10 ; Telugu # Lo [3] TELUGU LETTER E..TELUGU LETTER AI
0C12..0C28 ; Telugu # Lo [23] TELUGU LETTER O..TELUGU LETTER NA
0C2A..0C39 ; Telugu # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA
0C3C ; Telugu # Mn TELUGU SIGN NUKTA
0C3D ; Telugu # Lo TELUGU SIGN AVAGRAHA
0C3E..0C40 ; Telugu # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
0C41..0C44 ; Telugu # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR
@ -1120,6 +1146,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY
0C4A..0C4D ; Telugu # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
0C55..0C56 ; Telugu # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
0C58..0C5A ; Telugu # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA
0C5D ; Telugu # Lo TELUGU LETTER NAKAARA POLLU
0C60..0C61 ; Telugu # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
0C62..0C63 ; Telugu # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL
0C66..0C6F ; Telugu # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE
@ -1127,7 +1154,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY
0C78..0C7E ; Telugu # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR
0C7F ; Telugu # So TELUGU SIGN TUUMU
# Total code points: 98
# Total code points: 100
# ================================================
@ -1150,13 +1177,13 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY
0CCA..0CCB ; Kannada # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
0CCC..0CCD ; Kannada # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
0CD5..0CD6 ; Kannada # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
0CDE ; Kannada # Lo KANNADA LETTER FA
0CDD..0CDE ; Kannada # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA
0CE0..0CE1 ; Kannada # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
0CE2..0CE3 ; Kannada # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
0CE6..0CEF ; Kannada # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
0CF1..0CF2 ; Kannada # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
# Total code points: 89
# Total code points: 90
# ================================================
@ -1411,8 +1438,12 @@ AB09..AB0E ; Ethiopic # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DD
AB11..AB16 ; Ethiopic # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO
AB20..AB26 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO
AB28..AB2E ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO
1E7E0..1E7E6 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO
1E7E8..1E7EB ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE
1E7ED..1E7EE ; Ethiopic # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE
1E7F0..1E7FE ; Ethiopic # Lo [15] ETHIOPIC SYLLABLE GURAGE QWI..ETHIOPIC SYLLABLE GURAGE PWEE
# Total code points: 495
# Total code points: 523
# ================================================
@ -1430,8 +1461,9 @@ AB70..ABBF ; Cherokee # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETT
166E ; Canadian_Aboriginal # Po CANADIAN SYLLABICS FULL STOP
166F..167F ; Canadian_Aboriginal # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W
18B0..18F5 ; Canadian_Aboriginal # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S
11AB0..11ABF ; Canadian_Aboriginal # Lo [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA
# Total code points: 710
# Total code points: 726
# ================================================
@ -1480,6 +1512,7 @@ AB70..ABBF ; Cherokee # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETT
1807..180A ; Mongolian # Po [4] MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER..MONGOLIAN NIRUGU
180B..180D ; Mongolian # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
180E ; Mongolian # Cf MONGOLIAN VOWEL SEPARATOR
180F ; Mongolian # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR
1810..1819 ; Mongolian # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE
1820..1842 ; Mongolian # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI
1843 ; Mongolian # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN
@ -1491,18 +1524,18 @@ AB70..ABBF ; Cherokee # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETT
18AA ; Mongolian # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA
11660..1166C ; Mongolian # Po [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT
# Total code points: 167
# Total code points: 168
# ================================================
3041..3096 ; Hiragana # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE
309D..309E ; Hiragana # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK
309F ; Hiragana # Lo HIRAGANA DIGRAPH YORI
1B001..1B11E ; Hiragana # Lo [286] HIRAGANA LETTER ARCHAIC YE..HENTAIGANA LETTER N-MU-MO-2
1B001..1B11F ; Hiragana # Lo [287] HIRAGANA LETTER ARCHAIC YE..HIRAGANA LETTER ARCHAIC WU
1B150..1B152 ; Hiragana # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO
1F200 ; Hiragana # So SQUARE HIRAGANA HOKA
# Total code points: 379
# Total code points: 380
# ================================================
@ -1514,10 +1547,14 @@ AB70..ABBF ; Cherokee # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETT
3300..3357 ; Katakana # So [88] SQUARE APAATO..SQUARE WATTO
FF66..FF6F ; Katakana # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU
FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N
1AFF0..1AFF3 ; Katakana # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5
1AFF5..1AFFB ; Katakana # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5
1AFFD..1AFFE ; Katakana # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8
1B000 ; Katakana # Lo KATAKANA LETTER ARCHAIC E
1B120..1B122 ; Katakana # Lo [3] KATAKANA LETTER ARCHAIC YI..KATAKANA LETTER ARCHAIC WU
1B164..1B167 ; Katakana # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N
# Total code points: 304
# Total code points: 320
# ================================================
@ -1538,19 +1575,21 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
3038..303A ; Han # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY
303B ; Han # Lm VERTICAL IDEOGRAPHIC ITERATION MARK
3400..4DBF ; Han # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF
4E00..9FFC ; Han # Lo [20989] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFC
4E00..9FFF ; Han # Lo [20992] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFF
F900..FA6D ; Han # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D
FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
16FE2 ; Han # Po OLD CHINESE HOOK MARK
16FE3 ; Han # Lm OLD CHINESE ITERATION MARK
16FF0..16FF1 ; Han # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
20000..2A6DD ; Han # Lo [42718] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DD
2A700..2B734 ; Han # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
20000..2A6DF ; Han # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF
2A700..2B738 ; Han # Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738
2B740..2B81D ; Han # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; Han # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEB0..2EBE0 ; Han # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
2F800..2FA1D ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
30000..3134A ; Han # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
# Total code points: 94204
# Total code points: 94215
# ================================================
@ -1593,15 +1632,14 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE
0951..0954 ; Inherited # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT
1AB0..1ABD ; Inherited # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
1ABE ; Inherited # Me COMBINING PARENTHESES OVERLAY
1ABF..1AC0 ; Inherited # Mn [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW
1ABF..1ACE ; Inherited # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T
1CD0..1CD2 ; Inherited # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
1CD4..1CE0 ; Inherited # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
1CE2..1CE8 ; Inherited # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
1CED ; Inherited # Mn VEDIC SIGN TIRYAK
1CF4 ; Inherited # Mn VEDIC TONE CANDRA ABOVE
1CF8..1CF9 ; Inherited # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
1DC0..1DF9 ; Inherited # Mn [58] COMBINING DOTTED GRAVE ACCENT..COMBINING WIDE INVERTED BRIDGE BELOW
1DFB..1DFF ; Inherited # Mn [5] COMBINING DELETION MARK..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
1DC0..1DFF ; Inherited # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
200C..200D ; Inherited # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
20D0..20DC ; Inherited # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
20DD..20E0 ; Inherited # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
@ -1615,26 +1653,30 @@ FE20..FE2D ; Inherited # Mn [14] COMBINING LIGATURE LEFT HALF..COMBINING CON
101FD ; Inherited # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE
102E0 ; Inherited # Mn COPTIC EPACT THOUSANDS MARK
1133B ; Inherited # Mn COMBINING BINDU BELOW
1CF00..1CF2D ; Inherited # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT
1CF30..1CF46 ; Inherited # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG
1D167..1D169 ; Inherited # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
1D17B..1D182 ; Inherited # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
1D185..1D18B ; Inherited # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
1D1AA..1D1AD ; Inherited # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
# Total code points: 573
# Total code points: 657
# ================================================
1700..170C ; Tagalog # Lo [13] TAGALOG LETTER A..TAGALOG LETTER YA
170E..1711 ; Tagalog # Lo [4] TAGALOG LETTER LA..TAGALOG LETTER HA
1700..1711 ; Tagalog # Lo [18] TAGALOG LETTER A..TAGALOG LETTER HA
1712..1714 ; Tagalog # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
1715 ; Tagalog # Mc TAGALOG SIGN PAMUDPOD
171F ; Tagalog # Lo TAGALOG LETTER ARCHAIC RA
# Total code points: 20
# Total code points: 23
# ================================================
1720..1731 ; Hanunoo # Lo [18] HANUNOO LETTER A..HANUNOO LETTER HA
1732..1734 ; Hanunoo # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD
1732..1733 ; Hanunoo # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U
1734 ; Hanunoo # Mc HANUNOO SIGN PAMUDPOD
# Total code points: 21
@ -1762,15 +1804,14 @@ E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-2
# ================================================
2C00..2C2E ; Glagolitic # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
2C30..2C5E ; Glagolitic # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE
2C00..2C5F ; Glagolitic # L& [96] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC SMALL LETTER CAUDATE CHRIVI
1E000..1E006 ; Glagolitic # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
1E008..1E018 ; Glagolitic # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
1E01B..1E021 ; Glagolitic # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
1E023..1E024 ; Glagolitic # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
1E026..1E02A ; Glagolitic # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
# Total code points: 132
# Total code points: 134
# ================================================
@ -1836,14 +1877,15 @@ A82C ; Syloti_Nagri # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA
1B3D..1B41 ; Balinese # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG
1B42 ; Balinese # Mn BALINESE VOWEL SIGN PEPET
1B43..1B44 ; Balinese # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG
1B45..1B4B ; Balinese # Lo [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK
1B45..1B4C ; Balinese # Lo [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA
1B50..1B59 ; Balinese # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE
1B5A..1B60 ; Balinese # Po [7] BALINESE PANTI..BALINESE PAMENENG
1B61..1B6A ; Balinese # So [10] BALINESE MUSICAL SYMBOL DONG..BALINESE MUSICAL SYMBOL DANG GEDE
1B6B..1B73 ; Balinese # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
1B74..1B7C ; Balinese # So [9] BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING
1B7D..1B7E ; Balinese # Po [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG
# Total code points: 121
# Total code points: 124
# ================================================
@ -2178,9 +2220,10 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
110BB..110BC ; Kaithi # Po [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN
110BD ; Kaithi # Cf KAITHI NUMBER SIGN
110BE..110C1 ; Kaithi # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA
110C2 ; Kaithi # Mn KAITHI VOWEL SIGN VOCALIC R
110CD ; Kaithi # Cf KAITHI NUMBER SIGN ABOVE
# Total code points: 67
# Total code points: 68
# ================================================
@ -2207,9 +2250,13 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
11047..1104D ; Brahmi # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS
11052..11065 ; Brahmi # No [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND
11066..1106F ; Brahmi # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE
11070 ; Brahmi # Mn BRAHMI SIGN OLD TAMIL VIRAMA
11071..11072 ; Brahmi # Lo [2] BRAHMI LETTER OLD TAMIL SHORT E..BRAHMI LETTER OLD TAMIL SHORT O
11073..11074 ; Brahmi # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O
11075 ; Brahmi # Lo BRAHMI LETTER OLD TAMIL LLA
1107F ; Brahmi # Mn BRAHMI NUMBER JOINER
# Total code points: 109
# Total code points: 115
# ================================================
@ -2301,9 +2348,10 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
116B6 ; Takri # Mc TAKRI SIGN VIRAMA
116B7 ; Takri # Mn TAKRI SIGN NUKTA
116B8 ; Takri # Lo TAKRI LETTER ARCHAIC KHA
116B9 ; Takri # Po TAKRI ABBREVIATION SIGN
116C0..116C9 ; Takri # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
# Total code points: 67
# Total code points: 68
# ================================================
@ -2561,8 +2609,9 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
1173A..1173B ; Ahom # No [2] AHOM NUMBER TEN..AHOM NUMBER TWENTY
1173C..1173E ; Ahom # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI
1173F ; Ahom # So AHOM SYMBOL VI
11740..11746 ; Ahom # Lo [7] AHOM LETTER CA..AHOM LETTER LLA
# Total code points: 58
# Total code points: 65
# ================================================
@ -2897,4 +2946,46 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
# Total code points: 47
# ================================================
12F90..12FF0 ; Cypro_Minoan # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114
12FF1..12FF2 ; Cypro_Minoan # Po [2] CYPRO-MINOAN SIGN CM301..CYPRO-MINOAN SIGN CM302
# Total code points: 99
# ================================================
10F70..10F81 ; Old_Uyghur # Lo [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH
10F82..10F85 ; Old_Uyghur # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
10F86..10F89 ; Old_Uyghur # Po [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS
# Total code points: 26
# ================================================
16A70..16ABE ; Tangsa # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA
16AC0..16AC9 ; Tangsa # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE
# Total code points: 89
# ================================================
1E290..1E2AD ; Toto # Lo [30] TOTO LETTER PA..TOTO LETTER A
1E2AE ; Toto # Mn TOTO SIGN RISING TONE
# Total code points: 31
# ================================================
10570..1057A ; Vithkuqi # L& [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA
1057C..1058A ; Vithkuqi # L& [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE
1058C..10592 ; Vithkuqi # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE
10594..10595 ; Vithkuqi # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE
10597..105A1 ; Vithkuqi # L& [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA
105A3..105B1 ; Vithkuqi # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE
105B3..105B9 ; Vithkuqi # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
105BB..105BC ; Vithkuqi # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
# Total code points: 70
# EOF

View File

@ -1,6 +1,6 @@
# SpecialCasing-13.0.0.txt
# Date: 2019-09-08, 23:31:24 GMT
# Copyright (c) 2019 Unicode, Inc.
# SpecialCasing-14.0.0.txt
# Date: 2021-03-08, 19:35:55 GMT
# Copyright (c) 2021 Unicode, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#

File diff suppressed because it is too large Load Diff

View File

@ -1 +0,0 @@
13.0.0

View File

@ -1,6 +1,6 @@
# GraphemeBreakProperty-13.0.0.txt
# Date: 2019-10-21, 14:30:35 GMT
# Copyright (c) 2019 Unicode, Inc.
# GraphemeBreakProperty-14.0.0.txt
# Date: 2021-08-12, 23:13:02 GMT
# Copyright (c) 2021 Unicode, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
@ -21,6 +21,7 @@
0600..0605 ; Prepend # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE
06DD ; Prepend # Cf ARABIC END OF AYAH
070F ; Prepend # Cf SYRIAC ABBREVIATION MARK
0890..0891 ; Prepend # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE
08E2 ; Prepend # Cf ARABIC DISPUTED END OF AYAH
0D4E ; Prepend # Lo MALAYALAM LETTER DOT REPH
110BD ; Prepend # Cf KAITHI NUMBER SIGN
@ -32,7 +33,7 @@
11A84..11A89 ; Prepend # Lo [6] SOYOMBO SIGN JIHVAMULIYA..SOYOMBO CLUSTER-INITIAL LETTER SA
11D46 ; Prepend # Lo MASARAM GONDI REPHA
# Total code points: 24
# Total code points: 26
# ================================================
@ -104,7 +105,8 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
0859..085B ; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
08D3..08E1 ; Extend # Mn [15] ARABIC SMALL LOW WAW..ARABIC SMALL HIGH SIGN SAFHA
0898..089F ; Extend # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
08CA..08E1 ; Extend # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
08E3..0902 ; Extend # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA
093A ; Extend # Mn DEVANAGARI VOWEL SIGN OE
093C ; Extend # Mn DEVANAGARI SIGN NUKTA
@ -151,6 +153,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
0BD7 ; Extend # Mc TAMIL AU LENGTH MARK
0C00 ; Extend # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE
0C04 ; Extend # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE
0C3C ; Extend # Mn TELUGU SIGN NUKTA
0C3E..0C40 ; Extend # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
0C46..0C48 ; Extend # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI
0C4A..0C4D ; Extend # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
@ -206,7 +209,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
109D ; Extend # Mn MYANMAR VOWEL SIGN AITON AI
135D..135F ; Extend # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK
1712..1714 ; Extend # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
1732..1734 ; Extend # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD
1732..1733 ; Extend # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U
1752..1753 ; Extend # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U
1772..1773 ; Extend # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U
17B4..17B5 ; Extend # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
@ -215,6 +218,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
17C9..17D3 ; Extend # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
17DD ; Extend # Mn KHMER SIGN ATTHACAN
180B..180D ; Extend # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
180F ; Extend # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR
1885..1886 ; Extend # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
18A9 ; Extend # Mn MONGOLIAN LETTER ALI GALI DAGALGA
1920..1922 ; Extend # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U
@ -232,7 +236,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY
1ABF..1AC0 ; Extend # Mn [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW
1ABF..1ACE ; Extend # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T
1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
1B34 ; Extend # Mn BALINESE SIGN REREKAN
1B35 ; Extend # Mc BALINESE VOWEL SIGN TEDUNG
@ -256,8 +260,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
1CED ; Extend # Mn VEDIC SIGN TIRYAK
1CF4 ; Extend # Mn VEDIC TONE CANDRA ABOVE
1CF8..1CF9 ; Extend # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
1DC0..1DF9 ; Extend # Mn [58] COMBINING DOTTED GRAVE ACCENT..COMBINING WIDE INVERTED BRIDGE BELOW
1DFB..1DFF ; Extend # Mn [5] COMBINING DELETION MARK..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
1DC0..1DFF ; Extend # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
200C ; Extend # Cf ZERO WIDTH NON-JOINER
20D0..20DC ; Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
20DD..20E0 ; Extend # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
@ -322,11 +325,15 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
11001 ; Extend # Mn BRAHMI SIGN ANUSVARA
11038..11046 ; Extend # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA
11070 ; Extend # Mn BRAHMI SIGN OLD TAMIL VIRAMA
11073..11074 ; Extend # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O
1107F..11081 ; Extend # Mn [3] BRAHMI NUMBER JOINER..KAITHI SIGN ANUSVARA
110B3..110B6 ; Extend # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI
110B9..110BA ; Extend # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA
110C2 ; Extend # Mn KAITHI VOWEL SIGN VOCALIC R
11100..11102 ; Extend # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA
11127..1112B ; Extend # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU
1112D..11134 ; Extend # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA
@ -412,6 +419,8 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
16FE4 ; Extend # Mn KHITAN SMALL SCRIPT FILLER
1BC9D..1BC9E ; Extend # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
1CF00..1CF2D ; Extend # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT
1CF30..1CF46 ; Extend # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG
1D165 ; Extend # Mc MUSICAL SYMBOL COMBINING STEM
1D167..1D169 ; Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
1D16E..1D172 ; Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5
@ -431,6 +440,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
1E023..1E024 ; Extend # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
1E026..1E02A ; Extend # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
1E130..1E136 ; Extend # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
1E2AE ; Extend # Mn TOTO SIGN RISING TONE
1E2EC..1E2EF ; Extend # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI
1E8D0..1E8D6 ; Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
1E944..1E94A ; Extend # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
@ -438,7 +448,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG
E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
# Total code points: 1984
# Total code points: 2095
# ================================================
@ -495,6 +505,8 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
103B..103C ; SpacingMark # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA
1056..1057 ; SpacingMark # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR
1084 ; SpacingMark # Mc MYANMAR VOWEL SIGN SHAN E
1715 ; SpacingMark # Mc TAGALOG SIGN PAMUDPOD
1734 ; SpacingMark # Mc HANUNOO SIGN PAMUDPOD
17B6 ; SpacingMark # Mc KHMER VOWEL SIGN AA
17BE..17C5 ; SpacingMark # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU
17C7..17C8 ; SpacingMark # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU
@ -579,7 +591,6 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK
116AC ; SpacingMark # Mc TAKRI SIGN VISARGA
116AE..116AF ; SpacingMark # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II
116B6 ; SpacingMark # Mc TAKRI SIGN VIRAMA
11720..11721 ; SpacingMark # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA
11726 ; SpacingMark # Mc AHOM VOWEL SIGN E
1182C..1182E ; SpacingMark # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II
11838 ; SpacingMark # Mc DOGRA SIGN VISARGA

View File

@ -1,6 +1,6 @@
# GraphemeBreakTest-13.0.0.txt
# Date: 2019-11-15, 19:49:10 GMT
# Copyright (c) 2019 Unicode, Inc.
# GraphemeBreakTest-14.0.0.txt
# Date: 2021-03-08, 06:22:32 GMT
# Copyright (c) 2021 Unicode, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#

View File

@ -1,11 +1,11 @@
# emoji-data.txt
# Date: 2020-01-28, 20:52:38 GMT
# Copyright (c) 2020 Unicode, Inc.
# emoji-data-14.0.0.txt
# Date: 2021-08-26, 17:22:22 GMT
# Copyright (c) 2021 Unicode, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Emoji Data for UTS #51
# Version: 13.0
# Used with Emoji Version 14.0 and subsequent minor revisions (if any)
#
# For documentation and usage, see http://www.unicode.org/reports/tr51
#
@ -22,7 +22,7 @@
# All omitted code points have Emoji=No
# @missing: 0000..10FFFF ; Emoji ; No
0023 ; Emoji # E0.0 [1] (#) number sign
0023 ; Emoji # E0.0 [1] (#) hash sign
002A ; Emoji # E0.0 [1] (*) asterisk
0030..0039 ; Emoji # E0.0 [10] (0..9) digit zero..digit nine
00A9 ; Emoji # E0.6 [1] (©️) copyright
@ -119,8 +119,8 @@
2747 ; Emoji # E0.6 [1] (❇️) sparkle
274C ; Emoji # E0.6 [1] (❌) cross mark
274E ; Emoji # E0.6 [1] (❎) cross mark button
2753..2755 ; Emoji # E0.6 [3] (❓..❕) question mark..white exclamation mark
2757 ; Emoji # E0.6 [1] (❗) exclamation mark
2753..2755 ; Emoji # E0.6 [3] (❓..❕) red question mark..white exclamation mark
2757 ; Emoji # E0.6 [1] (❗) red exclamation mark
2763 ; Emoji # E1.0 [1] (❣️) heart exclamation
2764 ; Emoji # E0.6 [1] (❤️) red heart
2795..2797 ; Emoji # E0.6 [3] (..➗) plus..divide
@ -239,7 +239,7 @@
1F509 ; Emoji # E1.0 [1] (🔉) speaker medium volume
1F50A..1F514 ; Emoji # E0.6 [11] (🔊..🔔) speaker high volume..bell
1F515 ; Emoji # E1.0 [1] (🔕) bell with slash
1F516..1F52B ; Emoji # E0.6 [22] (🔖..🔫) bookmark..pistol
1F516..1F52B ; Emoji # E0.6 [22] (🔖..🔫) bookmark..water pistol
1F52C..1F52D ; Emoji # E1.0 [2] (🔬..🔭) microscope..telescope
1F52E..1F53D ; Emoji # E0.6 [16] (🔮..🔽) crystal ball..downwards button
1F549..1F54A ; Emoji # E0.7 [2] (🕉️..🕊️) om..dove
@ -294,7 +294,7 @@
1F62E..1F62F ; Emoji # E1.0 [2] (😮..😯) face with open mouth..hushed face
1F630..1F633 ; Emoji # E0.6 [4] (😰..😳) anxious face with sweat..flushed face
1F634 ; Emoji # E1.0 [1] (😴) sleeping face
1F635 ; Emoji # E0.6 [1] (😵) dizzy face
1F635 ; Emoji # E0.6 [1] (😵) face with crossed-out eyes
1F636 ; Emoji # E1.0 [1] (😶) face without mouth
1F637..1F640 ; Emoji # E0.6 [10] (😷..🙀) face with medical mask..weary cat
1F641..1F644 ; Emoji # E1.0 [4] (🙁..🙄) slightly frowning face..face with rolling eyes
@ -341,6 +341,7 @@
1F6D1..1F6D2 ; Emoji # E3.0 [2] (🛑..🛒) stop sign..shopping cart
1F6D5 ; Emoji # E12.0 [1] (🛕) hindu temple
1F6D6..1F6D7 ; Emoji # E13.0 [2] (🛖..🛗) hut..elevator
1F6DD..1F6DF ; Emoji # E14.0 [3] (🛝..🛟) playground slide..ring buoy
1F6E0..1F6E5 ; Emoji # E0.7 [6] (🛠️..🛥️) hammer and wrench..motor boat
1F6E9 ; Emoji # E0.7 [1] (🛩️) small airplane
1F6EB..1F6EC ; Emoji # E1.0 [2] (🛫..🛬) airplane departure..airplane arrival
@ -352,6 +353,7 @@
1F6FA ; Emoji # E12.0 [1] (🛺) auto rickshaw
1F6FB..1F6FC ; Emoji # E13.0 [2] (🛻..🛼) pickup truck..roller skate
1F7E0..1F7EB ; Emoji # E12.0 [12] (🟠..🟫) orange circle..brown square
1F7F0 ; Emoji # E14.0 [1] (🟰) heavy equals sign
1F90C ; Emoji # E13.0 [1] (🤌) pinched fingers
1F90D..1F90F ; Emoji # E12.0 [3] (🤍..🤏) white heart..pinching hand
1F910..1F918 ; Emoji # E1.0 [9] (🤐..🤘) zipper-mouth face..sign of the horns
@ -375,6 +377,7 @@
1F972 ; Emoji # E13.0 [1] (🥲) smiling face with tear
1F973..1F976 ; Emoji # E11.0 [4] (🥳..🥶) partying face..cold face
1F977..1F978 ; Emoji # E13.0 [2] (🥷..🥸) ninja..disguised face
1F979 ; Emoji # E14.0 [1] (🥹) face holding back tears
1F97A ; Emoji # E11.0 [1] (🥺) pleading face
1F97B ; Emoji # E12.0 [1] (🥻) sari
1F97C..1F97F ; Emoji # E11.0 [4] (🥼..🥿) lab coat..flat shoe
@ -392,21 +395,29 @@
1F9C1..1F9C2 ; Emoji # E11.0 [2] (🧁..🧂) cupcake..salt
1F9C3..1F9CA ; Emoji # E12.0 [8] (🧃..🧊) beverage box..ice
1F9CB ; Emoji # E13.0 [1] (🧋) bubble tea
1F9CC ; Emoji # E14.0 [1] (🧌) troll
1F9CD..1F9CF ; Emoji # E12.0 [3] (🧍..🧏) person standing..deaf person
1F9D0..1F9E6 ; Emoji # E5.0 [23] (🧐..🧦) face with monocle..socks
1F9E7..1F9FF ; Emoji # E11.0 [25] (🧧..🧿) red envelope..nazar amulet
1FA70..1FA73 ; Emoji # E12.0 [4] (🩰..🩳) ballet shoes..shorts
1FA74 ; Emoji # E13.0 [1] (🩴) thong sandal
1FA78..1FA7A ; Emoji # E12.0 [3] (🩸..🩺) drop of blood..stethoscope
1FA7B..1FA7C ; Emoji # E14.0 [2] (🩻..🩼) x-ray..crutch
1FA80..1FA82 ; Emoji # E12.0 [3] (🪀..🪂) yo-yo..parachute
1FA83..1FA86 ; Emoji # E13.0 [4] (🪃..🪆) boomerang..nesting dolls
1FA90..1FA95 ; Emoji # E12.0 [6] (🪐..🪕) ringed planet..banjo
1FA96..1FAA8 ; Emoji # E13.0 [19] (🪖..🪨) military helmet..rock
1FAA9..1FAAC ; Emoji # E14.0 [4] (🪩..🪬) mirror ball..hamsa
1FAB0..1FAB6 ; Emoji # E13.0 [7] (🪰..🪶) fly..feather
1FAB7..1FABA ; Emoji # E14.0 [4] (🪷..🪺) lotus..nest with eggs
1FAC0..1FAC2 ; Emoji # E13.0 [3] (🫀..🫂) anatomical heart..people hugging
1FAC3..1FAC5 ; Emoji # E14.0 [3] (🫃..🫅) pregnant man..person with crown
1FAD0..1FAD6 ; Emoji # E13.0 [7] (🫐..🫖) blueberries..teapot
1FAD7..1FAD9 ; Emoji # E14.0 [3] (🫗..🫙) pouring liquid..jar
1FAE0..1FAE7 ; Emoji # E14.0 [8] (🫠..🫧) melting face..bubbles
1FAF0..1FAF6 ; Emoji # E14.0 [7] (🫰..🫶) hand with index finger and thumb crossed..heart hands
# Total elements: 1367
# Total elements: 1404
# ================================================
@ -438,8 +449,8 @@
2728 ; Emoji_Presentation # E0.6 [1] (✨) sparkles
274C ; Emoji_Presentation # E0.6 [1] (❌) cross mark
274E ; Emoji_Presentation # E0.6 [1] (❎) cross mark button
2753..2755 ; Emoji_Presentation # E0.6 [3] (❓..❕) question mark..white exclamation mark
2757 ; Emoji_Presentation # E0.6 [1] (❗) exclamation mark
2753..2755 ; Emoji_Presentation # E0.6 [3] (❓..❕) red question mark..white exclamation mark
2757 ; Emoji_Presentation # E0.6 [1] (❗) red exclamation mark
2795..2797 ; Emoji_Presentation # E0.6 [3] (..➗) plus..divide
27B0 ; Emoji_Presentation # E0.6 [1] (➰) curly loop
27BF ; Emoji_Presentation # E1.0 [1] (➿) double curly loop
@ -533,7 +544,7 @@
1F509 ; Emoji_Presentation # E1.0 [1] (🔉) speaker medium volume
1F50A..1F514 ; Emoji_Presentation # E0.6 [11] (🔊..🔔) speaker high volume..bell
1F515 ; Emoji_Presentation # E1.0 [1] (🔕) bell with slash
1F516..1F52B ; Emoji_Presentation # E0.6 [22] (🔖..🔫) bookmark..pistol
1F516..1F52B ; Emoji_Presentation # E0.6 [22] (🔖..🔫) bookmark..water pistol
1F52C..1F52D ; Emoji_Presentation # E1.0 [2] (🔬..🔭) microscope..telescope
1F52E..1F53D ; Emoji_Presentation # E0.6 [16] (🔮..🔽) crystal ball..downwards button
1F54B..1F54E ; Emoji_Presentation # E1.0 [4] (🕋..🕎) kaaba..menorah
@ -569,7 +580,7 @@
1F62E..1F62F ; Emoji_Presentation # E1.0 [2] (😮..😯) face with open mouth..hushed face
1F630..1F633 ; Emoji_Presentation # E0.6 [4] (😰..😳) anxious face with sweat..flushed face
1F634 ; Emoji_Presentation # E1.0 [1] (😴) sleeping face
1F635 ; Emoji_Presentation # E0.6 [1] (😵) dizzy face
1F635 ; Emoji_Presentation # E0.6 [1] (😵) face with crossed-out eyes
1F636 ; Emoji_Presentation # E1.0 [1] (😶) face without mouth
1F637..1F640 ; Emoji_Presentation # E0.6 [10] (😷..🙀) face with medical mask..weary cat
1F641..1F644 ; Emoji_Presentation # E1.0 [4] (🙁..🙄) slightly frowning face..face with rolling eyes
@ -614,6 +625,7 @@
1F6D1..1F6D2 ; Emoji_Presentation # E3.0 [2] (🛑..🛒) stop sign..shopping cart
1F6D5 ; Emoji_Presentation # E12.0 [1] (🛕) hindu temple
1F6D6..1F6D7 ; Emoji_Presentation # E13.0 [2] (🛖..🛗) hut..elevator
1F6DD..1F6DF ; Emoji_Presentation # E14.0 [3] (🛝..🛟) playground slide..ring buoy
1F6EB..1F6EC ; Emoji_Presentation # E1.0 [2] (🛫..🛬) airplane departure..airplane arrival
1F6F4..1F6F6 ; Emoji_Presentation # E3.0 [3] (🛴..🛶) kick scooter..canoe
1F6F7..1F6F8 ; Emoji_Presentation # E5.0 [2] (🛷..🛸) sled..flying saucer
@ -621,6 +633,7 @@
1F6FA ; Emoji_Presentation # E12.0 [1] (🛺) auto rickshaw
1F6FB..1F6FC ; Emoji_Presentation # E13.0 [2] (🛻..🛼) pickup truck..roller skate
1F7E0..1F7EB ; Emoji_Presentation # E12.0 [12] (🟠..🟫) orange circle..brown square
1F7F0 ; Emoji_Presentation # E14.0 [1] (🟰) heavy equals sign
1F90C ; Emoji_Presentation # E13.0 [1] (🤌) pinched fingers
1F90D..1F90F ; Emoji_Presentation # E12.0 [3] (🤍..🤏) white heart..pinching hand
1F910..1F918 ; Emoji_Presentation # E1.0 [9] (🤐..🤘) zipper-mouth face..sign of the horns
@ -644,6 +657,7 @@
1F972 ; Emoji_Presentation # E13.0 [1] (🥲) smiling face with tear
1F973..1F976 ; Emoji_Presentation # E11.0 [4] (🥳..🥶) partying face..cold face
1F977..1F978 ; Emoji_Presentation # E13.0 [2] (🥷..🥸) ninja..disguised face
1F979 ; Emoji_Presentation # E14.0 [1] (🥹) face holding back tears
1F97A ; Emoji_Presentation # E11.0 [1] (🥺) pleading face
1F97B ; Emoji_Presentation # E12.0 [1] (🥻) sari
1F97C..1F97F ; Emoji_Presentation # E11.0 [4] (🥼..🥿) lab coat..flat shoe
@ -661,21 +675,29 @@
1F9C1..1F9C2 ; Emoji_Presentation # E11.0 [2] (🧁..🧂) cupcake..salt
1F9C3..1F9CA ; Emoji_Presentation # E12.0 [8] (🧃..🧊) beverage box..ice
1F9CB ; Emoji_Presentation # E13.0 [1] (🧋) bubble tea
1F9CC ; Emoji_Presentation # E14.0 [1] (🧌) troll
1F9CD..1F9CF ; Emoji_Presentation # E12.0 [3] (🧍..🧏) person standing..deaf person
1F9D0..1F9E6 ; Emoji_Presentation # E5.0 [23] (🧐..🧦) face with monocle..socks
1F9E7..1F9FF ; Emoji_Presentation # E11.0 [25] (🧧..🧿) red envelope..nazar amulet
1FA70..1FA73 ; Emoji_Presentation # E12.0 [4] (🩰..🩳) ballet shoes..shorts
1FA74 ; Emoji_Presentation # E13.0 [1] (🩴) thong sandal
1FA78..1FA7A ; Emoji_Presentation # E12.0 [3] (🩸..🩺) drop of blood..stethoscope
1FA7B..1FA7C ; Emoji_Presentation # E14.0 [2] (🩻..🩼) x-ray..crutch
1FA80..1FA82 ; Emoji_Presentation # E12.0 [3] (🪀..🪂) yo-yo..parachute
1FA83..1FA86 ; Emoji_Presentation # E13.0 [4] (🪃..🪆) boomerang..nesting dolls
1FA90..1FA95 ; Emoji_Presentation # E12.0 [6] (🪐..🪕) ringed planet..banjo
1FA96..1FAA8 ; Emoji_Presentation # E13.0 [19] (🪖..🪨) military helmet..rock
1FAA9..1FAAC ; Emoji_Presentation # E14.0 [4] (🪩..🪬) mirror ball..hamsa
1FAB0..1FAB6 ; Emoji_Presentation # E13.0 [7] (🪰..🪶) fly..feather
1FAB7..1FABA ; Emoji_Presentation # E14.0 [4] (🪷..🪺) lotus..nest with eggs
1FAC0..1FAC2 ; Emoji_Presentation # E13.0 [3] (🫀..🫂) anatomical heart..people hugging
1FAC3..1FAC5 ; Emoji_Presentation # E14.0 [3] (🫃..🫅) pregnant man..person with crown
1FAD0..1FAD6 ; Emoji_Presentation # E13.0 [7] (🫐..🫖) blueberries..teapot
1FAD7..1FAD9 ; Emoji_Presentation # E14.0 [3] (🫗..🫙) pouring liquid..jar
1FAE0..1FAE7 ; Emoji_Presentation # E14.0 [8] (🫠..🫧) melting face..bubbles
1FAF0..1FAF6 ; Emoji_Presentation # E14.0 [7] (🫰..🫶) hand with index finger and thumb crossed..heart hands
# Total elements: 1148
# Total elements: 1185
# ================================================
@ -738,15 +760,17 @@
1F9BB ; Emoji_Modifier_Base # E12.0 [1] (🦻) ear with hearing aid
1F9CD..1F9CF ; Emoji_Modifier_Base # E12.0 [3] (🧍..🧏) person standing..deaf person
1F9D1..1F9DD ; Emoji_Modifier_Base # E5.0 [13] (🧑..🧝) person..elf
1FAC3..1FAC5 ; Emoji_Modifier_Base # E14.0 [3] (🫃..🫅) pregnant man..person with crown
1FAF0..1FAF6 ; Emoji_Modifier_Base # E14.0 [7] (🫰..🫶) hand with index finger and thumb crossed..heart hands
# Total elements: 122
# Total elements: 132
# ================================================
# All omitted code points have Emoji_Component=No
# @missing: 0000..10FFFF ; Emoji_Component ; No
0023 ; Emoji_Component # E0.0 [1] (#) number sign
0023 ; Emoji_Component # E0.0 [1] (#) hash sign
002A ; Emoji_Component # E0.0 [1] (*) asterisk
0030..0039 ; Emoji_Component # E0.0 [10] (0..9) digit zero..digit nine
200D ; Emoji_Component # E0.0 [1] () zero width joiner
@ -902,8 +926,8 @@ E0020..E007F ; Emoji_Component # E0.0 [96] (󠀠..󠁿) tag space..c
2747 ; Extended_Pictographic# E0.6 [1] (❇️) sparkle
274C ; Extended_Pictographic# E0.6 [1] (❌) cross mark
274E ; Extended_Pictographic# E0.6 [1] (❎) cross mark button
2753..2755 ; Extended_Pictographic# E0.6 [3] (❓..❕) question mark..white exclamation mark
2757 ; Extended_Pictographic# E0.6 [1] (❗) exclamation mark
2753..2755 ; Extended_Pictographic# E0.6 [3] (❓..❕) red question mark..white exclamation mark
2757 ; Extended_Pictographic# E0.6 [1] (❗) red exclamation mark
2763 ; Extended_Pictographic# E1.0 [1] (❣️) heart exclamation
2764 ; Extended_Pictographic# E0.6 [1] (❤️) red heart
2765..2767 ; Extended_Pictographic# E0.0 [3] (❥..❧) ROTATED HEAVY BLACK HEART BULLET..ROTATED FLORAL HEART BULLET
@ -1041,7 +1065,7 @@ E0020..E007F ; Emoji_Component # E0.0 [96] (󠀠..󠁿) tag space..c
1F509 ; Extended_Pictographic# E1.0 [1] (🔉) speaker medium volume
1F50A..1F514 ; Extended_Pictographic# E0.6 [11] (🔊..🔔) speaker high volume..bell
1F515 ; Extended_Pictographic# E1.0 [1] (🔕) bell with slash
1F516..1F52B ; Extended_Pictographic# E0.6 [22] (🔖..🔫) bookmark..pistol
1F516..1F52B ; Extended_Pictographic# E0.6 [22] (🔖..🔫) bookmark..water pistol
1F52C..1F52D ; Extended_Pictographic# E1.0 [2] (🔬..🔭) microscope..telescope
1F52E..1F53D ; Extended_Pictographic# E0.6 [16] (🔮..🔽) crystal ball..downwards button
1F546..1F548 ; Extended_Pictographic# E0.0 [3] (🕆..🕈) WHITE LATIN CROSS..CELTIC CROSS
@ -1117,7 +1141,7 @@ E0020..E007F ; Emoji_Component # E0.0 [96] (󠀠..󠁿) tag space..c
1F62E..1F62F ; Extended_Pictographic# E1.0 [2] (😮..😯) face with open mouth..hushed face
1F630..1F633 ; Extended_Pictographic# E0.6 [4] (😰..😳) anxious face with sweat..flushed face
1F634 ; Extended_Pictographic# E1.0 [1] (😴) sleeping face
1F635 ; Extended_Pictographic# E0.6 [1] (😵) dizzy face
1F635 ; Extended_Pictographic# E0.6 [1] (😵) face with crossed-out eyes
1F636 ; Extended_Pictographic# E1.0 [1] (😶) face without mouth
1F637..1F640 ; Extended_Pictographic# E0.6 [10] (😷..🙀) face with medical mask..weary cat
1F641..1F644 ; Extended_Pictographic# E1.0 [4] (🙁..🙄) slightly frowning face..face with rolling eyes
@ -1166,7 +1190,8 @@ E0020..E007F ; Emoji_Component # E0.0 [96] (󠀠..󠁿) tag space..c
1F6D3..1F6D4 ; Extended_Pictographic# E0.0 [2] (🛓..🛔) STUPA..PAGODA
1F6D5 ; Extended_Pictographic# E12.0 [1] (🛕) hindu temple
1F6D6..1F6D7 ; Extended_Pictographic# E13.0 [2] (🛖..🛗) hut..elevator
1F6D8..1F6DF ; Extended_Pictographic# E0.0 [8] (🛘..🛟) <reserved-1F6D8>..<reserved-1F6DF>
1F6D8..1F6DC ; Extended_Pictographic# E0.0 [5] (🛘..🛜) <reserved-1F6D8>..<reserved-1F6DC>
1F6DD..1F6DF ; Extended_Pictographic# E14.0 [3] (🛝..🛟) playground slide..ring buoy
1F6E0..1F6E5 ; Extended_Pictographic# E0.7 [6] (🛠️..🛥️) hammer and wrench..motor boat
1F6E6..1F6E8 ; Extended_Pictographic# E0.0 [3] (🛦..🛨) UP-POINTING MILITARY AIRPLANE..UP-POINTING SMALL AIRPLANE
1F6E9 ; Extended_Pictographic# E0.7 [1] (🛩️) small airplane
@ -1185,7 +1210,9 @@ E0020..E007F ; Emoji_Component # E0.0 [96] (󠀠..󠁿) tag space..c
1F774..1F77F ; Extended_Pictographic# E0.0 [12] (🝴..🝿) <reserved-1F774>..<reserved-1F77F>
1F7D5..1F7DF ; Extended_Pictographic# E0.0 [11] (🟕..🟟) CIRCLED TRIANGLE..<reserved-1F7DF>
1F7E0..1F7EB ; Extended_Pictographic# E12.0 [12] (🟠..🟫) orange circle..brown square
1F7EC..1F7FF ; Extended_Pictographic# E0.0 [20] (🟬..🟿) <reserved-1F7EC>..<reserved-1F7FF>
1F7EC..1F7EF ; Extended_Pictographic# E0.0 [4] (🟬..🟯) <reserved-1F7EC>..<reserved-1F7EF>
1F7F0 ; Extended_Pictographic# E14.0 [1] (🟰) heavy equals sign
1F7F1..1F7FF ; Extended_Pictographic# E0.0 [15] (🟱..🟿) <reserved-1F7F1>..<reserved-1F7FF>
1F80C..1F80F ; Extended_Pictographic# E0.0 [4] (🠌..🠏) <reserved-1F80C>..<reserved-1F80F>
1F848..1F84F ; Extended_Pictographic# E0.0 [8] (🡈..🡏) <reserved-1F848>..<reserved-1F84F>
1F85A..1F85F ; Extended_Pictographic# E0.0 [6] (🡚..🡟) <reserved-1F85A>..<reserved-1F85F>
@ -1214,7 +1241,7 @@ E0020..E007F ; Emoji_Component # E0.0 [96] (󠀠..󠁿) tag space..c
1F972 ; Extended_Pictographic# E13.0 [1] (🥲) smiling face with tear
1F973..1F976 ; Extended_Pictographic# E11.0 [4] (🥳..🥶) partying face..cold face
1F977..1F978 ; Extended_Pictographic# E13.0 [2] (🥷..🥸) ninja..disguised face
1F979 ; Extended_Pictographic# E0.0 [1] (🥹) <reserved-1F979>
1F979 ; Extended_Pictographic# E14.0 [1] (🥹) face holding back tears
1F97A ; Extended_Pictographic# E11.0 [1] (🥺) pleading face
1F97B ; Extended_Pictographic# E12.0 [1] (🥻) sari
1F97C..1F97F ; Extended_Pictographic# E11.0 [4] (🥼..🥿) lab coat..flat shoe
@ -1232,7 +1259,7 @@ E0020..E007F ; Emoji_Component # E0.0 [96] (󠀠..󠁿) tag space..c
1F9C1..1F9C2 ; Extended_Pictographic# E11.0 [2] (🧁..🧂) cupcake..salt
1F9C3..1F9CA ; Extended_Pictographic# E12.0 [8] (🧃..🧊) beverage box..ice
1F9CB ; Extended_Pictographic# E13.0 [1] (🧋) bubble tea
1F9CC ; Extended_Pictographic# E0.0 [1] (🧌) <reserved-1F9CC>
1F9CC ; Extended_Pictographic# E14.0 [1] (🧌) troll
1F9CD..1F9CF ; Extended_Pictographic# E12.0 [3] (🧍..🧏) person standing..deaf person
1F9D0..1F9E6 ; Extended_Pictographic# E5.0 [23] (🧐..🧦) face with monocle..socks
1F9E7..1F9FF ; Extended_Pictographic# E11.0 [25] (🧧..🧿) red envelope..nazar amulet
@ -1241,19 +1268,28 @@ E0020..E007F ; Emoji_Component # E0.0 [96] (󠀠..󠁿) tag space..c
1FA74 ; Extended_Pictographic# E13.0 [1] (🩴) thong sandal
1FA75..1FA77 ; Extended_Pictographic# E0.0 [3] (🩵..🩷) <reserved-1FA75>..<reserved-1FA77>
1FA78..1FA7A ; Extended_Pictographic# E12.0 [3] (🩸..🩺) drop of blood..stethoscope
1FA7B..1FA7F ; Extended_Pictographic# E0.0 [5] (🩻..🩿) <reserved-1FA7B>..<reserved-1FA7F>
1FA7B..1FA7C ; Extended_Pictographic# E14.0 [2] (🩻..🩼) x-ray..crutch
1FA7D..1FA7F ; Extended_Pictographic# E0.0 [3] (🩽..🩿) <reserved-1FA7D>..<reserved-1FA7F>
1FA80..1FA82 ; Extended_Pictographic# E12.0 [3] (🪀..🪂) yo-yo..parachute
1FA83..1FA86 ; Extended_Pictographic# E13.0 [4] (🪃..🪆) boomerang..nesting dolls
1FA87..1FA8F ; Extended_Pictographic# E0.0 [9] (🪇..🪏) <reserved-1FA87>..<reserved-1FA8F>
1FA90..1FA95 ; Extended_Pictographic# E12.0 [6] (🪐..🪕) ringed planet..banjo
1FA96..1FAA8 ; Extended_Pictographic# E13.0 [19] (🪖..🪨) military helmet..rock
1FAA9..1FAAF ; Extended_Pictographic# E0.0 [7] (🪩..🪯) <reserved-1FAA9>..<reserved-1FAAF>
1FAA9..1FAAC ; Extended_Pictographic# E14.0 [4] (🪩..🪬) mirror ball..hamsa
1FAAD..1FAAF ; Extended_Pictographic# E0.0 [3] (🪭..🪯) <reserved-1FAAD>..<reserved-1FAAF>
1FAB0..1FAB6 ; Extended_Pictographic# E13.0 [7] (🪰..🪶) fly..feather
1FAB7..1FABF ; Extended_Pictographic# E0.0 [9] (🪷..🪿) <reserved-1FAB7>..<reserved-1FABF>
1FAB7..1FABA ; Extended_Pictographic# E14.0 [4] (🪷..🪺) lotus..nest with eggs
1FABB..1FABF ; Extended_Pictographic# E0.0 [5] (🪻..🪿) <reserved-1FABB>..<reserved-1FABF>
1FAC0..1FAC2 ; Extended_Pictographic# E13.0 [3] (🫀..🫂) anatomical heart..people hugging
1FAC3..1FACF ; Extended_Pictographic# E0.0 [13] (🫃..🫏) <reserved-1FAC3>..<reserved-1FACF>
1FAC3..1FAC5 ; Extended_Pictographic# E14.0 [3] (🫃..🫅) pregnant man..person with crown
1FAC6..1FACF ; Extended_Pictographic# E0.0 [10] (🫆..🫏) <reserved-1FAC6>..<reserved-1FACF>
1FAD0..1FAD6 ; Extended_Pictographic# E13.0 [7] (🫐..🫖) blueberries..teapot
1FAD7..1FAFF ; Extended_Pictographic# E0.0 [41] (🫗..🫿) <reserved-1FAD7>..<reserved-1FAFF>
1FAD7..1FAD9 ; Extended_Pictographic# E14.0 [3] (🫗..🫙) pouring liquid..jar
1FADA..1FADF ; Extended_Pictographic# E0.0 [6] (🫚..🫟) <reserved-1FADA>..<reserved-1FADF>
1FAE0..1FAE7 ; Extended_Pictographic# E14.0 [8] (🫠..🫧) melting face..bubbles
1FAE8..1FAEF ; Extended_Pictographic# E0.0 [8] (🫨..🫯) <reserved-1FAE8>..<reserved-1FAEF>
1FAF0..1FAF6 ; Extended_Pictographic# E14.0 [7] (🫰..🫶) hand with index finger and thumb crossed..heart hands
1FAF7..1FAFF ; Extended_Pictographic# E0.0 [9] (🫷..🫿) <reserved-1FAF7>..<reserved-1FAFF>
1FC00..1FFFD ; Extended_Pictographic# E0.0[1022] (🰀..🿽) <reserved-1FC00>..<reserved-1FFFD>
# Total elements: 3537

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -35,8 +35,8 @@ final class Grapheme {
* <p>
* See Unicode Standard Annex #29 Unicode Text Segmentation for the specification
* for the extended grapheme cluster boundary rules. The following implementation
* is based on version 12.0 of the annex.
* (http://www.unicode.org/reports/tr29/tr29-35.html)
* is based on the annex for Unicode version 14.0.
* (http://www.unicode.org/reports/tr29/tr29-38.html)
*
* @param src the {@code CharSequence} to be scanned
* @param off offset to start looking for the next boundary in the src
@ -97,7 +97,7 @@ final class Grapheme {
private static final int FIRST_TYPE = 0;
private static final int LAST_TYPE = 14;
private static boolean[][] rules;
private static final boolean[][] rules;
static {
rules = new boolean[LAST_TYPE + 1][LAST_TYPE + 1];
// GB 999 Any + Any -> default
@ -201,8 +201,9 @@ final class Grapheme {
if (cp == 0x200D)
return ZWJ;
if (cp >= 0x0600 && cp <= 0x0605 ||
cp == 0x06DD || cp == 0x070F || cp == 0x08E2 ||
cp == 0x110BD || cp == 0x110CD)
cp == 0x06DD || cp == 0x070F ||
cp == 0x0890 || cp == 0x0891 ||
cp == 0x08E2 || cp == 0x110BD || cp == 0x110CD)
return PREPEND;
return CONTROL;
case Character.NON_SPACING_MARK:

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -76,9 +76,6 @@ public final class Punycode {
// TODO: eliminate the 256 limitation
private static final int MAX_CP_COUNT = 256;
private static final int UINT_MAGIC = 0x80000000;
private static final long ULONG_MAGIC = 0x8000000000000000L;
private static int adaptBias(int delta, int length, boolean firstTime){
if(firstTime){
delta /=DAMP;
@ -96,34 +93,25 @@ public final class Punycode {
}
/**
* basicToDigit[] contains the numeric value of a basic code
* point (for use in representing integers) in the range 0 to
* BASE-1, or -1 if b is does not represent a value.
* @return the numeric value of a basic code point (for use in representing integers)
* in the range 0 to BASE-1, or a negative value if cp is invalid.
*/
static final int[] basicToDigit= new int[]{
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1,
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
private static final int decodeDigit(int cp) {
if(cp<='Z') {
if(cp<='9') {
if(cp<'0') {
return -1;
} else {
return cp-'0'+26; // 0..9 -> 26..35
}
} else {
return cp-'A'; // A-Z -> 0..25
}
} else if(cp<='z') {
return cp-'a'; // a..z -> 0..25
} else {
return -1;
}
};
private static char asciiCaseMap(char b, boolean uppercase) {
@ -158,6 +146,12 @@ public final class Punycode {
return (char)((ZERO-26)+digit);
}
}
// ICU-13727: Limit input length for n^2 algorithm
// where well-formed strings are at most 59 characters long.
private static final int ENCODE_MAX_CODE_UNITS = 1000;
private static final int DECODE_MAX_CHARS = 2000;
/**
* Converts Unicode to Punycode.
* The input string must not contain single, unpaired surrogates.
@ -174,6 +168,10 @@ public final class Punycode {
int n, delta, handledCPCount, basicLength, destLength, bias, j, m, q, k, t, srcCPCount;
char c, c2;
int srcLength = src.length();
if (srcLength > ENCODE_MAX_CODE_UNITS) {
throw new RuntimeException(
"input too long: " + srcLength + " UTF-16 code units");
}
int destCapacity = MAX_CP_COUNT;
char[] dest = new char[destCapacity];
StringBuffer result = new StringBuffer();
@ -251,7 +249,7 @@ public final class Punycode {
* Increase delta enough to advance the decoder's
* <n,i> state to <m,0>, but guard against overflow:
*/
if(m-n>(0x7fffffff-MAX_CP_COUNT-delta)/(handledCPCount+1)) {
if(m-n>(0x7fffffff-handledCPCount-delta)/(handledCPCount+1)) {
throw new RuntimeException("Internal program error");
}
delta+=(m-n)*(handledCPCount+1);
@ -332,6 +330,9 @@ public final class Punycode {
public static StringBuffer decode(StringBuffer src, boolean[] caseFlags)
throws ParseException{
int srcLength = src.length();
if (srcLength > DECODE_MAX_CHARS) {
throw new RuntimeException("input too long: " + srcLength + " characters");
}
StringBuffer result = new StringBuffer();
int n, destLength, i, bias, basicLength, j, in, oldi, w, k, digit, t,
destCPCount, firstSupplementaryIndex, cpLength;
@ -395,7 +396,7 @@ public final class Punycode {
throw new ParseException("Illegal char found", -1);
}
digit=basicToDigit[(byte)src.charAt(in++)];
digit=decodeDigit(src.charAt(in++));
if(digit<0) {
throw new ParseException("Invalid char found", -1);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -134,9 +134,15 @@ public class UnicodeSetStringSpan {
int i, spanLength;
someRelevant = false;
for (i = 0; i < stringsLength; ++i) {
for (i = 0; i < stringsLength;) {
String string = strings.get(i);
int length16 = string.length();
if (length16 == 0) {
// Remove the empty string.
strings.remove(i);
--stringsLength;
continue;
}
spanLength = spanSet.span(string, SpanCondition.CONTAINED);
if (spanLength < length16) { // Relevant string.
someRelevant = true;
@ -144,6 +150,7 @@ public class UnicodeSetStringSpan {
if (/* (0 != (which & UTF16)) && */ length16 > maxLength16) {
maxLength16 = length16;
}
++i;
}
if (!someRelevant && (which & WITH_COUNT) == 0) {
return;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2009, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2009, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -84,7 +84,7 @@ import jdk.internal.icu.util.VersionInfo;
* <p>
* Further detail on differences can be determined using the program
* <a href=
* "http://source.icu-project.org/repos/icu/icu4j/trunk/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java">
* "https://github.com/unicode-org/icu/blob/main/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java">
* com.ibm.icu.dev.test.lang.UCharacterCompare</a>
* </p>
* <p>
@ -101,9 +101,9 @@ import jdk.internal.icu.util.VersionInfo;
* For more information see
* <a href="http://www.unicode/org/ucd/">"About the Unicode Character Database"</a>
* (http://www.unicode.org/ucd/)
* and the <a href="http://www.icu-project.org/userguide/properties.html">ICU
* and the <a href="https://unicode-org.github.io/icu/userguide/strings/properties">ICU
* User Guide chapter on Properties</a>
* (http://www.icu-project.org/userguide/properties.html).
* (https://unicode-org.github.io/icu/userguide/strings/properties).
* </p>
* <p>
* There are also functions that provide easy migration from C/POSIX functions

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -53,7 +53,7 @@ public final class UCharacterDirection implements UCharacterEnums.ECharacterDire
// private constructor =========================================
///CLOVER:OFF
/**
* Private constructor to prevent initialisation
* Private constructor to prevent initialization
*/
private UCharacterDirection()
{

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -62,7 +62,7 @@ package jdk.internal.icu.lang;
@Deprecated
class UCharacterEnums {
/** This is just a namespace, it is not instantiatable. */
/** This is just a namespace, it is not instantiable. */
private UCharacterEnums() {};
/**

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2009, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2009, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -63,7 +63,7 @@ import jdk.internal.icu.impl.UBiDiProps;
*
* This is an implementation of the Unicode Bidirectional Algorithm. The
* algorithm is defined in the
* <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
* <a href="https://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
* Unicode Bidirectional Algorithm</a>.
* <p>
*
@ -985,7 +985,7 @@ public class BidiBase {
/**
* Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).
* Used in
* <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
* <a href="https://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
* Unicode Bidirectional Algorithm</a>.
* Returns UCharacter.BidiPairedBracketType values.
* @stable ICU 52
@ -3365,7 +3365,7 @@ public class BidiBase {
/**
* Perform the Unicode Bidi algorithm. It is defined in the
* <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
* <a href="https://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
* Unicode Bidirectional Algorithm</a>, version 13,
* also described in The Unicode Standard, Version 4.0 .<p>
*
@ -3450,7 +3450,7 @@ public class BidiBase {
/**
* Perform the Unicode Bidi algorithm. It is defined in the
* <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
* <a href="https://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
* Unicode Bidirectional Algorithm</a>, version 13,
* also described in The Unicode Standard, Version 4.0 .<p>
*
@ -3786,7 +3786,7 @@ public class BidiBase {
/**
* Perform the Unicode Bidi algorithm on a given paragraph, as defined in the
* <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
* <a href="https://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
* Unicode Bidirectional Algorithm</a>, version 13,
* also described in The Unicode Standard, Version 4.0 .<p>
*

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2009, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2009, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -47,7 +47,7 @@ final class BidiLine {
* text in a single paragraph or in a line of a single paragraph
* which has already been processed according to
* the Unicode 3.0 Bidi algorithm as defined in
* <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
* <a href="https://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
* Unicode Bidirectional Algorithm</a>, version 13,
* also described in The Unicode Standard, Version 4.0.1 .
*

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -43,7 +43,7 @@ import jdk.internal.icu.impl.Norm2AllModes;
* The primary functions are to produce a normalized string and to detect whether
* a string is already normalized.
* The most commonly used normalization forms are those defined in
* <a href="http://www.unicode.org/reports/tr15/">Unicode Standard Annex #15:
* <a href="https://www.unicode.org/reports/tr15/">Unicode Standard Annex #15:
* Unicode Normalization Forms</a>.
* However, this API supports additional normalization forms for specialized purposes.
* For example, NFKC_Casefold is provided via getInstance("nfkc_cf", COMPOSE)

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -44,7 +44,7 @@ import java.text.Normalizer;
* <code>normalize</code> transforms Unicode text into an equivalent composed or
* decomposed form, allowing for easier sorting and searching of text.
* <code>normalize</code> supports the standard normalization forms described in
* <a href="http://www.unicode.org/reports/tr15/" target="unicode">
* <a href="https://www.unicode.org/reports/tr15/" target="unicode">
* Unicode Standard Annex #15 &mdash; Unicode Normalization Forms</a>.
*
* Characters with accents or other adornments can be encoded in

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -67,9 +67,9 @@ import jdk.internal.icu.util.VersionInfo;
* <li> Unassigned Table: Contains code points that are unassigned
* in the Unicode Version supported by StringPrep. Currently
* RFC 3454 supports Unicode 3.2. </li>
* <li> Prohibited Table: Contains code points that are prohibted from
* <li> Prohibited Table: Contains code points that are prohibited from
* the output of the StringPrep processing function. </li>
* <li> Mapping Table: Contains code ponts that are deleted from the output or case mapped. </li>
* <li> Mapping Table: Contains code points that are deleted from the output or case mapped. </li>
* </ul>
*
* The procedure for preparing Unicode strings:
@ -226,8 +226,8 @@ public final class StringPrep {
sprepUniVer = getVersionInfo(reader.getUnicodeVersion());
normCorrVer = getVersionInfo(indexes[NORM_CORRECTNS_LAST_UNI_VERSION]);
VersionInfo normUniVer = UCharacter.getUnicodeVersion();
if(normUniVer.compareTo(sprepUniVer) < 0 && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
normUniVer.compareTo(normCorrVer) < 0 && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
if(normUniVer.compareTo(sprepUniVer) < 0 && /* the Unicode version of SPREP file must be less than the Unicode Version of the normalization data */
normUniVer.compareTo(normCorrVer) < 0 && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Version of the normalization data */
((indexes[OPTIONS] & NORMALIZATION_ON) > 0) /* normalization turned on*/
){
throw new IOException("Normalization Correction version not supported");
@ -325,7 +325,7 @@ public final class StringPrep {
ch -= val.value;
}
}else if(val.type == DELETE){
// just consume the codepoint and contine
// just consume the codepoint and continue
continue;
}
//copy the source into destination

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -147,9 +147,9 @@ public abstract class UCharacterIterator
*/
public int nextCodePoint(){
int ch1 = next();
if(UTF16.isLeadSurrogate((char)ch1)){
if(UTF16.isLeadSurrogate(ch1)){
int ch2 = next();
if(UTF16.isTrailSurrogate((char)ch2)){
if(UTF16.isTrailSurrogate(ch2)){
return UCharacterProperty.getRawSupplementary((char)ch1,
(char)ch2);
}else if (ch2 != DONE) {
@ -175,7 +175,7 @@ public abstract class UCharacterIterator
/**
* Retreat to the start of the previous code point in the text,
* and return it (pre-decrement semantics). If the index is not
* preceeded by a valid surrogate pair, the behavior is the same
* preceded by a valid surrogate pair, the behavior is the same
* as <code>previous()</code>. Otherwise the iterator is
* decremented to the start of the surrogate pair, and the code
* point represented by the pair is returned.
@ -185,9 +185,9 @@ public abstract class UCharacterIterator
*/
public int previousCodePoint(){
int ch1 = previous();
if(UTF16.isTrailSurrogate((char)ch1)){
if(UTF16.isTrailSurrogate(ch1)){
int ch2 = previous();
if(UTF16.isLeadSurrogate((char)ch2)){
if(UTF16.isLeadSurrogate(ch2)){
return UCharacterProperty.getRawSupplementary((char)ch2,
(char)ch1);
}else if (ch2 != DONE) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -382,36 +382,39 @@ public final class UTF16
}
/**
* Determines whether the code value is a surrogate.
* @param char16 the input character.
* @return true if the input character is a surrogate.
* @stable ICU 2.1
* Determines whether the code point is a surrogate.
*
* @param codePoint The input character.
* (In ICU 2.1-69 the type of this parameter was <code>char</code>.)
* @return true If the input code point is a surrogate.
* @stable ICU 70
*/
public static boolean isSurrogate(char char16)
{
return (char16 & SURROGATE_BITMASK) == SURROGATE_BITS;
public static boolean isSurrogate(int codePoint) {
return (codePoint & SURROGATE_BITMASK) == SURROGATE_BITS;
}
/**
* Determines whether the character is a trail surrogate.
* @param char16 the input character.
* @return true if the input character is a trail surrogate.
* @stable ICU 2.1
* Determines whether the code point is a trail surrogate.
*
* @param codePoint The input character.
* (In ICU 2.1-69 the type of this parameter was <code>char</code>.)
* @return true If the input code point is a trail surrogate.
* @stable ICU 70
*/
public static boolean isTrailSurrogate(char char16)
{
return (char16 & TRAIL_SURROGATE_BITMASK) == TRAIL_SURROGATE_BITS;
public static boolean isTrailSurrogate(int codePoint) {
return (codePoint & TRAIL_SURROGATE_BITMASK) == TRAIL_SURROGATE_BITS;
}
/**
* Determines whether the character is a lead surrogate.
* @param char16 the input character.
* @return true if the input character is a lead surrogate
* @stable ICU 2.1
* Determines whether the code point is a lead surrogate.
*
* @param codePoint The input character.
* (In ICU 2.1-69 the type of this parameter was <code>char</code>.)
* @return true If the input code point is a lead surrogate
* @stable ICU 70
*/
public static boolean isLeadSurrogate(char char16)
{
return (char16 & LEAD_SURROGATE_BITMASK) == LEAD_SURROGATE_BITS;
public static boolean isLeadSurrogate(int codePoint) {
return (codePoint & LEAD_SURROGATE_BITMASK) == LEAD_SURROGATE_BITS;
}
/**

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -135,8 +135,8 @@ import jdk.internal.icu.util.VersionInfo;
* "[:Lu:]" and the Perl-like syntax "\p{Lu}" are recognized. For a
* complete list of supported property patterns, see the User's Guide
* for UnicodeSet at
* <a href="http://www.icu-project.org/userguide/unicodeSet.html">
* http://www.icu-project.org/userguide/unicodeSet.html</a>.
* <a href="https://unicode-org.github.io/icu/userguide/strings/unicodeset">
* https://unicode-org.github.io/icu/userguide/strings/unicodeset</a>.
* Actual determination of property data is defined by the underlying
* Unicode database as implemented by UCharacter.
*
@ -147,6 +147,13 @@ import jdk.internal.icu.util.VersionInfo;
* their delimiters; "[:^foo]" and "\P{foo}". In any other location,
* '^' has no special meaning.
*
* <p>Since ICU 70, "[^...]", "[:^foo]", "\P{foo}", and "[:binaryProperty=No:]"
* perform a "code point complement" (all code points minus the original set),
* removing all multicharacter strings,
* equivalent to .{@link #complement()}.{@link #removeAllStrings()} .
* The {@link #complement()} API function continues to perform a
* symmetric difference with all code points and thus retains all multicharacter strings.
*
* <p>Ranges are indicated by placing two a '-' between two
* characters, as in "a-z". This specifies the range of all
* characters from the left to the right, in Unicode order. If the
@ -189,8 +196,6 @@ import jdk.internal.icu.util.VersionInfo;
* Unicode property
* </table>
*
* <p><b>Warning</b>: you cannot add an empty string ("") to a UnicodeSet.</p>
*
* <p><b>Formal syntax</b></p>
*
* <blockquote>
@ -230,9 +235,8 @@ import jdk.internal.icu.util.VersionInfo;
* </tr>
* <tr align="top">
* <td nowrap valign="top" align="right"><code>hex :=&nbsp; </code></td>
* <td valign="top"><em>any character for which
* </em><code>Character.digit(c, 16)</code><em>
* returns a non-negative result</em></td>
* <td style="vertical-align: top;"><code>'0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' |<br>
* &nbsp;&nbsp;&nbsp;&nbsp;'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'a' | 'b' | 'c' | 'd' | 'e' | 'f'</code></td>
* </tr>
* <tr>
* <td nowrap valign="top" align="right"><code>property :=&nbsp; </code></td>
@ -487,7 +491,7 @@ public class UnicodeSet {
else if (i > 0 && c == list[i-1]) {
// c is after end of prior range
list[i-1]++;
// no need to chcek for collapse here
// no need to check for collapse here
}
else {
@ -528,7 +532,6 @@ public class UnicodeSet {
* present. If this set already contains the multicharacter,
* the call leaves this set unchanged.
* Thus {@code "ch" => {"ch"}}
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
* @param s the source string
* @return this object, for chaining
* @stable ICU 2.0
@ -546,22 +549,19 @@ public class UnicodeSet {
/**
* Utility for getting code point from single code point CharSequence.
* See the public UTF16.getSingleCodePoint()
* See the public UTF16.getSingleCodePoint() (which returns -1 for null rather than throwing NPE).
*
* @return a code point IF the string consists of a single one.
* otherwise returns -1.
* @param s to test
*/
private static int getSingleCP(CharSequence s) {
if (s.length() < 1) {
throw new IllegalArgumentException("Can't use zero-length strings in UnicodeSet");
}
if (s.length() > 2) return -1;
if (s.length() == 1) return s.charAt(0);
// at this point, len = 2
int cp = UTF16.charAt(s, 0);
if (cp > 0xFFFF) { // is surrogate pair
return cp;
if (s.length() == 2) {
int cp = Character.codePointAt(s, 0);
if (cp > 0xFFFF) { // is surrogate pair
return cp;
}
}
return -1;
}
@ -569,13 +569,11 @@ public class UnicodeSet {
/**
* Complements the specified range in this set. Any character in
* the range will be removed if it is in this set, or will be
* added if it is not in this set. If {@code end > start}
* added if it is not in this set. If <code>start &gt; end</code>
* then an empty range is complemented, leaving the set unchanged.
*
* @param start first character, inclusive, of range to be removed
* from this set.
* @param end last character, inclusive, of range to be removed
* from this set.
* @param start first character, inclusive, of range
* @param end last character, inclusive, of range
* @stable ICU 2.0
*/
public UnicodeSet complement(int start, int end) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2019, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -43,7 +43,7 @@ import static jdk.internal.icu.impl.NormalizerImpl.UTF16Plus;
/**
* Immutable Unicode code point trie.
* Fast, reasonably compact, map from Unicode code points (U+0000..U+10FFFF) to integer values.
* For details see http://site.icu-project.org/design/struct/utrie
* For details see https://icu.unicode.org/design/struct/utrie
*
* <p>This class is not intended for public subclassing.
*

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -54,7 +54,7 @@ public final class VersionInfo
* @deprecated This API is ICU internal only.
*/
@Deprecated
public static final String ICU_DATA_VERSION_PATH = "67b";
public static final String ICU_DATA_VERSION_PATH = "70b";
// public methods ------------------------------------------------------
@ -148,7 +148,15 @@ public final class VersionInfo
*/
public int compareTo(VersionInfo other)
{
return m_version_ - other.m_version_;
// m_version_ is an int, a signed 32-bit integer.
// When the major version is >=128, then the version int is negative.
// Compare it in two steps to simulate an unsigned-int comparison.
// (Alternatively we could turn each int into a long and reset the upper 32 bits.)
// Compare the upper bits first, using logical shift right (unsigned).
int diff = (m_version_ >>> 1) - (other.m_version_ >>> 1);
if (diff != 0) { return diff; }
// Compare the remaining bits.
return (m_version_ & 1) - (other.m_version_ & 1);
}
// private data members ----------------------------------------------

View File

@ -1,4 +1,4 @@
## International Components for Unicode (ICU4J) v67.1
## International Components for Unicode (ICU4J) v70.1
### ICU4J License
```
@ -80,61 +80,439 @@ of the copyright holder.
All trademarks and registered trademarks mentioned herein are the
property of their respective owners.
2. Chinese/Japanese Word Break Dictionary Data (cjdict.txt)
——————————————————————————————————————————————————————————————————————
# The Google Chrome software developed by Google is licensed under
# the BSD license. Other software included in this distribution is
# provided under other licenses, as set forth below.
#
# The BSD License
# http://opensource.org/licenses/bsd-license.php
# Copyright (C) 2006-2008, Google Inc.
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided with
# the distribution.
# Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
# The word list in cjdict.txt are generated by combining three word lists
# listed below with further processing for compound word breaking. The
# frequency is generated with an iterative training against Google web
# corpora.
#
# * Libtabe (Chinese)
# - https://sourceforge.net/project/?group_id=1519
# - Its license terms and conditions are shown below.
#
# * IPADIC (Japanese)
# - http://chasen.aist-nara.ac.jp/chasen/distribution.html
# - Its license terms and conditions are shown below.
#
# ---------COPYING.libtabe ---- BEGIN--------------------
#
# /*
# * Copyright (c) 1999 TaBE Project.
# * Copyright (c) 1999 Pai-Hsiang Hsiao.
# * All rights reserved.
# *
# * Redistribution and use in source and binary forms, with or without
# * modification, are permitted provided that the following conditions
# * are met:
# *
# * . Redistributions of source code must retain the above copyright
# * notice, this list of conditions and the following disclaimer.
# * . Redistributions in binary form must reproduce the above copyright
# * notice, this list of conditions and the following disclaimer in
# * the documentation and/or other materials provided with the
# * distribution.
# * . Neither the name of the TaBE Project nor the names of its
# * contributors may be used to endorse or promote products derived
# * from this software without specific prior written permission.
# *
# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# * OF THE POSSIBILITY OF SUCH DAMAGE.
# */
#
# /*
# * Copyright (c) 1999 Computer Systems and Communication Lab,
# * Institute of Information Science, Academia
# * Sinica. All rights reserved.
# *
# * Redistribution and use in source and binary forms, with or without
# * modification, are permitted provided that the following conditions
# * are met:
# *
# * . Redistributions of source code must retain the above copyright
# * notice, this list of conditions and the following disclaimer.
# * . Redistributions in binary form must reproduce the above copyright
# * notice, this list of conditions and the following disclaimer in
# * the documentation and/or other materials provided with the
# * distribution.
# * . Neither the name of the Computer Systems and Communication Lab
# * nor the names of its contributors may be used to endorse or
# * promote products derived from this software without specific
# * prior written permission.
# *
# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# * OF THE POSSIBILITY OF SUCH DAMAGE.
# */
#
# Copyright 1996 Chih-Hao Tsai @ Beckman Institute,
# University of Illinois
# c-tsai4@uiuc.edu http://casper.beckman.uiuc.edu/~c-tsai4
#
# ---------------COPYING.libtabe-----END--------------------------------
#
#
# ---------------COPYING.ipadic-----BEGIN-------------------------------
#
# Copyright 2000, 2001, 2002, 2003 Nara Institute of Science
# and Technology. All Rights Reserved.
#
# Use, reproduction, and distribution of this software is permitted.
# Any copy of this software, whether in its original form or modified,
# must include both the above copyright notice and the following
# paragraphs.
#
# Nara Institute of Science and Technology (NAIST),
# the copyright holders, disclaims all warranties with regard to this
# software, including all implied warranties of merchantability and
# fitness, in no event shall NAIST be liable for
# any special, indirect or consequential damages or any damages
# whatsoever resulting from loss of use, data or profits, whether in an
# action of contract, negligence or other tortuous action, arising out
# of or in connection with the use or performance of this software.
#
# A large portion of the dictionary entries
# originate from ICOT Free Software. The following conditions for ICOT
# Free Software applies to the current dictionary as well.
#
# Each User may also freely distribute the Program, whether in its
# original form or modified, to any third party or parties, PROVIDED
# that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear
# on, or be attached to, the Program, which is distributed substantially
# in the same form as set out herein and that such intended
# distribution, if actually made, will neither violate or otherwise
# contravene any of the laws and regulations of the countries having
# jurisdiction over the User or the intended distribution itself.
#
# NO WARRANTY
#
# The program was produced on an experimental basis in the course of the
# research and development conducted during the project and is provided
# to users as so produced on an experimental basis. Accordingly, the
# program is provided without any warranty whatsoever, whether express,
# implied, statutory or otherwise. The term "warranty" used herein
# includes, but is not limited to, any warranty of the quality,
# performance, merchantability and fitness for a particular purpose of
# the program and the nonexistence of any infringement or violation of
# any right of any third party.
#
# Each user of the program will agree and understand, and be deemed to
# have agreed and understood, that there is no warranty whatsoever for
# the program and, accordingly, the entire risk arising from or
# otherwise connected with the program is assumed by the user.
#
# Therefore, neither ICOT, the copyright holder, or any other
# organization that participated in or was otherwise related to the
# development of the program and their respective officials, directors,
# officers and other employees shall be held liable for any and all
# damages, including, without limitation, general, special, incidental
# and consequential damages, arising out of or otherwise in connection
# with the use or inability to use the program or any product, material
# or result produced or otherwise obtained by using the program,
# regardless of whether they have been advised of, or otherwise had
# knowledge of, the possibility of such damages at any time during the
# project or thereafter. Each user will be deemed to have agreed to the
# foregoing by his or her commencement of use of the program. The term
# "use" as used herein includes, but is not limited to, the use,
# modification, copying and distribution of the program and the
# production of secondary products from the program.
#
# In the case where the program, whether in its original form or
# modified, was distributed or delivered to or received by a user from
# any person, organization or entity other than ICOT, unless it makes or
# grants independently of ICOT any specific warranty to the user in
# writing, such person, organization or entity, will also be exempted
# from and not be held liable to the user for any such damages as noted
# above as far as the program is concerned.
#
# ---------------COPYING.ipadic-----END----------------------------------
3. Lao Word Break Dictionary Data (laodict.txt)
From: https://www.unicode.org/copyright.html:
# Copyright (C) 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Copyright (c) 2015 International Business Machines Corporation
# and others. All Rights Reserved.
#
# Project: https://github.com/rober42539/lao-dictionary
# Dictionary: https://github.com/rober42539/lao-dictionary/laodict.txt
# License: https://github.com/rober42539/lao-dictionary/LICENSE.txt
# (copied below)
#
# This file is derived from the above dictionary version of Nov 22, 2020
# ----------------------------------------------------------------------
# Copyright (C) 2013 Brian Eugene Wilson, Robert Martin Campbell.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer. Redistributions in binary
# form must reproduce the above copyright notice, this list of conditions and
# the following disclaimer in the documentation and/or ther materials
# provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# OF THE POSSIBILITY OF SUCH DAMAGE.
# --------------------------------------------------------------------------
Unicode® Copyright and Terms of Use
4. Burmese Word Break Dictionary Data (burmesedict.txt)
For the general privacy policy governing access to this site, see the Unicode Privacy Policy.
# Copyright (c) 2014 International Business Machines Corporation
# and others. All Rights Reserved.
#
# This list is part of a project hosted at:
# github.com/kanyawtech/myanmar-karen-word-lists
#
# --------------------------------------------------------------------------
# Copyright (c) 2013, LeRoy Benjamin Sharon
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met: Redistributions of source code must retain the above
# copyright notice, this list of conditions and the following
# disclaimer. Redistributions in binary form must reproduce the
# above copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided
# with the distribution.
#
# Neither the name Myanmar Karen Word Lists, nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
# THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
# --------------------------------------------------------------------------
Unicode Copyright
Copyright © 1991-2020 Unicode, Inc. All rights reserved.
Definitions
5. Time Zone Database
Unicode Data Files ("DATA FILES") include all data files under the directories:
https://www.unicode.org/Public/
https://www.unicode.org/reports/
https://www.unicode.org/ivd/data/
ICU uses the public domain data and code derived from Time Zone
Database for its time zone support. The ownership of the TZ database
is explained in BCP 175: Procedure for Maintaining the Time Zone
Database section 7.
Unicode Data Files do not include PDF online code charts under the directory:
https://www.unicode.org/Public/
# 7. Database Ownership
#
# The TZ database itself is not an IETF Contribution or an IETF
# document. Rather it is a pre-existing and regularly updated work
# that is in the public domain, and is intended to remain in the
# public domain. Therefore, BCPs 78 [RFC5378] and 79 [RFC3979] do
# not apply to the TZ Database or contributions that individuals make
# to it. Should any claims be made and substantiated against the TZ
# Database, the organization that is providing the IANA
# Considerations defined in this RFC, under the memorandum of
# understanding with the IETF, currently ICANN, may act in accordance
# with all competent court orders. No ownership claims will be made
# by ICANN or the IETF Trust on the database or the code. Any person
# making a contribution to the database or code waives all rights to
# future claims in that contribution or in the TZ Database.
Unicode Software ("SOFTWARE") includes any source code published in the Unicode Standard
or any source code or compiled code under the directories:
https://www.unicode.org/Public/PROGRAMS/
https://www.unicode.org/Public/cldr/
http://site.icu-project.org/download/
6. Google double-conversion
Terms of Use
Certain documents and files on this website contain a legend indicating that "Modification is permitted." Any person is hereby authorized, without fee, to modify such documents and files to create derivative works conforming to the Unicode® Standard, subject to Terms and Conditions herein.
Any person is hereby authorized, without fee, to view, use, reproduce, and distribute all documents and files, subject to the Terms and Conditions herein.
Further specifications of rights and restrictions pertaining to the use of the Unicode DATA FILES and SOFTWARE can be found in the Unicode Data Files and Software License.
Each version of the Unicode Standard has further specifications of rights and restrictions of use. For the book editions (Unicode 5.0 and earlier), these are found on the back of the title page.
The Unicode PDF online code charts carry specific restrictions. Those restrictions are incorporated as the first page of each PDF code chart.
All other files, including online documentation of the core specification for Unicode 6.0 and later, are covered under these general Terms of Use.
No license is granted to "mirror" the Unicode website where a fee is charged for access to the "mirror" site.
Modification is not permitted with respect to this document. All copies of this document must be verbatim.
Restricted Rights Legend
Any technical data or software which is licensed to the United States of America, its agencies and/or instrumentalities under this Agreement is commercial technical data or commercial computer software developed exclusively at private expense as defined in FAR 2.101, or DFARS 252.227-7014 (June 1995), as applicable. For technical data, use, duplication, or disclosure by the Government is subject to restrictions as set forth in DFARS 202.227-7015 Technical Data, Commercial and Items (Nov 1995) and this Agreement. For Software, in accordance with FAR 12-212 or DFARS 227-7202, as applicable, use, duplication or disclosure by the Government is subject to the restrictions set forth in this Agreement.
Warranties and Disclaimers
This publication and/or website may include technical or typographical errors or other inaccuracies. Changes are periodically added to the information herein; these changes will be incorporated in new editions of the publication and/or website. Unicode, Inc. may make improvements and/or changes in the product(s) and/or program(s) described in this publication and/or website at any time.
If this file has been purchased on magnetic or optical media from Unicode, Inc. the sole and exclusive remedy for any claim will be exchange of the defective media within ninety (90) days of original purchase.
EXCEPT AS PROVIDED IN SECTION E.2, THIS PUBLICATION AND/OR SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND EITHER EXPRESS, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. UNICODE, INC. AND ITS LICENSORS ASSUME NO RESPONSIBILITY FOR ERRORS OR OMISSIONS IN THIS PUBLICATION AND/OR SOFTWARE OR OTHER DOCUMENTS WHICH ARE REFERENCED BY OR LINKED TO THIS PUBLICATION OR THE UNICODE WEBSITE.
Waiver of Damages
In no event shall Unicode, Inc. or its licensors be liable for any special, incidental, indirect or consequential damages of any kind, or any damages whatsoever, whether or not Unicode, Inc. was advised of the possibility of the damage, including, without limitation, those resulting from the following: loss of use, data or profits, in connection with the use, modification or distribution of this information or its derivatives.
Trademarks & Logos
The Unicode Word Mark and the Unicode Logo are trademarks of Unicode, Inc. “The Unicode Consortium” and “Unicode, Inc.” are trade names of Unicode, Inc. Use of the information and materials found on this website indicates your acknowledgement of Unicode, Inc.s exclusive worldwide rights in the Unicode Word Mark, the Unicode Logo, and the Unicode trade names.
The Unicode Consortium Name and Trademark Usage Policy (“Trademark Policy”) are incorporated herein by reference and you agree to abide by the provisions of the Trademark Policy, which may be changed from time to time in the sole discretion of Unicode, Inc.
All third party trademarks referenced herein are the property of their respective owners.
Miscellaneous
Jurisdiction and Venue. This website is operated from a location in the State of California, United States of America. Unicode, Inc. makes no representation that the materials are appropriate for use in other locations. If you access this website from other locations, you are responsible for compliance with local laws. This Agreement, all use of this website and any claims and damages resulting from use of this website are governed solely by the laws of the State of California without regard to any principles which would apply the laws of a different jurisdiction. The user agrees that any disputes regarding this website shall be resolved solely in the courts located in Santa Clara County, California. The user agrees said courts have personal jurisdiction and agree to waive any right to transfer the dispute to any other forum.
Modification by Unicode, Inc. Unicode, Inc. shall have the right to modify this Agreement at any time by posting it to this website. The user may not assign any part of this Agreement without Unicode, Inc.s prior written consent.
Taxes. The user agrees to pay any taxes arising from access to this website or use of the information herein, except for those based on Unicodes net income.
Severability. If any provision of this Agreement is declared invalid or unenforceable, the remaining provisions of this Agreement shall remain in effect.
Entire Agreement. This Agreement constitutes the entire agreement between the parties.
Copyright 2006-2011, the V8 project authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
====================================================
Unicode® Copyright and Terms of Use
For the general privacy policy governing access to this site, see the Unicode Privacy Policy.
Unicode Copyright
Copyright © 1991-2021 Unicode, Inc. All rights reserved.
Definitions
Unicode Data Files ("DATA FILES") include all data files under the directories:
https://www.unicode.org/Public/
https://www.unicode.org/reports/
https://www.unicode.org/ivd/data/
Unicode Data Files do not include PDF online code charts under the directory:
https://www.unicode.org/Public/
Unicode Software ("SOFTWARE") includes any source code published in the Unicode Standard
or any source code or compiled code under the directories:
https://www.unicode.org/Public/PROGRAMS/
https://www.unicode.org/Public/cldr/
http://site.icu-project.org/download/
Terms of Use
Certain documents and files on this website contain a legend indicating that "Modification is permitted." Any person is hereby authorized, without fee, to modify such documents and files to create derivative works conforming to the Unicode® Standard, subject to Terms and Conditions herein.
Any person is hereby authorized, without fee, to view, use, reproduce, and distribute all documents and files, subject to the Terms and Conditions herein.
Further specifications of rights and restrictions pertaining to the use of the Unicode DATA FILES and SOFTWARE can be found in the Unicode Data Files and Software License.
Each version of the Unicode Standard has further specifications of rights and restrictions of use. For the book editions (Unicode 5.0 and earlier), these are found on the back of the title page.
The Unicode PDF online code charts carry specific restrictions. Those restrictions are incorporated as the first page of each PDF code chart.
All other files, including online documentation of the core specification for Unicode 6.0 and later, are covered under these general Terms of Use.
No license is granted to "mirror" the Unicode website where a fee is charged for access to the "mirror" site.
Modification is not permitted with respect to this document. All copies of this document must be verbatim.
Restricted Rights Legend
Any technical data or software which is licensed to the United States of America, its agencies and/or instrumentalities under this Agreement is commercial technical data or commercial computer software developed exclusively at private expense as defined in FAR 2.101, or DFARS 252.227-7014 (June 1995), as applicable. For technical data, use, duplication, or disclosure by the Government is subject to restrictions as set forth in DFARS 202.227-7015 Technical Data, Commercial and Items (Nov 1995) and this Agreement. For Software, in accordance with FAR 12-212 or DFARS 227-7202, as applicable, use, duplication or disclosure by the Government is subject to the restrictions set forth in this Agreement.
Warranties and Disclaimers
This publication and/or website may include technical or typographical errors or other inaccuracies. Changes are periodically added to the information herein; these changes will be incorporated in new editions of the publication and/or website. Unicode, Inc. may make improvements and/or changes in the product(s) and/or program(s) described in this publication and/or website at any time.
If this file has been purchased on magnetic or optical media from Unicode, Inc. the sole and exclusive remedy for any claim will be exchange of the defective media within ninety (90) days of original purchase.
EXCEPT AS PROVIDED IN SECTION E.2, THIS PUBLICATION AND/OR SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND EITHER EXPRESS, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. UNICODE, INC. AND ITS LICENSORS ASSUME NO RESPONSIBILITY FOR ERRORS OR OMISSIONS IN THIS PUBLICATION AND/OR SOFTWARE OR OTHER DOCUMENTS WHICH ARE REFERENCED BY OR LINKED TO THIS PUBLICATION OR THE UNICODE WEBSITE.
Waiver of Damages
In no event shall Unicode, Inc. or its licensors be liable for any special, incidental, indirect or consequential damages of any kind, or any damages whatsoever, whether or not Unicode, Inc. was advised of the possibility of the damage, including, without limitation, those resulting from the following: loss of use, data or profits, in connection with the use, modification or distribution of this information or its derivatives.
Trademarks & Logos
The Unicode Word Mark and the Unicode Logo are trademarks of Unicode, Inc. “The Unicode Consortium” and “Unicode, Inc.” are trade names of Unicode, Inc. Use of the information and materials found on this website indicates your acknowledgement of Unicode, Inc.s exclusive worldwide rights in the Unicode Word Mark, the Unicode Logo, and the Unicode trade names.
The Unicode Consortium Name and Trademark Usage Policy (“Trademark Policy”) are incorporated herein by reference and you agree to abide by the provisions of the Trademark Policy, which may be changed from time to time in the sole discretion of Unicode, Inc.
All third party trademarks referenced herein are the property of their respective owners.
Miscellaneous
Jurisdiction and Venue. This website is operated from a location in the State of California, United States of America. Unicode, Inc. makes no representation that the materials are appropriate for use in other locations. If you access this website from other locations, you are responsible for compliance with local laws. This Agreement, all use of this website and any claims and damages resulting from use of this website are governed solely by the laws of the State of California without regard to any principles which would apply the laws of a different jurisdiction. The user agrees that any disputes regarding this website shall be resolved solely in the courts located in Santa Clara County, California. The user agrees said courts have personal jurisdiction and agree to waive any right to transfer the dispute to any other forum.
Modification by Unicode, Inc. Unicode, Inc. shall have the right to modify this Agreement at any time by posting it to this website. The user may not assign any part of this Agreement without Unicode, Inc.s prior written consent.
Taxes. The user agrees to pay any taxes arising from access to this website or use of the information herein, except for those based on Unicodes net income.
Severability. If any provision of this Agreement is declared invalid or unenforceable, the remaining provisions of this Agreement shall remain in effect.
Entire Agreement. This Agreement constitutes the entire agreement between the parties.
=======================================================
UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
See Terms of Use
for definitions of Unicode Inc.s Data Files and Software.
NOTICE TO USER: Carefully read the following legal agreement.
BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
TERMS AND CONDITIONS OF THIS AGREEMENT.
IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
THE DATA FILES OR SOFTWARE.
COPYRIGHT AND PERMISSION NOTICE
Copyright © 1991-2021 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
Permission is hereby granted, free of charge, to any person obtaining
a copy of the Unicode data files and any associated documentation
(the "Data Files") or Unicode software and any associated documentation
(the "Software") to deal in the Data Files or Software
without restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, and/or sell copies of
the Data Files or Software, and to permit persons to whom the Data Files
or Software are furnished to do so, provided that either
(a) this copyright and permission notice appear with all copies
of the Data Files or Software, or
(b) this copyright and permission notice appear in associated
Documentation.
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT OF THIRD PARTY RIGHTS.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
Except as contained in this notice, the name of a copyright holder
shall not be used in advertising or otherwise to promote the sale,
use or other dealings in these Data Files or Software without prior
written authorization of the copyright holder.
```

View File

@ -1,5 +1,5 @@
## The Unicode Standard, Unicode Character Database, Version 13.0.0
## The Unicode Standard, Unicode Character Database, Version 14.0.0
### Unicode Character Database
```
@ -18,7 +18,7 @@ THE DATA FILES OR SOFTWARE.
COPYRIGHT AND PERMISSION NOTICE
Copyright © 1991-2020 Unicode, Inc. All rights reserved.
Copyright © 1991-2021 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
Permission is hereby granted, free of charge, to any person obtaining
@ -50,5 +50,54 @@ shall not be used in advertising or otherwise to promote the sale,
use or other dealings in these Data Files or Software without prior
written authorization of the copyright holder.
=== http://www.unicode.org/copyright.html content ===
Unicode (R) Copyright and Terms of Use
For the general privacy policy governing access to this site, see the Unicode Privacy Policy.
Unicode Copyright
Copyright (C) 1991-2021 Unicode, Inc. All rights reserved.
Definitions
Unicode Data Files ("DATA FILES") include all data files under the directories:
https://www.unicode.org/Public/
https://www.unicode.org/reports/
https://www.unicode.org/ivd/data/
Unicode Data Files do not include PDF online code charts under the directory:
https://www.unicode.org/Public/
Unicode Software ("SOFTWARE") includes any source code published in the Unicode Standard
or any source code or compiled code under the directories:
https://www.unicode.org/Public/PROGRAMS/
https://www.unicode.org/Public/cldr/
http://site.icu-project.org/download/
Terms of Use
Certain documents and files on this website contain a legend indicating that "Modification is permitted." Any person is hereby authorized, without fee, to modify such documents and files to create derivative works conforming to the Unicode® Standard, subject to Terms and Conditions herein.
Any person is hereby authorized, without fee, to view, use, reproduce, and distribute all documents and files, subject to the Terms and Conditions herein.
Further specifications of rights and restrictions pertaining to the use of the Unicode DATA FILES and SOFTWARE can be found in the Unicode Data Files and Software License.
Each version of the Unicode Standard has further specifications of rights and restrictions of use. For the book editions (Unicode 5.0 and earlier), these are found on the back of the title page.
The Unicode PDF online code charts carry specific restrictions. Those restrictions are incorporated as the first page of each PDF code chart.
All other files, including online documentation of the core specification for Unicode 6.0 and later, are covered under these general Terms of Use.
No license is granted to "mirror" the Unicode website where a fee is charged for access to the "mirror" site.
Modification is not permitted with respect to this document. All copies of this document must be verbatim.
Restricted Rights Legend
Any technical data or software which is licensed to the United States of America, its agencies and/or instrumentalities under this Agreement is commercial technical data or commercial computer software developed exclusively at private expense as defined in FAR 2.101, or DFARS 252.227-7014 (June 1995), as applicable. For technical data, use, duplication, or disclosure by the Government is subject to restrictions as set forth in DFARS 202.227-7015 Technical Data, Commercial and Items (Nov 1995) and this Agreement. For Software, in accordance with FAR 12-212 or DFARS 227-7202, as applicable, use, duplication or disclosure by the Government is subject to the restrictions set forth in this Agreement.
Warranties and Disclaimers
This publication and/or website may include technical or typographical errors or other inaccuracies. Changes are periodically added to the information herein; these changes will be incorporated in new editions of the publication and/or website. Unicode, Inc. may make improvements and/or changes in the product(s) and/or program(s) described in this publication and/or website at any time.
If this file has been purchased on magnetic or optical media from Unicode, Inc. the sole and exclusive remedy for any claim will be exchange of the defective media within ninety (90) days of original purchase.
EXCEPT AS PROVIDED IN SECTION E.2, THIS PUBLICATION AND/OR SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND EITHER EXPRESS, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. UNICODE, INC. AND ITS LICENSORS ASSUME NO RESPONSIBILITY FOR ERRORS OR OMISSIONS IN THIS PUBLICATION AND/OR SOFTWARE OR OTHER DOCUMENTS WHICH ARE REFERENCED BY OR LINKED TO THIS PUBLICATION OR THE UNICODE WEBSITE.
Waiver of Damages
In no event shall Unicode, Inc. or its licensors be liable for any special, incidental, indirect or consequential damages of any kind, or any damages whatsoever, whether or not Unicode, Inc. was advised of the possibility of the damage, including, without limitation, those resulting from the following: loss of use, data or profits, in connection with the use, modification or distribution of this information or its derivatives.
Trademarks & Logos
The Unicode Word Mark and the Unicode Logo are trademarks of Unicode, Inc. “The Unicode Consortium” and “Unicode, Inc.” are trade names of Unicode, Inc. Use of the information and materials found on this website indicates your acknowledgement of Unicode, Inc.s exclusive worldwide rights in the Unicode Word Mark, the Unicode Logo, and the Unicode trade names.
The Unicode Consortium Name and Trademark Usage Policy (“Trademark Policy”) are incorporated herein by reference and you agree to abide by the provisions of the Trademark Policy, which may be changed from time to time in the sole discretion of Unicode, Inc.
All third party trademarks referenced herein are the property of their respective owners.
Miscellaneous
Jurisdiction and Venue. This website is operated from a location in the State of California, United States of America. Unicode, Inc. makes no representation that the materials are appropriate for use in other locations. If you access this website from other locations, you are responsible for compliance with local laws. This Agreement, all use of this website and any claims and damages resulting from use of this website are governed solely by the laws of the State of California without regard to any principles which would apply the laws of a different jurisdiction. The user agrees that any disputes regarding this website shall be resolved solely in the courts located in Santa Clara County, California. The user agrees said courts have personal jurisdiction and agree to waive any right to transfer the dispute to any other forum.
Modification by Unicode, Inc. Unicode, Inc. shall have the right to modify this Agreement at any time by posting it to this website. The user may not assign any part of this Agreement without Unicode, Inc.s prior written consent.
Taxes. The user agrees to pay any taxes arising from access to this website or use of the information herein, except for those based on Unicodes net income.
Severability. If any provision of this Agreement is declared invalid or unenforceable, the remaining provisions of this Agreement shall remain in effect.
Entire Agreement. This Agreement constitutes the entire agreement between the parties.
```

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -23,7 +23,7 @@
/**
* @test
* @bug 8080535 8191410 8215194 8221431 8239383
* @bug 8080535 8191410 8215194 8221431 8239383 8268081
* @summary Expected size of Character.UnicodeBlock.map is not optimal
* @library /test/lib
* @modules java.base/java.lang:open
@ -48,13 +48,14 @@ import jdk.test.lib.util.OptimalCapacity;
// As of Unicode 11, 667 entries are expected.
// As of Unicode 12.1, 676 entries are expected.
// As of Unicode 13.0, 684 entries are expected.
// As of Unicode 14.0, 696 entries are expected.
//
// Initialization of the map and this test will have to be adjusted
// accordingly then.
//
// Note that HashMap's implementation aligns the initial capacity to
// a power of two size, so it will end up 1024 (and thus succeed) in
// cases, such as 638, 667, 676, and 684.
// cases, such as 638, 667, 676, 684, and 696.
public class OptimalMapSize {
public static void main(String[] args) throws Throwable {
@ -63,7 +64,7 @@ public class OptimalMapSize {
Field f = Character.UnicodeBlock.class.getDeclaredField("NUM_ENTITIES");
f.setAccessible(true);
int num_entities = f.getInt(null);
assert num_entities == 684;
assert num_entities == 696;
int initialCapacity = (int)(num_entities / 0.75f + 1.0f);
OptimalCapacity.ofHashMap(Character.UnicodeBlock.class,