8268081: Upgrade Unicode Data Files to 14.0.0
Reviewed-by: joehw, iris, lancea
This commit is contained in:
parent
ddddec7d74
commit
0a094d7c28
@ -1,6 +1,6 @@
|
||||
# Blocks-13.0.0.txt
|
||||
# Date: 2019-07-10, 19:06:00 GMT [KW]
|
||||
# Copyright (c) 2019 Unicode, Inc.
|
||||
# Blocks-14.0.0.txt
|
||||
# Date: 2021-01-22, 23:29:00 GMT [KW]
|
||||
# Copyright (c) 2021 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
# Unicode Character Database
|
||||
@ -52,6 +52,7 @@
|
||||
0800..083F; Samaritan
|
||||
0840..085F; Mandaic
|
||||
0860..086F; Syriac Supplement
|
||||
0870..089F; Arabic Extended-B
|
||||
08A0..08FF; Arabic Extended-A
|
||||
0900..097F; Devanagari
|
||||
0980..09FF; Bengali
|
||||
@ -215,7 +216,9 @@ FFF0..FFFF; Specials
|
||||
104B0..104FF; Osage
|
||||
10500..1052F; Elbasan
|
||||
10530..1056F; Caucasian Albanian
|
||||
10570..105BF; Vithkuqi
|
||||
10600..1077F; Linear A
|
||||
10780..107BF; Latin Extended-F
|
||||
10800..1083F; Cypriot Syllabary
|
||||
10840..1085F; Imperial Aramaic
|
||||
10860..1087F; Palmyrene
|
||||
@ -240,6 +243,7 @@ FFF0..FFFF; Specials
|
||||
10E80..10EBF; Yezidi
|
||||
10F00..10F2F; Old Sogdian
|
||||
10F30..10F6F; Sogdian
|
||||
10F70..10FAF; Old Uyghur
|
||||
10FB0..10FDF; Chorasmian
|
||||
10FE0..10FFF; Elymaic
|
||||
11000..1107F; Brahmi
|
||||
@ -259,13 +263,14 @@ FFF0..FFFF; Specials
|
||||
11600..1165F; Modi
|
||||
11660..1167F; Mongolian Supplement
|
||||
11680..116CF; Takri
|
||||
11700..1173F; Ahom
|
||||
11700..1174F; Ahom
|
||||
11800..1184F; Dogra
|
||||
118A0..118FF; Warang Citi
|
||||
11900..1195F; Dives Akuru
|
||||
119A0..119FF; Nandinagari
|
||||
11A00..11A4F; Zanabazar Square
|
||||
11A50..11AAF; Soyombo
|
||||
11AB0..11ABF; Unified Canadian Aboriginal Syllabics Extended-A
|
||||
11AC0..11AFF; Pau Cin Hau
|
||||
11C00..11C6F; Bhaiksuki
|
||||
11C70..11CBF; Marchen
|
||||
@ -277,11 +282,13 @@ FFF0..FFFF; Specials
|
||||
12000..123FF; Cuneiform
|
||||
12400..1247F; Cuneiform Numbers and Punctuation
|
||||
12480..1254F; Early Dynastic Cuneiform
|
||||
12F90..12FFF; Cypro-Minoan
|
||||
13000..1342F; Egyptian Hieroglyphs
|
||||
13430..1343F; Egyptian Hieroglyph Format Controls
|
||||
14400..1467F; Anatolian Hieroglyphs
|
||||
16800..16A3F; Bamum Supplement
|
||||
16A40..16A6F; Mro
|
||||
16A70..16ACF; Tangsa
|
||||
16AD0..16AFF; Bassa Vah
|
||||
16B00..16B8F; Pahawh Hmong
|
||||
16E40..16E9F; Medefaidrin
|
||||
@ -290,13 +297,15 @@ FFF0..FFFF; Specials
|
||||
17000..187FF; Tangut
|
||||
18800..18AFF; Tangut Components
|
||||
18B00..18CFF; Khitan Small Script
|
||||
18D00..18D8F; Tangut Supplement
|
||||
18D00..18D7F; Tangut Supplement
|
||||
1AFF0..1AFFF; Kana Extended-B
|
||||
1B000..1B0FF; Kana Supplement
|
||||
1B100..1B12F; Kana Extended-A
|
||||
1B130..1B16F; Small Kana Extension
|
||||
1B170..1B2FF; Nushu
|
||||
1BC00..1BC9F; Duployan
|
||||
1BCA0..1BCAF; Shorthand Format Controls
|
||||
1CF00..1CFCF; Znamenny Musical Notation
|
||||
1D000..1D0FF; Byzantine Musical Symbols
|
||||
1D100..1D1FF; Musical Symbols
|
||||
1D200..1D24F; Ancient Greek Musical Notation
|
||||
@ -305,9 +314,12 @@ FFF0..FFFF; Specials
|
||||
1D360..1D37F; Counting Rod Numerals
|
||||
1D400..1D7FF; Mathematical Alphanumeric Symbols
|
||||
1D800..1DAAF; Sutton SignWriting
|
||||
1DF00..1DFFF; Latin Extended-G
|
||||
1E000..1E02F; Glagolitic Supplement
|
||||
1E100..1E14F; Nyiakeng Puachue Hmong
|
||||
1E290..1E2BF; Toto
|
||||
1E2C0..1E2FF; Wancho
|
||||
1E7E0..1E7FF; Ethiopic Extended-B
|
||||
1E800..1E8DF; Mende Kikakui
|
||||
1E900..1E95F; Adlam
|
||||
1EC70..1ECBF; Indic Siyaq Numbers
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,6 @@
|
||||
# PropList-13.0.0.txt
|
||||
# Date: 2019-11-27, 03:13:28 GMT
|
||||
# Copyright (c) 2019 Unicode, Inc.
|
||||
# PropList-14.0.0.txt
|
||||
# Date: 2021-08-12, 23:13:05 GMT
|
||||
# Copyright (c) 2021 Unicode, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
@ -54,6 +54,7 @@
|
||||
2E1A ; Dash # Pd HYPHEN WITH DIAERESIS
|
||||
2E3A..2E3B ; Dash # Pd [2] TWO-EM DASH..THREE-EM DASH
|
||||
2E40 ; Dash # Pd DOUBLE HYPHEN
|
||||
2E5D ; Dash # Pd OBLIQUE HYPHEN
|
||||
301C ; Dash # Pd WAVE DASH
|
||||
3030 ; Dash # Pd WAVY DASH
|
||||
30A0 ; Dash # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN
|
||||
@ -63,7 +64,7 @@ FE63 ; Dash # Pd SMALL HYPHEN-MINUS
|
||||
FF0D ; Dash # Pd FULLWIDTH HYPHEN-MINUS
|
||||
10EAD ; Dash # Pd YEZIDI HYPHENATION MARK
|
||||
|
||||
# Total code points: 29
|
||||
# Total code points: 30
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -126,7 +127,7 @@ FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET
|
||||
05C3 ; Terminal_Punctuation # Po HEBREW PUNCTUATION SOF PASUQ
|
||||
060C ; Terminal_Punctuation # Po ARABIC COMMA
|
||||
061B ; Terminal_Punctuation # Po ARABIC SEMICOLON
|
||||
061E..061F ; Terminal_Punctuation # Po [2] ARABIC TRIPLE DOT PUNCTUATION MARK..ARABIC QUESTION MARK
|
||||
061D..061F ; Terminal_Punctuation # Po [3] ARABIC END OF TEXT MARK..ARABIC QUESTION MARK
|
||||
06D4 ; Terminal_Punctuation # Po ARABIC FULL STOP
|
||||
0700..070A ; Terminal_Punctuation # Po [11] SYRIAC END OF PARAGRAPH..SYRIAC CONTRACTION
|
||||
070C ; Terminal_Punctuation # Po SYRIAC HARKLEAN METOBELUS
|
||||
@ -150,6 +151,7 @@ FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET
|
||||
1AA8..1AAB ; Terminal_Punctuation # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU
|
||||
1B5A..1B5B ; Terminal_Punctuation # Po [2] BALINESE PANTI..BALINESE PAMADA
|
||||
1B5D..1B5F ; Terminal_Punctuation # Po [3] BALINESE CARIK PAMUNGKAH..BALINESE CARIK PAREREN
|
||||
1B7D..1B7E ; Terminal_Punctuation # Po [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG
|
||||
1C3B..1C3F ; Terminal_Punctuation # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK
|
||||
1C7E..1C7F ; Terminal_Punctuation # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
|
||||
203C..203D ; Terminal_Punctuation # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG
|
||||
@ -159,6 +161,7 @@ FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET
|
||||
2E41 ; Terminal_Punctuation # Po REVERSED COMMA
|
||||
2E4C ; Terminal_Punctuation # Po MEDIEVAL COMMA
|
||||
2E4E..2E4F ; Terminal_Punctuation # Po [2] PUNCTUS ELEVATUS MARK..CORNISH VERSE DIVIDER
|
||||
2E53..2E54 ; Terminal_Punctuation # Po [2] MEDIEVAL EXCLAMATION MARK..MEDIEVAL QUESTION MARK
|
||||
3001..3002 ; Terminal_Punctuation # Po [2] IDEOGRAPHIC COMMA..IDEOGRAPHIC FULL STOP
|
||||
A4FE..A4FF ; Terminal_Punctuation # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP
|
||||
A60D..A60F ; Terminal_Punctuation # Po [3] VAI COMMA..VAI QUESTION MARK
|
||||
@ -189,6 +192,7 @@ FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA
|
||||
10B3A..10B3F ; Terminal_Punctuation # Po [6] TINY TWO DOTS OVER ONE DOT PUNCTUATION..LARGE ONE RING OVER TWO RINGS PUNCTUATION
|
||||
10B99..10B9C ; Terminal_Punctuation # Po [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT
|
||||
10F55..10F59 ; Terminal_Punctuation # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT
|
||||
10F86..10F89 ; Terminal_Punctuation # Po [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS
|
||||
11047..1104D ; Terminal_Punctuation # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS
|
||||
110BE..110C1 ; Terminal_Punctuation # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA
|
||||
11141..11143 ; Terminal_Punctuation # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK
|
||||
@ -220,7 +224,7 @@ FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA
|
||||
1BC9F ; Terminal_Punctuation # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP
|
||||
1DA87..1DA8A ; Terminal_Punctuation # Po [4] SIGNWRITING COMMA..SIGNWRITING COLON
|
||||
|
||||
# Total code points: 267
|
||||
# Total code points: 276
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -600,6 +604,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
|
||||
1A6D..1A72 ; Other_Alphabetic # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI
|
||||
1A73..1A74 ; Other_Alphabetic # Mn [2] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN MAI KANG
|
||||
1ABF..1AC0 ; Other_Alphabetic # Mn [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW
|
||||
1ACC..1ACE ; Other_Alphabetic # Mn [3] COMBINING LATIN SMALL LETTER INSULAR G..COMBINING LATIN SMALL LETTER INSULAR T
|
||||
1B00..1B03 ; Other_Alphabetic # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
|
||||
1B04 ; Other_Alphabetic # Mc BALINESE SIGN BISAH
|
||||
1B35 ; Other_Alphabetic # Mc BALINESE VOWEL SIGN TEDUNG
|
||||
@ -686,10 +691,12 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
|
||||
11001 ; Other_Alphabetic # Mn BRAHMI SIGN ANUSVARA
|
||||
11002 ; Other_Alphabetic # Mc BRAHMI SIGN VISARGA
|
||||
11038..11045 ; Other_Alphabetic # Mn [14] BRAHMI VOWEL SIGN AA..BRAHMI VOWEL SIGN AU
|
||||
11073..11074 ; Other_Alphabetic # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O
|
||||
11082 ; Other_Alphabetic # Mc KAITHI SIGN VISARGA
|
||||
110B0..110B2 ; Other_Alphabetic # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II
|
||||
110B3..110B6 ; Other_Alphabetic # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI
|
||||
110B7..110B8 ; Other_Alphabetic # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU
|
||||
110C2 ; Other_Alphabetic # Mn KAITHI VOWEL SIGN VOCALIC R
|
||||
11100..11102 ; Other_Alphabetic # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA
|
||||
11127..1112B ; Other_Alphabetic # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU
|
||||
1112C ; Other_Alphabetic # Mc CHAKMA VOWEL SIGN E
|
||||
@ -815,7 +822,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
|
||||
1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
|
||||
1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z
|
||||
|
||||
# Total code points: 1398
|
||||
# Total code points: 1404
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -824,7 +831,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
|
||||
3021..3029 ; Ideographic # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE
|
||||
3038..303A ; Ideographic # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY
|
||||
3400..4DBF ; Ideographic # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF
|
||||
4E00..9FFC ; Ideographic # Lo [20989] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFC
|
||||
4E00..9FFF ; Ideographic # Lo [20992] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFF
|
||||
F900..FA6D ; Ideographic # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D
|
||||
FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
|
||||
16FE4 ; Ideographic # Mn KHITAN SMALL SCRIPT FILLER
|
||||
@ -832,15 +839,15 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM
|
||||
18800..18CD5 ; Ideographic # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5
|
||||
18D00..18D08 ; Ideographic # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08
|
||||
1B170..1B2FB ; Ideographic # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB
|
||||
20000..2A6DD ; Ideographic # Lo [42718] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DD
|
||||
2A700..2B734 ; Ideographic # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
|
||||
20000..2A6DF ; Ideographic # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF
|
||||
2A700..2B738 ; Ideographic # Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738
|
||||
2B740..2B81D ; Ideographic # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
|
||||
2B820..2CEA1 ; Ideographic # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
|
||||
2CEB0..2EBE0 ; Ideographic # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
|
||||
2F800..2FA1D ; Ideographic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
|
||||
30000..3134A ; Ideographic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
|
||||
|
||||
# Total code points: 101652
|
||||
# Total code points: 101661
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -885,6 +892,9 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM
|
||||
07EB..07F3 ; Diacritic # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
|
||||
07F4..07F5 ; Diacritic # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE
|
||||
0818..0819 ; Diacritic # Mn [2] SAMARITAN MARK OCCLUSION..SAMARITAN MARK DAGESH
|
||||
0898..089F ; Diacritic # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
|
||||
08C9 ; Diacritic # Lm ARABIC SMALL FARSI YEH
|
||||
08CA..08D2 ; Diacritic # Mn [9] ARABIC SMALL HIGH FARSI YEH..ARABIC LARGE ROUND DOT INSIDE CIRCLE BELOW
|
||||
08E3..08FE ; Diacritic # Mn [28] ARABIC TURNED DAMMA BELOW..ARABIC DAMMA WITH DOT
|
||||
093C ; Diacritic # Mn DEVANAGARI SIGN NUKTA
|
||||
094D ; Diacritic # Mn DEVANAGARI SIGN VIRAMA
|
||||
@ -901,6 +911,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM
|
||||
0B4D ; Diacritic # Mn ORIYA SIGN VIRAMA
|
||||
0B55 ; Diacritic # Mn ORIYA SIGN OVERLINE
|
||||
0BCD ; Diacritic # Mn TAMIL SIGN VIRAMA
|
||||
0C3C ; Diacritic # Mn TELUGU SIGN NUKTA
|
||||
0C4D ; Diacritic # Mn TELUGU SIGN VIRAMA
|
||||
0CBC ; Diacritic # Mn KANNADA SIGN NUKTA
|
||||
0CCD ; Diacritic # Mn KANNADA SIGN VIRAMA
|
||||
@ -928,12 +939,16 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM
|
||||
108F ; Diacritic # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5
|
||||
109A..109B ; Diacritic # Mc [2] MYANMAR SIGN KHAMTI TONE-1..MYANMAR SIGN KHAMTI TONE-3
|
||||
135D..135F ; Diacritic # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK
|
||||
1714 ; Diacritic # Mn TAGALOG SIGN VIRAMA
|
||||
1715 ; Diacritic # Mc TAGALOG SIGN PAMUDPOD
|
||||
17C9..17D3 ; Diacritic # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
|
||||
17DD ; Diacritic # Mn KHMER SIGN ATTHACAN
|
||||
1939..193B ; Diacritic # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I
|
||||
1A75..1A7C ; Diacritic # Mn [8] TAI THAM SIGN TONE-1..TAI THAM SIGN KHUEN-LUE KARAN
|
||||
1A7F ; Diacritic # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
|
||||
1AB0..1ABD ; Diacritic # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
|
||||
1ABE ; Diacritic # Me COMBINING PARENTHESES OVERLAY
|
||||
1AC1..1ACB ; Diacritic # Mn [11] COMBINING LEFT PARENTHESIS ABOVE LEFT..COMBINING TRIPLE ACUTE ACCENT
|
||||
1B34 ; Diacritic # Mn BALINESE SIGN REREKAN
|
||||
1B44 ; Diacritic # Mc BALINESE ADEG ADEG
|
||||
1B6B..1B73 ; Diacritic # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
|
||||
@ -952,8 +967,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM
|
||||
1CF8..1CF9 ; Diacritic # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
|
||||
1D2C..1D6A ; Diacritic # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI
|
||||
1DC4..1DCF ; Diacritic # Mn [12] COMBINING MACRON-ACUTE..COMBINING ZIGZAG BELOW
|
||||
1DF5..1DF9 ; Diacritic # Mn [5] COMBINING UP TACK ABOVE..COMBINING WIDE INVERTED BRIDGE BELOW
|
||||
1DFD..1DFF ; Diacritic # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
|
||||
1DF5..1DFF ; Diacritic # Mn [11] COMBINING UP TACK ABOVE..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
|
||||
1FBD ; Diacritic # Sk GREEK KORONIS
|
||||
1FBF..1FC1 ; Diacritic # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI
|
||||
1FCD..1FCF ; Diacritic # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI
|
||||
@ -1008,10 +1022,16 @@ FF70 ; Diacritic # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND
|
||||
FF9E..FF9F ; Diacritic # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
|
||||
FFE3 ; Diacritic # Sk FULLWIDTH MACRON
|
||||
102E0 ; Diacritic # Mn COPTIC EPACT THOUSANDS MARK
|
||||
10780..10785 ; Diacritic # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK
|
||||
10787..107B0 ; Diacritic # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK
|
||||
107B2..107BA ; Diacritic # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL
|
||||
10AE5..10AE6 ; Diacritic # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
|
||||
10D22..10D23 ; Diacritic # Lo [2] HANIFI ROHINGYA MARK SAKIN..HANIFI ROHINGYA MARK NA KHONNA
|
||||
10D24..10D27 ; Diacritic # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
|
||||
10F46..10F50 ; Diacritic # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
|
||||
10F82..10F85 ; Diacritic # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
|
||||
11046 ; Diacritic # Mn BRAHMI VIRAMA
|
||||
11070 ; Diacritic # Mn BRAHMI SIGN OLD TAMIL VIRAMA
|
||||
110B9..110BA ; Diacritic # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA
|
||||
11133..11134 ; Diacritic # Mn [2] CHAKMA VIRAMA..CHAKMA MAAYYAA
|
||||
11173 ; Diacritic # Mn MAHAJANI SIGN NUKTA
|
||||
@ -1049,18 +1069,24 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON
|
||||
16F8F..16F92 ; Diacritic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
|
||||
16F93..16F9F ; Diacritic # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8
|
||||
16FF0..16FF1 ; Diacritic # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
|
||||
1AFF0..1AFF3 ; Diacritic # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5
|
||||
1AFF5..1AFFB ; Diacritic # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5
|
||||
1AFFD..1AFFE ; Diacritic # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8
|
||||
1CF00..1CF2D ; Diacritic # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT
|
||||
1CF30..1CF46 ; Diacritic # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG
|
||||
1D167..1D169 ; Diacritic # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
|
||||
1D16D..1D172 ; Diacritic # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5
|
||||
1D17B..1D182 ; Diacritic # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
|
||||
1D185..1D18B ; Diacritic # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
|
||||
1D1AA..1D1AD ; Diacritic # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
|
||||
1E130..1E136 ; Diacritic # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
|
||||
1E2AE ; Diacritic # Mn TOTO SIGN RISING TONE
|
||||
1E2EC..1E2EF ; Diacritic # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI
|
||||
1E8D0..1E8D6 ; Diacritic # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
|
||||
1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK
|
||||
1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA
|
||||
|
||||
# Total code points: 882
|
||||
# Total code points: 1064
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1088,6 +1114,7 @@ AA70 ; Extender # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION
|
||||
AADD ; Extender # Lm TAI VIET SYMBOL SAM
|
||||
AAF3..AAF4 ; Extender # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK
|
||||
FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
10781..10782 ; Extender # Lm [2] MODIFIER LETTER SUPERSCRIPT TRIANGULAR COLON..MODIFIER LETTER SUPERSCRIPT HALF TRIANGULAR COLON
|
||||
1135D ; Extender # Lo GRANTHA SIGN PLUTA
|
||||
115C6..115C8 ; Extender # Po [3] SIDDHAM REPETITION MARK-1..SIDDHAM REPETITION MARK-3
|
||||
11A98 ; Extender # Mn SOYOMBO GEMINATION MARK
|
||||
@ -1097,7 +1124,7 @@ FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND
|
||||
1E13C..1E13D ; Extender # Lm [2] NYIAKENG PUACHUE HMONG SIGN XW XW..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER
|
||||
1E944..1E946 ; Extender # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK
|
||||
|
||||
# Total code points: 48
|
||||
# Total code points: 50
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1121,8 +1148,12 @@ A69C..A69D ; Other_Lowercase # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..M
|
||||
A770 ; Other_Lowercase # Lm MODIFIER LETTER US
|
||||
A7F8..A7F9 ; Other_Lowercase # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
|
||||
AB5C..AB5F ; Other_Lowercase # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK
|
||||
10780 ; Other_Lowercase # Lm MODIFIER LETTER SMALL CAPITAL AA
|
||||
10783..10785 ; Other_Lowercase # Lm [3] MODIFIER LETTER SMALL AE..MODIFIER LETTER SMALL B WITH HOOK
|
||||
10787..107B0 ; Other_Lowercase # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK
|
||||
107B2..107BA ; Other_Lowercase # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL
|
||||
|
||||
# Total code points: 189
|
||||
# Total code points: 244
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1211,7 +1242,7 @@ E0020..E007F ; Other_Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG
|
||||
# ================================================
|
||||
|
||||
3400..4DBF ; Unified_Ideograph # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF
|
||||
4E00..9FFC ; Unified_Ideograph # Lo [20989] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFC
|
||||
4E00..9FFF ; Unified_Ideograph # Lo [20992] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFF
|
||||
FA0E..FA0F ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA0E..CJK COMPATIBILITY IDEOGRAPH-FA0F
|
||||
FA11 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA11
|
||||
FA13..FA14 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA13..CJK COMPATIBILITY IDEOGRAPH-FA14
|
||||
@ -1219,14 +1250,14 @@ FA1F ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA1F
|
||||
FA21 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA21
|
||||
FA23..FA24 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA23..CJK COMPATIBILITY IDEOGRAPH-FA24
|
||||
FA27..FA29 ; Unified_Ideograph # Lo [3] CJK COMPATIBILITY IDEOGRAPH-FA27..CJK COMPATIBILITY IDEOGRAPH-FA29
|
||||
20000..2A6DD ; Unified_Ideograph # Lo [42718] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DD
|
||||
2A700..2B734 ; Unified_Ideograph # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
|
||||
20000..2A6DF ; Unified_Ideograph # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF
|
||||
2A700..2B738 ; Unified_Ideograph # Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738
|
||||
2B740..2B81D ; Unified_Ideograph # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
|
||||
2B820..2CEA1 ; Unified_Ideograph # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
|
||||
2CEB0..2EBE0 ; Unified_Ideograph # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
|
||||
30000..3134A ; Unified_Ideograph # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
|
||||
|
||||
# Total code points: 92856
|
||||
# Total code points: 92865
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1291,8 +1322,9 @@ E0001 ; Deprecated # Cf LANGUAGE TAG
|
||||
1D62A..1D62B ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF ITALIC SMALL I..MATHEMATICAL SANS-SERIF ITALIC SMALL J
|
||||
1D65E..1D65F ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL I..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL J
|
||||
1D692..1D693 ; Soft_Dotted # L& [2] MATHEMATICAL MONOSPACE SMALL I..MATHEMATICAL MONOSPACE SMALL J
|
||||
1DF1A ; Soft_Dotted # L& LATIN SMALL LETTER I WITH STROKE AND RETROFLEX HOOK
|
||||
|
||||
# Total code points: 46
|
||||
# Total code points: 47
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1330,7 +1362,7 @@ AABB..AABC ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL AUE..TAI VIET
|
||||
002E ; Sentence_Terminal # Po FULL STOP
|
||||
003F ; Sentence_Terminal # Po QUESTION MARK
|
||||
0589 ; Sentence_Terminal # Po ARMENIAN FULL STOP
|
||||
061E..061F ; Sentence_Terminal # Po [2] ARABIC TRIPLE DOT PUNCTUATION MARK..ARABIC QUESTION MARK
|
||||
061D..061F ; Sentence_Terminal # Po [3] ARABIC END OF TEXT MARK..ARABIC QUESTION MARK
|
||||
06D4 ; Sentence_Terminal # Po ARABIC FULL STOP
|
||||
0700..0702 ; Sentence_Terminal # Po [3] SYRIAC END OF PARAGRAPH..SYRIAC SUBLINEAR FULL STOP
|
||||
07F9 ; Sentence_Terminal # Po NKO EXCLAMATION MARK
|
||||
@ -1349,12 +1381,14 @@ AABB..AABC ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL AUE..TAI VIET
|
||||
1AA8..1AAB ; Sentence_Terminal # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU
|
||||
1B5A..1B5B ; Sentence_Terminal # Po [2] BALINESE PANTI..BALINESE PAMADA
|
||||
1B5E..1B5F ; Sentence_Terminal # Po [2] BALINESE CARIK SIKI..BALINESE CARIK PAREREN
|
||||
1B7D..1B7E ; Sentence_Terminal # Po [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG
|
||||
1C3B..1C3C ; Sentence_Terminal # Po [2] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION NYET THYOOM TA-ROL
|
||||
1C7E..1C7F ; Sentence_Terminal # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
|
||||
203C..203D ; Sentence_Terminal # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG
|
||||
2047..2049 ; Sentence_Terminal # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK
|
||||
2E2E ; Sentence_Terminal # Po REVERSED QUESTION MARK
|
||||
2E3C ; Sentence_Terminal # Po STENOGRAPHIC FULL STOP
|
||||
2E53..2E54 ; Sentence_Terminal # Po [2] MEDIEVAL EXCLAMATION MARK..MEDIEVAL QUESTION MARK
|
||||
3002 ; Sentence_Terminal # Po IDEOGRAPHIC FULL STOP
|
||||
A4FF ; Sentence_Terminal # Po LISU PUNCTUATION FULL STOP
|
||||
A60E..A60F ; Sentence_Terminal # Po [2] VAI FULL STOP..VAI QUESTION MARK
|
||||
@ -1375,6 +1409,7 @@ FF1F ; Sentence_Terminal # Po FULLWIDTH QUESTION MARK
|
||||
FF61 ; Sentence_Terminal # Po HALFWIDTH IDEOGRAPHIC FULL STOP
|
||||
10A56..10A57 ; Sentence_Terminal # Po [2] KHAROSHTHI PUNCTUATION DANDA..KHAROSHTHI PUNCTUATION DOUBLE DANDA
|
||||
10F55..10F59 ; Sentence_Terminal # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT
|
||||
10F86..10F89 ; Sentence_Terminal # Po [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS
|
||||
11047..11048 ; Sentence_Terminal # Po [2] BRAHMI DANDA..BRAHMI DOUBLE DANDA
|
||||
110BE..110C1 ; Sentence_Terminal # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA
|
||||
11141..11143 ; Sentence_Terminal # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK
|
||||
@ -1403,15 +1438,16 @@ FF61 ; Sentence_Terminal # Po HALFWIDTH IDEOGRAPHIC FULL STOP
|
||||
1BC9F ; Sentence_Terminal # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP
|
||||
1DA88 ; Sentence_Terminal # Po SIGNWRITING FULL STOP
|
||||
|
||||
# Total code points: 143
|
||||
# Total code points: 152
|
||||
|
||||
# ================================================
|
||||
|
||||
180B..180D ; Variation_Selector # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
|
||||
180F ; Variation_Selector # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR
|
||||
FE00..FE0F ; Variation_Selector # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
|
||||
E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
|
||||
|
||||
# Total code points: 259
|
||||
# Total code points: 260
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1644,8 +1680,17 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
|
||||
2E42 ; Pattern_Syntax # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK
|
||||
2E43..2E4F ; Pattern_Syntax # Po [13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER
|
||||
2E50..2E51 ; Pattern_Syntax # So [2] CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR
|
||||
2E52 ; Pattern_Syntax # Po TIRONIAN SIGN CAPITAL ET
|
||||
2E53..2E7F ; Pattern_Syntax # Cn [45] <reserved-2E53>..<reserved-2E7F>
|
||||
2E52..2E54 ; Pattern_Syntax # Po [3] TIRONIAN SIGN CAPITAL ET..MEDIEVAL QUESTION MARK
|
||||
2E55 ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH STROKE
|
||||
2E56 ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH STROKE
|
||||
2E57 ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH DOUBLE STROKE
|
||||
2E58 ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH DOUBLE STROKE
|
||||
2E59 ; Pattern_Syntax # Ps TOP HALF LEFT PARENTHESIS
|
||||
2E5A ; Pattern_Syntax # Pe TOP HALF RIGHT PARENTHESIS
|
||||
2E5B ; Pattern_Syntax # Ps BOTTOM HALF LEFT PARENTHESIS
|
||||
2E5C ; Pattern_Syntax # Pe BOTTOM HALF RIGHT PARENTHESIS
|
||||
2E5D ; Pattern_Syntax # Pd OBLIQUE HYPHEN
|
||||
2E5E..2E7F ; Pattern_Syntax # Cn [34] <reserved-2E5E>..<reserved-2E7F>
|
||||
3001..3003 ; Pattern_Syntax # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
|
||||
3008 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET
|
||||
3009 ; Pattern_Syntax # Pe RIGHT ANGLE BRACKET
|
||||
@ -1682,11 +1727,12 @@ FE45..FE46 ; Pattern_Syntax # Po [2] SESAME DOT..WHITE SESAME DOT
|
||||
0600..0605 ; Prepended_Concatenation_Mark # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE
|
||||
06DD ; Prepended_Concatenation_Mark # Cf ARABIC END OF AYAH
|
||||
070F ; Prepended_Concatenation_Mark # Cf SYRIAC ABBREVIATION MARK
|
||||
0890..0891 ; Prepended_Concatenation_Mark # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE
|
||||
08E2 ; Prepended_Concatenation_Mark # Cf ARABIC DISPUTED END OF AYAH
|
||||
110BD ; Prepended_Concatenation_Mark # Cf KAITHI NUMBER SIGN
|
||||
110CD ; Prepended_Concatenation_Mark # Cf KAITHI NUMBER SIGN ABOVE
|
||||
|
||||
# Total code points: 11
|
||||
# Total code points: 13
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
# PropertyValueAliases-13.0.0.txt
|
||||
# Date: 2019-11-13, 21:52:10 GMT
|
||||
# Copyright (c) 2019 Unicode, Inc.
|
||||
# PropertyValueAliases-14.0.0.txt
|
||||
# Date: 2021-05-10, 21:08:53 GMT
|
||||
# Copyright (c) 2021 Unicode, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
@ -89,6 +89,7 @@ age; 11.0 ; V11_0
|
||||
age; 12.0 ; V12_0
|
||||
age; 12.1 ; V12_1
|
||||
age; 13.0 ; V13_0
|
||||
age; 14.0 ; V14_0
|
||||
age; NA ; Unassigned
|
||||
|
||||
# Alphabetic (Alpha)
|
||||
@ -160,6 +161,7 @@ blk; Ancient_Greek_Numbers ; Ancient_Greek_Numbers
|
||||
blk; Ancient_Symbols ; Ancient_Symbols
|
||||
blk; Arabic ; Arabic
|
||||
blk; Arabic_Ext_A ; Arabic_Extended_A
|
||||
blk; Arabic_Ext_B ; Arabic_Extended_B
|
||||
blk; Arabic_Math ; Arabic_Mathematical_Alphabetic_Symbols
|
||||
blk; Arabic_PF_A ; Arabic_Presentation_Forms_A ; Arabic_Presentation_Forms-A
|
||||
blk; Arabic_PF_B ; Arabic_Presentation_Forms_B
|
||||
@ -216,6 +218,7 @@ blk; Cuneiform ; Cuneiform
|
||||
blk; Cuneiform_Numbers ; Cuneiform_Numbers_And_Punctuation
|
||||
blk; Currency_Symbols ; Currency_Symbols
|
||||
blk; Cypriot_Syllabary ; Cypriot_Syllabary
|
||||
blk; Cypro_Minoan ; Cypro_Minoan
|
||||
blk; Cyrillic ; Cyrillic
|
||||
blk; Cyrillic_Ext_A ; Cyrillic_Extended_A
|
||||
blk; Cyrillic_Ext_B ; Cyrillic_Extended_B
|
||||
@ -246,6 +249,7 @@ blk; Enclosed_Ideographic_Sup ; Enclosed_Ideographic_Supplement
|
||||
blk; Ethiopic ; Ethiopic
|
||||
blk; Ethiopic_Ext ; Ethiopic_Extended
|
||||
blk; Ethiopic_Ext_A ; Ethiopic_Extended_A
|
||||
blk; Ethiopic_Ext_B ; Ethiopic_Extended_B
|
||||
blk; Ethiopic_Sup ; Ethiopic_Supplement
|
||||
blk; Geometric_Shapes ; Geometric_Shapes
|
||||
blk; Geometric_Shapes_Ext ; Geometric_Shapes_Extended
|
||||
@ -285,6 +289,7 @@ blk; Jamo_Ext_B ; Hangul_Jamo_Extended_B
|
||||
blk; Javanese ; Javanese
|
||||
blk; Kaithi ; Kaithi
|
||||
blk; Kana_Ext_A ; Kana_Extended_A
|
||||
blk; Kana_Ext_B ; Kana_Extended_B
|
||||
blk; Kana_Sup ; Kana_Supplement
|
||||
blk; Kanbun ; Kanbun
|
||||
blk; Kangxi ; Kangxi_Radicals
|
||||
@ -306,6 +311,8 @@ blk; Latin_Ext_B ; Latin_Extended_B
|
||||
blk; Latin_Ext_C ; Latin_Extended_C
|
||||
blk; Latin_Ext_D ; Latin_Extended_D
|
||||
blk; Latin_Ext_E ; Latin_Extended_E
|
||||
blk; Latin_Ext_F ; Latin_Extended_F
|
||||
blk; Latin_Ext_G ; Latin_Extended_G
|
||||
blk; Lepcha ; Lepcha
|
||||
blk; Letterlike_Symbols ; Letterlike_Symbols
|
||||
blk; Limbu ; Limbu
|
||||
@ -372,6 +379,7 @@ blk; Old_Persian ; Old_Persian
|
||||
blk; Old_Sogdian ; Old_Sogdian
|
||||
blk; Old_South_Arabian ; Old_South_Arabian
|
||||
blk; Old_Turkic ; Old_Turkic
|
||||
blk; Old_Uyghur ; Old_Uyghur
|
||||
blk; Oriya ; Oriya
|
||||
blk; Ornamental_Dingbats ; Ornamental_Dingbats
|
||||
blk; Osage ; Osage
|
||||
@ -433,6 +441,7 @@ blk; Tai_Xuan_Jing ; Tai_Xuan_Jing_Symbols
|
||||
blk; Takri ; Takri
|
||||
blk; Tamil ; Tamil
|
||||
blk; Tamil_Sup ; Tamil_Supplement
|
||||
blk; Tangsa ; Tangsa
|
||||
blk; Tangut ; Tangut
|
||||
blk; Tangut_Components ; Tangut_Components
|
||||
blk; Tangut_Sup ; Tangut_Supplement
|
||||
@ -442,13 +451,16 @@ blk; Thai ; Thai
|
||||
blk; Tibetan ; Tibetan
|
||||
blk; Tifinagh ; Tifinagh
|
||||
blk; Tirhuta ; Tirhuta
|
||||
blk; Toto ; Toto
|
||||
blk; Transport_And_Map ; Transport_And_Map_Symbols
|
||||
blk; UCAS ; Unified_Canadian_Aboriginal_Syllabics; Canadian_Syllabics
|
||||
blk; UCAS_Ext ; Unified_Canadian_Aboriginal_Syllabics_Extended
|
||||
blk; UCAS_Ext_A ; Unified_Canadian_Aboriginal_Syllabics_Extended_A
|
||||
blk; Ugaritic ; Ugaritic
|
||||
blk; Vai ; Vai
|
||||
blk; Vedic_Ext ; Vedic_Extensions
|
||||
blk; Vertical_Forms ; Vertical_Forms
|
||||
blk; Vithkuqi ; Vithkuqi
|
||||
blk; VS ; Variation_Selectors
|
||||
blk; VS_Sup ; Variation_Selectors_Supplement
|
||||
blk; Wancho ; Wancho
|
||||
@ -458,6 +470,7 @@ blk; Yi_Radicals ; Yi_Radicals
|
||||
blk; Yi_Syllables ; Yi_Syllables
|
||||
blk; Yijing ; Yijing_Hexagram_Symbols
|
||||
blk; Zanabazar_Square ; Zanabazar_Square
|
||||
blk; Znamenny_Music ; Znamenny_Musical_Notation
|
||||
|
||||
# Canonical_Combining_Class (ccc)
|
||||
|
||||
@ -1032,6 +1045,8 @@ jg ; Taw ; Taw
|
||||
jg ; Teh_Marbuta ; Teh_Marbuta
|
||||
jg ; Teh_Marbuta_Goal ; Hamza_On_Heh_Goal
|
||||
jg ; Teth ; Teth
|
||||
jg ; Thin_Yeh ; Thin_Yeh
|
||||
jg ; Vertical_Tail ; Vertical_Tail
|
||||
jg ; Waw ; Waw
|
||||
jg ; Yeh ; Yeh
|
||||
jg ; Yeh_Barree ; Yeh_Barree
|
||||
@ -1262,6 +1277,7 @@ sc ; Cham ; Cham
|
||||
sc ; Cher ; Cherokee
|
||||
sc ; Chrs ; Chorasmian
|
||||
sc ; Copt ; Coptic ; Qaac
|
||||
sc ; Cpmn ; Cypro_Minoan
|
||||
sc ; Cprt ; Cypriot
|
||||
sc ; Cyrl ; Cyrillic
|
||||
sc ; Deva ; Devanagari
|
||||
@ -1341,6 +1357,7 @@ sc ; Orkh ; Old_Turkic
|
||||
sc ; Orya ; Oriya
|
||||
sc ; Osge ; Osage
|
||||
sc ; Osma ; Osmanya
|
||||
sc ; Ougr ; Old_Uyghur
|
||||
sc ; Palm ; Palmyrene
|
||||
sc ; Pauc ; Pau_Cin_Hau
|
||||
sc ; Perm ; Old_Permic
|
||||
@ -1383,8 +1400,11 @@ sc ; Thaa ; Thaana
|
||||
sc ; Thai ; Thai
|
||||
sc ; Tibt ; Tibetan
|
||||
sc ; Tirh ; Tirhuta
|
||||
sc ; Tnsa ; Tangsa
|
||||
sc ; Toto ; Toto
|
||||
sc ; Ugar ; Ugaritic
|
||||
sc ; Vaii ; Vai
|
||||
sc ; Vith ; Vithkuqi
|
||||
sc ; Wara ; Warang_Citi
|
||||
sc ; Wcho ; Wancho
|
||||
sc ; Xpeo ; Old_Persian
|
||||
|
16
make/data/unicodedata/ReadMe.txt
Normal file
16
make/data/unicodedata/ReadMe.txt
Normal file
@ -0,0 +1,16 @@
|
||||
# Unicode Character Database
|
||||
# Date: 2021-09-10, 17:22:00 GMT [KW]
|
||||
# Copyright (c) 2021 Unicode, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see https://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
# For documentation, see the following:
|
||||
# NamesList.html
|
||||
# UAX #38, "Unicode Han Database (Unihan)"
|
||||
# UAX #44, "Unicode Character Database"
|
||||
# UTS #51, "Unicode Emoji"
|
||||
#
|
||||
# The UAXes and UTS #51 can be accessed at https://www.unicode.org/versions/Unicode14.0.0/
|
||||
|
||||
This directory contains the final data files
|
||||
for the Unicode Character Database, for Version 14.0.0 of the Unicode Standard.
|
@ -1,16 +1,16 @@
|
||||
# Scripts-13.0.0.txt
|
||||
# Date: 2020-01-22, 00:07:43 GMT
|
||||
# Copyright (c) 2020 Unicode, Inc.
|
||||
# Scripts-14.0.0.txt
|
||||
# Date: 2021-07-10, 00:35:31 GMT
|
||||
# Copyright (c) 2021 Unicode, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
# Unicode Character Database
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
# For more information, see:
|
||||
# UAX #24, Unicode Script Property: http://www.unicode.org/reports/tr24/
|
||||
# UAX #24, Unicode Script Property: https://www.unicode.org/reports/tr24/
|
||||
# Especially the sections:
|
||||
# http://www.unicode.org/reports/tr24/#Assignment_Script_Values
|
||||
# http://www.unicode.org/reports/tr24/#Assignment_ScriptX_Values
|
||||
# https://www.unicode.org/reports/tr24/#Assignment_Script_Values
|
||||
# https://www.unicode.org/reports/tr24/#Assignment_ScriptX_Values
|
||||
#
|
||||
|
||||
# ================================================
|
||||
@ -154,7 +154,7 @@
|
||||
208A..208C ; Common # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN
|
||||
208D ; Common # Ps SUBSCRIPT LEFT PARENTHESIS
|
||||
208E ; Common # Pe SUBSCRIPT RIGHT PARENTHESIS
|
||||
20A0..20BF ; Common # Sc [32] EURO-CURRENCY SIGN..BITCOIN SIGN
|
||||
20A0..20C0 ; Common # Sc [33] EURO-CURRENCY SIGN..SOM SIGN
|
||||
2100..2101 ; Common # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT
|
||||
2102 ; Common # L& DOUBLE-STRUCK CAPITAL C
|
||||
2103..2106 ; Common # So [4] DEGREE CELSIUS..CADA UNA
|
||||
@ -347,7 +347,16 @@
|
||||
2E42 ; Common # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK
|
||||
2E43..2E4F ; Common # Po [13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER
|
||||
2E50..2E51 ; Common # So [2] CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR
|
||||
2E52 ; Common # Po TIRONIAN SIGN CAPITAL ET
|
||||
2E52..2E54 ; Common # Po [3] TIRONIAN SIGN CAPITAL ET..MEDIEVAL QUESTION MARK
|
||||
2E55 ; Common # Ps LEFT SQUARE BRACKET WITH STROKE
|
||||
2E56 ; Common # Pe RIGHT SQUARE BRACKET WITH STROKE
|
||||
2E57 ; Common # Ps LEFT SQUARE BRACKET WITH DOUBLE STROKE
|
||||
2E58 ; Common # Pe RIGHT SQUARE BRACKET WITH DOUBLE STROKE
|
||||
2E59 ; Common # Ps TOP HALF LEFT PARENTHESIS
|
||||
2E5A ; Common # Pe TOP HALF RIGHT PARENTHESIS
|
||||
2E5B ; Common # Ps BOTTOM HALF LEFT PARENTHESIS
|
||||
2E5C ; Common # Pe BOTTOM HALF RIGHT PARENTHESIS
|
||||
2E5D ; Common # Pd OBLIQUE HYPHEN
|
||||
2FF0..2FFB ; Common # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
|
||||
3000 ; Common # Zs IDEOGRAPHIC SPACE
|
||||
3001..3003 ; Common # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
|
||||
@ -511,9 +520,8 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
|
||||
10190..1019C ; Common # So [13] ROMAN SEXTANS SIGN..ASCIA SYMBOL
|
||||
101D0..101FC ; Common # So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND
|
||||
102E1..102FB ; Common # No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED
|
||||
16FE2 ; Common # Po OLD CHINESE HOOK MARK
|
||||
16FE3 ; Common # Lm OLD CHINESE ITERATION MARK
|
||||
1BCA0..1BCA3 ; Common # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP
|
||||
1CF50..1CFC3 ; Common # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK
|
||||
1D000..1D0F5 ; Common # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO
|
||||
1D100..1D126 ; Common # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2
|
||||
1D129..1D164 ; Common # So [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
|
||||
@ -523,7 +531,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
|
||||
1D173..1D17A ; Common # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
|
||||
1D183..1D184 ; Common # So [2] MUSICAL SYMBOL ARPEGGIATO UP..MUSICAL SYMBOL ARPEGGIATO DOWN
|
||||
1D18C..1D1A9 ; Common # So [30] MUSICAL SYMBOL RINFORZANDO..MUSICAL SYMBOL DEGREE SLASH
|
||||
1D1AE..1D1E8 ; Common # So [59] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KIEVAN FLAT SIGN
|
||||
1D1AE..1D1EA ; Common # So [61] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KORON
|
||||
1D2E0..1D2F3 ; Common # No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN
|
||||
1D300..1D356 ; Common # So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING
|
||||
1D360..1D378 ; Common # No [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE
|
||||
@ -593,35 +601,36 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
|
||||
1F300..1F3FA ; Common # So [251] CYCLONE..AMPHORA
|
||||
1F3FB..1F3FF ; Common # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6
|
||||
1F400..1F6D7 ; Common # So [728] RAT..ELEVATOR
|
||||
1F6E0..1F6EC ; Common # So [13] HAMMER AND WRENCH..AIRPLANE ARRIVING
|
||||
1F6DD..1F6EC ; Common # So [16] PLAYGROUND SLIDE..AIRPLANE ARRIVING
|
||||
1F6F0..1F6FC ; Common # So [13] SATELLITE..ROLLER SKATE
|
||||
1F700..1F773 ; Common # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE
|
||||
1F780..1F7D8 ; Common # So [89] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..NEGATIVE CIRCLED SQUARE
|
||||
1F7E0..1F7EB ; Common # So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE
|
||||
1F7F0 ; Common # So HEAVY EQUALS SIGN
|
||||
1F800..1F80B ; Common # So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD
|
||||
1F810..1F847 ; Common # So [56] LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD..DOWNWARDS HEAVY ARROW
|
||||
1F850..1F859 ; Common # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW
|
||||
1F860..1F887 ; Common # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
|
||||
1F890..1F8AD ; Common # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS
|
||||
1F8B0..1F8B1 ; Common # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST
|
||||
1F900..1F978 ; Common # So [121] CIRCLED CROSS FORMEE WITH FOUR DOTS..DISGUISED FACE
|
||||
1F97A..1F9CB ; Common # So [82] FACE WITH PLEADING EYES..BUBBLE TEA
|
||||
1F9CD..1FA53 ; Common # So [135] STANDING PERSON..BLACK CHESS KNIGHT-BISHOP
|
||||
1F900..1FA53 ; Common # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP
|
||||
1FA60..1FA6D ; Common # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER
|
||||
1FA70..1FA74 ; Common # So [5] BALLET SHOES..THONG SANDAL
|
||||
1FA78..1FA7A ; Common # So [3] DROP OF BLOOD..STETHOSCOPE
|
||||
1FA78..1FA7C ; Common # So [5] DROP OF BLOOD..CRUTCH
|
||||
1FA80..1FA86 ; Common # So [7] YO-YO..NESTING DOLLS
|
||||
1FA90..1FAA8 ; Common # So [25] RINGED PLANET..ROCK
|
||||
1FAB0..1FAB6 ; Common # So [7] FLY..FEATHER
|
||||
1FAC0..1FAC2 ; Common # So [3] ANATOMICAL HEART..PEOPLE HUGGING
|
||||
1FAD0..1FAD6 ; Common # So [7] BLUEBERRIES..TEAPOT
|
||||
1FA90..1FAAC ; Common # So [29] RINGED PLANET..HAMSA
|
||||
1FAB0..1FABA ; Common # So [11] FLY..NEST WITH EGGS
|
||||
1FAC0..1FAC5 ; Common # So [6] ANATOMICAL HEART..PERSON WITH CROWN
|
||||
1FAD0..1FAD9 ; Common # So [10] BLUEBERRIES..JAR
|
||||
1FAE0..1FAE7 ; Common # So [8] MELTING FACE..BUBBLES
|
||||
1FAF0..1FAF6 ; Common # So [7] HAND WITH INDEX FINGER AND THUMB CROSSED..HEART HANDS
|
||||
1FB00..1FB92 ; Common # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK
|
||||
1FB94..1FBCA ; Common # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON
|
||||
1FBF0..1FBF9 ; Common # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
|
||||
E0001 ; Common # Cf LANGUAGE TAG
|
||||
E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG
|
||||
|
||||
# Total code points: 8087
|
||||
# Total code points: 8252
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -664,8 +673,11 @@ A770 ; Latin # Lm MODIFIER LETTER US
|
||||
A771..A787 ; Latin # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T
|
||||
A78B..A78E ; Latin # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
|
||||
A78F ; Latin # Lo LATIN LETTER SINOLOGICAL DOT
|
||||
A790..A7BF ; Latin # L& [48] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER GLOTTAL U
|
||||
A7C2..A7CA ; Latin # L& [9] LATIN CAPITAL LETTER ANGLICANA W..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
|
||||
A790..A7CA ; Latin # L& [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
|
||||
A7D0..A7D1 ; Latin # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G
|
||||
A7D3 ; Latin # L& LATIN SMALL LETTER DOUBLE THORN
|
||||
A7D5..A7D9 ; Latin # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S
|
||||
A7F2..A7F4 ; Latin # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q
|
||||
A7F5..A7F6 ; Latin # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H
|
||||
A7F7 ; Latin # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I
|
||||
A7F8..A7F9 ; Latin # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
|
||||
@ -679,8 +691,14 @@ AB69 ; Latin # Lm MODIFIER LETTER SMALL TURNED W
|
||||
FB00..FB06 ; Latin # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
|
||||
FF21..FF3A ; Latin # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
|
||||
FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
|
||||
10780..10785 ; Latin # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK
|
||||
10787..107B0 ; Latin # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK
|
||||
107B2..107BA ; Latin # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL
|
||||
1DF00..1DF09 ; Latin # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK
|
||||
1DF0A ; Latin # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK
|
||||
1DF0B..1DF1E ; Latin # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL
|
||||
|
||||
# Total code points: 1374
|
||||
# Total code points: 1475
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -820,7 +838,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
|
||||
060E..060F ; Arabic # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA
|
||||
0610..061A ; Arabic # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
|
||||
061C ; Arabic # Cf ARABIC LETTER MARK
|
||||
061E ; Arabic # Po ARABIC TRIPLE DOT PUNCTUATION MARK
|
||||
061D..061E ; Arabic # Po [2] ARABIC END OF TEXT MARK..ARABIC TRIPLE DOT PUNCTUATION MARK
|
||||
0620..063F ; Arabic # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
|
||||
0641..064A ; Arabic # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH
|
||||
0656..065F ; Arabic # Mn [10] ARABIC SUBSCRIPT ALEF..ARABIC WAVY HAMZA BELOW
|
||||
@ -843,18 +861,25 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
|
||||
06FD..06FE ; Arabic # So [2] ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN
|
||||
06FF ; Arabic # Lo ARABIC LETTER HEH WITH INVERTED V
|
||||
0750..077F ; Arabic # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE
|
||||
08A0..08B4 ; Arabic # Lo [21] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER KAF WITH DOT BELOW
|
||||
08B6..08C7 ; Arabic # Lo [18] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER LAM WITH SMALL ARABIC LETTER TAH ABOVE
|
||||
08D3..08E1 ; Arabic # Mn [15] ARABIC SMALL LOW WAW..ARABIC SMALL HIGH SIGN SAFHA
|
||||
0870..0887 ; Arabic # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT
|
||||
0888 ; Arabic # Sk ARABIC RAISED ROUND DOT
|
||||
0889..088E ; Arabic # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL
|
||||
0890..0891 ; Arabic # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE
|
||||
0898..089F ; Arabic # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
|
||||
08A0..08C8 ; Arabic # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF
|
||||
08C9 ; Arabic # Lm ARABIC SMALL FARSI YEH
|
||||
08CA..08E1 ; Arabic # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
|
||||
08E3..08FF ; Arabic # Mn [29] ARABIC TURNED DAMMA BELOW..ARABIC MARK SIDEWAYS NOON GHUNNA
|
||||
FB50..FBB1 ; Arabic # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
|
||||
FBB2..FBC1 ; Arabic # Sk [16] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL SMALL TAH BELOW
|
||||
FBB2..FBC2 ; Arabic # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE
|
||||
FBD3..FD3D ; Arabic # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM
|
||||
FD40..FD4F ; Arabic # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH
|
||||
FD50..FD8F ; Arabic # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
|
||||
FD92..FDC7 ; Arabic # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
|
||||
FDCF ; Arabic # So ARABIC LIGATURE SALAAMUHU ALAYNAA
|
||||
FDF0..FDFB ; Arabic # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU
|
||||
FDFC ; Arabic # Sc RIAL SIGN
|
||||
FDFD ; Arabic # So ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM
|
||||
FDFD..FDFF ; Arabic # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL
|
||||
FE70..FE74 ; Arabic # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM
|
||||
FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM
|
||||
10E60..10E7E ; Arabic # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS
|
||||
@ -893,7 +918,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA
|
||||
1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
|
||||
1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL
|
||||
|
||||
# Total code points: 1291
|
||||
# Total code points: 1365
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1113,6 +1138,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY
|
||||
0C0E..0C10 ; Telugu # Lo [3] TELUGU LETTER E..TELUGU LETTER AI
|
||||
0C12..0C28 ; Telugu # Lo [23] TELUGU LETTER O..TELUGU LETTER NA
|
||||
0C2A..0C39 ; Telugu # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA
|
||||
0C3C ; Telugu # Mn TELUGU SIGN NUKTA
|
||||
0C3D ; Telugu # Lo TELUGU SIGN AVAGRAHA
|
||||
0C3E..0C40 ; Telugu # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
|
||||
0C41..0C44 ; Telugu # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR
|
||||
@ -1120,6 +1146,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY
|
||||
0C4A..0C4D ; Telugu # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
|
||||
0C55..0C56 ; Telugu # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
|
||||
0C58..0C5A ; Telugu # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA
|
||||
0C5D ; Telugu # Lo TELUGU LETTER NAKAARA POLLU
|
||||
0C60..0C61 ; Telugu # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
|
||||
0C62..0C63 ; Telugu # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL
|
||||
0C66..0C6F ; Telugu # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE
|
||||
@ -1127,7 +1154,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY
|
||||
0C78..0C7E ; Telugu # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR
|
||||
0C7F ; Telugu # So TELUGU SIGN TUUMU
|
||||
|
||||
# Total code points: 98
|
||||
# Total code points: 100
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1150,13 +1177,13 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY
|
||||
0CCA..0CCB ; Kannada # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
|
||||
0CCC..0CCD ; Kannada # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
|
||||
0CD5..0CD6 ; Kannada # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
|
||||
0CDE ; Kannada # Lo KANNADA LETTER FA
|
||||
0CDD..0CDE ; Kannada # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA
|
||||
0CE0..0CE1 ; Kannada # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
|
||||
0CE2..0CE3 ; Kannada # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
|
||||
0CE6..0CEF ; Kannada # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
|
||||
0CF1..0CF2 ; Kannada # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
|
||||
|
||||
# Total code points: 89
|
||||
# Total code points: 90
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1411,8 +1438,12 @@ AB09..AB0E ; Ethiopic # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DD
|
||||
AB11..AB16 ; Ethiopic # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO
|
||||
AB20..AB26 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO
|
||||
AB28..AB2E ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO
|
||||
1E7E0..1E7E6 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO
|
||||
1E7E8..1E7EB ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE
|
||||
1E7ED..1E7EE ; Ethiopic # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE
|
||||
1E7F0..1E7FE ; Ethiopic # Lo [15] ETHIOPIC SYLLABLE GURAGE QWI..ETHIOPIC SYLLABLE GURAGE PWEE
|
||||
|
||||
# Total code points: 495
|
||||
# Total code points: 523
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1430,8 +1461,9 @@ AB70..ABBF ; Cherokee # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETT
|
||||
166E ; Canadian_Aboriginal # Po CANADIAN SYLLABICS FULL STOP
|
||||
166F..167F ; Canadian_Aboriginal # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W
|
||||
18B0..18F5 ; Canadian_Aboriginal # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S
|
||||
11AB0..11ABF ; Canadian_Aboriginal # Lo [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA
|
||||
|
||||
# Total code points: 710
|
||||
# Total code points: 726
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1480,6 +1512,7 @@ AB70..ABBF ; Cherokee # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETT
|
||||
1807..180A ; Mongolian # Po [4] MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER..MONGOLIAN NIRUGU
|
||||
180B..180D ; Mongolian # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
|
||||
180E ; Mongolian # Cf MONGOLIAN VOWEL SEPARATOR
|
||||
180F ; Mongolian # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR
|
||||
1810..1819 ; Mongolian # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE
|
||||
1820..1842 ; Mongolian # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI
|
||||
1843 ; Mongolian # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN
|
||||
@ -1491,18 +1524,18 @@ AB70..ABBF ; Cherokee # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETT
|
||||
18AA ; Mongolian # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA
|
||||
11660..1166C ; Mongolian # Po [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT
|
||||
|
||||
# Total code points: 167
|
||||
# Total code points: 168
|
||||
|
||||
# ================================================
|
||||
|
||||
3041..3096 ; Hiragana # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE
|
||||
309D..309E ; Hiragana # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK
|
||||
309F ; Hiragana # Lo HIRAGANA DIGRAPH YORI
|
||||
1B001..1B11E ; Hiragana # Lo [286] HIRAGANA LETTER ARCHAIC YE..HENTAIGANA LETTER N-MU-MO-2
|
||||
1B001..1B11F ; Hiragana # Lo [287] HIRAGANA LETTER ARCHAIC YE..HIRAGANA LETTER ARCHAIC WU
|
||||
1B150..1B152 ; Hiragana # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO
|
||||
1F200 ; Hiragana # So SQUARE HIRAGANA HOKA
|
||||
|
||||
# Total code points: 379
|
||||
# Total code points: 380
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1514,10 +1547,14 @@ AB70..ABBF ; Cherokee # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETT
|
||||
3300..3357 ; Katakana # So [88] SQUARE APAATO..SQUARE WATTO
|
||||
FF66..FF6F ; Katakana # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU
|
||||
FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N
|
||||
1AFF0..1AFF3 ; Katakana # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5
|
||||
1AFF5..1AFFB ; Katakana # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5
|
||||
1AFFD..1AFFE ; Katakana # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8
|
||||
1B000 ; Katakana # Lo KATAKANA LETTER ARCHAIC E
|
||||
1B120..1B122 ; Katakana # Lo [3] KATAKANA LETTER ARCHAIC YI..KATAKANA LETTER ARCHAIC WU
|
||||
1B164..1B167 ; Katakana # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N
|
||||
|
||||
# Total code points: 304
|
||||
# Total code points: 320
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1538,19 +1575,21 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
|
||||
3038..303A ; Han # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY
|
||||
303B ; Han # Lm VERTICAL IDEOGRAPHIC ITERATION MARK
|
||||
3400..4DBF ; Han # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF
|
||||
4E00..9FFC ; Han # Lo [20989] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFC
|
||||
4E00..9FFF ; Han # Lo [20992] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFF
|
||||
F900..FA6D ; Han # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D
|
||||
FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
|
||||
16FE2 ; Han # Po OLD CHINESE HOOK MARK
|
||||
16FE3 ; Han # Lm OLD CHINESE ITERATION MARK
|
||||
16FF0..16FF1 ; Han # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
|
||||
20000..2A6DD ; Han # Lo [42718] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DD
|
||||
2A700..2B734 ; Han # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
|
||||
20000..2A6DF ; Han # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF
|
||||
2A700..2B738 ; Han # Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738
|
||||
2B740..2B81D ; Han # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
|
||||
2B820..2CEA1 ; Han # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
|
||||
2CEB0..2EBE0 ; Han # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
|
||||
2F800..2FA1D ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
|
||||
30000..3134A ; Han # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
|
||||
|
||||
# Total code points: 94204
|
||||
# Total code points: 94215
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1593,15 +1632,14 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE
|
||||
0951..0954 ; Inherited # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT
|
||||
1AB0..1ABD ; Inherited # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
|
||||
1ABE ; Inherited # Me COMBINING PARENTHESES OVERLAY
|
||||
1ABF..1AC0 ; Inherited # Mn [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW
|
||||
1ABF..1ACE ; Inherited # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T
|
||||
1CD0..1CD2 ; Inherited # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
|
||||
1CD4..1CE0 ; Inherited # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
|
||||
1CE2..1CE8 ; Inherited # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
|
||||
1CED ; Inherited # Mn VEDIC SIGN TIRYAK
|
||||
1CF4 ; Inherited # Mn VEDIC TONE CANDRA ABOVE
|
||||
1CF8..1CF9 ; Inherited # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
|
||||
1DC0..1DF9 ; Inherited # Mn [58] COMBINING DOTTED GRAVE ACCENT..COMBINING WIDE INVERTED BRIDGE BELOW
|
||||
1DFB..1DFF ; Inherited # Mn [5] COMBINING DELETION MARK..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
|
||||
1DC0..1DFF ; Inherited # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
|
||||
200C..200D ; Inherited # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
|
||||
20D0..20DC ; Inherited # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
|
||||
20DD..20E0 ; Inherited # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
|
||||
@ -1615,26 +1653,30 @@ FE20..FE2D ; Inherited # Mn [14] COMBINING LIGATURE LEFT HALF..COMBINING CON
|
||||
101FD ; Inherited # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE
|
||||
102E0 ; Inherited # Mn COPTIC EPACT THOUSANDS MARK
|
||||
1133B ; Inherited # Mn COMBINING BINDU BELOW
|
||||
1CF00..1CF2D ; Inherited # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT
|
||||
1CF30..1CF46 ; Inherited # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG
|
||||
1D167..1D169 ; Inherited # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
|
||||
1D17B..1D182 ; Inherited # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
|
||||
1D185..1D18B ; Inherited # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
|
||||
1D1AA..1D1AD ; Inherited # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
|
||||
E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
|
||||
|
||||
# Total code points: 573
|
||||
# Total code points: 657
|
||||
|
||||
# ================================================
|
||||
|
||||
1700..170C ; Tagalog # Lo [13] TAGALOG LETTER A..TAGALOG LETTER YA
|
||||
170E..1711 ; Tagalog # Lo [4] TAGALOG LETTER LA..TAGALOG LETTER HA
|
||||
1700..1711 ; Tagalog # Lo [18] TAGALOG LETTER A..TAGALOG LETTER HA
|
||||
1712..1714 ; Tagalog # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
|
||||
1715 ; Tagalog # Mc TAGALOG SIGN PAMUDPOD
|
||||
171F ; Tagalog # Lo TAGALOG LETTER ARCHAIC RA
|
||||
|
||||
# Total code points: 20
|
||||
# Total code points: 23
|
||||
|
||||
# ================================================
|
||||
|
||||
1720..1731 ; Hanunoo # Lo [18] HANUNOO LETTER A..HANUNOO LETTER HA
|
||||
1732..1734 ; Hanunoo # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD
|
||||
1732..1733 ; Hanunoo # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U
|
||||
1734 ; Hanunoo # Mc HANUNOO SIGN PAMUDPOD
|
||||
|
||||
# Total code points: 21
|
||||
|
||||
@ -1762,15 +1804,14 @@ E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-2
|
||||
|
||||
# ================================================
|
||||
|
||||
2C00..2C2E ; Glagolitic # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
|
||||
2C30..2C5E ; Glagolitic # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE
|
||||
2C00..2C5F ; Glagolitic # L& [96] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC SMALL LETTER CAUDATE CHRIVI
|
||||
1E000..1E006 ; Glagolitic # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
|
||||
1E008..1E018 ; Glagolitic # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
|
||||
1E01B..1E021 ; Glagolitic # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
|
||||
1E023..1E024 ; Glagolitic # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
|
||||
1E026..1E02A ; Glagolitic # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
|
||||
|
||||
# Total code points: 132
|
||||
# Total code points: 134
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1836,14 +1877,15 @@ A82C ; Syloti_Nagri # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA
|
||||
1B3D..1B41 ; Balinese # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG
|
||||
1B42 ; Balinese # Mn BALINESE VOWEL SIGN PEPET
|
||||
1B43..1B44 ; Balinese # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG
|
||||
1B45..1B4B ; Balinese # Lo [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK
|
||||
1B45..1B4C ; Balinese # Lo [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA
|
||||
1B50..1B59 ; Balinese # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE
|
||||
1B5A..1B60 ; Balinese # Po [7] BALINESE PANTI..BALINESE PAMENENG
|
||||
1B61..1B6A ; Balinese # So [10] BALINESE MUSICAL SYMBOL DONG..BALINESE MUSICAL SYMBOL DANG GEDE
|
||||
1B6B..1B73 ; Balinese # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
|
||||
1B74..1B7C ; Balinese # So [9] BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING
|
||||
1B7D..1B7E ; Balinese # Po [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG
|
||||
|
||||
# Total code points: 121
|
||||
# Total code points: 124
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -2178,9 +2220,10 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
|
||||
110BB..110BC ; Kaithi # Po [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN
|
||||
110BD ; Kaithi # Cf KAITHI NUMBER SIGN
|
||||
110BE..110C1 ; Kaithi # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA
|
||||
110C2 ; Kaithi # Mn KAITHI VOWEL SIGN VOCALIC R
|
||||
110CD ; Kaithi # Cf KAITHI NUMBER SIGN ABOVE
|
||||
|
||||
# Total code points: 67
|
||||
# Total code points: 68
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -2207,9 +2250,13 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
|
||||
11047..1104D ; Brahmi # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS
|
||||
11052..11065 ; Brahmi # No [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND
|
||||
11066..1106F ; Brahmi # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE
|
||||
11070 ; Brahmi # Mn BRAHMI SIGN OLD TAMIL VIRAMA
|
||||
11071..11072 ; Brahmi # Lo [2] BRAHMI LETTER OLD TAMIL SHORT E..BRAHMI LETTER OLD TAMIL SHORT O
|
||||
11073..11074 ; Brahmi # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O
|
||||
11075 ; Brahmi # Lo BRAHMI LETTER OLD TAMIL LLA
|
||||
1107F ; Brahmi # Mn BRAHMI NUMBER JOINER
|
||||
|
||||
# Total code points: 109
|
||||
# Total code points: 115
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -2301,9 +2348,10 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
|
||||
116B6 ; Takri # Mc TAKRI SIGN VIRAMA
|
||||
116B7 ; Takri # Mn TAKRI SIGN NUKTA
|
||||
116B8 ; Takri # Lo TAKRI LETTER ARCHAIC KHA
|
||||
116B9 ; Takri # Po TAKRI ABBREVIATION SIGN
|
||||
116C0..116C9 ; Takri # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
|
||||
|
||||
# Total code points: 67
|
||||
# Total code points: 68
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -2561,8 +2609,9 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
|
||||
1173A..1173B ; Ahom # No [2] AHOM NUMBER TEN..AHOM NUMBER TWENTY
|
||||
1173C..1173E ; Ahom # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI
|
||||
1173F ; Ahom # So AHOM SYMBOL VI
|
||||
11740..11746 ; Ahom # Lo [7] AHOM LETTER CA..AHOM LETTER LLA
|
||||
|
||||
# Total code points: 58
|
||||
# Total code points: 65
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -2897,4 +2946,46 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
|
||||
|
||||
# Total code points: 47
|
||||
|
||||
# ================================================
|
||||
|
||||
12F90..12FF0 ; Cypro_Minoan # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114
|
||||
12FF1..12FF2 ; Cypro_Minoan # Po [2] CYPRO-MINOAN SIGN CM301..CYPRO-MINOAN SIGN CM302
|
||||
|
||||
# Total code points: 99
|
||||
|
||||
# ================================================
|
||||
|
||||
10F70..10F81 ; Old_Uyghur # Lo [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH
|
||||
10F82..10F85 ; Old_Uyghur # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
|
||||
10F86..10F89 ; Old_Uyghur # Po [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS
|
||||
|
||||
# Total code points: 26
|
||||
|
||||
# ================================================
|
||||
|
||||
16A70..16ABE ; Tangsa # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA
|
||||
16AC0..16AC9 ; Tangsa # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE
|
||||
|
||||
# Total code points: 89
|
||||
|
||||
# ================================================
|
||||
|
||||
1E290..1E2AD ; Toto # Lo [30] TOTO LETTER PA..TOTO LETTER A
|
||||
1E2AE ; Toto # Mn TOTO SIGN RISING TONE
|
||||
|
||||
# Total code points: 31
|
||||
|
||||
# ================================================
|
||||
|
||||
10570..1057A ; Vithkuqi # L& [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA
|
||||
1057C..1058A ; Vithkuqi # L& [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE
|
||||
1058C..10592 ; Vithkuqi # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE
|
||||
10594..10595 ; Vithkuqi # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE
|
||||
10597..105A1 ; Vithkuqi # L& [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA
|
||||
105A3..105B1 ; Vithkuqi # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE
|
||||
105B3..105B9 ; Vithkuqi # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
|
||||
105BB..105BC ; Vithkuqi # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
|
||||
|
||||
# Total code points: 70
|
||||
|
||||
# EOF
|
||||
|
@ -1,6 +1,6 @@
|
||||
# SpecialCasing-13.0.0.txt
|
||||
# Date: 2019-09-08, 23:31:24 GMT
|
||||
# Copyright (c) 2019 Unicode, Inc.
|
||||
# SpecialCasing-14.0.0.txt
|
||||
# Date: 2021-03-08, 19:35:55 GMT
|
||||
# Copyright (c) 2021 Unicode, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1 +0,0 @@
|
||||
13.0.0
|
@ -1,6 +1,6 @@
|
||||
# GraphemeBreakProperty-13.0.0.txt
|
||||
# Date: 2019-10-21, 14:30:35 GMT
|
||||
# Copyright (c) 2019 Unicode, Inc.
|
||||
# GraphemeBreakProperty-14.0.0.txt
|
||||
# Date: 2021-08-12, 23:13:02 GMT
|
||||
# Copyright (c) 2021 Unicode, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
@ -21,6 +21,7 @@
|
||||
0600..0605 ; Prepend # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE
|
||||
06DD ; Prepend # Cf ARABIC END OF AYAH
|
||||
070F ; Prepend # Cf SYRIAC ABBREVIATION MARK
|
||||
0890..0891 ; Prepend # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE
|
||||
08E2 ; Prepend # Cf ARABIC DISPUTED END OF AYAH
|
||||
0D4E ; Prepend # Lo MALAYALAM LETTER DOT REPH
|
||||
110BD ; Prepend # Cf KAITHI NUMBER SIGN
|
||||
@ -32,7 +33,7 @@
|
||||
11A84..11A89 ; Prepend # Lo [6] SOYOMBO SIGN JIHVAMULIYA..SOYOMBO CLUSTER-INITIAL LETTER SA
|
||||
11D46 ; Prepend # Lo MASARAM GONDI REPHA
|
||||
|
||||
# Total code points: 24
|
||||
# Total code points: 26
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -104,7 +105,8 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
|
||||
0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
|
||||
0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
|
||||
0859..085B ; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
|
||||
08D3..08E1 ; Extend # Mn [15] ARABIC SMALL LOW WAW..ARABIC SMALL HIGH SIGN SAFHA
|
||||
0898..089F ; Extend # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
|
||||
08CA..08E1 ; Extend # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
|
||||
08E3..0902 ; Extend # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA
|
||||
093A ; Extend # Mn DEVANAGARI VOWEL SIGN OE
|
||||
093C ; Extend # Mn DEVANAGARI SIGN NUKTA
|
||||
@ -151,6 +153,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
|
||||
0BD7 ; Extend # Mc TAMIL AU LENGTH MARK
|
||||
0C00 ; Extend # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE
|
||||
0C04 ; Extend # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE
|
||||
0C3C ; Extend # Mn TELUGU SIGN NUKTA
|
||||
0C3E..0C40 ; Extend # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
|
||||
0C46..0C48 ; Extend # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI
|
||||
0C4A..0C4D ; Extend # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
|
||||
@ -206,7 +209,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
|
||||
109D ; Extend # Mn MYANMAR VOWEL SIGN AITON AI
|
||||
135D..135F ; Extend # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK
|
||||
1712..1714 ; Extend # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
|
||||
1732..1734 ; Extend # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD
|
||||
1732..1733 ; Extend # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U
|
||||
1752..1753 ; Extend # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U
|
||||
1772..1773 ; Extend # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U
|
||||
17B4..17B5 ; Extend # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
|
||||
@ -215,6 +218,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
|
||||
17C9..17D3 ; Extend # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
|
||||
17DD ; Extend # Mn KHMER SIGN ATTHACAN
|
||||
180B..180D ; Extend # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
|
||||
180F ; Extend # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR
|
||||
1885..1886 ; Extend # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
|
||||
18A9 ; Extend # Mn MONGOLIAN LETTER ALI GALI DAGALGA
|
||||
1920..1922 ; Extend # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U
|
||||
@ -232,7 +236,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
|
||||
1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
|
||||
1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
|
||||
1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY
|
||||
1ABF..1AC0 ; Extend # Mn [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW
|
||||
1ABF..1ACE ; Extend # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T
|
||||
1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
|
||||
1B34 ; Extend # Mn BALINESE SIGN REREKAN
|
||||
1B35 ; Extend # Mc BALINESE VOWEL SIGN TEDUNG
|
||||
@ -256,8 +260,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
|
||||
1CED ; Extend # Mn VEDIC SIGN TIRYAK
|
||||
1CF4 ; Extend # Mn VEDIC TONE CANDRA ABOVE
|
||||
1CF8..1CF9 ; Extend # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
|
||||
1DC0..1DF9 ; Extend # Mn [58] COMBINING DOTTED GRAVE ACCENT..COMBINING WIDE INVERTED BRIDGE BELOW
|
||||
1DFB..1DFF ; Extend # Mn [5] COMBINING DELETION MARK..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
|
||||
1DC0..1DFF ; Extend # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
|
||||
200C ; Extend # Cf ZERO WIDTH NON-JOINER
|
||||
20D0..20DC ; Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
|
||||
20DD..20E0 ; Extend # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
|
||||
@ -322,11 +325,15 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
|
||||
10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
|
||||
10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
|
||||
10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
|
||||
10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
|
||||
11001 ; Extend # Mn BRAHMI SIGN ANUSVARA
|
||||
11038..11046 ; Extend # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA
|
||||
11070 ; Extend # Mn BRAHMI SIGN OLD TAMIL VIRAMA
|
||||
11073..11074 ; Extend # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O
|
||||
1107F..11081 ; Extend # Mn [3] BRAHMI NUMBER JOINER..KAITHI SIGN ANUSVARA
|
||||
110B3..110B6 ; Extend # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI
|
||||
110B9..110BA ; Extend # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA
|
||||
110C2 ; Extend # Mn KAITHI VOWEL SIGN VOCALIC R
|
||||
11100..11102 ; Extend # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA
|
||||
11127..1112B ; Extend # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU
|
||||
1112D..11134 ; Extend # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA
|
||||
@ -412,6 +419,8 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
|
||||
16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
|
||||
16FE4 ; Extend # Mn KHITAN SMALL SCRIPT FILLER
|
||||
1BC9D..1BC9E ; Extend # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
|
||||
1CF00..1CF2D ; Extend # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT
|
||||
1CF30..1CF46 ; Extend # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG
|
||||
1D165 ; Extend # Mc MUSICAL SYMBOL COMBINING STEM
|
||||
1D167..1D169 ; Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
|
||||
1D16E..1D172 ; Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5
|
||||
@ -431,6 +440,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
|
||||
1E023..1E024 ; Extend # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
|
||||
1E026..1E02A ; Extend # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
|
||||
1E130..1E136 ; Extend # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
|
||||
1E2AE ; Extend # Mn TOTO SIGN RISING TONE
|
||||
1E2EC..1E2EF ; Extend # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI
|
||||
1E8D0..1E8D6 ; Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
|
||||
1E944..1E94A ; Extend # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
|
||||
@ -438,7 +448,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
|
||||
E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG
|
||||
E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
|
||||
|
||||
# Total code points: 1984
|
||||
# Total code points: 2095
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -495,6 +505,8 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
|
||||
103B..103C ; SpacingMark # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA
|
||||
1056..1057 ; SpacingMark # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR
|
||||
1084 ; SpacingMark # Mc MYANMAR VOWEL SIGN SHAN E
|
||||
1715 ; SpacingMark # Mc TAGALOG SIGN PAMUDPOD
|
||||
1734 ; SpacingMark # Mc HANUNOO SIGN PAMUDPOD
|
||||
17B6 ; SpacingMark # Mc KHMER VOWEL SIGN AA
|
||||
17BE..17C5 ; SpacingMark # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU
|
||||
17C7..17C8 ; SpacingMark # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU
|
||||
@ -579,7 +591,6 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK
|
||||
116AC ; SpacingMark # Mc TAKRI SIGN VISARGA
|
||||
116AE..116AF ; SpacingMark # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II
|
||||
116B6 ; SpacingMark # Mc TAKRI SIGN VIRAMA
|
||||
11720..11721 ; SpacingMark # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA
|
||||
11726 ; SpacingMark # Mc AHOM VOWEL SIGN E
|
||||
1182C..1182E ; SpacingMark # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II
|
||||
11838 ; SpacingMark # Mc DOGRA SIGN VISARGA
|
||||
|
@ -1,6 +1,6 @@
|
||||
# GraphemeBreakTest-13.0.0.txt
|
||||
# Date: 2019-11-15, 19:49:10 GMT
|
||||
# Copyright (c) 2019 Unicode, Inc.
|
||||
# GraphemeBreakTest-14.0.0.txt
|
||||
# Date: 2021-03-08, 06:22:32 GMT
|
||||
# Copyright (c) 2021 Unicode, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
|
@ -1,11 +1,11 @@
|
||||
# emoji-data.txt
|
||||
# Date: 2020-01-28, 20:52:38 GMT
|
||||
# Copyright (c) 2020 Unicode, Inc.
|
||||
# emoji-data-14.0.0.txt
|
||||
# Date: 2021-08-26, 17:22:22 GMT
|
||||
# Copyright (c) 2021 Unicode, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
# Emoji Data for UTS #51
|
||||
# Version: 13.0
|
||||
# Used with Emoji Version 14.0 and subsequent minor revisions (if any)
|
||||
#
|
||||
# For documentation and usage, see http://www.unicode.org/reports/tr51
|
||||
#
|
||||
@ -22,7 +22,7 @@
|
||||
# All omitted code points have Emoji=No
|
||||
# @missing: 0000..10FFFF ; Emoji ; No
|
||||
|
||||
0023 ; Emoji # E0.0 [1] (#️) number sign
|
||||
0023 ; Emoji # E0.0 [1] (#️) hash sign
|
||||
002A ; Emoji # E0.0 [1] (*️) asterisk
|
||||
0030..0039 ; Emoji # E0.0 [10] (0️..9️) digit zero..digit nine
|
||||
00A9 ; Emoji # E0.6 [1] (©️) copyright
|
||||
@ -119,8 +119,8 @@
|
||||
2747 ; Emoji # E0.6 [1] (❇️) sparkle
|
||||
274C ; Emoji # E0.6 [1] (❌) cross mark
|
||||
274E ; Emoji # E0.6 [1] (❎) cross mark button
|
||||
2753..2755 ; Emoji # E0.6 [3] (❓..❕) question mark..white exclamation mark
|
||||
2757 ; Emoji # E0.6 [1] (❗) exclamation mark
|
||||
2753..2755 ; Emoji # E0.6 [3] (❓..❕) red question mark..white exclamation mark
|
||||
2757 ; Emoji # E0.6 [1] (❗) red exclamation mark
|
||||
2763 ; Emoji # E1.0 [1] (❣️) heart exclamation
|
||||
2764 ; Emoji # E0.6 [1] (❤️) red heart
|
||||
2795..2797 ; Emoji # E0.6 [3] (➕..➗) plus..divide
|
||||
@ -239,7 +239,7 @@
|
||||
1F509 ; Emoji # E1.0 [1] (🔉) speaker medium volume
|
||||
1F50A..1F514 ; Emoji # E0.6 [11] (🔊..🔔) speaker high volume..bell
|
||||
1F515 ; Emoji # E1.0 [1] (🔕) bell with slash
|
||||
1F516..1F52B ; Emoji # E0.6 [22] (🔖..🔫) bookmark..pistol
|
||||
1F516..1F52B ; Emoji # E0.6 [22] (🔖..🔫) bookmark..water pistol
|
||||
1F52C..1F52D ; Emoji # E1.0 [2] (🔬..🔭) microscope..telescope
|
||||
1F52E..1F53D ; Emoji # E0.6 [16] (🔮..🔽) crystal ball..downwards button
|
||||
1F549..1F54A ; Emoji # E0.7 [2] (🕉️..🕊️) om..dove
|
||||
@ -294,7 +294,7 @@
|
||||
1F62E..1F62F ; Emoji # E1.0 [2] (😮..😯) face with open mouth..hushed face
|
||||
1F630..1F633 ; Emoji # E0.6 [4] (😰..😳) anxious face with sweat..flushed face
|
||||
1F634 ; Emoji # E1.0 [1] (😴) sleeping face
|
||||
1F635 ; Emoji # E0.6 [1] (😵) dizzy face
|
||||
1F635 ; Emoji # E0.6 [1] (😵) face with crossed-out eyes
|
||||
1F636 ; Emoji # E1.0 [1] (😶) face without mouth
|
||||
1F637..1F640 ; Emoji # E0.6 [10] (😷..🙀) face with medical mask..weary cat
|
||||
1F641..1F644 ; Emoji # E1.0 [4] (🙁..🙄) slightly frowning face..face with rolling eyes
|
||||
@ -341,6 +341,7 @@
|
||||
1F6D1..1F6D2 ; Emoji # E3.0 [2] (🛑..🛒) stop sign..shopping cart
|
||||
1F6D5 ; Emoji # E12.0 [1] (🛕) hindu temple
|
||||
1F6D6..1F6D7 ; Emoji # E13.0 [2] (🛖..🛗) hut..elevator
|
||||
1F6DD..1F6DF ; Emoji # E14.0 [3] (🛝..🛟) playground slide..ring buoy
|
||||
1F6E0..1F6E5 ; Emoji # E0.7 [6] (🛠️..🛥️) hammer and wrench..motor boat
|
||||
1F6E9 ; Emoji # E0.7 [1] (🛩️) small airplane
|
||||
1F6EB..1F6EC ; Emoji # E1.0 [2] (🛫..🛬) airplane departure..airplane arrival
|
||||
@ -352,6 +353,7 @@
|
||||
1F6FA ; Emoji # E12.0 [1] (🛺) auto rickshaw
|
||||
1F6FB..1F6FC ; Emoji # E13.0 [2] (🛻..🛼) pickup truck..roller skate
|
||||
1F7E0..1F7EB ; Emoji # E12.0 [12] (🟠..🟫) orange circle..brown square
|
||||
1F7F0 ; Emoji # E14.0 [1] (🟰) heavy equals sign
|
||||
1F90C ; Emoji # E13.0 [1] (🤌) pinched fingers
|
||||
1F90D..1F90F ; Emoji # E12.0 [3] (🤍..🤏) white heart..pinching hand
|
||||
1F910..1F918 ; Emoji # E1.0 [9] (🤐..🤘) zipper-mouth face..sign of the horns
|
||||
@ -375,6 +377,7 @@
|
||||
1F972 ; Emoji # E13.0 [1] (🥲) smiling face with tear
|
||||
1F973..1F976 ; Emoji # E11.0 [4] (🥳..🥶) partying face..cold face
|
||||
1F977..1F978 ; Emoji # E13.0 [2] (🥷..🥸) ninja..disguised face
|
||||
1F979 ; Emoji # E14.0 [1] (🥹) face holding back tears
|
||||
1F97A ; Emoji # E11.0 [1] (🥺) pleading face
|
||||
1F97B ; Emoji # E12.0 [1] (🥻) sari
|
||||
1F97C..1F97F ; Emoji # E11.0 [4] (🥼..🥿) lab coat..flat shoe
|
||||
@ -392,21 +395,29 @@
|
||||
1F9C1..1F9C2 ; Emoji # E11.0 [2] (🧁..🧂) cupcake..salt
|
||||
1F9C3..1F9CA ; Emoji # E12.0 [8] (🧃..🧊) beverage box..ice
|
||||
1F9CB ; Emoji # E13.0 [1] (🧋) bubble tea
|
||||
1F9CC ; Emoji # E14.0 [1] (🧌) troll
|
||||
1F9CD..1F9CF ; Emoji # E12.0 [3] (🧍..🧏) person standing..deaf person
|
||||
1F9D0..1F9E6 ; Emoji # E5.0 [23] (🧐..🧦) face with monocle..socks
|
||||
1F9E7..1F9FF ; Emoji # E11.0 [25] (🧧..🧿) red envelope..nazar amulet
|
||||
1FA70..1FA73 ; Emoji # E12.0 [4] (🩰..🩳) ballet shoes..shorts
|
||||
1FA74 ; Emoji # E13.0 [1] (🩴) thong sandal
|
||||
1FA78..1FA7A ; Emoji # E12.0 [3] (🩸..🩺) drop of blood..stethoscope
|
||||
1FA7B..1FA7C ; Emoji # E14.0 [2] (🩻..🩼) x-ray..crutch
|
||||
1FA80..1FA82 ; Emoji # E12.0 [3] (🪀..🪂) yo-yo..parachute
|
||||
1FA83..1FA86 ; Emoji # E13.0 [4] (🪃..🪆) boomerang..nesting dolls
|
||||
1FA90..1FA95 ; Emoji # E12.0 [6] (🪐..🪕) ringed planet..banjo
|
||||
1FA96..1FAA8 ; Emoji # E13.0 [19] (🪖..🪨) military helmet..rock
|
||||
1FAA9..1FAAC ; Emoji # E14.0 [4] (🪩..🪬) mirror ball..hamsa
|
||||
1FAB0..1FAB6 ; Emoji # E13.0 [7] (🪰..🪶) fly..feather
|
||||
1FAB7..1FABA ; Emoji # E14.0 [4] (🪷..🪺) lotus..nest with eggs
|
||||
1FAC0..1FAC2 ; Emoji # E13.0 [3] (🫀..🫂) anatomical heart..people hugging
|
||||
1FAC3..1FAC5 ; Emoji # E14.0 [3] (🫃..🫅) pregnant man..person with crown
|
||||
1FAD0..1FAD6 ; Emoji # E13.0 [7] (🫐..🫖) blueberries..teapot
|
||||
1FAD7..1FAD9 ; Emoji # E14.0 [3] (🫗..🫙) pouring liquid..jar
|
||||
1FAE0..1FAE7 ; Emoji # E14.0 [8] (🫠..🫧) melting face..bubbles
|
||||
1FAF0..1FAF6 ; Emoji # E14.0 [7] (🫰..🫶) hand with index finger and thumb crossed..heart hands
|
||||
|
||||
# Total elements: 1367
|
||||
# Total elements: 1404
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -438,8 +449,8 @@
|
||||
2728 ; Emoji_Presentation # E0.6 [1] (✨) sparkles
|
||||
274C ; Emoji_Presentation # E0.6 [1] (❌) cross mark
|
||||
274E ; Emoji_Presentation # E0.6 [1] (❎) cross mark button
|
||||
2753..2755 ; Emoji_Presentation # E0.6 [3] (❓..❕) question mark..white exclamation mark
|
||||
2757 ; Emoji_Presentation # E0.6 [1] (❗) exclamation mark
|
||||
2753..2755 ; Emoji_Presentation # E0.6 [3] (❓..❕) red question mark..white exclamation mark
|
||||
2757 ; Emoji_Presentation # E0.6 [1] (❗) red exclamation mark
|
||||
2795..2797 ; Emoji_Presentation # E0.6 [3] (➕..➗) plus..divide
|
||||
27B0 ; Emoji_Presentation # E0.6 [1] (➰) curly loop
|
||||
27BF ; Emoji_Presentation # E1.0 [1] (➿) double curly loop
|
||||
@ -533,7 +544,7 @@
|
||||
1F509 ; Emoji_Presentation # E1.0 [1] (🔉) speaker medium volume
|
||||
1F50A..1F514 ; Emoji_Presentation # E0.6 [11] (🔊..🔔) speaker high volume..bell
|
||||
1F515 ; Emoji_Presentation # E1.0 [1] (🔕) bell with slash
|
||||
1F516..1F52B ; Emoji_Presentation # E0.6 [22] (🔖..🔫) bookmark..pistol
|
||||
1F516..1F52B ; Emoji_Presentation # E0.6 [22] (🔖..🔫) bookmark..water pistol
|
||||
1F52C..1F52D ; Emoji_Presentation # E1.0 [2] (🔬..🔭) microscope..telescope
|
||||
1F52E..1F53D ; Emoji_Presentation # E0.6 [16] (🔮..🔽) crystal ball..downwards button
|
||||
1F54B..1F54E ; Emoji_Presentation # E1.0 [4] (🕋..🕎) kaaba..menorah
|
||||
@ -569,7 +580,7 @@
|
||||
1F62E..1F62F ; Emoji_Presentation # E1.0 [2] (😮..😯) face with open mouth..hushed face
|
||||
1F630..1F633 ; Emoji_Presentation # E0.6 [4] (😰..😳) anxious face with sweat..flushed face
|
||||
1F634 ; Emoji_Presentation # E1.0 [1] (😴) sleeping face
|
||||
1F635 ; Emoji_Presentation # E0.6 [1] (😵) dizzy face
|
||||
1F635 ; Emoji_Presentation # E0.6 [1] (😵) face with crossed-out eyes
|
||||
1F636 ; Emoji_Presentation # E1.0 [1] (😶) face without mouth
|
||||
1F637..1F640 ; Emoji_Presentation # E0.6 [10] (😷..🙀) face with medical mask..weary cat
|
||||
1F641..1F644 ; Emoji_Presentation # E1.0 [4] (🙁..🙄) slightly frowning face..face with rolling eyes
|
||||
@ -614,6 +625,7 @@
|
||||
1F6D1..1F6D2 ; Emoji_Presentation # E3.0 [2] (🛑..🛒) stop sign..shopping cart
|
||||
1F6D5 ; Emoji_Presentation # E12.0 [1] (🛕) hindu temple
|
||||
1F6D6..1F6D7 ; Emoji_Presentation # E13.0 [2] (🛖..🛗) hut..elevator
|
||||
1F6DD..1F6DF ; Emoji_Presentation # E14.0 [3] (🛝..🛟) playground slide..ring buoy
|
||||
1F6EB..1F6EC ; Emoji_Presentation # E1.0 [2] (🛫..🛬) airplane departure..airplane arrival
|
||||
1F6F4..1F6F6 ; Emoji_Presentation # E3.0 [3] (🛴..🛶) kick scooter..canoe
|
||||
1F6F7..1F6F8 ; Emoji_Presentation # E5.0 [2] (🛷..🛸) sled..flying saucer
|
||||
@ -621,6 +633,7 @@
|
||||
1F6FA ; Emoji_Presentation # E12.0 [1] (🛺) auto rickshaw
|
||||
1F6FB..1F6FC ; Emoji_Presentation # E13.0 [2] (🛻..🛼) pickup truck..roller skate
|
||||
1F7E0..1F7EB ; Emoji_Presentation # E12.0 [12] (🟠..🟫) orange circle..brown square
|
||||
1F7F0 ; Emoji_Presentation # E14.0 [1] (🟰) heavy equals sign
|
||||
1F90C ; Emoji_Presentation # E13.0 [1] (🤌) pinched fingers
|
||||
1F90D..1F90F ; Emoji_Presentation # E12.0 [3] (🤍..🤏) white heart..pinching hand
|
||||
1F910..1F918 ; Emoji_Presentation # E1.0 [9] (🤐..🤘) zipper-mouth face..sign of the horns
|
||||
@ -644,6 +657,7 @@
|
||||
1F972 ; Emoji_Presentation # E13.0 [1] (🥲) smiling face with tear
|
||||
1F973..1F976 ; Emoji_Presentation # E11.0 [4] (🥳..🥶) partying face..cold face
|
||||
1F977..1F978 ; Emoji_Presentation # E13.0 [2] (🥷..🥸) ninja..disguised face
|
||||
1F979 ; Emoji_Presentation # E14.0 [1] (🥹) face holding back tears
|
||||
1F97A ; Emoji_Presentation # E11.0 [1] (🥺) pleading face
|
||||
1F97B ; Emoji_Presentation # E12.0 [1] (🥻) sari
|
||||
1F97C..1F97F ; Emoji_Presentation # E11.0 [4] (🥼..🥿) lab coat..flat shoe
|
||||
@ -661,21 +675,29 @@
|
||||
1F9C1..1F9C2 ; Emoji_Presentation # E11.0 [2] (🧁..🧂) cupcake..salt
|
||||
1F9C3..1F9CA ; Emoji_Presentation # E12.0 [8] (🧃..🧊) beverage box..ice
|
||||
1F9CB ; Emoji_Presentation # E13.0 [1] (🧋) bubble tea
|
||||
1F9CC ; Emoji_Presentation # E14.0 [1] (🧌) troll
|
||||
1F9CD..1F9CF ; Emoji_Presentation # E12.0 [3] (🧍..🧏) person standing..deaf person
|
||||
1F9D0..1F9E6 ; Emoji_Presentation # E5.0 [23] (🧐..🧦) face with monocle..socks
|
||||
1F9E7..1F9FF ; Emoji_Presentation # E11.0 [25] (🧧..🧿) red envelope..nazar amulet
|
||||
1FA70..1FA73 ; Emoji_Presentation # E12.0 [4] (🩰..🩳) ballet shoes..shorts
|
||||
1FA74 ; Emoji_Presentation # E13.0 [1] (🩴) thong sandal
|
||||
1FA78..1FA7A ; Emoji_Presentation # E12.0 [3] (🩸..🩺) drop of blood..stethoscope
|
||||
1FA7B..1FA7C ; Emoji_Presentation # E14.0 [2] (🩻..🩼) x-ray..crutch
|
||||
1FA80..1FA82 ; Emoji_Presentation # E12.0 [3] (🪀..🪂) yo-yo..parachute
|
||||
1FA83..1FA86 ; Emoji_Presentation # E13.0 [4] (🪃..🪆) boomerang..nesting dolls
|
||||
1FA90..1FA95 ; Emoji_Presentation # E12.0 [6] (🪐..🪕) ringed planet..banjo
|
||||
1FA96..1FAA8 ; Emoji_Presentation # E13.0 [19] (🪖..🪨) military helmet..rock
|
||||
1FAA9..1FAAC ; Emoji_Presentation # E14.0 [4] (🪩..🪬) mirror ball..hamsa
|
||||
1FAB0..1FAB6 ; Emoji_Presentation # E13.0 [7] (🪰..🪶) fly..feather
|
||||
1FAB7..1FABA ; Emoji_Presentation # E14.0 [4] (🪷..🪺) lotus..nest with eggs
|
||||
1FAC0..1FAC2 ; Emoji_Presentation # E13.0 [3] (🫀..🫂) anatomical heart..people hugging
|
||||
1FAC3..1FAC5 ; Emoji_Presentation # E14.0 [3] (🫃..🫅) pregnant man..person with crown
|
||||
1FAD0..1FAD6 ; Emoji_Presentation # E13.0 [7] (🫐..🫖) blueberries..teapot
|
||||
1FAD7..1FAD9 ; Emoji_Presentation # E14.0 [3] (🫗..🫙) pouring liquid..jar
|
||||
1FAE0..1FAE7 ; Emoji_Presentation # E14.0 [8] (🫠..🫧) melting face..bubbles
|
||||
1FAF0..1FAF6 ; Emoji_Presentation # E14.0 [7] (🫰..🫶) hand with index finger and thumb crossed..heart hands
|
||||
|
||||
# Total elements: 1148
|
||||
# Total elements: 1185
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -738,15 +760,17 @@
|
||||
1F9BB ; Emoji_Modifier_Base # E12.0 [1] (🦻) ear with hearing aid
|
||||
1F9CD..1F9CF ; Emoji_Modifier_Base # E12.0 [3] (🧍..🧏) person standing..deaf person
|
||||
1F9D1..1F9DD ; Emoji_Modifier_Base # E5.0 [13] (🧑..🧝) person..elf
|
||||
1FAC3..1FAC5 ; Emoji_Modifier_Base # E14.0 [3] (🫃..🫅) pregnant man..person with crown
|
||||
1FAF0..1FAF6 ; Emoji_Modifier_Base # E14.0 [7] (🫰..🫶) hand with index finger and thumb crossed..heart hands
|
||||
|
||||
# Total elements: 122
|
||||
# Total elements: 132
|
||||
|
||||
# ================================================
|
||||
|
||||
# All omitted code points have Emoji_Component=No
|
||||
# @missing: 0000..10FFFF ; Emoji_Component ; No
|
||||
|
||||
0023 ; Emoji_Component # E0.0 [1] (#️) number sign
|
||||
0023 ; Emoji_Component # E0.0 [1] (#️) hash sign
|
||||
002A ; Emoji_Component # E0.0 [1] (*️) asterisk
|
||||
0030..0039 ; Emoji_Component # E0.0 [10] (0️..9️) digit zero..digit nine
|
||||
200D ; Emoji_Component # E0.0 [1] () zero width joiner
|
||||
@ -902,8 +926,8 @@ E0020..E007F ; Emoji_Component # E0.0 [96] (..) tag space..c
|
||||
2747 ; Extended_Pictographic# E0.6 [1] (❇️) sparkle
|
||||
274C ; Extended_Pictographic# E0.6 [1] (❌) cross mark
|
||||
274E ; Extended_Pictographic# E0.6 [1] (❎) cross mark button
|
||||
2753..2755 ; Extended_Pictographic# E0.6 [3] (❓..❕) question mark..white exclamation mark
|
||||
2757 ; Extended_Pictographic# E0.6 [1] (❗) exclamation mark
|
||||
2753..2755 ; Extended_Pictographic# E0.6 [3] (❓..❕) red question mark..white exclamation mark
|
||||
2757 ; Extended_Pictographic# E0.6 [1] (❗) red exclamation mark
|
||||
2763 ; Extended_Pictographic# E1.0 [1] (❣️) heart exclamation
|
||||
2764 ; Extended_Pictographic# E0.6 [1] (❤️) red heart
|
||||
2765..2767 ; Extended_Pictographic# E0.0 [3] (❥..❧) ROTATED HEAVY BLACK HEART BULLET..ROTATED FLORAL HEART BULLET
|
||||
@ -1041,7 +1065,7 @@ E0020..E007F ; Emoji_Component # E0.0 [96] (..) tag space..c
|
||||
1F509 ; Extended_Pictographic# E1.0 [1] (🔉) speaker medium volume
|
||||
1F50A..1F514 ; Extended_Pictographic# E0.6 [11] (🔊..🔔) speaker high volume..bell
|
||||
1F515 ; Extended_Pictographic# E1.0 [1] (🔕) bell with slash
|
||||
1F516..1F52B ; Extended_Pictographic# E0.6 [22] (🔖..🔫) bookmark..pistol
|
||||
1F516..1F52B ; Extended_Pictographic# E0.6 [22] (🔖..🔫) bookmark..water pistol
|
||||
1F52C..1F52D ; Extended_Pictographic# E1.0 [2] (🔬..🔭) microscope..telescope
|
||||
1F52E..1F53D ; Extended_Pictographic# E0.6 [16] (🔮..🔽) crystal ball..downwards button
|
||||
1F546..1F548 ; Extended_Pictographic# E0.0 [3] (🕆..🕈) WHITE LATIN CROSS..CELTIC CROSS
|
||||
@ -1117,7 +1141,7 @@ E0020..E007F ; Emoji_Component # E0.0 [96] (..) tag space..c
|
||||
1F62E..1F62F ; Extended_Pictographic# E1.0 [2] (😮..😯) face with open mouth..hushed face
|
||||
1F630..1F633 ; Extended_Pictographic# E0.6 [4] (😰..😳) anxious face with sweat..flushed face
|
||||
1F634 ; Extended_Pictographic# E1.0 [1] (😴) sleeping face
|
||||
1F635 ; Extended_Pictographic# E0.6 [1] (😵) dizzy face
|
||||
1F635 ; Extended_Pictographic# E0.6 [1] (😵) face with crossed-out eyes
|
||||
1F636 ; Extended_Pictographic# E1.0 [1] (😶) face without mouth
|
||||
1F637..1F640 ; Extended_Pictographic# E0.6 [10] (😷..🙀) face with medical mask..weary cat
|
||||
1F641..1F644 ; Extended_Pictographic# E1.0 [4] (🙁..🙄) slightly frowning face..face with rolling eyes
|
||||
@ -1166,7 +1190,8 @@ E0020..E007F ; Emoji_Component # E0.0 [96] (..) tag space..c
|
||||
1F6D3..1F6D4 ; Extended_Pictographic# E0.0 [2] (🛓..🛔) STUPA..PAGODA
|
||||
1F6D5 ; Extended_Pictographic# E12.0 [1] (🛕) hindu temple
|
||||
1F6D6..1F6D7 ; Extended_Pictographic# E13.0 [2] (🛖..🛗) hut..elevator
|
||||
1F6D8..1F6DF ; Extended_Pictographic# E0.0 [8] (..🛟) <reserved-1F6D8>..<reserved-1F6DF>
|
||||
1F6D8..1F6DC ; Extended_Pictographic# E0.0 [5] (..🛜) <reserved-1F6D8>..<reserved-1F6DC>
|
||||
1F6DD..1F6DF ; Extended_Pictographic# E14.0 [3] (🛝..🛟) playground slide..ring buoy
|
||||
1F6E0..1F6E5 ; Extended_Pictographic# E0.7 [6] (🛠️..🛥️) hammer and wrench..motor boat
|
||||
1F6E6..1F6E8 ; Extended_Pictographic# E0.0 [3] (🛦..🛨) UP-POINTING MILITARY AIRPLANE..UP-POINTING SMALL AIRPLANE
|
||||
1F6E9 ; Extended_Pictographic# E0.7 [1] (🛩️) small airplane
|
||||
@ -1185,7 +1210,9 @@ E0020..E007F ; Emoji_Component # E0.0 [96] (..) tag space..c
|
||||
1F774..1F77F ; Extended_Pictographic# E0.0 [12] (🝴..🝿) <reserved-1F774>..<reserved-1F77F>
|
||||
1F7D5..1F7DF ; Extended_Pictographic# E0.0 [11] (🟕..) CIRCLED TRIANGLE..<reserved-1F7DF>
|
||||
1F7E0..1F7EB ; Extended_Pictographic# E12.0 [12] (🟠..🟫) orange circle..brown square
|
||||
1F7EC..1F7FF ; Extended_Pictographic# E0.0 [20] (..) <reserved-1F7EC>..<reserved-1F7FF>
|
||||
1F7EC..1F7EF ; Extended_Pictographic# E0.0 [4] (..) <reserved-1F7EC>..<reserved-1F7EF>
|
||||
1F7F0 ; Extended_Pictographic# E14.0 [1] (🟰) heavy equals sign
|
||||
1F7F1..1F7FF ; Extended_Pictographic# E0.0 [15] (..) <reserved-1F7F1>..<reserved-1F7FF>
|
||||
1F80C..1F80F ; Extended_Pictographic# E0.0 [4] (..) <reserved-1F80C>..<reserved-1F80F>
|
||||
1F848..1F84F ; Extended_Pictographic# E0.0 [8] (..) <reserved-1F848>..<reserved-1F84F>
|
||||
1F85A..1F85F ; Extended_Pictographic# E0.0 [6] (..) <reserved-1F85A>..<reserved-1F85F>
|
||||
@ -1214,7 +1241,7 @@ E0020..E007F ; Emoji_Component # E0.0 [96] (..) tag space..c
|
||||
1F972 ; Extended_Pictographic# E13.0 [1] (🥲) smiling face with tear
|
||||
1F973..1F976 ; Extended_Pictographic# E11.0 [4] (🥳..🥶) partying face..cold face
|
||||
1F977..1F978 ; Extended_Pictographic# E13.0 [2] (🥷..🥸) ninja..disguised face
|
||||
1F979 ; Extended_Pictographic# E0.0 [1] (🥹) <reserved-1F979>
|
||||
1F979 ; Extended_Pictographic# E14.0 [1] (🥹) face holding back tears
|
||||
1F97A ; Extended_Pictographic# E11.0 [1] (🥺) pleading face
|
||||
1F97B ; Extended_Pictographic# E12.0 [1] (🥻) sari
|
||||
1F97C..1F97F ; Extended_Pictographic# E11.0 [4] (🥼..🥿) lab coat..flat shoe
|
||||
@ -1232,7 +1259,7 @@ E0020..E007F ; Emoji_Component # E0.0 [96] (..) tag space..c
|
||||
1F9C1..1F9C2 ; Extended_Pictographic# E11.0 [2] (🧁..🧂) cupcake..salt
|
||||
1F9C3..1F9CA ; Extended_Pictographic# E12.0 [8] (🧃..🧊) beverage box..ice
|
||||
1F9CB ; Extended_Pictographic# E13.0 [1] (🧋) bubble tea
|
||||
1F9CC ; Extended_Pictographic# E0.0 [1] (🧌) <reserved-1F9CC>
|
||||
1F9CC ; Extended_Pictographic# E14.0 [1] (🧌) troll
|
||||
1F9CD..1F9CF ; Extended_Pictographic# E12.0 [3] (🧍..🧏) person standing..deaf person
|
||||
1F9D0..1F9E6 ; Extended_Pictographic# E5.0 [23] (🧐..🧦) face with monocle..socks
|
||||
1F9E7..1F9FF ; Extended_Pictographic# E11.0 [25] (🧧..🧿) red envelope..nazar amulet
|
||||
@ -1241,19 +1268,28 @@ E0020..E007F ; Emoji_Component # E0.0 [96] (..) tag space..c
|
||||
1FA74 ; Extended_Pictographic# E13.0 [1] (🩴) thong sandal
|
||||
1FA75..1FA77 ; Extended_Pictographic# E0.0 [3] (🩵..🩷) <reserved-1FA75>..<reserved-1FA77>
|
||||
1FA78..1FA7A ; Extended_Pictographic# E12.0 [3] (🩸..🩺) drop of blood..stethoscope
|
||||
1FA7B..1FA7F ; Extended_Pictographic# E0.0 [5] (🩻..) <reserved-1FA7B>..<reserved-1FA7F>
|
||||
1FA7B..1FA7C ; Extended_Pictographic# E14.0 [2] (🩻..🩼) x-ray..crutch
|
||||
1FA7D..1FA7F ; Extended_Pictographic# E0.0 [3] (..) <reserved-1FA7D>..<reserved-1FA7F>
|
||||
1FA80..1FA82 ; Extended_Pictographic# E12.0 [3] (🪀..🪂) yo-yo..parachute
|
||||
1FA83..1FA86 ; Extended_Pictographic# E13.0 [4] (🪃..🪆) boomerang..nesting dolls
|
||||
1FA87..1FA8F ; Extended_Pictographic# E0.0 [9] (🪇..) <reserved-1FA87>..<reserved-1FA8F>
|
||||
1FA90..1FA95 ; Extended_Pictographic# E12.0 [6] (🪐..🪕) ringed planet..banjo
|
||||
1FA96..1FAA8 ; Extended_Pictographic# E13.0 [19] (🪖..🪨) military helmet..rock
|
||||
1FAA9..1FAAF ; Extended_Pictographic# E0.0 [7] (🪩..🪯) <reserved-1FAA9>..<reserved-1FAAF>
|
||||
1FAA9..1FAAC ; Extended_Pictographic# E14.0 [4] (🪩..🪬) mirror ball..hamsa
|
||||
1FAAD..1FAAF ; Extended_Pictographic# E0.0 [3] (🪭..🪯) <reserved-1FAAD>..<reserved-1FAAF>
|
||||
1FAB0..1FAB6 ; Extended_Pictographic# E13.0 [7] (🪰..🪶) fly..feather
|
||||
1FAB7..1FABF ; Extended_Pictographic# E0.0 [9] (🪷..🪿) <reserved-1FAB7>..<reserved-1FABF>
|
||||
1FAB7..1FABA ; Extended_Pictographic# E14.0 [4] (🪷..🪺) lotus..nest with eggs
|
||||
1FABB..1FABF ; Extended_Pictographic# E0.0 [5] (🪻..🪿) <reserved-1FABB>..<reserved-1FABF>
|
||||
1FAC0..1FAC2 ; Extended_Pictographic# E13.0 [3] (🫀..🫂) anatomical heart..people hugging
|
||||
1FAC3..1FACF ; Extended_Pictographic# E0.0 [13] (🫃..🫏) <reserved-1FAC3>..<reserved-1FACF>
|
||||
1FAC3..1FAC5 ; Extended_Pictographic# E14.0 [3] (🫃..🫅) pregnant man..person with crown
|
||||
1FAC6..1FACF ; Extended_Pictographic# E0.0 [10] (..🫏) <reserved-1FAC6>..<reserved-1FACF>
|
||||
1FAD0..1FAD6 ; Extended_Pictographic# E13.0 [7] (🫐..🫖) blueberries..teapot
|
||||
1FAD7..1FAFF ; Extended_Pictographic# E0.0 [41] (🫗..) <reserved-1FAD7>..<reserved-1FAFF>
|
||||
1FAD7..1FAD9 ; Extended_Pictographic# E14.0 [3] (🫗..🫙) pouring liquid..jar
|
||||
1FADA..1FADF ; Extended_Pictographic# E0.0 [6] (🫚..) <reserved-1FADA>..<reserved-1FADF>
|
||||
1FAE0..1FAE7 ; Extended_Pictographic# E14.0 [8] (🫠..🫧) melting face..bubbles
|
||||
1FAE8..1FAEF ; Extended_Pictographic# E0.0 [8] (🫨..) <reserved-1FAE8>..<reserved-1FAEF>
|
||||
1FAF0..1FAF6 ; Extended_Pictographic# E14.0 [7] (🫰..🫶) hand with index finger and thumb crossed..heart hands
|
||||
1FAF7..1FAFF ; Extended_Pictographic# E0.0 [9] (🫷..) <reserved-1FAF7>..<reserved-1FAFF>
|
||||
1FC00..1FFFD ; Extended_Pictographic# E0.0[1022] (..) <reserved-1FC00>..<reserved-1FFFD>
|
||||
|
||||
# Total elements: 3537
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -35,8 +35,8 @@ final class Grapheme {
|
||||
* <p>
|
||||
* See Unicode Standard Annex #29 Unicode Text Segmentation for the specification
|
||||
* for the extended grapheme cluster boundary rules. The following implementation
|
||||
* is based on version 12.0 of the annex.
|
||||
* (http://www.unicode.org/reports/tr29/tr29-35.html)
|
||||
* is based on the annex for Unicode version 14.0.
|
||||
* (http://www.unicode.org/reports/tr29/tr29-38.html)
|
||||
*
|
||||
* @param src the {@code CharSequence} to be scanned
|
||||
* @param off offset to start looking for the next boundary in the src
|
||||
@ -97,7 +97,7 @@ final class Grapheme {
|
||||
private static final int FIRST_TYPE = 0;
|
||||
private static final int LAST_TYPE = 14;
|
||||
|
||||
private static boolean[][] rules;
|
||||
private static final boolean[][] rules;
|
||||
static {
|
||||
rules = new boolean[LAST_TYPE + 1][LAST_TYPE + 1];
|
||||
// GB 999 Any + Any -> default
|
||||
@ -201,8 +201,9 @@ final class Grapheme {
|
||||
if (cp == 0x200D)
|
||||
return ZWJ;
|
||||
if (cp >= 0x0600 && cp <= 0x0605 ||
|
||||
cp == 0x06DD || cp == 0x070F || cp == 0x08E2 ||
|
||||
cp == 0x110BD || cp == 0x110CD)
|
||||
cp == 0x06DD || cp == 0x070F ||
|
||||
cp == 0x0890 || cp == 0x0891 ||
|
||||
cp == 0x08E2 || cp == 0x110BD || cp == 0x110CD)
|
||||
return PREPEND;
|
||||
return CONTROL;
|
||||
case Character.NON_SPACING_MARK:
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -76,9 +76,6 @@ public final class Punycode {
|
||||
// TODO: eliminate the 256 limitation
|
||||
private static final int MAX_CP_COUNT = 256;
|
||||
|
||||
private static final int UINT_MAGIC = 0x80000000;
|
||||
private static final long ULONG_MAGIC = 0x8000000000000000L;
|
||||
|
||||
private static int adaptBias(int delta, int length, boolean firstTime){
|
||||
if(firstTime){
|
||||
delta /=DAMP;
|
||||
@ -96,34 +93,25 @@ public final class Punycode {
|
||||
}
|
||||
|
||||
/**
|
||||
* basicToDigit[] contains the numeric value of a basic code
|
||||
* point (for use in representing integers) in the range 0 to
|
||||
* BASE-1, or -1 if b is does not represent a value.
|
||||
* @return the numeric value of a basic code point (for use in representing integers)
|
||||
* in the range 0 to BASE-1, or a negative value if cp is invalid.
|
||||
*/
|
||||
static final int[] basicToDigit= new int[]{
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
|
||||
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
|
||||
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
|
||||
private static final int decodeDigit(int cp) {
|
||||
if(cp<='Z') {
|
||||
if(cp<='9') {
|
||||
if(cp<'0') {
|
||||
return -1;
|
||||
} else {
|
||||
return cp-'0'+26; // 0..9 -> 26..35
|
||||
}
|
||||
} else {
|
||||
return cp-'A'; // A-Z -> 0..25
|
||||
}
|
||||
} else if(cp<='z') {
|
||||
return cp-'a'; // a..z -> 0..25
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
};
|
||||
|
||||
private static char asciiCaseMap(char b, boolean uppercase) {
|
||||
@ -158,6 +146,12 @@ public final class Punycode {
|
||||
return (char)((ZERO-26)+digit);
|
||||
}
|
||||
}
|
||||
|
||||
// ICU-13727: Limit input length for n^2 algorithm
|
||||
// where well-formed strings are at most 59 characters long.
|
||||
private static final int ENCODE_MAX_CODE_UNITS = 1000;
|
||||
private static final int DECODE_MAX_CHARS = 2000;
|
||||
|
||||
/**
|
||||
* Converts Unicode to Punycode.
|
||||
* The input string must not contain single, unpaired surrogates.
|
||||
@ -174,6 +168,10 @@ public final class Punycode {
|
||||
int n, delta, handledCPCount, basicLength, destLength, bias, j, m, q, k, t, srcCPCount;
|
||||
char c, c2;
|
||||
int srcLength = src.length();
|
||||
if (srcLength > ENCODE_MAX_CODE_UNITS) {
|
||||
throw new RuntimeException(
|
||||
"input too long: " + srcLength + " UTF-16 code units");
|
||||
}
|
||||
int destCapacity = MAX_CP_COUNT;
|
||||
char[] dest = new char[destCapacity];
|
||||
StringBuffer result = new StringBuffer();
|
||||
@ -251,7 +249,7 @@ public final class Punycode {
|
||||
* Increase delta enough to advance the decoder's
|
||||
* <n,i> state to <m,0>, but guard against overflow:
|
||||
*/
|
||||
if(m-n>(0x7fffffff-MAX_CP_COUNT-delta)/(handledCPCount+1)) {
|
||||
if(m-n>(0x7fffffff-handledCPCount-delta)/(handledCPCount+1)) {
|
||||
throw new RuntimeException("Internal program error");
|
||||
}
|
||||
delta+=(m-n)*(handledCPCount+1);
|
||||
@ -332,6 +330,9 @@ public final class Punycode {
|
||||
public static StringBuffer decode(StringBuffer src, boolean[] caseFlags)
|
||||
throws ParseException{
|
||||
int srcLength = src.length();
|
||||
if (srcLength > DECODE_MAX_CHARS) {
|
||||
throw new RuntimeException("input too long: " + srcLength + " characters");
|
||||
}
|
||||
StringBuffer result = new StringBuffer();
|
||||
int n, destLength, i, bias, basicLength, j, in, oldi, w, k, digit, t,
|
||||
destCPCount, firstSupplementaryIndex, cpLength;
|
||||
@ -395,7 +396,7 @@ public final class Punycode {
|
||||
throw new ParseException("Illegal char found", -1);
|
||||
}
|
||||
|
||||
digit=basicToDigit[(byte)src.charAt(in++)];
|
||||
digit=decodeDigit(src.charAt(in++));
|
||||
if(digit<0) {
|
||||
throw new ParseException("Invalid char found", -1);
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -134,9 +134,15 @@ public class UnicodeSetStringSpan {
|
||||
|
||||
int i, spanLength;
|
||||
someRelevant = false;
|
||||
for (i = 0; i < stringsLength; ++i) {
|
||||
for (i = 0; i < stringsLength;) {
|
||||
String string = strings.get(i);
|
||||
int length16 = string.length();
|
||||
if (length16 == 0) {
|
||||
// Remove the empty string.
|
||||
strings.remove(i);
|
||||
--stringsLength;
|
||||
continue;
|
||||
}
|
||||
spanLength = spanSet.span(string, SpanCondition.CONTAINED);
|
||||
if (spanLength < length16) { // Relevant string.
|
||||
someRelevant = true;
|
||||
@ -144,6 +150,7 @@ public class UnicodeSetStringSpan {
|
||||
if (/* (0 != (which & UTF16)) && */ length16 > maxLength16) {
|
||||
maxLength16 = length16;
|
||||
}
|
||||
++i;
|
||||
}
|
||||
if (!someRelevant && (which & WITH_COUNT) == 0) {
|
||||
return;
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2009, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2009, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -84,7 +84,7 @@ import jdk.internal.icu.util.VersionInfo;
|
||||
* <p>
|
||||
* Further detail on differences can be determined using the program
|
||||
* <a href=
|
||||
* "http://source.icu-project.org/repos/icu/icu4j/trunk/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java">
|
||||
* "https://github.com/unicode-org/icu/blob/main/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java">
|
||||
* com.ibm.icu.dev.test.lang.UCharacterCompare</a>
|
||||
* </p>
|
||||
* <p>
|
||||
@ -101,9 +101,9 @@ import jdk.internal.icu.util.VersionInfo;
|
||||
* For more information see
|
||||
* <a href="http://www.unicode/org/ucd/">"About the Unicode Character Database"</a>
|
||||
* (http://www.unicode.org/ucd/)
|
||||
* and the <a href="http://www.icu-project.org/userguide/properties.html">ICU
|
||||
* and the <a href="https://unicode-org.github.io/icu/userguide/strings/properties">ICU
|
||||
* User Guide chapter on Properties</a>
|
||||
* (http://www.icu-project.org/userguide/properties.html).
|
||||
* (https://unicode-org.github.io/icu/userguide/strings/properties).
|
||||
* </p>
|
||||
* <p>
|
||||
* There are also functions that provide easy migration from C/POSIX functions
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -53,7 +53,7 @@ public final class UCharacterDirection implements UCharacterEnums.ECharacterDire
|
||||
// private constructor =========================================
|
||||
///CLOVER:OFF
|
||||
/**
|
||||
* Private constructor to prevent initialisation
|
||||
* Private constructor to prevent initialization
|
||||
*/
|
||||
private UCharacterDirection()
|
||||
{
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -62,7 +62,7 @@ package jdk.internal.icu.lang;
|
||||
@Deprecated
|
||||
class UCharacterEnums {
|
||||
|
||||
/** This is just a namespace, it is not instantiatable. */
|
||||
/** This is just a namespace, it is not instantiable. */
|
||||
private UCharacterEnums() {};
|
||||
|
||||
/**
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2009, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2009, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -63,7 +63,7 @@ import jdk.internal.icu.impl.UBiDiProps;
|
||||
*
|
||||
* This is an implementation of the Unicode Bidirectional Algorithm. The
|
||||
* algorithm is defined in the
|
||||
* <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
|
||||
* <a href="https://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
|
||||
* Unicode Bidirectional Algorithm</a>.
|
||||
* <p>
|
||||
*
|
||||
@ -985,7 +985,7 @@ public class BidiBase {
|
||||
/**
|
||||
* Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).
|
||||
* Used in
|
||||
* <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
|
||||
* <a href="https://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
|
||||
* Unicode Bidirectional Algorithm</a>.
|
||||
* Returns UCharacter.BidiPairedBracketType values.
|
||||
* @stable ICU 52
|
||||
@ -3365,7 +3365,7 @@ public class BidiBase {
|
||||
|
||||
/**
|
||||
* Perform the Unicode Bidi algorithm. It is defined in the
|
||||
* <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
|
||||
* <a href="https://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
|
||||
* Unicode Bidirectional Algorithm</a>, version 13,
|
||||
* also described in The Unicode Standard, Version 4.0 .<p>
|
||||
*
|
||||
@ -3450,7 +3450,7 @@ public class BidiBase {
|
||||
|
||||
/**
|
||||
* Perform the Unicode Bidi algorithm. It is defined in the
|
||||
* <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
|
||||
* <a href="https://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
|
||||
* Unicode Bidirectional Algorithm</a>, version 13,
|
||||
* also described in The Unicode Standard, Version 4.0 .<p>
|
||||
*
|
||||
@ -3786,7 +3786,7 @@ public class BidiBase {
|
||||
|
||||
/**
|
||||
* Perform the Unicode Bidi algorithm on a given paragraph, as defined in the
|
||||
* <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
|
||||
* <a href="https://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
|
||||
* Unicode Bidirectional Algorithm</a>, version 13,
|
||||
* also described in The Unicode Standard, Version 4.0 .<p>
|
||||
*
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2009, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2009, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -47,7 +47,7 @@ final class BidiLine {
|
||||
* text in a single paragraph or in a line of a single paragraph
|
||||
* which has already been processed according to
|
||||
* the Unicode 3.0 Bidi algorithm as defined in
|
||||
* <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
|
||||
* <a href="https://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
|
||||
* Unicode Bidirectional Algorithm</a>, version 13,
|
||||
* also described in The Unicode Standard, Version 4.0.1 .
|
||||
*
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -43,7 +43,7 @@ import jdk.internal.icu.impl.Norm2AllModes;
|
||||
* The primary functions are to produce a normalized string and to detect whether
|
||||
* a string is already normalized.
|
||||
* The most commonly used normalization forms are those defined in
|
||||
* <a href="http://www.unicode.org/reports/tr15/">Unicode Standard Annex #15:
|
||||
* <a href="https://www.unicode.org/reports/tr15/">Unicode Standard Annex #15:
|
||||
* Unicode Normalization Forms</a>.
|
||||
* However, this API supports additional normalization forms for specialized purposes.
|
||||
* For example, NFKC_Casefold is provided via getInstance("nfkc_cf", COMPOSE)
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -44,7 +44,7 @@ import java.text.Normalizer;
|
||||
* <code>normalize</code> transforms Unicode text into an equivalent composed or
|
||||
* decomposed form, allowing for easier sorting and searching of text.
|
||||
* <code>normalize</code> supports the standard normalization forms described in
|
||||
* <a href="http://www.unicode.org/reports/tr15/" target="unicode">
|
||||
* <a href="https://www.unicode.org/reports/tr15/" target="unicode">
|
||||
* Unicode Standard Annex #15 — Unicode Normalization Forms</a>.
|
||||
*
|
||||
* Characters with accents or other adornments can be encoded in
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -67,9 +67,9 @@ import jdk.internal.icu.util.VersionInfo;
|
||||
* <li> Unassigned Table: Contains code points that are unassigned
|
||||
* in the Unicode Version supported by StringPrep. Currently
|
||||
* RFC 3454 supports Unicode 3.2. </li>
|
||||
* <li> Prohibited Table: Contains code points that are prohibted from
|
||||
* <li> Prohibited Table: Contains code points that are prohibited from
|
||||
* the output of the StringPrep processing function. </li>
|
||||
* <li> Mapping Table: Contains code ponts that are deleted from the output or case mapped. </li>
|
||||
* <li> Mapping Table: Contains code points that are deleted from the output or case mapped. </li>
|
||||
* </ul>
|
||||
*
|
||||
* The procedure for preparing Unicode strings:
|
||||
@ -226,8 +226,8 @@ public final class StringPrep {
|
||||
sprepUniVer = getVersionInfo(reader.getUnicodeVersion());
|
||||
normCorrVer = getVersionInfo(indexes[NORM_CORRECTNS_LAST_UNI_VERSION]);
|
||||
VersionInfo normUniVer = UCharacter.getUnicodeVersion();
|
||||
if(normUniVer.compareTo(sprepUniVer) < 0 && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
|
||||
normUniVer.compareTo(normCorrVer) < 0 && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
|
||||
if(normUniVer.compareTo(sprepUniVer) < 0 && /* the Unicode version of SPREP file must be less than the Unicode Version of the normalization data */
|
||||
normUniVer.compareTo(normCorrVer) < 0 && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Version of the normalization data */
|
||||
((indexes[OPTIONS] & NORMALIZATION_ON) > 0) /* normalization turned on*/
|
||||
){
|
||||
throw new IOException("Normalization Correction version not supported");
|
||||
@ -325,7 +325,7 @@ public final class StringPrep {
|
||||
ch -= val.value;
|
||||
}
|
||||
}else if(val.type == DELETE){
|
||||
// just consume the codepoint and contine
|
||||
// just consume the codepoint and continue
|
||||
continue;
|
||||
}
|
||||
//copy the source into destination
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -147,9 +147,9 @@ public abstract class UCharacterIterator
|
||||
*/
|
||||
public int nextCodePoint(){
|
||||
int ch1 = next();
|
||||
if(UTF16.isLeadSurrogate((char)ch1)){
|
||||
if(UTF16.isLeadSurrogate(ch1)){
|
||||
int ch2 = next();
|
||||
if(UTF16.isTrailSurrogate((char)ch2)){
|
||||
if(UTF16.isTrailSurrogate(ch2)){
|
||||
return UCharacterProperty.getRawSupplementary((char)ch1,
|
||||
(char)ch2);
|
||||
}else if (ch2 != DONE) {
|
||||
@ -175,7 +175,7 @@ public abstract class UCharacterIterator
|
||||
/**
|
||||
* Retreat to the start of the previous code point in the text,
|
||||
* and return it (pre-decrement semantics). If the index is not
|
||||
* preceeded by a valid surrogate pair, the behavior is the same
|
||||
* preceded by a valid surrogate pair, the behavior is the same
|
||||
* as <code>previous()</code>. Otherwise the iterator is
|
||||
* decremented to the start of the surrogate pair, and the code
|
||||
* point represented by the pair is returned.
|
||||
@ -185,9 +185,9 @@ public abstract class UCharacterIterator
|
||||
*/
|
||||
public int previousCodePoint(){
|
||||
int ch1 = previous();
|
||||
if(UTF16.isTrailSurrogate((char)ch1)){
|
||||
if(UTF16.isTrailSurrogate(ch1)){
|
||||
int ch2 = previous();
|
||||
if(UTF16.isLeadSurrogate((char)ch2)){
|
||||
if(UTF16.isLeadSurrogate(ch2)){
|
||||
return UCharacterProperty.getRawSupplementary((char)ch2,
|
||||
(char)ch1);
|
||||
}else if (ch2 != DONE) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -382,36 +382,39 @@ public final class UTF16
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether the code value is a surrogate.
|
||||
* @param char16 the input character.
|
||||
* @return true if the input character is a surrogate.
|
||||
* @stable ICU 2.1
|
||||
* Determines whether the code point is a surrogate.
|
||||
*
|
||||
* @param codePoint The input character.
|
||||
* (In ICU 2.1-69 the type of this parameter was <code>char</code>.)
|
||||
* @return true If the input code point is a surrogate.
|
||||
* @stable ICU 70
|
||||
*/
|
||||
public static boolean isSurrogate(char char16)
|
||||
{
|
||||
return (char16 & SURROGATE_BITMASK) == SURROGATE_BITS;
|
||||
public static boolean isSurrogate(int codePoint) {
|
||||
return (codePoint & SURROGATE_BITMASK) == SURROGATE_BITS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether the character is a trail surrogate.
|
||||
* @param char16 the input character.
|
||||
* @return true if the input character is a trail surrogate.
|
||||
* @stable ICU 2.1
|
||||
* Determines whether the code point is a trail surrogate.
|
||||
*
|
||||
* @param codePoint The input character.
|
||||
* (In ICU 2.1-69 the type of this parameter was <code>char</code>.)
|
||||
* @return true If the input code point is a trail surrogate.
|
||||
* @stable ICU 70
|
||||
*/
|
||||
public static boolean isTrailSurrogate(char char16)
|
||||
{
|
||||
return (char16 & TRAIL_SURROGATE_BITMASK) == TRAIL_SURROGATE_BITS;
|
||||
public static boolean isTrailSurrogate(int codePoint) {
|
||||
return (codePoint & TRAIL_SURROGATE_BITMASK) == TRAIL_SURROGATE_BITS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether the character is a lead surrogate.
|
||||
* @param char16 the input character.
|
||||
* @return true if the input character is a lead surrogate
|
||||
* @stable ICU 2.1
|
||||
* Determines whether the code point is a lead surrogate.
|
||||
*
|
||||
* @param codePoint The input character.
|
||||
* (In ICU 2.1-69 the type of this parameter was <code>char</code>.)
|
||||
* @return true If the input code point is a lead surrogate
|
||||
* @stable ICU 70
|
||||
*/
|
||||
public static boolean isLeadSurrogate(char char16)
|
||||
{
|
||||
return (char16 & LEAD_SURROGATE_BITMASK) == LEAD_SURROGATE_BITS;
|
||||
public static boolean isLeadSurrogate(int codePoint) {
|
||||
return (codePoint & LEAD_SURROGATE_BITMASK) == LEAD_SURROGATE_BITS;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -135,8 +135,8 @@ import jdk.internal.icu.util.VersionInfo;
|
||||
* "[:Lu:]" and the Perl-like syntax "\p{Lu}" are recognized. For a
|
||||
* complete list of supported property patterns, see the User's Guide
|
||||
* for UnicodeSet at
|
||||
* <a href="http://www.icu-project.org/userguide/unicodeSet.html">
|
||||
* http://www.icu-project.org/userguide/unicodeSet.html</a>.
|
||||
* <a href="https://unicode-org.github.io/icu/userguide/strings/unicodeset">
|
||||
* https://unicode-org.github.io/icu/userguide/strings/unicodeset</a>.
|
||||
* Actual determination of property data is defined by the underlying
|
||||
* Unicode database as implemented by UCharacter.
|
||||
*
|
||||
@ -147,6 +147,13 @@ import jdk.internal.icu.util.VersionInfo;
|
||||
* their delimiters; "[:^foo]" and "\P{foo}". In any other location,
|
||||
* '^' has no special meaning.
|
||||
*
|
||||
* <p>Since ICU 70, "[^...]", "[:^foo]", "\P{foo}", and "[:binaryProperty=No:]"
|
||||
* perform a "code point complement" (all code points minus the original set),
|
||||
* removing all multicharacter strings,
|
||||
* equivalent to .{@link #complement()}.{@link #removeAllStrings()} .
|
||||
* The {@link #complement()} API function continues to perform a
|
||||
* symmetric difference with all code points and thus retains all multicharacter strings.
|
||||
*
|
||||
* <p>Ranges are indicated by placing two a '-' between two
|
||||
* characters, as in "a-z". This specifies the range of all
|
||||
* characters from the left to the right, in Unicode order. If the
|
||||
@ -189,8 +196,6 @@ import jdk.internal.icu.util.VersionInfo;
|
||||
* Unicode property
|
||||
* </table>
|
||||
*
|
||||
* <p><b>Warning</b>: you cannot add an empty string ("") to a UnicodeSet.</p>
|
||||
*
|
||||
* <p><b>Formal syntax</b></p>
|
||||
*
|
||||
* <blockquote>
|
||||
@ -230,9 +235,8 @@ import jdk.internal.icu.util.VersionInfo;
|
||||
* </tr>
|
||||
* <tr align="top">
|
||||
* <td nowrap valign="top" align="right"><code>hex := </code></td>
|
||||
* <td valign="top"><em>any character for which
|
||||
* </em><code>Character.digit(c, 16)</code><em>
|
||||
* returns a non-negative result</em></td>
|
||||
* <td style="vertical-align: top;"><code>'0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' |<br>
|
||||
* 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'a' | 'b' | 'c' | 'd' | 'e' | 'f'</code></td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td nowrap valign="top" align="right"><code>property := </code></td>
|
||||
@ -487,7 +491,7 @@ public class UnicodeSet {
|
||||
else if (i > 0 && c == list[i-1]) {
|
||||
// c is after end of prior range
|
||||
list[i-1]++;
|
||||
// no need to chcek for collapse here
|
||||
// no need to check for collapse here
|
||||
}
|
||||
|
||||
else {
|
||||
@ -528,7 +532,6 @@ public class UnicodeSet {
|
||||
* present. If this set already contains the multicharacter,
|
||||
* the call leaves this set unchanged.
|
||||
* Thus {@code "ch" => {"ch"}}
|
||||
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
|
||||
* @param s the source string
|
||||
* @return this object, for chaining
|
||||
* @stable ICU 2.0
|
||||
@ -546,22 +549,19 @@ public class UnicodeSet {
|
||||
|
||||
/**
|
||||
* Utility for getting code point from single code point CharSequence.
|
||||
* See the public UTF16.getSingleCodePoint()
|
||||
* See the public UTF16.getSingleCodePoint() (which returns -1 for null rather than throwing NPE).
|
||||
*
|
||||
* @return a code point IF the string consists of a single one.
|
||||
* otherwise returns -1.
|
||||
* @param s to test
|
||||
*/
|
||||
private static int getSingleCP(CharSequence s) {
|
||||
if (s.length() < 1) {
|
||||
throw new IllegalArgumentException("Can't use zero-length strings in UnicodeSet");
|
||||
}
|
||||
if (s.length() > 2) return -1;
|
||||
if (s.length() == 1) return s.charAt(0);
|
||||
|
||||
// at this point, len = 2
|
||||
int cp = UTF16.charAt(s, 0);
|
||||
if (cp > 0xFFFF) { // is surrogate pair
|
||||
return cp;
|
||||
if (s.length() == 2) {
|
||||
int cp = Character.codePointAt(s, 0);
|
||||
if (cp > 0xFFFF) { // is surrogate pair
|
||||
return cp;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
@ -569,13 +569,11 @@ public class UnicodeSet {
|
||||
/**
|
||||
* Complements the specified range in this set. Any character in
|
||||
* the range will be removed if it is in this set, or will be
|
||||
* added if it is not in this set. If {@code end > start}
|
||||
* added if it is not in this set. If <code>start > end</code>
|
||||
* then an empty range is complemented, leaving the set unchanged.
|
||||
*
|
||||
* @param start first character, inclusive, of range to be removed
|
||||
* from this set.
|
||||
* @param end last character, inclusive, of range to be removed
|
||||
* from this set.
|
||||
* @param start first character, inclusive, of range
|
||||
* @param end last character, inclusive, of range
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
public UnicodeSet complement(int start, int end) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2019, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -43,7 +43,7 @@ import static jdk.internal.icu.impl.NormalizerImpl.UTF16Plus;
|
||||
/**
|
||||
* Immutable Unicode code point trie.
|
||||
* Fast, reasonably compact, map from Unicode code points (U+0000..U+10FFFF) to integer values.
|
||||
* For details see http://site.icu-project.org/design/struct/utrie
|
||||
* For details see https://icu.unicode.org/design/struct/utrie
|
||||
*
|
||||
* <p>This class is not intended for public subclassing.
|
||||
*
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -54,7 +54,7 @@ public final class VersionInfo
|
||||
* @deprecated This API is ICU internal only.
|
||||
*/
|
||||
@Deprecated
|
||||
public static final String ICU_DATA_VERSION_PATH = "67b";
|
||||
public static final String ICU_DATA_VERSION_PATH = "70b";
|
||||
|
||||
// public methods ------------------------------------------------------
|
||||
|
||||
@ -148,7 +148,15 @@ public final class VersionInfo
|
||||
*/
|
||||
public int compareTo(VersionInfo other)
|
||||
{
|
||||
return m_version_ - other.m_version_;
|
||||
// m_version_ is an int, a signed 32-bit integer.
|
||||
// When the major version is >=128, then the version int is negative.
|
||||
// Compare it in two steps to simulate an unsigned-int comparison.
|
||||
// (Alternatively we could turn each int into a long and reset the upper 32 bits.)
|
||||
// Compare the upper bits first, using logical shift right (unsigned).
|
||||
int diff = (m_version_ >>> 1) - (other.m_version_ >>> 1);
|
||||
if (diff != 0) { return diff; }
|
||||
// Compare the remaining bits.
|
||||
return (m_version_ & 1) - (other.m_version_ & 1);
|
||||
}
|
||||
|
||||
// private data members ----------------------------------------------
|
||||
|
@ -1,4 +1,4 @@
|
||||
## International Components for Unicode (ICU4J) v67.1
|
||||
## International Components for Unicode (ICU4J) v70.1
|
||||
|
||||
### ICU4J License
|
||||
```
|
||||
@ -80,61 +80,439 @@ of the copyright holder.
|
||||
All trademarks and registered trademarks mentioned herein are the
|
||||
property of their respective owners.
|
||||
|
||||
2. Chinese/Japanese Word Break Dictionary Data (cjdict.txt)
|
||||
|
||||
——————————————————————————————————————————————————————————————————————
|
||||
# The Google Chrome software developed by Google is licensed under
|
||||
# the BSD license. Other software included in this distribution is
|
||||
# provided under other licenses, as set forth below.
|
||||
#
|
||||
# The BSD License
|
||||
# http://opensource.org/licenses/bsd-license.php
|
||||
# Copyright (C) 2006-2008, Google Inc.
|
||||
#
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# Redistributions in binary form must reproduce the above
|
||||
# copyright notice, this list of conditions and the following
|
||||
# disclaimer in the documentation and/or other materials provided with
|
||||
# the distribution.
|
||||
# Neither the name of Google Inc. nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
|
||||
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
||||
# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||
# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
#
|
||||
# The word list in cjdict.txt are generated by combining three word lists
|
||||
# listed below with further processing for compound word breaking. The
|
||||
# frequency is generated with an iterative training against Google web
|
||||
# corpora.
|
||||
#
|
||||
# * Libtabe (Chinese)
|
||||
# - https://sourceforge.net/project/?group_id=1519
|
||||
# - Its license terms and conditions are shown below.
|
||||
#
|
||||
# * IPADIC (Japanese)
|
||||
# - http://chasen.aist-nara.ac.jp/chasen/distribution.html
|
||||
# - Its license terms and conditions are shown below.
|
||||
#
|
||||
# ---------COPYING.libtabe ---- BEGIN--------------------
|
||||
#
|
||||
# /*
|
||||
# * Copyright (c) 1999 TaBE Project.
|
||||
# * Copyright (c) 1999 Pai-Hsiang Hsiao.
|
||||
# * All rights reserved.
|
||||
# *
|
||||
# * Redistribution and use in source and binary forms, with or without
|
||||
# * modification, are permitted provided that the following conditions
|
||||
# * are met:
|
||||
# *
|
||||
# * . Redistributions of source code must retain the above copyright
|
||||
# * notice, this list of conditions and the following disclaimer.
|
||||
# * . Redistributions in binary form must reproduce the above copyright
|
||||
# * notice, this list of conditions and the following disclaimer in
|
||||
# * the documentation and/or other materials provided with the
|
||||
# * distribution.
|
||||
# * . Neither the name of the TaBE Project nor the names of its
|
||||
# * contributors may be used to endorse or promote products derived
|
||||
# * from this software without specific prior written permission.
|
||||
# *
|
||||
# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
# * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
# * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
# * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
# * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
# * OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
# */
|
||||
#
|
||||
# /*
|
||||
# * Copyright (c) 1999 Computer Systems and Communication Lab,
|
||||
# * Institute of Information Science, Academia
|
||||
# * Sinica. All rights reserved.
|
||||
# *
|
||||
# * Redistribution and use in source and binary forms, with or without
|
||||
# * modification, are permitted provided that the following conditions
|
||||
# * are met:
|
||||
# *
|
||||
# * . Redistributions of source code must retain the above copyright
|
||||
# * notice, this list of conditions and the following disclaimer.
|
||||
# * . Redistributions in binary form must reproduce the above copyright
|
||||
# * notice, this list of conditions and the following disclaimer in
|
||||
# * the documentation and/or other materials provided with the
|
||||
# * distribution.
|
||||
# * . Neither the name of the Computer Systems and Communication Lab
|
||||
# * nor the names of its contributors may be used to endorse or
|
||||
# * promote products derived from this software without specific
|
||||
# * prior written permission.
|
||||
# *
|
||||
# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
# * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
# * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
# * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
# * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
# * OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
# */
|
||||
#
|
||||
# Copyright 1996 Chih-Hao Tsai @ Beckman Institute,
|
||||
# University of Illinois
|
||||
# c-tsai4@uiuc.edu http://casper.beckman.uiuc.edu/~c-tsai4
|
||||
#
|
||||
# ---------------COPYING.libtabe-----END--------------------------------
|
||||
#
|
||||
#
|
||||
# ---------------COPYING.ipadic-----BEGIN-------------------------------
|
||||
#
|
||||
# Copyright 2000, 2001, 2002, 2003 Nara Institute of Science
|
||||
# and Technology. All Rights Reserved.
|
||||
#
|
||||
# Use, reproduction, and distribution of this software is permitted.
|
||||
# Any copy of this software, whether in its original form or modified,
|
||||
# must include both the above copyright notice and the following
|
||||
# paragraphs.
|
||||
#
|
||||
# Nara Institute of Science and Technology (NAIST),
|
||||
# the copyright holders, disclaims all warranties with regard to this
|
||||
# software, including all implied warranties of merchantability and
|
||||
# fitness, in no event shall NAIST be liable for
|
||||
# any special, indirect or consequential damages or any damages
|
||||
# whatsoever resulting from loss of use, data or profits, whether in an
|
||||
# action of contract, negligence or other tortuous action, arising out
|
||||
# of or in connection with the use or performance of this software.
|
||||
#
|
||||
# A large portion of the dictionary entries
|
||||
# originate from ICOT Free Software. The following conditions for ICOT
|
||||
# Free Software applies to the current dictionary as well.
|
||||
#
|
||||
# Each User may also freely distribute the Program, whether in its
|
||||
# original form or modified, to any third party or parties, PROVIDED
|
||||
# that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear
|
||||
# on, or be attached to, the Program, which is distributed substantially
|
||||
# in the same form as set out herein and that such intended
|
||||
# distribution, if actually made, will neither violate or otherwise
|
||||
# contravene any of the laws and regulations of the countries having
|
||||
# jurisdiction over the User or the intended distribution itself.
|
||||
#
|
||||
# NO WARRANTY
|
||||
#
|
||||
# The program was produced on an experimental basis in the course of the
|
||||
# research and development conducted during the project and is provided
|
||||
# to users as so produced on an experimental basis. Accordingly, the
|
||||
# program is provided without any warranty whatsoever, whether express,
|
||||
# implied, statutory or otherwise. The term "warranty" used herein
|
||||
# includes, but is not limited to, any warranty of the quality,
|
||||
# performance, merchantability and fitness for a particular purpose of
|
||||
# the program and the nonexistence of any infringement or violation of
|
||||
# any right of any third party.
|
||||
#
|
||||
# Each user of the program will agree and understand, and be deemed to
|
||||
# have agreed and understood, that there is no warranty whatsoever for
|
||||
# the program and, accordingly, the entire risk arising from or
|
||||
# otherwise connected with the program is assumed by the user.
|
||||
#
|
||||
# Therefore, neither ICOT, the copyright holder, or any other
|
||||
# organization that participated in or was otherwise related to the
|
||||
# development of the program and their respective officials, directors,
|
||||
# officers and other employees shall be held liable for any and all
|
||||
# damages, including, without limitation, general, special, incidental
|
||||
# and consequential damages, arising out of or otherwise in connection
|
||||
# with the use or inability to use the program or any product, material
|
||||
# or result produced or otherwise obtained by using the program,
|
||||
# regardless of whether they have been advised of, or otherwise had
|
||||
# knowledge of, the possibility of such damages at any time during the
|
||||
# project or thereafter. Each user will be deemed to have agreed to the
|
||||
# foregoing by his or her commencement of use of the program. The term
|
||||
# "use" as used herein includes, but is not limited to, the use,
|
||||
# modification, copying and distribution of the program and the
|
||||
# production of secondary products from the program.
|
||||
#
|
||||
# In the case where the program, whether in its original form or
|
||||
# modified, was distributed or delivered to or received by a user from
|
||||
# any person, organization or entity other than ICOT, unless it makes or
|
||||
# grants independently of ICOT any specific warranty to the user in
|
||||
# writing, such person, organization or entity, will also be exempted
|
||||
# from and not be held liable to the user for any such damages as noted
|
||||
# above as far as the program is concerned.
|
||||
#
|
||||
# ---------------COPYING.ipadic-----END----------------------------------
|
||||
|
||||
3. Lao Word Break Dictionary Data (laodict.txt)
|
||||
|
||||
From: https://www.unicode.org/copyright.html:
|
||||
# Copyright (C) 2016 and later: Unicode, Inc. and others.
|
||||
# License & terms of use: http://www.unicode.org/copyright.html
|
||||
# Copyright (c) 2015 International Business Machines Corporation
|
||||
# and others. All Rights Reserved.
|
||||
#
|
||||
# Project: https://github.com/rober42539/lao-dictionary
|
||||
# Dictionary: https://github.com/rober42539/lao-dictionary/laodict.txt
|
||||
# License: https://github.com/rober42539/lao-dictionary/LICENSE.txt
|
||||
# (copied below)
|
||||
#
|
||||
# This file is derived from the above dictionary version of Nov 22, 2020
|
||||
# ----------------------------------------------------------------------
|
||||
# Copyright (C) 2013 Brian Eugene Wilson, Robert Martin Campbell.
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer. Redistributions in binary
|
||||
# form must reproduce the above copyright notice, this list of conditions and
|
||||
# the following disclaimer in the documentation and/or ther materials
|
||||
# provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
# OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
Unicode® Copyright and Terms of Use
|
||||
4. Burmese Word Break Dictionary Data (burmesedict.txt)
|
||||
|
||||
For the general privacy policy governing access to this site, see the Unicode Privacy Policy.
|
||||
# Copyright (c) 2014 International Business Machines Corporation
|
||||
# and others. All Rights Reserved.
|
||||
#
|
||||
# This list is part of a project hosted at:
|
||||
# github.com/kanyawtech/myanmar-karen-word-lists
|
||||
#
|
||||
# --------------------------------------------------------------------------
|
||||
# Copyright (c) 2013, LeRoy Benjamin Sharon
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met: Redistributions of source code must retain the above
|
||||
# copyright notice, this list of conditions and the following
|
||||
# disclaimer. Redistributions in binary form must reproduce the
|
||||
# above copyright notice, this list of conditions and the following
|
||||
# disclaimer in the documentation and/or other materials provided
|
||||
# with the distribution.
|
||||
#
|
||||
# Neither the name Myanmar Karen Word Lists, nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
|
||||
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
||||
# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
|
||||
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
|
||||
# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
|
||||
# THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
# SUCH DAMAGE.
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
Unicode Copyright
|
||||
Copyright © 1991-2020 Unicode, Inc. All rights reserved.
|
||||
Definitions
|
||||
5. Time Zone Database
|
||||
|
||||
Unicode Data Files ("DATA FILES") include all data files under the directories:
|
||||
https://www.unicode.org/Public/
|
||||
https://www.unicode.org/reports/
|
||||
https://www.unicode.org/ivd/data/
|
||||
ICU uses the public domain data and code derived from Time Zone
|
||||
Database for its time zone support. The ownership of the TZ database
|
||||
is explained in BCP 175: Procedure for Maintaining the Time Zone
|
||||
Database section 7.
|
||||
|
||||
Unicode Data Files do not include PDF online code charts under the directory:
|
||||
https://www.unicode.org/Public/
|
||||
# 7. Database Ownership
|
||||
#
|
||||
# The TZ database itself is not an IETF Contribution or an IETF
|
||||
# document. Rather it is a pre-existing and regularly updated work
|
||||
# that is in the public domain, and is intended to remain in the
|
||||
# public domain. Therefore, BCPs 78 [RFC5378] and 79 [RFC3979] do
|
||||
# not apply to the TZ Database or contributions that individuals make
|
||||
# to it. Should any claims be made and substantiated against the TZ
|
||||
# Database, the organization that is providing the IANA
|
||||
# Considerations defined in this RFC, under the memorandum of
|
||||
# understanding with the IETF, currently ICANN, may act in accordance
|
||||
# with all competent court orders. No ownership claims will be made
|
||||
# by ICANN or the IETF Trust on the database or the code. Any person
|
||||
# making a contribution to the database or code waives all rights to
|
||||
# future claims in that contribution or in the TZ Database.
|
||||
|
||||
Unicode Software ("SOFTWARE") includes any source code published in the Unicode Standard
|
||||
or any source code or compiled code under the directories:
|
||||
https://www.unicode.org/Public/PROGRAMS/
|
||||
https://www.unicode.org/Public/cldr/
|
||||
http://site.icu-project.org/download/
|
||||
6. Google double-conversion
|
||||
|
||||
Terms of Use
|
||||
Certain documents and files on this website contain a legend indicating that "Modification is permitted." Any person is hereby authorized, without fee, to modify such documents and files to create derivative works conforming to the Unicode® Standard, subject to Terms and Conditions herein.
|
||||
Any person is hereby authorized, without fee, to view, use, reproduce, and distribute all documents and files, subject to the Terms and Conditions herein.
|
||||
Further specifications of rights and restrictions pertaining to the use of the Unicode DATA FILES and SOFTWARE can be found in the Unicode Data Files and Software License.
|
||||
Each version of the Unicode Standard has further specifications of rights and restrictions of use. For the book editions (Unicode 5.0 and earlier), these are found on the back of the title page.
|
||||
The Unicode PDF online code charts carry specific restrictions. Those restrictions are incorporated as the first page of each PDF code chart.
|
||||
All other files, including online documentation of the core specification for Unicode 6.0 and later, are covered under these general Terms of Use.
|
||||
No license is granted to "mirror" the Unicode website where a fee is charged for access to the "mirror" site.
|
||||
Modification is not permitted with respect to this document. All copies of this document must be verbatim.
|
||||
Restricted Rights Legend
|
||||
Any technical data or software which is licensed to the United States of America, its agencies and/or instrumentalities under this Agreement is commercial technical data or commercial computer software developed exclusively at private expense as defined in FAR 2.101, or DFARS 252.227-7014 (June 1995), as applicable. For technical data, use, duplication, or disclosure by the Government is subject to restrictions as set forth in DFARS 202.227-7015 Technical Data, Commercial and Items (Nov 1995) and this Agreement. For Software, in accordance with FAR 12-212 or DFARS 227-7202, as applicable, use, duplication or disclosure by the Government is subject to the restrictions set forth in this Agreement.
|
||||
Warranties and Disclaimers
|
||||
This publication and/or website may include technical or typographical errors or other inaccuracies. Changes are periodically added to the information herein; these changes will be incorporated in new editions of the publication and/or website. Unicode, Inc. may make improvements and/or changes in the product(s) and/or program(s) described in this publication and/or website at any time.
|
||||
If this file has been purchased on magnetic or optical media from Unicode, Inc. the sole and exclusive remedy for any claim will be exchange of the defective media within ninety (90) days of original purchase.
|
||||
EXCEPT AS PROVIDED IN SECTION E.2, THIS PUBLICATION AND/OR SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND EITHER EXPRESS, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. UNICODE, INC. AND ITS LICENSORS ASSUME NO RESPONSIBILITY FOR ERRORS OR OMISSIONS IN THIS PUBLICATION AND/OR SOFTWARE OR OTHER DOCUMENTS WHICH ARE REFERENCED BY OR LINKED TO THIS PUBLICATION OR THE UNICODE WEBSITE.
|
||||
Waiver of Damages
|
||||
In no event shall Unicode, Inc. or its licensors be liable for any special, incidental, indirect or consequential damages of any kind, or any damages whatsoever, whether or not Unicode, Inc. was advised of the possibility of the damage, including, without limitation, those resulting from the following: loss of use, data or profits, in connection with the use, modification or distribution of this information or its derivatives.
|
||||
Trademarks & Logos
|
||||
The Unicode Word Mark and the Unicode Logo are trademarks of Unicode, Inc. “The Unicode Consortium” and “Unicode, Inc.” are trade names of Unicode, Inc. Use of the information and materials found on this website indicates your acknowledgement of Unicode, Inc.’s exclusive worldwide rights in the Unicode Word Mark, the Unicode Logo, and the Unicode trade names.
|
||||
The Unicode Consortium Name and Trademark Usage Policy (“Trademark Policy”) are incorporated herein by reference and you agree to abide by the provisions of the Trademark Policy, which may be changed from time to time in the sole discretion of Unicode, Inc.
|
||||
All third party trademarks referenced herein are the property of their respective owners.
|
||||
Miscellaneous
|
||||
Jurisdiction and Venue. This website is operated from a location in the State of California, United States of America. Unicode, Inc. makes no representation that the materials are appropriate for use in other locations. If you access this website from other locations, you are responsible for compliance with local laws. This Agreement, all use of this website and any claims and damages resulting from use of this website are governed solely by the laws of the State of California without regard to any principles which would apply the laws of a different jurisdiction. The user agrees that any disputes regarding this website shall be resolved solely in the courts located in Santa Clara County, California. The user agrees said courts have personal jurisdiction and agree to waive any right to transfer the dispute to any other forum.
|
||||
Modification by Unicode, Inc. Unicode, Inc. shall have the right to modify this Agreement at any time by posting it to this website. The user may not assign any part of this Agreement without Unicode, Inc.’s prior written consent.
|
||||
Taxes. The user agrees to pay any taxes arising from access to this website or use of the information herein, except for those based on Unicode’s net income.
|
||||
Severability. If any provision of this Agreement is declared invalid or unenforceable, the remaining provisions of this Agreement shall remain in effect.
|
||||
Entire Agreement. This Agreement constitutes the entire agreement between the parties.
|
||||
Copyright 2006-2011, the V8 project authors. All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following
|
||||
disclaimer in the documentation and/or other materials provided
|
||||
with the distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
====================================================
|
||||
|
||||
Unicode® Copyright and Terms of Use
|
||||
For the general privacy policy governing access to this site, see the Unicode Privacy Policy.
|
||||
|
||||
Unicode Copyright
|
||||
Copyright © 1991-2021 Unicode, Inc. All rights reserved.
|
||||
Definitions
|
||||
Unicode Data Files ("DATA FILES") include all data files under the directories:
|
||||
https://www.unicode.org/Public/
|
||||
https://www.unicode.org/reports/
|
||||
https://www.unicode.org/ivd/data/
|
||||
|
||||
Unicode Data Files do not include PDF online code charts under the directory:
|
||||
https://www.unicode.org/Public/
|
||||
|
||||
Unicode Software ("SOFTWARE") includes any source code published in the Unicode Standard
|
||||
or any source code or compiled code under the directories:
|
||||
https://www.unicode.org/Public/PROGRAMS/
|
||||
https://www.unicode.org/Public/cldr/
|
||||
http://site.icu-project.org/download/
|
||||
Terms of Use
|
||||
Certain documents and files on this website contain a legend indicating that "Modification is permitted." Any person is hereby authorized, without fee, to modify such documents and files to create derivative works conforming to the Unicode® Standard, subject to Terms and Conditions herein.
|
||||
Any person is hereby authorized, without fee, to view, use, reproduce, and distribute all documents and files, subject to the Terms and Conditions herein.
|
||||
Further specifications of rights and restrictions pertaining to the use of the Unicode DATA FILES and SOFTWARE can be found in the Unicode Data Files and Software License.
|
||||
Each version of the Unicode Standard has further specifications of rights and restrictions of use. For the book editions (Unicode 5.0 and earlier), these are found on the back of the title page.
|
||||
The Unicode PDF online code charts carry specific restrictions. Those restrictions are incorporated as the first page of each PDF code chart.
|
||||
All other files, including online documentation of the core specification for Unicode 6.0 and later, are covered under these general Terms of Use.
|
||||
No license is granted to "mirror" the Unicode website where a fee is charged for access to the "mirror" site.
|
||||
Modification is not permitted with respect to this document. All copies of this document must be verbatim.
|
||||
Restricted Rights Legend
|
||||
Any technical data or software which is licensed to the United States of America, its agencies and/or instrumentalities under this Agreement is commercial technical data or commercial computer software developed exclusively at private expense as defined in FAR 2.101, or DFARS 252.227-7014 (June 1995), as applicable. For technical data, use, duplication, or disclosure by the Government is subject to restrictions as set forth in DFARS 202.227-7015 Technical Data, Commercial and Items (Nov 1995) and this Agreement. For Software, in accordance with FAR 12-212 or DFARS 227-7202, as applicable, use, duplication or disclosure by the Government is subject to the restrictions set forth in this Agreement.
|
||||
Warranties and Disclaimers
|
||||
This publication and/or website may include technical or typographical errors or other inaccuracies. Changes are periodically added to the information herein; these changes will be incorporated in new editions of the publication and/or website. Unicode, Inc. may make improvements and/or changes in the product(s) and/or program(s) described in this publication and/or website at any time.
|
||||
If this file has been purchased on magnetic or optical media from Unicode, Inc. the sole and exclusive remedy for any claim will be exchange of the defective media within ninety (90) days of original purchase.
|
||||
EXCEPT AS PROVIDED IN SECTION E.2, THIS PUBLICATION AND/OR SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND EITHER EXPRESS, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. UNICODE, INC. AND ITS LICENSORS ASSUME NO RESPONSIBILITY FOR ERRORS OR OMISSIONS IN THIS PUBLICATION AND/OR SOFTWARE OR OTHER DOCUMENTS WHICH ARE REFERENCED BY OR LINKED TO THIS PUBLICATION OR THE UNICODE WEBSITE.
|
||||
Waiver of Damages
|
||||
In no event shall Unicode, Inc. or its licensors be liable for any special, incidental, indirect or consequential damages of any kind, or any damages whatsoever, whether or not Unicode, Inc. was advised of the possibility of the damage, including, without limitation, those resulting from the following: loss of use, data or profits, in connection with the use, modification or distribution of this information or its derivatives.
|
||||
Trademarks & Logos
|
||||
The Unicode Word Mark and the Unicode Logo are trademarks of Unicode, Inc. “The Unicode Consortium” and “Unicode, Inc.” are trade names of Unicode, Inc. Use of the information and materials found on this website indicates your acknowledgement of Unicode, Inc.’s exclusive worldwide rights in the Unicode Word Mark, the Unicode Logo, and the Unicode trade names.
|
||||
The Unicode Consortium Name and Trademark Usage Policy (“Trademark Policy”) are incorporated herein by reference and you agree to abide by the provisions of the Trademark Policy, which may be changed from time to time in the sole discretion of Unicode, Inc.
|
||||
All third party trademarks referenced herein are the property of their respective owners.
|
||||
Miscellaneous
|
||||
Jurisdiction and Venue. This website is operated from a location in the State of California, United States of America. Unicode, Inc. makes no representation that the materials are appropriate for use in other locations. If you access this website from other locations, you are responsible for compliance with local laws. This Agreement, all use of this website and any claims and damages resulting from use of this website are governed solely by the laws of the State of California without regard to any principles which would apply the laws of a different jurisdiction. The user agrees that any disputes regarding this website shall be resolved solely in the courts located in Santa Clara County, California. The user agrees said courts have personal jurisdiction and agree to waive any right to transfer the dispute to any other forum.
|
||||
Modification by Unicode, Inc. Unicode, Inc. shall have the right to modify this Agreement at any time by posting it to this website. The user may not assign any part of this Agreement without Unicode, Inc.’s prior written consent.
|
||||
Taxes. The user agrees to pay any taxes arising from access to this website or use of the information herein, except for those based on Unicode’s net income.
|
||||
Severability. If any provision of this Agreement is declared invalid or unenforceable, the remaining provisions of this Agreement shall remain in effect.
|
||||
Entire Agreement. This Agreement constitutes the entire agreement between the parties.
|
||||
|
||||
=======================================================
|
||||
|
||||
UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
|
||||
|
||||
See Terms of Use
|
||||
for definitions of Unicode Inc.’s Data Files and Software.
|
||||
|
||||
NOTICE TO USER: Carefully read the following legal agreement.
|
||||
BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
|
||||
DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
|
||||
YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
|
||||
TERMS AND CONDITIONS OF THIS AGREEMENT.
|
||||
IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
|
||||
THE DATA FILES OR SOFTWARE.
|
||||
|
||||
COPYRIGHT AND PERMISSION NOTICE
|
||||
|
||||
Copyright © 1991-2021 Unicode, Inc. All rights reserved.
|
||||
Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of the Unicode data files and any associated documentation
|
||||
(the "Data Files") or Unicode software and any associated documentation
|
||||
(the "Software") to deal in the Data Files or Software
|
||||
without restriction, including without limitation the rights to use,
|
||||
copy, modify, merge, publish, distribute, and/or sell copies of
|
||||
the Data Files or Software, and to permit persons to whom the Data Files
|
||||
or Software are furnished to do so, provided that either
|
||||
(a) this copyright and permission notice appear with all copies
|
||||
of the Data Files or Software, or
|
||||
(b) this copyright and permission notice appear in associated
|
||||
Documentation.
|
||||
|
||||
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
|
||||
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT OF THIRD PARTY RIGHTS.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
|
||||
NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
|
||||
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
|
||||
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
|
||||
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
|
||||
|
||||
Except as contained in this notice, the name of a copyright holder
|
||||
shall not be used in advertising or otherwise to promote the sale,
|
||||
use or other dealings in these Data Files or Software without prior
|
||||
written authorization of the copyright holder.
|
||||
|
||||
```
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
## The Unicode Standard, Unicode Character Database, Version 13.0.0
|
||||
|
||||
## The Unicode Standard, Unicode Character Database, Version 14.0.0
|
||||
|
||||
### Unicode Character Database
|
||||
```
|
||||
|
||||
@ -18,7 +18,7 @@ THE DATA FILES OR SOFTWARE.
|
||||
|
||||
COPYRIGHT AND PERMISSION NOTICE
|
||||
|
||||
Copyright © 1991-2020 Unicode, Inc. All rights reserved.
|
||||
Copyright © 1991-2021 Unicode, Inc. All rights reserved.
|
||||
Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
@ -50,5 +50,54 @@ shall not be used in advertising or otherwise to promote the sale,
|
||||
use or other dealings in these Data Files or Software without prior
|
||||
written authorization of the copyright holder.
|
||||
|
||||
=== http://www.unicode.org/copyright.html content ===
|
||||
Unicode (R) Copyright and Terms of Use
|
||||
For the general privacy policy governing access to this site, see the Unicode Privacy Policy.
|
||||
|
||||
Unicode Copyright
|
||||
Copyright (C) 1991-2021 Unicode, Inc. All rights reserved.
|
||||
Definitions
|
||||
Unicode Data Files ("DATA FILES") include all data files under the directories:
|
||||
https://www.unicode.org/Public/
|
||||
https://www.unicode.org/reports/
|
||||
https://www.unicode.org/ivd/data/
|
||||
|
||||
Unicode Data Files do not include PDF online code charts under the directory:
|
||||
https://www.unicode.org/Public/
|
||||
|
||||
Unicode Software ("SOFTWARE") includes any source code published in the Unicode Standard
|
||||
or any source code or compiled code under the directories:
|
||||
https://www.unicode.org/Public/PROGRAMS/
|
||||
https://www.unicode.org/Public/cldr/
|
||||
http://site.icu-project.org/download/
|
||||
Terms of Use
|
||||
Certain documents and files on this website contain a legend indicating that "Modification is permitted." Any person is hereby authorized, without fee, to modify such documents and files to create derivative works conforming to the Unicode® Standard, subject to Terms and Conditions herein.
|
||||
Any person is hereby authorized, without fee, to view, use, reproduce, and distribute all documents and files, subject to the Terms and Conditions herein.
|
||||
Further specifications of rights and restrictions pertaining to the use of the Unicode DATA FILES and SOFTWARE can be found in the Unicode Data Files and Software License.
|
||||
Each version of the Unicode Standard has further specifications of rights and restrictions of use. For the book editions (Unicode 5.0 and earlier), these are found on the back of the title page.
|
||||
The Unicode PDF online code charts carry specific restrictions. Those restrictions are incorporated as the first page of each PDF code chart.
|
||||
All other files, including online documentation of the core specification for Unicode 6.0 and later, are covered under these general Terms of Use.
|
||||
No license is granted to "mirror" the Unicode website where a fee is charged for access to the "mirror" site.
|
||||
Modification is not permitted with respect to this document. All copies of this document must be verbatim.
|
||||
Restricted Rights Legend
|
||||
Any technical data or software which is licensed to the United States of America, its agencies and/or instrumentalities under this Agreement is commercial technical data or commercial computer software developed exclusively at private expense as defined in FAR 2.101, or DFARS 252.227-7014 (June 1995), as applicable. For technical data, use, duplication, or disclosure by the Government is subject to restrictions as set forth in DFARS 202.227-7015 Technical Data, Commercial and Items (Nov 1995) and this Agreement. For Software, in accordance with FAR 12-212 or DFARS 227-7202, as applicable, use, duplication or disclosure by the Government is subject to the restrictions set forth in this Agreement.
|
||||
Warranties and Disclaimers
|
||||
This publication and/or website may include technical or typographical errors or other inaccuracies. Changes are periodically added to the information herein; these changes will be incorporated in new editions of the publication and/or website. Unicode, Inc. may make improvements and/or changes in the product(s) and/or program(s) described in this publication and/or website at any time.
|
||||
If this file has been purchased on magnetic or optical media from Unicode, Inc. the sole and exclusive remedy for any claim will be exchange of the defective media within ninety (90) days of original purchase.
|
||||
EXCEPT AS PROVIDED IN SECTION E.2, THIS PUBLICATION AND/OR SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND EITHER EXPRESS, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. UNICODE, INC. AND ITS LICENSORS ASSUME NO RESPONSIBILITY FOR ERRORS OR OMISSIONS IN THIS PUBLICATION AND/OR SOFTWARE OR OTHER DOCUMENTS WHICH ARE REFERENCED BY OR LINKED TO THIS PUBLICATION OR THE UNICODE WEBSITE.
|
||||
Waiver of Damages
|
||||
In no event shall Unicode, Inc. or its licensors be liable for any special, incidental, indirect or consequential damages of any kind, or any damages whatsoever, whether or not Unicode, Inc. was advised of the possibility of the damage, including, without limitation, those resulting from the following: loss of use, data or profits, in connection with the use, modification or distribution of this information or its derivatives.
|
||||
Trademarks & Logos
|
||||
The Unicode Word Mark and the Unicode Logo are trademarks of Unicode, Inc. “The Unicode Consortium” and “Unicode, Inc.” are trade names of Unicode, Inc. Use of the information and materials found on this website indicates your acknowledgement of Unicode, Inc.’s exclusive worldwide rights in the Unicode Word Mark, the Unicode Logo, and the Unicode trade names.
|
||||
The Unicode Consortium Name and Trademark Usage Policy (“Trademark Policy”) are incorporated herein by reference and you agree to abide by the provisions of the Trademark Policy, which may be changed from time to time in the sole discretion of Unicode, Inc.
|
||||
All third party trademarks referenced herein are the property of their respective owners.
|
||||
Miscellaneous
|
||||
Jurisdiction and Venue. This website is operated from a location in the State of California, United States of America. Unicode, Inc. makes no representation that the materials are appropriate for use in other locations. If you access this website from other locations, you are responsible for compliance with local laws. This Agreement, all use of this website and any claims and damages resulting from use of this website are governed solely by the laws of the State of California without regard to any principles which would apply the laws of a different jurisdiction. The user agrees that any disputes regarding this website shall be resolved solely in the courts located in Santa Clara County, California. The user agrees said courts have personal jurisdiction and agree to waive any right to transfer the dispute to any other forum.
|
||||
Modification by Unicode, Inc. Unicode, Inc. shall have the right to modify this Agreement at any time by posting it to this website. The user may not assign any part of this Agreement without Unicode, Inc.’s prior written consent.
|
||||
Taxes. The user agrees to pay any taxes arising from access to this website or use of the information herein, except for those based on Unicode’s net income.
|
||||
Severability. If any provision of this Agreement is declared invalid or unenforceable, the remaining provisions of this Agreement shall remain in effect.
|
||||
Entire Agreement. This Agreement constitutes the entire agreement between the parties.
|
||||
|
||||
|
||||
```
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -23,7 +23,7 @@
|
||||
|
||||
/**
|
||||
* @test
|
||||
* @bug 8080535 8191410 8215194 8221431 8239383
|
||||
* @bug 8080535 8191410 8215194 8221431 8239383 8268081
|
||||
* @summary Expected size of Character.UnicodeBlock.map is not optimal
|
||||
* @library /test/lib
|
||||
* @modules java.base/java.lang:open
|
||||
@ -48,13 +48,14 @@ import jdk.test.lib.util.OptimalCapacity;
|
||||
// As of Unicode 11, 667 entries are expected.
|
||||
// As of Unicode 12.1, 676 entries are expected.
|
||||
// As of Unicode 13.0, 684 entries are expected.
|
||||
// As of Unicode 14.0, 696 entries are expected.
|
||||
//
|
||||
// Initialization of the map and this test will have to be adjusted
|
||||
// accordingly then.
|
||||
//
|
||||
// Note that HashMap's implementation aligns the initial capacity to
|
||||
// a power of two size, so it will end up 1024 (and thus succeed) in
|
||||
// cases, such as 638, 667, 676, and 684.
|
||||
// cases, such as 638, 667, 676, 684, and 696.
|
||||
|
||||
public class OptimalMapSize {
|
||||
public static void main(String[] args) throws Throwable {
|
||||
@ -63,7 +64,7 @@ public class OptimalMapSize {
|
||||
Field f = Character.UnicodeBlock.class.getDeclaredField("NUM_ENTITIES");
|
||||
f.setAccessible(true);
|
||||
int num_entities = f.getInt(null);
|
||||
assert num_entities == 684;
|
||||
assert num_entities == 696;
|
||||
int initialCapacity = (int)(num_entities / 0.75f + 1.0f);
|
||||
|
||||
OptimalCapacity.ofHashMap(Character.UnicodeBlock.class,
|
||||
|
Loading…
Reference in New Issue
Block a user