8191410: Unicode 10
Upgrade to Unicode 10
Reviewed-by: naoto, rriggs, igerasim
This commit is contained in:
parent
2dd9adbf24
commit
78bd242097
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -139,6 +139,12 @@ class CharacterData00 extends CharacterData {
|
||||
case 0x0130: mapChar = 0x0069; break;
|
||||
case 0x023A: mapChar = 0x2C65; break;
|
||||
case 0x023E: mapChar = 0x2C66; break;
|
||||
case 0x0412: mapChar = 0x1C80; break;
|
||||
case 0x0414: mapChar = 0x1C81; break;
|
||||
case 0x041E: mapChar = 0x1C82; break;
|
||||
case 0x0421: mapChar = 0x1C83; break;
|
||||
case 0x042A: mapChar = 0x1C86; break;
|
||||
case 0x0462: mapChar = 0x1C87; break;
|
||||
case 0x10A0: mapChar = 0x2D00; break;
|
||||
case 0x10A1: mapChar = 0x2D01; break;
|
||||
case 0x10A2: mapChar = 0x2D02; break;
|
||||
@ -299,12 +305,14 @@ class CharacterData00 extends CharacterData {
|
||||
case 0x2C70: mapChar = 0x0252; break;
|
||||
case 0x2C7E: mapChar = 0x023F; break;
|
||||
case 0x2C7F: mapChar = 0x0240; break;
|
||||
case 0xA64A: mapChar = 0x1C88; break;
|
||||
case 0xA77D: mapChar = 0x1D79; break;
|
||||
case 0xA78D: mapChar = 0x0265; break;
|
||||
case 0xA7AA: mapChar = 0x0266; break;
|
||||
case 0xA7AB: mapChar = 0x025C; break;
|
||||
case 0xA7AC: mapChar = 0x0261; break;
|
||||
case 0xA7AD: mapChar = 0x026C; break;
|
||||
case 0xA7AE: mapChar = 0x026A; break;
|
||||
case 0xA7B0: mapChar = 0x029E; break;
|
||||
case 0xA7B1: mapChar = 0x0287; break;
|
||||
case 0xA7B2: mapChar = 0x029D; break;
|
||||
@ -339,6 +347,7 @@ class CharacterData00 extends CharacterData {
|
||||
case 0x0261: mapChar = 0xA7AC; break;
|
||||
case 0x0265: mapChar = 0xA78D; break;
|
||||
case 0x0266: mapChar = 0xA7AA; break;
|
||||
case 0x026A: mapChar = 0xA7AE; break;
|
||||
case 0x026B: mapChar = 0x2C62; break;
|
||||
case 0x026C: mapChar = 0xA7AD; break;
|
||||
case 0x0271: mapChar = 0x2C6E; break;
|
||||
@ -346,6 +355,15 @@ class CharacterData00 extends CharacterData {
|
||||
case 0x0287: mapChar = 0xA7B1; break;
|
||||
case 0x029D: mapChar = 0xA7B2; break;
|
||||
case 0x029E: mapChar = 0xA7B0; break;
|
||||
case 0x1C80: mapChar = 0x0412; break;
|
||||
case 0x1C81: mapChar = 0x0414; break;
|
||||
case 0x1C82: mapChar = 0x041E; break;
|
||||
case 0x1C83: mapChar = 0x0421; break;
|
||||
case 0x1C84: mapChar = 0x0422; break;
|
||||
case 0x1C85: mapChar = 0x0422; break;
|
||||
case 0x1C86: mapChar = 0x042A; break;
|
||||
case 0x1C87: mapChar = 0x0462; break;
|
||||
case 0x1C88: mapChar = 0xA64A; break;
|
||||
case 0x1D79: mapChar = 0xA77D; break;
|
||||
case 0x1D7D: mapChar = 0x2C63; break;
|
||||
case 0x1F80: mapChar = 0x1F88; break;
|
||||
@ -715,6 +733,7 @@ class CharacterData00 extends CharacterData {
|
||||
case 0x0261: mapChar = 0xA7AC; break;
|
||||
case 0x0265: mapChar = 0xA78D; break;
|
||||
case 0x0266: mapChar = 0xA7AA; break;
|
||||
case 0x026A: mapChar = 0xA7AE; break;
|
||||
case 0x026B: mapChar = 0x2C62; break;
|
||||
case 0x026C: mapChar = 0xA7AD; break;
|
||||
case 0x0271: mapChar = 0x2C6E; break;
|
||||
@ -722,6 +741,15 @@ class CharacterData00 extends CharacterData {
|
||||
case 0x0287: mapChar = 0xA7B1; break;
|
||||
case 0x029D: mapChar = 0xA7B2; break;
|
||||
case 0x029E: mapChar = 0xA7B0; break;
|
||||
case 0x1C80: mapChar = 0x0412; break;
|
||||
case 0x1C81: mapChar = 0x0414; break;
|
||||
case 0x1C82: mapChar = 0x041E; break;
|
||||
case 0x1C83: mapChar = 0x0421; break;
|
||||
case 0x1C84: mapChar = 0x0422; break;
|
||||
case 0x1C85: mapChar = 0x0422; break;
|
||||
case 0x1C86: mapChar = 0x042A; break;
|
||||
case 0x1C87: mapChar = 0x0462; break;
|
||||
case 0x1C88: mapChar = 0xA64A; break;
|
||||
case 0x1D79: mapChar = 0xA77D; break;
|
||||
case 0x1D7D: mapChar = 0x2C63; break;
|
||||
case 0x1FBE: mapChar = 0x0399; break;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -372,6 +372,13 @@ class CharacterData01 extends CharacterData {
|
||||
case 0x11063: retval = 90; break; // BRAHMI NUMBER NINETY
|
||||
case 0x11064: retval = 100; break; // BRAHMI NUMBER ONE HUNDRED
|
||||
case 0x11065: retval = 1000; break; // BRAHMI NUMBER ONE THOUSAND
|
||||
case 0x11C66: retval = 40; break; // BHAIKSUKI NUMBER FORTY
|
||||
case 0x11C67: retval = 50; break; // BHAIKSUKI NUMBER FIFTY
|
||||
case 0x11C68: retval = 60; break; // BHAIKSUKI NUMBER SIXTY
|
||||
case 0x11C69: retval = 70; break; // BHAIKSUKI NUMBER SEVENTY
|
||||
case 0x11C6A: retval = 80; break; // BHAIKSUKI NUMBER EIGHTY
|
||||
case 0x11C6B: retval = 90; break; // BHAIKSUKI NUMBER NINETY
|
||||
case 0x11C6C: retval = 100; break; // BHAIKSUKI HUNDREDS UNIT MARK
|
||||
case 0x111ED: retval = 40; break; // SINHALA ARCHAIC NUMBER FORTY
|
||||
case 0x111EE: retval = 50; break; // SINHALA ARCHAIC NUMBER FIFTY
|
||||
case 0x111EF: retval = 60; break; // SINHALA ARCHAIC NUMBER SIXTY
|
||||
|
@ -1,10 +1,11 @@
|
||||
# PropList-8.0.0.txt
|
||||
# Date: 2015-05-16, 17:50:38 GMT [MD]
|
||||
# PropList-10.0.0.txt
|
||||
# Date: 2017-03-10, 08:25:30 GMT
|
||||
# Copyright (c) 2017 Unicode, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2015 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -192,10 +193,17 @@ FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA
|
||||
111DE..111DF ; Terminal_Punctuation # Po [2] SHARADA SECTION MARK-1..SHARADA SECTION MARK-2
|
||||
11238..1123C ; Terminal_Punctuation # Po [5] KHOJKI DANDA..KHOJKI DOUBLE SECTION MARK
|
||||
112A9 ; Terminal_Punctuation # Po MULTANI SECTION MARK
|
||||
1144B..1144D ; Terminal_Punctuation # Po [3] NEWA DANDA..NEWA COMMA
|
||||
1145B ; Terminal_Punctuation # Po NEWA PLACEHOLDER MARK
|
||||
115C2..115C5 ; Terminal_Punctuation # Po [4] SIDDHAM DANDA..SIDDHAM SEPARATOR BAR
|
||||
115C9..115D7 ; Terminal_Punctuation # Po [15] SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES
|
||||
11641..11642 ; Terminal_Punctuation # Po [2] MODI DANDA..MODI DOUBLE DANDA
|
||||
1173C..1173E ; Terminal_Punctuation # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI
|
||||
11A42..11A43 ; Terminal_Punctuation # Po [2] ZANABAZAR SQUARE MARK SHAD..ZANABAZAR SQUARE MARK DOUBLE SHAD
|
||||
11A9B..11A9C ; Terminal_Punctuation # Po [2] SOYOMBO MARK SHAD..SOYOMBO MARK DOUBLE SHAD
|
||||
11AA1..11AA2 ; Terminal_Punctuation # Po [2] SOYOMBO TERMINAL MARK-1..SOYOMBO TERMINAL MARK-2
|
||||
11C41..11C43 ; Terminal_Punctuation # Po [3] BHAIKSUKI DANDA..BHAIKSUKI WORD SEPARATOR
|
||||
11C71 ; Terminal_Punctuation # Po MARCHEN MARK SHAD
|
||||
12470..12474 ; Terminal_Punctuation # Po [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON
|
||||
16A6E..16A6F ; Terminal_Punctuation # Po [2] MRO DANDA..MRO DOUBLE DANDA
|
||||
16AF5 ; Terminal_Punctuation # Po BASSA VAH FULL STOP
|
||||
@ -204,7 +212,7 @@ FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA
|
||||
1BC9F ; Terminal_Punctuation # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP
|
||||
1DA87..1DA8A ; Terminal_Punctuation # Po [4] SIGNWRITING COMMA..SIGNWRITING COLON
|
||||
|
||||
# Total code points: 238
|
||||
# Total code points: 252
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -429,6 +437,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
|
||||
081B..0823 ; Other_Alphabetic # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
|
||||
0825..0827 ; Other_Alphabetic # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
|
||||
0829..082C ; Other_Alphabetic # Mn [4] SAMARITAN VOWEL SIGN LONG I..SAMARITAN VOWEL SIGN SUKUN
|
||||
08D4..08DF ; Other_Alphabetic # Mn [12] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH WORD WAQFA
|
||||
08E3..08E9 ; Other_Alphabetic # Mn [7] ARABIC TURNED DAMMA BELOW..ARABIC CURLY KASRATAN
|
||||
08F0..0902 ; Other_Alphabetic # Mn [19] ARABIC OPEN FATHATAN..DEVANAGARI SIGN ANUSVARA
|
||||
0903 ; Other_Alphabetic # Mc DEVANAGARI SIGN VISARGA
|
||||
@ -465,6 +474,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
|
||||
0AC9 ; Other_Alphabetic # Mc GUJARATI VOWEL SIGN CANDRA O
|
||||
0ACB..0ACC ; Other_Alphabetic # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU
|
||||
0AE2..0AE3 ; Other_Alphabetic # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL
|
||||
0AFA..0AFC ; Other_Alphabetic # Mn [3] GUJARATI SIGN SUKUN..GUJARATI SIGN MADDAH
|
||||
0B01 ; Other_Alphabetic # Mn ORIYA SIGN CANDRABINDU
|
||||
0B02..0B03 ; Other_Alphabetic # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA
|
||||
0B3E ; Other_Alphabetic # Mc ORIYA VOWEL SIGN AA
|
||||
@ -502,7 +512,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
|
||||
0CCC ; Other_Alphabetic # Mn KANNADA VOWEL SIGN AU
|
||||
0CD5..0CD6 ; Other_Alphabetic # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
|
||||
0CE2..0CE3 ; Other_Alphabetic # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
|
||||
0D01 ; Other_Alphabetic # Mn MALAYALAM SIGN CANDRABINDU
|
||||
0D00..0D01 ; Other_Alphabetic # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
|
||||
0D02..0D03 ; Other_Alphabetic # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
|
||||
0D3E..0D40 ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II
|
||||
0D41..0D44 ; Other_Alphabetic # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
|
||||
@ -556,6 +566,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
|
||||
17BE..17C5 ; Other_Alphabetic # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU
|
||||
17C6 ; Other_Alphabetic # Mn KHMER SIGN NIKAHIT
|
||||
17C7..17C8 ; Other_Alphabetic # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU
|
||||
1885..1886 ; Other_Alphabetic # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
|
||||
18A9 ; Other_Alphabetic # Mn MONGOLIAN LETTER ALI GALI DAGALGA
|
||||
1920..1922 ; Other_Alphabetic # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U
|
||||
1923..1926 ; Other_Alphabetic # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU
|
||||
@ -613,6 +624,7 @@ A825..A826 ; Other_Alphabetic # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NA
|
||||
A827 ; Other_Alphabetic # Mc SYLOTI NAGRI VOWEL SIGN OO
|
||||
A880..A881 ; Other_Alphabetic # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA
|
||||
A8B4..A8C3 ; Other_Alphabetic # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU
|
||||
A8C5 ; Other_Alphabetic # Mn SAURASHTRA SIGN CANDRABINDU
|
||||
A926..A92A ; Other_Alphabetic # Mn [5] KAYAH LI VOWEL UE..KAYAH LI VOWEL O
|
||||
A947..A951 ; Other_Alphabetic # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R
|
||||
A952 ; Other_Alphabetic # Mc REJANG CONSONANT SIGN H
|
||||
@ -671,6 +683,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
|
||||
11232..11233 ; Other_Alphabetic # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU
|
||||
11234 ; Other_Alphabetic # Mn KHOJKI SIGN ANUSVARA
|
||||
11237 ; Other_Alphabetic # Mn KHOJKI SIGN SHADDA
|
||||
1123E ; Other_Alphabetic # Mn KHOJKI SIGN SUKUN
|
||||
112DF ; Other_Alphabetic # Mn KHUDAWADI SIGN ANUSVARA
|
||||
112E0..112E2 ; Other_Alphabetic # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II
|
||||
112E3..112E8 ; Other_Alphabetic # Mn [6] KHUDAWADI VOWEL SIGN U..KHUDAWADI VOWEL SIGN AU
|
||||
@ -683,6 +696,11 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
|
||||
1134B..1134C ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU
|
||||
11357 ; Other_Alphabetic # Mc GRANTHA AU LENGTH MARK
|
||||
11362..11363 ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL
|
||||
11435..11437 ; Other_Alphabetic # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II
|
||||
11438..1143F ; Other_Alphabetic # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI
|
||||
11440..11441 ; Other_Alphabetic # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU
|
||||
11443..11444 ; Other_Alphabetic # Mn [2] NEWA SIGN CANDRABINDU..NEWA SIGN ANUSVARA
|
||||
11445 ; Other_Alphabetic # Mc NEWA SIGN VISARGA
|
||||
114B0..114B2 ; Other_Alphabetic # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II
|
||||
114B3..114B8 ; Other_Alphabetic # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL
|
||||
114B9 ; Other_Alphabetic # Mc TIRHUTA VOWEL SIGN E
|
||||
@ -712,14 +730,48 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
|
||||
11722..11725 ; Other_Alphabetic # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU
|
||||
11726 ; Other_Alphabetic # Mc AHOM VOWEL SIGN E
|
||||
11727..1172A ; Other_Alphabetic # Mn [4] AHOM VOWEL SIGN AW..AHOM VOWEL SIGN AM
|
||||
11A01..11A06 ; Other_Alphabetic # Mn [6] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL SIGN O
|
||||
11A07..11A08 ; Other_Alphabetic # Mc [2] ZANABAZAR SQUARE VOWEL SIGN AI..ZANABAZAR SQUARE VOWEL SIGN AU
|
||||
11A09..11A0A ; Other_Alphabetic # Mn [2] ZANABAZAR SQUARE VOWEL SIGN REVERSED I..ZANABAZAR SQUARE VOWEL LENGTH MARK
|
||||
11A35..11A38 ; Other_Alphabetic # Mn [4] ZANABAZAR SQUARE SIGN CANDRABINDU..ZANABAZAR SQUARE SIGN ANUSVARA
|
||||
11A39 ; Other_Alphabetic # Mc ZANABAZAR SQUARE SIGN VISARGA
|
||||
11A3B..11A3E ; Other_Alphabetic # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA
|
||||
11A51..11A56 ; Other_Alphabetic # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE
|
||||
11A57..11A58 ; Other_Alphabetic # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU
|
||||
11A59..11A5B ; Other_Alphabetic # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK
|
||||
11A8A..11A96 ; Other_Alphabetic # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA
|
||||
11A97 ; Other_Alphabetic # Mc SOYOMBO SIGN VISARGA
|
||||
11C2F ; Other_Alphabetic # Mc BHAIKSUKI VOWEL SIGN AA
|
||||
11C30..11C36 ; Other_Alphabetic # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L
|
||||
11C38..11C3D ; Other_Alphabetic # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA
|
||||
11C3E ; Other_Alphabetic # Mc BHAIKSUKI SIGN VISARGA
|
||||
11C92..11CA7 ; Other_Alphabetic # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA
|
||||
11CA9 ; Other_Alphabetic # Mc MARCHEN SUBJOINED LETTER YA
|
||||
11CAA..11CB0 ; Other_Alphabetic # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA
|
||||
11CB1 ; Other_Alphabetic # Mc MARCHEN VOWEL SIGN I
|
||||
11CB2..11CB3 ; Other_Alphabetic # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E
|
||||
11CB4 ; Other_Alphabetic # Mc MARCHEN VOWEL SIGN O
|
||||
11CB5..11CB6 ; Other_Alphabetic # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU
|
||||
11D31..11D36 ; Other_Alphabetic # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R
|
||||
11D3A ; Other_Alphabetic # Mn MASARAM GONDI VOWEL SIGN E
|
||||
11D3C..11D3D ; Other_Alphabetic # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O
|
||||
11D3F..11D41 ; Other_Alphabetic # Mn [3] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI SIGN VISARGA
|
||||
11D43 ; Other_Alphabetic # Mn MASARAM GONDI SIGN CANDRA
|
||||
11D47 ; Other_Alphabetic # Mn MASARAM GONDI RA-KARA
|
||||
16B30..16B36 ; Other_Alphabetic # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
|
||||
16F51..16F7E ; Other_Alphabetic # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG
|
||||
1BC9E ; Other_Alphabetic # Mn DUPLOYAN DOUBLE MARK
|
||||
1E000..1E006 ; Other_Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
|
||||
1E008..1E018 ; Other_Alphabetic # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
|
||||
1E01B..1E021 ; Other_Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
|
||||
1E023..1E024 ; Other_Alphabetic # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
|
||||
1E026..1E02A ; Other_Alphabetic # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
|
||||
1E947 ; Other_Alphabetic # Mn ADLAM HAMZA
|
||||
1F130..1F149 ; Other_Alphabetic # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z
|
||||
1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
|
||||
1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z
|
||||
|
||||
# Total code points: 1116
|
||||
# Total code points: 1300
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -728,16 +780,20 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
|
||||
3021..3029 ; Ideographic # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE
|
||||
3038..303A ; Ideographic # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY
|
||||
3400..4DB5 ; Ideographic # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
|
||||
4E00..9FD5 ; Ideographic # Lo [20950] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FD5
|
||||
4E00..9FEA ; Ideographic # Lo [20971] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FEA
|
||||
F900..FA6D ; Ideographic # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D
|
||||
FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
|
||||
17000..187EC ; Ideographic # Lo [6125] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187EC
|
||||
18800..18AF2 ; Ideographic # Lo [755] TANGUT COMPONENT-001..TANGUT COMPONENT-755
|
||||
1B170..1B2FB ; Ideographic # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB
|
||||
20000..2A6D6 ; Ideographic # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
|
||||
2A700..2B734 ; Ideographic # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
|
||||
2B740..2B81D ; Ideographic # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
|
||||
2B820..2CEA1 ; Ideographic # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
|
||||
2CEB0..2EBE0 ; Ideographic # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
|
||||
2F800..2FA1D ; Ideographic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
|
||||
|
||||
# Total code points: 81404
|
||||
# Total code points: 96174
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -793,12 +849,14 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM
|
||||
0A4D ; Diacritic # Mn GURMUKHI SIGN VIRAMA
|
||||
0ABC ; Diacritic # Mn GUJARATI SIGN NUKTA
|
||||
0ACD ; Diacritic # Mn GUJARATI SIGN VIRAMA
|
||||
0AFD..0AFF ; Diacritic # Mn [3] GUJARATI SIGN THREE-DOT NUKTA ABOVE..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE
|
||||
0B3C ; Diacritic # Mn ORIYA SIGN NUKTA
|
||||
0B4D ; Diacritic # Mn ORIYA SIGN VIRAMA
|
||||
0BCD ; Diacritic # Mn TAMIL SIGN VIRAMA
|
||||
0C4D ; Diacritic # Mn TELUGU SIGN VIRAMA
|
||||
0CBC ; Diacritic # Mn KANNADA SIGN NUKTA
|
||||
0CCD ; Diacritic # Mn KANNADA SIGN VIRAMA
|
||||
0D3B..0D3C ; Diacritic # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA
|
||||
0D4D ; Diacritic # Mn MALAYALAM SIGN VIRAMA
|
||||
0DCA ; Diacritic # Mn SINHALA SIGN AL-LAKUNA
|
||||
0E47..0E4C ; Diacritic # Mn [6] THAI CHARACTER MAITAIKHU..THAI CHARACTER THANTHAKHAT
|
||||
@ -838,10 +896,11 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM
|
||||
1CE2..1CE8 ; Diacritic # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
|
||||
1CED ; Diacritic # Mn VEDIC SIGN TIRYAK
|
||||
1CF4 ; Diacritic # Mn VEDIC TONE CANDRA ABOVE
|
||||
1CF7 ; Diacritic # Mc VEDIC SIGN ATIKRAMA
|
||||
1CF8..1CF9 ; Diacritic # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
|
||||
1D2C..1D6A ; Diacritic # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI
|
||||
1DC4..1DCF ; Diacritic # Mn [12] COMBINING MACRON-ACUTE..COMBINING ZIGZAG BELOW
|
||||
1DF5 ; Diacritic # Mn COMBINING UP TACK ABOVE
|
||||
1DF5..1DF9 ; Diacritic # Mn [5] COMBINING UP TACK ABOVE..COMBINING WIDE INVERTED BRIDGE BELOW
|
||||
1DFD..1DFF ; Diacritic # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
|
||||
1FBD ; Diacritic # Sk GREEK KORONIS
|
||||
1FBF..1FC1 ; Diacritic # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI
|
||||
@ -906,12 +965,20 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON
|
||||
1134D ; Diacritic # Mc GRANTHA SIGN VIRAMA
|
||||
11366..1136C ; Diacritic # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX
|
||||
11370..11374 ; Diacritic # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA
|
||||
11442 ; Diacritic # Mn NEWA SIGN VIRAMA
|
||||
11446 ; Diacritic # Mn NEWA SIGN NUKTA
|
||||
114C2..114C3 ; Diacritic # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA
|
||||
115BF..115C0 ; Diacritic # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA
|
||||
1163F ; Diacritic # Mn MODI SIGN VIRAMA
|
||||
116B6 ; Diacritic # Mc TAKRI SIGN VIRAMA
|
||||
116B7 ; Diacritic # Mn TAKRI SIGN NUKTA
|
||||
1172B ; Diacritic # Mn AHOM SIGN KILLER
|
||||
11A34 ; Diacritic # Mn ZANABAZAR SQUARE SIGN VIRAMA
|
||||
11A47 ; Diacritic # Mn ZANABAZAR SQUARE SUBJOINER
|
||||
11A99 ; Diacritic # Mn SOYOMBO SUBJOINER
|
||||
11C3F ; Diacritic # Mn BHAIKSUKI SIGN VIRAMA
|
||||
11D42 ; Diacritic # Mn MASARAM GONDI SIGN NUKTA
|
||||
11D44..11D45 ; Diacritic # Mn [2] MASARAM GONDI SIGN HALANTA..MASARAM GONDI VIRAMA
|
||||
16AF0..16AF4 ; Diacritic # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE
|
||||
16F8F..16F92 ; Diacritic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
|
||||
16F93..16F9F ; Diacritic # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8
|
||||
@ -921,8 +988,10 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON
|
||||
1D185..1D18B ; Diacritic # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
|
||||
1D1AA..1D1AD ; Diacritic # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
|
||||
1E8D0..1E8D6 ; Diacritic # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
|
||||
1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK
|
||||
1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA
|
||||
|
||||
# Total code points: 773
|
||||
# Total code points: 798
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -951,9 +1020,12 @@ AAF3..AAF4 ; Extender # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETE
|
||||
FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
1135D ; Extender # Lo GRANTHA SIGN PLUTA
|
||||
115C6..115C8 ; Extender # Po [3] SIDDHAM REPETITION MARK-1..SIDDHAM REPETITION MARK-3
|
||||
11A98 ; Extender # Mn SOYOMBO GEMINATION MARK
|
||||
16B42..16B43 ; Extender # Lm [2] PAHAWH HMONG SIGN VOS NRUA..PAHAWH HMONG SIGN IB YAM
|
||||
16FE0..16FE1 ; Extender # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK
|
||||
1E944..1E946 ; Extender # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK
|
||||
|
||||
# Total code points: 38
|
||||
# Total code points: 44
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1027,7 +1099,7 @@ FFFFE..FFFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-FFFFE>..<noncha
|
||||
0D57 ; Other_Grapheme_Extend # Mc MALAYALAM AU LENGTH MARK
|
||||
0DCF ; Other_Grapheme_Extend # Mc SINHALA VOWEL SIGN AELA-PILLA
|
||||
0DDF ; Other_Grapheme_Extend # Mc SINHALA VOWEL SIGN GAYANUKITTA
|
||||
200C..200D ; Other_Grapheme_Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
|
||||
200C ; Other_Grapheme_Extend # Cf ZERO WIDTH NON-JOINER
|
||||
302E..302F ; Other_Grapheme_Extend # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK
|
||||
FF9E..FF9F ; Other_Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
|
||||
1133E ; Other_Grapheme_Extend # Mc GRANTHA VOWEL SIGN AA
|
||||
@ -1037,8 +1109,9 @@ FF9E..FF9F ; Other_Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND
|
||||
115AF ; Other_Grapheme_Extend # Mc SIDDHAM VOWEL SIGN AA
|
||||
1D165 ; Other_Grapheme_Extend # Mc MUSICAL SYMBOL COMBINING STEM
|
||||
1D16E..1D172 ; Other_Grapheme_Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5
|
||||
E0020..E007F ; Other_Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG
|
||||
|
||||
# Total code points: 30
|
||||
# Total code points: 125
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1064,7 +1137,7 @@ FF9E..FF9F ; Other_Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND
|
||||
# ================================================
|
||||
|
||||
3400..4DB5 ; Unified_Ideograph # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
|
||||
4E00..9FD5 ; Unified_Ideograph # Lo [20950] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FD5
|
||||
4E00..9FEA ; Unified_Ideograph # Lo [20971] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FEA
|
||||
FA0E..FA0F ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA0E..CJK COMPATIBILITY IDEOGRAPH-FA0F
|
||||
FA11 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA11
|
||||
FA13..FA14 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA13..CJK COMPATIBILITY IDEOGRAPH-FA14
|
||||
@ -1076,8 +1149,9 @@ FA27..FA29 ; Unified_Ideograph # Lo [3] CJK COMPATIBILITY IDEOGRAPH-FA27..C
|
||||
2A700..2B734 ; Unified_Ideograph # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
|
||||
2B740..2B81D ; Unified_Ideograph # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
|
||||
2B820..2CEA1 ; Unified_Ideograph # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
|
||||
2CEB0..2EBE0 ; Unified_Ideograph # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
|
||||
|
||||
# Total code points: 80388
|
||||
# Total code points: 87882
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1106,9 +1180,8 @@ E01F0..E0FFF ; Other_Default_Ignorable_Code_Point # Cn [3600] <reserved-E01F0>.
|
||||
2329 ; Deprecated # Ps LEFT-POINTING ANGLE BRACKET
|
||||
232A ; Deprecated # Pe RIGHT-POINTING ANGLE BRACKET
|
||||
E0001 ; Deprecated # Cf LANGUAGE TAG
|
||||
E007F ; Deprecated # Cf CANCEL TAG
|
||||
|
||||
# Total code points: 16
|
||||
# Total code points: 15
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1160,11 +1233,12 @@ AABB..AABC ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL AUE..TAI VIET
|
||||
|
||||
# ================================================
|
||||
|
||||
1885..1886 ; Other_ID_Start # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
|
||||
2118 ; Other_ID_Start # Sm SCRIPT CAPITAL P
|
||||
212E ; Other_ID_Start # So ESTIMATED SYMBOL
|
||||
309B..309C ; Other_ID_Start # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
|
||||
|
||||
# Total code points: 4
|
||||
# Total code points: 6
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1177,72 +1251,76 @@ AABB..AABC ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL AUE..TAI VIET
|
||||
|
||||
# ================================================
|
||||
|
||||
0021 ; STerm # Po EXCLAMATION MARK
|
||||
002E ; STerm # Po FULL STOP
|
||||
003F ; STerm # Po QUESTION MARK
|
||||
0589 ; STerm # Po ARMENIAN FULL STOP
|
||||
061F ; STerm # Po ARABIC QUESTION MARK
|
||||
06D4 ; STerm # Po ARABIC FULL STOP
|
||||
0700..0702 ; STerm # Po [3] SYRIAC END OF PARAGRAPH..SYRIAC SUBLINEAR FULL STOP
|
||||
07F9 ; STerm # Po NKO EXCLAMATION MARK
|
||||
0964..0965 ; STerm # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA
|
||||
104A..104B ; STerm # Po [2] MYANMAR SIGN LITTLE SECTION..MYANMAR SIGN SECTION
|
||||
1362 ; STerm # Po ETHIOPIC FULL STOP
|
||||
1367..1368 ; STerm # Po [2] ETHIOPIC QUESTION MARK..ETHIOPIC PARAGRAPH SEPARATOR
|
||||
166E ; STerm # Po CANADIAN SYLLABICS FULL STOP
|
||||
1735..1736 ; STerm # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION
|
||||
1803 ; STerm # Po MONGOLIAN FULL STOP
|
||||
1809 ; STerm # Po MONGOLIAN MANCHU FULL STOP
|
||||
1944..1945 ; STerm # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK
|
||||
1AA8..1AAB ; STerm # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU
|
||||
1B5A..1B5B ; STerm # Po [2] BALINESE PANTI..BALINESE PAMADA
|
||||
1B5E..1B5F ; STerm # Po [2] BALINESE CARIK SIKI..BALINESE CARIK PAREREN
|
||||
1C3B..1C3C ; STerm # Po [2] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION NYET THYOOM TA-ROL
|
||||
1C7E..1C7F ; STerm # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
|
||||
203C..203D ; STerm # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG
|
||||
2047..2049 ; STerm # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK
|
||||
2E2E ; STerm # Po REVERSED QUESTION MARK
|
||||
2E3C ; STerm # Po STENOGRAPHIC FULL STOP
|
||||
3002 ; STerm # Po IDEOGRAPHIC FULL STOP
|
||||
A4FF ; STerm # Po LISU PUNCTUATION FULL STOP
|
||||
A60E..A60F ; STerm # Po [2] VAI FULL STOP..VAI QUESTION MARK
|
||||
A6F3 ; STerm # Po BAMUM FULL STOP
|
||||
A6F7 ; STerm # Po BAMUM QUESTION MARK
|
||||
A876..A877 ; STerm # Po [2] PHAGS-PA MARK SHAD..PHAGS-PA MARK DOUBLE SHAD
|
||||
A8CE..A8CF ; STerm # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA
|
||||
A92F ; STerm # Po KAYAH LI SIGN SHYA
|
||||
A9C8..A9C9 ; STerm # Po [2] JAVANESE PADA LINGSA..JAVANESE PADA LUNGSI
|
||||
AA5D..AA5F ; STerm # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA
|
||||
AAF0..AAF1 ; STerm # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM
|
||||
ABEB ; STerm # Po MEETEI MAYEK CHEIKHEI
|
||||
FE52 ; STerm # Po SMALL FULL STOP
|
||||
FE56..FE57 ; STerm # Po [2] SMALL QUESTION MARK..SMALL EXCLAMATION MARK
|
||||
FF01 ; STerm # Po FULLWIDTH EXCLAMATION MARK
|
||||
FF0E ; STerm # Po FULLWIDTH FULL STOP
|
||||
FF1F ; STerm # Po FULLWIDTH QUESTION MARK
|
||||
FF61 ; STerm # Po HALFWIDTH IDEOGRAPHIC FULL STOP
|
||||
10A56..10A57 ; STerm # Po [2] KHAROSHTHI PUNCTUATION DANDA..KHAROSHTHI PUNCTUATION DOUBLE DANDA
|
||||
11047..11048 ; STerm # Po [2] BRAHMI DANDA..BRAHMI DOUBLE DANDA
|
||||
110BE..110C1 ; STerm # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA
|
||||
11141..11143 ; STerm # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK
|
||||
111C5..111C6 ; STerm # Po [2] SHARADA DANDA..SHARADA DOUBLE DANDA
|
||||
111CD ; STerm # Po SHARADA SUTRA MARK
|
||||
111DE..111DF ; STerm # Po [2] SHARADA SECTION MARK-1..SHARADA SECTION MARK-2
|
||||
11238..11239 ; STerm # Po [2] KHOJKI DANDA..KHOJKI DOUBLE DANDA
|
||||
1123B..1123C ; STerm # Po [2] KHOJKI SECTION MARK..KHOJKI DOUBLE SECTION MARK
|
||||
112A9 ; STerm # Po MULTANI SECTION MARK
|
||||
115C2..115C3 ; STerm # Po [2] SIDDHAM DANDA..SIDDHAM DOUBLE DANDA
|
||||
115C9..115D7 ; STerm # Po [15] SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES
|
||||
11641..11642 ; STerm # Po [2] MODI DANDA..MODI DOUBLE DANDA
|
||||
1173C..1173E ; STerm # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI
|
||||
16A6E..16A6F ; STerm # Po [2] MRO DANDA..MRO DOUBLE DANDA
|
||||
16AF5 ; STerm # Po BASSA VAH FULL STOP
|
||||
16B37..16B38 ; STerm # Po [2] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS TSHAB CEEB
|
||||
16B44 ; STerm # Po PAHAWH HMONG SIGN XAUS
|
||||
1BC9F ; STerm # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP
|
||||
1DA88 ; STerm # Po SIGNWRITING FULL STOP
|
||||
0021 ; Sentence_Terminal # Po EXCLAMATION MARK
|
||||
002E ; Sentence_Terminal # Po FULL STOP
|
||||
003F ; Sentence_Terminal # Po QUESTION MARK
|
||||
0589 ; Sentence_Terminal # Po ARMENIAN FULL STOP
|
||||
061F ; Sentence_Terminal # Po ARABIC QUESTION MARK
|
||||
06D4 ; Sentence_Terminal # Po ARABIC FULL STOP
|
||||
0700..0702 ; Sentence_Terminal # Po [3] SYRIAC END OF PARAGRAPH..SYRIAC SUBLINEAR FULL STOP
|
||||
07F9 ; Sentence_Terminal # Po NKO EXCLAMATION MARK
|
||||
0964..0965 ; Sentence_Terminal # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA
|
||||
104A..104B ; Sentence_Terminal # Po [2] MYANMAR SIGN LITTLE SECTION..MYANMAR SIGN SECTION
|
||||
1362 ; Sentence_Terminal # Po ETHIOPIC FULL STOP
|
||||
1367..1368 ; Sentence_Terminal # Po [2] ETHIOPIC QUESTION MARK..ETHIOPIC PARAGRAPH SEPARATOR
|
||||
166E ; Sentence_Terminal # Po CANADIAN SYLLABICS FULL STOP
|
||||
1735..1736 ; Sentence_Terminal # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION
|
||||
1803 ; Sentence_Terminal # Po MONGOLIAN FULL STOP
|
||||
1809 ; Sentence_Terminal # Po MONGOLIAN MANCHU FULL STOP
|
||||
1944..1945 ; Sentence_Terminal # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK
|
||||
1AA8..1AAB ; Sentence_Terminal # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU
|
||||
1B5A..1B5B ; Sentence_Terminal # Po [2] BALINESE PANTI..BALINESE PAMADA
|
||||
1B5E..1B5F ; Sentence_Terminal # Po [2] BALINESE CARIK SIKI..BALINESE CARIK PAREREN
|
||||
1C3B..1C3C ; Sentence_Terminal # Po [2] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION NYET THYOOM TA-ROL
|
||||
1C7E..1C7F ; Sentence_Terminal # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
|
||||
203C..203D ; Sentence_Terminal # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG
|
||||
2047..2049 ; Sentence_Terminal # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK
|
||||
2E2E ; Sentence_Terminal # Po REVERSED QUESTION MARK
|
||||
2E3C ; Sentence_Terminal # Po STENOGRAPHIC FULL STOP
|
||||
3002 ; Sentence_Terminal # Po IDEOGRAPHIC FULL STOP
|
||||
A4FF ; Sentence_Terminal # Po LISU PUNCTUATION FULL STOP
|
||||
A60E..A60F ; Sentence_Terminal # Po [2] VAI FULL STOP..VAI QUESTION MARK
|
||||
A6F3 ; Sentence_Terminal # Po BAMUM FULL STOP
|
||||
A6F7 ; Sentence_Terminal # Po BAMUM QUESTION MARK
|
||||
A876..A877 ; Sentence_Terminal # Po [2] PHAGS-PA MARK SHAD..PHAGS-PA MARK DOUBLE SHAD
|
||||
A8CE..A8CF ; Sentence_Terminal # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA
|
||||
A92F ; Sentence_Terminal # Po KAYAH LI SIGN SHYA
|
||||
A9C8..A9C9 ; Sentence_Terminal # Po [2] JAVANESE PADA LINGSA..JAVANESE PADA LUNGSI
|
||||
AA5D..AA5F ; Sentence_Terminal # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA
|
||||
AAF0..AAF1 ; Sentence_Terminal # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM
|
||||
ABEB ; Sentence_Terminal # Po MEETEI MAYEK CHEIKHEI
|
||||
FE52 ; Sentence_Terminal # Po SMALL FULL STOP
|
||||
FE56..FE57 ; Sentence_Terminal # Po [2] SMALL QUESTION MARK..SMALL EXCLAMATION MARK
|
||||
FF01 ; Sentence_Terminal # Po FULLWIDTH EXCLAMATION MARK
|
||||
FF0E ; Sentence_Terminal # Po FULLWIDTH FULL STOP
|
||||
FF1F ; Sentence_Terminal # Po FULLWIDTH QUESTION MARK
|
||||
FF61 ; Sentence_Terminal # Po HALFWIDTH IDEOGRAPHIC FULL STOP
|
||||
10A56..10A57 ; Sentence_Terminal # Po [2] KHAROSHTHI PUNCTUATION DANDA..KHAROSHTHI PUNCTUATION DOUBLE DANDA
|
||||
11047..11048 ; Sentence_Terminal # Po [2] BRAHMI DANDA..BRAHMI DOUBLE DANDA
|
||||
110BE..110C1 ; Sentence_Terminal # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA
|
||||
11141..11143 ; Sentence_Terminal # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK
|
||||
111C5..111C6 ; Sentence_Terminal # Po [2] SHARADA DANDA..SHARADA DOUBLE DANDA
|
||||
111CD ; Sentence_Terminal # Po SHARADA SUTRA MARK
|
||||
111DE..111DF ; Sentence_Terminal # Po [2] SHARADA SECTION MARK-1..SHARADA SECTION MARK-2
|
||||
11238..11239 ; Sentence_Terminal # Po [2] KHOJKI DANDA..KHOJKI DOUBLE DANDA
|
||||
1123B..1123C ; Sentence_Terminal # Po [2] KHOJKI SECTION MARK..KHOJKI DOUBLE SECTION MARK
|
||||
112A9 ; Sentence_Terminal # Po MULTANI SECTION MARK
|
||||
1144B..1144C ; Sentence_Terminal # Po [2] NEWA DANDA..NEWA DOUBLE DANDA
|
||||
115C2..115C3 ; Sentence_Terminal # Po [2] SIDDHAM DANDA..SIDDHAM DOUBLE DANDA
|
||||
115C9..115D7 ; Sentence_Terminal # Po [15] SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES
|
||||
11641..11642 ; Sentence_Terminal # Po [2] MODI DANDA..MODI DOUBLE DANDA
|
||||
1173C..1173E ; Sentence_Terminal # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI
|
||||
11A42..11A43 ; Sentence_Terminal # Po [2] ZANABAZAR SQUARE MARK SHAD..ZANABAZAR SQUARE MARK DOUBLE SHAD
|
||||
11A9B..11A9C ; Sentence_Terminal # Po [2] SOYOMBO MARK SHAD..SOYOMBO MARK DOUBLE SHAD
|
||||
11C41..11C42 ; Sentence_Terminal # Po [2] BHAIKSUKI DANDA..BHAIKSUKI DOUBLE DANDA
|
||||
16A6E..16A6F ; Sentence_Terminal # Po [2] MRO DANDA..MRO DOUBLE DANDA
|
||||
16AF5 ; Sentence_Terminal # Po BASSA VAH FULL STOP
|
||||
16B37..16B38 ; Sentence_Terminal # Po [2] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS TSHAB CEEB
|
||||
16B44 ; Sentence_Terminal # Po PAHAWH HMONG SIGN XAUS
|
||||
1BC9F ; Sentence_Terminal # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP
|
||||
1DA88 ; Sentence_Terminal # Po SIGNWRITING FULL STOP
|
||||
|
||||
# Total code points: 120
|
||||
# Total code points: 128
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1359,9 +1437,7 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
|
||||
239B..23B3 ; Pattern_Syntax # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM
|
||||
23B4..23DB ; Pattern_Syntax # So [40] TOP SQUARE BRACKET..FUSE
|
||||
23DC..23E1 ; Pattern_Syntax # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET
|
||||
23E2..23FA ; Pattern_Syntax # So [25] WHITE TRAPEZIUM..BLACK CIRCLE FOR RECORD
|
||||
23FB..23FF ; Pattern_Syntax # Cn [5] <reserved-23FB>..<reserved-23FF>
|
||||
2400..2426 ; Pattern_Syntax # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO
|
||||
23E2..2426 ; Pattern_Syntax # So [69] WHITE TRAPEZIUM..SYMBOL FOR SUBSTITUTE FORM TWO
|
||||
2427..243F ; Pattern_Syntax # Cn [25] <reserved-2427>..<reserved-243F>
|
||||
2440..244A ; Pattern_Syntax # So [11] OCR HOOK..OCR DOUBLE BACKSLASH
|
||||
244B..245F ; Pattern_Syntax # Cn [21] <reserved-244B>..<reserved-245F>
|
||||
@ -1449,8 +1525,8 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
|
||||
2BBA..2BBC ; Pattern_Syntax # Cn [3] <reserved-2BBA>..<reserved-2BBC>
|
||||
2BBD..2BC8 ; Pattern_Syntax # So [12] BALLOT BOX WITH LIGHT X..BLACK MEDIUM RIGHT-POINTING TRIANGLE CENTRED
|
||||
2BC9 ; Pattern_Syntax # Cn <reserved-2BC9>
|
||||
2BCA..2BD1 ; Pattern_Syntax # So [8] TOP HALF BLACK CIRCLE..UNCERTAINTY SIGN
|
||||
2BD2..2BEB ; Pattern_Syntax # Cn [26] <reserved-2BD2>..<reserved-2BEB>
|
||||
2BCA..2BD2 ; Pattern_Syntax # So [9] TOP HALF BLACK CIRCLE..GROUP MARK
|
||||
2BD3..2BEB ; Pattern_Syntax # Cn [25] <reserved-2BD3>..<reserved-2BEB>
|
||||
2BEC..2BEF ; Pattern_Syntax # So [4] LEFTWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS..DOWNWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS
|
||||
2BF0..2BFF ; Pattern_Syntax # Cn [16] <reserved-2BF0>..<reserved-2BFF>
|
||||
2E00..2E01 ; Pattern_Syntax # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER
|
||||
@ -1490,7 +1566,8 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
|
||||
2E40 ; Pattern_Syntax # Pd DOUBLE HYPHEN
|
||||
2E41 ; Pattern_Syntax # Po REVERSED COMMA
|
||||
2E42 ; Pattern_Syntax # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK
|
||||
2E43..2E7F ; Pattern_Syntax # Cn [61] <reserved-2E43>..<reserved-2E7F>
|
||||
2E43..2E49 ; Pattern_Syntax # Po [7] DASH WITH LEFT UPTURN..DOUBLE STACKED COMMA
|
||||
2E4A..2E7F ; Pattern_Syntax # Cn [54] <reserved-2E4A>..<reserved-2E7F>
|
||||
3001..3003 ; Pattern_Syntax # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
|
||||
3008 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET
|
||||
3009 ; Pattern_Syntax # Pe RIGHT ANGLE BRACKET
|
||||
@ -1522,4 +1599,20 @@ FE45..FE46 ; Pattern_Syntax # Po [2] SESAME DOT..WHITE SESAME DOT
|
||||
|
||||
# Total code points: 2760
|
||||
|
||||
# ================================================
|
||||
|
||||
0600..0605 ; Prepended_Concatenation_Mark # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE
|
||||
06DD ; Prepended_Concatenation_Mark # Cf ARABIC END OF AYAH
|
||||
070F ; Prepended_Concatenation_Mark # Cf SYRIAC ABBREVIATION MARK
|
||||
08E2 ; Prepended_Concatenation_Mark # Cf ARABIC DISPUTED END OF AYAH
|
||||
110BD ; Prepended_Concatenation_Mark # Cf KAITHI NUMBER SIGN
|
||||
|
||||
# Total code points: 10
|
||||
|
||||
# ================================================
|
||||
|
||||
1F1E6..1F1FF ; Regional_Indicator # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z
|
||||
|
||||
# Total code points: 26
|
||||
|
||||
# EOF
|
||||
|
@ -1,10 +1,11 @@
|
||||
# Scripts-8.0.0.txt
|
||||
# Date: 2015-03-11, 22:29:42 GMT [MD]
|
||||
# Scripts-10.0.0.txt
|
||||
# Date: 2017-03-11, 06:40:37 GMT
|
||||
# Copyright (c) 2017 Unicode, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2015 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
# For more information, see:
|
||||
# UAX #24, Unicode Script Property: http://www.unicode.org/reports/tr24/
|
||||
# Especially the sections:
|
||||
@ -92,10 +93,10 @@
|
||||
0605 ; Common # Cf ARABIC NUMBER MARK ABOVE
|
||||
060C ; Common # Po ARABIC COMMA
|
||||
061B ; Common # Po ARABIC SEMICOLON
|
||||
061C ; Common # Cf ARABIC LETTER MARK
|
||||
061F ; Common # Po ARABIC QUESTION MARK
|
||||
0640 ; Common # Lm ARABIC TATWEEL
|
||||
06DD ; Common # Cf ARABIC END OF AYAH
|
||||
08E2 ; Common # Cf ARABIC DISPUTED END OF AYAH
|
||||
0964..0965 ; Common # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA
|
||||
0E3F ; Common # Sc THAI CURRENCY SYMBOL BAHT
|
||||
0FD5..0FD8 ; Common # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS
|
||||
@ -110,6 +111,7 @@
|
||||
1CEE..1CF1 ; Common # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA
|
||||
1CF2..1CF3 ; Common # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA
|
||||
1CF5..1CF6 ; Common # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA
|
||||
1CF7 ; Common # Mc VEDIC SIGN ATIKRAMA
|
||||
2000..200A ; Common # Zs [11] EN QUAD..HAIR SPACE
|
||||
200B ; Common # Cf ZERO WIDTH SPACE
|
||||
200E..200F ; Common # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
|
||||
@ -153,7 +155,7 @@
|
||||
208A..208C ; Common # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN
|
||||
208D ; Common # Ps SUBSCRIPT LEFT PARENTHESIS
|
||||
208E ; Common # Pe SUBSCRIPT RIGHT PARENTHESIS
|
||||
20A0..20BE ; Common # Sc [31] EURO-CURRENCY SIGN..LARI SIGN
|
||||
20A0..20BF ; Common # Sc [32] EURO-CURRENCY SIGN..BITCOIN SIGN
|
||||
2100..2101 ; Common # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT
|
||||
2102 ; Common # L& DOUBLE-STRUCK CAPITAL C
|
||||
2103..2106 ; Common # So [4] DEGREE CELSIUS..CADA UNA
|
||||
@ -223,8 +225,7 @@
|
||||
239B..23B3 ; Common # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM
|
||||
23B4..23DB ; Common # So [40] TOP SQUARE BRACKET..FUSE
|
||||
23DC..23E1 ; Common # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET
|
||||
23E2..23FA ; Common # So [25] WHITE TRAPEZIUM..BLACK CIRCLE FOR RECORD
|
||||
2400..2426 ; Common # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO
|
||||
23E2..2426 ; Common # So [69] WHITE TRAPEZIUM..SYMBOL FOR SUBSTITUTE FORM TWO
|
||||
2440..244A ; Common # So [11] OCR HOOK..OCR DOUBLE BACKSLASH
|
||||
2460..249B ; Common # No [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP
|
||||
249C..24E9 ; Common # So [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z
|
||||
@ -309,7 +310,7 @@
|
||||
2B76..2B95 ; Common # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW
|
||||
2B98..2BB9 ; Common # So [34] THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD..UP ARROWHEAD IN A RECTANGLE BOX
|
||||
2BBD..2BC8 ; Common # So [12] BALLOT BOX WITH LIGHT X..BLACK MEDIUM RIGHT-POINTING TRIANGLE CENTRED
|
||||
2BCA..2BD1 ; Common # So [8] TOP HALF BLACK CIRCLE..UNCERTAINTY SIGN
|
||||
2BCA..2BD2 ; Common # So [9] TOP HALF BLACK CIRCLE..GROUP MARK
|
||||
2BEC..2BEF ; Common # So [4] LEFTWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS..DOWNWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS
|
||||
2E00..2E01 ; Common # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER
|
||||
2E02 ; Common # Pi LEFT SUBSTITUTION BRACKET
|
||||
@ -348,6 +349,7 @@
|
||||
2E40 ; Common # Pd DOUBLE HYPHEN
|
||||
2E41 ; Common # Po REVERSED COMMA
|
||||
2E42 ; Common # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK
|
||||
2E43..2E49 ; Common # Po [7] DASH WITH LEFT UPTURN..DOUBLE STACKED COMMA
|
||||
2FF0..2FFB ; Common # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
|
||||
3000 ; Common # Zs IDEOGRAPHIC SPACE
|
||||
3001..3003 ; Common # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
|
||||
@ -572,19 +574,18 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
|
||||
1F100..1F10C ; Common # No [13] DIGIT ZERO FULL STOP..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO
|
||||
1F110..1F12E ; Common # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ
|
||||
1F130..1F16B ; Common # So [60] SQUARED LATIN CAPITAL LETTER A..RAISED MD SIGN
|
||||
1F170..1F19A ; Common # So [43] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VS
|
||||
1F170..1F1AC ; Common # So [61] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VOD
|
||||
1F1E6..1F1FF ; Common # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z
|
||||
1F201..1F202 ; Common # So [2] SQUARED KATAKANA KOKO..SQUARED KATAKANA SA
|
||||
1F210..1F23A ; Common # So [43] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-55B6
|
||||
1F210..1F23B ; Common # So [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D
|
||||
1F240..1F248 ; Common # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
|
||||
1F250..1F251 ; Common # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
|
||||
1F260..1F265 ; Common # So [6] ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI
|
||||
1F300..1F3FA ; Common # So [251] CYCLONE..AMPHORA
|
||||
1F3FB..1F3FF ; Common # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6
|
||||
1F400..1F579 ; Common # So [378] RAT..JOYSTICK
|
||||
1F57B..1F5A3 ; Common # So [41] LEFT HAND TELEPHONE RECEIVER..BLACK DOWN POINTING BACKHAND INDEX
|
||||
1F5A5..1F6D0 ; Common # So [300] DESKTOP COMPUTER..PLACE OF WORSHIP
|
||||
1F400..1F6D4 ; Common # So [725] RAT..PAGODA
|
||||
1F6E0..1F6EC ; Common # So [13] HAMMER AND WRENCH..AIRPLANE ARRIVING
|
||||
1F6F0..1F6F3 ; Common # So [4] SATELLITE..PASSENGER SHIP
|
||||
1F6F0..1F6F8 ; Common # So [9] SATELLITE..FLYING SAUCER
|
||||
1F700..1F773 ; Common # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE
|
||||
1F780..1F7D4 ; Common # So [85] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..HEAVY TWELVE POINTED PINWHEEL STAR
|
||||
1F800..1F80B ; Common # So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD
|
||||
@ -592,13 +593,17 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
|
||||
1F850..1F859 ; Common # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW
|
||||
1F860..1F887 ; Common # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
|
||||
1F890..1F8AD ; Common # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS
|
||||
1F910..1F918 ; Common # So [9] ZIPPER-MOUTH FACE..SIGN OF THE HORNS
|
||||
1F980..1F984 ; Common # So [5] CRAB..UNICORN FACE
|
||||
1F900..1F90B ; Common # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT
|
||||
1F910..1F93E ; Common # So [47] ZIPPER-MOUTH FACE..HANDBALL
|
||||
1F940..1F94C ; Common # So [13] WILTED FLOWER..CURLING STONE
|
||||
1F950..1F96B ; Common # So [28] CROISSANT..CANNED FOOD
|
||||
1F980..1F997 ; Common # So [24] CRAB..CRICKET
|
||||
1F9C0 ; Common # So CHEESE WEDGE
|
||||
1F9D0..1F9E6 ; Common # So [23] FACE WITH MONOCLE..SOCKS
|
||||
E0001 ; Common # Cf LANGUAGE TAG
|
||||
E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG
|
||||
|
||||
# Total code points: 7179
|
||||
# Total code points: 7363
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -641,7 +646,7 @@ A770 ; Latin # Lm MODIFIER LETTER US
|
||||
A771..A787 ; Latin # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T
|
||||
A78B..A78E ; Latin # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
|
||||
A78F ; Latin # Lo LATIN LETTER SINOLOGICAL DOT
|
||||
A790..A7AD ; Latin # L& [30] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER L WITH BELT
|
||||
A790..A7AE ; Latin # L& [31] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER SMALL CAPITAL I
|
||||
A7B0..A7B7 ; Latin # L& [8] LATIN CAPITAL LETTER TURNED K..LATIN SMALL LETTER OMEGA
|
||||
A7F7 ; Latin # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I
|
||||
A7F8..A7F9 ; Latin # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
|
||||
@ -654,7 +659,7 @@ FB00..FB06 ; Latin # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE S
|
||||
FF21..FF3A ; Latin # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
|
||||
FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
|
||||
|
||||
# Total code points: 1349
|
||||
# Total code points: 1350
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -708,13 +713,13 @@ AB65 ; Greek # L& GREEK LETTER SMALL CAPITAL OMEGA
|
||||
10175..10178 ; Greek # No [4] GREEK ONE HALF SIGN..GREEK THREE QUARTERS SIGN
|
||||
10179..10189 ; Greek # So [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN
|
||||
1018A..1018B ; Greek # No [2] GREEK ZERO SIGN..GREEK ONE QUARTER SIGN
|
||||
1018C ; Greek # So GREEK SINUSOID SIGN
|
||||
1018C..1018E ; Greek # So [3] GREEK SINUSOID SIGN..NOMISMA SIGN
|
||||
101A0 ; Greek # So GREEK SYMBOL TAU RHO
|
||||
1D200..1D241 ; Greek # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54
|
||||
1D242..1D244 ; Greek # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
|
||||
1D245 ; Greek # So GREEK MUSICAL LEIMMA
|
||||
|
||||
# Total code points: 516
|
||||
# Total code points: 518
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -724,6 +729,7 @@ AB65 ; Greek # L& GREEK LETTER SMALL CAPITAL OMEGA
|
||||
0487 ; Cyrillic # Mn COMBINING CYRILLIC POKRYTIE
|
||||
0488..0489 ; Cyrillic # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
|
||||
048A..052F ; Cyrillic # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER
|
||||
1C80..1C88 ; Cyrillic # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
|
||||
1D2B ; Cyrillic # L& CYRILLIC LETTER SMALL CAPITAL EL
|
||||
1D78 ; Cyrillic # Lm MODIFIER LETTER CYRILLIC EN
|
||||
2DE0..2DFF ; Cyrillic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
|
||||
@ -740,7 +746,7 @@ A69C..A69D ; Cyrillic # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER
|
||||
A69E..A69F ; Cyrillic # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E
|
||||
FE2E..FE2F ; Cyrillic # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF
|
||||
|
||||
# Total code points: 434
|
||||
# Total code points: 443
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -791,6 +797,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
|
||||
060D ; Arabic # Po ARABIC DATE SEPARATOR
|
||||
060E..060F ; Arabic # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA
|
||||
0610..061A ; Arabic # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
|
||||
061C ; Arabic # Cf ARABIC LETTER MARK
|
||||
061E ; Arabic # Po ARABIC TRIPLE DOT PUNCTUATION MARK
|
||||
0620..063F ; Arabic # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
|
||||
0641..064A ; Arabic # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH
|
||||
@ -815,6 +822,8 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
|
||||
06FF ; Arabic # Lo ARABIC LETTER HEH WITH INVERTED V
|
||||
0750..077F ; Arabic # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE
|
||||
08A0..08B4 ; Arabic # Lo [21] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER KAF WITH DOT BELOW
|
||||
08B6..08BD ; Arabic # Lo [8] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER AFRICAN NOON
|
||||
08D4..08E1 ; Arabic # Mn [14] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH SIGN SAFHA
|
||||
08E3..08FF ; Arabic # Mn [29] ARABIC TURNED DAMMA BELOW..ARABIC MARK SIDEWAYS NOON GHUNNA
|
||||
FB50..FBB1 ; Arabic # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
|
||||
FBB2..FBC1 ; Arabic # Sk [16] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL SMALL TAH BELOW
|
||||
@ -862,7 +871,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA
|
||||
1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
|
||||
1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL
|
||||
|
||||
# Total code points: 1257
|
||||
# Total code points: 1280
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -873,8 +882,9 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA
|
||||
0712..072F ; Syriac # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH
|
||||
0730..074A ; Syriac # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
|
||||
074D..074F ; Syriac # Lo [3] SYRIAC LETTER SOGDIAN ZHAIN..SYRIAC LETTER SOGDIAN FE
|
||||
0860..086A ; Syriac # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
|
||||
|
||||
# Total code points: 77
|
||||
# Total code points: 88
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -944,8 +954,10 @@ A8FD ; Devanagari # Lo DEVANAGARI JAIN OM
|
||||
09F4..09F9 ; Bengali # No [6] BENGALI CURRENCY NUMERATOR ONE..BENGALI CURRENCY DENOMINATOR SIXTEEN
|
||||
09FA ; Bengali # So BENGALI ISSHAR
|
||||
09FB ; Bengali # Sc BENGALI GANDA MARK
|
||||
09FC ; Bengali # Lo BENGALI LETTER VEDIC ANUSVARA
|
||||
09FD ; Bengali # Po BENGALI ABBREVIATION SIGN
|
||||
|
||||
# Total code points: 93
|
||||
# Total code points: 95
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -998,8 +1010,9 @@ A8FD ; Devanagari # Lo DEVANAGARI JAIN OM
|
||||
0AF0 ; Gujarati # Po GUJARATI ABBREVIATION SIGN
|
||||
0AF1 ; Gujarati # Sc GUJARATI RUPEE SIGN
|
||||
0AF9 ; Gujarati # Lo GUJARATI LETTER ZHA
|
||||
0AFA..0AFF ; Gujarati # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE
|
||||
|
||||
# Total code points: 85
|
||||
# Total code points: 91
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1086,6 +1099,7 @@ A8FD ; Devanagari # Lo DEVANAGARI JAIN OM
|
||||
|
||||
# ================================================
|
||||
|
||||
0C80 ; Kannada # Lo KANNADA SIGN SPACING CANDRABINDU
|
||||
0C81 ; Kannada # Mn KANNADA SIGN CANDRABINDU
|
||||
0C82..0C83 ; Kannada # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA
|
||||
0C85..0C8C ; Kannada # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L
|
||||
@ -1109,15 +1123,16 @@ A8FD ; Devanagari # Lo DEVANAGARI JAIN OM
|
||||
0CE6..0CEF ; Kannada # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
|
||||
0CF1..0CF2 ; Kannada # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
|
||||
|
||||
# Total code points: 87
|
||||
# Total code points: 88
|
||||
|
||||
# ================================================
|
||||
|
||||
0D01 ; Malayalam # Mn MALAYALAM SIGN CANDRABINDU
|
||||
0D00..0D01 ; Malayalam # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
|
||||
0D02..0D03 ; Malayalam # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
|
||||
0D05..0D0C ; Malayalam # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L
|
||||
0D0E..0D10 ; Malayalam # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
|
||||
0D12..0D3A ; Malayalam # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA
|
||||
0D3B..0D3C ; Malayalam # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA
|
||||
0D3D ; Malayalam # Lo MALAYALAM SIGN AVAGRAHA
|
||||
0D3E..0D40 ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II
|
||||
0D41..0D44 ; Malayalam # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
|
||||
@ -1125,15 +1140,18 @@ A8FD ; Devanagari # Lo DEVANAGARI JAIN OM
|
||||
0D4A..0D4C ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU
|
||||
0D4D ; Malayalam # Mn MALAYALAM SIGN VIRAMA
|
||||
0D4E ; Malayalam # Lo MALAYALAM LETTER DOT REPH
|
||||
0D4F ; Malayalam # So MALAYALAM SIGN PARA
|
||||
0D54..0D56 ; Malayalam # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL
|
||||
0D57 ; Malayalam # Mc MALAYALAM AU LENGTH MARK
|
||||
0D58..0D5E ; Malayalam # No [7] MALAYALAM FRACTION ONE ONE-HUNDRED-AND-SIXTIETH..MALAYALAM FRACTION ONE FIFTH
|
||||
0D5F..0D61 ; Malayalam # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL
|
||||
0D62..0D63 ; Malayalam # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL
|
||||
0D66..0D6F ; Malayalam # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE
|
||||
0D70..0D75 ; Malayalam # No [6] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE QUARTERS
|
||||
0D70..0D78 ; Malayalam # No [9] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE SIXTEENTHS
|
||||
0D79 ; Malayalam # So MALAYALAM DATE MARK
|
||||
0D7A..0D7F ; Malayalam # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K
|
||||
|
||||
# Total code points: 100
|
||||
# Total code points: 117
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1436,21 +1454,24 @@ AB70..ABBF ; Cherokee # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETT
|
||||
1820..1842 ; Mongolian # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI
|
||||
1843 ; Mongolian # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN
|
||||
1844..1877 ; Mongolian # Lo [52] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER MANCHU ZHA
|
||||
1880..18A8 ; Mongolian # Lo [41] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER MANCHU ALI GALI BHA
|
||||
1880..1884 ; Mongolian # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA
|
||||
1885..1886 ; Mongolian # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
|
||||
1887..18A8 ; Mongolian # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA
|
||||
18A9 ; Mongolian # Mn MONGOLIAN LETTER ALI GALI DAGALGA
|
||||
18AA ; Mongolian # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA
|
||||
11660..1166C ; Mongolian # Po [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT
|
||||
|
||||
# Total code points: 153
|
||||
# Total code points: 166
|
||||
|
||||
# ================================================
|
||||
|
||||
3041..3096 ; Hiragana # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE
|
||||
309D..309E ; Hiragana # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK
|
||||
309F ; Hiragana # Lo HIRAGANA DIGRAPH YORI
|
||||
1B001 ; Hiragana # Lo HIRAGANA LETTER ARCHAIC YE
|
||||
1B001..1B11E ; Hiragana # Lo [286] HIRAGANA LETTER ARCHAIC YE..HENTAIGANA LETTER N-MU-MO-2
|
||||
1F200 ; Hiragana # So SQUARE HIRAGANA HOKA
|
||||
|
||||
# Total code points: 91
|
||||
# Total code points: 376
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1469,10 +1490,10 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
|
||||
# ================================================
|
||||
|
||||
02EA..02EB ; Bopomofo # Sk [2] MODIFIER LETTER YIN DEPARTING TONE MARK..MODIFIER LETTER YANG DEPARTING TONE MARK
|
||||
3105..312D ; Bopomofo # Lo [41] BOPOMOFO LETTER B..BOPOMOFO LETTER IH
|
||||
3105..312E ; Bopomofo # Lo [42] BOPOMOFO LETTER B..BOPOMOFO LETTER O WITH DOT ABOVE
|
||||
31A0..31BA ; Bopomofo # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY
|
||||
|
||||
# Total code points: 70
|
||||
# Total code points: 71
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1485,16 +1506,17 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
|
||||
3038..303A ; Han # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY
|
||||
303B ; Han # Lm VERTICAL IDEOGRAPHIC ITERATION MARK
|
||||
3400..4DB5 ; Han # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
|
||||
4E00..9FD5 ; Han # Lo [20950] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FD5
|
||||
4E00..9FEA ; Han # Lo [20971] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FEA
|
||||
F900..FA6D ; Han # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D
|
||||
FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
|
||||
20000..2A6D6 ; Han # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
|
||||
2A700..2B734 ; Han # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
|
||||
2B740..2B81D ; Han # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
|
||||
2B820..2CEA1 ; Han # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
|
||||
2CEB0..2EBE0 ; Han # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
|
||||
2F800..2FA1D ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
|
||||
|
||||
# Total code points: 81734
|
||||
# Total code points: 89228
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1509,8 +1531,9 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE
|
||||
|
||||
10300..1031F ; Old_Italic # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS
|
||||
10320..10323 ; Old_Italic # No [4] OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY
|
||||
1032D..1032F ; Old_Italic # Lo [3] OLD ITALIC LETTER YE..OLD ITALIC LETTER SOUTHERN TSE
|
||||
|
||||
# Total code points: 36
|
||||
# Total code points: 39
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1542,8 +1565,8 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE
|
||||
1CED ; Inherited # Mn VEDIC SIGN TIRYAK
|
||||
1CF4 ; Inherited # Mn VEDIC TONE CANDRA ABOVE
|
||||
1CF8..1CF9 ; Inherited # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
|
||||
1DC0..1DF5 ; Inherited # Mn [54] COMBINING DOTTED GRAVE ACCENT..COMBINING UP TACK ABOVE
|
||||
1DFC..1DFF ; Inherited # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
|
||||
1DC0..1DF9 ; Inherited # Mn [58] COMBINING DOTTED GRAVE ACCENT..COMBINING WIDE INVERTED BRIDGE BELOW
|
||||
1DFB..1DFF ; Inherited # Mn [5] COMBINING DELETION MARK..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
|
||||
200C..200D ; Inherited # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
|
||||
20D0..20DC ; Inherited # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
|
||||
20DD..20E0 ; Inherited # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
|
||||
@ -1562,7 +1585,7 @@ FE20..FE2D ; Inherited # Mn [14] COMBINING LIGATURE LEFT HALF..COMBINING CON
|
||||
1D1AA..1D1AD ; Inherited # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
|
||||
E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
|
||||
|
||||
# Total code points: 563
|
||||
# Total code points: 568
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1705,8 +1728,13 @@ E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-2
|
||||
|
||||
2C00..2C2E ; Glagolitic # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
|
||||
2C30..2C5E ; Glagolitic # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE
|
||||
1E000..1E006 ; Glagolitic # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
|
||||
1E008..1E018 ; Glagolitic # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
|
||||
1E01B..1E021 ; Glagolitic # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
|
||||
1E023..1E024 ; Glagolitic # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
|
||||
1E026..1E02A ; Glagolitic # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
|
||||
|
||||
# Total code points: 94
|
||||
# Total code points: 132
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1872,11 +1900,11 @@ A62A..A62B ; Vai # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO
|
||||
A880..A881 ; Saurashtra # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA
|
||||
A882..A8B3 ; Saurashtra # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA
|
||||
A8B4..A8C3 ; Saurashtra # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU
|
||||
A8C4 ; Saurashtra # Mn SAURASHTRA SIGN VIRAMA
|
||||
A8C4..A8C5 ; Saurashtra # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU
|
||||
A8CE..A8CF ; Saurashtra # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA
|
||||
A8D0..A8D9 ; Saurashtra # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE
|
||||
|
||||
# Total code points: 81
|
||||
# Total code points: 82
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -2314,8 +2342,9 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
|
||||
11235 ; Khojki # Mc KHOJKI SIGN VIRAMA
|
||||
11236..11237 ; Khojki # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA
|
||||
11238..1123D ; Khojki # Po [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN
|
||||
1123E ; Khojki # Mn KHOJKI SIGN SUKUN
|
||||
|
||||
# Total code points: 61
|
||||
# Total code points: 62
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -2536,4 +2565,129 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
|
||||
|
||||
# Total code points: 672
|
||||
|
||||
# ================================================
|
||||
|
||||
1E900..1E943 ; Adlam # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA
|
||||
1E944..1E94A ; Adlam # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
|
||||
1E950..1E959 ; Adlam # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
|
||||
1E95E..1E95F ; Adlam # Po [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK
|
||||
|
||||
# Total code points: 87
|
||||
|
||||
# ================================================
|
||||
|
||||
11C00..11C08 ; Bhaiksuki # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
|
||||
11C0A..11C2E ; Bhaiksuki # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
|
||||
11C2F ; Bhaiksuki # Mc BHAIKSUKI VOWEL SIGN AA
|
||||
11C30..11C36 ; Bhaiksuki # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L
|
||||
11C38..11C3D ; Bhaiksuki # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA
|
||||
11C3E ; Bhaiksuki # Mc BHAIKSUKI SIGN VISARGA
|
||||
11C3F ; Bhaiksuki # Mn BHAIKSUKI SIGN VIRAMA
|
||||
11C40 ; Bhaiksuki # Lo BHAIKSUKI SIGN AVAGRAHA
|
||||
11C41..11C45 ; Bhaiksuki # Po [5] BHAIKSUKI DANDA..BHAIKSUKI GAP FILLER-2
|
||||
11C50..11C59 ; Bhaiksuki # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE
|
||||
11C5A..11C6C ; Bhaiksuki # No [19] BHAIKSUKI NUMBER ONE..BHAIKSUKI HUNDREDS UNIT MARK
|
||||
|
||||
# Total code points: 97
|
||||
|
||||
# ================================================
|
||||
|
||||
11C70..11C71 ; Marchen # Po [2] MARCHEN HEAD MARK..MARCHEN MARK SHAD
|
||||
11C72..11C8F ; Marchen # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A
|
||||
11C92..11CA7 ; Marchen # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA
|
||||
11CA9 ; Marchen # Mc MARCHEN SUBJOINED LETTER YA
|
||||
11CAA..11CB0 ; Marchen # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA
|
||||
11CB1 ; Marchen # Mc MARCHEN VOWEL SIGN I
|
||||
11CB2..11CB3 ; Marchen # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E
|
||||
11CB4 ; Marchen # Mc MARCHEN VOWEL SIGN O
|
||||
11CB5..11CB6 ; Marchen # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU
|
||||
|
||||
# Total code points: 68
|
||||
|
||||
# ================================================
|
||||
|
||||
11400..11434 ; Newa # Lo [53] NEWA LETTER A..NEWA LETTER HA
|
||||
11435..11437 ; Newa # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II
|
||||
11438..1143F ; Newa # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI
|
||||
11440..11441 ; Newa # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU
|
||||
11442..11444 ; Newa # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA
|
||||
11445 ; Newa # Mc NEWA SIGN VISARGA
|
||||
11446 ; Newa # Mn NEWA SIGN NUKTA
|
||||
11447..1144A ; Newa # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI
|
||||
1144B..1144F ; Newa # Po [5] NEWA DANDA..NEWA ABBREVIATION SIGN
|
||||
11450..11459 ; Newa # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE
|
||||
1145B ; Newa # Po NEWA PLACEHOLDER MARK
|
||||
1145D ; Newa # Po NEWA INSERTION SIGN
|
||||
|
||||
# Total code points: 92
|
||||
|
||||
# ================================================
|
||||
|
||||
104B0..104D3 ; Osage # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA
|
||||
104D8..104FB ; Osage # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA
|
||||
|
||||
# Total code points: 72
|
||||
|
||||
# ================================================
|
||||
|
||||
16FE0 ; Tangut # Lm TANGUT ITERATION MARK
|
||||
17000..187EC ; Tangut # Lo [6125] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187EC
|
||||
18800..18AF2 ; Tangut # Lo [755] TANGUT COMPONENT-001..TANGUT COMPONENT-755
|
||||
|
||||
# Total code points: 6881
|
||||
|
||||
# ================================================
|
||||
|
||||
11D00..11D06 ; Masaram_Gondi # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E
|
||||
11D08..11D09 ; Masaram_Gondi # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O
|
||||
11D0B..11D30 ; Masaram_Gondi # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA
|
||||
11D31..11D36 ; Masaram_Gondi # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R
|
||||
11D3A ; Masaram_Gondi # Mn MASARAM GONDI VOWEL SIGN E
|
||||
11D3C..11D3D ; Masaram_Gondi # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O
|
||||
11D3F..11D45 ; Masaram_Gondi # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA
|
||||
11D46 ; Masaram_Gondi # Lo MASARAM GONDI REPHA
|
||||
11D47 ; Masaram_Gondi # Mn MASARAM GONDI RA-KARA
|
||||
11D50..11D59 ; Masaram_Gondi # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE
|
||||
|
||||
# Total code points: 75
|
||||
|
||||
# ================================================
|
||||
|
||||
16FE1 ; Nushu # Lm NUSHU ITERATION MARK
|
||||
1B170..1B2FB ; Nushu # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB
|
||||
|
||||
# Total code points: 397
|
||||
|
||||
# ================================================
|
||||
|
||||
11A50 ; Soyombo # Lo SOYOMBO LETTER A
|
||||
11A51..11A56 ; Soyombo # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE
|
||||
11A57..11A58 ; Soyombo # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU
|
||||
11A59..11A5B ; Soyombo # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK
|
||||
11A5C..11A83 ; Soyombo # Lo [40] SOYOMBO LETTER KA..SOYOMBO LETTER KSSA
|
||||
11A86..11A89 ; Soyombo # Lo [4] SOYOMBO CLUSTER-INITIAL LETTER RA..SOYOMBO CLUSTER-INITIAL LETTER SA
|
||||
11A8A..11A96 ; Soyombo # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA
|
||||
11A97 ; Soyombo # Mc SOYOMBO SIGN VISARGA
|
||||
11A98..11A99 ; Soyombo # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER
|
||||
11A9A..11A9C ; Soyombo # Po [3] SOYOMBO MARK TSHEG..SOYOMBO MARK DOUBLE SHAD
|
||||
11A9E..11AA2 ; Soyombo # Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2
|
||||
|
||||
# Total code points: 80
|
||||
|
||||
# ================================================
|
||||
|
||||
11A00 ; Zanabazar_Square # Lo ZANABAZAR SQUARE LETTER A
|
||||
11A01..11A06 ; Zanabazar_Square # Mn [6] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL SIGN O
|
||||
11A07..11A08 ; Zanabazar_Square # Mc [2] ZANABAZAR SQUARE VOWEL SIGN AI..ZANABAZAR SQUARE VOWEL SIGN AU
|
||||
11A09..11A0A ; Zanabazar_Square # Mn [2] ZANABAZAR SQUARE VOWEL SIGN REVERSED I..ZANABAZAR SQUARE VOWEL LENGTH MARK
|
||||
11A0B..11A32 ; Zanabazar_Square # Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA
|
||||
11A33..11A38 ; Zanabazar_Square # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA
|
||||
11A39 ; Zanabazar_Square # Mc ZANABAZAR SQUARE SIGN VISARGA
|
||||
11A3A ; Zanabazar_Square # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA
|
||||
11A3B..11A3E ; Zanabazar_Square # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA
|
||||
11A3F..11A46 ; Zanabazar_Square # Po [8] ZANABAZAR SQUARE INITIAL HEAD MARK..ZANABAZAR SQUARE CLOSING DOUBLE-LINED HEAD MARK
|
||||
11A47 ; Zanabazar_Square # Mn ZANABAZAR SQUARE SUBJOINER
|
||||
|
||||
# Total code points: 72
|
||||
|
||||
# EOF
|
||||
|
@ -1,10 +1,11 @@
|
||||
# SpecialCasing-8.0.0.txt
|
||||
# Date: 2014-12-16, 23:08:04 GMT [MD]
|
||||
# SpecialCasing-10.0.0.txt
|
||||
# Date: 2017-04-14, 05:40:43 GMT
|
||||
# Copyright (c) 2017 Unicode, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2014 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
# Special Casing
|
||||
#
|
||||
@ -196,7 +197,7 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH
|
||||
|
||||
# ================================================================================
|
||||
# Conditional Mappings
|
||||
# The remainder of this file provides conditional casing data used to produce
|
||||
# The remainder of this file provides conditional casing data used to produce
|
||||
# full case mappings.
|
||||
# ================================================================================
|
||||
# Language-Insensitive Mappings
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1 +1 @@
|
||||
8.0.0
|
||||
10.0.0
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -34,16 +34,11 @@ package sun.text.normalizer;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.DataInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.UncheckedIOException;
|
||||
import java.net.URL;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.nio.file.FileSystems;
|
||||
import java.util.Arrays;
|
||||
import java.security.AccessController;
|
||||
import java.security.PrivilegedAction;
|
||||
@ -51,7 +46,7 @@ import java.security.PrivilegedAction;
|
||||
public final class ICUBinary {
|
||||
|
||||
private static final class IsAcceptable implements Authenticate {
|
||||
// @Override when we switch to Java 6
|
||||
@Override
|
||||
public boolean isDataVersionAcceptable(byte version[]) {
|
||||
return version[0] == 1;
|
||||
}
|
||||
@ -93,7 +88,7 @@ public final class ICUBinary {
|
||||
|
||||
BufferedInputStream b=new BufferedInputStream(is, 4096 /* data buffer size */);
|
||||
DataInputStream inputStream = new DataInputStream(b);
|
||||
byte[] bb = new byte[120000];
|
||||
byte[] bb = new byte[130000];
|
||||
int n = inputStream.read(bb);
|
||||
ByteBuffer bytes = ByteBuffer.wrap(bb, 0, n);
|
||||
return bytes;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2015, 2018 Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -194,7 +194,7 @@ final class Norm2AllModes {
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasBoundaryBefore(int c) { return impl.hasDecompBoundary(c, true); }
|
||||
public boolean hasBoundaryBefore(int c) { return impl.hasDecompBoundaryBefore(c); }
|
||||
}
|
||||
|
||||
public static final class ComposeNormalizer2 extends Normalizer2WithImpl {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2015, 2018 Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -135,8 +135,10 @@ abstract class Normalizer2 {
|
||||
if(spanLength==src.length()) {
|
||||
return (String)src;
|
||||
}
|
||||
StringBuilder sb=new StringBuilder(src.length()).append(src, 0, spanLength);
|
||||
return normalizeSecondAndAppend(sb, src.subSequence(spanLength, src.length())).toString();
|
||||
if (spanLength != 0) {
|
||||
StringBuilder sb=new StringBuilder(src.length()).append(src, 0, spanLength);
|
||||
return normalizeSecondAndAppend(sb, src.subSequence(spanLength, src.length())).toString();
|
||||
}
|
||||
}
|
||||
return normalize(src, new StringBuilder(src.length())).toString();
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -699,7 +699,8 @@ public final class NumericShaper implements java.io.Serializable {
|
||||
0x09de, 0x09df,
|
||||
0x09e2, 0x09e6,
|
||||
0x09f2, 0x09f4,
|
||||
0x09fb, 0x0a03,
|
||||
0x09fb, 0x09fc,
|
||||
0x09fe, 0x0a03,
|
||||
0x0a04, 0x0a05,
|
||||
0x0a0b, 0x0a0f,
|
||||
0x0a11, 0x0a13,
|
||||
@ -769,7 +770,7 @@ public final class NumericShaper implements java.io.Serializable {
|
||||
0x0c5b, 0x0c60,
|
||||
0x0c62, 0x0c66,
|
||||
0x0c70, 0x0c7f,
|
||||
0x0c80, 0x0c82,
|
||||
0x0c81, 0x0c82,
|
||||
0x0c84, 0x0c85,
|
||||
0x0c8d, 0x0c8e,
|
||||
0x0c91, 0x0c92,
|
||||
@ -791,10 +792,7 @@ public final class NumericShaper implements java.io.Serializable {
|
||||
0x0d41, 0x0d46,
|
||||
0x0d49, 0x0d4a,
|
||||
0x0d4d, 0x0d4e,
|
||||
0x0d4f, 0x0d57,
|
||||
0x0d58, 0x0d5f,
|
||||
0x0d62, 0x0d66,
|
||||
0x0d76, 0x0d79,
|
||||
0x0d80, 0x0d82,
|
||||
0x0d84, 0x0d85,
|
||||
0x0d97, 0x0d9a,
|
||||
@ -892,7 +890,8 @@ public final class NumericShaper implements java.io.Serializable {
|
||||
0x17dd, 0x17e0,
|
||||
0x17ea, 0x1810,
|
||||
0x181a, 0x1820,
|
||||
0x1878, 0x1880,
|
||||
0x1878, 0x1884,
|
||||
0x1885, 0x1887,
|
||||
0x18a9, 0x18aa,
|
||||
0x18ab, 0x18b0,
|
||||
0x18f6, 0x1900,
|
||||
@ -934,13 +933,12 @@ public final class NumericShaper implements java.io.Serializable {
|
||||
0x1c2c, 0x1c34,
|
||||
0x1c36, 0x1c3b,
|
||||
0x1c4a, 0x1c4d,
|
||||
0x1c80, 0x1cc0,
|
||||
0x1cc8, 0x1cd3,
|
||||
0x1cd4, 0x1ce1,
|
||||
0x1ce2, 0x1ce9,
|
||||
0x1ced, 0x1cee,
|
||||
0x1cf4, 0x1cf5,
|
||||
0x1cf7, 0x1d00,
|
||||
0x1cf8, 0x1d00,
|
||||
0x1dc0, 0x1e00,
|
||||
0x1f16, 0x1f18,
|
||||
0x1f1e, 0x1f20,
|
||||
@ -1012,7 +1010,7 @@ public final class NumericShaper implements java.io.Serializable {
|
||||
0x30a0, 0x30a1,
|
||||
0x30fb, 0x30fc,
|
||||
0x3100, 0x3105,
|
||||
0x312e, 0x3131,
|
||||
0x312f, 0x3131,
|
||||
0x318f, 0x3190,
|
||||
0x31bb, 0x31f0,
|
||||
0x321d, 0x3220,
|
||||
@ -1025,7 +1023,7 @@ public final class NumericShaper implements java.io.Serializable {
|
||||
0x33de, 0x33e0,
|
||||
0x33ff, 0x3400,
|
||||
0x4db6, 0x4e00,
|
||||
0x9fd6, 0xa000,
|
||||
0x9feb, 0xa000,
|
||||
0xa48d, 0xa4d0,
|
||||
0xa60d, 0xa610,
|
||||
0xa62c, 0xa640,
|
||||
@ -1034,7 +1032,7 @@ public final class NumericShaper implements java.io.Serializable {
|
||||
0xa6f0, 0xa6f2,
|
||||
0xa6f8, 0xa722,
|
||||
0xa788, 0xa789,
|
||||
0xa7ae, 0xa7b0,
|
||||
0xa7af, 0xa7b0,
|
||||
0xa7b8, 0xa7f7,
|
||||
0xa802, 0xa803,
|
||||
0xa806, 0xa807,
|
||||
@ -1114,18 +1112,21 @@ public final class NumericShaper implements java.io.Serializable {
|
||||
0x10101, 0x10102,
|
||||
0x10103, 0x10107,
|
||||
0x10134, 0x10137,
|
||||
0x10140, 0x101d0,
|
||||
0x10140, 0x1018d,
|
||||
0x1018f, 0x101d0,
|
||||
0x101fd, 0x10280,
|
||||
0x1029d, 0x102a0,
|
||||
0x102d1, 0x10300,
|
||||
0x10324, 0x10330,
|
||||
0x10324, 0x1032d,
|
||||
0x1034b, 0x10350,
|
||||
0x10376, 0x10380,
|
||||
0x1039e, 0x1039f,
|
||||
0x103c4, 0x103c8,
|
||||
0x103d6, 0x10400,
|
||||
0x1049e, 0x104a0,
|
||||
0x104aa, 0x10500,
|
||||
0x104aa, 0x104d3,
|
||||
0x104d4, 0x104d8,
|
||||
0x104fc, 0x10500,
|
||||
0x10528, 0x10530,
|
||||
0x10564, 0x1056f,
|
||||
0x10570, 0x10600,
|
||||
@ -1186,7 +1187,13 @@ public final class NumericShaper implements java.io.Serializable {
|
||||
0x1134e, 0x11350,
|
||||
0x11351, 0x11357,
|
||||
0x11358, 0x1135d,
|
||||
0x11364, 0x11480,
|
||||
0x11364, 0x11400,
|
||||
0x11438, 0x11440,
|
||||
0x11442, 0x11445,
|
||||
0x11446, 0x11447,
|
||||
0x1145a, 0x1145b,
|
||||
0x1145c, 0x1145d,
|
||||
0x1145e, 0x11480,
|
||||
0x114b3, 0x114b9,
|
||||
0x114ba, 0x114bb,
|
||||
0x114bf, 0x114c1,
|
||||
@ -1212,8 +1219,33 @@ public final class NumericShaper implements java.io.Serializable {
|
||||
0x11727, 0x11730,
|
||||
0x11740, 0x118a0,
|
||||
0x118f3, 0x118ff,
|
||||
0x11900, 0x11ac0,
|
||||
0x11af9, 0x12000,
|
||||
0x11900, 0x11a00,
|
||||
0x11a01, 0x11a07,
|
||||
0x11a09, 0x11a0b,
|
||||
0x11a33, 0x11a3a,
|
||||
0x11a3b, 0x11a3f,
|
||||
0x11a47, 0x11a50,
|
||||
0x11a51, 0x11a57,
|
||||
0x11a59, 0x11a5c,
|
||||
0x11a84, 0x11a86,
|
||||
0x11a8a, 0x11a97,
|
||||
0x11a98, 0x11a9a,
|
||||
0x11a9d, 0x11a9e,
|
||||
0x11aa3, 0x11ac0,
|
||||
0x11af9, 0x11C00,
|
||||
0x11C09, 0x11c0a,
|
||||
0x11c30, 0x11c3e,
|
||||
0x11c46, 0x11c50,
|
||||
0x11c6d, 0x11c70,
|
||||
0x11c90, 0x11ca9,
|
||||
0x11caa, 0x11cb1,
|
||||
0x11cb2, 0x11cb4,
|
||||
0x11cb5, 0x11d00,
|
||||
0x11d07, 0x11d08,
|
||||
0x11d0a, 0x11d0b,
|
||||
0x11d31, 0x11d46,
|
||||
0x11d47, 0x11d50,
|
||||
0x11d5a, 0x12000,
|
||||
0x1239a, 0x12400,
|
||||
0x1246f, 0x12470,
|
||||
0x12475, 0x12480,
|
||||
@ -1234,8 +1266,12 @@ public final class NumericShaper implements java.io.Serializable {
|
||||
0x16b90, 0x16f00,
|
||||
0x16f45, 0x16f50,
|
||||
0x16f7f, 0x16f93,
|
||||
0x16fa0, 0x1b000,
|
||||
0x1b002, 0x1bc00,
|
||||
0x16fa0, 0x16fe0,
|
||||
0x16fe2, 0x17000,
|
||||
0x187ed, 0x18800,
|
||||
0x18af3, 0x1b000,
|
||||
0x1b11f, 0x1b170,
|
||||
0x1b2fc, 0x1bc00,
|
||||
0x1bc6b, 0x1bc70,
|
||||
0x1bc7d, 0x1bc80,
|
||||
0x1bc89, 0x1bc90,
|
||||
@ -1281,19 +1317,21 @@ public final class NumericShaper implements java.io.Serializable {
|
||||
0x1da84, 0x1da85,
|
||||
0x1da8c, 0x1e800,
|
||||
0x1e8d0, 0x1e8d7,
|
||||
0x1e944, 0x1e94b,
|
||||
0x1eef0, 0x1eef2,
|
||||
0x1f000, 0x1f110,
|
||||
0x1f12f, 0x1f130,
|
||||
0x1f16a, 0x1f170,
|
||||
0x1f19b, 0x1f1e6,
|
||||
0x1f1ad, 0x1f1e6,
|
||||
0x1f203, 0x1f210,
|
||||
0x1f23b, 0x1f240,
|
||||
0x1f23c, 0x1f240,
|
||||
0x1f249, 0x1f250,
|
||||
0x1f252, 0x20000,
|
||||
0x2a6d7, 0x2a700,
|
||||
0x2b735, 0x2b740,
|
||||
0x2b81e, 0x2b820,
|
||||
0x2cea2, 0x2f800,
|
||||
0x2cea2, 0x2ceb0,
|
||||
0x2ebe1, 0x2f800,
|
||||
0x2fa1e, 0xf0000,
|
||||
0xffffe, 0x100000,
|
||||
0x10fffe, 0x10ffff // sentinel
|
||||
|
316
test/jdk/java/lang/Character/Blocks.txt
Normal file
316
test/jdk/java/lang/Character/Blocks.txt
Normal file
@ -0,0 +1,316 @@
|
||||
# Blocks-10.0.0.txt
|
||||
# Date: 2017-04-12, 17:30:00 GMT [KW]
|
||||
# Copyright (c) 2017 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
# Unicode Character Database
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
# Format:
|
||||
# Start Code..End Code; Block Name
|
||||
|
||||
# ================================================
|
||||
|
||||
# Note: When comparing block names, casing, whitespace, hyphens,
|
||||
# and underbars are ignored.
|
||||
# For example, "Latin Extended-A" and "latin extended a" are equivalent.
|
||||
# For more information on the comparison of property values,
|
||||
# see UAX #44: http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
# All block ranges start with a value where (cp MOD 16) = 0,
|
||||
# and end with a value where (cp MOD 16) = 15. In other words,
|
||||
# the last hexadecimal digit of the start of range is ...0
|
||||
# and the last hexadecimal digit of the end of range is ...F.
|
||||
# This constraint on block ranges guarantees that allocations
|
||||
# are done in terms of whole columns, and that code chart display
|
||||
# never involves splitting columns in the charts.
|
||||
#
|
||||
# All code points not explicitly listed for Block
|
||||
# have the value No_Block.
|
||||
|
||||
# Property: Block
|
||||
#
|
||||
# @missing: 0000..10FFFF; No_Block
|
||||
|
||||
0000..007F; Basic Latin
|
||||
0080..00FF; Latin-1 Supplement
|
||||
0100..017F; Latin Extended-A
|
||||
0180..024F; Latin Extended-B
|
||||
0250..02AF; IPA Extensions
|
||||
02B0..02FF; Spacing Modifier Letters
|
||||
0300..036F; Combining Diacritical Marks
|
||||
0370..03FF; Greek and Coptic
|
||||
0400..04FF; Cyrillic
|
||||
0500..052F; Cyrillic Supplement
|
||||
0530..058F; Armenian
|
||||
0590..05FF; Hebrew
|
||||
0600..06FF; Arabic
|
||||
0700..074F; Syriac
|
||||
0750..077F; Arabic Supplement
|
||||
0780..07BF; Thaana
|
||||
07C0..07FF; NKo
|
||||
0800..083F; Samaritan
|
||||
0840..085F; Mandaic
|
||||
0860..086F; Syriac Supplement
|
||||
08A0..08FF; Arabic Extended-A
|
||||
0900..097F; Devanagari
|
||||
0980..09FF; Bengali
|
||||
0A00..0A7F; Gurmukhi
|
||||
0A80..0AFF; Gujarati
|
||||
0B00..0B7F; Oriya
|
||||
0B80..0BFF; Tamil
|
||||
0C00..0C7F; Telugu
|
||||
0C80..0CFF; Kannada
|
||||
0D00..0D7F; Malayalam
|
||||
0D80..0DFF; Sinhala
|
||||
0E00..0E7F; Thai
|
||||
0E80..0EFF; Lao
|
||||
0F00..0FFF; Tibetan
|
||||
1000..109F; Myanmar
|
||||
10A0..10FF; Georgian
|
||||
1100..11FF; Hangul Jamo
|
||||
1200..137F; Ethiopic
|
||||
1380..139F; Ethiopic Supplement
|
||||
13A0..13FF; Cherokee
|
||||
1400..167F; Unified Canadian Aboriginal Syllabics
|
||||
1680..169F; Ogham
|
||||
16A0..16FF; Runic
|
||||
1700..171F; Tagalog
|
||||
1720..173F; Hanunoo
|
||||
1740..175F; Buhid
|
||||
1760..177F; Tagbanwa
|
||||
1780..17FF; Khmer
|
||||
1800..18AF; Mongolian
|
||||
18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
|
||||
1900..194F; Limbu
|
||||
1950..197F; Tai Le
|
||||
1980..19DF; New Tai Lue
|
||||
19E0..19FF; Khmer Symbols
|
||||
1A00..1A1F; Buginese
|
||||
1A20..1AAF; Tai Tham
|
||||
1AB0..1AFF; Combining Diacritical Marks Extended
|
||||
1B00..1B7F; Balinese
|
||||
1B80..1BBF; Sundanese
|
||||
1BC0..1BFF; Batak
|
||||
1C00..1C4F; Lepcha
|
||||
1C50..1C7F; Ol Chiki
|
||||
1C80..1C8F; Cyrillic Extended-C
|
||||
1CC0..1CCF; Sundanese Supplement
|
||||
1CD0..1CFF; Vedic Extensions
|
||||
1D00..1D7F; Phonetic Extensions
|
||||
1D80..1DBF; Phonetic Extensions Supplement
|
||||
1DC0..1DFF; Combining Diacritical Marks Supplement
|
||||
1E00..1EFF; Latin Extended Additional
|
||||
1F00..1FFF; Greek Extended
|
||||
2000..206F; General Punctuation
|
||||
2070..209F; Superscripts and Subscripts
|
||||
20A0..20CF; Currency Symbols
|
||||
20D0..20FF; Combining Diacritical Marks for Symbols
|
||||
2100..214F; Letterlike Symbols
|
||||
2150..218F; Number Forms
|
||||
2190..21FF; Arrows
|
||||
2200..22FF; Mathematical Operators
|
||||
2300..23FF; Miscellaneous Technical
|
||||
2400..243F; Control Pictures
|
||||
2440..245F; Optical Character Recognition
|
||||
2460..24FF; Enclosed Alphanumerics
|
||||
2500..257F; Box Drawing
|
||||
2580..259F; Block Elements
|
||||
25A0..25FF; Geometric Shapes
|
||||
2600..26FF; Miscellaneous Symbols
|
||||
2700..27BF; Dingbats
|
||||
27C0..27EF; Miscellaneous Mathematical Symbols-A
|
||||
27F0..27FF; Supplemental Arrows-A
|
||||
2800..28FF; Braille Patterns
|
||||
2900..297F; Supplemental Arrows-B
|
||||
2980..29FF; Miscellaneous Mathematical Symbols-B
|
||||
2A00..2AFF; Supplemental Mathematical Operators
|
||||
2B00..2BFF; Miscellaneous Symbols and Arrows
|
||||
2C00..2C5F; Glagolitic
|
||||
2C60..2C7F; Latin Extended-C
|
||||
2C80..2CFF; Coptic
|
||||
2D00..2D2F; Georgian Supplement
|
||||
2D30..2D7F; Tifinagh
|
||||
2D80..2DDF; Ethiopic Extended
|
||||
2DE0..2DFF; Cyrillic Extended-A
|
||||
2E00..2E7F; Supplemental Punctuation
|
||||
2E80..2EFF; CJK Radicals Supplement
|
||||
2F00..2FDF; Kangxi Radicals
|
||||
2FF0..2FFF; Ideographic Description Characters
|
||||
3000..303F; CJK Symbols and Punctuation
|
||||
3040..309F; Hiragana
|
||||
30A0..30FF; Katakana
|
||||
3100..312F; Bopomofo
|
||||
3130..318F; Hangul Compatibility Jamo
|
||||
3190..319F; Kanbun
|
||||
31A0..31BF; Bopomofo Extended
|
||||
31C0..31EF; CJK Strokes
|
||||
31F0..31FF; Katakana Phonetic Extensions
|
||||
3200..32FF; Enclosed CJK Letters and Months
|
||||
3300..33FF; CJK Compatibility
|
||||
3400..4DBF; CJK Unified Ideographs Extension A
|
||||
4DC0..4DFF; Yijing Hexagram Symbols
|
||||
4E00..9FFF; CJK Unified Ideographs
|
||||
A000..A48F; Yi Syllables
|
||||
A490..A4CF; Yi Radicals
|
||||
A4D0..A4FF; Lisu
|
||||
A500..A63F; Vai
|
||||
A640..A69F; Cyrillic Extended-B
|
||||
A6A0..A6FF; Bamum
|
||||
A700..A71F; Modifier Tone Letters
|
||||
A720..A7FF; Latin Extended-D
|
||||
A800..A82F; Syloti Nagri
|
||||
A830..A83F; Common Indic Number Forms
|
||||
A840..A87F; Phags-pa
|
||||
A880..A8DF; Saurashtra
|
||||
A8E0..A8FF; Devanagari Extended
|
||||
A900..A92F; Kayah Li
|
||||
A930..A95F; Rejang
|
||||
A960..A97F; Hangul Jamo Extended-A
|
||||
A980..A9DF; Javanese
|
||||
A9E0..A9FF; Myanmar Extended-B
|
||||
AA00..AA5F; Cham
|
||||
AA60..AA7F; Myanmar Extended-A
|
||||
AA80..AADF; Tai Viet
|
||||
AAE0..AAFF; Meetei Mayek Extensions
|
||||
AB00..AB2F; Ethiopic Extended-A
|
||||
AB30..AB6F; Latin Extended-E
|
||||
AB70..ABBF; Cherokee Supplement
|
||||
ABC0..ABFF; Meetei Mayek
|
||||
AC00..D7AF; Hangul Syllables
|
||||
D7B0..D7FF; Hangul Jamo Extended-B
|
||||
D800..DB7F; High Surrogates
|
||||
DB80..DBFF; High Private Use Surrogates
|
||||
DC00..DFFF; Low Surrogates
|
||||
E000..F8FF; Private Use Area
|
||||
F900..FAFF; CJK Compatibility Ideographs
|
||||
FB00..FB4F; Alphabetic Presentation Forms
|
||||
FB50..FDFF; Arabic Presentation Forms-A
|
||||
FE00..FE0F; Variation Selectors
|
||||
FE10..FE1F; Vertical Forms
|
||||
FE20..FE2F; Combining Half Marks
|
||||
FE30..FE4F; CJK Compatibility Forms
|
||||
FE50..FE6F; Small Form Variants
|
||||
FE70..FEFF; Arabic Presentation Forms-B
|
||||
FF00..FFEF; Halfwidth and Fullwidth Forms
|
||||
FFF0..FFFF; Specials
|
||||
10000..1007F; Linear B Syllabary
|
||||
10080..100FF; Linear B Ideograms
|
||||
10100..1013F; Aegean Numbers
|
||||
10140..1018F; Ancient Greek Numbers
|
||||
10190..101CF; Ancient Symbols
|
||||
101D0..101FF; Phaistos Disc
|
||||
10280..1029F; Lycian
|
||||
102A0..102DF; Carian
|
||||
102E0..102FF; Coptic Epact Numbers
|
||||
10300..1032F; Old Italic
|
||||
10330..1034F; Gothic
|
||||
10350..1037F; Old Permic
|
||||
10380..1039F; Ugaritic
|
||||
103A0..103DF; Old Persian
|
||||
10400..1044F; Deseret
|
||||
10450..1047F; Shavian
|
||||
10480..104AF; Osmanya
|
||||
104B0..104FF; Osage
|
||||
10500..1052F; Elbasan
|
||||
10530..1056F; Caucasian Albanian
|
||||
10600..1077F; Linear A
|
||||
10800..1083F; Cypriot Syllabary
|
||||
10840..1085F; Imperial Aramaic
|
||||
10860..1087F; Palmyrene
|
||||
10880..108AF; Nabataean
|
||||
108E0..108FF; Hatran
|
||||
10900..1091F; Phoenician
|
||||
10920..1093F; Lydian
|
||||
10980..1099F; Meroitic Hieroglyphs
|
||||
109A0..109FF; Meroitic Cursive
|
||||
10A00..10A5F; Kharoshthi
|
||||
10A60..10A7F; Old South Arabian
|
||||
10A80..10A9F; Old North Arabian
|
||||
10AC0..10AFF; Manichaean
|
||||
10B00..10B3F; Avestan
|
||||
10B40..10B5F; Inscriptional Parthian
|
||||
10B60..10B7F; Inscriptional Pahlavi
|
||||
10B80..10BAF; Psalter Pahlavi
|
||||
10C00..10C4F; Old Turkic
|
||||
10C80..10CFF; Old Hungarian
|
||||
10E60..10E7F; Rumi Numeral Symbols
|
||||
11000..1107F; Brahmi
|
||||
11080..110CF; Kaithi
|
||||
110D0..110FF; Sora Sompeng
|
||||
11100..1114F; Chakma
|
||||
11150..1117F; Mahajani
|
||||
11180..111DF; Sharada
|
||||
111E0..111FF; Sinhala Archaic Numbers
|
||||
11200..1124F; Khojki
|
||||
11280..112AF; Multani
|
||||
112B0..112FF; Khudawadi
|
||||
11300..1137F; Grantha
|
||||
11400..1147F; Newa
|
||||
11480..114DF; Tirhuta
|
||||
11580..115FF; Siddham
|
||||
11600..1165F; Modi
|
||||
11660..1167F; Mongolian Supplement
|
||||
11680..116CF; Takri
|
||||
11700..1173F; Ahom
|
||||
118A0..118FF; Warang Citi
|
||||
11A00..11A4F; Zanabazar Square
|
||||
11A50..11AAF; Soyombo
|
||||
11AC0..11AFF; Pau Cin Hau
|
||||
11C00..11C6F; Bhaiksuki
|
||||
11C70..11CBF; Marchen
|
||||
11D00..11D5F; Masaram Gondi
|
||||
12000..123FF; Cuneiform
|
||||
12400..1247F; Cuneiform Numbers and Punctuation
|
||||
12480..1254F; Early Dynastic Cuneiform
|
||||
13000..1342F; Egyptian Hieroglyphs
|
||||
14400..1467F; Anatolian Hieroglyphs
|
||||
16800..16A3F; Bamum Supplement
|
||||
16A40..16A6F; Mro
|
||||
16AD0..16AFF; Bassa Vah
|
||||
16B00..16B8F; Pahawh Hmong
|
||||
16F00..16F9F; Miao
|
||||
16FE0..16FFF; Ideographic Symbols and Punctuation
|
||||
17000..187FF; Tangut
|
||||
18800..18AFF; Tangut Components
|
||||
1B000..1B0FF; Kana Supplement
|
||||
1B100..1B12F; Kana Extended-A
|
||||
1B170..1B2FF; Nushu
|
||||
1BC00..1BC9F; Duployan
|
||||
1BCA0..1BCAF; Shorthand Format Controls
|
||||
1D000..1D0FF; Byzantine Musical Symbols
|
||||
1D100..1D1FF; Musical Symbols
|
||||
1D200..1D24F; Ancient Greek Musical Notation
|
||||
1D300..1D35F; Tai Xuan Jing Symbols
|
||||
1D360..1D37F; Counting Rod Numerals
|
||||
1D400..1D7FF; Mathematical Alphanumeric Symbols
|
||||
1D800..1DAAF; Sutton SignWriting
|
||||
1E000..1E02F; Glagolitic Supplement
|
||||
1E800..1E8DF; Mende Kikakui
|
||||
1E900..1E95F; Adlam
|
||||
1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
|
||||
1F000..1F02F; Mahjong Tiles
|
||||
1F030..1F09F; Domino Tiles
|
||||
1F0A0..1F0FF; Playing Cards
|
||||
1F100..1F1FF; Enclosed Alphanumeric Supplement
|
||||
1F200..1F2FF; Enclosed Ideographic Supplement
|
||||
1F300..1F5FF; Miscellaneous Symbols and Pictographs
|
||||
1F600..1F64F; Emoticons
|
||||
1F650..1F67F; Ornamental Dingbats
|
||||
1F680..1F6FF; Transport and Map Symbols
|
||||
1F700..1F77F; Alchemical Symbols
|
||||
1F780..1F7FF; Geometric Shapes Extended
|
||||
1F800..1F8FF; Supplemental Arrows-C
|
||||
1F900..1F9FF; Supplemental Symbols and Pictographs
|
||||
20000..2A6DF; CJK Unified Ideographs Extension B
|
||||
2A700..2B73F; CJK Unified Ideographs Extension C
|
||||
2B740..2B81F; CJK Unified Ideographs Extension D
|
||||
2B820..2CEAF; CJK Unified Ideographs Extension E
|
||||
2CEB0..2EBEF; CJK Unified Ideographs Extension F
|
||||
2F800..2FA1F; CJK Compatibility Ideographs Supplement
|
||||
E0000..E007F; Tags
|
||||
E0100..E01EF; Variation Selectors Supplement
|
||||
F0000..FFFFF; Supplementary Private Use Area-A
|
||||
100000..10FFFF; Supplementary Private Use Area-B
|
||||
|
||||
# EOF
|
560
test/jdk/java/lang/Character/CharCheck.java
Normal file
560
test/jdk/java/lang/Character/CharCheck.java
Normal file
@ -0,0 +1,560 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @author John O'Conner
|
||||
*/
|
||||
|
||||
import java.io.*;
|
||||
|
||||
|
||||
/**
|
||||
* This class either loads or dumps the character properties of all Unicode
|
||||
* characters out to a file. When loading, it compares the loaded data with
|
||||
* that obtained through the java.lang.Character API. This allows detection of
|
||||
* changes to the character properties between versions of the Java VM. A
|
||||
* typical usage would be to dump the properties under an early VM, and load
|
||||
* them under a later VM.
|
||||
*
|
||||
* Also: Check the current VM's character properties against those in a
|
||||
* Unicode database. The database should be of the format
|
||||
* available on ftp.unicode.org/Public/UNIDATA.
|
||||
*
|
||||
*/
|
||||
public class CharCheck {
|
||||
static int differences = 0;
|
||||
|
||||
public static void main(String args[]) throws Exception {
|
||||
|
||||
if (args.length != 2 && args.length != 3) usage();
|
||||
if (args[0].equals("dump"))
|
||||
dump(Integer.parseInt(args[1], 16), new ObjectOutputStream(new FileOutputStream(args[2])));
|
||||
else if (args[0].equals("load"))
|
||||
load(Integer.parseInt(args[1], 16), new ObjectInputStream(new FileInputStream(args[2])));
|
||||
else if (args[0].equals("check"))
|
||||
check(Integer.parseInt(args[1], 16), new File(args[2]));
|
||||
else if (args[0].equals("char"))
|
||||
showChar(Integer.parseInt(args[1],16));
|
||||
else if (args[0].equals("fchar"))
|
||||
showFileChar(args[1], Integer.parseInt(args[2],16));
|
||||
else usage();
|
||||
if (differences != 0) {
|
||||
throw new RuntimeException("There are differences between Character properties and the specification.");
|
||||
}
|
||||
}
|
||||
|
||||
static void usage() {
|
||||
System.err.println("Usage: java CharCheck <command>");
|
||||
System.err.println("where <command> is one of the following:");
|
||||
System.err.println("dump <plane> <file> - dumps the character properties of the given plane,");
|
||||
System.err.println(" read from the current VM, to the given file.");
|
||||
System.err.println("load <plane> <file> - loads the character properties from the given");
|
||||
System.err.println(" file and compares them to those of the given character plane");
|
||||
System.err.println(" in the current VM.");
|
||||
System.err.println("check <plane> <file> - compare the current VM's character properties");
|
||||
System.err.println(" in the given plane to those listed in the given file, ");
|
||||
System.err.println(" which should be in the format available on ");
|
||||
System.err.println(" ftp.unicode.org/Public/2.0-Update.");
|
||||
System.err.println("char <code> - show current VM properties of the given Unicode char.");
|
||||
System.err.println("fchar <file> <code> - show file properties of the given Unicode char.");
|
||||
System.exit(0);
|
||||
}
|
||||
|
||||
static String getTypeName(int type) {
|
||||
return (type >= 0 && type < UnicodeSpec.generalCategoryList.length) ?
|
||||
(UnicodeSpec.generalCategoryList[type][UnicodeSpec.LONG] + '(' + type + ')') :
|
||||
("<Illegal type value " + type + ">");
|
||||
}
|
||||
|
||||
static int check(int plane, File specFile) throws Exception {
|
||||
|
||||
String version = System.getProperty("java.version");
|
||||
System.out.println("Current VM version " + version);
|
||||
int rangeLimit = (plane << 16) | 0xFFFF;
|
||||
String record;
|
||||
UnicodeSpec[] spec = UnicodeSpec.readSpecFile(specFile, plane);
|
||||
int rangeStart = 0x0000;
|
||||
boolean isRange = false;
|
||||
|
||||
lastCheck = (plane << 16) - 1;
|
||||
|
||||
for (int currentSpec = 0; currentSpec < spec.length; currentSpec++) {
|
||||
int c = spec[currentSpec].getCodePoint();
|
||||
if (isRange) {
|
||||
// Must see end of range now
|
||||
if (spec[currentSpec].getName().endsWith("Last>")) {
|
||||
for (int d=rangeStart; d<=c; d++) {
|
||||
checkOneChar(d, spec[currentSpec]);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// No good -- First without Last
|
||||
System.out.println("BAD FILE: First without last at '" + escape(rangeStart) + "'");
|
||||
}
|
||||
isRange = false;
|
||||
}
|
||||
else {
|
||||
// Look for a First, Last pair: This is a pair of entries like the following:
|
||||
// 4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;;
|
||||
// 9FA5;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;;
|
||||
if (spec[currentSpec].getName().endsWith("First>")) {
|
||||
rangeStart = c;
|
||||
isRange = true;
|
||||
}
|
||||
else {
|
||||
checkOneChar(c, spec[currentSpec]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check undefined chars at the end of the range
|
||||
|
||||
while (lastCheck < rangeLimit) checkOneCharDefined(++lastCheck, "?", false);
|
||||
|
||||
System.out.println("Total differences: "+differences);
|
||||
return differences;
|
||||
}
|
||||
|
||||
static int lastCheck = -1;
|
||||
|
||||
static final void checkOneCharDefined(int c, String name, boolean fileDefined) {
|
||||
if (Character.isDefined(c) != fileDefined)
|
||||
showDifference(c, name, "isDefined", ""+(!fileDefined), ""+fileDefined);
|
||||
}
|
||||
|
||||
// In GenerateCharacter, the following ranges are handled specially.
|
||||
// Each is the start of a 26-character range with values 10..35.
|
||||
static final char NUMERIC_EXCEPTION[] = { '\u0041', '\u0061', '\uFF21', '\uFF41' };
|
||||
|
||||
static void checkOneChar(int c, UnicodeSpec charSpec) {
|
||||
// Handle intervening ranges -- we assume that we will be called in monotonically
|
||||
// increasing order. If the last char we checked is more than one before this
|
||||
// char, then check the intervening range -- it should all be undefined.
|
||||
int lowerLimit = (c & 0xFF0000);
|
||||
if (lastCheck >= lowerLimit && (lastCheck+1) != c) {
|
||||
for (int i=lastCheck+1; i<c; ++i)
|
||||
checkOneCharDefined(i, "?", false);
|
||||
}
|
||||
|
||||
lastCheck = c;
|
||||
|
||||
// isDefined should be true
|
||||
checkOneCharDefined(c, charSpec.getName(), true);
|
||||
|
||||
// Check lower, upper, and titlecase conversion
|
||||
int upper = Character.toUpperCase(c);
|
||||
int lower = Character.toLowerCase(c);
|
||||
int title = Character.toTitleCase(c);
|
||||
int upperDB = charSpec.hasUpperMap() ? charSpec.getUpperMap() : c;
|
||||
int lowerDB = charSpec.hasLowerMap() ? charSpec.getLowerMap() : c;
|
||||
int titleDB = charSpec.hasTitleMap() ? charSpec.getTitleMap() : c;
|
||||
if (upper != upperDB) showDifference(c, charSpec.getName(), "upper", hex6(upper), hex6(upperDB));
|
||||
if (lower != lowerDB) showDifference(c, charSpec.getName(), "lower", hex6(lower), hex6(lowerDB));
|
||||
if (title != titleDB) showDifference(c, charSpec.getName(), "title", hex6(title), hex6(titleDB));
|
||||
|
||||
// Check the character general category (type)
|
||||
int type = Character.getType(c);
|
||||
int typeDB = charSpec.getGeneralCategory();
|
||||
if (type != typeDB) {
|
||||
showDifference(c, charSpec.getName(), "type",
|
||||
UnicodeSpec.generalCategoryList[type][UnicodeSpec.SHORT],
|
||||
UnicodeSpec.generalCategoryList[typeDB][UnicodeSpec.SHORT]);
|
||||
}
|
||||
|
||||
// Check the mirrored property
|
||||
boolean isMirrored = Character.isMirrored(c);
|
||||
boolean isMirroredDB = charSpec.isMirrored();
|
||||
if (isMirrored != isMirroredDB) {
|
||||
showDifference(c, charSpec.getName(), "isMirrored", ""+isMirrored, ""+isMirroredDB);
|
||||
}
|
||||
|
||||
// Check the directionality property
|
||||
byte directionality = Character.getDirectionality(c);
|
||||
byte directionalityDB = charSpec.getBidiCategory();
|
||||
if (directionality != directionalityDB) {
|
||||
showDifference(c, charSpec.getName(), "directionality", ""+directionality, ""+directionalityDB);
|
||||
}
|
||||
|
||||
// Check the decimal digit property
|
||||
int decimalDigit = Character.digit(c, 10);
|
||||
int decimalDigitDB = -1;
|
||||
if (charSpec.getGeneralCategory() == UnicodeSpec.DECIMAL_DIGIT_NUMBER) {
|
||||
decimalDigitDB = charSpec.getDecimalValue();
|
||||
}
|
||||
if (decimalDigit != decimalDigitDB)
|
||||
showDifference(c, charSpec.getName(), "decimal digit", ""+decimalDigit, ""+decimalDigitDB);
|
||||
|
||||
// Check the numeric property
|
||||
int numericValue = Character.getNumericValue(c);
|
||||
int numericValueDB;
|
||||
if (charSpec.getNumericValue().length() == 0) {
|
||||
numericValueDB = -1;
|
||||
// Handle exceptions where Character deviates from the UCS spec
|
||||
for (int k=0; k<NUMERIC_EXCEPTION.length; ++k) {
|
||||
if (c >= NUMERIC_EXCEPTION[k] && c < (char)(NUMERIC_EXCEPTION[k]+26)) {
|
||||
numericValueDB = c - NUMERIC_EXCEPTION[k] + 10;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
String strValue = charSpec.getNumericValue();
|
||||
int parsedNumericValue;
|
||||
if (strValue.equals("10000000000")
|
||||
|| strValue.equals("1000000000000")) {
|
||||
System.out.println("Skipping strValue: " + strValue
|
||||
+ " for " + charSpec.getName()
|
||||
+ "(0x" + Integer.toHexString(c) + ")");
|
||||
parsedNumericValue = -2;
|
||||
} else {
|
||||
parsedNumericValue = strValue.indexOf('/') < 0 ?
|
||||
Integer.parseInt(strValue) : -2;
|
||||
}
|
||||
numericValueDB = parsedNumericValue < 0 ? -2 : parsedNumericValue;
|
||||
}
|
||||
if (numericValue != numericValueDB)
|
||||
showDifference(c, charSpec.getName(), "numeric value", ""+numericValue, ""+numericValueDB);
|
||||
}
|
||||
|
||||
static void showDifference(int c, String name, String property, String vmValue, String dbValue) {
|
||||
System.out.println(escape("Mismatch at '" + hex6(c) + "' (" + name+ "): " +
|
||||
property + "=" + vmValue + ", db=" + dbValue));
|
||||
++differences;
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a record containing ';'-separated fields, return the fieldno-th
|
||||
* field. The first field is field 0.
|
||||
*/
|
||||
static String getField(String record, int fieldno) {
|
||||
int i=0;
|
||||
int j=record.indexOf(';');
|
||||
while (fieldno > 0) {
|
||||
i=j+1;
|
||||
j=record.indexOf(';', i);
|
||||
}
|
||||
return record.substring(i, j);
|
||||
}
|
||||
|
||||
static final int FIELD_COUNT = 15;
|
||||
|
||||
/**
|
||||
* Given a record containing ';'-separated fields, return an array of
|
||||
* the fields. It is assumed that there are FIELD_COUNT fields per record.
|
||||
*/
|
||||
static void getFields(String record, String[] fields) {
|
||||
int i=0;
|
||||
int j=record.indexOf(';');
|
||||
fields[0] = record.substring(i, j);
|
||||
for (int n=1; n<FIELD_COUNT; ++n) {
|
||||
i=j+1;
|
||||
j=record.indexOf(';', i);
|
||||
fields[n] = (j<0) ? record.substring(i) : record.substring(i, j);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a record containing ';'-separated fields, return an array of
|
||||
* the fields. It is assumed that there are FIELD_COUNT fields per record.
|
||||
*/
|
||||
static String[] getFields(String record) {
|
||||
String[] fields = new String[FIELD_COUNT];
|
||||
getFields(record, fields);
|
||||
return fields;
|
||||
}
|
||||
|
||||
static void dump(int plane, ObjectOutputStream out) throws Exception {
|
||||
String version = System.getProperty("java.version");
|
||||
System.out.println("Writing file version " + version);
|
||||
out.writeObject(version);
|
||||
|
||||
long[] data = new long[0x20000];
|
||||
long[] onechar = new long[2];
|
||||
int j=0;
|
||||
int begin = plane<<16;
|
||||
int end = begin + 0xFFFF;
|
||||
for (int i = begin; i <= end; ++i) {
|
||||
getPackedCharacterData(i, onechar);
|
||||
data[j++] = onechar[0];
|
||||
data[j++] = onechar[1];
|
||||
}
|
||||
out.writeObject(data);
|
||||
}
|
||||
|
||||
static long[] loadData(ObjectInputStream in) throws Exception {
|
||||
String version = System.getProperty("java.version");
|
||||
String inVersion = (String)in.readObject();
|
||||
System.out.println("Reading file version " + inVersion);
|
||||
System.out.println("Current version " + version);
|
||||
|
||||
long[] data = (long[])in.readObject();
|
||||
if (data.length != 0x20000) {
|
||||
System.out.println("BAD ARRAY LENGTH: " + data.length);
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
static int load(int plane, ObjectInputStream in) throws Exception {
|
||||
long[] data = CharCheck.loadData(in);
|
||||
CharCheck.checkData(data, plane);
|
||||
return differences;
|
||||
}
|
||||
|
||||
|
||||
static int checkData(long[] data, int plane) {
|
||||
long[] onechar = new long[2];
|
||||
|
||||
for (int i=0; i<0x10000; ++i) {
|
||||
int c = (plane << 16) | i;
|
||||
getPackedCharacterData(c, onechar);
|
||||
if (data[2*i] != onechar[0] || data[2*i+1] != onechar[1]) {
|
||||
long[] filechar = { data[2*i], data[2*i+1] };
|
||||
showDifference(c, onechar, filechar);
|
||||
}
|
||||
}
|
||||
System.out.println("Total differences: " + differences);
|
||||
return differences;
|
||||
}
|
||||
|
||||
static String hex6(long n) {
|
||||
String q = Long.toHexString(n).toUpperCase();
|
||||
return "000000".substring(Math.min(6, q.length())) + q;
|
||||
}
|
||||
|
||||
static void showChar(int c) {
|
||||
long[] chardata = new long[2];
|
||||
getPackedCharacterData(c, chardata);
|
||||
System.out.println("Current VM properties for '" + hex6(c) + "': " +
|
||||
hex6(chardata[1]) + ' ' + hex6(chardata[0]));
|
||||
String[] data = unpackCharacterData(chardata);
|
||||
for (int i=0; i<data.length; ++i)
|
||||
System.out.println(" " + escape(data[i]));
|
||||
}
|
||||
|
||||
static void showFileChar(String fileName, int c) throws Exception {
|
||||
ObjectInputStream in = new ObjectInputStream(new FileInputStream(fileName));
|
||||
String inVersion = (String)in.readObject();
|
||||
System.out.println("Reading file version " + inVersion);
|
||||
|
||||
long[] data = (long[])in.readObject();
|
||||
if (data.length != 0x20000) {
|
||||
System.out.println("BAD ARRAY LENGTH: " + data.length);
|
||||
}
|
||||
int offset = c & 0xFFFF;
|
||||
long[] chardata = { data[2*offset], data[2*offset+1] };
|
||||
String[] datap = unpackCharacterData(chardata);
|
||||
System.out.println(escape("File properties for '" + hex6(c)+ "':"));
|
||||
for (int i=0; i<datap.length; ++i)
|
||||
System.out.println(" " + escape(datap[i]));
|
||||
}
|
||||
|
||||
/**
|
||||
* The packed character data encapsulates all the information obtainable
|
||||
* about a character in a single numeric value.
|
||||
*
|
||||
* data[0]:
|
||||
*
|
||||
* 5 bits for getType()
|
||||
* 6 bits for digit() -- add one
|
||||
* 6 bits for getNumericValue() -- add two
|
||||
* 15 bits for isXxx()
|
||||
*
|
||||
* 21 bits for toUpperCase()
|
||||
*
|
||||
*
|
||||
* data[1]:
|
||||
* 21 bits for toLowerCase()
|
||||
* 21 bits for toTitleCase()
|
||||
*/
|
||||
static void getPackedCharacterData(int c, long[] data) {
|
||||
data[0] =
|
||||
(long)Character.getType(c) |
|
||||
((long)(Character.digit(c, Character.MAX_RADIX) + 1) << 5) |
|
||||
((long)(Character.getNumericValue(c) + 2) << 11) |
|
||||
(Character.isDefined(c) ? (1L<<17) : 0L) |
|
||||
(Character.isDigit(c) ? (1L<<18) : 0L) |
|
||||
(Character.isIdentifierIgnorable(c) ? (1L<<19) : 0L) |
|
||||
(Character.isISOControl(c) ? (1L<<20) : 0L) |
|
||||
(Character.isJavaIdentifierPart(c) ? (1L<<21) : 0L) |
|
||||
(Character.isJavaIdentifierStart(c) ? (1L<<22) : 0L) |
|
||||
(Character.isLetter(c) ? (1L<<23) : 0L) |
|
||||
(Character.isLetterOrDigit(c) ? (1L<<24) : 0L) |
|
||||
(Character.isLowerCase(c) ? (1L<<25) : 0L) |
|
||||
(Character.isSpaceChar(c) ? (1L<<26) : 0L) |
|
||||
(Character.isTitleCase(c) ? (1L<<27) : 0L) |
|
||||
(Character.isUnicodeIdentifierPart(c) ? (1L<<28) : 0L) |
|
||||
(Character.isUnicodeIdentifierStart(c) ? (1L<<29) : 0L) |
|
||||
(Character.isUpperCase(c) ? (1L<<30) : 0L) |
|
||||
(Character.isWhitespace(c) ? (1L<<31) : 0L) |
|
||||
((long)Character.toUpperCase(c) << 32);
|
||||
data[1] = (long)Character.toLowerCase(c) |
|
||||
((long)Character.toTitleCase(c) << 21);
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a long, set the bits at the given offset and length to the given value.
|
||||
*/
|
||||
static long setBits(long data, int offset, int length, long value) {
|
||||
long himask = -1L << (offset+length);
|
||||
long lomask = ~(-1L << offset);
|
||||
long lengthmask = ~(-1L << length);
|
||||
return (data & (himask | lomask)) | ((value & lengthmask) << offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* Given packed character data, change the attribute
|
||||
* toLower
|
||||
*/
|
||||
static void setToLower(long[] data, int value) {
|
||||
data[0] = setBits(data[0], 48, 16, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Given packed character data, change the attribute
|
||||
* toUpper
|
||||
*/
|
||||
static void setToUpper(long[] data, int value) {
|
||||
data[0] = setBits(data[0], 32, 16, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Given packed character data, change the attribute
|
||||
* toTitle
|
||||
*/
|
||||
static void setToTitle(long[] data, int value) {
|
||||
data[1] = value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Given packed character data, change the attribute
|
||||
* getType
|
||||
*/
|
||||
static void setGetType(long[] data, int value) {
|
||||
data[0] = setBits(data[0], 0, 5, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Given packed character data, change the attribute
|
||||
* isDefined
|
||||
*/
|
||||
static void setIsDefined(long[] data, boolean value) {
|
||||
data[0] = setBits(data[0], 17, 1, value?1:0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Given packed character data, change the attribute
|
||||
* isJavaIdentifierPart
|
||||
*/
|
||||
static void setIsJavaIdentifierPart(long[] data, boolean value) {
|
||||
data[0] = setBits(data[0], 21, 1, value?1:0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Given packed character data, change the attribute
|
||||
* isJavaIdentifierStart
|
||||
*/
|
||||
static void setIsJavaIdentifierStart(long[] data, boolean value) {
|
||||
data[0] = setBits(data[0], 22, 1, value?1:0);
|
||||
}
|
||||
|
||||
static String[] unpackCharacterData(long[] dataL) {
|
||||
long data = dataL[0];
|
||||
String[] result = {
|
||||
"type=" + getTypeName((int)(data&0x1F)),
|
||||
"digit=" + (((data>>5)&0x3F)-1),
|
||||
"numeric=" + (((data>>11)&0x3F)-2),
|
||||
"isDefined=" + (((data>>17)&1)==1),
|
||||
"isDigit=" + (((data>>18)&1)==1),
|
||||
"isIdentifierIgnorable=" + (((data>>19)&1)==1),
|
||||
"isISOControl=" + (((data>>20)&1)==1),
|
||||
"isJavaIdentifierPart=" + (((data>>21)&1)==1),
|
||||
"isJavaIdentifierStart=" + (((data>>22)&1)==1),
|
||||
"isLetter=" + (((data>>23)&1)==1),
|
||||
"isLetterOrDigit=" + (((data>>24)&1)==1),
|
||||
"isLowerCase=" + (((data>>25)&1)==1),
|
||||
"isSpaceChar=" + (((data>>26)&1)==1),
|
||||
"isTitleCase=" + (((data>>27)&1)==1),
|
||||
"isUnicodeIdentifierPart=" + (((data>>28)&1)==1),
|
||||
"isUnicodeIdentifierStart=" + (((data>>29)&1)==1),
|
||||
"isUpperCase=" + (((data>>30)&1)==1),
|
||||
"isWhitespace=" + (((data>>31)&1)==1),
|
||||
"toUpper=" + hex6(((int)(data>>32) & 0X1FFFFF)),
|
||||
"toLower=" + hex6((int)(dataL[1] & 0x1FFFFF)),
|
||||
"toTitle=" + hex6(((int)(dataL[1] >> 21) & 0x1FFFFF))
|
||||
};
|
||||
return result;
|
||||
}
|
||||
|
||||
static String[] getCharacterData(int c) {
|
||||
long[] data = new long[2];
|
||||
getPackedCharacterData(c, data);
|
||||
return unpackCharacterData(data);
|
||||
}
|
||||
|
||||
static void showDifference(int c, long[] currentData, long[] fileData) {
|
||||
System.out.println("Difference at " + hex6(c));
|
||||
String[] current = unpackCharacterData(currentData);
|
||||
String[] file = unpackCharacterData(fileData);
|
||||
for (int i=0; i<current.length; ++i) {
|
||||
if (!current[i].equals(file[i])) {
|
||||
System.out.println(escape(" current " + current[i] +
|
||||
", file " + file[i]));
|
||||
}
|
||||
}
|
||||
++differences;
|
||||
}
|
||||
|
||||
static String escape(String s) {
|
||||
StringBuffer buf = new StringBuffer();
|
||||
for (int i=0; i<s.length(); ++i) {
|
||||
char c = s.charAt(i);
|
||||
if (c >= 0x20 && c <= 0x7F) buf.append(c);
|
||||
else {
|
||||
buf.append("\\u");
|
||||
String h = "000" + Integer.toHexString(c);
|
||||
if (h.length() > 4) h = h.substring(h.length() - 4);
|
||||
buf.append(h);
|
||||
}
|
||||
}
|
||||
return buf.toString();
|
||||
}
|
||||
|
||||
static String escape(int c) {
|
||||
StringBuffer buf = new StringBuffer();
|
||||
if (c >= 0x20 && c <= 0x7F) buf.append(c);
|
||||
else {
|
||||
buf.append("\\u");
|
||||
String h = "000" + Integer.toHexString(c);
|
||||
if (h.length() > 4) h = h.substring(h.length() - 4);
|
||||
buf.append(h);
|
||||
}
|
||||
return buf.toString();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//eof
|
301
test/jdk/java/lang/Character/CheckBlocks.java
Normal file
301
test/jdk/java/lang/Character/CheckBlocks.java
Normal file
@ -0,0 +1,301 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @test
|
||||
* @bug 4830803 4886934 6565620 6959267 7070436 7198195 8032446 8072600
|
||||
* @summary Check that the UnicodeBlock forName() method works as expected and block ranges are correct for all Unicode characters.
|
||||
* @run main CheckBlocks
|
||||
* @author John O'Conner
|
||||
*/
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
import java.lang.Character.UnicodeBlock;
|
||||
|
||||
|
||||
public class CheckBlocks {
|
||||
|
||||
static boolean err = false;
|
||||
static Class<?> character;
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
generateBlockList();
|
||||
|
||||
try {
|
||||
character = Class.forName("java.lang.Character$UnicodeBlock");
|
||||
} catch (ClassNotFoundException e) {
|
||||
throw new RuntimeException("Class.forName(\"Character\") failed.");
|
||||
}
|
||||
|
||||
for (Block blk : blocks) {
|
||||
test4830803_1(blk);
|
||||
test4830803_2();
|
||||
test4886934(blk);
|
||||
}
|
||||
|
||||
if (err) {
|
||||
throw new RuntimeException("Failed");
|
||||
} else {
|
||||
System.out.println("Passed");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check that the UnicodeBlock forName() method works as expected.
|
||||
*/
|
||||
private static void test4830803_1(Block blk) throws Exception {
|
||||
|
||||
/*
|
||||
* Try 3 forms of block name in the forName() method. Each form should
|
||||
* produce the same expected block.
|
||||
*/
|
||||
String blkName = blk.getName();
|
||||
|
||||
// For backward compatibility
|
||||
if (blkName.equals("COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS")) {
|
||||
blkName = "COMBINING_MARKS_FOR_SYMBOLS";
|
||||
System.out.println("*** COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS is replaced with COMBINING_MARKS_FOR_SYMBOLS for backward compatibility.");
|
||||
} else if (blkName.equals("GREEK_AND_COPTIC")) {
|
||||
blkName = "GREEK";
|
||||
System.out.println("*** GREEK_AND_COPTIC is replaced with GREEK for backward compatibility.");
|
||||
} else if (blkName.equals("CYRILLIC_SUPPLEMENT")) {
|
||||
blkName = "CYRILLIC_SUPPLEMENTARY";
|
||||
System.out.println("*** CYRILLIC_SUPPLEMENT is replaced with CYRILLIC_SUPPLEMENTARY for backward compatibility.");
|
||||
}
|
||||
|
||||
String expectedBlock = null;
|
||||
try {
|
||||
expectedBlock = character.getField(blkName).getName();
|
||||
} catch (NoSuchFieldException | SecurityException e) {
|
||||
System.err.println("Error: " + blkName + " was not found.");
|
||||
err = true;
|
||||
return;
|
||||
}
|
||||
|
||||
String canonicalBlockName = blk.getOriginalName();
|
||||
String idBlockName = expectedBlock;
|
||||
String regexBlockName = toRegExString(canonicalBlockName);
|
||||
|
||||
if (regexBlockName == null) {
|
||||
System.err.println("Error: Block name which was processed with regex was null.");
|
||||
err = true;
|
||||
return;
|
||||
}
|
||||
|
||||
if (!expectedBlock.equals(UnicodeBlock.forName(canonicalBlockName).toString())) {
|
||||
System.err.println("Error #1: UnicodeBlock.forName(\"" +
|
||||
canonicalBlockName + "\") returned wrong value.\n\tGot: " +
|
||||
UnicodeBlock.forName(canonicalBlockName) +
|
||||
"\n\tExpected: " + expectedBlock);
|
||||
err = true;
|
||||
}
|
||||
|
||||
if (!expectedBlock.equals(UnicodeBlock.forName(idBlockName).toString())) {
|
||||
System.err.println("Error #2: UnicodeBlock.forName(\"" +
|
||||
idBlockName + "\") returned wrong value.\n\tGot: " +
|
||||
UnicodeBlock.forName(idBlockName) +
|
||||
"\n\tExpected: " + expectedBlock);
|
||||
err = true;
|
||||
}
|
||||
|
||||
if (!expectedBlock.equals(UnicodeBlock.forName(regexBlockName).toString())) {
|
||||
System.err.println("Error #3: UnicodeBlock.forName(\"" +
|
||||
regexBlockName + "\") returned wrong value.\n\tGot: " +
|
||||
UnicodeBlock.forName(regexBlockName) +
|
||||
"\n\tExpected: " + expectedBlock);
|
||||
err = true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* now try a bad block name. This should produce an IAE.
|
||||
*/
|
||||
private static void test4830803_2() {
|
||||
boolean threwExpected = false;
|
||||
|
||||
try {
|
||||
UnicodeBlock block = UnicodeBlock.forName("notdefined");
|
||||
}
|
||||
catch(IllegalArgumentException e) {
|
||||
threwExpected = true;
|
||||
}
|
||||
|
||||
if (threwExpected == false) {
|
||||
System.err.println("Error: UnicodeBlock.forName(\"notdefined\") should throw IllegalArgumentException.");
|
||||
err = true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert the argument to a block name form used by the regex package.
|
||||
* That is, remove all spaces.
|
||||
*/
|
||||
private static String toRegExString(String str) {
|
||||
String[] tokens = null;
|
||||
StringBuilder retStr = new StringBuilder();
|
||||
try {
|
||||
tokens = str.split(" ");
|
||||
}
|
||||
catch(java.util.regex.PatternSyntaxException e) {
|
||||
return null;
|
||||
}
|
||||
for(int x=0; x < tokens.length; ++x) {
|
||||
retStr.append(tokens[x]);
|
||||
}
|
||||
return retStr.toString();
|
||||
}
|
||||
|
||||
private static void test4886934(Block blk) {
|
||||
String blkName = blk.getName();
|
||||
String blkOrigName = blk.getOriginalName();
|
||||
int ch = blk.getBegin();
|
||||
UnicodeBlock block = UnicodeBlock.of(ch);
|
||||
|
||||
if (block == null) {
|
||||
System.err.println("Error: The block for " + blkName +
|
||||
" is missing. Please check java.lang.Character.UnicodeBlock.");
|
||||
err = true;
|
||||
return;
|
||||
}
|
||||
|
||||
// For backward compatibility
|
||||
if (blkName.equals("COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS")) {
|
||||
blkName = "COMBINING_MARKS_FOR_SYMBOLS";
|
||||
System.out.println("*** COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS is replaced with COMBINING_MARKS_FOR_SYMBOLS for backward compatibility.");
|
||||
} else if (blkName.equals("GREEK_AND_COPTIC")) {
|
||||
blkName = "GREEK";
|
||||
System.out.println("*** GREEK_AND_COPTIC is replaced with GREEK for backward compatibility.");
|
||||
} else if (blkName.equals("CYRILLIC_SUPPLEMENT")) {
|
||||
blkName = "CYRILLIC_SUPPLEMENTARY";
|
||||
System.out.println("*** CYRILLIC_SUPPLEMENT is replaced with CYRILLIC_SUPPLEMENTARY for backward compatibility.");
|
||||
}
|
||||
|
||||
String blockName = block.toString();
|
||||
if (!blockName.equals(blkName)) {
|
||||
System.err.println("Error: Begin-of-block character(0x" +
|
||||
Integer.toHexString(ch).toUpperCase() +
|
||||
") should be in \"" + blkName + "\" block " +
|
||||
"(Block name is \"" + blkOrigName + "\")" +
|
||||
" but found in \"" + blockName + "\" block.");
|
||||
err = true;
|
||||
}
|
||||
|
||||
block = UnicodeBlock.of(++ch);
|
||||
blockName = block.toString();
|
||||
if (!blockName.equals(blkName)) {
|
||||
System.err.println("Error: Character(0x" +
|
||||
Integer.toHexString(ch).toUpperCase() +
|
||||
") should be in \"" + blkName + "\" block " +
|
||||
"(Block name is \"" + blkOrigName + "\")" +
|
||||
" but found in \"" + blockName + "\" block.");
|
||||
err = true;
|
||||
}
|
||||
|
||||
ch = blk.getEnd();
|
||||
block = UnicodeBlock.of(ch);
|
||||
blockName = block.toString();
|
||||
if (!blockName.equals(blkName)) {
|
||||
System.err.println("Error: End-of-block Character(0x" +
|
||||
Integer.toHexString(ch).toUpperCase() +
|
||||
") should be in \"" + blkName + "\" block " +
|
||||
"(Block name is \"" + blkOrigName + "\")" +
|
||||
" but found in \"" + blockName + "\" block.");
|
||||
err = true;
|
||||
}
|
||||
}
|
||||
|
||||
// List of all Unicode blocks, their start, and end codepoints.
|
||||
public static HashSet<Block> blocks = new HashSet<>();
|
||||
|
||||
private static void generateBlockList() throws Exception {
|
||||
BufferedReader f = new BufferedReader(new FileReader(new File(System.getProperty("test.src", "."), "Blocks.txt")));
|
||||
|
||||
String line;
|
||||
while ((line = f.readLine()) != null) {
|
||||
if (line.length() == 0 || line.charAt(0) == '#') {
|
||||
continue;
|
||||
}
|
||||
|
||||
int index1 = line.indexOf('.');
|
||||
int begin = Integer.parseInt(line.substring(0, index1), 16);
|
||||
int index2 = line.indexOf(';');
|
||||
int end = Integer.parseInt(line.substring(index1+2, index2), 16);
|
||||
String name = line.substring(index2+1).trim();
|
||||
|
||||
System.out.println(" Adding a Block(" +
|
||||
Integer.toHexString(begin) + ", " + Integer.toHexString(end) +
|
||||
", " + name + ")");
|
||||
blocks.add(new Block(begin, end, name));
|
||||
}
|
||||
f.close();
|
||||
}
|
||||
}
|
||||
|
||||
class Block {
|
||||
|
||||
public Block() {
|
||||
blockBegin = 0;
|
||||
blockEnd = 0;
|
||||
blockName = null;
|
||||
}
|
||||
|
||||
public Block(int begin, int end, String name) {
|
||||
blockBegin = begin;
|
||||
blockEnd = end;
|
||||
blockName = name.replaceAll("[ -]", "_").toUpperCase(Locale.ENGLISH);
|
||||
originalBlockName = name;
|
||||
}
|
||||
|
||||
public int getBegin() {
|
||||
return blockBegin;
|
||||
}
|
||||
|
||||
public int getEnd() {
|
||||
return blockEnd;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return blockName;
|
||||
}
|
||||
|
||||
public String getOriginalName() {
|
||||
return originalBlockName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (obj == null) return false;
|
||||
if (!(obj instanceof Block)) return false;
|
||||
|
||||
Block other = (Block)obj;
|
||||
return other.blockBegin == blockBegin &&
|
||||
other.blockEnd == blockEnd &&
|
||||
other.blockName.equals(blockName) &&
|
||||
other.originalBlockName.equals(originalBlockName);
|
||||
}
|
||||
int blockBegin, blockEnd;
|
||||
String blockName, originalBlockName;
|
||||
}
|
111
test/jdk/java/lang/Character/CheckUnicode.java
Normal file
111
test/jdk/java/lang/Character/CheckUnicode.java
Normal file
@ -0,0 +1,111 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @test
|
||||
* @bug 4114080 6565620 6959267 7070436 7198195 8032446 8072600
|
||||
* @summary Make sure the attributes of Unicode characters, as
|
||||
* returned by the Character API, are as expected. Do this by
|
||||
* comparing them to a baseline file together with a list of
|
||||
* known diffs.
|
||||
* @build UnicodeSpec CharCheck
|
||||
* @run main CheckUnicode
|
||||
* @author Alan Liu
|
||||
* @author John O'Conner
|
||||
*/
|
||||
|
||||
import java.io.*;
|
||||
|
||||
public class CheckUnicode {
|
||||
public static void main(String args[]) throws Exception {
|
||||
|
||||
// 1. Check that the dumped property files for planes 0, 1, 2, 3, 14, 15, and 16
|
||||
// are the same as in the current Character properties.
|
||||
int[] planes = {0, 1, 2, 3, 14, 15, 16};
|
||||
String[] fileNames = {"charprop00.bin", "charprop01.bin", "charprop02.bin", "charprop03.bin",
|
||||
"charprop0E.bin", "charprop0F.bin", "charprop10.bin" };
|
||||
|
||||
// Read in the Unicode 4.0 data
|
||||
|
||||
for (int x=0; x < planes.length && x < fileNames.length; ++x) {
|
||||
File unicodeProp = new File(System.getProperty("test.src", "."), fileNames[x]);
|
||||
ObjectInputStream ois = new ObjectInputStream(new FileInputStream(unicodeProp));
|
||||
// Find differences -- should be none
|
||||
int diffs = CharCheck.load(planes[x], ois);
|
||||
if (diffs != 0) {
|
||||
throw new RuntimeException("Bug 4114080 - Unicode properties have changed " +
|
||||
"in an unexpected way");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// 2. Check that the current 4.0 spec file is handled by the current
|
||||
// version of Character.
|
||||
File unicodeSpec = new File(System.getProperty("test.src", "."), "UnicodeData.txt");
|
||||
for (int x=0; x<planes.length; ++x) {
|
||||
int diffs = CharCheck.check(planes[x], unicodeSpec);
|
||||
if (diffs != 0) {
|
||||
throw new RuntimeException("Bug 4114080 - Unicode properties have changed " +
|
||||
"in an unexpected way");
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Check that Java identifiers are recognized correctly.
|
||||
// test a few characters that are good id starts
|
||||
char[] idStartChar = {'$', '\u20AC', 'a', 'A', 'z', 'Z', '_', '\u0E3F',
|
||||
'\u1004', '\u10A0', '\u3400', '\u4E00', '\uAC00' };
|
||||
for (int x = 0; x < idStartChar.length; x++) {
|
||||
if (Character.isJavaIdentifierStart(idStartChar[x]) != true) {
|
||||
throw new RuntimeException("Java id start characters are not recognized.");
|
||||
}
|
||||
}
|
||||
|
||||
// test a few characters that are good id parts
|
||||
char[] idPartChar = {'0', '9', '\u0000', '\u0008', '\u000E', '\u007F'};
|
||||
for (int x=0; x< idStartChar.length; x++) {
|
||||
if (Character.isJavaIdentifierPart(idStartChar[x]) != true) {
|
||||
throw new RuntimeException("Java id part characters are not recognized.");
|
||||
}
|
||||
}
|
||||
for (int x=0; x<idPartChar.length; x++) {
|
||||
if (Character.isJavaIdentifierPart(idPartChar[x]) != true) {
|
||||
throw new RuntimeException("Java id part characters are not recognized.");
|
||||
}
|
||||
}
|
||||
|
||||
// now do some negative checks
|
||||
for (int x=0; x< idPartChar.length; x++) {
|
||||
if (Character.isJavaIdentifierStart(idPartChar[x]) != false) {
|
||||
throw new RuntimeException("These Java id part characters" +
|
||||
"should not be start characters.");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
}
|
72
test/jdk/java/lang/Character/DumpCharProperties.java
Normal file
72
test/jdk/java/lang/Character/DumpCharProperties.java
Normal file
@ -0,0 +1,72 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
* @author Martin Buchholz
|
||||
*/
|
||||
|
||||
import java.util.*;
|
||||
import static java.lang.Character.*;
|
||||
|
||||
public class DumpCharProperties {
    final static Locale turkish = new Locale("tr");

    /**
     * Formats the properties of one code point as a single line of 24
     * space-separated fields: twelve boolean classifications, the three case
     * mappings, the hexadecimal digit value and numeric value, two whitespace
     * flags, the general category and directionality, the mirrored flag, and
     * the first char of the upper-cased string in the German and Turkish
     * locales.
     *
     * @param i the code point to describe
     * @return the formatted property line
     */
    static String charProps(int i) {
        String asString = new String(new int[] { i }, 0, 1);

        // First UTF-16 unit of the locale-specific upper-case mapping.
        int germanUpper = asString.toUpperCase(Locale.GERMAN).charAt(0);
        int turkishUpper = asString.toUpperCase(turkish).charAt(0);

        Object[] fields = {
            // boolean classifications
            isLowerCase(i), isUpperCase(i), isTitleCase(i),
            isDigit(i), isDefined(i), isLetter(i), isLetterOrDigit(i),
            isJavaIdentifierStart(i), isJavaIdentifierPart(i),
            isUnicodeIdentifierStart(i), isUnicodeIdentifierPart(i),
            isIdentifierIgnorable(i),
            // case mappings and numeric values
            toLowerCase(i), toUpperCase(i), toTitleCase(i),
            digit(i, 16), getNumericValue(i),
            // whitespace flags
            isSpaceChar(i), isWhitespace(i),
            // category, directionality, mirroring
            getType(i), getDirectionality(i), isMirrored(i),
            // locale-sensitive upper-casing
            germanUpper, turkishUpper
        };

        return String.format(
            "%b %b %b %b %b %b %b %b %b %b %b %b %d %d %d %d %d %b %b %d %d %b %d %d",
            fields);
    }

    /** Prints one property line for every code point in all 17 planes. */
    public static void main(String[] args) throws Throwable {
        for (int codePoint = MIN_CODE_POINT; codePoint <= MAX_CODE_POINT; codePoint++) {
            System.out.println(charProps(codePoint));
        }
    }
}
|
@ -1,10 +1,11 @@
|
||||
# PropList-8.0.0.txt
|
||||
# Date: 2015-05-16, 17:50:38 GMT [MD]
|
||||
# PropList-10.0.0.txt
|
||||
# Date: 2017-03-10, 08:25:30 GMT
|
||||
# Copyright (c) 2017 Unicode, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2015 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -192,10 +193,17 @@ FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA
|
||||
111DE..111DF ; Terminal_Punctuation # Po [2] SHARADA SECTION MARK-1..SHARADA SECTION MARK-2
|
||||
11238..1123C ; Terminal_Punctuation # Po [5] KHOJKI DANDA..KHOJKI DOUBLE SECTION MARK
|
||||
112A9 ; Terminal_Punctuation # Po MULTANI SECTION MARK
|
||||
1144B..1144D ; Terminal_Punctuation # Po [3] NEWA DANDA..NEWA COMMA
|
||||
1145B ; Terminal_Punctuation # Po NEWA PLACEHOLDER MARK
|
||||
115C2..115C5 ; Terminal_Punctuation # Po [4] SIDDHAM DANDA..SIDDHAM SEPARATOR BAR
|
||||
115C9..115D7 ; Terminal_Punctuation # Po [15] SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES
|
||||
11641..11642 ; Terminal_Punctuation # Po [2] MODI DANDA..MODI DOUBLE DANDA
|
||||
1173C..1173E ; Terminal_Punctuation # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI
|
||||
11A42..11A43 ; Terminal_Punctuation # Po [2] ZANABAZAR SQUARE MARK SHAD..ZANABAZAR SQUARE MARK DOUBLE SHAD
|
||||
11A9B..11A9C ; Terminal_Punctuation # Po [2] SOYOMBO MARK SHAD..SOYOMBO MARK DOUBLE SHAD
|
||||
11AA1..11AA2 ; Terminal_Punctuation # Po [2] SOYOMBO TERMINAL MARK-1..SOYOMBO TERMINAL MARK-2
|
||||
11C41..11C43 ; Terminal_Punctuation # Po [3] BHAIKSUKI DANDA..BHAIKSUKI WORD SEPARATOR
|
||||
11C71 ; Terminal_Punctuation # Po MARCHEN MARK SHAD
|
||||
12470..12474 ; Terminal_Punctuation # Po [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON
|
||||
16A6E..16A6F ; Terminal_Punctuation # Po [2] MRO DANDA..MRO DOUBLE DANDA
|
||||
16AF5 ; Terminal_Punctuation # Po BASSA VAH FULL STOP
|
||||
@ -204,7 +212,7 @@ FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA
|
||||
1BC9F ; Terminal_Punctuation # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP
|
||||
1DA87..1DA8A ; Terminal_Punctuation # Po [4] SIGNWRITING COMMA..SIGNWRITING COLON
|
||||
|
||||
# Total code points: 238
|
||||
# Total code points: 252
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -429,6 +437,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
|
||||
081B..0823 ; Other_Alphabetic # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
|
||||
0825..0827 ; Other_Alphabetic # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
|
||||
0829..082C ; Other_Alphabetic # Mn [4] SAMARITAN VOWEL SIGN LONG I..SAMARITAN VOWEL SIGN SUKUN
|
||||
08D4..08DF ; Other_Alphabetic # Mn [12] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH WORD WAQFA
|
||||
08E3..08E9 ; Other_Alphabetic # Mn [7] ARABIC TURNED DAMMA BELOW..ARABIC CURLY KASRATAN
|
||||
08F0..0902 ; Other_Alphabetic # Mn [19] ARABIC OPEN FATHATAN..DEVANAGARI SIGN ANUSVARA
|
||||
0903 ; Other_Alphabetic # Mc DEVANAGARI SIGN VISARGA
|
||||
@ -465,6 +474,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
|
||||
0AC9 ; Other_Alphabetic # Mc GUJARATI VOWEL SIGN CANDRA O
|
||||
0ACB..0ACC ; Other_Alphabetic # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU
|
||||
0AE2..0AE3 ; Other_Alphabetic # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL
|
||||
0AFA..0AFC ; Other_Alphabetic # Mn [3] GUJARATI SIGN SUKUN..GUJARATI SIGN MADDAH
|
||||
0B01 ; Other_Alphabetic # Mn ORIYA SIGN CANDRABINDU
|
||||
0B02..0B03 ; Other_Alphabetic # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA
|
||||
0B3E ; Other_Alphabetic # Mc ORIYA VOWEL SIGN AA
|
||||
@ -502,7 +512,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
|
||||
0CCC ; Other_Alphabetic # Mn KANNADA VOWEL SIGN AU
|
||||
0CD5..0CD6 ; Other_Alphabetic # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
|
||||
0CE2..0CE3 ; Other_Alphabetic # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
|
||||
0D01 ; Other_Alphabetic # Mn MALAYALAM SIGN CANDRABINDU
|
||||
0D00..0D01 ; Other_Alphabetic # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
|
||||
0D02..0D03 ; Other_Alphabetic # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
|
||||
0D3E..0D40 ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II
|
||||
0D41..0D44 ; Other_Alphabetic # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
|
||||
@ -556,6 +566,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
|
||||
17BE..17C5 ; Other_Alphabetic # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU
|
||||
17C6 ; Other_Alphabetic # Mn KHMER SIGN NIKAHIT
|
||||
17C7..17C8 ; Other_Alphabetic # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU
|
||||
1885..1886 ; Other_Alphabetic # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
|
||||
18A9 ; Other_Alphabetic # Mn MONGOLIAN LETTER ALI GALI DAGALGA
|
||||
1920..1922 ; Other_Alphabetic # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U
|
||||
1923..1926 ; Other_Alphabetic # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU
|
||||
@ -613,6 +624,7 @@ A825..A826 ; Other_Alphabetic # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NA
|
||||
A827 ; Other_Alphabetic # Mc SYLOTI NAGRI VOWEL SIGN OO
|
||||
A880..A881 ; Other_Alphabetic # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA
|
||||
A8B4..A8C3 ; Other_Alphabetic # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU
|
||||
A8C5 ; Other_Alphabetic # Mn SAURASHTRA SIGN CANDRABINDU
|
||||
A926..A92A ; Other_Alphabetic # Mn [5] KAYAH LI VOWEL UE..KAYAH LI VOWEL O
|
||||
A947..A951 ; Other_Alphabetic # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R
|
||||
A952 ; Other_Alphabetic # Mc REJANG CONSONANT SIGN H
|
||||
@ -671,6 +683,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
|
||||
11232..11233 ; Other_Alphabetic # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU
|
||||
11234 ; Other_Alphabetic # Mn KHOJKI SIGN ANUSVARA
|
||||
11237 ; Other_Alphabetic # Mn KHOJKI SIGN SHADDA
|
||||
1123E ; Other_Alphabetic # Mn KHOJKI SIGN SUKUN
|
||||
112DF ; Other_Alphabetic # Mn KHUDAWADI SIGN ANUSVARA
|
||||
112E0..112E2 ; Other_Alphabetic # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II
|
||||
112E3..112E8 ; Other_Alphabetic # Mn [6] KHUDAWADI VOWEL SIGN U..KHUDAWADI VOWEL SIGN AU
|
||||
@ -683,6 +696,11 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
|
||||
1134B..1134C ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU
|
||||
11357 ; Other_Alphabetic # Mc GRANTHA AU LENGTH MARK
|
||||
11362..11363 ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL
|
||||
11435..11437 ; Other_Alphabetic # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II
|
||||
11438..1143F ; Other_Alphabetic # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI
|
||||
11440..11441 ; Other_Alphabetic # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU
|
||||
11443..11444 ; Other_Alphabetic # Mn [2] NEWA SIGN CANDRABINDU..NEWA SIGN ANUSVARA
|
||||
11445 ; Other_Alphabetic # Mc NEWA SIGN VISARGA
|
||||
114B0..114B2 ; Other_Alphabetic # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II
|
||||
114B3..114B8 ; Other_Alphabetic # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL
|
||||
114B9 ; Other_Alphabetic # Mc TIRHUTA VOWEL SIGN E
|
||||
@ -712,14 +730,48 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
|
||||
11722..11725 ; Other_Alphabetic # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU
|
||||
11726 ; Other_Alphabetic # Mc AHOM VOWEL SIGN E
|
||||
11727..1172A ; Other_Alphabetic # Mn [4] AHOM VOWEL SIGN AW..AHOM VOWEL SIGN AM
|
||||
11A01..11A06 ; Other_Alphabetic # Mn [6] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL SIGN O
|
||||
11A07..11A08 ; Other_Alphabetic # Mc [2] ZANABAZAR SQUARE VOWEL SIGN AI..ZANABAZAR SQUARE VOWEL SIGN AU
|
||||
11A09..11A0A ; Other_Alphabetic # Mn [2] ZANABAZAR SQUARE VOWEL SIGN REVERSED I..ZANABAZAR SQUARE VOWEL LENGTH MARK
|
||||
11A35..11A38 ; Other_Alphabetic # Mn [4] ZANABAZAR SQUARE SIGN CANDRABINDU..ZANABAZAR SQUARE SIGN ANUSVARA
|
||||
11A39 ; Other_Alphabetic # Mc ZANABAZAR SQUARE SIGN VISARGA
|
||||
11A3B..11A3E ; Other_Alphabetic # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA
|
||||
11A51..11A56 ; Other_Alphabetic # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE
|
||||
11A57..11A58 ; Other_Alphabetic # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU
|
||||
11A59..11A5B ; Other_Alphabetic # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK
|
||||
11A8A..11A96 ; Other_Alphabetic # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA
|
||||
11A97 ; Other_Alphabetic # Mc SOYOMBO SIGN VISARGA
|
||||
11C2F ; Other_Alphabetic # Mc BHAIKSUKI VOWEL SIGN AA
|
||||
11C30..11C36 ; Other_Alphabetic # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L
|
||||
11C38..11C3D ; Other_Alphabetic # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA
|
||||
11C3E ; Other_Alphabetic # Mc BHAIKSUKI SIGN VISARGA
|
||||
11C92..11CA7 ; Other_Alphabetic # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA
|
||||
11CA9 ; Other_Alphabetic # Mc MARCHEN SUBJOINED LETTER YA
|
||||
11CAA..11CB0 ; Other_Alphabetic # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA
|
||||
11CB1 ; Other_Alphabetic # Mc MARCHEN VOWEL SIGN I
|
||||
11CB2..11CB3 ; Other_Alphabetic # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E
|
||||
11CB4 ; Other_Alphabetic # Mc MARCHEN VOWEL SIGN O
|
||||
11CB5..11CB6 ; Other_Alphabetic # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU
|
||||
11D31..11D36 ; Other_Alphabetic # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R
|
||||
11D3A ; Other_Alphabetic # Mn MASARAM GONDI VOWEL SIGN E
|
||||
11D3C..11D3D ; Other_Alphabetic # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O
|
||||
11D3F..11D41 ; Other_Alphabetic # Mn [3] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI SIGN VISARGA
|
||||
11D43 ; Other_Alphabetic # Mn MASARAM GONDI SIGN CANDRA
|
||||
11D47 ; Other_Alphabetic # Mn MASARAM GONDI RA-KARA
|
||||
16B30..16B36 ; Other_Alphabetic # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
|
||||
16F51..16F7E ; Other_Alphabetic # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG
|
||||
1BC9E ; Other_Alphabetic # Mn DUPLOYAN DOUBLE MARK
|
||||
1E000..1E006 ; Other_Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
|
||||
1E008..1E018 ; Other_Alphabetic # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
|
||||
1E01B..1E021 ; Other_Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
|
||||
1E023..1E024 ; Other_Alphabetic # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
|
||||
1E026..1E02A ; Other_Alphabetic # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
|
||||
1E947 ; Other_Alphabetic # Mn ADLAM HAMZA
|
||||
1F130..1F149 ; Other_Alphabetic # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z
|
||||
1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
|
||||
1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z
|
||||
|
||||
# Total code points: 1116
|
||||
# Total code points: 1300
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -728,16 +780,20 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
|
||||
3021..3029 ; Ideographic # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE
|
||||
3038..303A ; Ideographic # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY
|
||||
3400..4DB5 ; Ideographic # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
|
||||
4E00..9FD5 ; Ideographic # Lo [20950] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FD5
|
||||
4E00..9FEA ; Ideographic # Lo [20971] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FEA
|
||||
F900..FA6D ; Ideographic # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D
|
||||
FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
|
||||
17000..187EC ; Ideographic # Lo [6125] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187EC
|
||||
18800..18AF2 ; Ideographic # Lo [755] TANGUT COMPONENT-001..TANGUT COMPONENT-755
|
||||
1B170..1B2FB ; Ideographic # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB
|
||||
20000..2A6D6 ; Ideographic # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
|
||||
2A700..2B734 ; Ideographic # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
|
||||
2B740..2B81D ; Ideographic # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
|
||||
2B820..2CEA1 ; Ideographic # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
|
||||
2CEB0..2EBE0 ; Ideographic # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
|
||||
2F800..2FA1D ; Ideographic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
|
||||
|
||||
# Total code points: 81404
|
||||
# Total code points: 96174
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -793,12 +849,14 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM
|
||||
0A4D ; Diacritic # Mn GURMUKHI SIGN VIRAMA
|
||||
0ABC ; Diacritic # Mn GUJARATI SIGN NUKTA
|
||||
0ACD ; Diacritic # Mn GUJARATI SIGN VIRAMA
|
||||
0AFD..0AFF ; Diacritic # Mn [3] GUJARATI SIGN THREE-DOT NUKTA ABOVE..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE
|
||||
0B3C ; Diacritic # Mn ORIYA SIGN NUKTA
|
||||
0B4D ; Diacritic # Mn ORIYA SIGN VIRAMA
|
||||
0BCD ; Diacritic # Mn TAMIL SIGN VIRAMA
|
||||
0C4D ; Diacritic # Mn TELUGU SIGN VIRAMA
|
||||
0CBC ; Diacritic # Mn KANNADA SIGN NUKTA
|
||||
0CCD ; Diacritic # Mn KANNADA SIGN VIRAMA
|
||||
0D3B..0D3C ; Diacritic # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA
|
||||
0D4D ; Diacritic # Mn MALAYALAM SIGN VIRAMA
|
||||
0DCA ; Diacritic # Mn SINHALA SIGN AL-LAKUNA
|
||||
0E47..0E4C ; Diacritic # Mn [6] THAI CHARACTER MAITAIKHU..THAI CHARACTER THANTHAKHAT
|
||||
@ -838,10 +896,11 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM
|
||||
1CE2..1CE8 ; Diacritic # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
|
||||
1CED ; Diacritic # Mn VEDIC SIGN TIRYAK
|
||||
1CF4 ; Diacritic # Mn VEDIC TONE CANDRA ABOVE
|
||||
1CF7 ; Diacritic # Mc VEDIC SIGN ATIKRAMA
|
||||
1CF8..1CF9 ; Diacritic # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
|
||||
1D2C..1D6A ; Diacritic # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI
|
||||
1DC4..1DCF ; Diacritic # Mn [12] COMBINING MACRON-ACUTE..COMBINING ZIGZAG BELOW
|
||||
1DF5 ; Diacritic # Mn COMBINING UP TACK ABOVE
|
||||
1DF5..1DF9 ; Diacritic # Mn [5] COMBINING UP TACK ABOVE..COMBINING WIDE INVERTED BRIDGE BELOW
|
||||
1DFD..1DFF ; Diacritic # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
|
||||
1FBD ; Diacritic # Sk GREEK KORONIS
|
||||
1FBF..1FC1 ; Diacritic # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI
|
||||
@ -906,12 +965,20 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON
|
||||
1134D ; Diacritic # Mc GRANTHA SIGN VIRAMA
|
||||
11366..1136C ; Diacritic # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX
|
||||
11370..11374 ; Diacritic # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA
|
||||
11442 ; Diacritic # Mn NEWA SIGN VIRAMA
|
||||
11446 ; Diacritic # Mn NEWA SIGN NUKTA
|
||||
114C2..114C3 ; Diacritic # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA
|
||||
115BF..115C0 ; Diacritic # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA
|
||||
1163F ; Diacritic # Mn MODI SIGN VIRAMA
|
||||
116B6 ; Diacritic # Mc TAKRI SIGN VIRAMA
|
||||
116B7 ; Diacritic # Mn TAKRI SIGN NUKTA
|
||||
1172B ; Diacritic # Mn AHOM SIGN KILLER
|
||||
11A34 ; Diacritic # Mn ZANABAZAR SQUARE SIGN VIRAMA
|
||||
11A47 ; Diacritic # Mn ZANABAZAR SQUARE SUBJOINER
|
||||
11A99 ; Diacritic # Mn SOYOMBO SUBJOINER
|
||||
11C3F ; Diacritic # Mn BHAIKSUKI SIGN VIRAMA
|
||||
11D42 ; Diacritic # Mn MASARAM GONDI SIGN NUKTA
|
||||
11D44..11D45 ; Diacritic # Mn [2] MASARAM GONDI SIGN HALANTA..MASARAM GONDI VIRAMA
|
||||
16AF0..16AF4 ; Diacritic # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE
|
||||
16F8F..16F92 ; Diacritic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
|
||||
16F93..16F9F ; Diacritic # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8
|
||||
@ -921,8 +988,10 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON
|
||||
1D185..1D18B ; Diacritic # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
|
||||
1D1AA..1D1AD ; Diacritic # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
|
||||
1E8D0..1E8D6 ; Diacritic # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
|
||||
1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK
|
||||
1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA
|
||||
|
||||
# Total code points: 773
|
||||
# Total code points: 798
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -951,9 +1020,12 @@ AAF3..AAF4 ; Extender # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETE
|
||||
FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
1135D ; Extender # Lo GRANTHA SIGN PLUTA
|
||||
115C6..115C8 ; Extender # Po [3] SIDDHAM REPETITION MARK-1..SIDDHAM REPETITION MARK-3
|
||||
11A98 ; Extender # Mn SOYOMBO GEMINATION MARK
|
||||
16B42..16B43 ; Extender # Lm [2] PAHAWH HMONG SIGN VOS NRUA..PAHAWH HMONG SIGN IB YAM
|
||||
16FE0..16FE1 ; Extender # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK
|
||||
1E944..1E946 ; Extender # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK
|
||||
|
||||
# Total code points: 38
|
||||
# Total code points: 44
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1027,7 +1099,7 @@ FFFFE..FFFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-FFFFE>..<noncha
|
||||
0D57 ; Other_Grapheme_Extend # Mc MALAYALAM AU LENGTH MARK
|
||||
0DCF ; Other_Grapheme_Extend # Mc SINHALA VOWEL SIGN AELA-PILLA
|
||||
0DDF ; Other_Grapheme_Extend # Mc SINHALA VOWEL SIGN GAYANUKITTA
|
||||
200C..200D ; Other_Grapheme_Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
|
||||
200C ; Other_Grapheme_Extend # Cf ZERO WIDTH NON-JOINER
|
||||
302E..302F ; Other_Grapheme_Extend # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK
|
||||
FF9E..FF9F ; Other_Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
|
||||
1133E ; Other_Grapheme_Extend # Mc GRANTHA VOWEL SIGN AA
|
||||
@ -1037,8 +1109,9 @@ FF9E..FF9F ; Other_Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND
|
||||
115AF ; Other_Grapheme_Extend # Mc SIDDHAM VOWEL SIGN AA
|
||||
1D165 ; Other_Grapheme_Extend # Mc MUSICAL SYMBOL COMBINING STEM
|
||||
1D16E..1D172 ; Other_Grapheme_Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5
|
||||
E0020..E007F ; Other_Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG
|
||||
|
||||
# Total code points: 30
|
||||
# Total code points: 125
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1064,7 +1137,7 @@ FF9E..FF9F ; Other_Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND
|
||||
# ================================================
|
||||
|
||||
3400..4DB5 ; Unified_Ideograph # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
|
||||
4E00..9FD5 ; Unified_Ideograph # Lo [20950] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FD5
|
||||
4E00..9FEA ; Unified_Ideograph # Lo [20971] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FEA
|
||||
FA0E..FA0F ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA0E..CJK COMPATIBILITY IDEOGRAPH-FA0F
|
||||
FA11 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA11
|
||||
FA13..FA14 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA13..CJK COMPATIBILITY IDEOGRAPH-FA14
|
||||
@ -1076,8 +1149,9 @@ FA27..FA29 ; Unified_Ideograph # Lo [3] CJK COMPATIBILITY IDEOGRAPH-FA27..C
|
||||
2A700..2B734 ; Unified_Ideograph # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
|
||||
2B740..2B81D ; Unified_Ideograph # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
|
||||
2B820..2CEA1 ; Unified_Ideograph # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
|
||||
2CEB0..2EBE0 ; Unified_Ideograph # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
|
||||
|
||||
# Total code points: 80388
|
||||
# Total code points: 87882
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1106,9 +1180,8 @@ E01F0..E0FFF ; Other_Default_Ignorable_Code_Point # Cn [3600] <reserved-E01F0>.
|
||||
2329 ; Deprecated # Ps LEFT-POINTING ANGLE BRACKET
|
||||
232A ; Deprecated # Pe RIGHT-POINTING ANGLE BRACKET
|
||||
E0001 ; Deprecated # Cf LANGUAGE TAG
|
||||
E007F ; Deprecated # Cf CANCEL TAG
|
||||
|
||||
# Total code points: 16
|
||||
# Total code points: 15
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1160,11 +1233,12 @@ AABB..AABC ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL AUE..TAI VIET
|
||||
|
||||
# ================================================
|
||||
|
||||
1885..1886 ; Other_ID_Start # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
|
||||
2118 ; Other_ID_Start # Sm SCRIPT CAPITAL P
|
||||
212E ; Other_ID_Start # So ESTIMATED SYMBOL
|
||||
309B..309C ; Other_ID_Start # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
|
||||
|
||||
# Total code points: 4
|
||||
# Total code points: 6
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1177,72 +1251,76 @@ AABB..AABC ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL AUE..TAI VIET
|
||||
|
||||
# ================================================
|
||||
|
||||
0021 ; STerm # Po EXCLAMATION MARK
|
||||
002E ; STerm # Po FULL STOP
|
||||
003F ; STerm # Po QUESTION MARK
|
||||
0589 ; STerm # Po ARMENIAN FULL STOP
|
||||
061F ; STerm # Po ARABIC QUESTION MARK
|
||||
06D4 ; STerm # Po ARABIC FULL STOP
|
||||
0700..0702 ; STerm # Po [3] SYRIAC END OF PARAGRAPH..SYRIAC SUBLINEAR FULL STOP
|
||||
07F9 ; STerm # Po NKO EXCLAMATION MARK
|
||||
0964..0965 ; STerm # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA
|
||||
104A..104B ; STerm # Po [2] MYANMAR SIGN LITTLE SECTION..MYANMAR SIGN SECTION
|
||||
1362 ; STerm # Po ETHIOPIC FULL STOP
|
||||
1367..1368 ; STerm # Po [2] ETHIOPIC QUESTION MARK..ETHIOPIC PARAGRAPH SEPARATOR
|
||||
166E ; STerm # Po CANADIAN SYLLABICS FULL STOP
|
||||
1735..1736 ; STerm # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION
|
||||
1803 ; STerm # Po MONGOLIAN FULL STOP
|
||||
1809 ; STerm # Po MONGOLIAN MANCHU FULL STOP
|
||||
1944..1945 ; STerm # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK
|
||||
1AA8..1AAB ; STerm # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU
|
||||
1B5A..1B5B ; STerm # Po [2] BALINESE PANTI..BALINESE PAMADA
|
||||
1B5E..1B5F ; STerm # Po [2] BALINESE CARIK SIKI..BALINESE CARIK PAREREN
|
||||
1C3B..1C3C ; STerm # Po [2] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION NYET THYOOM TA-ROL
|
||||
1C7E..1C7F ; STerm # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
|
||||
203C..203D ; STerm # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG
|
||||
2047..2049 ; STerm # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK
|
||||
2E2E ; STerm # Po REVERSED QUESTION MARK
|
||||
2E3C ; STerm # Po STENOGRAPHIC FULL STOP
|
||||
3002 ; STerm # Po IDEOGRAPHIC FULL STOP
|
||||
A4FF ; STerm # Po LISU PUNCTUATION FULL STOP
|
||||
A60E..A60F ; STerm # Po [2] VAI FULL STOP..VAI QUESTION MARK
|
||||
A6F3 ; STerm # Po BAMUM FULL STOP
|
||||
A6F7 ; STerm # Po BAMUM QUESTION MARK
|
||||
A876..A877 ; STerm # Po [2] PHAGS-PA MARK SHAD..PHAGS-PA MARK DOUBLE SHAD
|
||||
A8CE..A8CF ; STerm # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA
|
||||
A92F ; STerm # Po KAYAH LI SIGN SHYA
|
||||
A9C8..A9C9 ; STerm # Po [2] JAVANESE PADA LINGSA..JAVANESE PADA LUNGSI
|
||||
AA5D..AA5F ; STerm # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA
|
||||
AAF0..AAF1 ; STerm # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM
|
||||
ABEB ; STerm # Po MEETEI MAYEK CHEIKHEI
|
||||
FE52 ; STerm # Po SMALL FULL STOP
|
||||
FE56..FE57 ; STerm # Po [2] SMALL QUESTION MARK..SMALL EXCLAMATION MARK
|
||||
FF01 ; STerm # Po FULLWIDTH EXCLAMATION MARK
|
||||
FF0E ; STerm # Po FULLWIDTH FULL STOP
|
||||
FF1F ; STerm # Po FULLWIDTH QUESTION MARK
|
||||
FF61 ; STerm # Po HALFWIDTH IDEOGRAPHIC FULL STOP
|
||||
10A56..10A57 ; STerm # Po [2] KHAROSHTHI PUNCTUATION DANDA..KHAROSHTHI PUNCTUATION DOUBLE DANDA
|
||||
11047..11048 ; STerm # Po [2] BRAHMI DANDA..BRAHMI DOUBLE DANDA
|
||||
110BE..110C1 ; STerm # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA
|
||||
11141..11143 ; STerm # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK
|
||||
111C5..111C6 ; STerm # Po [2] SHARADA DANDA..SHARADA DOUBLE DANDA
|
||||
111CD ; STerm # Po SHARADA SUTRA MARK
|
||||
111DE..111DF ; STerm # Po [2] SHARADA SECTION MARK-1..SHARADA SECTION MARK-2
|
||||
11238..11239 ; STerm # Po [2] KHOJKI DANDA..KHOJKI DOUBLE DANDA
|
||||
1123B..1123C ; STerm # Po [2] KHOJKI SECTION MARK..KHOJKI DOUBLE SECTION MARK
|
||||
112A9 ; STerm # Po MULTANI SECTION MARK
|
||||
115C2..115C3 ; STerm # Po [2] SIDDHAM DANDA..SIDDHAM DOUBLE DANDA
|
||||
115C9..115D7 ; STerm # Po [15] SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES
|
||||
11641..11642 ; STerm # Po [2] MODI DANDA..MODI DOUBLE DANDA
|
||||
1173C..1173E ; STerm # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI
|
||||
16A6E..16A6F ; STerm # Po [2] MRO DANDA..MRO DOUBLE DANDA
|
||||
16AF5 ; STerm # Po BASSA VAH FULL STOP
|
||||
16B37..16B38 ; STerm # Po [2] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS TSHAB CEEB
|
||||
16B44 ; STerm # Po PAHAWH HMONG SIGN XAUS
|
||||
1BC9F ; STerm # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP
|
||||
1DA88 ; STerm # Po SIGNWRITING FULL STOP
|
||||
0021 ; Sentence_Terminal # Po EXCLAMATION MARK
|
||||
002E ; Sentence_Terminal # Po FULL STOP
|
||||
003F ; Sentence_Terminal # Po QUESTION MARK
|
||||
0589 ; Sentence_Terminal # Po ARMENIAN FULL STOP
|
||||
061F ; Sentence_Terminal # Po ARABIC QUESTION MARK
|
||||
06D4 ; Sentence_Terminal # Po ARABIC FULL STOP
|
||||
0700..0702 ; Sentence_Terminal # Po [3] SYRIAC END OF PARAGRAPH..SYRIAC SUBLINEAR FULL STOP
|
||||
07F9 ; Sentence_Terminal # Po NKO EXCLAMATION MARK
|
||||
0964..0965 ; Sentence_Terminal # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA
|
||||
104A..104B ; Sentence_Terminal # Po [2] MYANMAR SIGN LITTLE SECTION..MYANMAR SIGN SECTION
|
||||
1362 ; Sentence_Terminal # Po ETHIOPIC FULL STOP
|
||||
1367..1368 ; Sentence_Terminal # Po [2] ETHIOPIC QUESTION MARK..ETHIOPIC PARAGRAPH SEPARATOR
|
||||
166E ; Sentence_Terminal # Po CANADIAN SYLLABICS FULL STOP
|
||||
1735..1736 ; Sentence_Terminal # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION
|
||||
1803 ; Sentence_Terminal # Po MONGOLIAN FULL STOP
|
||||
1809 ; Sentence_Terminal # Po MONGOLIAN MANCHU FULL STOP
|
||||
1944..1945 ; Sentence_Terminal # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK
|
||||
1AA8..1AAB ; Sentence_Terminal # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU
|
||||
1B5A..1B5B ; Sentence_Terminal # Po [2] BALINESE PANTI..BALINESE PAMADA
|
||||
1B5E..1B5F ; Sentence_Terminal # Po [2] BALINESE CARIK SIKI..BALINESE CARIK PAREREN
|
||||
1C3B..1C3C ; Sentence_Terminal # Po [2] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION NYET THYOOM TA-ROL
|
||||
1C7E..1C7F ; Sentence_Terminal # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
|
||||
203C..203D ; Sentence_Terminal # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG
|
||||
2047..2049 ; Sentence_Terminal # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK
|
||||
2E2E ; Sentence_Terminal # Po REVERSED QUESTION MARK
|
||||
2E3C ; Sentence_Terminal # Po STENOGRAPHIC FULL STOP
|
||||
3002 ; Sentence_Terminal # Po IDEOGRAPHIC FULL STOP
|
||||
A4FF ; Sentence_Terminal # Po LISU PUNCTUATION FULL STOP
|
||||
A60E..A60F ; Sentence_Terminal # Po [2] VAI FULL STOP..VAI QUESTION MARK
|
||||
A6F3 ; Sentence_Terminal # Po BAMUM FULL STOP
|
||||
A6F7 ; Sentence_Terminal # Po BAMUM QUESTION MARK
|
||||
A876..A877 ; Sentence_Terminal # Po [2] PHAGS-PA MARK SHAD..PHAGS-PA MARK DOUBLE SHAD
|
||||
A8CE..A8CF ; Sentence_Terminal # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA
|
||||
A92F ; Sentence_Terminal # Po KAYAH LI SIGN SHYA
|
||||
A9C8..A9C9 ; Sentence_Terminal # Po [2] JAVANESE PADA LINGSA..JAVANESE PADA LUNGSI
|
||||
AA5D..AA5F ; Sentence_Terminal # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA
|
||||
AAF0..AAF1 ; Sentence_Terminal # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM
|
||||
ABEB ; Sentence_Terminal # Po MEETEI MAYEK CHEIKHEI
|
||||
FE52 ; Sentence_Terminal # Po SMALL FULL STOP
|
||||
FE56..FE57 ; Sentence_Terminal # Po [2] SMALL QUESTION MARK..SMALL EXCLAMATION MARK
|
||||
FF01 ; Sentence_Terminal # Po FULLWIDTH EXCLAMATION MARK
|
||||
FF0E ; Sentence_Terminal # Po FULLWIDTH FULL STOP
|
||||
FF1F ; Sentence_Terminal # Po FULLWIDTH QUESTION MARK
|
||||
FF61 ; Sentence_Terminal # Po HALFWIDTH IDEOGRAPHIC FULL STOP
|
||||
10A56..10A57 ; Sentence_Terminal # Po [2] KHAROSHTHI PUNCTUATION DANDA..KHAROSHTHI PUNCTUATION DOUBLE DANDA
|
||||
11047..11048 ; Sentence_Terminal # Po [2] BRAHMI DANDA..BRAHMI DOUBLE DANDA
|
||||
110BE..110C1 ; Sentence_Terminal # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA
|
||||
11141..11143 ; Sentence_Terminal # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK
|
||||
111C5..111C6 ; Sentence_Terminal # Po [2] SHARADA DANDA..SHARADA DOUBLE DANDA
|
||||
111CD ; Sentence_Terminal # Po SHARADA SUTRA MARK
|
||||
111DE..111DF ; Sentence_Terminal # Po [2] SHARADA SECTION MARK-1..SHARADA SECTION MARK-2
|
||||
11238..11239 ; Sentence_Terminal # Po [2] KHOJKI DANDA..KHOJKI DOUBLE DANDA
|
||||
1123B..1123C ; Sentence_Terminal # Po [2] KHOJKI SECTION MARK..KHOJKI DOUBLE SECTION MARK
|
||||
112A9 ; Sentence_Terminal # Po MULTANI SECTION MARK
|
||||
1144B..1144C ; Sentence_Terminal # Po [2] NEWA DANDA..NEWA DOUBLE DANDA
|
||||
115C2..115C3 ; Sentence_Terminal # Po [2] SIDDHAM DANDA..SIDDHAM DOUBLE DANDA
|
||||
115C9..115D7 ; Sentence_Terminal # Po [15] SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES
|
||||
11641..11642 ; Sentence_Terminal # Po [2] MODI DANDA..MODI DOUBLE DANDA
|
||||
1173C..1173E ; Sentence_Terminal # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI
|
||||
11A42..11A43 ; Sentence_Terminal # Po [2] ZANABAZAR SQUARE MARK SHAD..ZANABAZAR SQUARE MARK DOUBLE SHAD
|
||||
11A9B..11A9C ; Sentence_Terminal # Po [2] SOYOMBO MARK SHAD..SOYOMBO MARK DOUBLE SHAD
|
||||
11C41..11C42 ; Sentence_Terminal # Po [2] BHAIKSUKI DANDA..BHAIKSUKI DOUBLE DANDA
|
||||
16A6E..16A6F ; Sentence_Terminal # Po [2] MRO DANDA..MRO DOUBLE DANDA
|
||||
16AF5 ; Sentence_Terminal # Po BASSA VAH FULL STOP
|
||||
16B37..16B38 ; Sentence_Terminal # Po [2] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS TSHAB CEEB
|
||||
16B44 ; Sentence_Terminal # Po PAHAWH HMONG SIGN XAUS
|
||||
1BC9F ; Sentence_Terminal # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP
|
||||
1DA88 ; Sentence_Terminal # Po SIGNWRITING FULL STOP
|
||||
|
||||
# Total code points: 120
|
||||
# Total code points: 128
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1359,9 +1437,7 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
|
||||
239B..23B3 ; Pattern_Syntax # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM
|
||||
23B4..23DB ; Pattern_Syntax # So [40] TOP SQUARE BRACKET..FUSE
|
||||
23DC..23E1 ; Pattern_Syntax # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET
|
||||
23E2..23FA ; Pattern_Syntax # So [25] WHITE TRAPEZIUM..BLACK CIRCLE FOR RECORD
|
||||
23FB..23FF ; Pattern_Syntax # Cn [5] <reserved-23FB>..<reserved-23FF>
|
||||
2400..2426 ; Pattern_Syntax # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO
|
||||
23E2..2426 ; Pattern_Syntax # So [69] WHITE TRAPEZIUM..SYMBOL FOR SUBSTITUTE FORM TWO
|
||||
2427..243F ; Pattern_Syntax # Cn [25] <reserved-2427>..<reserved-243F>
|
||||
2440..244A ; Pattern_Syntax # So [11] OCR HOOK..OCR DOUBLE BACKSLASH
|
||||
244B..245F ; Pattern_Syntax # Cn [21] <reserved-244B>..<reserved-245F>
|
||||
@ -1449,8 +1525,8 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
|
||||
2BBA..2BBC ; Pattern_Syntax # Cn [3] <reserved-2BBA>..<reserved-2BBC>
|
||||
2BBD..2BC8 ; Pattern_Syntax # So [12] BALLOT BOX WITH LIGHT X..BLACK MEDIUM RIGHT-POINTING TRIANGLE CENTRED
|
||||
2BC9 ; Pattern_Syntax # Cn <reserved-2BC9>
|
||||
2BCA..2BD1 ; Pattern_Syntax # So [8] TOP HALF BLACK CIRCLE..UNCERTAINTY SIGN
|
||||
2BD2..2BEB ; Pattern_Syntax # Cn [26] <reserved-2BD2>..<reserved-2BEB>
|
||||
2BCA..2BD2 ; Pattern_Syntax # So [9] TOP HALF BLACK CIRCLE..GROUP MARK
|
||||
2BD3..2BEB ; Pattern_Syntax # Cn [25] <reserved-2BD3>..<reserved-2BEB>
|
||||
2BEC..2BEF ; Pattern_Syntax # So [4] LEFTWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS..DOWNWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS
|
||||
2BF0..2BFF ; Pattern_Syntax # Cn [16] <reserved-2BF0>..<reserved-2BFF>
|
||||
2E00..2E01 ; Pattern_Syntax # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER
|
||||
@ -1490,7 +1566,8 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
|
||||
2E40 ; Pattern_Syntax # Pd DOUBLE HYPHEN
|
||||
2E41 ; Pattern_Syntax # Po REVERSED COMMA
|
||||
2E42 ; Pattern_Syntax # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK
|
||||
2E43..2E7F ; Pattern_Syntax # Cn [61] <reserved-2E43>..<reserved-2E7F>
|
||||
2E43..2E49 ; Pattern_Syntax # Po [7] DASH WITH LEFT UPTURN..DOUBLE STACKED COMMA
|
||||
2E4A..2E7F ; Pattern_Syntax # Cn [54] <reserved-2E4A>..<reserved-2E7F>
|
||||
3001..3003 ; Pattern_Syntax # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
|
||||
3008 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET
|
||||
3009 ; Pattern_Syntax # Pe RIGHT ANGLE BRACKET
|
||||
@ -1522,4 +1599,20 @@ FE45..FE46 ; Pattern_Syntax # Po [2] SESAME DOT..WHITE SESAME DOT
|
||||
|
||||
# Total code points: 2760
|
||||
|
||||
# ================================================
|
||||
|
||||
0600..0605 ; Prepended_Concatenation_Mark # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE
|
||||
06DD ; Prepended_Concatenation_Mark # Cf ARABIC END OF AYAH
|
||||
070F ; Prepended_Concatenation_Mark # Cf SYRIAC ABBREVIATION MARK
|
||||
08E2 ; Prepended_Concatenation_Mark # Cf ARABIC DISPUTED END OF AYAH
|
||||
110BD ; Prepended_Concatenation_Mark # Cf KAITHI NUMBER SIGN
|
||||
|
||||
# Total code points: 10
|
||||
|
||||
# ================================================
|
||||
|
||||
1F1E6..1F1FF ; Regional_Indicator # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z
|
||||
|
||||
# Total code points: 26
|
||||
|
||||
# EOF
|
||||
|
@ -1,10 +1,11 @@
|
||||
# PropertyValueAliases-8.0.0.txt
|
||||
# Date: 2015-03-11, 22:29:33 GMT [MD]
|
||||
# PropertyValueAliases-10.0.0.txt
|
||||
# Date: 2017-05-17, 08:45:34 GMT
|
||||
# Copyright (c) 2017 Unicode, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2015 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
# This file contains aliases for property values used in the UCD.
|
||||
# These names can be used for XML formats of UCD data, for regular-expression
|
||||
@ -78,6 +79,8 @@ age; 6.2 ; V6_2
|
||||
age; 6.3 ; V6_3
|
||||
age; 7.0 ; V7_0
|
||||
age; 8.0 ; V8_0
|
||||
age; 9.0 ; V9_0
|
||||
age; 10.0 ; V10_0
|
||||
age; NA ; Unassigned
|
||||
|
||||
# Alphabetic (Alpha)
|
||||
@ -138,6 +141,7 @@ bpt; o ; Open
|
||||
|
||||
# Block (blk)
|
||||
|
||||
blk; Adlam ; Adlam
|
||||
blk; Aegean_Numbers ; Aegean_Numbers
|
||||
blk; Ahom ; Ahom
|
||||
blk; Alchemical ; Alchemical_Symbols
|
||||
@ -162,6 +166,7 @@ blk; Bamum_Sup ; Bamum_Supplement
|
||||
blk; Bassa_Vah ; Bassa_Vah
|
||||
blk; Batak ; Batak
|
||||
blk; Bengali ; Bengali
|
||||
blk; Bhaiksuki ; Bhaiksuki
|
||||
blk; Block_Elements ; Block_Elements
|
||||
blk; Bopomofo ; Bopomofo
|
||||
blk; Bopomofo_Ext ; Bopomofo_Extended
|
||||
@ -187,6 +192,7 @@ blk; CJK_Ext_B ; CJK_Unified_Ideographs_Extension_B
|
||||
blk; CJK_Ext_C ; CJK_Unified_Ideographs_Extension_C
|
||||
blk; CJK_Ext_D ; CJK_Unified_Ideographs_Extension_D
|
||||
blk; CJK_Ext_E ; CJK_Unified_Ideographs_Extension_E
|
||||
blk; CJK_Ext_F ; CJK_Unified_Ideographs_Extension_F
|
||||
blk; CJK_Radicals_Sup ; CJK_Radicals_Supplement
|
||||
blk; CJK_Strokes ; CJK_Strokes
|
||||
blk; CJK_Symbols ; CJK_Symbols_And_Punctuation
|
||||
@ -202,6 +208,7 @@ blk; Cypriot_Syllabary ; Cypriot_Syllabary
|
||||
blk; Cyrillic ; Cyrillic
|
||||
blk; Cyrillic_Ext_A ; Cyrillic_Extended_A
|
||||
blk; Cyrillic_Ext_B ; Cyrillic_Extended_B
|
||||
blk; Cyrillic_Ext_C ; Cyrillic_Extended_C
|
||||
blk; Cyrillic_Sup ; Cyrillic_Supplement ; Cyrillic_Supplementary
|
||||
blk; Deseret ; Deseret
|
||||
blk; Devanagari ; Devanagari
|
||||
@ -230,6 +237,7 @@ blk; Geometric_Shapes_Ext ; Geometric_Shapes_Extended
|
||||
blk; Georgian ; Georgian
|
||||
blk; Georgian_Sup ; Georgian_Supplement
|
||||
blk; Glagolitic ; Glagolitic
|
||||
blk; Glagolitic_Sup ; Glagolitic_Supplement
|
||||
blk; Gothic ; Gothic
|
||||
blk; Grantha ; Grantha
|
||||
blk; Greek ; Greek_And_Coptic
|
||||
@ -246,6 +254,7 @@ blk; High_PU_Surrogates ; High_Private_Use_Surrogates
|
||||
blk; High_Surrogates ; High_Surrogates
|
||||
blk; Hiragana ; Hiragana
|
||||
blk; IDC ; Ideographic_Description_Characters
|
||||
blk; Ideographic_Symbols ; Ideographic_Symbols_And_Punctuation
|
||||
blk; Imperial_Aramaic ; Imperial_Aramaic
|
||||
blk; Indic_Number_Forms ; Common_Indic_Number_Forms
|
||||
blk; Inscriptional_Pahlavi ; Inscriptional_Pahlavi
|
||||
@ -256,6 +265,7 @@ blk; Jamo_Ext_A ; Hangul_Jamo_Extended_A
|
||||
blk; Jamo_Ext_B ; Hangul_Jamo_Extended_B
|
||||
blk; Javanese ; Javanese
|
||||
blk; Kaithi ; Kaithi
|
||||
blk; Kana_Ext_A ; Kana_Extended_A
|
||||
blk; Kana_Sup ; Kana_Supplement
|
||||
blk; Kanbun ; Kanbun
|
||||
blk; Kangxi ; Kangxi_Radicals
|
||||
@ -291,6 +301,8 @@ blk; Mahjong ; Mahjong_Tiles
|
||||
blk; Malayalam ; Malayalam
|
||||
blk; Mandaic ; Mandaic
|
||||
blk; Manichaean ; Manichaean
|
||||
blk; Marchen ; Marchen
|
||||
blk; Masaram_Gondi ; Masaram_Gondi
|
||||
blk; Math_Alphanum ; Mathematical_Alphanumeric_Symbols
|
||||
blk; Math_Operators ; Mathematical_Operators
|
||||
blk; Meetei_Mayek ; Meetei_Mayek
|
||||
@ -309,6 +321,7 @@ blk; Modi ; Modi
|
||||
blk; Modifier_Letters ; Spacing_Modifier_Letters
|
||||
blk; Modifier_Tone_Letters ; Modifier_Tone_Letters
|
||||
blk; Mongolian ; Mongolian
|
||||
blk; Mongolian_Sup ; Mongolian_Supplement
|
||||
blk; Mro ; Mro
|
||||
blk; Multani ; Multani
|
||||
blk; Music ; Musical_Symbols
|
||||
@ -318,8 +331,10 @@ blk; Myanmar_Ext_B ; Myanmar_Extended_B
|
||||
blk; Nabataean ; Nabataean
|
||||
blk; NB ; No_Block
|
||||
blk; New_Tai_Lue ; New_Tai_Lue
|
||||
blk; Newa ; Newa
|
||||
blk; NKo ; NKo
|
||||
blk; Number_Forms ; Number_Forms
|
||||
blk; Nushu ; Nushu
|
||||
blk; OCR ; Optical_Character_Recognition
|
||||
blk; Ogham ; Ogham
|
||||
blk; Ol_Chiki ; Ol_Chiki
|
||||
@ -332,6 +347,7 @@ blk; Old_South_Arabian ; Old_South_Arabian
|
||||
blk; Old_Turkic ; Old_Turkic
|
||||
blk; Oriya ; Oriya
|
||||
blk; Ornamental_Dingbats ; Ornamental_Dingbats
|
||||
blk; Osage ; Osage
|
||||
blk; Osmanya ; Osmanya
|
||||
blk; Pahawh_Hmong ; Pahawh_Hmong
|
||||
blk; Palmyrene ; Palmyrene
|
||||
@ -358,6 +374,7 @@ blk; Sinhala ; Sinhala
|
||||
blk; Sinhala_Archaic_Numbers ; Sinhala_Archaic_Numbers
|
||||
blk; Small_Forms ; Small_Form_Variants
|
||||
blk; Sora_Sompeng ; Sora_Sompeng
|
||||
blk; Soyombo ; Soyombo
|
||||
blk; Specials ; Specials
|
||||
blk; Sundanese ; Sundanese
|
||||
blk; Sundanese_Sup ; Sundanese_Supplement
|
||||
@ -373,6 +390,7 @@ blk; Super_And_Sub ; Superscripts_And_Subscripts
|
||||
blk; Sutton_SignWriting ; Sutton_SignWriting
|
||||
blk; Syloti_Nagri ; Syloti_Nagri
|
||||
blk; Syriac ; Syriac
|
||||
blk; Syriac_Sup ; Syriac_Supplement
|
||||
blk; Tagalog ; Tagalog
|
||||
blk; Tagbanwa ; Tagbanwa
|
||||
blk; Tags ; Tags
|
||||
@ -382,6 +400,8 @@ blk; Tai_Viet ; Tai_Viet
|
||||
blk; Tai_Xuan_Jing ; Tai_Xuan_Jing_Symbols
|
||||
blk; Takri ; Takri
|
||||
blk; Tamil ; Tamil
|
||||
blk; Tangut ; Tangut
|
||||
blk; Tangut_Components ; Tangut_Components
|
||||
blk; Telugu ; Telugu
|
||||
blk; Thaana ; Thaana
|
||||
blk; Thai ; Thai
|
||||
@ -401,6 +421,7 @@ blk; Warang_Citi ; Warang_Citi
|
||||
blk; Yi_Radicals ; Yi_Radicals
|
||||
blk; Yi_Syllables ; Yi_Syllables
|
||||
blk; Yijing ; Yijing_Hexagram_Symbols
|
||||
blk; Zanabazar_Square ; Zanabazar_Square
|
||||
|
||||
# Canonical_Combining_Class (ccc)
|
||||
|
||||
@ -650,7 +671,11 @@ Gr_Base; Y ; Yes ; T
|
||||
|
||||
GCB; CN ; Control
|
||||
GCB; CR ; CR
|
||||
GCB; EB ; E_Base
|
||||
GCB; EBG ; E_Base_GAZ
|
||||
GCB; EM ; E_Modifier
|
||||
GCB; EX ; Extend
|
||||
GCB; GAZ ; Glue_After_Zwj
|
||||
GCB; L ; L
|
||||
GCB; LF ; LF
|
||||
GCB; LV ; LV
|
||||
@ -661,6 +686,7 @@ GCB; SM ; SpacingMark
|
||||
GCB; T ; T
|
||||
GCB; V ; V
|
||||
GCB; XX ; Other
|
||||
GCB; ZWJ ; ZWJ
|
||||
|
||||
# Grapheme_Extend (Gr_Ext)
|
||||
|
||||
@ -723,6 +749,7 @@ Ideo; Y ; Yes ; T
|
||||
# Indic_Positional_Category (InPC)
|
||||
|
||||
InPC; Bottom ; Bottom
|
||||
InPC; Bottom_And_Left ; Bottom_And_Left
|
||||
InPC; Bottom_And_Right ; Bottom_And_Right
|
||||
InPC; Left ; Left
|
||||
InPC; Left_And_Right ; Left_And_Right
|
||||
@ -838,6 +865,9 @@ Join_C; Y ; Yes ; T
|
||||
|
||||
# Joining_Group (jg)
|
||||
|
||||
jg ; African_Feh ; African_Feh
|
||||
jg ; African_Noon ; African_Noon
|
||||
jg ; African_Qaf ; African_Qaf
|
||||
jg ; Ain ; Ain
|
||||
jg ; Alaph ; Alaph
|
||||
jg ; Alef ; Alef
|
||||
@ -864,6 +894,17 @@ jg ; Khaph ; Khaph
|
||||
jg ; Knotted_Heh ; Knotted_Heh
|
||||
jg ; Lam ; Lam
|
||||
jg ; Lamadh ; Lamadh
|
||||
jg ; Malayalam_Bha ; Malayalam_Bha
|
||||
jg ; Malayalam_Ja ; Malayalam_Ja
|
||||
jg ; Malayalam_Lla ; Malayalam_Lla
|
||||
jg ; Malayalam_Llla ; Malayalam_Llla
|
||||
jg ; Malayalam_Nga ; Malayalam_Nga
|
||||
jg ; Malayalam_Nna ; Malayalam_Nna
|
||||
jg ; Malayalam_Nnna ; Malayalam_Nnna
|
||||
jg ; Malayalam_Nya ; Malayalam_Nya
|
||||
jg ; Malayalam_Ra ; Malayalam_Ra
|
||||
jg ; Malayalam_Ssa ; Malayalam_Ssa
|
||||
jg ; Malayalam_Tta ; Malayalam_Tta
|
||||
jg ; Manichaean_Aleph ; Manichaean_Aleph
|
||||
jg ; Manichaean_Ayin ; Manichaean_Ayin
|
||||
jg ; Manichaean_Beth ; Manichaean_Beth
|
||||
@ -948,6 +989,8 @@ lb ; CL ; Close_Punctuation
|
||||
lb ; CM ; Combining_Mark
|
||||
lb ; CP ; Close_Parenthesis
|
||||
lb ; CR ; Carriage_Return
|
||||
lb ; EB ; E_Base
|
||||
lb ; EM ; E_Modifier
|
||||
lb ; EX ; Exclamation
|
||||
lb ; GL ; Glue
|
||||
lb ; H2 ; H2
|
||||
@ -976,6 +1019,7 @@ lb ; SY ; Break_Symbols
|
||||
lb ; WJ ; Word_Joiner
|
||||
lb ; XX ; Unknown
|
||||
lb ; ZW ; ZWSpace
|
||||
lb ; ZWJ ; ZWJ
|
||||
|
||||
# Logical_Order_Exception (LOE)
|
||||
|
||||
@ -1096,6 +1140,11 @@ Pat_Syn; Y ; Yes ; T
|
||||
Pat_WS; N ; No ; F ; False
|
||||
Pat_WS; Y ; Yes ; T ; True
|
||||
|
||||
# Prepended_Concatenation_Mark (PCM)
|
||||
|
||||
PCM; N ; No ; F ; False
|
||||
PCM; Y ; Yes ; T ; True
|
||||
|
||||
# Quotation_Mark (QMark)
|
||||
|
||||
QMark; N ; No ; F ; False
|
||||
@ -1106,13 +1155,14 @@ QMark; Y ; Yes ; T
|
||||
Radical; N ; No ; F ; False
|
||||
Radical; Y ; Yes ; T ; True
|
||||
|
||||
# STerm (STerm)
|
||||
# Regional_Indicator (RI)
|
||||
|
||||
STerm; N ; No ; F ; False
|
||||
STerm; Y ; Yes ; T ; True
|
||||
RI ; N ; No ; F ; False
|
||||
RI ; Y ; Yes ; T ; True
|
||||
|
||||
# Script (sc)
|
||||
|
||||
sc ; Adlm ; Adlam
|
||||
sc ; Aghb ; Caucasian_Albanian
|
||||
sc ; Ahom ; Ahom
|
||||
sc ; Arab ; Arabic
|
||||
@ -1124,6 +1174,7 @@ sc ; Bamu ; Bamum
|
||||
sc ; Bass ; Bassa_Vah
|
||||
sc ; Batk ; Batak
|
||||
sc ; Beng ; Bengali
|
||||
sc ; Bhks ; Bhaiksuki
|
||||
sc ; Bopo ; Bopomofo
|
||||
sc ; Brah ; Brahmi
|
||||
sc ; Brai ; Braille
|
||||
@ -1145,6 +1196,7 @@ sc ; Elba ; Elbasan
|
||||
sc ; Ethi ; Ethiopic
|
||||
sc ; Geor ; Georgian
|
||||
sc ; Glag ; Glagolitic
|
||||
sc ; Gonm ; Masaram_Gondi
|
||||
sc ; Goth ; Gothic
|
||||
sc ; Gran ; Grantha
|
||||
sc ; Grek ; Greek
|
||||
@ -1182,6 +1234,7 @@ sc ; Lydi ; Lydian
|
||||
sc ; Mahj ; Mahajani
|
||||
sc ; Mand ; Mandaic
|
||||
sc ; Mani ; Manichaean
|
||||
sc ; Marc ; Marchen
|
||||
sc ; Mend ; Mende_Kikakui
|
||||
sc ; Merc ; Meroitic_Cursive
|
||||
sc ; Mero ; Meroitic_Hieroglyphs
|
||||
@ -1194,11 +1247,14 @@ sc ; Mult ; Multani
|
||||
sc ; Mymr ; Myanmar
|
||||
sc ; Narb ; Old_North_Arabian
|
||||
sc ; Nbat ; Nabataean
|
||||
sc ; Newa ; Newa
|
||||
sc ; Nkoo ; Nko
|
||||
sc ; Nshu ; Nushu
|
||||
sc ; Ogam ; Ogham
|
||||
sc ; Olck ; Ol_Chiki
|
||||
sc ; Orkh ; Old_Turkic
|
||||
sc ; Orya ; Oriya
|
||||
sc ; Osge ; Osage
|
||||
sc ; Osma ; Osmanya
|
||||
sc ; Palm ; Palmyrene
|
||||
sc ; Pauc ; Pau_Cin_Hau
|
||||
@ -1221,6 +1277,7 @@ sc ; Sidd ; Siddham
|
||||
sc ; Sind ; Khudawadi
|
||||
sc ; Sinh ; Sinhala
|
||||
sc ; Sora ; Sora_Sompeng
|
||||
sc ; Soyo ; Soyombo
|
||||
sc ; Sund ; Sundanese
|
||||
sc ; Sylo ; Syloti_Nagri
|
||||
sc ; Syrc ; Syriac
|
||||
@ -1229,6 +1286,7 @@ sc ; Takr ; Takri
|
||||
sc ; Tale ; Tai_Le
|
||||
sc ; Talu ; New_Tai_Lue
|
||||
sc ; Taml ; Tamil
|
||||
sc ; Tang ; Tangut
|
||||
sc ; Tavt ; Tai_Viet
|
||||
sc ; Telu ; Telugu
|
||||
sc ; Tfng ; Tifinagh
|
||||
@ -1243,6 +1301,7 @@ sc ; Wara ; Warang_Citi
|
||||
sc ; Xpeo ; Old_Persian
|
||||
sc ; Xsux ; Cuneiform
|
||||
sc ; Yiii ; Yi
|
||||
sc ; Zanb ; Zanabazar_Square
|
||||
sc ; Zinh ; Inherited ; Qaai
|
||||
sc ; Zyyy ; Common
|
||||
sc ; Zzzz ; Unknown
|
||||
@ -1269,6 +1328,11 @@ SB ; ST ; STerm
|
||||
SB ; UP ; Upper
|
||||
SB ; XX ; Other
|
||||
|
||||
# Sentence_Terminal (STerm)
|
||||
|
||||
STerm; N ; No ; F ; False
|
||||
STerm; Y ; Yes ; T ; True
|
||||
|
||||
# Simple_Case_Folding (scf)
|
||||
|
||||
# @missing: 0000..10FFFF; Simple_Case_Folding; <code point>
|
||||
@ -1322,6 +1386,13 @@ Upper; Y ; Yes ; T
|
||||
VS ; N ; No ; F ; False
|
||||
VS ; Y ; Yes ; T ; True
|
||||
|
||||
# Vertical_Orientation (vo)
|
||||
|
||||
vo ; R ; Rotated
|
||||
vo ; Tr ; Transformed_Rotated
|
||||
vo ; Tu ; Transformed_Upright
|
||||
vo ; U ; Upright
|
||||
|
||||
# White_Space (WSpace)
|
||||
|
||||
WSpace; N ; No ; F ; False
|
||||
@ -1331,9 +1402,13 @@ WSpace; Y ; Yes ; T
|
||||
|
||||
WB ; CR ; CR
|
||||
WB ; DQ ; Double_Quote
|
||||
WB ; EB ; E_Base
|
||||
WB ; EBG ; E_Base_GAZ
|
||||
WB ; EM ; E_Modifier
|
||||
WB ; EX ; ExtendNumLet
|
||||
WB ; Extend ; Extend
|
||||
WB ; FO ; Format
|
||||
WB ; GAZ ; Glue_After_Zwj
|
||||
WB ; HL ; Hebrew_Letter
|
||||
WB ; KA ; Katakana
|
||||
WB ; LE ; ALetter
|
||||
@ -1346,6 +1421,7 @@ WB ; NU ; Numeric
|
||||
WB ; RI ; Regional_Indicator
|
||||
WB ; SQ ; Single_Quote
|
||||
WB ; XX ; Other
|
||||
WB ; ZWJ ; ZWJ
|
||||
|
||||
# XID_Continue (XIDC)
|
||||
|
||||
|
@ -1,10 +1,11 @@
|
||||
# Scripts-8.0.0.txt
|
||||
# Date: 2015-03-11, 22:29:42 GMT [MD]
|
||||
# Scripts-10.0.0.txt
|
||||
# Date: 2017-03-11, 06:40:37 GMT
|
||||
# Copyright (c) 2017 Unicode, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2015 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
# For more information, see:
|
||||
# UAX #24, Unicode Script Property: http://www.unicode.org/reports/tr24/
|
||||
# Especially the sections:
|
||||
@ -92,10 +93,10 @@
|
||||
0605 ; Common # Cf ARABIC NUMBER MARK ABOVE
|
||||
060C ; Common # Po ARABIC COMMA
|
||||
061B ; Common # Po ARABIC SEMICOLON
|
||||
061C ; Common # Cf ARABIC LETTER MARK
|
||||
061F ; Common # Po ARABIC QUESTION MARK
|
||||
0640 ; Common # Lm ARABIC TATWEEL
|
||||
06DD ; Common # Cf ARABIC END OF AYAH
|
||||
08E2 ; Common # Cf ARABIC DISPUTED END OF AYAH
|
||||
0964..0965 ; Common # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA
|
||||
0E3F ; Common # Sc THAI CURRENCY SYMBOL BAHT
|
||||
0FD5..0FD8 ; Common # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS
|
||||
@ -110,6 +111,7 @@
|
||||
1CEE..1CF1 ; Common # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA
|
||||
1CF2..1CF3 ; Common # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA
|
||||
1CF5..1CF6 ; Common # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA
|
||||
1CF7 ; Common # Mc VEDIC SIGN ATIKRAMA
|
||||
2000..200A ; Common # Zs [11] EN QUAD..HAIR SPACE
|
||||
200B ; Common # Cf ZERO WIDTH SPACE
|
||||
200E..200F ; Common # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
|
||||
@ -153,7 +155,7 @@
|
||||
208A..208C ; Common # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN
|
||||
208D ; Common # Ps SUBSCRIPT LEFT PARENTHESIS
|
||||
208E ; Common # Pe SUBSCRIPT RIGHT PARENTHESIS
|
||||
20A0..20BE ; Common # Sc [31] EURO-CURRENCY SIGN..LARI SIGN
|
||||
20A0..20BF ; Common # Sc [32] EURO-CURRENCY SIGN..BITCOIN SIGN
|
||||
2100..2101 ; Common # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT
|
||||
2102 ; Common # L& DOUBLE-STRUCK CAPITAL C
|
||||
2103..2106 ; Common # So [4] DEGREE CELSIUS..CADA UNA
|
||||
@ -223,8 +225,7 @@
|
||||
239B..23B3 ; Common # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM
|
||||
23B4..23DB ; Common # So [40] TOP SQUARE BRACKET..FUSE
|
||||
23DC..23E1 ; Common # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET
|
||||
23E2..23FA ; Common # So [25] WHITE TRAPEZIUM..BLACK CIRCLE FOR RECORD
|
||||
2400..2426 ; Common # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO
|
||||
23E2..2426 ; Common # So [69] WHITE TRAPEZIUM..SYMBOL FOR SUBSTITUTE FORM TWO
|
||||
2440..244A ; Common # So [11] OCR HOOK..OCR DOUBLE BACKSLASH
|
||||
2460..249B ; Common # No [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP
|
||||
249C..24E9 ; Common # So [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z
|
||||
@ -309,7 +310,7 @@
|
||||
2B76..2B95 ; Common # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW
|
||||
2B98..2BB9 ; Common # So [34] THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD..UP ARROWHEAD IN A RECTANGLE BOX
|
||||
2BBD..2BC8 ; Common # So [12] BALLOT BOX WITH LIGHT X..BLACK MEDIUM RIGHT-POINTING TRIANGLE CENTRED
|
||||
2BCA..2BD1 ; Common # So [8] TOP HALF BLACK CIRCLE..UNCERTAINTY SIGN
|
||||
2BCA..2BD2 ; Common # So [9] TOP HALF BLACK CIRCLE..GROUP MARK
|
||||
2BEC..2BEF ; Common # So [4] LEFTWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS..DOWNWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS
|
||||
2E00..2E01 ; Common # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER
|
||||
2E02 ; Common # Pi LEFT SUBSTITUTION BRACKET
|
||||
@ -348,6 +349,7 @@
|
||||
2E40 ; Common # Pd DOUBLE HYPHEN
|
||||
2E41 ; Common # Po REVERSED COMMA
|
||||
2E42 ; Common # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK
|
||||
2E43..2E49 ; Common # Po [7] DASH WITH LEFT UPTURN..DOUBLE STACKED COMMA
|
||||
2FF0..2FFB ; Common # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
|
||||
3000 ; Common # Zs IDEOGRAPHIC SPACE
|
||||
3001..3003 ; Common # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
|
||||
@ -572,19 +574,18 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
|
||||
1F100..1F10C ; Common # No [13] DIGIT ZERO FULL STOP..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO
|
||||
1F110..1F12E ; Common # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ
|
||||
1F130..1F16B ; Common # So [60] SQUARED LATIN CAPITAL LETTER A..RAISED MD SIGN
|
||||
1F170..1F19A ; Common # So [43] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VS
|
||||
1F170..1F1AC ; Common # So [61] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VOD
|
||||
1F1E6..1F1FF ; Common # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z
|
||||
1F201..1F202 ; Common # So [2] SQUARED KATAKANA KOKO..SQUARED KATAKANA SA
|
||||
1F210..1F23A ; Common # So [43] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-55B6
|
||||
1F210..1F23B ; Common # So [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D
|
||||
1F240..1F248 ; Common # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
|
||||
1F250..1F251 ; Common # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
|
||||
1F260..1F265 ; Common # So [6] ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI
|
||||
1F300..1F3FA ; Common # So [251] CYCLONE..AMPHORA
|
||||
1F3FB..1F3FF ; Common # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6
|
||||
1F400..1F579 ; Common # So [378] RAT..JOYSTICK
|
||||
1F57B..1F5A3 ; Common # So [41] LEFT HAND TELEPHONE RECEIVER..BLACK DOWN POINTING BACKHAND INDEX
|
||||
1F5A5..1F6D0 ; Common # So [300] DESKTOP COMPUTER..PLACE OF WORSHIP
|
||||
1F400..1F6D4 ; Common # So [725] RAT..PAGODA
|
||||
1F6E0..1F6EC ; Common # So [13] HAMMER AND WRENCH..AIRPLANE ARRIVING
|
||||
1F6F0..1F6F3 ; Common # So [4] SATELLITE..PASSENGER SHIP
|
||||
1F6F0..1F6F8 ; Common # So [9] SATELLITE..FLYING SAUCER
|
||||
1F700..1F773 ; Common # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE
|
||||
1F780..1F7D4 ; Common # So [85] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..HEAVY TWELVE POINTED PINWHEEL STAR
|
||||
1F800..1F80B ; Common # So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD
|
||||
@ -592,13 +593,17 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
|
||||
1F850..1F859 ; Common # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW
|
||||
1F860..1F887 ; Common # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
|
||||
1F890..1F8AD ; Common # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS
|
||||
1F910..1F918 ; Common # So [9] ZIPPER-MOUTH FACE..SIGN OF THE HORNS
|
||||
1F980..1F984 ; Common # So [5] CRAB..UNICORN FACE
|
||||
1F900..1F90B ; Common # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT
|
||||
1F910..1F93E ; Common # So [47] ZIPPER-MOUTH FACE..HANDBALL
|
||||
1F940..1F94C ; Common # So [13] WILTED FLOWER..CURLING STONE
|
||||
1F950..1F96B ; Common # So [28] CROISSANT..CANNED FOOD
|
||||
1F980..1F997 ; Common # So [24] CRAB..CRICKET
|
||||
1F9C0 ; Common # So CHEESE WEDGE
|
||||
1F9D0..1F9E6 ; Common # So [23] FACE WITH MONOCLE..SOCKS
|
||||
E0001 ; Common # Cf LANGUAGE TAG
|
||||
E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG
|
||||
|
||||
# Total code points: 7179
|
||||
# Total code points: 7363
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -641,7 +646,7 @@ A770 ; Latin # Lm MODIFIER LETTER US
|
||||
A771..A787 ; Latin # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T
|
||||
A78B..A78E ; Latin # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
|
||||
A78F ; Latin # Lo LATIN LETTER SINOLOGICAL DOT
|
||||
A790..A7AD ; Latin # L& [30] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER L WITH BELT
|
||||
A790..A7AE ; Latin # L& [31] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER SMALL CAPITAL I
|
||||
A7B0..A7B7 ; Latin # L& [8] LATIN CAPITAL LETTER TURNED K..LATIN SMALL LETTER OMEGA
|
||||
A7F7 ; Latin # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I
|
||||
A7F8..A7F9 ; Latin # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
|
||||
@ -654,7 +659,7 @@ FB00..FB06 ; Latin # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE S
|
||||
FF21..FF3A ; Latin # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
|
||||
FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
|
||||
|
||||
# Total code points: 1349
|
||||
# Total code points: 1350
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -708,13 +713,13 @@ AB65 ; Greek # L& GREEK LETTER SMALL CAPITAL OMEGA
|
||||
10175..10178 ; Greek # No [4] GREEK ONE HALF SIGN..GREEK THREE QUARTERS SIGN
|
||||
10179..10189 ; Greek # So [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN
|
||||
1018A..1018B ; Greek # No [2] GREEK ZERO SIGN..GREEK ONE QUARTER SIGN
|
||||
1018C ; Greek # So GREEK SINUSOID SIGN
|
||||
1018C..1018E ; Greek # So [3] GREEK SINUSOID SIGN..NOMISMA SIGN
|
||||
101A0 ; Greek # So GREEK SYMBOL TAU RHO
|
||||
1D200..1D241 ; Greek # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54
|
||||
1D242..1D244 ; Greek # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
|
||||
1D245 ; Greek # So GREEK MUSICAL LEIMMA
|
||||
|
||||
# Total code points: 516
|
||||
# Total code points: 518
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -724,6 +729,7 @@ AB65 ; Greek # L& GREEK LETTER SMALL CAPITAL OMEGA
|
||||
0487 ; Cyrillic # Mn COMBINING CYRILLIC POKRYTIE
|
||||
0488..0489 ; Cyrillic # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
|
||||
048A..052F ; Cyrillic # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER
|
||||
1C80..1C88 ; Cyrillic # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
|
||||
1D2B ; Cyrillic # L& CYRILLIC LETTER SMALL CAPITAL EL
|
||||
1D78 ; Cyrillic # Lm MODIFIER LETTER CYRILLIC EN
|
||||
2DE0..2DFF ; Cyrillic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
|
||||
@ -740,7 +746,7 @@ A69C..A69D ; Cyrillic # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER
|
||||
A69E..A69F ; Cyrillic # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E
|
||||
FE2E..FE2F ; Cyrillic # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF
|
||||
|
||||
# Total code points: 434
|
||||
# Total code points: 443
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -791,6 +797,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
|
||||
060D ; Arabic # Po ARABIC DATE SEPARATOR
|
||||
060E..060F ; Arabic # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA
|
||||
0610..061A ; Arabic # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
|
||||
061C ; Arabic # Cf ARABIC LETTER MARK
|
||||
061E ; Arabic # Po ARABIC TRIPLE DOT PUNCTUATION MARK
|
||||
0620..063F ; Arabic # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
|
||||
0641..064A ; Arabic # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH
|
||||
@ -815,6 +822,8 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
|
||||
06FF ; Arabic # Lo ARABIC LETTER HEH WITH INVERTED V
|
||||
0750..077F ; Arabic # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE
|
||||
08A0..08B4 ; Arabic # Lo [21] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER KAF WITH DOT BELOW
|
||||
08B6..08BD ; Arabic # Lo [8] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER AFRICAN NOON
|
||||
08D4..08E1 ; Arabic # Mn [14] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH SIGN SAFHA
|
||||
08E3..08FF ; Arabic # Mn [29] ARABIC TURNED DAMMA BELOW..ARABIC MARK SIDEWAYS NOON GHUNNA
|
||||
FB50..FBB1 ; Arabic # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
|
||||
FBB2..FBC1 ; Arabic # Sk [16] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL SMALL TAH BELOW
|
||||
@ -862,7 +871,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA
|
||||
1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
|
||||
1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL
|
||||
|
||||
# Total code points: 1257
|
||||
# Total code points: 1280
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -873,8 +882,9 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA
|
||||
0712..072F ; Syriac # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH
|
||||
0730..074A ; Syriac # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
|
||||
074D..074F ; Syriac # Lo [3] SYRIAC LETTER SOGDIAN ZHAIN..SYRIAC LETTER SOGDIAN FE
|
||||
0860..086A ; Syriac # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
|
||||
|
||||
# Total code points: 77
|
||||
# Total code points: 88
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -944,8 +954,10 @@ A8FD ; Devanagari # Lo DEVANAGARI JAIN OM
|
||||
09F4..09F9 ; Bengali # No [6] BENGALI CURRENCY NUMERATOR ONE..BENGALI CURRENCY DENOMINATOR SIXTEEN
|
||||
09FA ; Bengali # So BENGALI ISSHAR
|
||||
09FB ; Bengali # Sc BENGALI GANDA MARK
|
||||
09FC ; Bengali # Lo BENGALI LETTER VEDIC ANUSVARA
|
||||
09FD ; Bengali # Po BENGALI ABBREVIATION SIGN
|
||||
|
||||
# Total code points: 93
|
||||
# Total code points: 95
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -998,8 +1010,9 @@ A8FD ; Devanagari # Lo DEVANAGARI JAIN OM
|
||||
0AF0 ; Gujarati # Po GUJARATI ABBREVIATION SIGN
|
||||
0AF1 ; Gujarati # Sc GUJARATI RUPEE SIGN
|
||||
0AF9 ; Gujarati # Lo GUJARATI LETTER ZHA
|
||||
0AFA..0AFF ; Gujarati # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE
|
||||
|
||||
# Total code points: 85
|
||||
# Total code points: 91
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1086,6 +1099,7 @@ A8FD ; Devanagari # Lo DEVANAGARI JAIN OM
|
||||
|
||||
# ================================================
|
||||
|
||||
0C80 ; Kannada # Lo KANNADA SIGN SPACING CANDRABINDU
|
||||
0C81 ; Kannada # Mn KANNADA SIGN CANDRABINDU
|
||||
0C82..0C83 ; Kannada # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA
|
||||
0C85..0C8C ; Kannada # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L
|
||||
@ -1109,15 +1123,16 @@ A8FD ; Devanagari # Lo DEVANAGARI JAIN OM
|
||||
0CE6..0CEF ; Kannada # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
|
||||
0CF1..0CF2 ; Kannada # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
|
||||
|
||||
# Total code points: 87
|
||||
# Total code points: 88
|
||||
|
||||
# ================================================
|
||||
|
||||
0D01 ; Malayalam # Mn MALAYALAM SIGN CANDRABINDU
|
||||
0D00..0D01 ; Malayalam # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
|
||||
0D02..0D03 ; Malayalam # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
|
||||
0D05..0D0C ; Malayalam # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L
|
||||
0D0E..0D10 ; Malayalam # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
|
||||
0D12..0D3A ; Malayalam # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA
|
||||
0D3B..0D3C ; Malayalam # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA
|
||||
0D3D ; Malayalam # Lo MALAYALAM SIGN AVAGRAHA
|
||||
0D3E..0D40 ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II
|
||||
0D41..0D44 ; Malayalam # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
|
||||
@ -1125,15 +1140,18 @@ A8FD ; Devanagari # Lo DEVANAGARI JAIN OM
|
||||
0D4A..0D4C ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU
|
||||
0D4D ; Malayalam # Mn MALAYALAM SIGN VIRAMA
|
||||
0D4E ; Malayalam # Lo MALAYALAM LETTER DOT REPH
|
||||
0D4F ; Malayalam # So MALAYALAM SIGN PARA
|
||||
0D54..0D56 ; Malayalam # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL
|
||||
0D57 ; Malayalam # Mc MALAYALAM AU LENGTH MARK
|
||||
0D58..0D5E ; Malayalam # No [7] MALAYALAM FRACTION ONE ONE-HUNDRED-AND-SIXTIETH..MALAYALAM FRACTION ONE FIFTH
|
||||
0D5F..0D61 ; Malayalam # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL
|
||||
0D62..0D63 ; Malayalam # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL
|
||||
0D66..0D6F ; Malayalam # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE
|
||||
0D70..0D75 ; Malayalam # No [6] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE QUARTERS
|
||||
0D70..0D78 ; Malayalam # No [9] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE SIXTEENTHS
|
||||
0D79 ; Malayalam # So MALAYALAM DATE MARK
|
||||
0D7A..0D7F ; Malayalam # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K
|
||||
|
||||
# Total code points: 100
|
||||
# Total code points: 117
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1436,21 +1454,24 @@ AB70..ABBF ; Cherokee # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETT
|
||||
1820..1842 ; Mongolian # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI
|
||||
1843 ; Mongolian # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN
|
||||
1844..1877 ; Mongolian # Lo [52] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER MANCHU ZHA
|
||||
1880..18A8 ; Mongolian # Lo [41] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER MANCHU ALI GALI BHA
|
||||
1880..1884 ; Mongolian # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA
|
||||
1885..1886 ; Mongolian # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
|
||||
1887..18A8 ; Mongolian # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA
|
||||
18A9 ; Mongolian # Mn MONGOLIAN LETTER ALI GALI DAGALGA
|
||||
18AA ; Mongolian # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA
|
||||
11660..1166C ; Mongolian # Po [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT
|
||||
|
||||
# Total code points: 153
|
||||
# Total code points: 166
|
||||
|
||||
# ================================================
|
||||
|
||||
3041..3096 ; Hiragana # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE
|
||||
309D..309E ; Hiragana # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK
|
||||
309F ; Hiragana # Lo HIRAGANA DIGRAPH YORI
|
||||
1B001 ; Hiragana # Lo HIRAGANA LETTER ARCHAIC YE
|
||||
1B001..1B11E ; Hiragana # Lo [286] HIRAGANA LETTER ARCHAIC YE..HENTAIGANA LETTER N-MU-MO-2
|
||||
1F200 ; Hiragana # So SQUARE HIRAGANA HOKA
|
||||
|
||||
# Total code points: 91
|
||||
# Total code points: 376
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1469,10 +1490,10 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
|
||||
# ================================================
|
||||
|
||||
02EA..02EB ; Bopomofo # Sk [2] MODIFIER LETTER YIN DEPARTING TONE MARK..MODIFIER LETTER YANG DEPARTING TONE MARK
|
||||
3105..312D ; Bopomofo # Lo [41] BOPOMOFO LETTER B..BOPOMOFO LETTER IH
|
||||
3105..312E ; Bopomofo # Lo [42] BOPOMOFO LETTER B..BOPOMOFO LETTER O WITH DOT ABOVE
|
||||
31A0..31BA ; Bopomofo # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY
|
||||
|
||||
# Total code points: 70
|
||||
# Total code points: 71
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1485,16 +1506,17 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
|
||||
3038..303A ; Han # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY
|
||||
303B ; Han # Lm VERTICAL IDEOGRAPHIC ITERATION MARK
|
||||
3400..4DB5 ; Han # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
|
||||
4E00..9FD5 ; Han # Lo [20950] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FD5
|
||||
4E00..9FEA ; Han # Lo [20971] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FEA
|
||||
F900..FA6D ; Han # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D
|
||||
FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
|
||||
20000..2A6D6 ; Han # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
|
||||
2A700..2B734 ; Han # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
|
||||
2B740..2B81D ; Han # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
|
||||
2B820..2CEA1 ; Han # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
|
||||
2CEB0..2EBE0 ; Han # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
|
||||
2F800..2FA1D ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
|
||||
|
||||
# Total code points: 81734
|
||||
# Total code points: 89228
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1509,8 +1531,9 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE
|
||||
|
||||
10300..1031F ; Old_Italic # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS
|
||||
10320..10323 ; Old_Italic # No [4] OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY
|
||||
1032D..1032F ; Old_Italic # Lo [3] OLD ITALIC LETTER YE..OLD ITALIC LETTER SOUTHERN TSE
|
||||
|
||||
# Total code points: 36
|
||||
# Total code points: 39
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1542,8 +1565,8 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE
|
||||
1CED ; Inherited # Mn VEDIC SIGN TIRYAK
|
||||
1CF4 ; Inherited # Mn VEDIC TONE CANDRA ABOVE
|
||||
1CF8..1CF9 ; Inherited # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
|
||||
1DC0..1DF5 ; Inherited # Mn [54] COMBINING DOTTED GRAVE ACCENT..COMBINING UP TACK ABOVE
|
||||
1DFC..1DFF ; Inherited # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
|
||||
1DC0..1DF9 ; Inherited # Mn [58] COMBINING DOTTED GRAVE ACCENT..COMBINING WIDE INVERTED BRIDGE BELOW
|
||||
1DFB..1DFF ; Inherited # Mn [5] COMBINING DELETION MARK..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
|
||||
200C..200D ; Inherited # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
|
||||
20D0..20DC ; Inherited # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
|
||||
20DD..20E0 ; Inherited # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
|
||||
@ -1562,7 +1585,7 @@ FE20..FE2D ; Inherited # Mn [14] COMBINING LIGATURE LEFT HALF..COMBINING CON
|
||||
1D1AA..1D1AD ; Inherited # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
|
||||
E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
|
||||
|
||||
# Total code points: 563
|
||||
# Total code points: 568
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1705,8 +1728,13 @@ E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-2
|
||||
|
||||
2C00..2C2E ; Glagolitic # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
|
||||
2C30..2C5E ; Glagolitic # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE
|
||||
1E000..1E006 ; Glagolitic # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
|
||||
1E008..1E018 ; Glagolitic # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
|
||||
1E01B..1E021 ; Glagolitic # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
|
||||
1E023..1E024 ; Glagolitic # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
|
||||
1E026..1E02A ; Glagolitic # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
|
||||
|
||||
# Total code points: 94
|
||||
# Total code points: 132
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -1872,11 +1900,11 @@ A62A..A62B ; Vai # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO
|
||||
A880..A881 ; Saurashtra # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA
|
||||
A882..A8B3 ; Saurashtra # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA
|
||||
A8B4..A8C3 ; Saurashtra # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU
|
||||
A8C4 ; Saurashtra # Mn SAURASHTRA SIGN VIRAMA
|
||||
A8C4..A8C5 ; Saurashtra # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU
|
||||
A8CE..A8CF ; Saurashtra # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA
|
||||
A8D0..A8D9 ; Saurashtra # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE
|
||||
|
||||
# Total code points: 81
|
||||
# Total code points: 82
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -2314,8 +2342,9 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
|
||||
11235 ; Khojki # Mc KHOJKI SIGN VIRAMA
|
||||
11236..11237 ; Khojki # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA
|
||||
11238..1123D ; Khojki # Po [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN
|
||||
1123E ; Khojki # Mn KHOJKI SIGN SUKUN
|
||||
|
||||
# Total code points: 61
|
||||
# Total code points: 62
|
||||
|
||||
# ================================================
|
||||
|
||||
@ -2536,4 +2565,129 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
|
||||
|
||||
# Total code points: 672
|
||||
|
||||
# ================================================
|
||||
|
||||
1E900..1E943 ; Adlam # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA
|
||||
1E944..1E94A ; Adlam # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
|
||||
1E950..1E959 ; Adlam # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
|
||||
1E95E..1E95F ; Adlam # Po [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK
|
||||
|
||||
# Total code points: 87
|
||||
|
||||
# ================================================
|
||||
|
||||
11C00..11C08 ; Bhaiksuki # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
|
||||
11C0A..11C2E ; Bhaiksuki # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
|
||||
11C2F ; Bhaiksuki # Mc BHAIKSUKI VOWEL SIGN AA
|
||||
11C30..11C36 ; Bhaiksuki # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L
|
||||
11C38..11C3D ; Bhaiksuki # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA
|
||||
11C3E ; Bhaiksuki # Mc BHAIKSUKI SIGN VISARGA
|
||||
11C3F ; Bhaiksuki # Mn BHAIKSUKI SIGN VIRAMA
|
||||
11C40 ; Bhaiksuki # Lo BHAIKSUKI SIGN AVAGRAHA
|
||||
11C41..11C45 ; Bhaiksuki # Po [5] BHAIKSUKI DANDA..BHAIKSUKI GAP FILLER-2
|
||||
11C50..11C59 ; Bhaiksuki # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE
|
||||
11C5A..11C6C ; Bhaiksuki # No [19] BHAIKSUKI NUMBER ONE..BHAIKSUKI HUNDREDS UNIT MARK
|
||||
|
||||
# Total code points: 97
|
||||
|
||||
# ================================================
|
||||
|
||||
11C70..11C71 ; Marchen # Po [2] MARCHEN HEAD MARK..MARCHEN MARK SHAD
|
||||
11C72..11C8F ; Marchen # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A
|
||||
11C92..11CA7 ; Marchen # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA
|
||||
11CA9 ; Marchen # Mc MARCHEN SUBJOINED LETTER YA
|
||||
11CAA..11CB0 ; Marchen # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA
|
||||
11CB1 ; Marchen # Mc MARCHEN VOWEL SIGN I
|
||||
11CB2..11CB3 ; Marchen # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E
|
||||
11CB4 ; Marchen # Mc MARCHEN VOWEL SIGN O
|
||||
11CB5..11CB6 ; Marchen # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU
|
||||
|
||||
# Total code points: 68
|
||||
|
||||
# ================================================
|
||||
|
||||
11400..11434 ; Newa # Lo [53] NEWA LETTER A..NEWA LETTER HA
|
||||
11435..11437 ; Newa # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II
|
||||
11438..1143F ; Newa # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI
|
||||
11440..11441 ; Newa # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU
|
||||
11442..11444 ; Newa # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA
|
||||
11445 ; Newa # Mc NEWA SIGN VISARGA
|
||||
11446 ; Newa # Mn NEWA SIGN NUKTA
|
||||
11447..1144A ; Newa # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI
|
||||
1144B..1144F ; Newa # Po [5] NEWA DANDA..NEWA ABBREVIATION SIGN
|
||||
11450..11459 ; Newa # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE
|
||||
1145B ; Newa # Po NEWA PLACEHOLDER MARK
|
||||
1145D ; Newa # Po NEWA INSERTION SIGN
|
||||
|
||||
# Total code points: 92
|
||||
|
||||
# ================================================
|
||||
|
||||
104B0..104D3 ; Osage # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA
|
||||
104D8..104FB ; Osage # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA
|
||||
|
||||
# Total code points: 72
|
||||
|
||||
# ================================================
|
||||
|
||||
16FE0 ; Tangut # Lm TANGUT ITERATION MARK
|
||||
17000..187EC ; Tangut # Lo [6125] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187EC
|
||||
18800..18AF2 ; Tangut # Lo [755] TANGUT COMPONENT-001..TANGUT COMPONENT-755
|
||||
|
||||
# Total code points: 6881
|
||||
|
||||
# ================================================
|
||||
|
||||
11D00..11D06 ; Masaram_Gondi # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E
|
||||
11D08..11D09 ; Masaram_Gondi # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O
|
||||
11D0B..11D30 ; Masaram_Gondi # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA
|
||||
11D31..11D36 ; Masaram_Gondi # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R
|
||||
11D3A ; Masaram_Gondi # Mn MASARAM GONDI VOWEL SIGN E
|
||||
11D3C..11D3D ; Masaram_Gondi # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O
|
||||
11D3F..11D45 ; Masaram_Gondi # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA
|
||||
11D46 ; Masaram_Gondi # Lo MASARAM GONDI REPHA
|
||||
11D47 ; Masaram_Gondi # Mn MASARAM GONDI RA-KARA
|
||||
11D50..11D59 ; Masaram_Gondi # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE
|
||||
|
||||
# Total code points: 75
|
||||
|
||||
# ================================================
|
||||
|
||||
16FE1 ; Nushu # Lm NUSHU ITERATION MARK
|
||||
1B170..1B2FB ; Nushu # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB
|
||||
|
||||
# Total code points: 397
|
||||
|
||||
# ================================================
|
||||
|
||||
11A50 ; Soyombo # Lo SOYOMBO LETTER A
|
||||
11A51..11A56 ; Soyombo # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE
|
||||
11A57..11A58 ; Soyombo # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU
|
||||
11A59..11A5B ; Soyombo # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK
|
||||
11A5C..11A83 ; Soyombo # Lo [40] SOYOMBO LETTER KA..SOYOMBO LETTER KSSA
|
||||
11A86..11A89 ; Soyombo # Lo [4] SOYOMBO CLUSTER-INITIAL LETTER RA..SOYOMBO CLUSTER-INITIAL LETTER SA
|
||||
11A8A..11A96 ; Soyombo # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA
|
||||
11A97 ; Soyombo # Mc SOYOMBO SIGN VISARGA
|
||||
11A98..11A99 ; Soyombo # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER
|
||||
11A9A..11A9C ; Soyombo # Po [3] SOYOMBO MARK TSHEG..SOYOMBO MARK DOUBLE SHAD
|
||||
11A9E..11AA2 ; Soyombo # Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2
|
||||
|
||||
# Total code points: 80
|
||||
|
||||
# ================================================
|
||||
|
||||
11A00 ; Zanabazar_Square # Lo ZANABAZAR SQUARE LETTER A
|
||||
11A01..11A06 ; Zanabazar_Square # Mn [6] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL SIGN O
|
||||
11A07..11A08 ; Zanabazar_Square # Mc [2] ZANABAZAR SQUARE VOWEL SIGN AI..ZANABAZAR SQUARE VOWEL SIGN AU
|
||||
11A09..11A0A ; Zanabazar_Square # Mn [2] ZANABAZAR SQUARE VOWEL SIGN REVERSED I..ZANABAZAR SQUARE VOWEL LENGTH MARK
|
||||
11A0B..11A32 ; Zanabazar_Square # Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA
|
||||
11A33..11A38 ; Zanabazar_Square # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA
|
||||
11A39 ; Zanabazar_Square # Mc ZANABAZAR SQUARE SIGN VISARGA
|
||||
11A3A ; Zanabazar_Square # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA
|
||||
11A3B..11A3E ; Zanabazar_Square # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA
|
||||
11A3F..11A46 ; Zanabazar_Square # Po [8] ZANABAZAR SQUARE INITIAL HEAD MARK..ZANABAZAR SQUARE CLOSING DOUBLE-LINED HEAD MARK
|
||||
11A47 ; Zanabazar_Square # Mn ZANABAZAR SQUARE SUBJOINER
|
||||
|
||||
# Total code points: 72
|
||||
|
||||
# EOF
|
||||
|
281
test/jdk/java/lang/Character/SpecialCasing.txt
Normal file
281
test/jdk/java/lang/Character/SpecialCasing.txt
Normal file
@ -0,0 +1,281 @@
|
||||
# SpecialCasing-10.0.0.txt
|
||||
# Date: 2017-04-14, 05:40:43 GMT
|
||||
# Copyright (c) 2017 Unicode, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
# Unicode Character Database
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
# Special Casing
|
||||
#
|
||||
# This file is a supplement to the UnicodeData.txt file. It does not define any
|
||||
# properties, but rather provides additional information about the casing of
|
||||
# Unicode characters, for situations when casing incurs a change in string length
|
||||
# or is dependent on context or locale. For compatibility, the UnicodeData.txt
|
||||
# file only contains simple case mappings for characters where they are one-to-one
|
||||
# and independent of context and language. The data in this file, combined with
|
||||
# the simple case mappings in UnicodeData.txt, defines the full case mappings
|
||||
# Lowercase_Mapping (lc), Titlecase_Mapping (tc), and Uppercase_Mapping (uc).
|
||||
#
|
||||
# Note that the preferred mechanism for defining tailored casing operations is
|
||||
# the Unicode Common Locale Data Repository (CLDR). For more information, see the
|
||||
# discussion of case mappings and case algorithms in the Unicode Standard.
|
||||
#
|
||||
# All code points not listed in this file that do not have a simple case mappings
|
||||
# in UnicodeData.txt map to themselves.
|
||||
# ================================================================================
|
||||
# Format
|
||||
# ================================================================================
|
||||
# The entries in this file are in the following machine-readable format:
|
||||
#
|
||||
# <code>; <lower>; <title>; <upper>; (<condition_list>;)? # <comment>
|
||||
#
|
||||
# <code>, <lower>, <title>, and <upper> provide the respective full case mappings
|
||||
# of <code>, expressed as character values in hex. If there is more than one character,
|
||||
# they are separated by spaces. Other than as used to separate elements, spaces are
|
||||
# to be ignored.
|
||||
#
|
||||
# The <condition_list> is optional. Where present, it consists of one or more language IDs
|
||||
# or casing contexts, separated by spaces. In these conditions:
|
||||
# - A condition list overrides the normal behavior if all of the listed conditions are true.
|
||||
# - The casing context is always the context of the characters in the original string,
|
||||
# NOT in the resulting string.
|
||||
# - Case distinctions in the condition list are not significant.
|
||||
# - Conditions preceded by "Not_" represent the negation of the condition.
|
||||
# The condition list is not represented in the UCD as a formal property.
|
||||
#
|
||||
# A language ID is defined by BCP 47, with '-' and '_' treated equivalently.
|
||||
#
|
||||
# A casing context for a character is defined by Section 3.13 Default Case Algorithms
|
||||
# of The Unicode Standard.
|
||||
#
|
||||
# Parsers of this file must be prepared to deal with future additions to this format:
|
||||
# * Additional contexts
|
||||
# * Additional fields
|
||||
# ================================================================================
|
||||
|
||||
# ================================================================================
|
||||
# Unconditional mappings
|
||||
# ================================================================================
|
||||
|
||||
# The German es-zed is special--the normal mapping is to SS.
|
||||
# Note: the titlecase should never occur in practice. It is equal to titlecase(uppercase(<es-zed>))
|
||||
|
||||
00DF; 00DF; 0053 0073; 0053 0053; # LATIN SMALL LETTER SHARP S
|
||||
|
||||
# Preserve canonical equivalence for I with dot. Turkic is handled below.
|
||||
|
||||
0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
|
||||
# Ligatures
|
||||
|
||||
FB00; FB00; 0046 0066; 0046 0046; # LATIN SMALL LIGATURE FF
|
||||
FB01; FB01; 0046 0069; 0046 0049; # LATIN SMALL LIGATURE FI
|
||||
FB02; FB02; 0046 006C; 0046 004C; # LATIN SMALL LIGATURE FL
|
||||
FB03; FB03; 0046 0066 0069; 0046 0046 0049; # LATIN SMALL LIGATURE FFI
|
||||
FB04; FB04; 0046 0066 006C; 0046 0046 004C; # LATIN SMALL LIGATURE FFL
|
||||
FB05; FB05; 0053 0074; 0053 0054; # LATIN SMALL LIGATURE LONG S T
|
||||
FB06; FB06; 0053 0074; 0053 0054; # LATIN SMALL LIGATURE ST
|
||||
|
||||
0587; 0587; 0535 0582; 0535 0552; # ARMENIAN SMALL LIGATURE ECH YIWN
|
||||
FB13; FB13; 0544 0576; 0544 0546; # ARMENIAN SMALL LIGATURE MEN NOW
|
||||
FB14; FB14; 0544 0565; 0544 0535; # ARMENIAN SMALL LIGATURE MEN ECH
|
||||
FB15; FB15; 0544 056B; 0544 053B; # ARMENIAN SMALL LIGATURE MEN INI
|
||||
FB16; FB16; 054E 0576; 054E 0546; # ARMENIAN SMALL LIGATURE VEW NOW
|
||||
FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH
|
||||
|
||||
# No corresponding uppercase precomposed character
|
||||
|
||||
0149; 0149; 02BC 004E; 02BC 004E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
|
||||
0390; 0390; 0399 0308 0301; 0399 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
|
||||
03B0; 03B0; 03A5 0308 0301; 03A5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
|
||||
01F0; 01F0; 004A 030C; 004A 030C; # LATIN SMALL LETTER J WITH CARON
|
||||
1E96; 1E96; 0048 0331; 0048 0331; # LATIN SMALL LETTER H WITH LINE BELOW
|
||||
1E97; 1E97; 0054 0308; 0054 0308; # LATIN SMALL LETTER T WITH DIAERESIS
|
||||
1E98; 1E98; 0057 030A; 0057 030A; # LATIN SMALL LETTER W WITH RING ABOVE
|
||||
1E99; 1E99; 0059 030A; 0059 030A; # LATIN SMALL LETTER Y WITH RING ABOVE
|
||||
1E9A; 1E9A; 0041 02BE; 0041 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING
|
||||
1F50; 1F50; 03A5 0313; 03A5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI
|
||||
1F52; 1F52; 03A5 0313 0300; 03A5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
|
||||
1F54; 1F54; 03A5 0313 0301; 03A5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
|
||||
1F56; 1F56; 03A5 0313 0342; 03A5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
|
||||
1FB6; 1FB6; 0391 0342; 0391 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI
|
||||
1FC6; 1FC6; 0397 0342; 0397 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI
|
||||
1FD2; 1FD2; 0399 0308 0300; 0399 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
|
||||
1FD3; 1FD3; 0399 0308 0301; 0399 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
|
||||
1FD6; 1FD6; 0399 0342; 0399 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI
|
||||
1FD7; 1FD7; 0399 0308 0342; 0399 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
|
||||
1FE2; 1FE2; 03A5 0308 0300; 03A5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
|
||||
1FE3; 1FE3; 03A5 0308 0301; 03A5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
|
||||
1FE4; 1FE4; 03A1 0313; 03A1 0313; # GREEK SMALL LETTER RHO WITH PSILI
|
||||
1FE6; 1FE6; 03A5 0342; 03A5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI
|
||||
1FE7; 1FE7; 03A5 0308 0342; 03A5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
|
||||
1FF6; 1FF6; 03A9 0342; 03A9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI
|
||||
|
||||
# IMPORTANT-when iota-subscript (0345) is uppercased or titlecased,
|
||||
# the result will be incorrect unless the iota-subscript is moved to the end
|
||||
# of any sequence of combining marks. Otherwise, the accents will go on the capital iota.
|
||||
# This process can be achieved by first transforming the text to NFC before casing.
|
||||
# E.g. <alpha><iota_subscript><acute> is uppercased to <ALPHA><acute><IOTA>
|
||||
|
||||
# The following cases are already in the UnicodeData.txt file, so are only commented here.
|
||||
|
||||
# 0345; 0345; 0345; 0399; # COMBINING GREEK YPOGEGRAMMENI
|
||||
|
||||
# All letters with YPOGEGRAMMENI (iota-subscript) or PROSGEGRAMMENI (iota adscript)
|
||||
# have special uppercases.
|
||||
# Note: characters with PROSGEGRAMMENI are actually titlecase, not uppercase!
|
||||
|
||||
1F80; 1F80; 1F88; 1F08 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
|
||||
1F81; 1F81; 1F89; 1F09 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
|
||||
1F82; 1F82; 1F8A; 1F0A 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI
|
||||
1F83; 1F83; 1F8B; 1F0B 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI
|
||||
1F84; 1F84; 1F8C; 1F0C 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI
|
||||
1F85; 1F85; 1F8D; 1F0D 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI
|
||||
1F86; 1F86; 1F8E; 1F0E 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
1F87; 1F87; 1F8F; 1F0F 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
1F88; 1F80; 1F88; 1F08 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
|
||||
1F89; 1F81; 1F89; 1F09 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
|
||||
1F8A; 1F82; 1F8A; 1F0A 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
|
||||
1F8B; 1F83; 1F8B; 1F0B 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
|
||||
1F8C; 1F84; 1F8C; 1F0C 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
|
||||
1F8D; 1F85; 1F8D; 1F0D 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
|
||||
1F8E; 1F86; 1F8E; 1F0E 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1F8F; 1F87; 1F8F; 1F0F 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1F90; 1F90; 1F98; 1F28 0399; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
|
||||
1F91; 1F91; 1F99; 1F29 0399; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
|
||||
1F92; 1F92; 1F9A; 1F2A 0399; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI
|
||||
1F93; 1F93; 1F9B; 1F2B 0399; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI
|
||||
1F94; 1F94; 1F9C; 1F2C 0399; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI
|
||||
1F95; 1F95; 1F9D; 1F2D 0399; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI
|
||||
1F96; 1F96; 1F9E; 1F2E 0399; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
1F97; 1F97; 1F9F; 1F2F 0399; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
1F98; 1F90; 1F98; 1F28 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
|
||||
1F99; 1F91; 1F99; 1F29 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
|
||||
1F9A; 1F92; 1F9A; 1F2A 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
|
||||
1F9B; 1F93; 1F9B; 1F2B 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
|
||||
1F9C; 1F94; 1F9C; 1F2C 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
|
||||
1F9D; 1F95; 1F9D; 1F2D 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
|
||||
1F9E; 1F96; 1F9E; 1F2E 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1F9F; 1F97; 1F9F; 1F2F 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1FA0; 1FA0; 1FA8; 1F68 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
|
||||
1FA1; 1FA1; 1FA9; 1F69 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
|
||||
1FA2; 1FA2; 1FAA; 1F6A 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
|
||||
1FA3; 1FA3; 1FAB; 1F6B 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI
|
||||
1FA4; 1FA4; 1FAC; 1F6C 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI
|
||||
1FA5; 1FA5; 1FAD; 1F6D 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI
|
||||
1FA6; 1FA6; 1FAE; 1F6E 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
1FA7; 1FA7; 1FAF; 1F6F 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
1FA8; 1FA0; 1FA8; 1F68 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
|
||||
1FA9; 1FA1; 1FA9; 1F69 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
|
||||
1FAA; 1FA2; 1FAA; 1F6A 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
|
||||
1FAB; 1FA3; 1FAB; 1F6B 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
|
||||
1FAC; 1FA4; 1FAC; 1F6C 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
|
||||
1FAD; 1FA5; 1FAD; 1F6D 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
|
||||
1FAE; 1FA6; 1FAE; 1F6E 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1FAF; 1FA7; 1FAF; 1F6F 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1FB3; 1FB3; 1FBC; 0391 0399; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
|
||||
1FBC; 1FB3; 1FBC; 0391 0399; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
|
||||
1FC3; 1FC3; 1FCC; 0397 0399; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
|
||||
1FCC; 1FC3; 1FCC; 0397 0399; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
|
||||
1FF3; 1FF3; 1FFC; 03A9 0399; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
|
||||
1FFC; 1FF3; 1FFC; 03A9 0399; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
|
||||
|
||||
# Some characters with YPOGEGRAMMENI also have no corresponding titlecases
|
||||
|
||||
1FB2; 1FB2; 1FBA 0345; 1FBA 0399; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
|
||||
1FB4; 1FB4; 0386 0345; 0386 0399; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
|
||||
1FC2; 1FC2; 1FCA 0345; 1FCA 0399; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
|
||||
1FC4; 1FC4; 0389 0345; 0389 0399; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
|
||||
1FF2; 1FF2; 1FFA 0345; 1FFA 0399; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
|
||||
1FF4; 1FF4; 038F 0345; 038F 0399; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
|
||||
|
||||
1FB7; 1FB7; 0391 0342 0345; 0391 0342 0399; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
1FC7; 1FC7; 0397 0342 0345; 0397 0342 0399; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
1FF7; 1FF7; 03A9 0342 0345; 03A9 0342 0399; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
|
||||
# ================================================================================
|
||||
# Conditional Mappings
|
||||
# The remainder of this file provides conditional casing data used to produce
|
||||
# full case mappings.
|
||||
# ================================================================================
|
||||
# Language-Insensitive Mappings
|
||||
# These are characters whose full case mappings do not depend on language, but do
|
||||
# depend on context (which characters come before or after). For more information
|
||||
# see the header of this file and the Unicode Standard.
|
||||
# ================================================================================
|
||||
|
||||
# Special case for final form of sigma
|
||||
|
||||
03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
|
||||
|
||||
# Note: the following cases for non-final are already in the UnicodeData.txt file.
|
||||
|
||||
# 03A3; 03C3; 03A3; 03A3; # GREEK CAPITAL LETTER SIGMA
|
||||
# 03C3; 03C3; 03A3; 03A3; # GREEK SMALL LETTER SIGMA
|
||||
# 03C2; 03C2; 03A3; 03A3; # GREEK SMALL LETTER FINAL SIGMA
|
||||
|
||||
# Note: the following cases are not included, since they would case-fold in lowercasing
|
||||
|
||||
# 03C3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK SMALL LETTER SIGMA
|
||||
# 03C2; 03C3; 03A3; 03A3; Not_Final_Sigma; # GREEK SMALL LETTER FINAL SIGMA
|
||||
|
||||
# ================================================================================
|
||||
# Language-Sensitive Mappings
|
||||
# These are characters whose full case mappings depend on language and perhaps also
|
||||
# context (which characters come before or after). For more information
|
||||
# see the header of this file and the Unicode Standard.
|
||||
# ================================================================================
|
||||
|
||||
# Lithuanian
|
||||
|
||||
# Lithuanian retains the dot in a lowercase i when followed by accents.
|
||||
|
||||
# Remove DOT ABOVE after "i" with upper or titlecase
|
||||
|
||||
0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
|
||||
|
||||
# Introduce an explicit dot above when lowercasing capital I's and J's
|
||||
# whenever there are more accents above.
|
||||
# (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
|
||||
|
||||
0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
|
||||
004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
|
||||
012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
|
||||
00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
|
||||
|
||||
# ================================================================================
|
||||
|
||||
# Turkish and Azeri
|
||||
|
||||
# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
|
||||
# The following rules handle those cases.
|
||||
|
||||
0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
0130; 0069; 0130; 0130; az; # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
|
||||
# When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
|
||||
# This matches the behavior of the canonically equivalent I-dot_above
|
||||
|
||||
0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
|
||||
0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
|
||||
|
||||
# When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
|
||||
|
||||
0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
|
||||
0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I
|
||||
|
||||
# When uppercasing, i turns into a dotted capital I
|
||||
|
||||
0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
|
||||
0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I
|
||||
|
||||
# Note: the following case is already in the UnicodeData.txt file.
|
||||
|
||||
# 0131; 0131; 0049; 0049; tr; # LATIN SMALL LETTER DOTLESS I
|
||||
|
||||
# EOF
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -23,7 +23,7 @@
|
||||
|
||||
/**
|
||||
* @test
|
||||
* @bug 8080535
|
||||
* @bug 8080535 8191410
|
||||
* @summary Expected size of Character.UnicodeBlock.map is not optimal
|
||||
* @library /lib/testlibrary
|
||||
* @modules java.base/java.lang:open
|
||||
@ -41,8 +41,8 @@ import jdk.testlibrary.OptimalCapacity;
|
||||
// According to http://www.unicode.org/versions/beta-8.0.0.html ,
|
||||
// in Unicode 8 there will be added 10 more blocks (30 with aliases).
|
||||
//
|
||||
// After implementing support of Unicode 7 and 8 in Java, there will
|
||||
// be 510+96+30 = 636 entries in Character.UnicodeBlock.map.
|
||||
// After implementing support of Unicode 9 and 10 in Java, there will
|
||||
// be 638 entries in Character.UnicodeBlock.map.
|
||||
//
|
||||
// Initialization of the map and this test will have to be adjusted
|
||||
// accordingly then.
|
||||
@ -51,7 +51,7 @@ public class OptimalMapSize {
|
||||
public static void main(String[] args) throws Throwable {
|
||||
// The initial size of Character.UnicodeBlock.map.
|
||||
// See src/java.base/share/classes/java/lang/Character.java
|
||||
int initialCapacity = (int)(510 / 0.75f + 1.0f);
|
||||
int initialCapacity = (int)(638 / 0.75f + 1.0f);
|
||||
|
||||
OptimalCapacity.ofHashMap(Character.UnicodeBlock.class,
|
||||
"map", initialCapacity);
|
||||
|
177
test/jdk/java/lang/Character/UnicodeCasingTest.java
Normal file
177
test/jdk/java/lang/Character/UnicodeCasingTest.java
Normal file
@ -0,0 +1,177 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @bug 4397357 6565620 6959267 8032446 8072600
|
||||
* @summary Confirm normal case mappings are handled correctly.
|
||||
* @run main/timeout=200 UnicodeCasingTest
|
||||
*/
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.FileReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
|
||||
/**
 * Checks the one-to-one case mappings returned by {@link Character} against
 * the mappings listed in {@code UnicodeData.txt}.
 *
 * <p>The data file is looked up in the directory named by the
 * {@code test.src} system property (the current directory when the property
 * is unset). The whole file is re-checked once per entry of {@code locales},
 * with that entry installed as the default locale. Every mismatch is reported
 * on {@code System.err}; the run ends with a {@code RuntimeException} if any
 * mismatch or exception occurred.
 */
public class UnicodeCasingTest {

    // Positions of the case-mapping columns in a ';'-separated data line.
    private static final int FIELD_COUNT = 15;
    private static final int FIELD_UPPERCASE = 12;
    private static final int FIELD_LOWERCASE = 13;
    private static final int FIELD_TITLECASE = 14;

    // Set as soon as any mapping mismatch or exception is seen.
    private static boolean err = false;

    // Locales which are used for testing
    private static final List<Locale> locales = new ArrayList<>();
    static {
        locales.add(new Locale("az", ""));
        locales.addAll(java.util.Arrays.asList(Locale.getAvailableLocales()));
    }

    public static void main(String[] args) {
        UnicodeCasingTest specialCasingTest = new UnicodeCasingTest();
        specialCasingTest.test();
    }

    /**
     * Runs the data-file check under every locale in {@code locales}, then
     * restores the original default locale and reports the overall result.
     *
     * @throws RuntimeException if any mapping mismatched or an exception
     *         was raised while reading or parsing the data file
     */
    private void test() {
        Locale defaultLocale = Locale.getDefault();
        File file = new File(System.getProperty("test.src", "."),
                             "UnicodeData.txt");

        try {
            for (Locale locale : locales) {
                Locale.setDefault(locale);
                System.out.println("Testing on " + locale + " locale....");

                // try-with-resources closes the reader on every exit path.
                try (BufferedReader in = new BufferedReader(new FileReader(file))) {
                    String line;
                    while ((line = in.readLine()) != null) {
                        // Skip blank lines and '#' comment lines.
                        if (line.length() == 0 || line.charAt(0) == '#') {
                            continue;
                        }
                        test(line);
                    }
                }
            }
        } catch (Exception e) {
            // Any I/O or parse problem fails the test; the remaining
            // locales are not attempted.
            err = true;
            e.printStackTrace();
        } finally {
            Locale.setDefault(defaultLocale);
        }

        // Reported outside the finally block so that a propagating Error
        // is never replaced by (or preceded by) the pass/fail verdict.
        if (err) {
            throw new RuntimeException("UnicodeCasingTest failed.");
        }
        System.out.println("UnicodeCasingTest passed.");
    }

    /**
     * Checks the upper, lower and title case mappings described by one data
     * line. An empty mapping column means the character is expected to map
     * to itself.
     *
     * @param line one non-comment line of the data file
     */
    private void test(String line) {
        String[] fields = line.split(";", FIELD_COUNT);
        int orig = convert(fields[0]);

        testUpperCase(orig, expectedMapping(fields[FIELD_UPPERCASE], orig));
        testLowerCase(orig, expectedMapping(fields[FIELD_LOWERCASE], orig));
        testTitleCase(orig, expectedMapping(fields[FIELD_TITLECASE], orig));
    }

    // Returns the code point named by a mapping column, or orig when the
    // column is empty.
    private int expectedMapping(String field, int orig) {
        return field.length() != 0 ? convert(field) : orig;
    }

    private void testUpperCase(int orig, int expected) {
        check("toUpperCase", orig, Character.toUpperCase(orig), expected);
    }

    private void testLowerCase(int orig, int expected) {
        check("toLowerCase", orig, Character.toLowerCase(orig), expected);
    }

    private void testTitleCase(int orig, int expected) {
        check("toTitleCase", orig, Character.toTitleCase(orig), expected);
    }

    // Records a failure and prints a diagnostic when got differs from
    // expected. The tested code point is placed inside the parentheses of
    // the reported call.
    private void check(String method, int orig, int got, int expected) {
        if (expected != got) {
            err = true;
            System.err.println(method + "(" + toString(orig) +
                ") failed.\n\tOriginal: " + toString(orig) +
                "\n\tGot: " + toString(got) +
                "\n\tExpected: " + toString(expected));
        }
    }

    // Parses a hexadecimal code point column.
    private int convert(String str) {
        return Integer.parseInt(str, 16);
    }

    // Formats a code point as upper-case hexadecimal for diagnostics.
    private String toString(int i) {
        return Integer.toHexString(i).toUpperCase();
    }

}
|
31618
test/jdk/java/lang/Character/UnicodeData.txt
Normal file
31618
test/jdk/java/lang/Character/UnicodeData.txt
Normal file
File diff suppressed because it is too large
Load Diff
756
test/jdk/java/lang/Character/UnicodeSpec.java
Normal file
756
test/jdk/java/lang/Character/UnicodeSpec.java
Normal file
@ -0,0 +1,756 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.FileReader;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.File;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
* The UnicodeSpec class provides a way to read in Unicode character
|
||||
* properties from a Unicode data file. One instance of class UnicodeSpec
|
||||
* holds a decoded version of one line of the data file. The file may
|
||||
* be obtained from www.unicode.org. The method readSpecFile returns an array
|
||||
* of UnicodeSpec objects.
|
||||
*
|
||||
* @author Guy Steele
|
||||
* @author John O'Conner
|
||||
*/
|
||||
|
||||
public class UnicodeSpec {
|
||||
|
||||
/**
 * Creates a spec for code point 0xFFFF by delegating to
 * {@link #UnicodeSpec(int)}.
 */
public UnicodeSpec() {
    this(0xFFFF);
}
|
||||
|
||||
/**
 * Creates a spec for the given code point with every other property set to
 * its initial value.
 *
 * @param codePoint the code point this spec describes
 */
public UnicodeSpec(int codePoint) {
    this.codePoint = codePoint;

    // Textual properties start out absent.
    name = null;
    oldName = null;
    comment = null;

    // Classification properties.
    generalCategory = UNASSIGNED;
    bidiCategory = DIRECTIONALITY_UNDEFINED;
    mirrored = false;

    // Case maps start at 0xFFFF, the value toString() skips when printing.
    upperMap = 0xFFFF;
    lowerMap = 0xFFFF;
    titleMap = 0xFFFF;

    // Numeric properties.
    decimalValue = -1;
    digitValue = -1;
    numericValue = "";
}
|
||||
|
||||
public String toString() {
|
||||
StringBuffer result = new StringBuffer(hex6(codePoint));
|
||||
if (getUpperMap() != 0xffff) {
|
||||
result.append(", upper=").append(hex6(upperMap));
|
||||
}
|
||||
if (getLowerMap() != 0xffff) {
|
||||
result.append(", lower=").append(hex6(lowerMap));
|
||||
}
|
||||
if (getTitleMap() != 0xffff) {
|
||||
result.append(", title=").append(hex6(titleMap));
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
static String hex4(int n) {
|
||||
String q = Long.toHexString(n & 0xFFFF).toUpperCase();
|
||||
return "0000".substring(Math.min(4, q.length())) + q;
|
||||
}
|
||||
|
||||
static String hex6(int n) {
|
||||
String str = Integer.toHexString(n & 0xFFFFFF).toUpperCase();
|
||||
return "000000".substring(Math.min(6, str.length())) + str;
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Given one line of a Unicode data file as a String, parse the line
|
||||
* and return a UnicodeSpec object that contains the same character information.
|
||||
*
|
||||
* @param s a line of the Unicode data file to be parsed
|
||||
* @return a UnicodeSpec object, or null if the parsing process failed for some reason
|
||||
*/
|
||||
public static UnicodeSpec parse(String s) {
|
||||
UnicodeSpec spec = null;
|
||||
String[] tokens = null;
|
||||
|
||||
try {
|
||||
tokens = tokenSeparator.split(s, REQUIRED_FIELDS);
|
||||
spec = new UnicodeSpec();
|
||||
spec.setCodePoint(parseCodePoint(tokens[FIELD_VALUE]));
|
||||
spec.setName(parseName(tokens[FIELD_NAME]));
|
||||
spec.setGeneralCategory(parseGeneralCategory(tokens[FIELD_CATEGORY]));
|
||||
spec.setBidiCategory(parseBidiCategory(tokens[FIELD_BIDI]));
|
||||
spec.setCombiningClass(parseCombiningClass(tokens[FIELD_CLASS]));
|
||||
spec.setDecomposition(parseDecomposition(tokens[FIELD_DECOMPOSITION]));
|
||||
spec.setDecimalValue(parseDecimalValue(tokens[FIELD_DECIMAL]));
|
||||
spec.setDigitValue(parseDigitValue(tokens[FIELD_DIGIT]));
|
||||
spec.setNumericValue(parseNumericValue(tokens[FIELD_NUMERIC]));
|
||||
spec.setMirrored(parseMirrored(tokens[FIELD_MIRRORED]));
|
||||
spec.setOldName(parseOldName(tokens[FIELD_OLDNAME]));
|
||||
spec.setComment(parseComment(tokens[FIELD_COMMENT]));
|
||||
spec.setUpperMap(parseUpperMap(tokens[FIELD_UPPERCASE]));
|
||||
spec.setLowerMap(parseLowerMap(tokens[FIELD_LOWERCASE]));
|
||||
spec.setTitleMap(parseTitleMap(tokens[FIELD_TITLECASE]));
|
||||
}
|
||||
catch(Exception e) {
|
||||
spec = null;
|
||||
System.out.println("Error parsing spec line.");
|
||||
}
|
||||
return spec;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse the codePoint attribute for a Unicode character. If the parse succeeds,
|
||||
* the codePoint field of this UnicodeSpec object is updated and false is returned.
|
||||
*
|
||||
* The codePoint attribute should be a four-digit hexadecimal integer.
|
||||
*
|
||||
* @param s the codePoint attribute extracted from a line of the Unicode data file
|
||||
* @return code point if successful
|
||||
* @exception NumberFormatException if unable to parse argument
|
||||
*/
|
||||
public static int parseCodePoint(String s) throws NumberFormatException {
|
||||
return Integer.parseInt(s, 16);
|
||||
}
|
||||
|
||||
public static String parseName(String s) throws Exception {
|
||||
if (s==null) throw new Exception("Cannot parse name.");
|
||||
return s;
|
||||
}
|
||||
|
||||
public static byte parseGeneralCategory(String s) throws Exception {
|
||||
byte category = GENERAL_CATEGORY_COUNT;
|
||||
|
||||
for (byte x=0; x<generalCategoryList.length; x++) {
|
||||
if (s.equals(generalCategoryList[x][SHORT])) {
|
||||
category = x;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (category >= GENERAL_CATEGORY_COUNT) {
|
||||
throw new Exception("Could not parse general category.");
|
||||
}
|
||||
return category;
|
||||
}
|
||||
|
||||
public static byte parseBidiCategory(String s) throws Exception {
|
||||
byte category = DIRECTIONALITY_CATEGORY_COUNT;
|
||||
|
||||
for (byte x=0; x<bidiCategoryList.length; x++) {
|
||||
if (s.equals(bidiCategoryList[x][SHORT])) {
|
||||
category = x;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (category >= DIRECTIONALITY_CATEGORY_COUNT) {
|
||||
throw new Exception("Could not parse bidi category.");
|
||||
}
|
||||
return category;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Parse the combining attribute for a Unicode character. If there is a combining
|
||||
* attribute and the parse succeeds, then the hasCombining field is set to true,
|
||||
* the combining field of this UnicodeSpec object is updated, and false is returned.
|
||||
* If the combining attribute is an empty string, the parse succeeds but the
|
||||
* hasCombining field is set to false. (and false is returned).
|
||||
*
|
||||
* The combining attribute, if any, should be a nonnegative decimal integer.
|
||||
*
|
||||
* @param s the combining attribute extracted from a line of the Unicode data file
|
||||
* @return the combining class value if any, -1 if property not defined
|
||||
* @exception Exception if can't parse the combining class
|
||||
*/
|
||||
|
||||
public static int parseCombiningClass(String s) throws Exception {
|
||||
int combining = -1;
|
||||
if (s.length()>0) {
|
||||
combining = Integer.parseInt(s, 10);
|
||||
}
|
||||
return combining;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse the decomposition attribute for a Unicode character. If the parse succeeds,
|
||||
* the decomposition field of this UnicodeSpec object is updated and false is returned.
|
||||
*
|
||||
* The decomposition attribute is complicated; for now, it is treated as a string.
|
||||
*
|
||||
* @param s the decomposition attribute extracted from a line of the Unicode data file
|
||||
* @return true if the parse failed; otherwise false
|
||||
*/
|
||||
|
||||
public static String parseDecomposition(String s) throws Exception {
|
||||
if (s==null) throw new Exception("Cannot parse decomposition.");
|
||||
return s;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Parse the decimal value attribute for a Unicode character. If there is a decimal value
|
||||
* attribute and the parse succeeds, then the hasDecimalValue field is set to true,
|
||||
* the decimalValue field of this UnicodeSpec object is updated, and false is returned.
|
||||
* If the decimal value attribute is an empty string, the parse succeeds but the
|
||||
* hasDecimalValue field is set to false. (and false is returned).
|
||||
*
|
||||
* The decimal value attribute, if any, should be a nonnegative decimal integer.
|
||||
*
|
||||
* @param s the decimal value attribute extracted from a line of the Unicode data file
|
||||
* @return the decimal value as an int, -1 if no decimal value defined
|
||||
* @exception NumberFormatException if the parse fails
|
||||
*/
|
||||
public static int parseDecimalValue(String s) throws NumberFormatException {
|
||||
int value = -1;
|
||||
|
||||
if (s.length() > 0) {
|
||||
value = Integer.parseInt(s, 10);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse the digit value attribute for a Unicode character. If there is a digit value
|
||||
* attribute and the parse succeeds, then the hasDigitValue field is set to true,
|
||||
* the digitValue field of this UnicodeSpec object is updated, and false is returned.
|
||||
* If the digit value attribute is an empty string, the parse succeeds but the
|
||||
* hasDigitValue field is set to false. (and false is returned).
|
||||
*
|
||||
* The digit value attribute, if any, should be a nonnegative decimal integer.
|
||||
*
|
||||
* @param s the digit value attribute extracted from a line of the Unicode data file
|
||||
* @return the digit value as an non-negative int, or -1 if no digit property defined
|
||||
* @exception NumberFormatException if the parse fails
|
||||
*/
|
||||
public static int parseDigitValue(String s) throws NumberFormatException {
|
||||
int value = -1;
|
||||
|
||||
if (s.length() > 0) {
|
||||
value = Integer.parseInt(s, 10);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
public static String parseNumericValue(String s) throws Exception {
|
||||
if (s == null) throw new Exception("Cannot parse numeric value.");
|
||||
return s;
|
||||
}
|
||||
|
||||
public static String parseComment(String s) throws Exception {
|
||||
if (s == null) throw new Exception("Cannot parse comment.");
|
||||
return s;
|
||||
}
|
||||
|
||||
public static boolean parseMirrored(String s) throws Exception {
|
||||
boolean mirrored;
|
||||
if (s.length() == 1) {
|
||||
if (s.charAt(0) == 'Y') {mirrored = true;}
|
||||
else if (s.charAt(0) == 'N') {mirrored = false;}
|
||||
else {throw new Exception("Cannot parse mirrored property.");}
|
||||
}
|
||||
else { throw new Exception("Cannot parse mirrored property.");}
|
||||
return mirrored;
|
||||
}
|
||||
|
||||
public static String parseOldName(String s) throws Exception {
|
||||
if (s == null) throw new Exception("Cannot parse old name");
|
||||
return s;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse the uppercase mapping attribute for a Unicode character. If there is a uppercase
|
||||
* mapping attribute and the parse succeeds, then the hasUpperMap field is set to true,
|
||||
* the upperMap field of this UnicodeSpec object is updated, and false is returned.
|
||||
* If the uppercase mapping attribute is an empty string, the parse succeeds but the
|
||||
* hasUpperMap field is set to false. (and false is returned).
|
||||
*
|
||||
* The uppercase mapping attribute should be a four-digit hexadecimal integer.
|
||||
*
|
||||
* @param s the uppercase mapping attribute extracted from a line of the Unicode data file
|
||||
* @return uppercase char if defined, \uffff otherwise
|
||||
* @exception NumberFormatException if parse fails
|
||||
*/
|
||||
public static int parseUpperMap(String s) throws NumberFormatException {
|
||||
int upperCase = 0xFFFF;
|
||||
|
||||
if (s.length() >= 4) {
|
||||
upperCase = Integer.parseInt(s, 16);
|
||||
}
|
||||
else if (s.length() != 0) {
|
||||
throw new NumberFormatException();
|
||||
}
|
||||
return upperCase;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse the lowercase mapping attribute for a Unicode character. If there is a lowercase
|
||||
* mapping attribute and the parse succeeds, then the hasLowerMap field is set to true,
|
||||
* the lowerMap field of this UnicodeSpec object is updated, and false is returned.
|
||||
* If the lowercase mapping attribute is an empty string, the parse succeeds but the
|
||||
* hasLowerMap field is set to false. (and false is returned).
|
||||
*
|
||||
* The lowercase mapping attribute should be a four-digit hexadecimal integer.
|
||||
*
|
||||
* @param s the lowercase mapping attribute extracted from a line of the Unicode data file
|
||||
* @return lowercase char mapping if defined, \uFFFF otherwise
|
||||
* @exception NumberFormatException if parse fails
|
||||
*/
|
||||
public static int parseLowerMap(String s) throws NumberFormatException {
|
||||
int lowerCase = 0xFFFF;
|
||||
|
||||
if (s.length() >= 4) {
|
||||
lowerCase = Integer.parseInt(s, 16);
|
||||
}
|
||||
else if (s.length() != 0) {
|
||||
throw new NumberFormatException();
|
||||
}
|
||||
return lowerCase;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse the titlecase mapping attribute for a Unicode character. If there is a titlecase
|
||||
* mapping attribute and the parse succeeds, then the hasTitleMap field is set to true,
|
||||
* the titleMap field of this UnicodeSpec object is updated, and false is returned.
|
||||
* If the titlecase mapping attribute is an empty string, the parse succeeds but the
|
||||
* hasTitleMap field is set to false. (and false is returned).
|
||||
*
|
||||
* The titlecase mapping attribute should be a four-digit hexadecimal integer.
|
||||
*
|
||||
* @param s the titlecase mapping attribute extracted from a line of the Unicode data file
|
||||
* @return title case char mapping if defined, \uFFFF otherwise
|
||||
* @exception NumberFormatException if parse fails
|
||||
*/
|
||||
public static int parseTitleMap(String s) throws NumberFormatException {
|
||||
int titleCase = 0xFFFF;
|
||||
|
||||
if (s.length() >= 4) {
|
||||
titleCase = Integer.parseInt(s, 16);
|
||||
}
|
||||
else if (s.length() != 0) {
|
||||
throw new NumberFormatException();
|
||||
}
|
||||
return titleCase;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read and parse a Unicode data file.
|
||||
*
|
||||
* @param file a file specifying the Unicode data file to be read
|
||||
* @return an array of UnicodeSpec objects, one for each line of the
|
||||
* Unicode data file that could be successfully parsed as
|
||||
* specifying Unicode character attributes
|
||||
*/
|
||||
|
||||
public static UnicodeSpec[] readSpecFile(File file, int plane) throws FileNotFoundException {
|
||||
ArrayList<UnicodeSpec> list = new ArrayList<>(3000);
|
||||
UnicodeSpec[] result = null;
|
||||
int count = 0;
|
||||
BufferedReader f = new BufferedReader(new FileReader(file));
|
||||
String line = null;
|
||||
loop:
|
||||
while(true) {
|
||||
try {
|
||||
line = f.readLine();
|
||||
}
|
||||
catch (IOException e) {
|
||||
break loop;
|
||||
}
|
||||
if (line == null) break loop;
|
||||
UnicodeSpec item = parse(line.trim());
|
||||
int specPlane = item.getCodePoint() >>> 16;
|
||||
if (specPlane < plane) continue;
|
||||
if (specPlane > plane) break;
|
||||
|
||||
if (item != null) {
|
||||
list.add(item);
|
||||
}
|
||||
}
|
||||
result = new UnicodeSpec[list.size()];
|
||||
list.toArray(result);
|
||||
return result;
|
||||
}
|
||||
|
||||
void setCodePoint(int value) {
|
||||
codePoint = value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the code point in this Unicode specification
|
||||
* @return the char code point representing by the specification
|
||||
*/
|
||||
public int getCodePoint() {
|
||||
return codePoint;
|
||||
}
|
||||
|
||||
void setName(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
void setGeneralCategory(byte category) {
|
||||
generalCategory = category;
|
||||
}
|
||||
|
||||
public byte getGeneralCategory() {
|
||||
return generalCategory;
|
||||
}
|
||||
|
||||
void setBidiCategory(byte category) {
|
||||
bidiCategory = category;
|
||||
}
|
||||
|
||||
public byte getBidiCategory() {
|
||||
return bidiCategory;
|
||||
}
|
||||
|
||||
void setCombiningClass(int combiningClass) {
|
||||
this.combiningClass = combiningClass;
|
||||
}
|
||||
|
||||
public int getCombiningClass() {
|
||||
return combiningClass;
|
||||
}
|
||||
|
||||
void setDecomposition(String decomposition) {
|
||||
this.decomposition = decomposition;
|
||||
}
|
||||
|
||||
public String getDecomposition() {
|
||||
return decomposition;
|
||||
}
|
||||
|
||||
void setDecimalValue(int value) {
|
||||
decimalValue = value;
|
||||
}
|
||||
|
||||
public int getDecimalValue() {
|
||||
return decimalValue;
|
||||
}
|
||||
|
||||
public boolean isDecimalValue() {
|
||||
return decimalValue != -1;
|
||||
}
|
||||
|
||||
void setDigitValue(int value) {
|
||||
digitValue = value;
|
||||
}
|
||||
|
||||
public int getDigitValue() {
|
||||
return digitValue;
|
||||
}
|
||||
|
||||
public boolean isDigitValue() {
|
||||
return digitValue != -1;
|
||||
}
|
||||
|
||||
void setNumericValue(String value) {
|
||||
numericValue = value;
|
||||
}
|
||||
|
||||
public String getNumericValue() {
|
||||
return numericValue;
|
||||
}
|
||||
|
||||
public boolean isNumericValue() {
|
||||
return numericValue.length() > 0;
|
||||
}
|
||||
|
||||
void setMirrored(boolean value) {
|
||||
mirrored = value;
|
||||
}
|
||||
|
||||
public boolean isMirrored() {
|
||||
return mirrored;
|
||||
}
|
||||
|
||||
void setOldName(String name) {
|
||||
oldName = name;
|
||||
}
|
||||
|
||||
public String getOldName() {
|
||||
return oldName;
|
||||
}
|
||||
|
||||
void setComment(String comment) {
|
||||
this.comment = comment;
|
||||
}
|
||||
|
||||
public String getComment() {
|
||||
return comment;
|
||||
}
|
||||
|
||||
void setUpperMap(int ch) {
|
||||
upperMap = ch;
|
||||
};
|
||||
|
||||
public int getUpperMap() {
|
||||
return upperMap;
|
||||
}
|
||||
|
||||
public boolean hasUpperMap() {
|
||||
return upperMap != 0xffff;
|
||||
}
|
||||
|
||||
void setLowerMap(int ch) {
|
||||
lowerMap = ch;
|
||||
}
|
||||
|
||||
public int getLowerMap() {
|
||||
return lowerMap;
|
||||
}
|
||||
|
||||
public boolean hasLowerMap() {
|
||||
return lowerMap != 0xffff;
|
||||
}
|
||||
|
||||
void setTitleMap(int ch) {
|
||||
titleMap = ch;
|
||||
}
|
||||
|
||||
public int getTitleMap() {
|
||||
return titleMap;
|
||||
}
|
||||
|
||||
public boolean hasTitleMap() {
|
||||
return titleMap != 0xffff;
|
||||
}
|
||||
|
||||
int codePoint; // the characters UTF-32 code value
|
||||
String name; // the ASCII name
|
||||
byte generalCategory; // general category, available via Characte.getType()
|
||||
byte bidiCategory; // available via Character.getBidiType()
|
||||
int combiningClass; // not used in Character
|
||||
String decomposition; // not used in Character
|
||||
int decimalValue; // decimal digit value
|
||||
int digitValue; // not all digits are decimal
|
||||
String numericValue; // numeric value if digit or non-digit
|
||||
boolean mirrored; //
|
||||
String oldName;
|
||||
String comment;
|
||||
int upperMap;
|
||||
int lowerMap;
|
||||
int titleMap;
|
||||
|
||||
// this is the number of fields in one line of the UnicodeData.txt file
|
||||
// each field is separated by a semicolon (a token)
|
||||
static final int REQUIRED_FIELDS = 15;
|
||||
|
||||
/**
|
||||
* General category types
|
||||
* To preserve compatibility, these values cannot be changed
|
||||
*/
|
||||
public static final byte
|
||||
UNASSIGNED = 0, // Cn normative
|
||||
UPPERCASE_LETTER = 1, // Lu normative
|
||||
LOWERCASE_LETTER = 2, // Ll normative
|
||||
TITLECASE_LETTER = 3, // Lt normative
|
||||
MODIFIER_LETTER = 4, // Lm normative
|
||||
OTHER_LETTER = 5, // Lo normative
|
||||
NON_SPACING_MARK = 6, // Mn informative
|
||||
ENCLOSING_MARK = 7, // Me informative
|
||||
COMBINING_SPACING_MARK = 8, // Mc normative
|
||||
DECIMAL_DIGIT_NUMBER = 9, // Nd normative
|
||||
LETTER_NUMBER = 10, // Nl normative
|
||||
OTHER_NUMBER = 11, // No normative
|
||||
SPACE_SEPARATOR = 12, // Zs normative
|
||||
LINE_SEPARATOR = 13, // Zl normative
|
||||
PARAGRAPH_SEPARATOR = 14, // Zp normative
|
||||
CONTROL = 15, // Cc normative
|
||||
FORMAT = 16, // Cf normative
|
||||
// 17 is unused for no apparent reason,
|
||||
// but must preserve forward compatibility
|
||||
PRIVATE_USE = 18, // Co normative
|
||||
SURROGATE = 19, // Cs normative
|
||||
DASH_PUNCTUATION = 20, // Pd informative
|
||||
START_PUNCTUATION = 21, // Ps informative
|
||||
END_PUNCTUATION = 22, // Pe informative
|
||||
CONNECTOR_PUNCTUATION = 23, // Pc informative
|
||||
OTHER_PUNCTUATION = 24, // Po informative
|
||||
MATH_SYMBOL = 25, // Sm informative
|
||||
CURRENCY_SYMBOL = 26, // Sc informative
|
||||
MODIFIER_SYMBOL = 27, // Sk informative
|
||||
OTHER_SYMBOL = 28, // So informative
|
||||
INITIAL_QUOTE_PUNCTUATION = 29, // Pi informative
|
||||
FINAL_QUOTE_PUNCTUATION = 30, // Pf informative
|
||||
|
||||
// this value is only used in the character generation tool
|
||||
// it can change to accommodate the addition of new categories.
|
||||
GENERAL_CATEGORY_COUNT = 31; // sentinel value
|
||||
|
||||
static final byte SHORT = 0, LONG = 1;
|
||||
// general category type strings
|
||||
// NOTE: The order of this category array is dependent on the assignment of
|
||||
// category constants above. We want to access this array using constants above.
|
||||
// [][SHORT] is the SHORT name, [][LONG] is the LONG name
|
||||
static final String[][] generalCategoryList = {
|
||||
{"Cn", "UNASSIGNED"},
|
||||
{"Lu", "UPPERCASE_LETTER"},
|
||||
{"Ll", "LOWERCASE_LETTER"},
|
||||
{"Lt", "TITLECASE_LETTER"},
|
||||
{"Lm", "MODIFIER_LETTER"},
|
||||
{"Lo", "OTHER_LETTER"},
|
||||
{"Mn", "NON_SPACING_MARK"},
|
||||
{"Me", "ENCLOSING_MARK"},
|
||||
{"Mc", "COMBINING_SPACING_MARK"},
|
||||
{"Nd", "DECIMAL_DIGIT_NUMBER"},
|
||||
{"Nl", "LETTER_NUMBER"},
|
||||
{"No", "OTHER_NUMBER"},
|
||||
{"Zs", "SPACE_SEPARATOR"},
|
||||
{"Zl", "LINE_SEPARATOR"},
|
||||
{"Zp", "PARAGRAPH_SEPARATOR"},
|
||||
{"Cc", "CONTROL"},
|
||||
{"Cf", "FORMAT"},
|
||||
{"xx", "unused"},
|
||||
{"Co", "PRIVATE_USE"},
|
||||
{"Cs", "SURROGATE"},
|
||||
{"Pd", "DASH_PUNCTUATION"},
|
||||
{"Ps", "START_PUNCTUATION"},
|
||||
{"Pe", "END_PUNCTUATION"},
|
||||
{"Pc", "CONNECTOR_PUNCTUATION"},
|
||||
{"Po", "OTHER_PUNCTUATION"},
|
||||
{"Sm", "MATH_SYMBOL"},
|
||||
{"Sc", "CURRENCY_SYMBOL"},
|
||||
{"Sk", "MODIFIER_SYMBOL"},
|
||||
{"So", "OTHER_SYMBOL"},
|
||||
{"Pi", "INITIAL_QUOTE_PUNCTUATION"},
|
||||
{"Pf", "FINAL_QUOTE_PUNCTUATION"}
|
||||
};
|
||||
|
||||
/**
|
||||
* Bidirectional categories
|
||||
*/
|
||||
public static final byte
|
||||
DIRECTIONALITY_UNDEFINED = -1,
|
||||
// Strong category
|
||||
DIRECTIONALITY_LEFT_TO_RIGHT = 0, // L
|
||||
DIRECTIONALITY_RIGHT_TO_LEFT = 1, // R
|
||||
DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2, // AL
|
||||
// Weak category
|
||||
DIRECTIONALITY_EUROPEAN_NUMBER = 3, // EN
|
||||
DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4, // ES
|
||||
DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5, // ET
|
||||
DIRECTIONALITY_ARABIC_NUMBER = 6, // AN
|
||||
DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7, // CS
|
||||
DIRECTIONALITY_NONSPACING_MARK = 8, // NSM
|
||||
DIRECTIONALITY_BOUNDARY_NEUTRAL = 9, // BN
|
||||
// Neutral category
|
||||
DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10, // B
|
||||
DIRECTIONALITY_SEGMENT_SEPARATOR = 11, // S
|
||||
DIRECTIONALITY_WHITESPACE = 12, // WS
|
||||
DIRECTIONALITY_OTHER_NEUTRALS = 13, // ON
|
||||
|
||||
DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14, // LRE
|
||||
DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15, // LRO
|
||||
DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16, // RLE
|
||||
DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17, // RLO
|
||||
DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18, // PDF
|
||||
|
||||
DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE = 19, // LRI
|
||||
DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE = 20, // RLI
|
||||
DIRECTIONALITY_FIRST_STRONG_ISOLATE = 21, // FSI
|
||||
DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE = 22, // PDI
|
||||
|
||||
DIRECTIONALITY_CATEGORY_COUNT = 23; // sentinel value
|
||||
|
||||
// If changes are made to the above bidi category assignments, this
|
||||
// list of bidi category names must be changed to keep their order in synch.
|
||||
// Access this list using the bidi category constants above.
|
||||
static final String[][] bidiCategoryList = {
|
||||
{"L", "DIRECTIONALITY_LEFT_TO_RIGHT"},
|
||||
{"R", "DIRECTIONALITY_RIGHT_TO_LEFT"},
|
||||
{"AL", "DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC"},
|
||||
{"EN", "DIRECTIONALITY_EUROPEAN_NUMBER"},
|
||||
{"ES", "DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR"},
|
||||
{"ET", "DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR"},
|
||||
{"AN", "DIRECTIONALITY_ARABIC_NUMBER"},
|
||||
{"CS", "DIRECTIONALITY_COMMON_NUMBER_SEPARATOR"},
|
||||
{"NSM", "DIRECTIONALITY_NONSPACING_MARK"},
|
||||
{"BN", "DIRECTIONALITY_BOUNDARY_NEUTRAL"},
|
||||
{"B", "DIRECTIONALITY_PARAGRAPH_SEPARATOR"},
|
||||
{"S", "DIRECTIONALITY_SEGMENT_SEPARATOR"},
|
||||
{"WS", "DIRECTIONALITY_WHITESPACE"},
|
||||
{"ON", "DIRECTIONALITY_OTHER_NEUTRALS"},
|
||||
{"LRE", "DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING"},
|
||||
{"LRO", "DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE"},
|
||||
{"RLE", "DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING"},
|
||||
{"RLO", "DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE"},
|
||||
{"PDF", "DIRECTIONALITY_POP_DIRECTIONAL_FORMAT"},
|
||||
{"LRI", "DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE"},
|
||||
{"RLI", "DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE"},
|
||||
{"FSI", "DIRECTIONALITY_FIRST_STRONG_ISOLATE"},
|
||||
{"PDI", "DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE"},
|
||||
|
||||
};
|
||||
|
||||
// Unicode specification lines have fields in this order.
|
||||
static final byte
|
||||
FIELD_VALUE = 0,
|
||||
FIELD_NAME = 1,
|
||||
FIELD_CATEGORY = 2,
|
||||
FIELD_CLASS = 3,
|
||||
FIELD_BIDI = 4,
|
||||
FIELD_DECOMPOSITION = 5,
|
||||
FIELD_DECIMAL = 6,
|
||||
FIELD_DIGIT = 7,
|
||||
FIELD_NUMERIC = 8,
|
||||
FIELD_MIRRORED = 9,
|
||||
FIELD_OLDNAME = 10,
|
||||
FIELD_COMMENT = 11,
|
||||
FIELD_UPPERCASE = 12,
|
||||
FIELD_LOWERCASE = 13,
|
||||
FIELD_TITLECASE = 14;
|
||||
|
||||
static final Pattern tokenSeparator = Pattern.compile(";");
|
||||
|
||||
public static void main(String[] args) {
|
||||
UnicodeSpec[] spec = null;
|
||||
if (args.length == 2 ) {
|
||||
try {
|
||||
File file = new File(args[0]);
|
||||
int plane = Integer.parseInt(args[1]);
|
||||
spec = UnicodeSpec.readSpecFile(file, plane);
|
||||
System.out.println("UnicodeSpec[" + spec.length + "]:");
|
||||
for (int x=0; x<spec.length; x++) {
|
||||
System.out.println(spec[x].toString());
|
||||
}
|
||||
}
|
||||
catch(Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
BIN
test/jdk/java/lang/Character/charprop00.bin
Normal file
BIN
test/jdk/java/lang/Character/charprop00.bin
Normal file
Binary file not shown.
BIN
test/jdk/java/lang/Character/charprop01.bin
Normal file
BIN
test/jdk/java/lang/Character/charprop01.bin
Normal file
Binary file not shown.
BIN
test/jdk/java/lang/Character/charprop02.bin
Normal file
BIN
test/jdk/java/lang/Character/charprop02.bin
Normal file
Binary file not shown.
BIN
test/jdk/java/lang/Character/charprop03.bin
Normal file
BIN
test/jdk/java/lang/Character/charprop03.bin
Normal file
Binary file not shown.
BIN
test/jdk/java/lang/Character/charprop0E.bin
Normal file
BIN
test/jdk/java/lang/Character/charprop0E.bin
Normal file
Binary file not shown.
BIN
test/jdk/java/lang/Character/charprop0F.bin
Normal file
BIN
test/jdk/java/lang/Character/charprop0F.bin
Normal file
Binary file not shown.
BIN
test/jdk/java/lang/Character/charprop10.bin
Normal file
BIN
test/jdk/java/lang/Character/charprop10.bin
Normal file
Binary file not shown.
354
test/jdk/java/lang/String/SpecialCasingTest.java
Normal file
354
test/jdk/java/lang/String/SpecialCasingTest.java
Normal file
@ -0,0 +1,354 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @bug 4397357 6565620 6959267 7070436 7198195 8041791 8032446 8072600
|
||||
* @summary Confirm special case mappings are handled correctly.
|
||||
*/
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.StringTokenizer;
|
||||
|
||||
public class SpecialCasingTest {
|
||||
|
||||
private static boolean err = false;
|
||||
|
||||
// Locales which are used for testing
|
||||
private static List<Locale> locales = new ArrayList<>();
|
||||
static {
|
||||
locales.add(new Locale("az", ""));
|
||||
locales.addAll(java.util.Arrays.asList(Locale.getAvailableLocales()));
|
||||
}
|
||||
|
||||
// Default locale
|
||||
private static String defaultLang;
|
||||
|
||||
// True if the default language is az, lt, or tr which has locale-specific
|
||||
// mappings.
|
||||
private static boolean specificLocale;
|
||||
|
||||
// Additional test cases
|
||||
// Pseudo-locales which are used here:
|
||||
// L1: locales other than lt
|
||||
// L2: locales other than az and tr
|
||||
// L3: locales other than az, lt and tr
|
||||
private static final String[] additionalTestData = {
|
||||
// Format:
|
||||
// <code>; <lower>; <title>; <upper>; (<condition_list>)
|
||||
|
||||
// Counterpart of Final_Sigma test case
|
||||
// 03A3; 03C2; 03A3; 03A3; Final_Sigma
|
||||
"03A3; 03C3; 03A3; 03A3; SunSpecific_Not_Final_Sigma1",
|
||||
"03A3; 03C3; 03A3; 03A3; SunSpecific_Not_Final_Sigma2",
|
||||
|
||||
// Counterpart of After_Soft_Dotted test case
|
||||
// 0307; 0307; ; ; lt After_Soft_Dotted
|
||||
"0307; 0307; 0307; 0307; L1 After_Soft_Dotted",
|
||||
"0307; 0307; 0307; 0307; lt SunSpecific_Not_After_Soft_Dotted",
|
||||
"0307; 0307; 0307; 0307; L1 SunSpecific_Not_After_Soft_Dotted",
|
||||
|
||||
// Counterpart of More_Above test cases
|
||||
// 0049; 0069 0307; 0049; 0049; lt More_Above
|
||||
"0049; 0131 ; 0049; 0049; az More_Above",
|
||||
"0049; 0131 ; 0049; 0049; tr More_Above",
|
||||
"0049; 0069 ; 0049; 0049; L3 More_Above",
|
||||
"0049; 0069 ; 0049; 0049; lt SunSpecific_Not_More_Above",
|
||||
"0049; 0131 ; 0049; 0049; az SunSpecific_Not_More_Above",
|
||||
"0049; 0131 ; 0049; 0049; tr SunSpecific_Not_More_Above",
|
||||
"0049; 0069 ; 0049; 0049; L3 SunSpecific_Not_More_Above",
|
||||
// 004A; 006A 0307; 004A; 004A; lt More_Above
|
||||
"004A; 006A ; 004A; 004A; L1 More_Above",
|
||||
"004A; 006A ; 004A; 004A; lt SunSpecific_Not_More_Above",
|
||||
"004A; 006A ; 004A; 004A; L1 SunSpecific_Not_More_Above",
|
||||
// 012E; 012F 0307; 012E; 012E; lt More_Above
|
||||
"012E; 012F ; 012E; 012E; L1 More_Above",
|
||||
"012E; 012F ; 012E; 012E; lt SunSpecific_Not_More_Above",
|
||||
"012E; 012F ; 012E; 012E; L1 SunSpecific_Not_More_Above",
|
||||
|
||||
// Counterpart of After_I test cases
|
||||
// 0307; ; 0307; 0307; tr After_I
|
||||
// 0307; ; 0307; 0307; az After_I
|
||||
"0307; 0307 0307; 0307; 0307; lt After_I",
|
||||
"0307; 0307 ; 0307; 0307; L3 After_I",
|
||||
"0307; 0307 ; 0307; 0307; tr SunSpecific_Not_After_I",
|
||||
"0307; 0307 ; 0307; 0307; az SunSpecific_Not_After_I",
|
||||
"0307; 0307 ; 0307; 0307; L2 SunSpecific_Not_After_I",
|
||||
|
||||
// Counterpart of Not_Before_Dot test cases
|
||||
// 0049; 0131 ; 0049; 0049; tr Not_Before_Dot
|
||||
// 0049; 0131 ; 0049; 0049; az Not_Before_Dot
|
||||
"0049; 0069 ; 0049; 0049; L2 Not_Before_Dot",
|
||||
"0049; 0069 ; 0049; 0049; tr SunSpecific_Before_Dot",
|
||||
"0049; 0069 ; 0049; 0049; az SunSpecific_Before_Dot",
|
||||
"0049; 0069 0307 0307; 0049; 0049; lt SunSpecific_Before_Dot",
|
||||
"0049; 0069 0307 ; 0049; 0049; L3 SunSpecific_Before_Dot",
|
||||
};
|
||||
|
||||
public static void main (String[] args) {
|
||||
SpecialCasingTest specialCasingTest = new SpecialCasingTest();
|
||||
specialCasingTest.test();
|
||||
}
|
||||
|
||||
private void test () {
|
||||
Locale defaultLocale = Locale.getDefault();
|
||||
BufferedReader in = null;
|
||||
|
||||
try {
|
||||
int locale_num = locales.size();
|
||||
for (int l = 0; l < locale_num; l++) {
|
||||
Locale locale = locales.get(l);
|
||||
Locale.setDefault(locale);
|
||||
System.out.println("Testing on " + locale + " locale....");
|
||||
|
||||
defaultLang = locale.getLanguage();
|
||||
if (defaultLang.equals("az") ||
|
||||
defaultLang.equals("lt") ||
|
||||
defaultLang.equals("tr")) {
|
||||
specificLocale = true;
|
||||
} else {
|
||||
specificLocale = false;
|
||||
}
|
||||
in = Files.newBufferedReader(Paths.get(System.getProperty("test.src.path"), "..", "/Character/SpecialCasing.txt")
|
||||
.toRealPath());
|
||||
String line;
|
||||
while ((line = in.readLine()) != null) {
|
||||
if (line.length() == 0 || line.charAt(0) == '#') {
|
||||
continue;
|
||||
}
|
||||
test(line);
|
||||
}
|
||||
in.close();
|
||||
in = null;
|
||||
System.out.println("Testing with Sun original data....");
|
||||
for (String additionalTestData1 : additionalTestData) {
|
||||
test(additionalTestData1);
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (IOException e) {
|
||||
err = true;
|
||||
e.printStackTrace();
|
||||
}
|
||||
finally {
|
||||
if (in != null) {
|
||||
try {
|
||||
in.close();
|
||||
}
|
||||
catch (IOException e) {
|
||||
}
|
||||
}
|
||||
Locale.setDefault(defaultLocale);
|
||||
if (err) {
|
||||
throw new RuntimeException("SpecialCasingTest failed.");
|
||||
} else {
|
||||
System.out.println("*** SpecialCasingTest passed.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void test(String line) {
|
||||
int index = line.indexOf('#');
|
||||
if (index != -1) {
|
||||
line = line.substring(0, index);
|
||||
}
|
||||
|
||||
String lang = null;
|
||||
String condition = null;
|
||||
String[] fields = line.split("; ");
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (fields[i].length() != 0) {
|
||||
fields[i] = convert(fields[i]);
|
||||
}
|
||||
}
|
||||
if (fields.length != 4) {
|
||||
StringTokenizer st = new StringTokenizer(fields[4]);
|
||||
|
||||
while (st.hasMoreTokens()) {
|
||||
String token = st.nextToken();
|
||||
|
||||
if (token.equals("Final_Sigma")) {
|
||||
condition = "Final Sigma";
|
||||
fields[0] = "Abc" + fields[0];
|
||||
fields[1] = "abc" + fields[1];
|
||||
fields[3] = "ABC" + fields[3];
|
||||
} else if (token.equals("SunSpecific_Not_Final_Sigma1")) {
|
||||
condition = "*Sun Specific* Not Final Sigma 1";
|
||||
fields[0] = "Abc" + fields[0] + "xyz";
|
||||
fields[1] = "abc" + fields[1] + "xyz";
|
||||
fields[3] = "ABC" + fields[3] + "XYZ";
|
||||
} else if (token.equals("SunSpecific_Not_Final_Sigma2")) {
|
||||
condition = "*Sun Specific* Not Final Sigma 2";
|
||||
} else if (token.equals("After_Soft_Dotted")) {
|
||||
condition = "After Soft-Dotted";
|
||||
fields[0] = "\u1E2D" + fields[0];
|
||||
fields[1] = "\u1E2D" + fields[1];
|
||||
fields[3] = "\u1E2C" + fields[3];
|
||||
} else if (token.equals("SunSpecific_Not_After_Soft_Dotted")) {
|
||||
condition = "*Sun Specific* Not After Soft-Dotted";
|
||||
fields[0] = "Dot" + fields[0];
|
||||
fields[1] = "dot" + fields[1];
|
||||
fields[3] = "DOT" + fields[3];
|
||||
} else if (token.equals("More_Above")) {
|
||||
condition = "More Above";
|
||||
fields[0] = fields[0] + "\u0306";
|
||||
fields[1] = fields[1] + "\u0306";
|
||||
fields[3] = fields[3] + "\u0306";
|
||||
} else if (token.equals("SunSpecific_Not_More_Above")) {
|
||||
condition = "*Sun Specific* Not More Above";
|
||||
fields[0] = fields[0] + "breve";
|
||||
fields[1] = fields[1] + "breve";
|
||||
fields[3] = fields[3] + "BREVE";
|
||||
} else if (token.equals("After_I")) {
|
||||
condition = "After I";
|
||||
fields[0] = "I" + fields[0];
|
||||
fields[1] = "i" + fields[1];
|
||||
fields[3] = "I" + fields[3];
|
||||
} else if (token.equals("SunSpecific_Not_After_I")) {
|
||||
condition = "*Sun Specific* Not After I";
|
||||
fields[0] = "A" + fields[0];
|
||||
fields[1] = "a" + fields[1];
|
||||
fields[3] = "A" + fields[3];
|
||||
} else if (token.equals("Not_Before_Dot")) {
|
||||
condition = "Not Before Dot";
|
||||
fields[0] = fields[0] + "Z";
|
||||
fields[1] = fields[1] + "z";
|
||||
fields[3] = fields[3] + "Z";
|
||||
} else if (token.equals("SunSpecific_Before_Dot")) {
|
||||
condition = "*Sun Specific* Before Dot";
|
||||
fields[0] = fields[0] + "\u0307";
|
||||
fields[3] = fields[3] + "\u0307";
|
||||
} else if (token.length() == 2) {
|
||||
lang = token;
|
||||
|
||||
if (lang.equals("L1")) {
|
||||
if (defaultLang.equals("lt")) {
|
||||
lang = "en";
|
||||
} else {
|
||||
lang = defaultLang;
|
||||
}
|
||||
} else if (lang.equals("L2")) {
|
||||
if (defaultLang.equals("az") ||
|
||||
defaultLang.equals("tr")) {
|
||||
lang = "en";
|
||||
} else {
|
||||
lang = defaultLang;
|
||||
}
|
||||
} else if (lang.equals("L3")) {
|
||||
if (defaultLang.equals("az") ||
|
||||
defaultLang.equals("lt") ||
|
||||
defaultLang.equals("tr")) {
|
||||
lang = "en";
|
||||
} else {
|
||||
lang = defaultLang;
|
||||
}
|
||||
// I want to have another test case here for double-check.
|
||||
// Current implementation for Character and String considers
|
||||
// only az, lt, and tr locales. I want to detect if other
|
||||
// locales are specified.
|
||||
} else if (!lang.equals("az") &&
|
||||
!lang.equals("lt") &&
|
||||
!lang.equals("tr")) {
|
||||
throw new RuntimeException("Unsupported locale: " +
|
||||
lang + ". It may need to be considered in ConditionalSpecialCasing.java. Please confirm.");
|
||||
}
|
||||
} else {
|
||||
throw new RuntimeException("Unknown condition: " + token);
|
||||
}
|
||||
}
|
||||
} else if (fields[0].equals("\u0130")) {
|
||||
// special case for \u0130
|
||||
if (defaultLang.equals("az") ||
|
||||
defaultLang.equals("tr")) {
|
||||
lang = "en";
|
||||
} else {
|
||||
lang = defaultLang;
|
||||
}
|
||||
}
|
||||
testLowerCase(fields[0], fields[1], lang, condition);
|
||||
testUpperCase(fields[0], fields[3], lang, condition);
|
||||
}
|
||||
|
||||
private void testLowerCase(String orig, String expected,
|
||||
String lang, String condition) {
|
||||
String got = (lang == null) ?
|
||||
orig.toLowerCase() : orig.toLowerCase(new Locale(lang, ""));
|
||||
|
||||
if (!expected.equals(got)) {
|
||||
err = true;
|
||||
System.err.println("toLowerCase(lang=" + lang +
|
||||
") failed.\n\tOriginal: " + toString(orig) +
|
||||
"\n\tGot: " + toString(got) +
|
||||
"\n\tExpected: " + toString(expected) +
|
||||
((condition == null) ? "" : ("\n under condition(" +
|
||||
condition + ")")));
|
||||
}
|
||||
}
|
||||
|
||||
private void testUpperCase(String orig, String expected,
|
||||
String lang, String condition) {
|
||||
String got = (lang == null) ?
|
||||
orig.toUpperCase() : orig.toUpperCase(new Locale(lang, ""));
|
||||
|
||||
if (!expected.equals(got)) {
|
||||
err = true;
|
||||
System.err.println("toUpperCase(lang=" + lang +
|
||||
") failed.\n\tOriginal: " + toString(orig) +
|
||||
"\n\tGot: " + toString(got) +
|
||||
"\n\tExpected: " + toString(expected) +
|
||||
((condition == null) ? "" : ("\n under condition(" +
|
||||
condition + ")")));
|
||||
}
|
||||
}
|
||||
// Scratch buffer shared by convert() and toString(); each of them resets it
// with setLength(0) before appending.
StringBuilder sb = new StringBuilder();
|
||||
|
||||
private String convert(String str) {
|
||||
sb.setLength(0);
|
||||
|
||||
String[] tokens = str.split(" ");
|
||||
for (String token : tokens) {
|
||||
sb.append((char) Integer.parseInt(token, 16));
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
private String toString(String str) {
|
||||
sb.setLength(0);
|
||||
|
||||
int len = str.length();
|
||||
for (int i = 0; i < len; i++) {
|
||||
sb.append("0x").append(Integer.toHexString(str.charAt(i)).toUpperCase()).append(" ");
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
}
|
233
test/jdk/java/lang/String/UnicodeCasingTest.java
Normal file
233
test/jdk/java/lang/String/UnicodeCasingTest.java
Normal file
@ -0,0 +1,233 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @bug 4397357 6565620 6959267 7070436 7198195 8032446 8072600
|
||||
* @summary Confirm normal case mappings are handled correctly.
|
||||
* @run main/timeout=200 UnicodeCasingTest
|
||||
*/
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Locale;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
 * Checks that {@code String.toUpperCase()} and {@code String.toLowerCase()}
 * apply the simple case mappings listed in UnicodeData.txt, under every
 * available default locale.
 *
 * Characters that have an entry in SpecialCasing.txt are skipped, either for
 * every locale or only for the language the special mapping is limited to,
 * because the simple mappings do not describe them.
 */
public class UnicodeCasingTest {

    // Raised as soon as one mapping check fails; examined after all locales ran.
    private static boolean err = false;

    // Locales which are used for testing
    private static List<Locale> locales = new ArrayList<>();
    static {
        locales.add(new Locale("az", ""));
        locales.addAll(java.util.Arrays.asList(Locale.getAvailableLocales()));
    }

    // Language code of the locale currently installed as the default locale
    private static String defaultLang;

    // Characters whose mappings are listed in SpecialCasing.txt and to which
    // the mappings in UnicodeData.txt are therefore not applicable. The value
    // is "all", or the two-letter language code the special mapping is
    // restricted to.
    private static Map<String, String> excludeList = new HashMap<>();

    public static void main(String[] args) {
        UnicodeCasingTest specialCasingTest = new UnicodeCasingTest();
        specialCasingTest.test();
    }

    /**
     * Runs the whole test: builds the exclude list, then checks every
     * UnicodeData.txt entry once per locale. The original default locale is
     * always restored before the verdict is reported.
     *
     * @throws RuntimeException if a data file cannot be read or at least one
     *                          mapping check failed
     */
    private void test() {
        Locale defaultLocale = Locale.getDefault();
        try {
            // First, we create the exclude list of characters whose mappings
            // exist in SpecialCasing.txt and for which the mapping rules in
            // UnicodeData.txt aren't applicable.
            for (String line : readDataLines("SpecialCasing.txt")) {
                updateExcludeList(line);
            }

            // The file content does not depend on the locale, so it is read
            // and filtered once instead of once per locale.
            List<String> unicodeData = readDataLines("UnicodeData.txt");

            for (Locale locale : locales) {
                Locale.setDefault(locale);
                defaultLang = locale.getLanguage();
                System.err.println("Testing on " + locale + " locale....");
                for (String line : unicodeData) {
                    test(line);
                }
            }
        } catch (IOException e) {
            // Keep the cause so that the reason for the failure is not lost.
            throw new RuntimeException("UnicodeCasingTest failed.", e);
        } finally {
            // Only clean-up belongs here: deciding the verdict in the finally
            // block could print "passed" while an exception is propagating,
            // or replace that exception with a less specific one.
            Locale.setDefault(defaultLocale);
        }

        if (err) {
            throw new RuntimeException("UnicodeCasingTest failed.");
        }
        System.out.println("*** UnicodeCasingTest passed.");
    }

    /**
     * Reads one of the Unicode data files kept next to the Character tests
     * and returns its data lines, i.e. all lines that are neither empty nor
     * start with {@code '#'}.
     *
     * @param fileName name of the file inside the Character test directory
     * @return the data lines in file order
     * @throws IOException           if the file cannot be located or read
     * @throws IllegalStateException if the test.src.path property is not set
     */
    private static List<String> readDataLines(String fileName) throws IOException {
        String testSrcPath = System.getProperty("test.src.path");
        if (testSrcPath == null) {
            throw new IllegalStateException(
                "System property test.src.path is not set; cannot locate " + fileName);
        }

        List<String> dataLines = new ArrayList<>();
        try (BufferedReader in = Files.newBufferedReader(
                Paths.get(testSrcPath, "..", "/Character/" + fileName).toRealPath())) {
            String line;
            while ((line = in.readLine()) != null) {
                if (line.length() != 0 && line.charAt(0) != '#') {
                    dataLines.add(line);
                }
            }
        }
        return dataLines;
    }

    /**
     * Records the character described by one SpecialCasing.txt line in the
     * exclude list.
     *
     * @param line a data line of SpecialCasing.txt, optionally followed by a
     *             {@code '#'} comment
     */
    private void updateExcludeList(String line) {
        int index = line.indexOf('#');
        if (index != -1) {
            line = line.substring(0, index);
        }

        String[] fields = line.split("; ");

        if (fields.length == 4) {
            // No condition field: the special mapping applies unconditionally,
            // so the character is excluded for every locale.
            excludeList.put(fields[0], "all");
        } else if (fields.length == 5 && fields[4].length() == 2) {
            // The condition is a bare language code: exclude the character
            // for that language only, unless an earlier entry already exists.
            excludeList.putIfAbsent(fields[0], fields[4]);
        }
    }

    /**
     * Checks the upper-case and lower-case mapping of one UnicodeData.txt
     * entry under the current default locale, unless the character is
     * excluded for that locale. An empty mapping field means the character
     * maps to itself.
     *
     * @param line a data line of UnicodeData.txt
     */
    private void test(String line) {
        String[] fields = line.split(";", 15);
        String orig = convert(fields[0]);

        String lang = excludeList.get(fields[0]);
        if ("all".equals(lang) || defaultLang.equals(lang)) {
            return;
        }

        // fields[12] is the simple upper-case mapping, fields[13] the simple
        // lower-case mapping.
        testUpperCase(orig, fields[12].length() == 0 ? orig : convert(fields[12]));
        testLowerCase(orig, fields[13].length() == 0 ? orig : convert(fields[13]));
    }

    /**
     * Upper-cases {@code orig} with the default locale and reports a failure
     * if the result differs from {@code expected}.
     *
     * @param orig     the string to convert
     * @param expected the upper-case form according to UnicodeData.txt
     */
    private void testUpperCase(String orig, String expected) {
        String got = orig.toUpperCase();

        // Ugly workaround for special mappings for az and tr locales....
        if (orig.equals("\u0069") &&
            (defaultLang.equals("az") || defaultLang.equals("tr"))) {
            expected = "\u0130";
        }

        if (!expected.equals(got)) {
            err = true;
            System.err.println("toUpperCase(lang=" + defaultLang +
                ") failed.\n\tOriginal: " + toString(orig) +
                "\n\tGot: " + toString(got) +
                "\n\tExpected: " + toString(expected));
        }
    }

    /**
     * Lower-cases {@code orig} with the default locale and reports a failure
     * if the result differs from {@code expected}.
     *
     * @param orig     the string to convert
     * @param expected the lower-case form according to UnicodeData.txt
     */
    private void testLowerCase(String orig, String expected) {
        String got = orig.toLowerCase();

        // Ugly workaround for special mappings for az and tr locales....
        if (orig.equals("\u0049") &&
            (defaultLang.equals("az") || defaultLang.equals("tr"))) {
            expected = "\u0131";
        }

        if (!expected.equals(got)) {
            err = true;
            System.err.println("toLowerCase(lang=" + defaultLang +
                ") failed.\n\tOriginal: " + toString(orig) +
                "\n\tGot: " + toString(got) +
                "\n\tExpected: " + toString(expected));
        }
    }

    // Scratch buffer shared by convert() and toString(); both reset it first.
    StringBuilder sb = new StringBuilder();

    /**
     * Turns a space-separated list of hexadecimal code points into the
     * string made of those characters; supplementary code points become
     * surrogate pairs.
     *
     * @param str one or more hexadecimal code points separated by spaces
     * @return the decoded string
     * @throws NumberFormatException    if a token is not a hexadecimal number
     * @throws IllegalArgumentException if a token is not a valid code point
     */
    private String convert(String str) {
        sb.setLength(0);

        for (String token : str.split(" ")) {
            sb.appendCodePoint(Integer.parseInt(token, 16));
        }

        return sb.toString();
    }

    /**
     * Renders a string for failure reports: every code point is written as
     * {@code 0x} followed by its upper-case hexadecimal value and one space,
     * which matches the notation used in UnicodeData.txt.
     *
     * @param str the string to dump
     * @return the hexadecimal dump, empty for an empty string
     */
    private String toString(String str) {
        sb.setLength(0);

        int len = str.length();
        for (int i = 0; i < len; ) {
            int codePoint = str.codePointAt(i);
            // Locale.ROOT: the default locale changes while the test runs.
            sb.append("0x")
              .append(Integer.toHexString(codePoint).toUpperCase(Locale.ROOT))
              .append(" ");
            i += Character.charCount(codePoint);
        }

        return sb.toString();
    }

}
|
Loading…
Reference in New Issue
Block a user