8229831: Upgrade Character.isUnicodeIdentifierStart/Part() methods to the latest standard
Reviewed-by: rriggs
This commit is contained in:
parent
4d70cdac4f
commit
2aac0e925d
@ -115,13 +115,14 @@ class CharacterData00 extends CharacterData {
|
||||
}
|
||||
|
||||
boolean isUnicodeIdentifierStart(int ch) {
|
||||
int props = getProperties(ch);
|
||||
return ((props & $$maskIdentifierInfo) == $$valueUnicodeStart);
|
||||
return (getPropertiesEx(ch) & $$maskIDStart) != 0 ||
|
||||
ch == 0x2E2F;
|
||||
}
|
||||
|
||||
boolean isUnicodeIdentifierPart(int ch) {
|
||||
int props = getProperties(ch);
|
||||
return ((props & $$maskUnicodePart) != 0);
|
||||
return (getPropertiesEx(ch) & $$maskIDContinue) != 0 ||
|
||||
isIdentifierIgnorable(ch) ||
|
||||
ch == 0x2E2F;
|
||||
}
|
||||
|
||||
boolean isIdentifierIgnorable(int ch) {
|
||||
|
@ -114,13 +114,14 @@ class CharacterData01 extends CharacterData {
|
||||
}
|
||||
|
||||
boolean isUnicodeIdentifierStart(int ch) {
|
||||
int props = getProperties(ch);
|
||||
return ((props & $$maskIdentifierInfo) == $$valueUnicodeStart);
|
||||
return (getPropertiesEx(ch) & $$maskIDStart) != 0 ||
|
||||
ch == 0x2E2F;
|
||||
}
|
||||
|
||||
boolean isUnicodeIdentifierPart(int ch) {
|
||||
int props = getProperties(ch);
|
||||
return ((props & $$maskUnicodePart) != 0);
|
||||
return (getPropertiesEx(ch) & $$maskIDContinue) != 0 ||
|
||||
isIdentifierIgnorable(ch) ||
|
||||
ch == 0x2E2F;
|
||||
}
|
||||
|
||||
boolean isIdentifierIgnorable(int ch) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -113,13 +113,14 @@ class CharacterData02 extends CharacterData {
|
||||
}
|
||||
|
||||
boolean isUnicodeIdentifierStart(int ch) {
|
||||
int props = getProperties(ch);
|
||||
return ((props & $$maskIdentifierInfo) == $$valueUnicodeStart);
|
||||
return (getPropertiesEx(ch) & $$maskIDStart) != 0 ||
|
||||
ch == 0x2E2F;
|
||||
}
|
||||
|
||||
boolean isUnicodeIdentifierPart(int ch) {
|
||||
int props = getProperties(ch);
|
||||
return ((props & $$maskUnicodePart) != 0);
|
||||
return (getPropertiesEx(ch) & $$maskIDContinue) != 0 ||
|
||||
isIdentifierIgnorable(ch) ||
|
||||
ch == 0x2E2F;
|
||||
}
|
||||
|
||||
boolean isIdentifierIgnorable(int ch) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -113,15 +113,16 @@ class CharacterData0E extends CharacterData {
|
||||
}
|
||||
|
||||
boolean isUnicodeIdentifierStart(int ch) {
|
||||
int props = getProperties(ch);
|
||||
return ((props & $$maskIdentifierInfo) == $$valueUnicodeStart);
|
||||
return (getPropertiesEx(ch) & $$maskIDStart) != 0 ||
|
||||
ch == 0x2E2F;
|
||||
}
|
||||
|
||||
boolean isUnicodeIdentifierPart(int ch) {
|
||||
int props = getProperties(ch);
|
||||
return ((props & $$maskUnicodePart) != 0);
|
||||
return (getPropertiesEx(ch) & $$maskIDContinue) != 0 ||
|
||||
isIdentifierIgnorable(ch) ||
|
||||
ch == 0x2E2F;
|
||||
}
|
||||
|
||||
|
||||
boolean isIdentifierIgnorable(int ch) {
|
||||
int props = getProperties(ch);
|
||||
return ((props & $$maskIdentifierInfo) == $$valueIgnorable);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -133,13 +133,14 @@ class CharacterDataLatin1 extends CharacterData {
|
||||
}
|
||||
|
||||
boolean isUnicodeIdentifierStart(int ch) {
|
||||
int props = getProperties(ch);
|
||||
return ((props & $$maskIdentifierInfo) == $$valueUnicodeStart);
|
||||
return (getPropertiesEx(ch) & $$maskIDStart) != 0 ||
|
||||
ch == 0x2E2F;
|
||||
}
|
||||
|
||||
boolean isUnicodeIdentifierPart(int ch) {
|
||||
int props = getProperties(ch);
|
||||
return ((props & $$maskUnicodePart) != 0);
|
||||
return (getPropertiesEx(ch) & $$maskIDContinue) != 0 ||
|
||||
isIdentifierIgnorable(ch) ||
|
||||
ch == 0x2E2F;
|
||||
}
|
||||
|
||||
boolean isIdentifierIgnorable(int ch) {
|
||||
|
11885
make/data/unicodedata/DerivedCoreProperties.txt
Normal file
11885
make/data/unicodedata/DerivedCoreProperties.txt
Normal file
File diff suppressed because it is too large
Load Diff
@ -42,6 +42,7 @@ define SetupCharacterData
|
||||
-spec $(UNICODEDATA)/UnicodeData.txt \
|
||||
-specialcasing $(UNICODEDATA)/SpecialCasing.txt \
|
||||
-proplist $(UNICODEDATA)/PropList.txt \
|
||||
-derivedprops $(UNICODEDATA)/DerivedCoreProperties.txt \
|
||||
-o $(SUPPORT_OUTPUTDIR)/gensrc/java.base/java/lang/$1.java \
|
||||
-usecharforbyte $3
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -73,6 +73,7 @@ public class GenerateCharacter {
|
||||
static String DefaultUnicodeSpecFileName = ROOT + "UnicodeData.txt";
|
||||
static String DefaultSpecialCasingFileName = ROOT + "SpecialCasing.txt";
|
||||
static String DefaultPropListFileName = ROOT + "PropList.txt";
|
||||
static String DefaultDerivedPropsFileName = ROOT + "DerivedCoreProperties.txt";
|
||||
static String DefaultJavaTemplateFileName = ROOT + "Character.java.template";
|
||||
static String DefaultJavaOutputFileName = ROOT + "Character.java";
|
||||
static String DefaultCTemplateFileName = ROOT + "Character.c.template";
|
||||
@ -159,6 +160,8 @@ public class GenerateCharacter {
|
||||
1 bit Other_Math property
|
||||
1 bit Ideographic property
|
||||
1 bit Noncharacter codepoint property
|
||||
1 bit ID_Start property
|
||||
1 bit ID_Continue property
|
||||
*/
|
||||
|
||||
|
||||
@ -190,7 +193,7 @@ public class GenerateCharacter {
|
||||
// maskMirrored needs to be long, if up 16-bit
|
||||
private static final long maskMirrored = 0x80000000L;
|
||||
|
||||
// bit masks identify the 16-bit priperty field described above, in B
|
||||
// bit masks identify the 16-bit property field described above, in B
|
||||
// table
|
||||
private static final long
|
||||
maskOtherLowercase = 0x100000000L,
|
||||
@ -198,7 +201,9 @@ public class GenerateCharacter {
|
||||
maskOtherAlphabetic = 0x400000000L,
|
||||
maskOtherMath = 0x800000000L,
|
||||
maskIdeographic = 0x1000000000L,
|
||||
maskNoncharacterCP = 0x2000000000L;
|
||||
maskNoncharacterCP = 0x2000000000L,
|
||||
maskIDStart = 0x4000000000L,
|
||||
maskIDContinue = 0x8000000000L;
|
||||
|
||||
// Can compare masked values with these to determine
|
||||
// numeric or lexical types.
|
||||
@ -367,6 +372,8 @@ public class GenerateCharacter {
|
||||
addExProp(result, propList, "Ideographic", maskIdeographic);
|
||||
//addExProp(result, propList, "Other_Math", maskOtherMath);
|
||||
//addExProp(result, propList, "Noncharacter_CodePoint", maskNoncharacterCP);
|
||||
addExProp(result, propList, "ID_Start", maskIDStart);
|
||||
addExProp(result, propList, "ID_Continue", maskIDContinue);
|
||||
|
||||
return result;
|
||||
}
|
||||
@ -780,6 +787,8 @@ OUTER: for (int i = 0; i < n; i += m) {
|
||||
if (x.equals("maskOtherUppercase")) return "0x" + hex4(maskOtherUppercase >> 32);
|
||||
if (x.equals("maskOtherAlphabetic")) return "0x" + hex4(maskOtherAlphabetic >> 32);
|
||||
if (x.equals("maskIdeographic")) return "0x" + hex4(maskIdeographic >> 32);
|
||||
if (x.equals("maskIDStart")) return "0x" + hex4(maskIDStart >> 32);
|
||||
if (x.equals("maskIDContinue")) return "0x" + hex4(maskIDContinue >> 32);
|
||||
if (x.equals("valueIgnorable")) return "0x" + hex8(valueIgnorable);
|
||||
if (x.equals("valueJavaUnicodeStart")) return "0x" + hex8(valueJavaUnicodeStart);
|
||||
if (x.equals("valueJavaOnlyStart")) return "0x" + hex8(valueJavaOnlyStart);
|
||||
@ -1612,6 +1621,7 @@ OUTER: for (int i = 0; i < n; i += m) {
|
||||
static String UnicodeSpecFileName = null; // liu
|
||||
static String SpecialCasingFileName = null;
|
||||
static String PropListFileName = null;
|
||||
static String DerivedPropsFileName = null;
|
||||
static boolean useCharForByte = false;
|
||||
static int[] sizes;
|
||||
static int bins = 0; // liu; if > 0, then perform search
|
||||
@ -1739,6 +1749,14 @@ OUTER: for (int i = 0; i < n; i += m) {
|
||||
PropListFileName = args[++j];
|
||||
}
|
||||
}
|
||||
else if (args[j].equals("-derivedprops")) {
|
||||
if (j == args.length -1) {
|
||||
FAIL("File name missing after -derivedprops");
|
||||
}
|
||||
else {
|
||||
DerivedPropsFileName = args[++j];
|
||||
}
|
||||
}
|
||||
else if (args[j].equals("-plane")) {
|
||||
if (j == args.length -1) {
|
||||
FAIL("Plane number missing after -plane");
|
||||
@ -1803,6 +1821,10 @@ OUTER: for (int i = 0; i < n; i += m) {
|
||||
PropListFileName = DefaultPropListFileName;
|
||||
desc.append(" [-proplist " + PropListFileName + ']');
|
||||
}
|
||||
if (DerivedPropsFileName == null) {
|
||||
DerivedPropsFileName = DefaultDerivedPropsFileName;
|
||||
desc.append(" [-derivedprops " + DerivedPropsFileName + ']');
|
||||
}
|
||||
if (TemplateFileName == null) {
|
||||
TemplateFileName = (Csyntax ? DefaultCTemplateFileName
|
||||
: DefaultJavaTemplateFileName);
|
||||
@ -1954,6 +1976,7 @@ OUTER: for (int i = 0; i < n; i += m) {
|
||||
UnicodeSpec[] data = UnicodeSpec.readSpecFile(new File(UnicodeSpecFileName), plane);
|
||||
specialCaseMaps = SpecialCaseMap.readSpecFile(new File(SpecialCasingFileName), plane);
|
||||
PropList propList = PropList.readSpecFile(new File(PropListFileName), plane);
|
||||
propList.putAll(PropList.readSpecFile(new File(DerivedPropsFileName), plane));
|
||||
|
||||
if (verbose) {
|
||||
System.out.println(data.length + " items read from Unicode spec file " + UnicodeSpecFileName); // liu
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -31,7 +31,8 @@ import java.io.*;
|
||||
|
||||
/**
|
||||
* A PropList object contains the lists of code points that have
|
||||
* the same Unicode property defined in PropList.txt
|
||||
* the same Unicode property defined in PropList.txt and
|
||||
* DerivedCoreProperties.txt
|
||||
*
|
||||
* @author Xueming Shen
|
||||
*/
|
||||
@ -51,8 +52,13 @@ public class PropList {
|
||||
return propMap.keySet();
|
||||
}
|
||||
|
||||
private Map<String, ArrayList<Integer>> propMap =
|
||||
new LinkedHashMap<String, ArrayList<Integer>>();
|
||||
public void putAll(PropList pl) {
|
||||
pl.names().stream()
|
||||
.forEach(name -> propMap.put(name, pl.codepoints(name)));
|
||||
}
|
||||
|
||||
private Map<String, List<Integer>> propMap =
|
||||
new LinkedHashMap<String, List<Integer>>();
|
||||
|
||||
private PropList(File file, int plane) throws IOException {
|
||||
|
||||
@ -78,7 +84,7 @@ public class PropList {
|
||||
start &= 0xffff;
|
||||
end &= 0xffff;
|
||||
|
||||
ArrayList<Integer> list = propMap.get(name);
|
||||
List<Integer> list = propMap.get(name);
|
||||
if (list == null) {
|
||||
list = new ArrayList<Integer>();
|
||||
propMap.put(name, list);
|
||||
|
@ -9917,7 +9917,18 @@ class Character implements java.io.Serializable, Comparable<Character> {
|
||||
* <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
|
||||
* <li> {@link #getType(char) getType(ch)} returns
|
||||
* {@code LETTER_NUMBER}.
|
||||
* <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
|
||||
* {@code Other_ID_Start}</a> character.
|
||||
* </ul>
|
||||
* <p>
|
||||
* This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
|
||||
* UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
|
||||
* with the following profile of UAX31:
|
||||
* <pre>
|
||||
* Start := ID_Start + 'VERTICAL TILDE' (U+2E2F)
|
||||
* </pre>
|
||||
* {@code 'VERTICAL TILDE'} is added to {@code Start} for backward
|
||||
* compatibility.
|
||||
*
|
||||
* <p><b>Note:</b> This method cannot handle <a
|
||||
* href="#supplementary"> supplementary characters</a>. To support
|
||||
@ -9947,7 +9958,19 @@ class Character implements java.io.Serializable, Comparable<Character> {
|
||||
* returns {@code true}
|
||||
* <li> {@link #getType(int) getType(codePoint)}
|
||||
* returns {@code LETTER_NUMBER}.
|
||||
* <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
|
||||
* {@code Other_ID_Start}</a> character.
|
||||
* </ul>
|
||||
* <p>
|
||||
* This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
|
||||
* UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
|
||||
* with the following profile of UAX31:
|
||||
* <pre>
|
||||
* Start := ID_Start + 'VERTICAL TILDE' (U+2E2F)
|
||||
* </pre>
|
||||
* {@code 'VERTICAL TILDE'} is added to {@code Start} for backward
|
||||
* compatibility.
|
||||
*
|
||||
* @param codePoint the character (Unicode code point) to be tested.
|
||||
* @return {@code true} if the character may start a Unicode
|
||||
* identifier; {@code false} otherwise.
|
||||
@ -9975,7 +9998,22 @@ class Character implements java.io.Serializable, Comparable<Character> {
|
||||
* <li> it is a non-spacing mark
|
||||
* <li> {@code isIdentifierIgnorable} returns
|
||||
* {@code true} for this character.
|
||||
* <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
|
||||
* {@code Other_ID_Start}</a> character.
|
||||
* <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue">
|
||||
* {@code Other_ID_Continue}</a> character.
|
||||
* </ul>
|
||||
* <p>
|
||||
* This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
|
||||
* UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
|
||||
* with the following profile of UAX31:
|
||||
* <pre>
|
||||
* Continue := Start + ID_Continue + ignorable
|
||||
* Medial := empty
|
||||
* ignorable := isIdentifierIgnorable(char) returns true for the character
|
||||
* </pre>
|
||||
* {@code ignorable} is added to {@code Continue} for backward
|
||||
* compatibility.
|
||||
*
|
||||
* <p><b>Note:</b> This method cannot handle <a
|
||||
* href="#supplementary"> supplementary characters</a>. To support
|
||||
@ -10010,7 +10048,23 @@ class Character implements java.io.Serializable, Comparable<Character> {
|
||||
* <li> it is a non-spacing mark
|
||||
* <li> {@code isIdentifierIgnorable} returns
|
||||
* {@code true} for this character.
|
||||
* <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
|
||||
* {@code Other_ID_Start}</a> character.
|
||||
* <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue">
|
||||
* {@code Other_ID_Continue}</a> character.
|
||||
* </ul>
|
||||
* <p>
|
||||
* This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
|
||||
* UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
|
||||
* with the following profile of UAX31:
|
||||
* <pre>
|
||||
* Continue := Start + ID_Continue + ignorable
|
||||
* Medial := empty
|
||||
* ignorable := isIdentifierIgnorable(int) returns true for the character
|
||||
* </pre>
|
||||
* {@code ignorable} is added to {@code Continue} for backward
|
||||
* compatibility.
|
||||
*
|
||||
* @param codePoint the character (Unicode code point) to be tested.
|
||||
* @return {@code true} if the character may be part of a
|
||||
* Unicode identifier; {@code false} otherwise.
|
||||
|
@ -23,7 +23,7 @@
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @bug 8202771 8221431
|
||||
* @bug 8202771 8221431 8229831
|
||||
* @summary Check j.l.Character.isDigit/isLetter/isLetterOrDigit/isSpaceChar
|
||||
* /isWhitespace/isTitleCase/isISOControl/isIdentifierIgnorable
|
||||
* /isJavaIdentifierStart/isJavaIdentifierPart/isUnicodeIdentifierStart
|
||||
@ -182,7 +182,7 @@ public class CharPropTest {
|
||||
|
||||
private static void isUnicodeIdentifierStartTest(int codePoint, String category) {
|
||||
boolean actual = Character.isUnicodeIdentifierStart(codePoint);
|
||||
boolean expected = isUnicodeIdentifierStart(category);
|
||||
boolean expected = isUnicodeIdentifierStart(codePoint, category);
|
||||
if (actual != expected) {
|
||||
printDiff(codePoint, "isUnicodeIdentifierStart", actual, expected);
|
||||
}
|
||||
@ -266,14 +266,33 @@ public class CharPropTest {
|
||||
|| isIdentifierIgnorable(codePoint, category);
|
||||
}
|
||||
|
||||
private static boolean isUnicodeIdentifierStart(String category) {
|
||||
return isLetter(category) || category.equals("Nl");
|
||||
private static boolean isUnicodeIdentifierStart(int codePoint, String category) {
|
||||
return isLetter(category) || category.equals("Nl")
|
||||
|| isOtherIDStart(codePoint);
|
||||
}
|
||||
|
||||
private static boolean isUnicodeIdentifierPart(int codePoint, String category) {
|
||||
return isLetter(category) || category.equals("Pc") || category.equals("Nd")
|
||||
|| category.equals("Nl") || category.equals("Mc") || category.equals("Mn")
|
||||
|| isIdentifierIgnorable(codePoint, category);
|
||||
|| isIdentifierIgnorable(codePoint, category)
|
||||
|| isOtherIDStart(codePoint)
|
||||
|| isOtherIDContinue(codePoint);
|
||||
}
|
||||
|
||||
private static boolean isOtherIDStart(int codePoint) {
|
||||
return codePoint == 0x1885 ||
|
||||
codePoint == 0x1886 ||
|
||||
codePoint == 0x2118 ||
|
||||
codePoint == 0x212E ||
|
||||
codePoint == 0x309B ||
|
||||
codePoint == 0x309C;
|
||||
}
|
||||
|
||||
private static boolean isOtherIDContinue(int codePoint) {
|
||||
return codePoint == 0x00B7 ||
|
||||
codePoint == 0x0387 ||
|
||||
(codePoint >= 0x1369 && codePoint <= 0x1371) ||
|
||||
codePoint == 0x19DA;
|
||||
}
|
||||
|
||||
private static void printDiff(int codePoint, String method, boolean actual, boolean expected) {
|
||||
|
@ -24,8 +24,9 @@
|
||||
|
||||
/**
|
||||
* @test
|
||||
* @bug 7037261 7070436 7198195 8032446 8072600 8221431
|
||||
* @summary Check j.l.Character.isLowerCase/isUppercase/isAlphabetic/isIdeographic
|
||||
* @bug 7037261 7070436 7198195 8032446 8072600 8221431 8229831
|
||||
* @summary Check j.l.Character.isLowerCase/isUppercase/isAlphabetic/isIdeographic/
|
||||
* isUnicodeIdentifierStart/isUnicodeIdentifierPart
|
||||
* @library /lib/testlibrary/java/lang
|
||||
*/
|
||||
|
||||
@ -36,47 +37,17 @@ import static java.lang.Character.*;
|
||||
|
||||
public class CheckProp {
|
||||
|
||||
public static void main(String[] args) throws IOException {
|
||||
File fPropList = UCDFiles.PROP_LIST.toFile();
|
||||
int i, j;
|
||||
BufferedReader sbfr = new BufferedReader(new FileReader(fPropList));
|
||||
Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s*;\\s+(\\w+)\\s+#.*").matcher("");
|
||||
Map<String, ArrayList<Integer>> propMap = new LinkedHashMap<>();
|
||||
|
||||
String line = null;
|
||||
int lineNo = 0;
|
||||
while ((line = sbfr.readLine()) != null) {
|
||||
lineNo++;
|
||||
if (line.length() <= 1 || line.charAt(0) == '#') {
|
||||
continue;
|
||||
}
|
||||
m.reset(line);
|
||||
if (m.matches()) {
|
||||
int start = Integer.parseInt(m.group(1), 16);
|
||||
int end = (m.group(2)==null)?start
|
||||
:Integer.parseInt(m.group(2), 16);
|
||||
String name = m.group(3);
|
||||
|
||||
ArrayList<Integer> list = propMap.get(name);
|
||||
if (list == null) {
|
||||
list = new ArrayList<Integer>();
|
||||
propMap.put(name, list);
|
||||
}
|
||||
while (start <= end)
|
||||
list.add(start++);
|
||||
} else {
|
||||
System.out.printf("Warning: Unrecognized line %d <%s>%n", lineNo, line);
|
||||
}
|
||||
}
|
||||
sbfr.close();
|
||||
//for (String name: propMap.keySet()) {
|
||||
// System.out.printf("%s %d%n", name, propMap.get(name).size());
|
||||
//}
|
||||
public static void main(String[] args) {
|
||||
Map<String, List<Integer>> propMap = new LinkedHashMap<>();
|
||||
List.of(UCDFiles.PROP_LIST.toFile(), UCDFiles.DERIVED_PROPS.toFile()).stream()
|
||||
.forEach(f -> readPropMap(propMap, f));
|
||||
|
||||
Integer[] otherLowercase = propMap.get("Other_Lowercase").toArray(new Integer[0]);
|
||||
Integer[] otherUppercase = propMap.get("Other_Uppercase").toArray(new Integer[0]);
|
||||
Integer[] otherAlphabetic = propMap.get("Other_Alphabetic").toArray(new Integer[0]);
|
||||
Integer[] ideographic = propMap.get("Ideographic").toArray(new Integer[0]);
|
||||
Integer[] IDStart = propMap.get("ID_Start").toArray(new Integer[0]);
|
||||
Integer[] IDContinue = propMap.get("ID_Continue").toArray(new Integer[0]);
|
||||
|
||||
int fails = 0;
|
||||
for (int cp = MIN_CODE_POINT; cp < MAX_CODE_POINT; cp++) {
|
||||
@ -111,8 +82,63 @@ public class CheckProp {
|
||||
fails++;
|
||||
System.err.printf("Wrong isIdeographic(U+%04x)\n", cp);
|
||||
}
|
||||
if (isUnicodeIdentifierStart(cp) !=
|
||||
(cp == 0x2E2F ||
|
||||
Arrays.binarySearch(IDStart, cp) >= 0))
|
||||
{
|
||||
fails++;
|
||||
System.err.printf("Wrong isUnicodeIdentifierStart(U+%04x)\n", cp);
|
||||
}
|
||||
if (isUnicodeIdentifierPart(cp) !=
|
||||
(isIdentifierIgnorable(cp) ||
|
||||
cp == 0x2E2F ||
|
||||
Arrays.binarySearch(IDContinue, cp) >= 0))
|
||||
{
|
||||
fails++;
|
||||
System.err.printf("Wrong isUnicodeIdentifierPart(U+%04x)\n", cp);
|
||||
}
|
||||
}
|
||||
if (fails != 0)
|
||||
throw new RuntimeException("CheckProp failed=" + fails);
|
||||
}
|
||||
|
||||
private static void readPropMap(Map<String, List<Integer>> propMap, File fPropList) {
|
||||
try {
|
||||
BufferedReader sbfr = new BufferedReader(new FileReader(fPropList));
|
||||
Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s*;\\s+(\\w+)\\s+#.*").matcher("");
|
||||
|
||||
String line = null;
|
||||
int lineNo = 0;
|
||||
while ((line = sbfr.readLine()) != null) {
|
||||
lineNo++;
|
||||
if (line.length() <= 1 || line.charAt(0) == '#') {
|
||||
continue;
|
||||
}
|
||||
m.reset(line);
|
||||
if (m.matches()) {
|
||||
int start = Integer.parseInt(m.group(1), 16);
|
||||
int end = (m.group(2)==null)?start
|
||||
:Integer.parseInt(m.group(2), 16);
|
||||
String name = m.group(3);
|
||||
|
||||
List<Integer> list = propMap.get(name);
|
||||
if (list == null) {
|
||||
list = new ArrayList<Integer>();
|
||||
propMap.put(name, list);
|
||||
}
|
||||
while (start <= end)
|
||||
list.add(start++);
|
||||
} else {
|
||||
System.out.printf("Warning: Unrecognized line %d <%s>%n", lineNo, line);
|
||||
}
|
||||
}
|
||||
sbfr.close();
|
||||
} catch (IOException ioe) {
|
||||
throw new UncheckedIOException(ioe);
|
||||
}
|
||||
|
||||
//for (String name: propMap.keySet()) {
|
||||
// System.out.printf("%s %d%n", name, propMap.get(name).size());
|
||||
//}
|
||||
}
|
||||
}
|
||||
|
@ -36,6 +36,8 @@ public class UCDFiles {
|
||||
|
||||
public static Path BLOCKS =
|
||||
UCD_DIR.resolve("Blocks.txt");
|
||||
public static Path DERIVED_PROPS =
|
||||
UCD_DIR.resolve("DerivedCoreProperties.txt");
|
||||
public static Path GRAPHEME_BREAK_PROPERTY =
|
||||
UCD_DIR.resolve("auxiliary").resolve("GraphemeBreakProperty.txt");
|
||||
public static Path GRAPHEME_BREAK_TEST =
|
||||
|
Loading…
x
Reference in New Issue
Block a user