From e152cc0312915d48efc67a133195b38ee9e45306 Mon Sep 17 00:00:00 2001 From: Claes Redestad Date: Wed, 17 Mar 2021 15:22:06 +0000 Subject: [PATCH] 8263677: Improve Character.isLowerCase/isUpperCase lookups Reviewed-by: erikj, ihse, naoto, rriggs --- .../CharacterData00.java.template | 8 +- .../CharacterData01.java.template | 8 +- .../CharacterData02.java.template | 9 +- .../CharacterData03.java.template | 8 +- .../CharacterData0E.java.template | 8 +- .../CharacterDataLatin1.java.template | 7 +- .../generatecharacter/GenerateCharacter.java | 360 ++++++------------ .../openjdk/bench/java/lang/Characters.java | 2 +- 8 files changed, 144 insertions(+), 266 deletions(-) diff --git a/make/data/characterdata/CharacterData00.java.template b/make/data/characterdata/CharacterData00.java.template index 5705297a53e..d8e8b0cf8f1 100644 --- a/make/data/characterdata/CharacterData00.java.template +++ b/make/data/characterdata/CharacterData00.java.template @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -755,13 +755,11 @@ class CharacterData00 extends CharacterData { } boolean isLowerCase(int ch) { - return (getProperties(ch) & $$maskType) == Character.LOWERCASE_LETTER - || (getPropertiesEx(ch) & $$maskOtherLowercase) != 0; + return (getPropertiesEx(ch) & $$maskOtherLowercase) != 0; } boolean isUpperCase(int ch) { - return (getProperties(ch) & $$maskType) == Character.UPPERCASE_LETTER - || (getPropertiesEx(ch) & $$maskOtherUppercase) != 0; + return (getPropertiesEx(ch) & $$maskOtherUppercase) != 0; } boolean isWhitespace(int ch) { diff --git a/make/data/characterdata/CharacterData01.java.template b/make/data/characterdata/CharacterData01.java.template index a44450b37ea..f7897d9c254 100644 --- a/make/data/characterdata/CharacterData01.java.template +++ b/make/data/characterdata/CharacterData01.java.template @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -493,13 +493,11 @@ class CharacterData01 extends CharacterData { } boolean isLowerCase(int ch) { - return (getProperties(ch) & $$maskType) == Character.LOWERCASE_LETTER - || (getPropertiesEx(ch) & $$maskOtherLowercase) != 0; + return (getPropertiesEx(ch) & $$maskOtherLowercase) != 0; } boolean isUpperCase(int ch) { - return (getProperties(ch) & $$maskType) == Character.UPPERCASE_LETTER - || (getPropertiesEx(ch) & $$maskOtherUppercase) != 0; + return (getPropertiesEx(ch) & $$maskOtherUppercase) != 0; } boolean isWhitespace(int ch) { diff --git a/make/data/characterdata/CharacterData02.java.template b/make/data/characterdata/CharacterData02.java.template index 739bc9d32ab..410f8ec9e99 100644 --- a/make/data/characterdata/CharacterData02.java.template +++ b/make/data/characterdata/CharacterData02.java.template @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -212,16 +212,13 @@ class CharacterData02 extends CharacterData { } boolean isLowerCase(int ch) { - return (getProperties(ch) & $$maskType) == Character.LOWERCASE_LETTER - || (getPropertiesEx(ch) & $$maskOtherLowercase) != 0; + return (getPropertiesEx(ch) & $$maskOtherLowercase) != 0; } boolean isUpperCase(int ch) { - return (getProperties(ch) & $$maskType) == Character.UPPERCASE_LETTER - || (getPropertiesEx(ch) & $$maskOtherUppercase) != 0; + return (getPropertiesEx(ch) & $$maskOtherUppercase) != 0; } - boolean isWhitespace(int ch) { return (getProperties(ch) & $$maskIdentifierInfo) == $$valueJavaWhitespace; } diff --git a/make/data/characterdata/CharacterData03.java.template b/make/data/characterdata/CharacterData03.java.template index 06d4dfbdc2c..d390a0cfd0e 100644 --- a/make/data/characterdata/CharacterData03.java.template +++ b/make/data/characterdata/CharacterData03.java.template @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -212,13 +212,11 @@ class CharacterData03 extends CharacterData { } boolean isLowerCase(int ch) { - return (getProperties(ch) & $$maskType) == Character.LOWERCASE_LETTER - || (getPropertiesEx(ch) & $$maskOtherLowercase) != 0; + return (getPropertiesEx(ch) & $$maskOtherLowercase) != 0; } boolean isUpperCase(int ch) { - return (getProperties(ch) & $$maskType) == Character.UPPERCASE_LETTER - || (getPropertiesEx(ch) & $$maskOtherUppercase) != 0; + return (getPropertiesEx(ch) & $$maskOtherUppercase) != 0; } boolean isWhitespace(int ch) { diff --git a/make/data/characterdata/CharacterData0E.java.template b/make/data/characterdata/CharacterData0E.java.template index aa6db8469a0..f9ffcd58e16 100644 --- a/make/data/characterdata/CharacterData0E.java.template +++ b/make/data/characterdata/CharacterData0E.java.template @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -212,13 +212,11 @@ class CharacterData0E extends CharacterData { } boolean isLowerCase(int ch) { - return (getProperties(ch) & $$maskType) == Character.LOWERCASE_LETTER - || (getPropertiesEx(ch) & $$maskOtherLowercase) != 0; + return (getPropertiesEx(ch) & $$maskOtherLowercase) != 0; } boolean isUpperCase(int ch) { - return (getProperties(ch) & $$maskType) == Character.UPPERCASE_LETTER - || (getPropertiesEx(ch) & $$maskOtherUppercase) != 0; + return (getPropertiesEx(ch) & $$maskOtherUppercase) != 0; } boolean isWhitespace(int ch) { diff --git a/make/data/characterdata/CharacterDataLatin1.java.template b/make/data/characterdata/CharacterDataLatin1.java.template index c2ff37321e5..d2f4ba249d8 100644 --- a/make/data/characterdata/CharacterDataLatin1.java.template +++ b/make/data/characterdata/CharacterDataLatin1.java.template @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -87,13 +87,12 @@ class CharacterDataLatin1 extends CharacterData { @IntrinsicCandidate boolean isLowerCase(int ch) { - return (getProperties(ch) & $$maskType) == Character.LOWERCASE_LETTER - || (getPropertiesEx(ch) & $$maskOtherLowercase) != 0; // 0xaa, 0xba + return (getPropertiesEx(ch) & $$maskOtherLowercase) != 0; } @IntrinsicCandidate boolean isUpperCase(int ch) { - return (getProperties(ch) & $$maskType) == Character.UPPERCASE_LETTER; + return (getPropertiesEx(ch) & $$maskOtherUppercase) != 0; } boolean isOtherAlphabetic(int ch) { diff --git a/make/jdk/src/classes/build/tools/generatecharacter/GenerateCharacter.java b/make/jdk/src/classes/build/tools/generatecharacter/GenerateCharacter.java index a21b662e2ac..e6c8ff53ed8 100644 --- a/make/jdk/src/classes/build/tools/generatecharacter/GenerateCharacter.java +++ b/make/jdk/src/classes/build/tools/generatecharacter/GenerateCharacter.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -105,7 +105,7 @@ public class GenerateCharacter { entries are short rather than byte). */ - /* The character properties are currently encoded into A (32 bits)and B (16 bits) + /* The character properties are currently encoded into A (32 bits) and B (8 bits) two parts. A: the low 32 bits are defined in the following manner: @@ -157,9 +157,7 @@ public class GenerateCharacter { 1 bit Other_Lowercase property 1 bit Other_Uppercase property 1 bit Other_Alphabetic property - 1 bit Other_Math property 1 bit Ideographic property - 1 bit Noncharacter codepoint property 1 bit ID_Start property 1 bit ID_Continue property */ @@ -175,7 +173,6 @@ public class GenerateCharacter { shiftNumericType = 10, maskNumericType = 0x0C00, shiftIdentifierInfo = 12, maskIdentifierInfo = 0x7000, maskUnicodePart = 0x1000, - shiftCaseInfo = 15, maskCaseInfo = 0x38000, maskLowerCase = 0x20000, maskUpperCase = 0x10000, maskTitleCase = 0x08000, @@ -186,24 +183,20 @@ public class GenerateCharacter { maskDigit = 0x001F, // case offset are 9 bits maskCase = 0x01FF, - shiftBidi = 27, maskBidi = 0x78000000, - shiftMirrored = 31, //maskMirrored = 0x80000000, - shiftPlane = 16, maskPlane = 0xFF0000; + shiftBidi = 27, maskBidi = 0x78000000; // maskMirrored needs to be long, if up 16-bit private static final long maskMirrored = 0x80000000L; - // bit masks identify the 16-bit property field described above, in B + // bit masks identify the 8-bit property field described above, in B // table private static final long - maskOtherLowercase = 0x100000000L, - maskOtherUppercase = 0x200000000L, - maskOtherAlphabetic = 0x400000000L, - maskOtherMath = 0x800000000L, - maskIdeographic = 0x1000000000L, - maskNoncharacterCP = 0x2000000000L, - maskIDStart = 0x4000000000L, - maskIDContinue = 0x8000000000L; + maskOtherLowercase = 0x0100000000L, + maskOtherUppercase = 0x0200000000L, + maskOtherAlphabetic = 0x0400000000L, + maskIdeographic = 0x0800000000L, + maskIDStart = 0x1000000000L, + maskIDContinue = 0x2000000000L; // Can compare masked values with these to determine // numeric or lexical types. @@ -313,14 +306,9 @@ public class GenerateCharacter { static long[] buildMap(UnicodeSpec[] data, SpecialCaseMap[] specialMaps, PropList propList) { - long[] result; - if (bLatin1 == true) { - result = new long[256]; - } else { - result = new long[1<<16]; - } - int k=0; - int codePoint = plane<<16; + long[] result = new long[bLatin1 ? 256 : 1 << 16]; + int k = 0; + int codePoint = plane << 16; UnicodeSpec nonCharSpec = new UnicodeSpec(); for (int j = 0; j < data.length && k < result.length; j++) { if (data[j].codePoint == codePoint) { @@ -370,8 +358,6 @@ public class GenerateCharacter { addExProp(result, propList, "Other_Uppercase", maskOtherUppercase); addExProp(result, propList, "Other_Alphabetic", maskOtherAlphabetic); addExProp(result, propList, "Ideographic", maskIdeographic); - //addExProp(result, propList, "Other_Math", maskOtherMath); - //addExProp(result, propList, "Noncharacter_CodePoint", maskNoncharacterCP); addExProp(result, propList, "ID_Start", maskIDStart); addExProp(result, propList, "ID_Continue", maskIDContinue); @@ -390,8 +376,8 @@ public class GenerateCharacter { static boolean isInvalidJavaWhiteSpace(int c) { int[] exceptions = {0x00A0, 0x2007, 0x202F, 0xFEFF}; boolean retValue = false; - for(int x=0;x 0) System.arraycopy(map, i, buffer, ptr, m); newmap[i >> size] = (ptr >> size); ptr += m; } // end OUTER // Now we know how long the compressed table should be, // so create a new array and copy data from the temporary buffer. long[] newdata = new long[ptr]; - for (int j = 0; j < ptr; j++) { - newdata[j] = buffer[j]; - } + if (ptr > 0) System.arraycopy(buffer, 0, newdata, 0, ptr); // Return the new map and the new data table. - long[][] result = { newmap, newdata }; - return result; + return new long[][]{ newmap, newdata }; } /** @@ -695,7 +685,7 @@ OUTER: for (int i = 0; i < n; i += m) { static void generateCharacterClass(String theTemplateFileName, String theOutputFileName) - throws FileNotFoundException, IOException { + throws IOException { BufferedReader in = new BufferedReader(new FileReader(theTemplateFileName)); PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(theOutputFileName))); out.println(commentStart + @@ -710,7 +700,7 @@ OUTER: for (int i = 0; i < n; i += m) { int depth = 0; while ((pos = line.indexOf(commandMarker, pos)) >= 0) { int newpos = pos + marklen; - char ch = 'x'; + char ch; SCAN: while (newpos < line.length() && (Character.isJavaIdentifierStart(ch = line.charAt(newpos)) || ch == '(' || (ch == ')' && depth > 0))) { @@ -765,18 +755,15 @@ OUTER: for (int i = 0; i < n; i += m) { static String replaceCommand(String x) { if (x.equals("Tables")) return genTables(); if (x.equals("Initializers")) return genInitializers(); - if (x.length() >= 9 && x.substring(0, 7).equals("Lookup(") && - x.substring(x.length()-1).equals(")") ) + if (x.length() >= 9 && x.startsWith("Lookup(") && x.endsWith(")") ) return genAccess("A", x.substring(7, x.length()-1), (identifiers ? 2 : 32)); - if (x.length() >= 11 && x.substring(0, 9).equals("LookupEx(") && - x.substring(x.length()-1).equals(")") ) + if (x.length() >= 11 && x.startsWith("LookupEx(") && x.endsWith(")") ) return genAccess("B", x.substring(9, x.length()-1), 16); if (x.equals("shiftType")) return Long.toString(shiftType); if (x.equals("shiftIdentifierInfo")) return Long.toString(shiftIdentifierInfo); if (x.equals("maskIdentifierInfo")) return "0x" + hex8(maskIdentifierInfo); if (x.equals("maskUnicodePart")) return "0x" + hex8(maskUnicodePart); if (x.equals("shiftCaseOffset")) return Long.toString(shiftCaseOffset); - if (x.equals("shiftCaseInfo")) return Long.toString(shiftCaseInfo); if (x.equals("shiftCaseOffsetSign")) return Long.toString(shiftCaseOffsetSign); if (x.equals("maskCase")) return "0x" + hex8(maskCase); if (x.equals("maskCaseOffset")) return "0x" + hex8(maskCaseOffset); @@ -811,8 +798,6 @@ OUTER: for (int i = 0; i < n; i += m) { if (x.equals("valueDigit")) return "0x" + hex8(valueDigit); if (x.equals("valueStrangeNumeric")) return "0x" + hex8(valueStrangeNumeric); if (x.equals("valueJavaSupradecimal")) return "0x" + hex8(valueJavaSupradecimal); - if (x.equals("valueDigit")) return "0x" + hex8(valueDigit); - if (x.equals("valueStrangeNumeric")) return "0x" + hex8(valueStrangeNumeric); if (x.equals("maskType")) return "0x" + hex(maskType); if (x.equals("shiftBidi")) return Long.toString(shiftBidi); if (x.equals("maskBidi")) return "0x" + hex(maskBidi); @@ -943,11 +928,12 @@ OUTER: for (int i = 0; i < n; i += m) { StringBuffer result = new StringBuffer(); // liu : Add a comment showing the source of this table if (debug) { - result.append(commentStart + " The following tables and code generated using:" + - commentEnd + "\n "); - result.append(commentStart + ' ' + commandLineDescription + commentEnd + "\n "); + result.append(commentStart).append(" The following tables and code generated using:") + .append(commentEnd).append("\n ") + .append(commentStart).append(' ') + .append(commandLineDescription).append(commentEnd).append("\n "); } - if (plane == 0 && bLatin1 == false) { + if (plane == 0 && !bLatin1) { genCaseMapTableDeclaration(result); genCaseMapTable(initializers, specialCaseMaps); } @@ -966,7 +952,7 @@ OUTER: for (int i = 0; i < n; i += m) { // If we ever need more than 32 bits to represent the character properties, // then a table "B" may be needed as well. - genTable(result, "B", tables[n - 1], 32, 16, sizes[n - 1], false, 0, true, true, false); + genTable(result, "B", tables[n - 1], 32, 8, sizes[n - 1], false, 0, true, true, false); totalBytes += ((((tables[n - 1].length * (identifiers ? 2 : 32)) + 31) >> 5) << 2); result.append(commentStart); @@ -1003,23 +989,8 @@ OUTER: for (int i = 0; i < n; i += m) { return totalBytes; } - static void appendEscapedStringFragment(StringBuffer result, - char[] line, - int length, - boolean lastFragment) { - result.append(" \""); - for (int k=0; k= 0) { int newpos = pos + marklen; - char ch = 'x'; + char ch; while (newpos < template.length() && Character.isJavaIdentifierStart(ch = template.charAt(newpos)) && ch != '_') // Don't allow this in token names ++newpos; String token = template.substring(pos+marklen, newpos); - String replacement = "ERROR"; - - if (token.equals("name")) replacement = name; - else if (token.equals("type")) replacement = type; - else if (token.equals("bits")) replacement = ""+bits; - else if (token.equals("size")) replacement = ""+size; - else if (token.equals("entriesPerChar")) replacement = ""+entriesPerChar; - else if (token.equals("charsPerEntry")) replacement = ""+(-entriesPerChar); - else FAIL("Unrecognized token: " + token); + String replacement = switch (token) { + case "name" -> name; + case "type" -> type; + case "bits" -> "" + bits; + case "size" -> "" + size; + case "entriesPerChar" -> "" + entriesPerChar; + case "charsPerEntry" -> "" + (-entriesPerChar); + default -> { + FAIL("Unrecognized token: " + token); + yield "ERROR"; + } + }; template = template.substring(0, pos) + replacement + template.substring(newpos); pos += replacement.length(); @@ -1178,7 +1150,7 @@ OUTER: for (int i = 0; i < n; i += m) { result.append(" The ").append(name).append(" table has ").append(table.length); result.append(" entries for a total of "); int sizeOfTable = ((table.length * bits + 31) >> 5) << 2; - if (bits == 8 && useCharForByte) { + if (bits == 8 && tableAsString && useCharForByte) { sizeOfTable *= 2; } result.append(sizeOfTable); @@ -1195,28 +1167,26 @@ OUTER: for (int i = 0; i < n; i += m) { if (noConversion) { result.append("] = (\n"); } else { - result.append("] = new ").append(atype).append("["+table.length+"];\n "); + result.append("] = new ").append(atype).append("[").append(table.length).append("];\n "); result.append("static final String ").append(name).append("_DATA =\n"); } - int CHARS_PER_LINE = 8; - StringBuffer theString = new StringBuffer(); + StringBuilder theString = new StringBuilder(); int entriesInCharSoFar = 0; char ch = '\u0000'; int charsPerEntry = -entriesPerChar; - for (int j=0; j> extract; + for (long l : table) { long entry; if ("A".equals(name)) - entry = (table[j] & 0xffffffffL) >> extract; + entry = (l & 0xffffffffL) >> extract; else - entry = (table[j] >> extract); + entry = (l >> extract); if (shiftEntries) entry <<= shift; if (entry >= (1L << bits)) { FAIL("Entry too big"); } if (entriesPerChar > 0) { // Pack multiple entries into a character - ch = (char)(((int)ch >> bits) | (entry << (entriesPerChar-1)*bits)); + ch = (char) (((int) ch >> bits) | (entry << (entriesPerChar - 1) * bits)); ++entriesInCharSoFar; if (entriesInCharSoFar == entriesPerChar) { // Character is full @@ -1224,11 +1194,10 @@ OUTER: for (int i = 0; i < n; i += m) { entriesInCharSoFar = 0; ch = '\u0000'; } - } - else { + } else { // Use multiple characters per entry - for (int k=0; k> ((charsPerEntry-1)*16)); + for (int k = 0; k < charsPerEntry; ++k) { + ch = (char) (entry >> ((charsPerEntry - 1) * 16)); entry <<= 16; theString.append(ch); } @@ -1240,7 +1209,6 @@ OUTER: for (int i = 0; i < n; i += m) { ++entriesInCharSoFar; } theString.append(ch); - entriesInCharSoFar = 0; } result.append(Utility.formatForSource(theString.toString(), " ")); if (noConversion) { @@ -1337,7 +1305,7 @@ OUTER: for (int i = 0; i < n; i += m) { result.append("0x").append(hex4((j & ~commentMask) << (16 - size))); else result.append(dec3((j & ~commentMask) >> commentShift)); - if (properties) propertiesComments(result, val); + if (properties) propertiesComments(result, val << extract); result.append(commentEnd); } } // end PRINT @@ -1347,27 +1315,25 @@ OUTER: for (int i = 0; i < n; i += m) { } static void genCaseMapTableDeclaration(StringBuffer result) { - String myTab = " "; - result.append(myTab + "static final char[][][] charMap;\n"); + result.append(" static final char[][][] charMap;\n"); } static void genCaseMapTable(StringBuffer result, SpecialCaseMap[] specialCaseMaps){ String myTab = " "; int ch; char[] map; - result.append(myTab + "charMap = new char[][][] {\n"); - for (int x = 0; x < specialCaseMaps.length; x++) { - ch = specialCaseMaps[x].getCharSource(); - map = specialCaseMaps[x].getUpperCaseMap(); - result.append(myTab + myTab); - result.append("{ "); - result.append("{\'\\u"+hex4(ch)+"\'}, {"); - for (int y = 0; y < map.length; y++) { - result.append("\'\\u"+hex4(map[y])+"\', "); + result.append(myTab).append("charMap = new char[][][] {\n"); + for (SpecialCaseMap specialCaseMap : specialCaseMaps) { + ch = specialCaseMap.getCharSource(); + map = specialCaseMap.getUpperCaseMap(); + result.append(myTab).append(myTab).append("{ "); + result.append("{'\\u").append(hex4(ch)).append("'}, {"); + for (char c : map) { + result.append("'\\u").append(hex4(c)).append("', "); } result.append("} },\n"); } - result.append(myTab + "};\n"); + result.append(myTab).append("};\n"); } @@ -1384,126 +1350,49 @@ OUTER: for (int i = 0; i < n; i += m) { static void propertiesComments(StringBuffer result, long val) { result.append(" "); - switch ((int)(val & maskType)) { - case UnicodeSpec.CONTROL: - result.append("Cc"); - break; - case UnicodeSpec.FORMAT: - result.append("Cf"); - break; - case UnicodeSpec.PRIVATE_USE: - result.append("Co"); - break; - case UnicodeSpec.SURROGATE: - result.append("Cs"); - break; - case UnicodeSpec.LOWERCASE_LETTER: - result.append("Ll"); - break; - case UnicodeSpec.MODIFIER_LETTER: - result.append("Lm"); - break; - case UnicodeSpec.OTHER_LETTER: - result.append("Lo"); - break; - case UnicodeSpec.TITLECASE_LETTER: - result.append("Lt"); - break; - case UnicodeSpec.UPPERCASE_LETTER: - result.append("Lu"); - break; - case UnicodeSpec.COMBINING_SPACING_MARK: - result.append("Mc"); - break; - case UnicodeSpec.ENCLOSING_MARK: - result.append("Me"); - break; - case UnicodeSpec.NON_SPACING_MARK: - result.append("Mn"); - break; - case UnicodeSpec.DECIMAL_DIGIT_NUMBER: - result.append("Nd"); - break; - case UnicodeSpec.LETTER_NUMBER: - result.append("Nl"); - break; - case UnicodeSpec.OTHER_NUMBER: - result.append("No"); - break; - case UnicodeSpec.CONNECTOR_PUNCTUATION: - result.append("Pc"); - break; - case UnicodeSpec.DASH_PUNCTUATION: - result.append("Pd"); - break; - case UnicodeSpec.END_PUNCTUATION: - result.append("Pe"); - break; - case UnicodeSpec.OTHER_PUNCTUATION: - result.append("Po"); - break; - case UnicodeSpec.START_PUNCTUATION: - result.append("Ps"); - break; - case UnicodeSpec.CURRENCY_SYMBOL: - result.append("Sc"); - break; - case UnicodeSpec.MODIFIER_SYMBOL: - result.append("Sk"); - break; - case UnicodeSpec.MATH_SYMBOL: - result.append("Sm"); - break; - case UnicodeSpec.OTHER_SYMBOL: - result.append("So"); - break; - case UnicodeSpec.LINE_SEPARATOR: - result.append("Zl"); break; - case UnicodeSpec.PARAGRAPH_SEPARATOR: - result.append("Zp"); - break; - case UnicodeSpec.SPACE_SEPARATOR: - result.append("Zs"); - break; - case UnicodeSpec.UNASSIGNED: - result.append("unassigned"); - break; + switch ((int) (val & maskType)) { + case UnicodeSpec.CONTROL -> result.append("Cc"); + case UnicodeSpec.FORMAT -> result.append("Cf"); + case UnicodeSpec.PRIVATE_USE -> result.append("Co"); + case UnicodeSpec.SURROGATE -> result.append("Cs"); + case UnicodeSpec.LOWERCASE_LETTER -> result.append("Ll"); + case UnicodeSpec.MODIFIER_LETTER -> result.append("Lm"); + case UnicodeSpec.OTHER_LETTER -> result.append("Lo"); + case UnicodeSpec.TITLECASE_LETTER -> result.append("Lt"); + case UnicodeSpec.UPPERCASE_LETTER -> result.append("Lu"); + case UnicodeSpec.COMBINING_SPACING_MARK -> result.append("Mc"); + case UnicodeSpec.ENCLOSING_MARK -> result.append("Me"); + case UnicodeSpec.NON_SPACING_MARK -> result.append("Mn"); + case UnicodeSpec.DECIMAL_DIGIT_NUMBER -> result.append("Nd"); + case UnicodeSpec.LETTER_NUMBER -> result.append("Nl"); + case UnicodeSpec.OTHER_NUMBER -> result.append("No"); + case UnicodeSpec.CONNECTOR_PUNCTUATION -> result.append("Pc"); + case UnicodeSpec.DASH_PUNCTUATION -> result.append("Pd"); + case UnicodeSpec.END_PUNCTUATION -> result.append("Pe"); + case UnicodeSpec.OTHER_PUNCTUATION -> result.append("Po"); + case UnicodeSpec.START_PUNCTUATION -> result.append("Ps"); + case UnicodeSpec.CURRENCY_SYMBOL -> result.append("Sc"); + case UnicodeSpec.MODIFIER_SYMBOL -> result.append("Sk"); + case UnicodeSpec.MATH_SYMBOL -> result.append("Sm"); + case UnicodeSpec.OTHER_SYMBOL -> result.append("So"); + case UnicodeSpec.LINE_SEPARATOR -> result.append("Zl"); + case UnicodeSpec.PARAGRAPH_SEPARATOR -> result.append("Zp"); + case UnicodeSpec.SPACE_SEPARATOR -> result.append("Zs"); + case UnicodeSpec.UNASSIGNED -> result.append("unassigned"); } - switch ((int)((val & maskBidi) >> shiftBidi)) { - case UnicodeSpec.DIRECTIONALITY_LEFT_TO_RIGHT: - result.append(", L"); - break; - case UnicodeSpec.DIRECTIONALITY_RIGHT_TO_LEFT: - result.append(", R"); - break; - case UnicodeSpec.DIRECTIONALITY_EUROPEAN_NUMBER: - result.append(", EN"); - break; - case UnicodeSpec.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR: - result.append(", ES"); - break; - case UnicodeSpec.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR: - result.append(", ET"); - break; - case UnicodeSpec.DIRECTIONALITY_ARABIC_NUMBER: - result.append(", AN"); - break; - case UnicodeSpec.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR: - result.append(", CS"); - break; - case UnicodeSpec.DIRECTIONALITY_PARAGRAPH_SEPARATOR: - result.append(", B"); - break; - case UnicodeSpec.DIRECTIONALITY_SEGMENT_SEPARATOR: - result.append(", S"); - break; - case UnicodeSpec.DIRECTIONALITY_WHITESPACE: - result.append(", WS"); - break; - case UnicodeSpec.DIRECTIONALITY_OTHER_NEUTRALS: - result.append(", ON"); - break; + switch ((int) ((val & maskBidi) >> shiftBidi)) { + case UnicodeSpec.DIRECTIONALITY_LEFT_TO_RIGHT -> result.append(", L"); + case UnicodeSpec.DIRECTIONALITY_RIGHT_TO_LEFT -> result.append(", R"); + case UnicodeSpec.DIRECTIONALITY_EUROPEAN_NUMBER -> result.append(", EN"); + case UnicodeSpec.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR -> result.append(", ES"); + case UnicodeSpec.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR -> result.append(", ET"); + case UnicodeSpec.DIRECTIONALITY_ARABIC_NUMBER -> result.append(", AN"); + case UnicodeSpec.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR -> result.append(", CS"); + case UnicodeSpec.DIRECTIONALITY_PARAGRAPH_SEPARATOR -> result.append(", B"); + case UnicodeSpec.DIRECTIONALITY_SEGMENT_SEPARATOR -> result.append(", S"); + case UnicodeSpec.DIRECTIONALITY_WHITESPACE -> result.append(", WS"); + case UnicodeSpec.DIRECTIONALITY_OTHER_NEUTRALS -> result.append(", ON"); } if ((val & maskUpperCase) != 0) { result.append(", hasUpper (subtract "); @@ -1600,10 +1489,9 @@ OUTER: for (int i = 0; i < n; i += m) { String bitshift = (bits == 1) ? "(" + var + "&0x1F)" : (bits == 2) ? "((" + var + "&0xF)<<1)" : (bits == 4) ? "((" + var + "&7)<<2)" : null; - String extracted = ((k < sizes.length - 1) || (bits >= 8)) ? adjusted : + access = ((k < sizes.length - 1) || (bits >= 8)) ? adjusted : "((" + adjusted + ">>" + bitshift + ")&" + (bits == 4 ? "0xF" : "" + ((1 << bits) - 1)) + ")"; - access = extracted; } return access; } @@ -1687,9 +1575,9 @@ OUTER: for (int i = 0; i < n; i += m) { */ static void processArgs(String[] args) { - StringBuffer desc = new StringBuffer("java GenerateCharacter"); - for (int j=0; j 0x100 && (len >> size) <= 0x100) { len >>= size; preshifted[j] = false; @@ -1922,7 +1810,9 @@ OUTER: for (int i = 0; i < n; i += m) { if (ch == '<' || ch == '>') ++j; } System.out.print("("); - for (int j=0; j