diff --git a/src/java.base/share/classes/java/lang/CharacterDataLatin1.java.template b/src/java.base/share/classes/java/lang/CharacterDataLatin1.java.template index d2f4ba249d8..8a704635f14 100644 --- a/src/java.base/share/classes/java/lang/CharacterDataLatin1.java.template +++ b/src/java.base/share/classes/java/lang/CharacterDataLatin1.java.template @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -135,30 +135,36 @@ class CharacterDataLatin1 extends CharacterData { } int toLowerCase(int ch) { - int mapChar = ch; - int val = getProperties(ch); - - if (((val & $$maskLowerCase) != 0) && - ((val & $$maskCaseOffset) != $$maskCaseOffset)) { - int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset); - mapChar = ch + offset; + if (ch < 'A') { // Fast path for low code points + return ch; } - return mapChar; + int l = ch | 0x20; // Set bit 0x20 ('oldest ASCII trick in the book'): A-Z -> a-z, 0xC0-0xDE -> 0xE0-0xFE + if (l <= 'z' // In range a-z (or 0x60, which is returned unchanged) + || (l >= 0xE0 && l <= 0xFE && l != 0xF7)) { // ..or agrave-thorn, excluding division + return l; + } + return ch; } int toUpperCase(int ch) { - int mapChar = ch; - int val = getProperties(ch); - - if ((val & $$maskUpperCase) != 0) { - if ((val & $$maskCaseOffset) != $$maskCaseOffset) { - int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset); - mapChar = ch - offset; - } else if (ch == 0x00B5) { - mapChar = 0x039C; - } + if (ch < 'a') { // Fast path for low code points + return ch; } - return mapChar; + int U = ch & 0xDF; // Clear bit 0x20 ('oldest ASCII trick in the book'): a-z -> A-Z, 0xE0-0xFE -> 0xC0-0xDE + if (U <= 'Z' // In range A-Z (ch >= 'a' guarantees U >= 'A') + || (U >= 0xC0 && U <= 0xDE && U != 0xD7)) { // ..or Agrave-Thorn, excluding multiplication + return U; + } + + // Special-case for 'y
with Diaeresis' which uppercases out of latin1 + if (ch == 0xFF) { + return 0x178; // Capital Letter Y with Diaeresis + } + // Special-case for 'Micro Sign' which uppercases out of latin1 + if (ch == 0xB5) { + return 0x39C; // Greek Capital Letter Mu + } + return ch; } int toTitleCase(int ch) { diff --git a/test/jdk/java/lang/Character/Latin1CaseConversion.java b/test/jdk/java/lang/Character/Latin1CaseConversion.java new file mode 100644 index 00000000000..a176bd4b002 --- /dev/null +++ b/test/jdk/java/lang/Character/Latin1CaseConversion.java @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +import org.testng.annotations.Test; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.fail; + +/** + * @test + * @bug 8302877 + * @summary Provides exhaustive verification of Character.toUpperCase and Character.toLowerCase + * for all code points in the latin1 range 0-255.
+ * @run testng Latin1CaseConversion + */ +public class Latin1CaseConversion { + + @Test + public void shouldUpperCaseAndLowerCaseLatin1() { + for (int c = 0; c < 256; c++) { + int upper = Character.toUpperCase(c); + int lower = Character.toLowerCase(c); + + if (c < 0x41) { // Before A + assertUnchanged(upper, lower, c); + } else if (c <= 0x5A) { // A-Z + assertEquals(upper, c); + assertEquals(lower, c + 32); + } else if (c < 0x61) { // Between Z and a + assertUnchanged(upper, lower, c); + } else if (c <= 0x7A) { // a-z + assertEquals(upper, c - 32); + assertEquals(lower, c); + } else if (c < 0xB5) { // Between z and Micro Sign + assertUnchanged(upper, lower, c); + } else if (c == 0xB5) { // Special case for Micro Sign + assertEquals(upper, 0x39C); + assertEquals(lower, c); + } else if (c < 0xC0) { // Between Micro Sign and A-grave + assertUnchanged(upper, lower, c); + } else if (c < 0xD7) { // A-grave - O with Diaeresis + assertEquals(upper, c); + assertEquals(lower, c + 32); + } else if (c == 0xD7) { // Multiplication + assertUnchanged(upper, lower, c); + } else if (c <= 0xDE) { // O with slash - Thorn + assertEquals(upper, c); + assertEquals(lower, c + 32); + } else if (c == 0xDF) { // Sharp s + assertUnchanged(upper, lower, c); + } else if (c < 0xF7) { // a-grave - o with Diaeresis + assertEquals(upper, c - 32); + assertEquals(lower, c); + } else if (c == 0xF7) { // Division + assertUnchanged(upper, lower, c); + } else if (c < 0xFF) { // o with slash - thorn + assertEquals(upper, c - 32); + assertEquals(lower, c); + } else if (c == 0XFF) { // Special case for y with Diaeresis + assertEquals(upper, 0x178); + assertEquals(lower, c); + } else { + fail("Uncovered code point: " + Integer.toHexString(c)); + } + } + } + + private static void assertUnchanged(int upper, int lower, int c) { + assertEquals(upper, c); + assertEquals(lower, c); + } +} diff --git a/test/jdk/sun/text/resources/LocaleData b/test/jdk/sun/text/resources/LocaleData index a708b47aa59..c4dfa184832 100644 ---
a/test/jdk/sun/text/resources/LocaleData +++ b/test/jdk/sun/text/resources/LocaleData @@ -2230,7 +2230,7 @@ FormatData/ar_YE/NumberElements/8=\u2030 FormatData/ar_YE/NumberElements/9=\u221e FormatData/ar_YE/NumberElements/10=\ufffd -# bug #4113654 (this is obviously not an exchaustive test; I'm trying it here for a single +# bug #4113654 (this is obviously not an exhaustive test; I'm trying it here for a single # inheritance chain only. This bug fix also gets tested fairly well by the tests for all # the other bugs as given above) FormatData//NumberPatterns/0=#,##0.###;-#,##0.### diff --git a/test/jdk/sun/text/resources/LocaleData.cldr b/test/jdk/sun/text/resources/LocaleData.cldr index f467a0a5aa5..620c51f313d 100644 --- a/test/jdk/sun/text/resources/LocaleData.cldr +++ b/test/jdk/sun/text/resources/LocaleData.cldr @@ -2185,7 +2185,7 @@ FormatData/ar_YE/arab.NumberElements/8=\u0609 FormatData/ar_YE/arab.NumberElements/9=\u221e FormatData/ar_YE/arab.NumberElements/10=\u0644\u064a\u0633\u00a0\u0631\u0642\u0645 -# bug #4113654 (this is obviously not an exchaustive test; I'm trying it here for a single +# bug #4113654 (this is obviously not an exhaustive test; I'm trying it here for a single # inheritance chain only. This bug fix also gets tested fairly well by the tests for all # the other bugs as given above) FormatData//latn.NumberPatterns/0=#,##0.### diff --git a/test/micro/org/openjdk/bench/java/lang/Characters.java b/test/micro/org/openjdk/bench/java/lang/Characters.java index b94a57c69f8..b3159cf5574 100644 --- a/test/micro/org/openjdk/bench/java/lang/Characters.java +++ b/test/micro/org/openjdk/bench/java/lang/Characters.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -27,11 +27,13 @@ package org.openjdk.bench.java.lang; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; import org.openjdk.jmh.annotations.Measurement; import org.openjdk.jmh.annotations.Mode; import org.openjdk.jmh.annotations.OutputTimeUnit; import org.openjdk.jmh.annotations.Param; import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; import org.openjdk.jmh.annotations.Warmup; @@ -80,4 +82,47 @@ public class Characters { } } + + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.NANOSECONDS) + @State(Scope.Thread) + @Warmup(iterations = 5, time = 1) + @Measurement(iterations = 5, time = 1) + @Fork(3) + public static class CaseConversions { + @Param({ + "low", // 0x09 pre A + "A", // 0x41 uppercase A + "a", // 0x61 lowercase a + "A-grave", // 0xC0 uppercase A-grave + "a-grave", // 0xE0 lowercase a-grave + "micro", // 0xB5 lowercase 'Micro Sign' + "yD" // 0xFF lowercase 'y with Diaeresis' + }) + private String codePoint; + private int cp; + + @Setup(Level.Trial) + public void setup() { + cp = switch (codePoint) { // Must agree with the code points documented on @Param above + case "low" -> 0x09; + case "A" -> 0x41; + case "a" -> 0x61; + case "A-grave" -> 0xC0; + case "a-grave" -> 0xE0; + case "yD" -> 0xFF; // 'y with Diaeresis', uppercases out of latin1 to 0x178 + case "micro" -> 0xB5; // 'Micro Sign', uppercases out of latin1 to 0x39C + default -> Integer.parseInt(codePoint); + }; + } + @Benchmark + public int toUpperCase() { + return Character.toUpperCase(cp); + } + + @Benchmark + public int toLowerCase() { + return Character.toLowerCase(cp); + } + } }