8302877: Speed up latin1 case conversions
Reviewed-by: naoto, redestad
This commit is contained in:
parent
1ea5f9f7cd
commit
ef1f7bd3b8
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2002, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2002, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -135,30 +135,36 @@ class CharacterDataLatin1 extends CharacterData {
|
||||
}
|
||||
|
||||
int toLowerCase(int ch) {
|
||||
int mapChar = ch;
|
||||
int val = getProperties(ch);
|
||||
|
||||
if (((val & $$maskLowerCase) != 0) &&
|
||||
((val & $$maskCaseOffset) != $$maskCaseOffset)) {
|
||||
int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
|
||||
mapChar = ch + offset;
|
||||
if (ch < 'A') { // Fast path for low code points
|
||||
return ch;
|
||||
}
|
||||
return mapChar;
|
||||
int l = ch | 0x20; // Lowercase using 'oldest ASCII trick in the book'
|
||||
if (l <= 'z' // In range a-z
|
||||
|| (l >= 0xE0 && l <= 0xFE && l != 0xF7)) { // ..or agrave-thorn, excluding division
|
||||
return l;
|
||||
}
|
||||
return ch;
|
||||
}
|
||||
|
||||
int toUpperCase(int ch) {
|
||||
int mapChar = ch;
|
||||
int val = getProperties(ch);
|
||||
|
||||
if ((val & $$maskUpperCase) != 0) {
|
||||
if ((val & $$maskCaseOffset) != $$maskCaseOffset) {
|
||||
int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
|
||||
mapChar = ch - offset;
|
||||
} else if (ch == 0x00B5) {
|
||||
mapChar = 0x039C;
|
||||
}
|
||||
if (ch < 'a') { // Fast path for low code points
|
||||
return ch;
|
||||
}
|
||||
return mapChar;
|
||||
int U = ch & 0xDF; // Uppercase using 'oldest ASCII trick in the book'
|
||||
if (U <= 'Z' // In range A-Z
|
||||
|| (U >= 0xC0 && U <= 0xDE && U != 0xD7)) { // ..or Agrave-Thorn, excluding multiplication
|
||||
return U;
|
||||
}
|
||||
|
||||
// Special-case for 'y with Diaeresis' which uppercases out of latin1
|
||||
if (ch == 0xFF) {
|
||||
return 0x178; // Capital Letter Y with Diaeresis
|
||||
}
|
||||
// Special-case for 'Micro Sign' which uppercases out of latin1
|
||||
if (ch == 0xB5) {
|
||||
return 0x39C; // Greek Capital Letter Mu
|
||||
}
|
||||
return ch;
|
||||
}
|
||||
|
||||
int toTitleCase(int ch) {
|
||||
|
92
test/jdk/java/lang/Character/Latin1CaseConversion.java
Normal file
92
test/jdk/java/lang/Character/Latin1CaseConversion.java
Normal file
@ -0,0 +1,92 @@
|
||||
/*
|
||||
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import static org.testng.Assert.assertEquals;
|
||||
import static org.testng.Assert.fail;
|
||||
|
||||
/**
|
||||
* @test
|
||||
* @bug 8302877
|
||||
* @summary Provides exhaustive verification of Character.toUpperCase and Character.toLowerCase
|
||||
* for all code points in the latin1 range 0-255.
|
||||
* @run testng Latin1CaseConversion
|
||||
*/
|
||||
public class Latin1CaseConversion {
|
||||
|
||||
@Test
|
||||
public void shouldUpperCaseAndLowerCaseLatin1() {
|
||||
for (int c = 0; c < 256; c++) {
|
||||
int upper = Character.toUpperCase(c);
|
||||
int lower = Character.toLowerCase(c);
|
||||
|
||||
if (c < 0x41) { // Before A
|
||||
assertUnchanged(upper, lower, c);
|
||||
} else if (c <= 0x5A) { // A-Z
|
||||
assertEquals(upper, c);
|
||||
assertEquals(lower, c + 32);
|
||||
} else if (c < 0x61) { // Between Z and a
|
||||
assertUnchanged(upper, lower, c);
|
||||
} else if (c <= 0x7A) { // a-z
|
||||
assertEquals(upper, c - 32);
|
||||
assertEquals(lower, c);
|
||||
} else if (c < 0xB5) { // Between z and Micro Sign
|
||||
assertUnchanged(upper, lower, c);
|
||||
} else if (c == 0xB5) { // Special case for Micro Sign
|
||||
assertEquals(upper, 0x39C);
|
||||
assertEquals(lower, c);
|
||||
} else if (c < 0xC0) { // Between Micro Sign and A-grave
|
||||
assertUnchanged(upper, lower, c);
|
||||
} else if (c < 0xD7) { // A-grave - O with Diaeresis
|
||||
assertEquals(upper, c);
|
||||
assertEquals(lower, c + 32);
|
||||
} else if (c == 0xD7) { // Multiplication
|
||||
assertUnchanged(upper, lower, c);
|
||||
} else if (c <= 0xDE) { // O with slash - Thorn
|
||||
assertEquals(upper, c);
|
||||
assertEquals(lower, c + 32);
|
||||
} else if (c == 0xDF) { // Sharp s
|
||||
assertUnchanged(upper, lower, c);
|
||||
} else if (c < 0xF7) { // a-grave - o with Diaeresis
|
||||
assertEquals(upper, c - 32);
|
||||
assertEquals(lower, c);
|
||||
} else if (c == 0xF7) { // Division
|
||||
assertUnchanged(upper, lower, c);
|
||||
} else if (c < 0xFF) { // o with slash - thorn
|
||||
assertEquals(upper, c - 32);
|
||||
assertEquals(lower, c);
|
||||
} else if (c == 0XFF) { // Special case for y with Diaeresis
|
||||
assertEquals(upper, 0x178);
|
||||
assertEquals(lower, c);
|
||||
} else {
|
||||
fail("Uncovered code point: " + Integer.toHexString(c));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void assertUnchanged(int upper, int lower, int c) {
|
||||
assertEquals(upper, c);
|
||||
assertEquals(lower, c);
|
||||
}
|
||||
}
|
@ -2230,7 +2230,7 @@ FormatData/ar_YE/NumberElements/8=\u2030
|
||||
FormatData/ar_YE/NumberElements/9=\u221e
|
||||
FormatData/ar_YE/NumberElements/10=\ufffd
|
||||
|
||||
# bug #4113654 (this is obviously not an exchaustive test; I'm trying it here for a single
|
||||
# bug #4113654 (this is obviously not an exhaustive test; I'm trying it here for a single
|
||||
# inheritance chain only. This bug fix also gets tested fairly well by the tests for all
|
||||
# the other bugs as given above)
|
||||
FormatData//NumberPatterns/0=#,##0.###;-#,##0.###
|
||||
|
@ -2185,7 +2185,7 @@ FormatData/ar_YE/arab.NumberElements/8=\u0609
|
||||
FormatData/ar_YE/arab.NumberElements/9=\u221e
|
||||
FormatData/ar_YE/arab.NumberElements/10=\u0644\u064a\u0633\u00a0\u0631\u0642\u0645
|
||||
|
||||
# bug #4113654 (this is obviously not an exchaustive test; I'm trying it here for a single
|
||||
# bug #4113654 (this is obviously not an exhaustive test; I'm trying it here for a single
|
||||
# inheritance chain only. This bug fix also gets tested fairly well by the tests for all
|
||||
# the other bugs as given above)
|
||||
FormatData//latn.NumberPatterns/0=#,##0.###
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -27,11 +27,13 @@ package org.openjdk.bench.java.lang;
|
||||
import org.openjdk.jmh.annotations.Benchmark;
|
||||
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||
import org.openjdk.jmh.annotations.Fork;
|
||||
import org.openjdk.jmh.annotations.Level;
|
||||
import org.openjdk.jmh.annotations.Measurement;
|
||||
import org.openjdk.jmh.annotations.Mode;
|
||||
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||
import org.openjdk.jmh.annotations.Param;
|
||||
import org.openjdk.jmh.annotations.Scope;
|
||||
import org.openjdk.jmh.annotations.Setup;
|
||||
import org.openjdk.jmh.annotations.State;
|
||||
import org.openjdk.jmh.annotations.Warmup;
|
||||
|
||||
@ -80,4 +82,47 @@ public class Characters {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@BenchmarkMode(Mode.AverageTime)
|
||||
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||
@State(Scope.Thread)
|
||||
@Warmup(iterations = 5, time = 1)
|
||||
@Measurement(iterations = 5, time = 1)
|
||||
@Fork(3)
|
||||
public static class CaseConversions {
|
||||
@Param({
|
||||
"low", // 0x09 pre A
|
||||
"A", // 0x41 uppercase A
|
||||
"a", // 0x61 lowercase a
|
||||
"A-grave", // 0xC0 uppercase A-grave
|
||||
"a-grave", // 0xE0 lowercase a-grave
|
||||
"micro", // 0xB5 lowercase 'Micro Sign'
|
||||
"yD" // 0xFF lowercase 'y with Diaeresis'
|
||||
})
|
||||
private String codePoint;
|
||||
private int cp;
|
||||
|
||||
@Setup(Level.Trial)
|
||||
public void setup() {
|
||||
cp = switch (codePoint) {
|
||||
case "low" -> 0x09;
|
||||
case "A" -> 0x41;
|
||||
case "a" -> 0x61;
|
||||
case "A-grave" -> 0xC0;
|
||||
case "a-grave" -> 0xE0;
|
||||
case "yD" -> 0xE0;
|
||||
case "micro" -> 0xFF;
|
||||
default -> Integer.parseInt(codePoint);;
|
||||
};
|
||||
}
|
||||
@Benchmark
|
||||
public int toUpperCase() {
|
||||
return Character.toUpperCase(cp);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public int toLowerCase() {
|
||||
return Character.toLowerCase(cp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user