8302877: Speed up latin1 case conversions
Reviewed-by: naoto, redestad
This commit is contained in:
parent
1ea5f9f7cd
commit
ef1f7bd3b8
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2002, 2021, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2002, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -135,30 +135,36 @@ class CharacterDataLatin1 extends CharacterData {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int toLowerCase(int ch) {
|
int toLowerCase(int ch) {
|
||||||
int mapChar = ch;
|
if (ch < 'A') { // Fast path for low code points
|
||||||
int val = getProperties(ch);
|
return ch;
|
||||||
|
|
||||||
if (((val & $$maskLowerCase) != 0) &&
|
|
||||||
((val & $$maskCaseOffset) != $$maskCaseOffset)) {
|
|
||||||
int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
|
|
||||||
mapChar = ch + offset;
|
|
||||||
}
|
}
|
||||||
return mapChar;
|
int l = ch | 0x20; // Lowercase using 'oldest ASCII trick in the book'
|
||||||
|
if (l <= 'z' // In range a-z
|
||||||
|
|| (l >= 0xE0 && l <= 0xFE && l != 0xF7)) { // ..or agrave-thorn, excluding division
|
||||||
|
return l;
|
||||||
|
}
|
||||||
|
return ch;
|
||||||
}
|
}
|
||||||
|
|
||||||
int toUpperCase(int ch) {
|
int toUpperCase(int ch) {
|
||||||
int mapChar = ch;
|
if (ch < 'a') { // Fast path for low code points
|
||||||
int val = getProperties(ch);
|
return ch;
|
||||||
|
|
||||||
if ((val & $$maskUpperCase) != 0) {
|
|
||||||
if ((val & $$maskCaseOffset) != $$maskCaseOffset) {
|
|
||||||
int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
|
|
||||||
mapChar = ch - offset;
|
|
||||||
} else if (ch == 0x00B5) {
|
|
||||||
mapChar = 0x039C;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return mapChar;
|
int U = ch & 0xDF; // Uppercase using 'oldest ASCII trick in the book'
|
||||||
|
if (U <= 'Z' // In range A-Z
|
||||||
|
|| (U >= 0xC0 && U <= 0xDE && U != 0xD7)) { // ..or Agrave-Thorn, excluding multiplication
|
||||||
|
return U;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Special-case for 'y with Diaeresis' which uppercases out of latin1
|
||||||
|
if (ch == 0xFF) {
|
||||||
|
return 0x178; // Capital Letter Y with Diaeresis
|
||||||
|
}
|
||||||
|
// Special-case for 'Micro Sign' which uppercases out of latin1
|
||||||
|
if (ch == 0xB5) {
|
||||||
|
return 0x39C; // Greek Capital Letter Mu
|
||||||
|
}
|
||||||
|
return ch;
|
||||||
}
|
}
|
||||||
|
|
||||||
int toTitleCase(int ch) {
|
int toTitleCase(int ch) {
|
||||||
|
92
test/jdk/java/lang/Character/Latin1CaseConversion.java
Normal file
92
test/jdk/java/lang/Character/Latin1CaseConversion.java
Normal file
@ -0,0 +1,92 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import static org.testng.Assert.assertEquals;
|
||||||
|
import static org.testng.Assert.fail;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @test
|
||||||
|
* @bug 8302877
|
||||||
|
* @summary Provides exhaustive verification of Character.toUpperCase and Character.toLowerCase
|
||||||
|
* for all code points in the latin1 range 0-255.
|
||||||
|
* @run testng Latin1CaseConversion
|
||||||
|
*/
|
||||||
|
public class Latin1CaseConversion {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void shouldUpperCaseAndLowerCaseLatin1() {
|
||||||
|
for (int c = 0; c < 256; c++) {
|
||||||
|
int upper = Character.toUpperCase(c);
|
||||||
|
int lower = Character.toLowerCase(c);
|
||||||
|
|
||||||
|
if (c < 0x41) { // Before A
|
||||||
|
assertUnchanged(upper, lower, c);
|
||||||
|
} else if (c <= 0x5A) { // A-Z
|
||||||
|
assertEquals(upper, c);
|
||||||
|
assertEquals(lower, c + 32);
|
||||||
|
} else if (c < 0x61) { // Between Z and a
|
||||||
|
assertUnchanged(upper, lower, c);
|
||||||
|
} else if (c <= 0x7A) { // a-z
|
||||||
|
assertEquals(upper, c - 32);
|
||||||
|
assertEquals(lower, c);
|
||||||
|
} else if (c < 0xB5) { // Between z and Micro Sign
|
||||||
|
assertUnchanged(upper, lower, c);
|
||||||
|
} else if (c == 0xB5) { // Special case for Micro Sign
|
||||||
|
assertEquals(upper, 0x39C);
|
||||||
|
assertEquals(lower, c);
|
||||||
|
} else if (c < 0xC0) { // Between my and A-grave
|
||||||
|
assertUnchanged(upper, lower, c);
|
||||||
|
} else if (c < 0xD7) { // A-grave - O with Diaeresis
|
||||||
|
assertEquals(upper, c);
|
||||||
|
assertEquals(lower, c + 32);
|
||||||
|
} else if (c == 0xD7) { // Multiplication
|
||||||
|
assertUnchanged(upper, lower, c);
|
||||||
|
} else if (c <= 0xDE) { // O with slash - Thorn
|
||||||
|
assertEquals(upper, c);
|
||||||
|
assertEquals(lower, c + 32);
|
||||||
|
} else if (c == 0xDF) { // Sharp s
|
||||||
|
assertUnchanged(upper, lower, c);
|
||||||
|
} else if (c < 0xF7) { // a-grave - divsion
|
||||||
|
assertEquals(upper, c - 32);
|
||||||
|
assertEquals(lower, c);
|
||||||
|
} else if (c == 0xF7) { // Division
|
||||||
|
assertUnchanged(upper, lower, c);
|
||||||
|
} else if (c < 0xFF) { // o with slash - thorn
|
||||||
|
assertEquals(upper, c - 32);
|
||||||
|
assertEquals(lower, c);
|
||||||
|
} else if (c == 0XFF) { // Special case for y with Diaeresis
|
||||||
|
assertEquals(upper, 0x178);
|
||||||
|
assertEquals(lower, c);
|
||||||
|
} else {
|
||||||
|
fail("Uncovered code point: " + Integer.toHexString(c));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void assertUnchanged(int upper, int lower, int c) {
|
||||||
|
assertEquals(upper, c);
|
||||||
|
assertEquals(lower, c);
|
||||||
|
}
|
||||||
|
}
|
@ -2230,7 +2230,7 @@ FormatData/ar_YE/NumberElements/8=\u2030
|
|||||||
FormatData/ar_YE/NumberElements/9=\u221e
|
FormatData/ar_YE/NumberElements/9=\u221e
|
||||||
FormatData/ar_YE/NumberElements/10=\ufffd
|
FormatData/ar_YE/NumberElements/10=\ufffd
|
||||||
|
|
||||||
# bug #4113654 (this is obviously not an exchaustive test; I'm trying it here for a single
|
# bug #4113654 (this is obviously not an exhaustive test; I'm trying it here for a single
|
||||||
# inheritance chain only. This bug fix also gets tested fairly well by the tests for all
|
# inheritance chain only. This bug fix also gets tested fairly well by the tests for all
|
||||||
# the other bugs as given above)
|
# the other bugs as given above)
|
||||||
FormatData//NumberPatterns/0=#,##0.###;-#,##0.###
|
FormatData//NumberPatterns/0=#,##0.###;-#,##0.###
|
||||||
|
@ -2185,7 +2185,7 @@ FormatData/ar_YE/arab.NumberElements/8=\u0609
|
|||||||
FormatData/ar_YE/arab.NumberElements/9=\u221e
|
FormatData/ar_YE/arab.NumberElements/9=\u221e
|
||||||
FormatData/ar_YE/arab.NumberElements/10=\u0644\u064a\u0633\u00a0\u0631\u0642\u0645
|
FormatData/ar_YE/arab.NumberElements/10=\u0644\u064a\u0633\u00a0\u0631\u0642\u0645
|
||||||
|
|
||||||
# bug #4113654 (this is obviously not an exchaustive test; I'm trying it here for a single
|
# bug #4113654 (this is obviously not an exhaustive test; I'm trying it here for a single
|
||||||
# inheritance chain only. This bug fix also gets tested fairly well by the tests for all
|
# inheritance chain only. This bug fix also gets tested fairly well by the tests for all
|
||||||
# the other bugs as given above)
|
# the other bugs as given above)
|
||||||
FormatData//latn.NumberPatterns/0=#,##0.###
|
FormatData//latn.NumberPatterns/0=#,##0.###
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -27,11 +27,13 @@ package org.openjdk.bench.java.lang;
|
|||||||
import org.openjdk.jmh.annotations.Benchmark;
|
import org.openjdk.jmh.annotations.Benchmark;
|
||||||
import org.openjdk.jmh.annotations.BenchmarkMode;
|
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||||
import org.openjdk.jmh.annotations.Fork;
|
import org.openjdk.jmh.annotations.Fork;
|
||||||
|
import org.openjdk.jmh.annotations.Level;
|
||||||
import org.openjdk.jmh.annotations.Measurement;
|
import org.openjdk.jmh.annotations.Measurement;
|
||||||
import org.openjdk.jmh.annotations.Mode;
|
import org.openjdk.jmh.annotations.Mode;
|
||||||
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||||
import org.openjdk.jmh.annotations.Param;
|
import org.openjdk.jmh.annotations.Param;
|
||||||
import org.openjdk.jmh.annotations.Scope;
|
import org.openjdk.jmh.annotations.Scope;
|
||||||
|
import org.openjdk.jmh.annotations.Setup;
|
||||||
import org.openjdk.jmh.annotations.State;
|
import org.openjdk.jmh.annotations.State;
|
||||||
import org.openjdk.jmh.annotations.Warmup;
|
import org.openjdk.jmh.annotations.Warmup;
|
||||||
|
|
||||||
@ -80,4 +82,47 @@ public class Characters {
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@BenchmarkMode(Mode.AverageTime)
|
||||||
|
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||||
|
@State(Scope.Thread)
|
||||||
|
@Warmup(iterations = 5, time = 1)
|
||||||
|
@Measurement(iterations = 5, time = 1)
|
||||||
|
@Fork(3)
|
||||||
|
public static class CaseConversions {
|
||||||
|
@Param({
|
||||||
|
"low", // 0x09 pre A
|
||||||
|
"A", // 0x41 uppercase A
|
||||||
|
"a", // 0x61 lowercase a
|
||||||
|
"A-grave", // 0xC0 uppercase A-grave
|
||||||
|
"a-grave", // 0xE0 lowercase a-grave
|
||||||
|
"micro", // 0xB5 lowercase 'Micro Sign'
|
||||||
|
"yD" // 0xFF lowercase 'y with Diaeresis'
|
||||||
|
})
|
||||||
|
private String codePoint;
|
||||||
|
private int cp;
|
||||||
|
|
||||||
|
@Setup(Level.Trial)
|
||||||
|
public void setup() {
|
||||||
|
cp = switch (codePoint) {
|
||||||
|
case "low" -> 0x09;
|
||||||
|
case "A" -> 0x41;
|
||||||
|
case "a" -> 0x61;
|
||||||
|
case "A-grave" -> 0xC0;
|
||||||
|
case "a-grave" -> 0xE0;
|
||||||
|
case "yD" -> 0xE0;
|
||||||
|
case "micro" -> 0xFF;
|
||||||
|
default -> Integer.parseInt(codePoint);;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
@Benchmark
|
||||||
|
public int toUpperCase() {
|
||||||
|
return Character.toUpperCase(cp);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public int toLowerCase() {
|
||||||
|
return Character.toLowerCase(cp);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user