8032012: String.toLowerCase/toUpperCase performance improvement

Updated the implementation of String.toLowerCase/toUpperCase to improve performance.

Reviewed-by: psandoz, forax
This commit is contained in:
Xueming Shen 2014-02-07 09:04:17 -08:00
parent 7be40556f6
commit 8cdace2575
2 changed files with 120 additions and 104 deletions

View File

@ -2549,87 +2549,88 @@ public final class String
if (locale == null) {
throw new NullPointerException();
}
int firstUpper;
int first;
boolean hasSurr = false;
final int len = value.length;
/* Now check if there are any characters that need to be changed. */
scan: {
for (firstUpper = 0 ; firstUpper < len; ) {
char c = value[firstUpper];
if ((c >= Character.MIN_HIGH_SURROGATE)
&& (c <= Character.MAX_HIGH_SURROGATE)) {
int supplChar = codePointAt(firstUpper);
if (supplChar != Character.toLowerCase(supplChar)) {
break scan;
}
firstUpper += Character.charCount(supplChar);
} else {
if (c != Character.toLowerCase(c)) {
break scan;
}
firstUpper++;
}
// Now check if there are any characters that need to be changed, or are surrogate
for (first = 0 ; first < len; first++) {
int cp = (int)value[first];
if (Character.isSurrogate((char)cp)) {
hasSurr = true;
break;
}
if (cp != Character.toLowerCase(cp)) { // no need to check Character.ERROR
break;
}
return this;
}
if (first == len)
return this;
char[] result = new char[len];
int resultOffset = 0; /* result may grow, so i+resultOffset
* is the write location in result */
/* Just copy the first few lowerCase characters. */
System.arraycopy(value, 0, result, 0, firstUpper);
System.arraycopy(value, 0, result, 0, first); // Just copy the first few
// lowerCase characters.
String lang = locale.getLanguage();
boolean localeDependent =
(lang == "tr" || lang == "az" || lang == "lt");
char[] lowerCharArray;
int lowerChar;
int srcChar;
if (lang == "tr" || lang == "az" || lang == "lt") {
return toLowerCaseEx(result, first, locale, true);
}
if (hasSurr) {
return toLowerCaseEx(result, first, locale, false);
}
for (int i = first; i < len; i++) {
int cp = (int)value[i];
if (cp == '\u03A3') { // GREEK CAPITAL LETTER SIGMA
return toLowerCaseEx(result, i, locale, false);
}
cp = Character.toLowerCase(cp);
if (!Character.isBmpCodePoint(cp)) {
return toLowerCaseEx(result, i, locale, false);
}
result[i] = (char)cp;
}
return new String(result, true);
}
/**
 * General lower-casing loop used once a plain char-for-char conversion is
 * not enough. Converts {@code value} from index {@code first} onward,
 * writing into {@code result}, and returns the converted string.
 *
 * @param result output buffer; writing starts at index {@code first}, so
 *        the chars below that index are expected to be filled in already
 *        (the visible callers copy/convert them before calling)
 * @param first index in {@code value} of the first char still to convert;
 *        also the initial write position in {@code result}
 * @param locale locale handed to {@code ConditionalSpecialCasing}
 * @param localeDependent when {@code true}, every char is mapped through
 *        {@code ConditionalSpecialCasing} rather than {@code Character}
 * @return a new String built from the first {@code resultOffset} chars of
 *         {@code result}
 */
private String toLowerCaseEx(char[] result, int first, Locale locale, boolean localeDependent) {
// Write position in result; unlike the source index it may run ahead when a mapping expands.
int resultOffset = first;
int srcCount;
for (int i = firstUpper; i < len; i += srcCount) {
srcChar = (int)value[i];
if ((char)srcChar >= Character.MIN_HIGH_SURROGATE
&& (char)srcChar <= Character.MAX_HIGH_SURROGATE) {
// Walk the source from index first; i advances by srcCount, the number of chars consumed per step.
for (int i = first; i < value.length; i += srcCount) {
int srcChar = (int)value[i];
int lowerChar;
char[] lowerCharArray;
srcCount = 1;
// Surrogate at i: read the whole code point and consume as many chars as it occupies.
if (Character.isSurrogate((char)srcChar)) {
srcChar = codePointAt(i);
srcCount = Character.charCount(srcChar);
} else {
srcCount = 1;
}
// Context-aware mapping when localeDependent is set or the source char is U+03A3.
if (localeDependent || srcChar == '\u03A3') { // GREEK CAPITAL LETTER SIGMA
lowerChar = ConditionalSpecialCasing.toLowerCaseEx(this, i, locale);
} else {
lowerChar = Character.toLowerCase(srcChar);
}
if ((lowerChar == Character.ERROR)
|| (lowerChar >= Character.MIN_SUPPLEMENTARY_CODE_POINT)) {
// Single BMP char: store it and advance the write position.
if (Character.isBmpCodePoint(lowerChar)) { // Character.ERROR is not a bmp
result[resultOffset++] = (char)lowerChar;
} else {
if (lowerChar == Character.ERROR) {
lowerCharArray =
ConditionalSpecialCasing.toLowerCaseCharArray(this, i, locale);
// ERROR result: fetch the lower-case mapping as a char array instead.
lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(this, i, locale);
} else if (srcCount == 2) {
resultOffset += Character.toChars(lowerChar, result, i + resultOffset) - srcCount;
// Source took two chars: write the mapped code point straight into result and
// advance by the count that Character.toChars returns.
resultOffset += Character.toChars(lowerChar, result, resultOffset);
continue;
} else {
lowerCharArray = Character.toChars(lowerChar);
}
/* Grow result if needed */
int mapLen = lowerCharArray.length;
if (mapLen > srcCount) {
char[] result2 = new char[result.length + mapLen - srcCount];
System.arraycopy(result, 0, result2, 0, i + resultOffset);
// Carry over only the chars written so far.
System.arraycopy(result, 0, result2, 0, resultOffset);
result = result2;
}
for (int x = 0; x < mapLen; ++x) {
result[i + resultOffset + x] = lowerCharArray[x];
// Append each mapped char, advancing the write position.
result[resultOffset++] = lowerCharArray[x];
}
resultOffset += (mapLen - srcCount);
} else {
result[i + resultOffset] = (char)lowerChar;
}
}
return new String(result, 0, len + resultOffset);
// resultOffset is the count of chars written, i.e. the length of the result.
return new String(result, 0, resultOffset);
}
/**
@ -2707,92 +2708,91 @@ public final class String
if (locale == null) {
throw new NullPointerException();
}
int firstLower;
int first;
boolean hasSurr = false;
final int len = value.length;
/* Now check if there are any characters that need to be changed. */
scan: {
for (firstLower = 0 ; firstLower < len; ) {
int c = (int)value[firstLower];
int srcCount;
if ((c >= Character.MIN_HIGH_SURROGATE)
&& (c <= Character.MAX_HIGH_SURROGATE)) {
c = codePointAt(firstLower);
srcCount = Character.charCount(c);
} else {
srcCount = 1;
}
int upperCaseChar = Character.toUpperCaseEx(c);
if ((upperCaseChar == Character.ERROR)
|| (c != upperCaseChar)) {
break scan;
}
firstLower += srcCount;
// Now check if there are any characters that need to be changed, or are surrogate
for (first = 0 ; first < len; first++ ) {
int cp = (int)value[first];
if (Character.isSurrogate((char)cp)) {
hasSurr = true;
break;
}
if (cp != Character.toUpperCaseEx(cp)) { // no need to check Character.ERROR
break;
}
}
if (first == len) {
return this;
}
/* result may grow, so i+resultOffset is the write location in result */
int resultOffset = 0;
char[] result = new char[len]; /* may grow */
/* Just copy the first few upperCase characters. */
System.arraycopy(value, 0, result, 0, firstLower);
char[] result = new char[len];
System.arraycopy(value, 0, result, 0, first); // Just copy the first few
// upperCase characters.
String lang = locale.getLanguage();
boolean localeDependent =
(lang == "tr" || lang == "az" || lang == "lt");
char[] upperCharArray;
int upperChar;
int srcChar;
if (lang == "tr" || lang == "az" || lang == "lt") {
return toUpperCaseEx(result, first, locale, true);
}
if (hasSurr) {
return toUpperCaseEx(result, first, locale, false);
}
for (int i = first; i < len; i++) {
int cp = Character.toUpperCaseEx((int)value[i]);
if (!Character.isBmpCodePoint(cp)) { // Character.ERROR is not bmp
return toUpperCaseEx(result, i, locale, false);
}
result[i] = (char)cp;
}
return new String(result, true);
}
/**
 * General upper-casing loop used once a plain char-for-char conversion is
 * not enough. Converts {@code value} from index {@code first} onward,
 * writing into {@code result}, and returns the converted string.
 *
 * @param result output buffer; writing starts at index {@code first}, so
 *        the chars below that index are expected to be filled in already
 *        (the visible callers copy/convert them before calling)
 * @param first index in {@code value} of the first char still to convert;
 *        also the initial write position in {@code result}
 * @param locale locale handed to {@code ConditionalSpecialCasing}
 * @param localeDependent when {@code true}, chars are mapped through
 *        {@code ConditionalSpecialCasing} rather than {@code Character}
 * @return a new String built from the first {@code resultOffset} chars of
 *         {@code result}
 */
private String toUpperCaseEx(char[] result, int first, Locale locale,
boolean localeDependent) {
// Write position in result; unlike the source index it may run ahead when a mapping expands.
int resultOffset = first;
int srcCount;
for (int i = firstLower; i < len; i += srcCount) {
srcChar = (int)value[i];
if ((char)srcChar >= Character.MIN_HIGH_SURROGATE &&
(char)srcChar <= Character.MAX_HIGH_SURROGATE) {
// Walk the source from index first; i advances by srcCount, the number of chars consumed per step.
for (int i = first; i < value.length; i += srcCount) {
int srcChar = (int)value[i];
int upperChar;
char[] upperCharArray;
srcCount = 1;
// Surrogate at i: read the whole code point and consume as many chars as it occupies.
if (Character.isSurrogate((char)srcChar)) {
srcChar = codePointAt(i);
srcCount = Character.charCount(srcChar);
} else {
srcCount = 1;
}
// Context-aware mapping only when localeDependent is set.
if (localeDependent) {
upperChar = ConditionalSpecialCasing.toUpperCaseEx(this, i, locale);
} else {
upperChar = Character.toUpperCaseEx(srcChar);
}
if ((upperChar == Character.ERROR)
|| (upperChar >= Character.MIN_SUPPLEMENTARY_CODE_POINT)) {
// Single BMP char: store it and advance the write position.
if (Character.isBmpCodePoint(upperChar)) {
result[resultOffset++] = (char)upperChar;
} else {
// ERROR result: fetch the upper-case mapping as a char array from the same
// source (locale-aware or not) that produced upperChar.
if (upperChar == Character.ERROR) {
if (localeDependent) {
upperCharArray =
ConditionalSpecialCasing.toUpperCaseCharArray(this, i, locale);
ConditionalSpecialCasing.toUpperCaseCharArray(this, i, locale);
} else {
upperCharArray = Character.toUpperCaseCharArray(srcChar);
}
} else if (srcCount == 2) {
resultOffset += Character.toChars(upperChar, result, i + resultOffset) - srcCount;
// Source took two chars: write the mapped code point straight into result and
// advance by the count that Character.toChars returns.
resultOffset += Character.toChars(upperChar, result, resultOffset);
continue;
} else {
upperCharArray = Character.toChars(upperChar);
}
/* Grow result if needed */
int mapLen = upperCharArray.length;
if (mapLen > srcCount) {
char[] result2 = new char[result.length + mapLen - srcCount];
System.arraycopy(result, 0, result2, 0, i + resultOffset);
// Carry over only the chars written so far.
System.arraycopy(result, 0, result2, 0, resultOffset);
result = result2;
}
for (int x = 0; x < mapLen; ++x) {
result[i + resultOffset + x] = upperCharArray[x];
}
resultOffset += (mapLen - srcCount);
} else {
result[i + resultOffset] = (char)upperChar;
}
for (int x = 0; x < mapLen; ++x) {
// Append each mapped char, advancing the write position.
result[resultOffset++] = upperCharArray[x];
}
}
}
return new String(result, 0, len + resultOffset);
// resultOffset is the count of chars written, i.e. the length of the result.
return new String(result, 0, resultOffset);
}
/**

View File

@ -23,7 +23,7 @@
/*
@test
@bug 4217441 4533872 4900935 8020037
@bug 4217441 4533872 4900935 8020037 8032012
@summary toLowerCase should lower-case Greek Sigma correctly depending
on the context (final/non-final). Also it should handle
Locale specific (lt, tr, and az) lowercasings and supplementary
@ -104,6 +104,22 @@ public class ToLowerCase {
// invalid code point tests:
test("\uD800\uD800\uD801A\uDC00\uDC00\uDC00B", Locale.US, "\uD800\uD800\uD801a\uDC00\uDC00\uDC00b");
// test bmp + supp1
StringBuilder src = new StringBuilder(0x20000);
StringBuilder exp = new StringBuilder(0x20000);
for (int cp = 0; cp < 0x20000; cp++) {
if (cp >= Character.MIN_HIGH_SURROGATE && cp <= Character.MAX_HIGH_SURROGATE) {
continue;
}
int lowerCase = Character.toLowerCase(cp);
if (lowerCase == -1) { //Character.ERROR
continue;
}
src.appendCodePoint(cp);
exp.appendCodePoint(lowerCase);
}
test(src.toString(), Locale.US, exp.toString());
}
static void test(String in, Locale locale, String expected) {