8032012: String.toLowerCase/toUpperCase performance improvement
Updated the implementation to improve performance. Reviewed-by: psandoz, forax
This commit is contained in:
parent
7be40556f6
commit
8cdace2575
@ -2549,87 +2549,88 @@ public final class String
|
||||
if (locale == null) {
|
||||
throw new NullPointerException();
|
||||
}
|
||||
|
||||
int firstUpper;
|
||||
int first;
|
||||
boolean hasSurr = false;
|
||||
final int len = value.length;
|
||||
|
||||
/* Now check if there are any characters that need to be changed. */
|
||||
scan: {
|
||||
for (firstUpper = 0 ; firstUpper < len; ) {
|
||||
char c = value[firstUpper];
|
||||
if ((c >= Character.MIN_HIGH_SURROGATE)
|
||||
&& (c <= Character.MAX_HIGH_SURROGATE)) {
|
||||
int supplChar = codePointAt(firstUpper);
|
||||
if (supplChar != Character.toLowerCase(supplChar)) {
|
||||
break scan;
|
||||
}
|
||||
firstUpper += Character.charCount(supplChar);
|
||||
} else {
|
||||
if (c != Character.toLowerCase(c)) {
|
||||
break scan;
|
||||
}
|
||||
firstUpper++;
|
||||
}
|
||||
// Now check if there are any characters that need to be changed, or are surrogate
|
||||
for (first = 0 ; first < len; first++) {
|
||||
int cp = (int)value[first];
|
||||
if (Character.isSurrogate((char)cp)) {
|
||||
hasSurr = true;
|
||||
break;
|
||||
}
|
||||
if (cp != Character.toLowerCase(cp)) { // no need to check Character.ERROR
|
||||
break;
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
if (first == len)
|
||||
return this;
|
||||
char[] result = new char[len];
|
||||
int resultOffset = 0; /* result may grow, so i+resultOffset
|
||||
* is the write location in result */
|
||||
|
||||
/* Just copy the first few lowerCase characters. */
|
||||
System.arraycopy(value, 0, result, 0, firstUpper);
|
||||
|
||||
System.arraycopy(value, 0, result, 0, first); // Just copy the first few
|
||||
// lowerCase characters.
|
||||
String lang = locale.getLanguage();
|
||||
boolean localeDependent =
|
||||
(lang == "tr" || lang == "az" || lang == "lt");
|
||||
char[] lowerCharArray;
|
||||
int lowerChar;
|
||||
int srcChar;
|
||||
if (lang == "tr" || lang == "az" || lang == "lt") {
|
||||
return toLowerCaseEx(result, first, locale, true);
|
||||
}
|
||||
if (hasSurr) {
|
||||
return toLowerCaseEx(result, first, locale, false);
|
||||
}
|
||||
for (int i = first; i < len; i++) {
|
||||
int cp = (int)value[i];
|
||||
if (cp == '\u03A3') { // GREEK CAPITAL LETTER SIGMA
|
||||
return toLowerCaseEx(result, i, locale, false);
|
||||
}
|
||||
cp = Character.toLowerCase(cp);
|
||||
if (!Character.isBmpCodePoint(cp)) {
|
||||
return toLowerCaseEx(result, i, locale, false);
|
||||
}
|
||||
result[i] = (char)cp;
|
||||
}
|
||||
return new String(result, true);
|
||||
}
|
||||
|
||||
private String toLowerCaseEx(char[] result, int first, Locale locale, boolean localeDependent) {
|
||||
int resultOffset = first;
|
||||
int srcCount;
|
||||
for (int i = firstUpper; i < len; i += srcCount) {
|
||||
srcChar = (int)value[i];
|
||||
if ((char)srcChar >= Character.MIN_HIGH_SURROGATE
|
||||
&& (char)srcChar <= Character.MAX_HIGH_SURROGATE) {
|
||||
for (int i = first; i < value.length; i += srcCount) {
|
||||
int srcChar = (int)value[i];
|
||||
int lowerChar;
|
||||
char[] lowerCharArray;
|
||||
srcCount = 1;
|
||||
if (Character.isSurrogate((char)srcChar)) {
|
||||
srcChar = codePointAt(i);
|
||||
srcCount = Character.charCount(srcChar);
|
||||
} else {
|
||||
srcCount = 1;
|
||||
}
|
||||
if (localeDependent || srcChar == '\u03A3') { // GREEK CAPITAL LETTER SIGMA
|
||||
lowerChar = ConditionalSpecialCasing.toLowerCaseEx(this, i, locale);
|
||||
} else {
|
||||
lowerChar = Character.toLowerCase(srcChar);
|
||||
}
|
||||
if ((lowerChar == Character.ERROR)
|
||||
|| (lowerChar >= Character.MIN_SUPPLEMENTARY_CODE_POINT)) {
|
||||
if (Character.isBmpCodePoint(lowerChar)) { // Character.ERROR is not a bmp
|
||||
result[resultOffset++] = (char)lowerChar;
|
||||
} else {
|
||||
if (lowerChar == Character.ERROR) {
|
||||
lowerCharArray =
|
||||
ConditionalSpecialCasing.toLowerCaseCharArray(this, i, locale);
|
||||
lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(this, i, locale);
|
||||
} else if (srcCount == 2) {
|
||||
resultOffset += Character.toChars(lowerChar, result, i + resultOffset) - srcCount;
|
||||
resultOffset += Character.toChars(lowerChar, result, resultOffset);
|
||||
continue;
|
||||
} else {
|
||||
lowerCharArray = Character.toChars(lowerChar);
|
||||
}
|
||||
|
||||
/* Grow result if needed */
|
||||
int mapLen = lowerCharArray.length;
|
||||
if (mapLen > srcCount) {
|
||||
char[] result2 = new char[result.length + mapLen - srcCount];
|
||||
System.arraycopy(result, 0, result2, 0, i + resultOffset);
|
||||
System.arraycopy(result, 0, result2, 0, resultOffset);
|
||||
result = result2;
|
||||
}
|
||||
for (int x = 0; x < mapLen; ++x) {
|
||||
result[i + resultOffset + x] = lowerCharArray[x];
|
||||
result[resultOffset++] = lowerCharArray[x];
|
||||
}
|
||||
resultOffset += (mapLen - srcCount);
|
||||
} else {
|
||||
result[i + resultOffset] = (char)lowerChar;
|
||||
}
|
||||
}
|
||||
return new String(result, 0, len + resultOffset);
|
||||
return new String(result, 0, resultOffset);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -2707,92 +2708,91 @@ public final class String
|
||||
if (locale == null) {
|
||||
throw new NullPointerException();
|
||||
}
|
||||
|
||||
int firstLower;
|
||||
int first;
|
||||
boolean hasSurr = false;
|
||||
final int len = value.length;
|
||||
|
||||
/* Now check if there are any characters that need to be changed. */
|
||||
scan: {
|
||||
for (firstLower = 0 ; firstLower < len; ) {
|
||||
int c = (int)value[firstLower];
|
||||
int srcCount;
|
||||
if ((c >= Character.MIN_HIGH_SURROGATE)
|
||||
&& (c <= Character.MAX_HIGH_SURROGATE)) {
|
||||
c = codePointAt(firstLower);
|
||||
srcCount = Character.charCount(c);
|
||||
} else {
|
||||
srcCount = 1;
|
||||
}
|
||||
int upperCaseChar = Character.toUpperCaseEx(c);
|
||||
if ((upperCaseChar == Character.ERROR)
|
||||
|| (c != upperCaseChar)) {
|
||||
break scan;
|
||||
}
|
||||
firstLower += srcCount;
|
||||
// Now check if there are any characters that need to be changed, or are surrogate
|
||||
for (first = 0 ; first < len; first++ ) {
|
||||
int cp = (int)value[first];
|
||||
if (Character.isSurrogate((char)cp)) {
|
||||
hasSurr = true;
|
||||
break;
|
||||
}
|
||||
if (cp != Character.toUpperCaseEx(cp)) { // no need to check Character.ERROR
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (first == len) {
|
||||
return this;
|
||||
}
|
||||
|
||||
/* result may grow, so i+resultOffset is the write location in result */
|
||||
int resultOffset = 0;
|
||||
char[] result = new char[len]; /* may grow */
|
||||
|
||||
/* Just copy the first few upperCase characters. */
|
||||
System.arraycopy(value, 0, result, 0, firstLower);
|
||||
|
||||
char[] result = new char[len];
|
||||
System.arraycopy(value, 0, result, 0, first); // Just copy the first few
|
||||
// upperCase characters.
|
||||
String lang = locale.getLanguage();
|
||||
boolean localeDependent =
|
||||
(lang == "tr" || lang == "az" || lang == "lt");
|
||||
char[] upperCharArray;
|
||||
int upperChar;
|
||||
int srcChar;
|
||||
if (lang == "tr" || lang == "az" || lang == "lt") {
|
||||
return toUpperCaseEx(result, first, locale, true);
|
||||
}
|
||||
if (hasSurr) {
|
||||
return toUpperCaseEx(result, first, locale, false);
|
||||
}
|
||||
for (int i = first; i < len; i++) {
|
||||
int cp = Character.toUpperCaseEx((int)value[i]);
|
||||
if (!Character.isBmpCodePoint(cp)) { // Character.ERROR is not bmp
|
||||
return toUpperCaseEx(result, i, locale, false);
|
||||
}
|
||||
result[i] = (char)cp;
|
||||
}
|
||||
return new String(result, true);
|
||||
}
|
||||
|
||||
private String toUpperCaseEx(char[] result, int first, Locale locale,
|
||||
boolean localeDependent) {
|
||||
int resultOffset = first;
|
||||
int srcCount;
|
||||
for (int i = firstLower; i < len; i += srcCount) {
|
||||
srcChar = (int)value[i];
|
||||
if ((char)srcChar >= Character.MIN_HIGH_SURROGATE &&
|
||||
(char)srcChar <= Character.MAX_HIGH_SURROGATE) {
|
||||
for (int i = first; i < value.length; i += srcCount) {
|
||||
int srcChar = (int)value[i];
|
||||
int upperChar;
|
||||
char[] upperCharArray;
|
||||
srcCount = 1;
|
||||
if (Character.isSurrogate((char)srcChar)) {
|
||||
srcChar = codePointAt(i);
|
||||
srcCount = Character.charCount(srcChar);
|
||||
} else {
|
||||
srcCount = 1;
|
||||
}
|
||||
if (localeDependent) {
|
||||
upperChar = ConditionalSpecialCasing.toUpperCaseEx(this, i, locale);
|
||||
} else {
|
||||
upperChar = Character.toUpperCaseEx(srcChar);
|
||||
}
|
||||
if ((upperChar == Character.ERROR)
|
||||
|| (upperChar >= Character.MIN_SUPPLEMENTARY_CODE_POINT)) {
|
||||
if (Character.isBmpCodePoint(upperChar)) {
|
||||
result[resultOffset++] = (char)upperChar;
|
||||
} else {
|
||||
if (upperChar == Character.ERROR) {
|
||||
if (localeDependent) {
|
||||
upperCharArray =
|
||||
ConditionalSpecialCasing.toUpperCaseCharArray(this, i, locale);
|
||||
ConditionalSpecialCasing.toUpperCaseCharArray(this, i, locale);
|
||||
} else {
|
||||
upperCharArray = Character.toUpperCaseCharArray(srcChar);
|
||||
}
|
||||
} else if (srcCount == 2) {
|
||||
resultOffset += Character.toChars(upperChar, result, i + resultOffset) - srcCount;
|
||||
resultOffset += Character.toChars(upperChar, result, resultOffset);
|
||||
continue;
|
||||
} else {
|
||||
upperCharArray = Character.toChars(upperChar);
|
||||
}
|
||||
|
||||
/* Grow result if needed */
|
||||
int mapLen = upperCharArray.length;
|
||||
if (mapLen > srcCount) {
|
||||
char[] result2 = new char[result.length + mapLen - srcCount];
|
||||
System.arraycopy(result, 0, result2, 0, i + resultOffset);
|
||||
System.arraycopy(result, 0, result2, 0, resultOffset);
|
||||
result = result2;
|
||||
}
|
||||
for (int x = 0; x < mapLen; ++x) {
|
||||
result[i + resultOffset + x] = upperCharArray[x];
|
||||
}
|
||||
resultOffset += (mapLen - srcCount);
|
||||
} else {
|
||||
result[i + resultOffset] = (char)upperChar;
|
||||
}
|
||||
for (int x = 0; x < mapLen; ++x) {
|
||||
result[resultOffset++] = upperCharArray[x];
|
||||
}
|
||||
}
|
||||
}
|
||||
return new String(result, 0, len + resultOffset);
|
||||
return new String(result, 0, resultOffset);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -23,7 +23,7 @@
|
||||
|
||||
/*
|
||||
@test
|
||||
@bug 4217441 4533872 4900935 8020037
|
||||
@bug 4217441 4533872 4900935 8020037 8032012
|
||||
@summary toLowerCase should lower-case Greek Sigma correctly depending
|
||||
on the context (final/non-final). Also it should handle
|
||||
Locale specific (lt, tr, and az) lowercasings and supplementary
|
||||
@ -104,6 +104,22 @@ public class ToLowerCase {
|
||||
// invalid code point tests:
|
||||
test("\uD800\uD800\uD801A\uDC00\uDC00\uDC00B", Locale.US, "\uD800\uD800\uD801a\uDC00\uDC00\uDC00b");
|
||||
|
||||
// test bmp + supp1
|
||||
StringBuilder src = new StringBuilder(0x20000);
|
||||
StringBuilder exp = new StringBuilder(0x20000);
|
||||
for (int cp = 0; cp < 0x20000; cp++) {
|
||||
if (cp >= Character.MIN_HIGH_SURROGATE && cp <= Character.MAX_HIGH_SURROGATE) {
|
||||
continue;
|
||||
}
|
||||
int lowerCase = Character.toLowerCase(cp);
|
||||
if (lowerCase == -1) { //Character.ERROR
|
||||
continue;
|
||||
}
|
||||
src.appendCodePoint(cp);
|
||||
exp.appendCodePoint(lowerCase);
|
||||
}
|
||||
test(src.toString(), Locale.US, exp.toString());
|
||||
|
||||
}
|
||||
|
||||
static void test(String in, Locale locale, String expected) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user