8041791: String.toLowerCase regression - violates Unicode standard
Reviewed-by: peytoia
This commit is contained in:
parent
3b959b9adf
commit
d0dadf23c5
@ -62,6 +62,7 @@ final class ConditionalSpecialCasing {
|
|||||||
//# Conditional mappings
|
//# Conditional mappings
|
||||||
//# ================================================================================
|
//# ================================================================================
|
||||||
new Entry(0x03A3, new char[]{0x03C2}, new char[]{0x03A3}, null, FINAL_CASED), // # GREEK CAPITAL LETTER SIGMA
|
new Entry(0x03A3, new char[]{0x03C2}, new char[]{0x03A3}, null, FINAL_CASED), // # GREEK CAPITAL LETTER SIGMA
|
||||||
|
new Entry(0x0130, new char[]{0x0069, 0x0307}, new char[]{0x0130}, null, 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||||
|
|
||||||
//# ================================================================================
|
//# ================================================================================
|
||||||
//# Locale-sensitive mappings
|
//# Locale-sensitive mappings
|
||||||
@ -77,8 +78,8 @@ final class ConditionalSpecialCasing {
|
|||||||
|
|
||||||
//# ================================================================================
|
//# ================================================================================
|
||||||
//# Turkish and Azeri
|
//# Turkish and Azeri
|
||||||
// new Entry(0x0130, new char[]{0x0069}, new char[]{0x0130}, "tr", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
new Entry(0x0130, new char[]{0x0069}, new char[]{0x0130}, "tr", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||||
// new Entry(0x0130, new char[]{0x0069}, new char[]{0x0130}, "az", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
new Entry(0x0130, new char[]{0x0069}, new char[]{0x0130}, "az", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||||
new Entry(0x0307, new char[]{}, new char[]{0x0307}, "tr", AFTER_I), // # COMBINING DOT ABOVE
|
new Entry(0x0307, new char[]{}, new char[]{0x0307}, "tr", AFTER_I), // # COMBINING DOT ABOVE
|
||||||
new Entry(0x0307, new char[]{}, new char[]{0x0307}, "az", AFTER_I), // # COMBINING DOT ABOVE
|
new Entry(0x0307, new char[]{}, new char[]{0x0307}, "az", AFTER_I), // # COMBINING DOT ABOVE
|
||||||
new Entry(0x0049, new char[]{0x0131}, new char[]{0x0049}, "tr", NOT_BEFORE_DOT), // # LATIN CAPITAL LETTER I
|
new Entry(0x0049, new char[]{0x0131}, new char[]{0x0049}, "tr", NOT_BEFORE_DOT), // # LATIN CAPITAL LETTER I
|
||||||
@ -147,21 +148,25 @@ final class ConditionalSpecialCasing {
|
|||||||
|
|
||||||
private static char[] lookUpTable(String src, int index, Locale locale, boolean bLowerCasing) {
|
private static char[] lookUpTable(String src, int index, Locale locale, boolean bLowerCasing) {
|
||||||
HashSet<Entry> set = entryTable.get(new Integer(src.codePointAt(index)));
|
HashSet<Entry> set = entryTable.get(new Integer(src.codePointAt(index)));
|
||||||
|
char[] ret = null;
|
||||||
|
|
||||||
if (set != null) {
|
if (set != null) {
|
||||||
Iterator<Entry> iter = set.iterator();
|
Iterator<Entry> iter = set.iterator();
|
||||||
String currentLang = locale.getLanguage();
|
String currentLang = locale.getLanguage();
|
||||||
while (iter.hasNext()) {
|
while (iter.hasNext()) {
|
||||||
Entry entry = iter.next();
|
Entry entry = iter.next();
|
||||||
String conditionLang= entry.getLanguage();
|
String conditionLang = entry.getLanguage();
|
||||||
if (((conditionLang == null) || (conditionLang.equals(currentLang))) &&
|
if (((conditionLang == null) || (conditionLang.equals(currentLang))) &&
|
||||||
isConditionMet(src, index, locale, entry.getCondition())) {
|
isConditionMet(src, index, locale, entry.getCondition())) {
|
||||||
return (bLowerCasing ? entry.getLowerCase() : entry.getUpperCase());
|
ret = bLowerCasing ? entry.getLowerCase() : entry.getUpperCase();
|
||||||
|
if (conditionLang != null) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean isConditionMet(String src, int index, Locale locale, int condition) {
|
private static boolean isConditionMet(String src, int index, Locale locale, int condition) {
|
||||||
|
@ -2583,6 +2583,9 @@ public final class String
|
|||||||
if (cp == '\u03A3') { // GREEK CAPITAL LETTER SIGMA
|
if (cp == '\u03A3') { // GREEK CAPITAL LETTER SIGMA
|
||||||
return toLowerCaseEx(result, i, locale, false);
|
return toLowerCaseEx(result, i, locale, false);
|
||||||
}
|
}
|
||||||
|
if (cp == '\u0130') { // LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||||
|
return toLowerCaseEx(result, i, locale, true);
|
||||||
|
}
|
||||||
cp = Character.toLowerCase(cp);
|
cp = Character.toLowerCase(cp);
|
||||||
if (!Character.isBmpCodePoint(cp)) {
|
if (!Character.isBmpCodePoint(cp)) {
|
||||||
return toLowerCaseEx(result, i, locale, false);
|
return toLowerCaseEx(result, i, locale, false);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -23,7 +23,7 @@
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
@test
|
@test
|
||||||
@bug 4217441 4533872 4900935 8020037 8032012
|
@bug 4217441 4533872 4900935 8020037 8032012 8041791
|
||||||
@summary toLowerCase should lower-case Greek Sigma correctly depending
|
@summary toLowerCase should lower-case Greek Sigma correctly depending
|
||||||
on the context (final/non-final). Also it should handle
|
on the context (final/non-final). Also it should handle
|
||||||
Locale specific (lt, tr, and az) lowercasings and supplementary
|
Locale specific (lt, tr, and az) lowercasings and supplementary
|
||||||
@ -72,8 +72,10 @@ public class ToLowerCase {
|
|||||||
// I-dot tests
|
// I-dot tests
|
||||||
test("\u0130", turkish, "i");
|
test("\u0130", turkish, "i");
|
||||||
test("\u0130", az, "i");
|
test("\u0130", az, "i");
|
||||||
test("\u0130", lt, "i");
|
test("\u0130", lt, "\u0069\u0307");
|
||||||
test("\u0130", Locale.US, "i");
|
test("\u0130", Locale.US, "\u0069\u0307");
|
||||||
|
test("\u0130", Locale.JAPAN, "\u0069\u0307");
|
||||||
|
test("\u0130", Locale.ROOT, "\u0069\u0307");
|
||||||
|
|
||||||
// Remove dot_above in the sequence I + dot_above (Turkish and Azeri)
|
// Remove dot_above in the sequence I + dot_above (Turkish and Azeri)
|
||||||
test("I\u0307", turkish, "i");
|
test("I\u0307", turkish, "i");
|
||||||
@ -111,6 +113,12 @@ public class ToLowerCase {
|
|||||||
if (cp >= Character.MIN_HIGH_SURROGATE && cp <= Character.MAX_HIGH_SURROGATE) {
|
if (cp >= Character.MIN_HIGH_SURROGATE && cp <= Character.MAX_HIGH_SURROGATE) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (cp == 0x0130) {
|
||||||
|
// Although UnicodeData.txt has the lower case char as \u0069, it should be
|
||||||
|
// handled with the rules in SpecialCasing.txt, i.e., \u0069\u0307 in
|
||||||
|
// non-Turkic locales.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
int lowerCase = Character.toLowerCase(cp);
|
int lowerCase = Character.toLowerCase(cp);
|
||||||
if (lowerCase == -1) { //Character.ERROR
|
if (lowerCase == -1) { //Character.ERROR
|
||||||
continue;
|
continue;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user