2007-12-01 00:00:00 +00:00
|
|
|
/*
|
2022-04-08 15:23:25 +00:00
|
|
|
* Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
|
2007-12-01 00:00:00 +00:00
|
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
|
|
*
|
|
|
|
* This code is free software; you can redistribute it and/or modify it
|
|
|
|
* under the terms of the GNU General Public License version 2 only, as
|
|
|
|
* published by the Free Software Foundation.
|
|
|
|
*
|
|
|
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
|
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
|
|
* version 2 for more details (a copy is included in the LICENSE file that
|
|
|
|
* accompanied this code).
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License version
|
|
|
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
|
|
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
|
|
*
|
2010-05-25 15:58:33 -07:00
|
|
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
|
|
* or visit www.oracle.com if you need additional information or have any
|
|
|
|
* questions.
|
2007-12-01 00:00:00 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
@test
|
8141132: JEP 254: Compact Strings
Adopt a more space-efficient internal representation for strings.
Co-authored-by: Brent Christian <brent.christian@oracle.com>
Co-authored-by: Vivek Deshpande <vivek.r.deshpande@intel.com>
Co-authored-by: Charlie Hunt <charlie.hunt@oracle.com>
Co-authored-by: Vladimir Kozlov <vladimir.kozlov@oracle.com>
Co-authored-by: Roger Riggs <roger.riggs@oracle.com>
Co-authored-by: Xueming Shen <xueming.shen@oracle.com>
Co-authored-by: Aleksey Shipilev <aleksey.shipilev@oracle.com>
Co-authored-by: Sandhya Viswanathan <sandhya.viswanathan@intel.com>
Reviewed-by: alanb, bdelsart, coleenp, iklam, jiangli, jrose, kevinw, naoto, pliden, roland, smarks, twisti
2015-11-03 09:42:11 +01:00
|
|
|
@bug 4217441 4533872 4900935 8020037 8032012 8041791 8042589 8054307
|
2007-12-01 00:00:00 +00:00
|
|
|
@summary toLowerCase should lower-case Greek Sigma correctly depending
|
|
|
|
on the context (final/non-final). Also it should handle
|
|
|
|
Locale specific (lt, tr, and az) lowercasings and supplementary
|
|
|
|
characters correctly.
|
|
|
|
*/
|
|
|
|
|
|
|
|
import java.util.Locale;
|
|
|
|
|
|
|
|
public class ToLowerCase {
|
|
|
|
|
|
|
|
public static void main(String[] args) {
|
2022-04-08 15:23:25 +00:00
|
|
|
Locale turkish = Locale.of("tr", "TR");
|
|
|
|
Locale lt = Locale.of("lt"); // Lithanian
|
|
|
|
Locale az = Locale.of("az"); // Azeri
|
2007-12-01 00:00:00 +00:00
|
|
|
|
|
|
|
// Greek Sigma final/non-final tests
|
|
|
|
test("\u03A3", Locale.US, "\u03C3");
|
|
|
|
test("LAST\u03A3", Locale.US, "last\u03C2");
|
|
|
|
test("MID\u03A3DLE", Locale.US, "mid\u03C3dle");
|
|
|
|
test("WORD1 \u03A3 WORD3", Locale.US, "word1 \u03C3 word3");
|
|
|
|
test("WORD1 LAST\u03A3 WORD3", Locale.US, "word1 last\u03C2 word3");
|
|
|
|
test("WORD1 MID\u03A3DLE WORD3", Locale.US, "word1 mid\u03C3dle word3");
|
|
|
|
test("\u0399\u0395\u03a3\u03a5\u03a3 \u03a7\u03a1\u0399\u03a3\u03a4\u039f\u03a3", Locale.US,
|
|
|
|
"\u03b9\u03b5\u03c3\u03c5\u03c2 \u03c7\u03c1\u03b9\u03c3\u03c4\u03bf\u03c2"); // "IESUS XRISTOS"
|
|
|
|
|
|
|
|
// Explicit dot above for I's and J's whenever there are more accents above (Lithanian)
|
|
|
|
test("I", lt, "i");
|
|
|
|
test("I\u0300", lt, "i\u0307\u0300"); // "I" followed by COMBINING GRAVE ACCENT (cc==230)
|
|
|
|
test("I\u0316", lt, "i\u0316"); // "I" followed by COMBINING GRAVE ACCENT BELOW (cc!=230)
|
|
|
|
test("J", lt, "j");
|
|
|
|
test("J\u0300", lt, "j\u0307\u0300"); // "J" followed by COMBINING GRAVE ACCENT (cc==230)
|
|
|
|
test("J\u0316", lt, "j\u0316"); // "J" followed by COMBINING GRAVE ACCENT BELOW (cc!=230)
|
|
|
|
test("\u012E", lt, "\u012F");
|
|
|
|
test("\u012E\u0300", lt, "\u012F\u0307\u0300"); // "I (w/ OGONEK)" followed by COMBINING GRAVE ACCENT (cc==230)
|
|
|
|
test("\u012E\u0316", lt, "\u012F\u0316"); // "I (w/ OGONEK)" followed by COMBINING GRAVE ACCENT BELOW (cc!=230)
|
|
|
|
test("\u00CC", lt, "i\u0307\u0300");
|
|
|
|
test("\u00CD", lt, "i\u0307\u0301");
|
|
|
|
test("\u0128", lt, "i\u0307\u0303");
|
|
|
|
test("I\u0300", Locale.US, "i\u0300"); // "I" followed by COMBINING GRAVE ACCENT (cc==230)
|
|
|
|
test("J\u0300", Locale.US, "j\u0300"); // "J" followed by COMBINING GRAVE ACCENT (cc==230)
|
|
|
|
test("\u012E\u0300", Locale.US, "\u012F\u0300"); // "I (w/ OGONEK)" followed by COMBINING GRAVE ACCENT (cc==230)
|
|
|
|
test("\u00CC", Locale.US, "\u00EC");
|
|
|
|
test("\u00CD", Locale.US, "\u00ED");
|
|
|
|
test("\u0128", Locale.US, "\u0129");
|
|
|
|
|
2013-10-22 06:13:01 +09:00
|
|
|
// I-dot tests
|
2007-12-01 00:00:00 +00:00
|
|
|
test("\u0130", turkish, "i");
|
|
|
|
test("\u0130", az, "i");
|
2014-05-14 10:52:51 -07:00
|
|
|
test("\u0130", lt, "\u0069\u0307");
|
|
|
|
test("\u0130", Locale.US, "\u0069\u0307");
|
|
|
|
test("\u0130", Locale.JAPAN, "\u0069\u0307");
|
|
|
|
test("\u0130", Locale.ROOT, "\u0069\u0307");
|
2007-12-01 00:00:00 +00:00
|
|
|
|
|
|
|
// Remove dot_above in the sequence I + dot_above (Turkish and Azeri)
|
|
|
|
test("I\u0307", turkish, "i");
|
|
|
|
test("I\u0307", az, "i");
|
|
|
|
test("J\u0307", turkish, "j\u0307");
|
|
|
|
test("J\u0307", az, "j\u0307");
|
|
|
|
|
|
|
|
// Unless an I is before a dot_above, it turns into a dotless i (Turkish and Azeri)
|
|
|
|
test("I", turkish, "\u0131");
|
|
|
|
test("I", az, "\u0131");
|
|
|
|
test("I", Locale.US, "i");
|
|
|
|
test("IABC", turkish, "\u0131abc");
|
|
|
|
test("IABC", az, "\u0131abc");
|
|
|
|
test("IABC", Locale.US, "iabc");
|
|
|
|
|
|
|
|
// Supplementary character tests
|
|
|
|
//
|
|
|
|
// U+10400 ("\uD801\uDC00"): DESERET CAPITAL LETTER LONG I
|
|
|
|
// U+10401 ("\uD801\uDC01"): DESERET CAPITAL LETTER LONG E
|
|
|
|
// U+10402 ("\uD801\uDC02"): DESERET CAPITAL LETTER LONG A
|
|
|
|
// U+10428 ("\uD801\uDC28"): DESERET SMALL LETTER LONG I
|
|
|
|
// U+10429 ("\uD801\uDC29"): DESERET SMALL LETTER LONG E
|
|
|
|
// U+1042A ("\uD801\uDC2A"): DESERET SMALL LETTER LONG A
|
|
|
|
//
|
|
|
|
// valid code point tests:
|
|
|
|
test("\uD801\uDC00\uD801\uDC01\uD801\uDC02", Locale.US, "\uD801\uDC28\uD801\uDC29\uD801\uDC2A");
|
|
|
|
test("\uD801\uDC00A\uD801\uDC01B\uD801\uDC02C", Locale.US, "\uD801\uDC28a\uD801\uDC29b\uD801\uDC2Ac");
|
|
|
|
// invalid code point tests:
|
|
|
|
test("\uD800\uD800\uD801A\uDC00\uDC00\uDC00B", Locale.US, "\uD800\uD800\uD801a\uDC00\uDC00\uDC00b");
|
|
|
|
|
2014-07-17 11:08:50 -07:00
|
|
|
// lower/uppercase + surrogates
|
|
|
|
test("a\uD801\uDC1c", Locale.ROOT, "a\uD801\uDC44");
|
|
|
|
test("A\uD801\uDC1c", Locale.ROOT, "a\uD801\uDC44");
|
|
|
|
test("a\uD801\uDC00\uD801\uDC01\uD801\uDC02", Locale.US, "a\uD801\uDC28\uD801\uDC29\uD801\uDC2A");
|
|
|
|
test("A\uD801\uDC00\uD801\uDC01\uD801\uDC02", Locale.US, "a\uD801\uDC28\uD801\uDC29\uD801\uDC2A");
|
|
|
|
|
2014-02-07 09:04:17 -08:00
|
|
|
// test bmp + supp1
|
|
|
|
StringBuilder src = new StringBuilder(0x20000);
|
|
|
|
StringBuilder exp = new StringBuilder(0x20000);
|
|
|
|
for (int cp = 0; cp < 0x20000; cp++) {
|
|
|
|
if (cp >= Character.MIN_HIGH_SURROGATE && cp <= Character.MAX_HIGH_SURROGATE) {
|
|
|
|
continue;
|
|
|
|
}
|
2014-05-14 10:52:51 -07:00
|
|
|
if (cp == 0x0130) {
|
|
|
|
// Although UnicodeData.txt has the lower case char as \u0069, it should be
|
|
|
|
// handled with the rules in SpecialCasing.txt, i.e., \u0069\u0307 in
|
|
|
|
// non Turkic locales.
|
|
|
|
continue;
|
|
|
|
}
|
2014-02-07 09:04:17 -08:00
|
|
|
int lowerCase = Character.toLowerCase(cp);
|
|
|
|
if (lowerCase == -1) { //Character.ERROR
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
src.appendCodePoint(cp);
|
|
|
|
exp.appendCodePoint(lowerCase);
|
|
|
|
}
|
|
|
|
test(src.toString(), Locale.US, exp.toString());
|
|
|
|
|
8141132: JEP 254: Compact Strings
Adopt a more space-efficient internal representation for strings.
Co-authored-by: Brent Christian <brent.christian@oracle.com>
Co-authored-by: Vivek Deshpande <vivek.r.deshpande@intel.com>
Co-authored-by: Charlie Hunt <charlie.hunt@oracle.com>
Co-authored-by: Vladimir Kozlov <vladimir.kozlov@oracle.com>
Co-authored-by: Roger Riggs <roger.riggs@oracle.com>
Co-authored-by: Xueming Shen <xueming.shen@oracle.com>
Co-authored-by: Aleksey Shipilev <aleksey.shipilev@oracle.com>
Co-authored-by: Sandhya Viswanathan <sandhya.viswanathan@intel.com>
Reviewed-by: alanb, bdelsart, coleenp, iklam, jiangli, jrose, kevinw, naoto, pliden, roland, smarks, twisti
2015-11-03 09:42:11 +01:00
|
|
|
// test latin1
|
|
|
|
src = new StringBuilder(0x100);
|
|
|
|
exp = new StringBuilder(0x100);
|
|
|
|
for (int cp = 0; cp < 0x100; cp++) {
|
|
|
|
int lowerCase = Character.toLowerCase(cp);
|
|
|
|
if (lowerCase == -1) { //Character.ERROR
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
src.appendCodePoint(cp);
|
|
|
|
exp.appendCodePoint(lowerCase);
|
|
|
|
}
|
|
|
|
test(src.toString(), Locale.US, exp.toString());
|
|
|
|
|
|
|
|
// test non-latin1 -> latin1
|
|
|
|
src = new StringBuilder(0x100).append("abc");
|
|
|
|
exp = new StringBuilder(0x100).append("abc");
|
|
|
|
for (int cp = 0x100; cp < 0x10000; cp++) {
|
|
|
|
int lowerCase = Character.toLowerCase(cp);
|
|
|
|
if (lowerCase < 0x100 && cp != '\u0130') {
|
|
|
|
src.appendCodePoint(cp);
|
|
|
|
exp.appendCodePoint(lowerCase);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
test(src.toString(), Locale.US, exp.toString());
|
2007-12-01 00:00:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void test(String in, Locale locale, String expected) {
|
8141132: JEP 254: Compact Strings
Adopt a more space-efficient internal representation for strings.
Co-authored-by: Brent Christian <brent.christian@oracle.com>
Co-authored-by: Vivek Deshpande <vivek.r.deshpande@intel.com>
Co-authored-by: Charlie Hunt <charlie.hunt@oracle.com>
Co-authored-by: Vladimir Kozlov <vladimir.kozlov@oracle.com>
Co-authored-by: Roger Riggs <roger.riggs@oracle.com>
Co-authored-by: Xueming Shen <xueming.shen@oracle.com>
Co-authored-by: Aleksey Shipilev <aleksey.shipilev@oracle.com>
Co-authored-by: Sandhya Viswanathan <sandhya.viswanathan@intel.com>
Reviewed-by: alanb, bdelsart, coleenp, iklam, jiangli, jrose, kevinw, naoto, pliden, roland, smarks, twisti
2015-11-03 09:42:11 +01:00
|
|
|
test0(in, locale,expected);
|
|
|
|
for (String[] ss : new String[][] {
|
|
|
|
new String[] {"abc", "abc"},
|
|
|
|
new String[] {"aBc", "abc"},
|
|
|
|
new String[] {"ABC", "abc"},
|
|
|
|
new String[] {"ab\u4e00", "ab\u4e00"},
|
|
|
|
new String[] {"aB\u4e00", "ab\u4e00"},
|
|
|
|
new String[] {"AB\u4e00", "ab\u4e00"},
|
|
|
|
new String[] {"ab\uD800\uDC00", "ab\uD800\uDC00"},
|
|
|
|
new String[] {"aB\uD800\uDC00", "ab\uD800\uDC00"},
|
|
|
|
new String[] {"AB\uD800\uDC00", "ab\uD800\uDC00"},
|
|
|
|
new String[] {"ab\uD801\uDC1C", "ab\uD801\uDC44"},
|
|
|
|
new String[] {"aB\uD801\uDC1C", "ab\uD801\uDC44"},
|
|
|
|
new String[] {"AB\uD801\uDC1C", "ab\uD801\uDC44"},
|
|
|
|
|
|
|
|
}) {
|
|
|
|
test0(ss[0] + " " + in, locale, ss[1] + " " + expected);
|
|
|
|
test0(in + " " + ss[0], locale, expected + " " + ss[1]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void test0(String in, Locale locale, String expected) {
|
2007-12-01 00:00:00 +00:00
|
|
|
String result = in.toLowerCase(locale);
|
|
|
|
if (!result.equals(expected)) {
|
|
|
|
System.err.println("input: " + in + ", locale: " + locale +
|
|
|
|
", expected: " + expected + ", actual: " + result);
|
|
|
|
throw new RuntimeException();
|
|
|
|
}
|
8141132: JEP 254: Compact Strings
Adopt a more space-efficient internal representation for strings.
Co-authored-by: Brent Christian <brent.christian@oracle.com>
Co-authored-by: Vivek Deshpande <vivek.r.deshpande@intel.com>
Co-authored-by: Charlie Hunt <charlie.hunt@oracle.com>
Co-authored-by: Vladimir Kozlov <vladimir.kozlov@oracle.com>
Co-authored-by: Roger Riggs <roger.riggs@oracle.com>
Co-authored-by: Xueming Shen <xueming.shen@oracle.com>
Co-authored-by: Aleksey Shipilev <aleksey.shipilev@oracle.com>
Co-authored-by: Sandhya Viswanathan <sandhya.viswanathan@intel.com>
Reviewed-by: alanb, bdelsart, coleenp, iklam, jiangli, jrose, kevinw, naoto, pliden, roland, smarks, twisti
2015-11-03 09:42:11 +01:00
|
|
|
}
|
2007-12-01 00:00:00 +00:00
|
|
|
}
|