6755060: Collator.compare() does not compare correctly for the Thai locale
Reviewed-by: naoto
This commit is contained in:
parent
614fdf63b3
commit
0db43a9227
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2005, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -103,18 +103,13 @@ public class CollationData_th extends ListResourceBundle {
|
||||
//
|
||||
// Normal vowels
|
||||
//
|
||||
+ "< \u0E4D " // NIKHAHIT
|
||||
+ "< \u0E30 " // SARA A
|
||||
+ "< \u0E31 " // MAI HAN-AKAT
|
||||
+ "< \u0E32 " // SARA AA
|
||||
|
||||
// Normalizer will decompose this character to \u0e4d\u0e32. This is
|
||||
// a Bad Thing, because we want the separate characters to sort
|
||||
// differently than this individual one. Since there's no public way to
|
||||
// set the decomposition to be used when creating a collator, there's
|
||||
// no way around this right now.
|
||||
// It's best to go ahead and leave the character in, because it occurs
|
||||
// this way a lot more often than it occurs as separate characters.
|
||||
+ "< \u0E33 " // SARA AM
|
||||
// Normalizer will decompose this character to \u0e4d\u0e32.
|
||||
+ "< \u0E33 = \u0E4D\u0E32 " // SARA AM
|
||||
|
||||
+ "< \u0E34 " // SARA I
|
||||
|
||||
@ -133,62 +128,58 @@ public class CollationData_th extends ListResourceBundle {
|
||||
+ "< \u0E43 " // SARA AI MAIMUAN
|
||||
+ "< \u0E44 " // SARA AI MAIMALAI
|
||||
|
||||
//
|
||||
// Digits
|
||||
//
|
||||
+ "< \u0E50 " // DIGIT ZERO
|
||||
+ "< \u0E51 " // DIGIT ONE
|
||||
+ "< \u0E52 " // DIGIT TWO
|
||||
+ "< \u0E53 " // DIGIT THREE
|
||||
+ "< \u0E54 " // DIGIT FOUR
|
||||
+ "< \u0E55 " // DIGIT FIVE
|
||||
+ "< \u0E56 " // DIGIT SIX
|
||||
+ "< \u0E57 " // DIGIT SEVEN
|
||||
+ "< \u0E58 " // DIGIT EIGHT
|
||||
+ "< \u0E59 " // DIGIT NINE
|
||||
|
||||
// Sorta tonal marks, but maybe not really
|
||||
+ "< \u0E4D " // NIKHAHIT
|
||||
//according to CLDR, it's after 0e44
|
||||
+ "< \u0E3A " // PHINTHU
|
||||
|
||||
//
|
||||
// Thai symbols are supposed to sort "after white space".
|
||||
// I'm treating this as making them sort just after the normal Latin-1
|
||||
// symbols, which are in turn after the white space.
|
||||
//
|
||||
+ "&'\u007d'" // right-brace
|
||||
+ "< \u0E2F " // PAIYANNOI (ellipsis, abbreviation)
|
||||
+ "< \u0E46 " // MAIYAMOK
|
||||
+ "< \u0E4F " // FONGMAN
|
||||
+ "< \u0E5A " // ANGKHANKHU
|
||||
+ "< \u0E5B " // KHOMUT
|
||||
+ "< \u0E3F " // CURRENCY SYMBOL BAHT
|
||||
|
||||
// These symbols are supposed to be "after all characters"
|
||||
+ "< \u0E4E " // YAMAKKAN
|
||||
|
||||
// This rare symbol also comes after all characters. But when it is
|
||||
// used in combination with RU and LU, the combination is treated as
|
||||
// a separate letter, ala "CH" sorting after "C" in traditional Spanish.
|
||||
// This rare symbol comes after all characters.
|
||||
+ "< \u0E45 " // LAKKHANGYAO
|
||||
+ "& \u0E24 < \u0E24\u0E45 "
|
||||
+ "& \u0E26 < \u0E26\u0E45 "
|
||||
+ "& \u0E32 , \0E45 " // According to CLDR, 0E45 is after 0E32 in tertiary level
|
||||
|
||||
// Tonal marks are primary ignorables but are treated as secondary
|
||||
// differences
|
||||
|
||||
|
||||
|
||||
// Below are Thai punctuation marks and tonal (accent) marks. According to CLDR 1.9 and
|
||||
// ISO/IEC 14651, Annex C, C.2.1 Thai ordering principles, 0E2F to 0E5B are punctuation marks that need to be ignored
|
||||
// in the first three levels. 0E4E to 0E4B are tonal marks to be compared at the secondary level.
|
||||
// In the real implementation, punctuation marks are set at the tertiary level, as there is no fourth level in Java.
|
||||
// Set all these special marks after \u0301, the acute accent.
|
||||
+ "& \u0301 " // acute accent
|
||||
|
||||
// punctuation marks
|
||||
+ ", \u0E2F " // PAIYANNOI (ellipsis, abbreviation)
|
||||
+ ", \u0E46 " // MAIYAMOK
|
||||
+ ", \u0E4F " // FONGMAN
|
||||
+ ", \u0E5A " // ANGKHANKHU
|
||||
+ ", \u0E5B " // KHOMUT
|
||||
|
||||
//tonal marks
|
||||
+ "; \u0E4E " // YAMAKKAN
|
||||
+ "; \u0E4C " // THANTHAKHAT
|
||||
+ "; \u0E47 " // MAITAIKHU
|
||||
+ "; \u0E48 " // MAI EK
|
||||
+ "; \u0E49 " // MAI THO
|
||||
+ "; \u0E4A " // MAI TRI
|
||||
+ "; \u0E4B " // MAI CHATTAWA
|
||||
+ "; \u0E4C " // THANTHAKHAT
|
||||
|
||||
//
|
||||
// Digits are equal to their corresponding Arabic digits in the first level
|
||||
//
|
||||
+ "& 0 = \u0E50 " // DIGIT ZERO
|
||||
+ "& 1 = \u0E51 " // DIGIT ONE
|
||||
+ "& 2 = \u0E52 " // DIGIT TWO
|
||||
+ "& 3 = \u0E53 " // DIGIT THREE
|
||||
+ "& 4 = \u0E54 " // DIGIT FOUR
|
||||
+ "& 5 = \u0E55 " // DIGIT FIVE
|
||||
+ "& 6 = \u0E56 " // DIGIT SIX
|
||||
+ "& 7 = \u0E57 " // DIGIT SEVEN
|
||||
+ "& 8 = \u0E58 " // DIGIT EIGHT
|
||||
+ "& 9 = \u0E59 " // DIGIT NINE
|
||||
|
||||
|
||||
// These are supposed to be ignored, so I'm treating them as controls
|
||||
+ "& \u0001 "
|
||||
+ "= \u0E3A " // PHINTHU
|
||||
+ "= '.' " // period
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
100
jdk/test/sun/text/resources/Collator/Bug6755060.java
Normal file
100
jdk/test/sun/text/resources/Collator/Bug6755060.java
Normal file
@ -0,0 +1,100 @@
|
||||
/*
|
||||
* Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @bug 6755060
|
||||
* @summary updating collation tables for thai to make it consistent with CLDR 1.9
|
||||
*/
|
||||
|
||||
import java.text.*;
|
||||
import java.util.*;
|
||||
|
||||
public class Bug6755060 {

    /**
     * Regression test entry point for bug 6755060.
     *
     * Sorts a fixed list of Thai strings with the default-locale collator
     * (the default locale is temporarily switched to th_TH) and verifies
     * that sorting leaves the list unchanged. The original default locale
     * is always restored before this method returns or throws.
     *
     * @param args ignored
     * @throws RuntimeException if the sorted order differs from the
     *         expected order; the message lists both orders
     */
    public static void main(String[] args) {

        Locale reservedLocale = Locale.getDefault();

        try {
            Locale loc = new Locale("th", "TH"); // Thai

            // The collator is obtained through the default locale on purpose,
            // so the default has to be switched (and restored in finally).
            Locale.setDefault(loc);
            Collator col = Collator.getInstance();

            /*
             * "data" is the input provided by the submitter of the CR. It is
             * already in the correct order according to the Thai collation
             * in CLDR 1.9.
             *
             * With the fix in place, sorting must leave "data" unchanged.
             * Without the fix (or after a regression), sorting reorders it.
             *
             * The submitter also supplied an expected result in the CR, but
             * that one follows the collation in CLDR 1.4, so it is not used
             * here.
             */
            String[] data = {
                "\u0e01", "\u0e01\u0e2f", "\u0e01\u0e46", "\u0e01\u0e4f",
                "\u0e01\u0e5a", "\u0e01\u0e5b", "\u0e01\u0e4e", "\u0e01\u0e4c",
                "\u0e01\u0e48", "\u0e01\u0e01", "\u0e01\u0e4b\u0e01", "\u0e01\u0e4d",
                "\u0e01\u0e30", "\u0e01\u0e31\u0e01", "\u0e01\u0e32", "\u0e01\u0e33",
                "\u0e01\u0e34", "\u0e01\u0e35", "\u0e01\u0e36", "\u0e01\u0e37",
                "\u0e01\u0e38", "\u0e01\u0e39", "\u0e40\u0e01", "\u0e40\u0e01\u0e48",
                "\u0e40\u0e01\u0e49", "\u0e40\u0e01\u0e4b", "\u0e41\u0e01", "\u0e42\u0e01",
                "\u0e43\u0e01", "\u0e44\u0e01", "\u0e01\u0e3a", "\u0e24\u0e32",
                "\u0e24\u0e45", "\u0e40\u0e25", "\u0e44\u0e26"
            };

            // The expected order is the input order; snapshot it before the
            // in-place sort instead of maintaining a second, duplicated literal.
            String[] sortedData = data.clone();

            Arrays.sort(data, col);

            System.out.println("Using " + loc.getDisplayName());
            boolean mismatch = false;
            for (int i = 0; i < data.length; i++) {
                System.out.println(data[i] + " : " + sortedData[i]);
                if (!sortedData[i].equals(data[i])) {
                    mismatch = true;
                }
            }

            if (mismatch) {
                String errmsg = "Error is found in collation testing in Thai\n"
                        + "expected order is: " + joinWithCommas(sortedData) + "\n"
                        + "actual order is: " + joinWithCommas(data) + "\n";
                throw new RuntimeException(errmsg);
            }
        } finally {
            // restore the reserved locale
            Locale.setDefault(reservedLocale);
        }
    }

    /** Returns the elements of {@code items} joined by single commas. */
    private static String joinWithCommas(String[] items) {
        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < items.length; i++) {
            if (i > 0) {
                joined.append(",");
            }
            joined.append(items[i]);
        }
        return joined.toString();
    }

} // end class Bug6755060
|
Loading…
x
Reference in New Issue
Block a user