/* * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ /** * @test * @bug 4048446 4051866 4053636 4054238 4054734 4054736 4058613 4059820 4060154 * 4062418 4065540 4066189 4066696 4076676 4078588 4079231 4081866 4087241 * 4087243 4092260 4095316 4101940 4103436 4114076 4114077 4124632 4132736 * 4133509 4139572 4141640 4179126 4179686 4244884 4663220 * @library /java/text/testlib * @summary Regression tests for Collation and associated classes * @modules jdk.localedata * @run junit Regression */ /* (C) Copyright Taligent, Inc. 1996 - All Rights Reserved (C) Copyright IBM Corp. 1996 - All Rights Reserved The original version of this source code and documentation is copyrighted and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These materials are provided under terms of a License Agreement between Taligent and Sun. This technology is protected by multiple US and International patents. This notice and attribution to Taligent may not be removed. Taligent is a registered trademark of Taligent, Inc. */ import java.text.*; import java.util.Locale; import java.util.Vector; import org.junit.jupiter.api.Test; import static org.junit.jupiter.api.Assertions.fail; public class Regression { // CollationElementIterator.reset() doesn't work // @Test public void Test4048446() { CollationElementIterator i1 = en_us.getCollationElementIterator(test1); CollationElementIterator i2 = en_us.getCollationElementIterator(test1); while ( i1.next() != CollationElementIterator.NULLORDER ) { } i1.reset(); TestUtils.compareCollationElementIters(i1, i2); } // Collator -> rules -> Collator round-trip broken for expanding characters // @Test public void Test4051866() throws ParseException { // Build a collator containing expanding characters RuleBasedCollator c1 = new RuleBasedCollator("< o " +"& oe ,o\u3080" +"& oe ,\u1530 ,O" +"& OE ,O\u3080" +"& OE ,\u1520" +"< p ,P"); // Build another using the rules from the first RuleBasedCollator c2 = new RuleBasedCollator(c1.getRules()); // Make sure they're the same if (!c1.getRules().equals(c2.getRules())) { fail("Rules are not equal"); } } // Collator thinks "black-bird" == "black" // @Test public void Test4053636() { if (en_us.equals("black-bird","black")) { fail("black-bird == black"); } } // CollationElementIterator will not work correctly if the associated // Collator object's mode is changed // @Test public void Test4054238() { RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); c.setDecomposition(Collator.CANONICAL_DECOMPOSITION); CollationElementIterator i1 = en_us.getCollationElementIterator(test3); c.setDecomposition(Collator.NO_DECOMPOSITION); CollationElementIterator i2 = en_us.getCollationElementIterator(test3); // At this point, BOTH iterators should use NO_DECOMPOSITION, since the // collator itself is in that mode TestUtils.compareCollationElementIters(i1, i2); } // Collator.IDENTICAL documented but not implemented // @Test public void Test4054734() { RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); try { c.setStrength(Collator.IDENTICAL); } catch (Exception e) { fail("Caught " + e.toString() + " setting Collator.IDENTICAL"); } String[] decomp = { "\u0001", "<", "\u0002", "\u0001", "=", "\u0001", "A\u0001", ">", "~\u0002", // Ensure A and ~ are not compared bitwise "\u00C0", "=", "A\u0300" // Decomp should make these equal }; c.setDecomposition(Collator.CANONICAL_DECOMPOSITION); compareArray(c, decomp); String[] nodecomp = { "\u00C0", ">", "A\u0300" // A-grave vs. A combining-grave }; c.setDecomposition(Collator.NO_DECOMPOSITION); compareArray(c, nodecomp); } // Full Decomposition mode not implemented // @Test public void Test4054736() { RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); c.setDecomposition(Collator.FULL_DECOMPOSITION); String[] tests = { "\uFB4f", "=", "\u05D0\u05DC", // Alef-Lamed vs. Alef, Lamed }; compareArray(c, tests); } // Collator.getInstance() causes an ArrayIndexOutofBoundsException for Korean // @Test public void Test4058613() { // Creating a default collator doesn't work when Korean is the default // locale Locale oldDefault = Locale.getDefault(); Locale.setDefault( Locale.KOREAN ); try { Collator c = Collator.getInstance(); // Since the fix to this bug was to turn of decomposition for Korean collators, // ensure that's what we got if (c.getDecomposition() != Collator.NO_DECOMPOSITION) { fail("Decomposition is not set to NO_DECOMPOSITION"); } } finally { Locale.setDefault(oldDefault); } } // RuleBasedCollator.getRules does not return the exact pattern as input // for expanding character sequences // @Test public void Test4059820() { RuleBasedCollator c = null; try { c = new RuleBasedCollator("< a < b , c/a < d < z"); } catch (ParseException e) { fail("Exception building collator: " + e.toString()); return; } if ( c.getRules().indexOf("c/a") == -1) { fail("returned rules do not contain 'c/a'"); } } // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I" // @Test public void Test4060154() { RuleBasedCollator c = null; try { c = new RuleBasedCollator("< g, G < h, H < i, I < j, J" + " & H < \u0131, \u0130, i, I" ); } catch (ParseException e) { fail("Exception building collator: " + e.toString()); return; } c.setDecomposition(Collator.CANONICAL_DECOMPOSITION); String[] tertiary = { "A", "<", "B", "H", "<", "\u0131", "H", "<", "I", "\u0131", "<", "\u0130", "\u0130", "<", "i", "\u0130", ">", "H", }; c.setStrength(Collator.TERTIARY); compareArray(c, tertiary); String[] secondary = { "H", "<", "I", "\u0131", "=", "\u0130", }; c.setStrength(Collator.PRIMARY); compareArray(c, secondary); }; // Secondary/Tertiary comparison incorrect in French Secondary // @Test public void Test4062418() throws ParseException { RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE); c.setStrength(Collator.SECONDARY); String[] tests = { "p\u00eache", "<", "p\u00e9ch\u00e9", // Comparing accents from end, p\u00e9ch\u00e9 is greater }; compareArray(c, tests); } // Collator.compare() method broken if either string contains spaces // @Test public void Test4065540() { if (en_us.compare("abcd e", "abcd f") == 0) { fail("'abcd e' == 'abcd f'"); } } // Unicode characters need to be recursively decomposed to get the // correct result. For example, // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300. // @Test public void Test4066189() { String test1 = "\u1EB1"; String test2 = "a\u0306\u0300"; RuleBasedCollator c1 = (RuleBasedCollator) en_us.clone(); c1.setDecomposition(Collator.FULL_DECOMPOSITION); CollationElementIterator i1 = en_us.getCollationElementIterator(test1); RuleBasedCollator c2 = (RuleBasedCollator) en_us.clone(); c2.setDecomposition(Collator.NO_DECOMPOSITION); CollationElementIterator i2 = en_us.getCollationElementIterator(test2); TestUtils.compareCollationElementIters(i1, i2); } // French secondary collation checking at the end of compare iteration fails // @Test public void Test4066696() { RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE); c.setStrength(Collator.SECONDARY); String[] tests = { "\u00e0", "<", "\u01fa", // a-grave < A-ring-acute }; compareArray(c, tests); } // Bad canonicalization of same-class combining characters // @Test public void Test4076676() { // These combining characters are all in the same class, so they should not // be reordered, and they should compare as unequal. String s1 = "A\u0301\u0302\u0300"; String s2 = "A\u0302\u0300\u0301"; RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); c.setStrength(Collator.TERTIARY); if (c.compare(s1,s2) == 0) { fail("Same-class combining chars were reordered"); } } // RuleBasedCollator.equals(null) throws NullPointerException // @Test public void Test4079231() { try { if (en_us.equals(null)) { fail("en_us.equals(null) returned true"); } } catch (Exception e) { fail("en_us.equals(null) threw " + e.toString()); } } // RuleBasedCollator breaks on "< a < bb" rule // @Test public void Test4078588() throws ParseException { RuleBasedCollator rbc=new RuleBasedCollator("< a < bb"); int result = rbc.compare("a","bb"); if (result != -1) { fail("Compare(a,bb) returned " + result + "; expected -1"); } } // Combining characters in different classes not reordered properly. // @Test public void Test4081866() throws ParseException { // These combining characters are all in different classes, // so they should be reordered and the strings should compare as equal. String s1 = "A\u0300\u0316\u0327\u0315"; String s2 = "A\u0327\u0316\u0315\u0300"; RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); c.setStrength(Collator.TERTIARY); // Now that the default collators are set to NO_DECOMPOSITION // (as a result of fixing bug 4114077), we must set it explicitly // when we're testing reordering behavior. -- lwerner, 5/5/98 c.setDecomposition(Collator.CANONICAL_DECOMPOSITION); if (c.compare(s1,s2) != 0) { fail("Combining chars were not reordered"); } } // string comparison errors in Scandinavian collators // @Test public void Test4087241() { RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance( Locale.of("da", "DK")); c.setStrength(Collator.SECONDARY); String[] tests = { "\u007a", "<", "\u00e6", // z < ae "a\u0308", "<", "a\u030a", // a-unlaut < a-ring "Y", "<", "u\u0308", // Y < u-umlaut }; compareArray(c, tests); } // CollationKey takes ignorable strings into account when it shouldn't // @Test public void Test4087243() { RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); c.setStrength(Collator.TERTIARY); String[] tests = { "123", "=", "123\u0001", // 1 2 3 = 1 2 3 ctrl-A }; compareArray(c, tests); } // Mu/micro conflict // Micro symbol and greek lowercase letter Mu should sort identically // @Test public void Test4092260() { Collator c = Collator.getInstance(Locale.of("el")); // will only be equal when FULL_DECOMPOSITION is used c.setDecomposition(Collator.FULL_DECOMPOSITION); String[] tests = { "\u00B5", "=", "\u03BC", }; compareArray(c, tests); } void Test4095316() { Collator c = Collator.getInstance(Locale.of("el", "GR")); c.setStrength(Collator.TERTIARY); // javadocs for RuleBasedCollator clearly specify that characters containing compatability // chars MUST use FULL_DECOMPOSITION to get accurate comparisons. c.setDecomposition(Collator.FULL_DECOMPOSITION); String[] tests = { "\u03D4", "=", "\u03AB", }; compareArray(c, tests); } @Test public void Test4101940() { try { RuleBasedCollator c = new RuleBasedCollator("< a < b"); CollationElementIterator i = c.getCollationElementIterator(""); i.reset(); if (i.next() != i.NULLORDER) { fail("next did not return NULLORDER"); } } catch (Exception e) { fail("Caught " + e ); } } // Collator.compare not handling spaces properly // @Test public void Test4103436() { RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); c.setStrength(Collator.TERTIARY); String[] tests = { "file", "<", "file access", "file", "<", "fileaccess", }; compareArray(c, tests); } // Collation not Unicode conformant with Hangul syllables // @Test public void Test4114076() { RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); c.setStrength(Collator.TERTIARY); // // With Canonical decomposition, Hangul syllables should get decomposed // into Jamo, but Jamo characters should not be decomposed into // conjoining Jamo // c.setDecomposition(Collator.CANONICAL_DECOMPOSITION); String[] test1 = { "\ud4db", "=", "\u1111\u1171\u11b6", }; compareArray(c, test1); // Full decomposition result should be the same as canonical decomposition // for all hangul. c.setDecomposition(Collator.FULL_DECOMPOSITION); compareArray(c, test1); } // Collator.getCollationKey was hanging on certain character sequences // @Test public void Test4124632() throws Exception { Collator coll = Collator.getInstance(Locale.JAPAN); try { coll.getCollationKey("A\u0308bc"); } catch (OutOfMemoryError e) { fail("Ran out of memory -- probably an infinite loop"); } } // sort order of french words with multiple accents has errors // @Test public void Test4132736() { Collator c = Collator.getInstance(Locale.FRANCE); String[] test1 = { "e\u0300e\u0301", "<", "e\u0301e\u0300", "e\u0300\u0301", ">", "e\u0301\u0300", }; compareArray(c, test1); } // The sorting using java.text.CollationKey is not in the exact order // @Test public void Test4133509() { String[] test1 = { "Exception", "<", "ExceptionInInitializerError", "Graphics", "<", "GraphicsEnvironment", "String", "<", "StringBuffer", }; compareArray(en_us, test1); } // Collation with decomposition off doesn't work for Europe // @Test public void Test4114077() { // Ensure that we get the same results with decomposition off // as we do with it on.... RuleBasedCollator c = (RuleBasedCollator) en_us.clone(); c.setStrength(Collator.TERTIARY); String[] test1 = { "\u00C0", "=", "A\u0300", // Should be equivalent "p\u00eache", ">", "p\u00e9ch\u00e9", "\u0204", "=", "E\u030F", "\u01fa", "=", "A\u030a\u0301", // a-ring-acute -> a-ring, acute // -> a, ring, acute "A\u0300\u0316", "<", "A\u0316\u0300", // No reordering --> unequal }; c.setDecomposition(Collator.NO_DECOMPOSITION); compareArray(c, test1); String[] test2 = { "A\u0300\u0316", "=", "A\u0316\u0300", // Reordering --> equal }; c.setDecomposition(Collator.CANONICAL_DECOMPOSITION); compareArray(c, test2); } // Support for Swedish gone in 1.1.6 (Can't create Swedish collator) // @Test public void Test4141640() { // // Rather than just creating a Swedish collator, we might as well // try to instantiate one for every locale available on the system // in order to prevent this sort of bug from cropping up in the future // Locale[] locales = Collator.getAvailableLocales(); for (int i = 0; i < locales.length; i++) { try { Collator c = Collator.getInstance(locales[i]); } catch (Exception e) { fail("Caught " + e + " creating collator for " + locales[i]); } } } // getCollationKey throws exception for spanish text // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6 // @Test public void Test4139572() { // // Code pasted straight from the bug report // // create spanish locale and collator Locale l = Locale.of("es", "es"); Collator col = Collator.getInstance(l); // this spanish phrase kills it! col.getCollationKey("Nombre De Objeto"); } // RuleBasedCollator doesn't use getCollationElementIterator internally // @Test public void Test4146160() throws ParseException { // // Use a custom collator class whose getCollationElementIterator // methods increment a count.... // My4146160Collator.count = 0; new My4146160Collator().getCollationKey("1"); if (My4146160Collator.count < 1) { fail("getCollationElementIterator not called"); } My4146160Collator.count = 0; new My4146160Collator().compare("1", "2"); if (My4146160Collator.count < 1) { fail("getCollationElementIterator not called"); } } static class My4146160Collator extends RuleBasedCollator { public My4146160Collator() throws ParseException { super(Regression.en_us.getRules()); } public CollationElementIterator getCollationElementIterator( String text) { count++; return super.getCollationElementIterator(text); } public CollationElementIterator getCollationElementIterator( CharacterIterator text) { count++; return super.getCollationElementIterator(text); } public static int count = 0; }; // CollationElementIterator.previous broken for expanding char sequences // @Test public void Test4179686() throws ParseException { // Create a collator with a few expanding character sequences in it.... RuleBasedCollator coll = new RuleBasedCollator(en_us.getRules() + " & ae ; \u00e4 & AE ; \u00c4" + " & oe ; \u00f6 & OE ; \u00d6" + " & ue ; \u00fc & UE ; \u00dc"); String text = "T\u00f6ne"; // o-umlaut CollationElementIterator iter = coll.getCollationElementIterator(text); Vector elements = new Vector(); int elem; // Iterate forward and collect all of the elements into a Vector while ((elem = iter.next()) != iter.NULLORDER) { elements.addElement(new Integer(elem)); } // Now iterate backward and make sure they're the same int index = elements.size() - 1; while ((elem = iter.previous()) != iter.NULLORDER) { int expect = ((Integer)elements.elementAt(index)).intValue(); if (elem != expect) { fail("Mismatch at index " + index + ": got " + Integer.toString(elem,16) + ", expected " + Integer.toString(expect,16)); } index--; } } @Test public void Test4244884() throws ParseException { RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(Locale.US); coll = new RuleBasedCollator(coll.getRules() + " & C < ch , cH , Ch , CH < cat < crunchy"); String[] testStrings = new String[] { "car", "cave", "clamp", "cramp", "czar", "church", "catalogue", "crunchy", "dog" }; for (int i = 1; i < testStrings.length; i++) { if (coll.compare(testStrings[i - 1], testStrings[i]) >= 0) { fail("error: \"" + testStrings[i - 1] + "\" is greater than or equal to \"" + testStrings[i] + "\"."); } } } @Test public void Test4179216() throws ParseException { // you can position a CollationElementIterator in the middle of // a contracting character sequence, yielding a bogus collation // element RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(Locale.US); coll = new RuleBasedCollator(coll.getRules() + " & C < ch , cH , Ch , CH < cat < crunchy"); String testText = "church church catcatcher runcrunchynchy"; CollationElementIterator iter = coll.getCollationElementIterator( testText); // test that the "ch" combination works properly iter.setOffset(4); int elt4 = CollationElementIterator.primaryOrder(iter.next()); iter.reset(); int elt0 = CollationElementIterator.primaryOrder(iter.next()); iter.setOffset(5); int elt5 = CollationElementIterator.primaryOrder(iter.next()); if (elt4 != elt0 || elt5 != elt0) fail("The collation elements at positions 0 (" + elt0 + "), 4 (" + elt4 + "), and 5 (" + elt5 + ") don't match."); // test that the "cat" combination works properly iter.setOffset(14); int elt14 = CollationElementIterator.primaryOrder(iter.next()); iter.setOffset(15); int elt15 = CollationElementIterator.primaryOrder(iter.next()); iter.setOffset(16); int elt16 = CollationElementIterator.primaryOrder(iter.next()); iter.setOffset(17); int elt17 = CollationElementIterator.primaryOrder(iter.next()); iter.setOffset(18); int elt18 = CollationElementIterator.primaryOrder(iter.next()); iter.setOffset(19); int elt19 = CollationElementIterator.primaryOrder(iter.next()); if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17 || elt14 != elt18 || elt14 != elt19) fail("\"cat\" elements don't match: elt14 = " + elt14 + ", elt15 = " + elt15 + ", elt16 = " + elt16 + ", elt17 = " + elt17 + ", elt18 = " + elt18 + ", elt19 = " + elt19); // now generate a complete list of the collation elements, // first using next() and then using setOffset(), and // make sure both interfaces return the same set of elements iter.reset(); int elt = iter.next(); int count = 0; while (elt != CollationElementIterator.NULLORDER) { ++count; elt = iter.next(); } String[] nextElements = new String[count]; String[] setOffsetElements = new String[count]; int lastPos = 0; iter.reset(); elt = iter.next(); count = 0; while (elt != CollationElementIterator.NULLORDER) { nextElements[count++] = testText.substring(lastPos, iter.getOffset()); lastPos = iter.getOffset(); elt = iter.next(); } count = 0; for (int i = 0; i < testText.length(); ) { iter.setOffset(i); lastPos = iter.getOffset(); elt = iter.next(); setOffsetElements[count++] = testText.substring(lastPos, iter.getOffset()); i = iter.getOffset(); } for (int i = 0; i < nextElements.length; i++) { if (nextElements[i].equals(setOffsetElements[i])) { System.out.println(nextElements[i]); } else { fail("Error: next() yielded " + nextElements[i] + ", but setOffset() yielded " + setOffsetElements[i]); } } } @Test public void Test4216006() throws Exception { // rule parser barfs on "<\u00e0=a\u0300", and on other cases // where the same token (after normalization) appears twice in a row boolean caughtException = false; try { RuleBasedCollator dummy = new RuleBasedCollator("\u00e0= 0) { fail("List out of order at element #" + i + ": " + TestUtils.prettify(sortedList[i]) + " >= " + TestUtils.prettify(sortedList[i + 1])); } } } // CollationElementIterator set doesn't work propertly with next/prev @Test public void Test4663220() { RuleBasedCollator collator = (RuleBasedCollator)Collator.getInstance(Locale.US); CharacterIterator stringIter = new StringCharacterIterator("fox"); CollationElementIterator iter = collator.getCollationElementIterator(stringIter); int[] elements_next = new int[3]; System.out.println("calling next:"); for (int i = 0; i < 3; ++i) { System.out.println("[" + i + "] " + (elements_next[i] = iter.next())); } int[] elements_fwd = new int[3]; System.out.println("calling set/next:"); for (int i = 0; i < 3; ++i) { iter.setOffset(i); System.out.println("[" + i + "] " + (elements_fwd[i] = iter.next())); } for (int i = 0; i < 3; ++i) { if (elements_next[i] != elements_fwd[i]) { fail("mismatch at position " + i + ": " + elements_next[i] + " != " + elements_fwd[i]); } } } //------------------------------------------------------------------------ // Internal utilities // private void compareArray(Collator c, String[] tests) { for (int i = 0; i < tests.length; i += 3) { int expect = 0; if (tests[i+1].equals("<")) { expect = -1; } else if (tests[i+1].equals(">")) { expect = 1; } else if (tests[i+1].equals("=")) { expect = 0; } else { expect = Integer.decode(tests[i+1]).intValue(); } int result = c.compare(tests[i], tests[i+2]); if (sign(result) != sign(expect)) { fail( i/3 + ": compare(" + TestUtils.prettify(tests[i]) + " , " + TestUtils.prettify(tests[i+2]) + ") got " + result + "; expected " + expect); } else { // Collator.compare worked OK; now try the collation keys CollationKey k1 = c.getCollationKey(tests[i]); CollationKey k2 = c.getCollationKey(tests[i+2]); result = k1.compareTo(k2); if (sign(result) != sign(expect)) { fail( i/3 + ": key(" + TestUtils.prettify(tests[i]) + ").compareTo(key(" + TestUtils.prettify(tests[i+2]) + ")) got " + result + "; expected " + expect); fail(" " + TestUtils.prettifyCKey(k1) + " vs. " + TestUtils.prettifyCKey(k2)); } } } } private static final int sign(int i) { if (i < 0) return -1; if (i > 0) return 1; return 0; } static RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US); String test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?"; String test2 = "Xf ile What subset of all possible test cases has the lowest probability of detecting the least errors?"; String test3 = "a\u00FCbeck Gr\u00F6\u00DFe L\u00FCbeck"; }