8307547: Support variant collations

Reviewed-by: srl, jlu, alanb
This commit is contained in:
Naoto Sato 2023-05-16 18:00:45 +00:00
parent 4e92991809
commit f9a785e855
4 changed files with 72 additions and 19 deletions

View File

@ -41,7 +41,6 @@ package java.text;
import java.lang.ref.SoftReference;
import java.text.spi.CollatorProvider;
import java.util.Locale;
import java.util.ResourceBundle;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import sun.util.locale.provider.LocaleProviderAdapter;
@ -71,7 +70,6 @@ import sun.util.locale.provider.LocaleServiceProviderPool;
* <p>
* The following example shows how to compare two strings using
* the {@code Collator} for the default locale.
* <blockquote>
* {@snippet lang=java :
* // Compare two strings in the default locale
* Collator myCollator = Collator.getInstance();
@ -81,7 +79,6 @@ import sun.util.locale.provider.LocaleServiceProviderPool;
* System.out.println("abc is greater than or equal to ABC");
* }
* }
* </blockquote>
*
* <p>
* You can set a {@code Collator}'s <em>strength</em> property
@ -94,7 +91,6 @@ import sun.util.locale.provider.LocaleServiceProviderPool;
* "e" and "E" are tertiary differences and "e" and "e" are identical.
* The following shows how both case and accents could be ignored for
* US English.
* <blockquote>
* {@snippet lang=java :
* // Get the Collator for US English and set its strength to PRIMARY
* Collator usCollator = Collator.getInstance(Locale.US);
@ -103,7 +99,6 @@ import sun.util.locale.provider.LocaleServiceProviderPool;
* System.out.println("Strings are equivalent");
* }
* }
* </blockquote>
* <p>
* For comparing {@code String}s exactly once, the {@code compare}
* method provides the best performance. When sorting a list of
@ -114,7 +109,7 @@ import sun.util.locale.provider.LocaleServiceProviderPool;
* against other {@code CollationKey}s. A {@code CollationKey} is
* created by a {@code Collator} object for a given {@code String}.
* <br>
* <strong>Note:</strong> {@code CollationKey}s from different
* @apiNote {@code CollationKey}s from different
* {@code Collator}s can not be compared. See the class description
* for {@link CollationKey}
* for an example using {@code CollationKey}s.
@ -222,7 +217,7 @@ public abstract class Collator
/**
* Gets the Collator for the current default locale.
* The default locale is determined by java.util.Locale.getDefault.
* The default locale is determined by {@link Locale#getDefault()}.
* @return the Collator for the default locale (for example, en_US).
* @see java.util.Locale#getDefault
*/
@ -232,6 +227,19 @@ public abstract class Collator
/**
* Gets the Collator for the desired locale.
* @apiNote Implementations of {@code Collator} class may produce
* different instances based on the "{@code co}"
* <a href="https://www.unicode.org/reports/tr35/#UnicodeCollationIdentifier">
* Unicode collation identifier</a> in the {@code desiredLocale}.
* For example:
* {@snippet lang = java:
* Collator.getInstance(Locale.forLanguageTag("sv-u-co-trad"));
* }
* may return a {@code Collator} instance with the Swedish traditional sorting, which
* gives 'v' and 'w' the same sorting order, while the {@code Collator} instance
* for the Swedish locale without "co" identifier distinguishes 'v' and 'w'.
* @spec https://www.unicode.org/reports/tr35/ Unicode Locale Data Markup Language
* (LDML)
* @param desiredLocale the desired locale.
* @return the Collator for the desired locale.
* @see java.util.Locale

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -86,7 +86,7 @@ public class LocaleResources {
// cache key prefixes
private static final String BREAK_ITERATOR_INFO = "BII.";
private static final String CALENDAR_DATA = "CALD.";
private static final String COLLATION_DATA_CACHEKEY = "COLD";
private static final String COLLATION_DATA = "COLD.";
private static final String DECIMAL_FORMAT_SYMBOLS_DATA_CACHEKEY = "DFSD";
private static final String CURRENCY_NAMES = "CN.";
private static final String LOCALE_NAMES = "LN.";
@ -186,17 +186,25 @@ public class LocaleResources {
public String getCollationData() {
String key = "Rule";
String cacheKey = COLLATION_DATA;
String coldata = "";
try {
var type = locale.getUnicodeLocaleType("co");
if (type != null && !type.isEmpty() && !type.equalsIgnoreCase("standard")) {
key += "." + type;
cacheKey += type;
}
} catch (IllegalArgumentException ignore) {}
removeEmptyReferences();
ResourceReference data = cache.get(COLLATION_DATA_CACHEKEY);
ResourceReference data = cache.get(cacheKey);
if (data == null || ((coldata = (String) data.get()) == null)) {
ResourceBundle rb = localeData.getCollationData(locale);
if (rb.containsKey(key)) {
coldata = rb.getString(key);
}
cache.put(COLLATION_DATA_CACHEKEY,
new ResourceReference(COLLATION_DATA_CACHEKEY, coldata, referenceQueue));
cache.put(cacheKey, new ResourceReference(cacheKey, coldata, referenceQueue));
}
return coldata;

View File

@ -54,6 +54,16 @@ public class CollationData_sv extends ListResourceBundle {
"< o\u030b , O\u030b ; \u00f8 , \u00d8 " + // o-double-acute < o-stroke
"& Y, u\u0308 , U\u0308" + // u-double-acute
"; u\u030b, U\u030b "
},
{"Rule.trad",
"& Z < a\u030a , A\u030a" + // a-ring, aa ligature
"< a\u0308 , A\u0308 < a\u030b, A\u030b " + // a-umlaut, a-double-acute
"< \u00e6 , \u00c6 " + // ae ligature
"< o\u0308 , O\u0308 " + // o-umlaut
"< o\u030b , O\u030b ; \u00f8 , \u00d8 " + // o-double-acute < o-stroke
"& V ; w , W" +
"& Y, u\u0308 , U\u0308" + // u-double-acute
"; u\u030b, U\u030b "
}
};
}

View File

@ -23,23 +23,50 @@
/*
* @test
* @bug 8306927
* @bug 8306927 8307547
* @modules jdk.localedata
* @summary Tests Swedish collation involving 'v' and 'w'.
* @run junit SwedishTest
*/
import java.text.Collator;
import java.util.Arrays;
import java.util.Locale;
import java.util.stream.Stream;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
public class SwedishTest {
private static final String[] src = {"wb", "va", "vc"};
private static final String[] expected = {"va", "vc", "wb"};
private static final String[] standard = {"va", "vc", "wb"};
private static final String[] traditional = {"va", "wb", "vc"};
public static void main (String[] args) {
Arrays.sort(src, Collator.getInstance(Locale.of("sv")));
if (!Arrays.equals(src, expected)) {
throw new RuntimeException("Swedish collation failed");
}
/**
 * Sorts a copy of the source strings with the {@code Collator} obtained
 * for the given locale and checks the resulting order.
 *
 * @param l the locale passed to {@code Collator.getInstance}
 * @param expected the expected order of the source strings after sorting
 */
@ParameterizedTest
@MethodSource("swedishData")
public void testSwedishCollation(Locale l, String[] expected) {
    // Sort a private copy: the source array is a shared static fixture, and
    // sorting it in place would leak one invocation's result into the next.
    String[] actual = src.clone();
    Arrays.sort(actual, Collator.getInstance(l));
    assertArrayEquals(expected, actual);
}
/**
 * Supplies the (locale, expected order) pairs for the parameterized test.
 * Only the "trad" collation type (in either case) is paired with the
 * traditional order; every other tag is paired with the standard order.
 */
private static Stream<Arguments> swedishData() {
    Stream<Arguments> standardCases = Stream.of(
            "sv",
            "sv-u-co-standard",
            "sv-u-co-STANDARD",
            "sv-u-co-traditio",
            "sv-u-co-TRADITIO",
            "sv-u-co-traditional",
            "sv-u-co-TRADITIONAL",
            // the new standard used to be called "reformed"
            "sv-u-co-reformed",
            "sv-u-co-REFORMED")
        .map(tag -> Arguments.of(Locale.forLanguageTag(tag), standard));

    Stream<Arguments> traditionalCases = Stream.of(
            "sv-u-co-trad",
            "sv-u-co-TRAD")
        .map(tag -> Arguments.of(Locale.forLanguageTag(tag), traditional));

    // Standard cases first, then traditional ones, matching the original order.
    return Stream.concat(standardCases, traditionalCases);
}
}