8307547: Support variant collations
Reviewed-by: srl, jlu, alanb
This commit is contained in:
parent
4e92991809
commit
f9a785e855
@ -41,7 +41,6 @@ package java.text;
|
||||
import java.lang.ref.SoftReference;
|
||||
import java.text.spi.CollatorProvider;
|
||||
import java.util.Locale;
|
||||
import java.util.ResourceBundle;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.ConcurrentMap;
|
||||
import sun.util.locale.provider.LocaleProviderAdapter;
|
||||
@ -71,7 +70,6 @@ import sun.util.locale.provider.LocaleServiceProviderPool;
|
||||
* <p>
|
||||
* The following example shows how to compare two strings using
|
||||
* the {@code Collator} for the default locale.
|
||||
* <blockquote>
|
||||
* {@snippet lang=java :
|
||||
* // Compare two strings in the default locale
|
||||
* Collator myCollator = Collator.getInstance();
|
||||
@ -81,7 +79,6 @@ import sun.util.locale.provider.LocaleServiceProviderPool;
|
||||
* System.out.println("abc is greater than or equal to ABC");
|
||||
* }
|
||||
* }
|
||||
* </blockquote>
|
||||
*
|
||||
* <p>
|
||||
* You can set a {@code Collator}'s <em>strength</em> property
|
||||
@ -94,7 +91,6 @@ import sun.util.locale.provider.LocaleServiceProviderPool;
|
||||
* "e" and "E" are tertiary differences and "e" and "e" are identical.
|
||||
* The following shows how both case and accents could be ignored for
|
||||
* US English.
|
||||
* <blockquote>
|
||||
* {@snippet lang=java :
|
||||
* // Get the Collator for US English and set its strength to PRIMARY
|
||||
* Collator usCollator = Collator.getInstance(Locale.US);
|
||||
@ -103,7 +99,6 @@ import sun.util.locale.provider.LocaleServiceProviderPool;
|
||||
* System.out.println("Strings are equivalent");
|
||||
* }
|
||||
* }
|
||||
* </blockquote>
|
||||
* <p>
|
||||
* For comparing {@code String}s exactly once, the {@code compare}
|
||||
* method provides the best performance. When sorting a list of
|
||||
@ -114,7 +109,7 @@ import sun.util.locale.provider.LocaleServiceProviderPool;
|
||||
* against other {@code CollationKey}s. A {@code CollationKey} is
|
||||
* created by a {@code Collator} object for a given {@code String}.
|
||||
* <br>
|
||||
* <strong>Note:</strong> {@code CollationKey}s from different
|
||||
* @apiNote {@code CollationKey}s from different
|
||||
* {@code Collator}s can not be compared. See the class description
|
||||
* for {@link CollationKey}
|
||||
* for an example using {@code CollationKey}s.
|
||||
@ -222,7 +217,7 @@ public abstract class Collator
|
||||
|
||||
/**
|
||||
* Gets the Collator for the current default locale.
|
||||
* The default locale is determined by java.util.Locale.getDefault.
|
||||
* The default locale is determined by {@link Locale#getDefault()}.
|
||||
* @return the Collator for the default locale (for example, en_US).
|
||||
* @see java.util.Locale#getDefault
|
||||
*/
|
||||
@ -232,6 +227,19 @@ public abstract class Collator
|
||||
|
||||
/**
|
||||
* Gets the Collator for the desired locale.
|
||||
* @apiNote Implementations of {@code Collator} class may produce
|
||||
* different instances based on the "{@code co}"
|
||||
* <a href="https://www.unicode.org/reports/tr35/#UnicodeCollationIdentifier">
|
||||
* Unicode collation identifier</a> in the {@code desiredLocale}.
|
||||
* For example:
|
||||
* {@snippet lang = java:
|
||||
* Collator.getInstance(Locale.forLanguageTag("sv-u-co-trad"));
|
||||
* }
|
||||
* may return a {@code Collator} instance with the Swedish traditional sorting, which
|
||||
* gives 'v' and 'w' the same sorting order, while the {@code Collator} instance
|
||||
* for the Swedish locale without "co" identifier distinguishes 'v' and 'w'.
|
||||
* @spec https://www.unicode.org/reports/tr35/ Unicode Locale Data Markup Language
|
||||
* (LDML)
|
||||
* @param desiredLocale the desired locale.
|
||||
* @return the Collator for the desired locale.
|
||||
* @see java.util.Locale
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -86,7 +86,7 @@ public class LocaleResources {
|
||||
// cache key prefixes
|
||||
private static final String BREAK_ITERATOR_INFO = "BII.";
|
||||
private static final String CALENDAR_DATA = "CALD.";
|
||||
private static final String COLLATION_DATA_CACHEKEY = "COLD";
|
||||
private static final String COLLATION_DATA = "COLD.";
|
||||
private static final String DECIMAL_FORMAT_SYMBOLS_DATA_CACHEKEY = "DFSD";
|
||||
private static final String CURRENCY_NAMES = "CN.";
|
||||
private static final String LOCALE_NAMES = "LN.";
|
||||
@ -186,17 +186,25 @@ public class LocaleResources {
|
||||
|
||||
public String getCollationData() {
|
||||
String key = "Rule";
|
||||
String cacheKey = COLLATION_DATA;
|
||||
String coldata = "";
|
||||
|
||||
try {
|
||||
var type = locale.getUnicodeLocaleType("co");
|
||||
if (type != null && !type.isEmpty() && !type.equalsIgnoreCase("standard")) {
|
||||
key += "." + type;
|
||||
cacheKey += type;
|
||||
}
|
||||
} catch (IllegalArgumentException ignore) {}
|
||||
|
||||
removeEmptyReferences();
|
||||
ResourceReference data = cache.get(COLLATION_DATA_CACHEKEY);
|
||||
ResourceReference data = cache.get(cacheKey);
|
||||
if (data == null || ((coldata = (String) data.get()) == null)) {
|
||||
ResourceBundle rb = localeData.getCollationData(locale);
|
||||
if (rb.containsKey(key)) {
|
||||
coldata = rb.getString(key);
|
||||
}
|
||||
cache.put(COLLATION_DATA_CACHEKEY,
|
||||
new ResourceReference(COLLATION_DATA_CACHEKEY, coldata, referenceQueue));
|
||||
cache.put(cacheKey, new ResourceReference(cacheKey, coldata, referenceQueue));
|
||||
}
|
||||
|
||||
return coldata;
|
||||
|
@ -54,6 +54,16 @@ public class CollationData_sv extends ListResourceBundle {
|
||||
"< o\u030b , O\u030b ; \u00f8 , \u00d8 " + // o-double-acute < o-stroke
|
||||
"& Y, u\u0308 , U\u0308" + // u-umlaut ; u-double-acute
|
||||
"; u\u030b, U\u030b "
|
||||
},
|
||||
{"Rule.trad",
|
||||
"& Z < a\u030a , A\u030a" + // a-ring, aa ligature
|
||||
"< a\u0308 , A\u0308 < a\u030b, A\u030b " + // a-umlaut, a-double-acute
|
||||
"< \u00e6 , \u00c6 " + // ae ligature
|
||||
"< o\u0308 , O\u0308 " + // o-umlaut
|
||||
"< o\u030b , O\u030b ; \u00f8 , \u00d8 " + // o-double-acute < o-stroke
|
||||
"& V ; w , W" +
|
||||
"& Y, u\u0308 , U\u0308" + // u-umlaut ; u-double-acute
|
||||
"; u\u030b, U\u030b "
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -23,23 +23,50 @@
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @bug 8306927
|
||||
* @bug 8306927 8307547
|
||||
* @modules jdk.localedata
|
||||
* @summary Tests Swedish collation involving 'v' and 'w'.
|
||||
* @run junit SwedishTest
|
||||
*/
|
||||
|
||||
import java.text.Collator;
|
||||
import java.util.Arrays;
|
||||
import java.util.Locale;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.params.ParameterizedTest;
|
||||
import org.junit.jupiter.params.provider.Arguments;
|
||||
import org.junit.jupiter.params.provider.MethodSource;
|
||||
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
|
||||
|
||||
public class SwedishTest {
|
||||
private static final String[] src = {"wb", "va", "vc"};
|
||||
private static final String[] expected = {"va", "vc", "wb"};
|
||||
private static final String[] standard = {"va", "vc", "wb"};
|
||||
private static final String[] traditional = {"va", "wb", "vc"};
|
||||
|
||||
public static void main (String[] args) {
|
||||
Arrays.sort(src, Collator.getInstance(Locale.of("sv")));
|
||||
if (!Arrays.equals(src, expected)) {
|
||||
throw new RuntimeException("Swedish collation failed");
|
||||
@ParameterizedTest
|
||||
@MethodSource("swedishData")
|
||||
public void testSwedishCollation(Locale l, String[] expected) {
|
||||
Arrays.sort(src, Collator.getInstance(l));
|
||||
assertArrayEquals(expected, src);
|
||||
}
|
||||
|
||||
private static Stream<Arguments> swedishData() {
|
||||
return Stream.of(
|
||||
Arguments.of(Locale.forLanguageTag("sv"), standard),
|
||||
Arguments.of(Locale.forLanguageTag("sv-u-co-standard"), standard),
|
||||
Arguments.of(Locale.forLanguageTag("sv-u-co-STANDARD"), standard),
|
||||
Arguments.of(Locale.forLanguageTag("sv-u-co-traditio"), standard),
|
||||
Arguments.of(Locale.forLanguageTag("sv-u-co-TRADITIO"), standard),
|
||||
Arguments.of(Locale.forLanguageTag("sv-u-co-traditional"), standard),
|
||||
Arguments.of(Locale.forLanguageTag("sv-u-co-TRADITIONAL"), standard),
|
||||
// the new standard used to be called "reformed"
|
||||
Arguments.of(Locale.forLanguageTag("sv-u-co-reformed"), standard),
|
||||
Arguments.of(Locale.forLanguageTag("sv-u-co-REFORMED"), standard),
|
||||
|
||||
Arguments.of(Locale.forLanguageTag("sv-u-co-trad"), traditional),
|
||||
Arguments.of(Locale.forLanguageTag("sv-u-co-TRAD"), traditional)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user