8329691: Support nonlikelyScript parent locale inheritance

Reviewed-by: joehw
This commit is contained in:
Naoto Sato 2024-05-09 15:54:25 +00:00
parent aa4cddd4b8
commit c7d98df2ac
5 changed files with 255 additions and 75 deletions

View File

@ -87,6 +87,7 @@ public class CLDRConverter {
static final String ZONE_NAME_PREFIX = "timezone.displayname.";
static final String METAZONE_ID_PREFIX = "metazone.id.";
static final String PARENT_LOCALE_PREFIX = "parentLocale.";
static final String LIKELY_SCRIPT_PREFIX = "likelyScript.";
static final String META_EMPTY_ZONE_NAME = "EMPTY_ZONE";
static final String[] EMPTY_ZONE = {"", "", "", "", "", ""};
static final String META_ETCUTC_ZONE_NAME = "ETC_UTC";
@ -114,9 +115,13 @@ public class CLDRConverter {
// "parentLocales" map
private static final Map<String, SortedSet<String>> parentLocalesMap = new HashMap<>();
static boolean nonlikelyScript;
private static final ResourceBundle.Control defCon =
ResourceBundle.Control.getControl(ResourceBundle.Control.FORMAT_DEFAULT);
// "likelyScript" map
private static final Map<String, SortedSet<String>> likelyScriptMap = new HashMap<>();
private static Set<String> AVAILABLE_TZIDS;
static int copyrightYear;
static String jdkHeaderTemplate;
@ -175,7 +180,7 @@ public class CLDRConverter {
private static boolean verbose;
private CLDRConverter() {
// no instantiation
// no instantiation
}
@SuppressWarnings("AssignmentToForLoopParameter")
@ -475,8 +480,8 @@ public class CLDRConverter {
parseLDMLFile(new File(SPPL_SOURCE_FILE), handlerSuppl);
Map<String, Object> parentData = handlerSuppl.getData("root");
parentData.keySet().stream()
.filter(key -> key.startsWith(PARENT_LOCALE_PREFIX))
.forEach(key -> {
.filter(key -> key.startsWith(PARENT_LOCALE_PREFIX))
.forEach(key -> {
parentLocalesMap.put(key, new TreeSet<String>(
Arrays.asList(((String)parentData.get(key)).split(" "))));
});
@ -492,6 +497,16 @@ public class CLDRConverter {
// Parse likelySubtags
handlerLikelySubtags = new LikelySubtagsParseHandler();
parseLDMLFile(new File(LIKELYSUBTAGS_SOURCE_FILE), handlerLikelySubtags);
handlerLikelySubtags.getData().forEach((from, to) -> {
if (!from.contains("-")) { // look for language-only tag
var script = to.split("-")[1];
var key = LIKELY_SCRIPT_PREFIX + script;
var prev = likelyScriptMap.putIfAbsent(key, new TreeSet<String>(Set.of(from)));
if (prev != null) {
prev.add(from);
}
}
});
// Parse supplementalMetadata
// Currently interested in deprecated time zone ids and language aliases.
@ -561,6 +576,7 @@ public class CLDRConverter {
// for now.
if (isBaseModule) {
metaInfo.putAll(parentLocalesMap);
metaInfo.putAll(likelyScriptMap);
}
for (Bundle bundle : bundles) {
@ -1135,7 +1151,7 @@ public class CLDRConverter {
// check irregular parents
for (int i = 0; i < candidates.size(); i++) {
Locale l = candidates.get(i);
Locale p = childToParentLocaleMap.get(l);
Locale p = getParentLocale(l);
if (!l.equals(Locale.ROOT) &&
Objects.nonNull(p) &&
!candidates.get(i+1).equals(p)) {
@ -1152,6 +1168,27 @@ public class CLDRConverter {
return candidates;
}
/**
 * Returns the parent locale of {@code child}, or {@code null} if no
 * irregular parent applies.
 *
 * An explicit entry in {@code childToParentLocaleMap} always wins. Otherwise,
 * when the {@code nonlikelyScript} flag is set and {@code child} has a script
 * but no country, the child's script is compared with the likely script
 * recorded for its language in {@code likelyScriptMap}: a script other than
 * the likely one makes the child inherit directly from {@link Locale#ROOT}.
 *
 * @param child the locale whose parent is requested
 * @return the explicit parent, {@link Locale#ROOT} for a non-likely script,
 *         or {@code null} when neither applies (including when the language
 *         has no likely script entry)
 */
private static Locale getParentLocale(Locale child) {
    Locale parent = childToParentLocaleMap.get(child);
    if (parent != null || !nonlikelyScript || !child.getCountry().isEmpty()) {
        return parent;
    }

    var script = child.getScript();
    if (script.isEmpty()) {
        return null;
    }

    // likelyScriptMap is keyed by LIKELY_SCRIPT_PREFIX + script and its values
    // are sets of bare language codes, so the set has to be probed with the
    // plain language and the key compared in its prefixed form. (The
    // space-padded " lang " form only applies to the generated runtime map,
    // whose values are space-delimited strings.)
    var lang = child.getLanguage();
    var likelyKey = LIKELY_SCRIPT_PREFIX + script;
    for (var entry : likelyScriptMap.entrySet()) {
        if (entry.getValue().contains(lang)) {
            // The child already uses the likely script: no irregular parent.
            return entry.getKey().equals(likelyKey) ? null : Locale.ROOT;
        }
    }
    return null;
}
private static void generateZoneName() throws Exception {
Files.createDirectories(Paths.get(DESTINATION_DIR, "java", "time", "format"));
Files.write(Paths.get(DESTINATION_DIR, "java", "time", "format", "ZoneName.java"),

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2012, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -38,6 +38,7 @@ import java.util.Locale;
import java.util.Objects;
import java.util.Set;
import java.util.SortedSet;
import java.util.stream.Collectors;
class ResourceBundleGenerator implements BundleGenerator {
// preferred timezones - keeping compatibility with JDK1.1 3 letter abbreviations
@ -306,81 +307,77 @@ class ResourceBundleGenerator implements BundleGenerator {
import sun.util.locale.provider.LocaleProviderAdapter;
public class %s implements LocaleDataMetaInfo {
private static final Map<String, String> resourceNameToLocales = HashMap.newHashMap(%d);
%s
static {
""", CLDRConverter.isBaseModule ? "cldr" : "resources.cldr.provider",
className, metaInfo.keySet().stream().filter(k -> k.equals("AvailableLocales")).count(),
CLDRConverter.isBaseModule ?
"""
""",
CLDRConverter.isBaseModule ? "cldr" : "resources.cldr.provider",
className);
if (CLDRConverter.isBaseModule) {
out.printf("""
private static final Map<Locale, String[]> parentLocalesMap = HashMap.newHashMap(%d);
private static final Map<String, String> languageAliasMap = HashMap.newHashMap(%d);
static final boolean nonlikelyScript = %s; // package access from CLDRLocaleProviderAdapter
static {
""".formatted(
metaInfo.keySet().stream().filter(k -> k.startsWith(CLDRConverter.PARENT_LOCALE_PREFIX)).count(),
CLDRConverter.handlerSupplMeta.getLanguageAliasData().size()) :
"");
CLDRConverter.handlerSupplMeta.getLanguageAliasData().size(),
Boolean.valueOf(CLDRConverter.nonlikelyScript)));
for (String key : metaInfo.keySet()) {
if (key.startsWith(CLDRConverter.PARENT_LOCALE_PREFIX)) {
String parentTag = key.substring(CLDRConverter.PARENT_LOCALE_PREFIX.length());
if ("root".equals(parentTag)) {
out.printf(" parentLocalesMap.put(Locale.ROOT,\n");
} else {
out.printf(" parentLocalesMap.put(Locale.forLanguageTag(\"%s\"),\n",
parentTag);
}
String[] children = toLocaleList(metaInfo.get(key), true).split(" ");
Arrays.sort(children);
out.printf(" new String[] {\n" +
" ");
int count = 0;
for (int i = 0; i < children.length; i++) {
String child = children[i];
out.printf("\"%s\", ", child);
count += child.length() + 4;
if (i != children.length - 1 && count > 64) {
out.printf("\n ");
count = 0;
for (String key : metaInfo.keySet()) {
if (key.startsWith(CLDRConverter.PARENT_LOCALE_PREFIX)) {
String parentTag = key.substring(CLDRConverter.PARENT_LOCALE_PREFIX.length());
if ("root".equals(parentTag)) {
out.printf(" parentLocalesMap.put(Locale.ROOT,\n");
} else {
out.printf(" parentLocalesMap.put(Locale.forLanguageTag(\"%s\"),\n",
parentTag);
}
}
out.printf("\n });\n");
} else {
if ("AvailableLocales".equals(key)) {
out.printf(" resourceNameToLocales.put(\"%s\",\n", key);
out.printf(" \"%s\");\n", toLocaleList(applyLanguageAliases(metaInfo.get(key)), false));
generateStringArray(metaInfo.get(key), out);
}
}
}
// for languageAliasMap
if (CLDRConverter.isBaseModule) {
out.println();
// for languageAliasMap
CLDRConverter.handlerSupplMeta.getLanguageAliasData().forEach((key, value) -> {
out.printf(" languageAliasMap.put(\"%s\", \"%s\");\n", key, value);
});
}
out.printf(" }\n\n");
out.printf(" }\n\n");
// end of static initializer block.
// end of static initializer block.
// Canonical TZ names for delayed initialization
if (CLDRConverter.isBaseModule) {
// Delayed initialization section
out.printf("""
private static class TZCanonicalIDMapHolder {
static final Map<String, String> tzCanonicalIDMap = HashMap.newHashMap(%d);
private static class CLDRMapHolder {
private static final Map<String, String> tzCanonicalIDMap = HashMap.newHashMap(%d);
private static final Map<String, String> likelyScriptMap = HashMap.newHashMap(%d);
static {
""", CLDRConverter.handlerTimeZone.getData().size());
""", CLDRConverter.handlerTimeZone.getData().size(),
metaInfo.keySet().stream().filter(k -> k.startsWith(CLDRConverter.LIKELY_SCRIPT_PREFIX)).count());
CLDRConverter.handlerTimeZone.getData().entrySet().stream()
.forEach(e -> {
String[] ids = ((String)e.getValue()).split("\\s");
out.printf(" tzCanonicalIDMap.put(\"%s\", \"%s\");\n", e.getKey(),
ids[0]);
ids[0]);
for (int i = 1; i < ids.length; i++) {
out.printf(" tzCanonicalIDMap.put(\"%s\", \"%s\");\n", ids[i],
ids[0]);
}
});
out.printf(" }\n }\n\n");
out.println();
// for likelyScript map
for (String key : metaInfo.keySet()) {
if (key.startsWith(CLDRConverter.LIKELY_SCRIPT_PREFIX)) {
// ensure spaces at the begin/end for delimiting purposes
out.printf(" likelyScriptMap.put(\"%s\", \"%s\");\n",
key.substring(CLDRConverter.LIKELY_SCRIPT_PREFIX.length()),
" " + metaInfo.get(key).stream().collect(Collectors.joining(" ")) + " ");
}
}
out.printf(" }\n }\n");
}
out.println();
out.printf("""
@Override
@ -390,12 +387,13 @@ class ResourceBundleGenerator implements BundleGenerator {
@Override
public String availableLanguageTags(String category) {
return resourceNameToLocales.getOrDefault(category, "");
return " %s";
}
%s
}
""",
CLDRConverter.isBaseModule ? """
toLocaleList(applyLanguageAliases(metaInfo.get("AvailableLocales")), false));
if(CLDRConverter.isBaseModule) {
out.printf("""
@Override
public Map<String, String> getLanguageAliasMap() {
@ -404,16 +402,41 @@ class ResourceBundleGenerator implements BundleGenerator {
@Override
public Map<String, String> tzCanonicalIDs() {
return TZCanonicalIDMapHolder.tzCanonicalIDMap;
return CLDRMapHolder.tzCanonicalIDMap;
}
public Map<Locale, String[]> parentLocales() {
return parentLocalesMap;
}
""" : "");
// package access from CLDRLocaleProviderAdapter
Map<String, String> likelyScriptMap() {
return CLDRMapHolder.likelyScriptMap;
}
""");
}
out.printf("}\n");
}
}
/**
 * Writes the locales in {@code set} to {@code out} as the body of a
 * {@code new String[] { ... }} expression, terminated by {@code });}.
 *
 * The set is first converted with {@code toLocaleList(set, true)}, split on
 * single spaces and sorted. Each element is emitted as a quoted string
 * followed by a comma; a line break is inserted once the running width of
 * the current line exceeds 64, except after the last element.
 *
 * @param set the locale identifiers to emit
 * @param out the writer receiving the generated source text
 * @throws IOException declared for callers; not thrown by the visible body
 */
private static void generateStringArray(SortedSet<String> set, PrintWriter out) throws IOException {
    String[] sortedTags = toLocaleList(set, true).split(" ");
    Arrays.sort(sortedTags);

    out.printf(" new String[] {\n ");

    final int lastIndex = sortedTags.length - 1;
    int lineWidth = 0;
    for (int idx = 0; idx <= lastIndex; idx++) {
        String tag = sortedTags[idx];
        out.printf("\"%s\", ", tag);
        // 4 accounts for the two quotes, the comma and the space
        lineWidth += tag.length() + 4;
        if (lineWidth > 64 && idx != lastIndex) {
            out.printf("\n ");
            lineWidth = 0;
        }
    }

    out.printf("\n });\n");
}
private static final Locale.Builder LOCALE_BUILDER = new Locale.Builder();
private static boolean isBaseLocale(String localeID) {
localeID = localeID.replaceAll("-", "_");
@ -433,7 +456,9 @@ class ResourceBundleGenerator implements BundleGenerator {
if (!all && CLDRConverter.isBaseModule ^ isBaseLocale(id)) {
continue;
}
sb.append(' ');
if (sb.length() > 0) {
sb.append(' ');
}
sb.append(id);
}
}

View File

@ -57,29 +57,22 @@ class SupplementalDataParseHandler extends AbstractLDMLHandler<Object> {
// the weekData is listed using country code.
//
// weekData are generated per each country
private final Map<String, Object> firstDayMap;
private final Map<String, Object> minDaysMap;
private static final Map<String, Object> firstDayMap = new HashMap<>();
private static final Map<String, Object> minDaysMap = new HashMap<>();
// Parent locales. This information is generated only for the
// base meta info, in the format of
//
// parentLocale.<parent_locale_id>=<child_locale_id>(" "<child_locale_id>)+
private final Map<String, String> parentLocalesMap;
private static final Map<String, String> parentLocalesMap = new HashMap<>();
// Input Skeleton map for "preferred" and "allowed"
// Map<"preferred"/"allowed", Map<"skeleton", SortedSet<"regions">>>
private final Map<String, Map<String, SortedSet<String>>> inputSkeletonMap;
private static final Map<String, Map<String, SortedSet<String>>> inputSkeletonMap = new HashMap<>();
// "component" specific to this parent locale chain
private String currentParentLocaleComponent;
SupplementalDataParseHandler() {
firstDayMap = new HashMap<>();
minDaysMap = new HashMap<>();
parentLocalesMap = new HashMap<>();
inputSkeletonMap = new HashMap<>();
}
/**
* It returns Map that contains the firstDay and minDays information for
* the country. The Map is created in JRE format after obtaining the data
@ -158,9 +151,15 @@ class SupplementalDataParseHandler extends AbstractLDMLHandler<Object> {
// Ignore component for now, otherwise "zh-Hant" falling back to "zh" would happen
// https://github.com/unicode-org/cldr/pull/2664
if (currentParentLocaleComponent == null) {
var parent = attributes.getValue("parent").replaceAll("_", "-");
parentLocalesMap.put(
attributes.getValue("parent").replaceAll("_", "-"),
parent,
attributes.getValue("locales").replaceAll("_", "-"));
if ("root".equals(parent)) {
CLDRConverter.nonlikelyScript = "nonlikelyScript".equals(attributes.getValue("localeRules"));
}
}
}
break;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2012, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -259,6 +259,24 @@ public class CLDRLocaleProviderAdapter extends JRELocaleProviderAdapter {
break;
}
}
if (parent == null) {
// check nonlikelyScript locales
if (CLDRBaseLocaleDataMetaInfo.nonlikelyScript && locale.getCountry().isEmpty()) {
var lang = " " + locale.getLanguage() + " ";
var script= locale.getScript();
if (!script.isEmpty()) {
parent = baseMetaInfo.likelyScriptMap().entrySet().stream()
.filter(e -> e.getValue().contains(lang))
.findAny()
.map(Map.Entry::getKey)
.map(likely -> likely.equals(script) ? null : Locale.ROOT)
.orElse(null);
}
}
}
// no parent found
if (parent == null) {
parent = locale; // non existent marker
}

View File

@ -0,0 +1,101 @@
/*
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @bug 8329691
* @modules java.base/sun.util.locale.provider
* java.base/sun.util.cldr
* @summary Tests CLDR's `nonlikelyScript` attribute is correctly implemented
* with the CLDRLocaleProviderAdapter
* @run junit NonLikelyScriptTest
*/
import java.util.List;
import java.util.Locale;
import java.util.stream.Stream;
import sun.util.cldr.CLDRLocaleProviderAdapter;
import sun.util.locale.provider.LocaleProviderAdapter;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertEquals;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
/**
 * Checks the candidate locale chain that the CLDR locale provider adapter
 * returns for locales with a likely script, with an explicitly listed
 * non-likely script, and with an implicitly non-likely script.
 */
public class NonLikelyScriptTest {

    // Adapter under test, obtained through the provider adapter factory.
    private static final CLDRLocaleProviderAdapter ADAPTER =
        (CLDRLocaleProviderAdapter) LocaleProviderAdapter.forType(LocaleProviderAdapter.Type.CLDR);

    // Shorthand for building a locale from a BCP 47 language tag.
    private static Locale tag(String languageTag) {
        return Locale.forLanguageTag(languageTag);
    }

    // One test case: the requested locale and the full candidate list expected for it.
    private static Arguments chain(Locale requested, Locale... expectedCandidates) {
        return Arguments.of(requested, List.of(expectedCandidates));
    }

    // A case whose only candidates are the locale itself followed by ROOT.
    private static Arguments selfThenRoot(String languageTag) {
        Locale self = tag(languageTag);
        return chain(self, self, Locale.ROOT);
    }

    private static Stream<Arguments> parentLocales() {
        Locale zhHans = tag("zh-Hans");
        Locale zhHant = tag("zh-Hant");

        return Stream.of(
            // likely script
            chain(tag("az-Latn"), tag("az-Latn"), Locale.of("az"), Locale.ROOT),
            chain(tag("ru-Cyrl"), tag("ru-Cyrl"), Locale.of("ru"), Locale.ROOT),
            chain(tag("en-Latn"), tag("en-Latn"), Locale.ENGLISH, Locale.ROOT),
            chain(zhHans, zhHans, Locale.CHINA, Locale.CHINESE, Locale.ROOT),
            chain(Locale.CHINA,
                  tag("zh-Hans-CN"), zhHans, Locale.CHINA, Locale.CHINESE, Locale.ROOT),
            chain(tag("zh-Hans-SG"),
                  tag("zh-Hans-SG"), zhHans, tag("zh-SG"), Locale.CHINESE, Locale.ROOT),

            // non-likely script, explicit (as of CLDR 45)
            selfThenRoot("az-Arab"),
            selfThenRoot("az-Cyrl"),
            selfThenRoot("en-Dsrt"),
            chain(zhHant, zhHant, Locale.ROOT),
            chain(Locale.TAIWAN, tag("zh-Hant-TW"), zhHant, Locale.ROOT),
            chain(tag("zh-Hant-MO"),
                  tag("zh-Hant-MO"), tag("zh-Hant-HK"), zhHant, Locale.ROOT),

            // non-likely script, implicit
            selfThenRoot("az-Xxxx"),
            selfThenRoot("ru-Latn"),
            selfThenRoot("ru-Xxxx"),
            selfThenRoot("en-Xxxx"),
            selfThenRoot("zh-Xxxx")
        );
    }

    /**
     * Asserts that the adapter's candidate locales for {@code locale}, using
     * an empty base name, equal {@code expected}.
     */
    @ParameterizedTest
    @MethodSource("parentLocales")
    public void checkParentLocales(Locale locale, List<Locale> expected) {
        List<Locale> candidates = ADAPTER.getCandidateLocales("", locale);
        assertEquals(expected, candidates);
    }
}