8179071: Month value is inconsistent between CLDR and Java in some locales

Handled Language aliases from CLDR SupplementalMetaData

Reviewed-by: naoto
This commit is contained in:
Rachna Goel 2018-04-30 11:59:42 +05:30
parent 9037ee0ef1
commit a01b2f3b73
9 changed files with 185 additions and 13 deletions
make/jdk/src/classes/build/tools/cldrconverter
src/java.base/share/classes/sun/util
test/jdk
java/util/Locale
sun/text/resources
tools/jlink/plugins

@ -90,8 +90,8 @@ public class CLDRConverter {
static final String[] EMPTY_ZONE = {"", "", "", "", "", ""};
private static SupplementDataParseHandler handlerSuppl;
private static SupplementalMetadataParseHandler handlerSupplMeta;
private static LikelySubtagsParseHandler handlerLikelySubtags;
static SupplementalMetadataParseHandler handlerSupplMeta;
static NumberingSystemsParseHandler handlerNumbering;
static MetaZonesParseHandler handlerMetaZones;
static TimeZoneParseHandler handlerTimeZone;
@ -428,7 +428,7 @@ public class CLDRConverter {
parseLDMLFile(new File(LIKELYSUBTAGS_SOURCE_FILE), handlerLikelySubtags);
// Parse supplementalMetadata
// Currently only interested in deprecated time zone ids.
// Currently interested in deprecated time zone ids and language aliases.
handlerSupplMeta = new SupplementalMetadataParseHandler();
parseLDMLFile(new File(SPPL_META_SOURCE_FILE), handlerSupplMeta);
}

@ -270,7 +270,8 @@ class ResourceBundleGenerator implements BundleGenerator {
out.printf("public class %s implements LocaleDataMetaInfo {\n", className);
out.printf(" private static final Map<String, String> resourceNameToLocales = new HashMap<>();\n" +
(CLDRConverter.isBaseModule ?
" private static final Map<Locale, String[]> parentLocalesMap = new HashMap<>();\n\n" :
" private static final Map<Locale, String[]> parentLocalesMap = new HashMap<>();\n" +
" private static final Map<String, String> languageAliasMap = new HashMap<>();\n\n" :
"\n") +
" static {\n");
@ -301,10 +302,16 @@ class ResourceBundleGenerator implements BundleGenerator {
} else {
if ("AvailableLocales".equals(key)) {
out.printf(" resourceNameToLocales.put(\"%s\",\n", key);
out.printf(" \"%s\");\n", toLocaleList(metaInfo.get(key), false));
out.printf(" \"%s\");\n", toLocaleList(applyLanguageAliases(metaInfo.get(key)), false));
}
}
}
// for languageAliasMap
if (CLDRConverter.isBaseModule) {
CLDRConverter.handlerSupplMeta.getLanguageAliasData().forEach((key, value) -> {
out.printf(" languageAliasMap.put(\"%s\", \"%s\");\n", key, value);
});
}
out.printf(" }\n\n");
@ -339,6 +346,10 @@ class ResourceBundleGenerator implements BundleGenerator {
" }\n\n");
if (CLDRConverter.isBaseModule) {
out.printf(" @Override\n" +
" public Map<String, String> getLanguageAliasMap() {\n" +
" return languageAliasMap;\n" +
" }\n\n");
out.printf(" @Override\n" +
" public Map<String, String> tzCanonicalIDs() {\n" +
" return TZCanonicalIDMapHolder.tzCanonicalIDMap;\n" +
@ -377,4 +388,13 @@ class ResourceBundleGenerator implements BundleGenerator {
}
return sb.toString();
}
/**
 * Replaces every aliased language tag found in {@code tags} with its
 * replacement tag, using the alias data collected by
 * {@code CLDRConverter.handlerSupplMeta}.
 *
 * Note that the given set is modified in place; the returned reference is
 * the same set that was passed in.
 *
 * @param tags the set of language tags to rewrite (mutated by this call)
 * @return {@code tags}, after alias replacement
 */
private static SortedSet<String> applyLanguageAliases(SortedSet<String> tags) {
    CLDRConverter.handlerSupplMeta.getLanguageAliasData().entrySet().forEach(alias -> {
        // Only add the replacement when the aliased tag was actually present.
        boolean wasPresent = tags.remove(alias.getKey());
        if (wasPresent) {
            tags.add(alias.getValue());
        }
    });
    return tags;
}
}

@ -27,6 +27,8 @@ package build.tools.cldrconverter;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.stream.Stream;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
@ -38,6 +40,12 @@ import org.xml.sax.SAXException;
*/
class SupplementalMetadataParseHandler extends AbstractLDMLHandler<Object> {
// Language alias mappings (alias tag -> replacement tag) collected while parsing.
private final Map<String, String> languageAliasMap;
/**
* Creates a handler whose language alias map starts out empty.
*/
SupplementalMetadataParseHandler() {
languageAliasMap = new HashMap<>();
}
@Override
public InputSource resolveEntity(String publicID, String systemID) throws IOException, SAXException {
// avoid HTTP traffic to unicode.org
@ -57,6 +65,17 @@ class SupplementalMetadataParseHandler extends AbstractLDMLHandler<Object> {
}
pushIgnoredContainer(qName);
break;
case "languageAlias":
String aliasReason = attributes.getValue("reason");
if ("deprecated".equals(aliasReason) || "legacy".equals(aliasReason)) {
String tag = attributes.getValue("type");
if (!checkLegacyLocales(tag)) {
languageAliasMap.put(tag.replaceAll("_", "-"),
attributes.getValue("replacement").replaceAll("_", "-"));
}
}
pushIgnoredContainer(qName);
break;
default:
// treat anything else as a container
pushContainer(qName, attributes);
@ -69,4 +88,13 @@ class SupplementalMetadataParseHandler extends AbstractLDMLHandler<Object> {
.map(k -> String.format(" \"%s\", \"%s\",", k, get(k)))
.sorted();
}
/**
* Returns the language alias mappings collected by this handler.
* Each entry maps the "type" attribute of an accepted languageAlias element
* to its "replacement" attribute, both with underscores replaced by hyphens.
* The returned map is this handler's own mutable map, not a copy.
*
* @return the map of alias language tag to replacement language tag
*/
Map<String, String> getLanguageAliasData() {
return languageAliasMap;
}
/**
 * Tells whether a language alias must be skipped because it belongs to one
 * of the JDK legacy locales that are kept for ISO compatibility.
 *
 * This is a plain prefix test: any tag that merely begins with "no", "in",
 * "iw" or "ji" is reported as legacy.
 *
 * @param tag the alias tag as read from the "type" attribute
 * @return {@code true} if the alias should be skipped
 */
private boolean checkLegacyLocales(String tag) {
    return Stream.of("no", "in", "iw", "ji").anyMatch(tag::startsWith);
}
}

@ -64,8 +64,14 @@ public class CLDRLocaleProviderAdapter extends JRELocaleProviderAdapter {
// parent locales map
private static volatile Map<Locale, Locale> parentLocalesMap;
// language aliases map
private static volatile Map<String,String> langAliasesMap;
// cache to hold locale to locale mapping for language aliases.
private static final Map<Locale, Locale> langAliasesCache;
static {
parentLocalesMap = new ConcurrentHashMap<>();
langAliasesMap = new ConcurrentHashMap<>();
langAliasesCache = new ConcurrentHashMap<>();
// Assuming these locales do NOT have irregular parent locales.
parentLocalesMap.put(Locale.ROOT, Locale.ROOT);
parentLocalesMap.put(Locale.ENGLISH, Locale.ENGLISH);
@ -160,6 +166,22 @@ public class CLDRLocaleProviderAdapter extends JRELocaleProviderAdapter {
return locs;
}
/**
 * Maps a locale to its replacement locale if its language tag is a known
 * language alias; otherwise returns the locale unchanged.
 *
 * The alias table is fetched lazily from {@code baseMetaInfo} the first time
 * it is found empty, and resolved locales are remembered in
 * {@code langAliasesCache}.
 *
 * @param loc the locale to resolve
 * @return the replacement locale for an aliased tag, or {@code loc} itself
 */
private Locale applyAliases(Locale loc) {
    // Read the volatile field once so that the whole lookup below works on
    // a single, consistent map instance.
    Map<String, String> aliases = langAliasesMap;
    if (aliases.isEmpty()) {
        // LocaleDataMetaInfo.getLanguageAliasMap() returns null by default;
        // never store that, or every later call would fail with an NPE.
        Map<String, String> loaded = baseMetaInfo.getLanguageAliasMap();
        if (loaded != null) {
            aliases = loaded;
            langAliasesMap = loaded;
        }
    }

    Locale cached = langAliasesCache.get(loc);
    if (cached != null) {
        return cached;
    }

    // A single get() replaces the former containsKey()/get() pair.
    String replacement = aliases.get(loc.toLanguageTag());
    Locale aliasLocale = (replacement != null)
            ? Locale.forLanguageTag(replacement)
            : loc;
    langAliasesCache.putIfAbsent(loc, aliasLocale);
    return aliasLocale;
}
@Override
protected Set<String> createLanguageTagSet(String category) {
// Assume all categories support the same set as AvailableLocales
@ -194,7 +216,7 @@ public class CLDRLocaleProviderAdapter extends JRELocaleProviderAdapter {
// Implementation of ResourceBundleBasedAdapter
@Override
public List<Locale> getCandidateLocales(String baseName, Locale locale) {
List<Locale> candidates = super.getCandidateLocales(baseName, locale);
List<Locale> candidates = super.getCandidateLocales(baseName, applyAliases(locale));
return applyParentLocales(baseName, candidates);
}

@ -1,5 +1,5 @@
/*
* Copyright (c) 2014, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -58,4 +58,13 @@ public interface LocaleDataMetaInfo {
default public Map<String, String> tzCanonicalIDs() {
return null;
}
/**
* Returns a map of language aliases, which specifies the mapping from a
* source language to the language that should replace it.
* This default implementation returns {@code null}.
* @return map of source language to replacement language; {@code null}
*         from this default implementation.
*/
default public Map<String, String> getLanguageAliasMap(){
return null;
}
}

@ -0,0 +1,93 @@
/*
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @bug 8179071
* @summary Test that language aliases of CLDR supplemental metadata are handled correctly.
* @modules jdk.localedata
* @run main/othervm -Djava.locale.providers=CLDR Bug8179071
*/
/**
* This fix is dependent on a particular version of CLDR data.
*/
import java.time.Month;
import java.time.format.TextStyle;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
/**
 * Checks that language aliases are applied when locale data is looked up:
 * the short name of January is verified for a set of language tags, and the
 * available locales are checked to contain no deprecated or legacy tags.
 */
public class Bug8179071 {

    // Deprecated and Legacy tags.
    private static final Set<String> LEGACY_ALIASES = Set.of("pa-PK", "ug-Arab-CN", "kk-Cyrl-KZ",
            "bs-BA", "ks-Arab-IN", "mn-Cyrl-MN", "ha-Latn-NE",
            "shi-MA", "ha-Latn-NG", "ms-Latn-BN","ms-Latn-SG",
            "ky-Cyrl-KG", "az-AZ", "zh-guoyu", "zh-min-nan", "i-klingon", "i-tsu",
            "sr-XK", "sgn-CH-DE", "mo", "i-tay", "scc", "uz-UZ", "uz-AF", "sr-RS",
            "i-hak", "sgn-BE-FR", "i-lux", "vai-LR", "tl", "zh-hakka", "i-ami", "aa-SAAHO", "ha-Latn-GH",
            "zh-xiang", "i-pwn", "sgn-BE-NL", "jw", "sh", "tzm-Latn-MA", "i-bnn");

    // expected month format data for locales after language aliases replacement.
    private static final Map<String, String> SHORT_JANUARY_NAMES = Map.of( "pa-PK", "\u062c\u0646\u0648\u0631\u06cc",
            "uz-AF" , "\u062c\u0646\u0648",
            "sr-ME", "jan",
            "scc", "\u0458\u0430\u043d",
            "sh", "jan",
            "ha-Latn-NE", "Jan",
            "i-lux", "Jan.");

    /**
     * Verifies that the short display name of January for the given language
     * tag matches the expected text.
     *
     * @param tag      the language tag of the locale to test
     * @param expected the expected short name of January
     * @throws RuntimeException if the actual name differs from {@code expected}
     */
    private static void test(String tag, String expected) {
        Locale target = Locale.forLanguageTag(tag);
        String actual = Month.JANUARY.getDisplayName(TextStyle.SHORT, target);
        if (!actual.equals(expected)) {
            throw new RuntimeException("failed for locale " + tag + " actual output " + actual +" does not match with " + expected);
        }
    }

    /**
     * getAvailableLocales() should not contain any deprecated or Legacy language tags
     *
     * @throws RuntimeException if any such tag is found
     */
    private static void checkInvalidTags() {
        // Collect every offending tag first so that the failure message lists all of them.
        Set<String> invalidTags = new HashSet<>();
        for (Locale locale : Locale.getAvailableLocales()) {
            String tag = locale.toLanguageTag();
            if (LEGACY_ALIASES.contains(tag)) {
                invalidTags.add(tag);
            }
        }
        if (!invalidTags.isEmpty()) {
            throw new RuntimeException("failed: Deprecated and Legacy tags found " + invalidTags + " in AvailableLocales ");
        }
    }

    public static void main(String[] args) {
        SHORT_JANUARY_NAMES.forEach(Bug8179071::test);
        checkInvalidTags();
    }
}

@ -5419,10 +5419,10 @@ FormatData/sr_BA/MonthNames/5=\u0458\u0443\u043d
FormatData/sr_BA/MonthNames/6=\u0458\u0443\u043b
FormatData/sr_BA/DayNames/3=\u0441\u0440\u0435\u0434\u0430
FormatData/sr_BA/DayAbbreviations/3=\u0441\u0440\u0435
FormatData/sr_BA/TimePatterns/0=HH.mm.ss zzzz
FormatData/sr_BA/TimePatterns/1=HH.mm.ss z
FormatData/sr_BA/TimePatterns/2=HH.mm.ss
FormatData/sr_BA/TimePatterns/3=HH.mm
FormatData/sr_BA/TimePatterns/0=HH:mm:ss zzzz
FormatData/sr_BA/TimePatterns/1=HH:mm:ss z
FormatData/sr_BA/TimePatterns/2=HH:mm:ss
FormatData/sr_BA/TimePatterns/3=HH:mm
FormatData/sr_BA/DatePatterns/0=EEEE, dd. MMMM y.
FormatData/sr_BA/DatePatterns/1=dd. MMMM y.
FormatData/sr_BA/DatePatterns/2=dd.MM.y.

@ -38,7 +38,7 @@
* 7114053 7074882 7040556 8008577 8013836 8021121 6192407 6931564 8027695
* 8017142 8037343 8055222 8042126 8074791 8075173 8080774 8129361 8134916
* 8145136 8145952 8164784 8037111 8081643 7037368 8178872 8185841 8190918
* 8187946 8195478 8181157
* 8187946 8195478 8181157 8179071
* @summary Verify locale data
* @modules java.base/sun.util.resources
* @modules jdk.localedata

@ -40,7 +40,7 @@ import tests.Result;
/*
* @test
* @bug 8152143 8152704 8155649 8165804 8185841 8176841 8190918
* @bug 8152143 8152704 8155649 8165804 8185841 8176841 8190918 8179071
* @summary IncludeLocalesPlugin tests
* @author Naoto Sato
* @requires (vm.compMode != "Xcomp" & os.maxMemory >= 2g)
@ -256,7 +256,7 @@ public class IncludeLocalesPluginTest {
"(root)", "as_IN", "as", "bn_IN", "bn", "bo_IN", "bo", "brx_IN", "brx",
"en", "en_001", "en_IN", "en_US", "en_US_POSIX", "gu_IN", "gu", "hi_IN",
"hi", "kn_IN", "kn", "kok_IN", "kok", "ks_IN", "ks", "ml_IN", "ml",
"mr_IN", "mr", "ne_IN", "ne", "or_IN", "or", "pa_IN", "pa", "pa_IN_#Guru",
"mr_IN", "mr", "ne_IN", "ne", "or_IN", "or", "pa", "pa_IN_#Guru",
"pa__#Guru", "ta_IN", "ta", "te_IN", "te", "ur_IN", "ur"),
"",
},