8317979: Use TZ database style abbreviations in the CLDR locale provider

Reviewed-by: rriggs, joehw, lancea, erikj, iris, ihse
This commit is contained in:
Naoto Sato 2023-10-18 18:26:49 +00:00
parent ab135683a6
commit ce8ebebc77
6 changed files with 160 additions and 23 deletions

View File

@ -30,7 +30,6 @@ import java.io.File;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.*;
import java.text.MessageFormat;
import java.time.*;
import java.util.*;
import java.util.ResourceBundle.Control;
@ -92,6 +91,12 @@ public class CLDRConverter {
static final String[] EMPTY_ZONE = {"", "", "", "", "", ""};
static final String META_ETCUTC_ZONE_NAME = "ETC_UTC";
// Constants used for TZDB short names.
// NBSP joins the two parts of a composite key/value in the TZDB maps
// (FORMAT + NBSP + Rule, and Rule + NBSP + STD or DST).
// STD and DST are the key suffixes selecting a Rule's substitution letters
// for zero and non-zero savings, respectively.
// NO_SUBST is the letter-column value that is replaced with an empty string
// when a Rule line is read.
private static final String NBSP = "\u00A0";
private static final String STD = "std";
private static final String DST = "dst";
private static final String NO_SUBST = "-";
private static SupplementDataParseHandler handlerSuppl;
private static LikelySubtagsParseHandler handlerLikelySubtags;
private static WinZonesParseHandler handlerWinZones;
@ -123,6 +128,10 @@ public class CLDRConverter {
static Map<String, String> pluralRules;
static Map<String, String> dayPeriodRules;
// TZDB Short Names Maps
// tzdbShortNamesMap: time zone id -> FORMAT + NBSP + Rule name
// tzdbSubstLetters:  Rule name + NBSP + ("std" or "dst") -> substitution letters
private static final Map<String, String> tzdbShortNamesMap = HashMap.newHashMap(512);
private static final Map<String, String> tzdbSubstLetters = HashMap.newHashMap(512);
static enum DraftType {
UNCONFIRMED,
PROVISIONAL,
@ -284,6 +293,9 @@ public class CLDRConverter {
pluralRules = generateRules(handlerPlurals);
dayPeriodRules = generateRules(handlerDayPeriodRule);
// TZDB short names map
generateTZDBShortNamesMap();
List<Bundle> bundles = readBundleList();
convertBundles(bundles);
@ -757,21 +769,25 @@ public class CLDRConverter {
.orElse(tzid);
Object data = map.get(TIMEZONE_ID_PREFIX + tzKey);
if (data instanceof String[]) {
if (data instanceof String[] tznames) {
// Hack for UTC. UTC is an alias to Etc/UTC in CLDR
if (tzid.equals("Etc/UTC") && !map.containsKey(TIMEZONE_ID_PREFIX + "UTC")) {
names.put(METAZONE_ID_PREFIX + META_ETCUTC_ZONE_NAME, data);
names.put(METAZONE_ID_PREFIX + META_ETCUTC_ZONE_NAME, tznames);
names.put(tzid, META_ETCUTC_ZONE_NAME);
names.put("UTC", META_ETCUTC_ZONE_NAME);
} else {
names.put(tzid, data);
// TZDB short names
fillTZDBShortNames(tzid, tznames);
names.put(tzid, tznames);
}
} else {
String meta = handlerMetaZones.get(tzKey);
if (meta != null) {
String metaKey = METAZONE_ID_PREFIX + meta;
data = map.get(metaKey);
if (data instanceof String[]) {
if (data instanceof String[] tznames) {
// TZDB short names
fillTZDBShortNames(tzid, tznames);
// Keep the metazone prefix here.
names.put(metaKey, data);
names.put(tzid, meta);
@ -1246,6 +1262,125 @@ public class CLDRConverter {
return covMap;
}
/*
 * Generates two maps from the TZ database files, whose "FORMAT" fields hold
 * the customary abbreviations of the time zone names.
 *
 * `tzdbShortNamesMap` maps the time zone id, such as "America/Los_Angeles" to
 * its FORMAT and Rule which determines the substitution. In "America/Los_Angeles"
 * case, its FORMAT is "P%sT" and the Rule is "US". They are concatenated with
 * an NBSP, so the eventual mapping will be:
 *
 *   "America/Los_Angeles" -> "P%sT<NBSP>US"
 *
 * The other map, `tzdbSubstLetters` maps the Rule to its substitution letters.
 * The key of the map is the Rule name, appended with "<NBSP>std" or "<NBSP>dst"
 * depending on the savings, e.g.,
 *
 *   "US<NBSP>std" -> "S"
 *   "US<NBSP>dst" -> "D"
 *
 * These two mappings resolve the short names for time zones in each type,
 * such as:
 *
 *   Standard short name for "America/Los_Angeles" -> "PST"
 *   DST short name for "America/Los_Angeles" -> "PDT"
 *   Generic short name for "America/Los_Angeles" -> "PT"
 *
 * Every regular file directly under `tzDataDir` is read. An I/O failure
 * while listing the directory is thrown as IOException; a failure while
 * reading one of the files is rethrown as UncheckedIOException.
 */
private static void generateTZDBShortNamesMap() throws IOException {
    // The stream returned by Files.walk() keeps directories open, so it has
    // to be closed explicitly.
    try (var files = Files.walk(Path.of(tzDataDir), 1, FileVisitOption.FOLLOW_LINKS)) {
        files.filter(Files::isRegularFile)
            .forEach(p -> {
                try {
                    parseTZDBFile(Files.readAllLines(p));
                } catch (IOException ioe) {
                    throw new UncheckedIOException(ioe);
                }
            });
    }
}

/*
 * Parses the lines of a single TZ database file and records its Zone and
 * Rule information into `tzdbShortNamesMap` and `tzdbSubstLetters`.
 *
 * A zone block consists of a "Zone" line followed by continuation lines
 * that begin with white space. The FORMAT and Rule of the last line of the
 * block are the ones recorded for the zone. The block ends at a blank line,
 * at any non-continuation line (such as "Zone", "Rule" or "Link"), or at
 * the end of the file. Comment-only lines inside a block are ignored.
 */
private static void parseTZDBFile(List<String> lines) {
    String zone = null;
    String rule = null;
    String format = null;

    for (var rawLine : lines) {
        if (rawLine.contains("#STDOFF")) continue;
        var line = rawLine.replaceAll("[ \t]*#.*", "");

        if (line.isBlank()) {
            // Only a genuinely blank line ends the zone block. A line that
            // is blank merely because its comment was stripped must not cut
            // the block short.
            if (zone != null && rawLine.isBlank()) {
                tzdbShortNamesMap.put(zone, format + NBSP + rule);
                zone = null;
                rule = null;
                format = null;
            }
            continue;
        }

        if (line.startsWith("Zone")) {
            // A new zone begins; do not lose a pending one that was not
            // terminated by a blank line.
            if (zone != null) {
                tzdbShortNamesMap.put(zone, format + NBSP + rule);
            }
            var zl = line.split("[ \t]+", -1);
            zone = zl[1];
            rule = zl[3];
            format = zl[4];
        } else if (zone != null) {
            if (Character.isWhitespace(line.charAt(0))) {
                // Continuation line: the leading white space yields an
                // empty first element, hence the shifted indices.
                var s = line.split("[ \t]+", -1);
                rule = s[2];
                format = s[3];
            } else {
                // Any other line (e.g., "Rule" or "Link") ends the block.
                tzdbShortNamesMap.put(zone, format + NBSP + rule);
                zone = null;
                rule = null;
                format = null;
            }
        }

        // Rule line
        if (line.startsWith("Rule")) {
            var rl = line.split("[ \t]+", -1);
            tzdbSubstLetters.put(rl[1] + NBSP + (rl[8].equals("0") ? STD : DST),
                rl[9].replace(NO_SUBST, ""));
        }
    }

    // The file may end without a blank line after the last zone block.
    if (zone != null) {
        tzdbShortNamesMap.put(zone, format + NBSP + rule);
    }
}
/*
 * Fill the TZDB short names if there is no name provided by the CLDR.
 *
 * Only the elements at index 1, 3 and 5 of `names` (standard, daylight and
 * generic short names) are examined, and only those that are null are
 * filled. Nothing is done if `tzid` has no entry in `tzdbShortNamesMap`.
 *
 * A name is left null when it cannot be derived, i.e., when the FORMAT is
 * a plain numeric offset (see convertGMTName()), or when the FORMAT needs
 * a substitution letter that the Rule does not define.
 */
private static void fillTZDBShortNames(String tzid, String[] names) {
    var val = tzdbShortNamesMap.get(tzid);
    if (val == null) {
        return;
    }

    // The value is "<FORMAT><NBSP><Rule>"
    var parts = val.split(NBSP);
    var format = parts[0];
    var rule = parts[1];

    for (int i : new int[] {1, 3, 5}) {
        if (names[i] != null) {
            continue;
        }

        if (format.contains("%s")) {
            String letters = switch (i) {
                case 1 -> tzdbSubstLetters.get(rule + NBSP + STD);
                case 3 -> tzdbSubstLetters.get(rule + NBSP + DST);
                default -> ""; // generic name takes no substitution letter
            };
            // Do not embed the literal text "null" into the name if the
            // Rule has no letter for this type.
            if (letters != null) {
                names[i] = format.formatted(letters);
            }
        } else if (format.contains("/")) { // such as "+08/+09" or "GMT/BST"
            int slash = format.indexOf("/");
            names[i] = convertGMTName(i == 3 ?
                format.substring(slash + 1) :
                format.substring(0, slash));
        } else {
            names[i] = convertGMTName(format);
        }
    }
}
/*
 * Filters a TZDB abbreviation for use as a short name.
 *
 * A textual abbreviation, such as "GMT" or "BST", is returned as is.
 * A numeric offset that ZoneOffset can parse, such as "-08", yields null:
 * the name is deliberately left unset so that the GMT format, eg,
 * "GMT-08:00", is produced as a fallback at runtime, after the COMPAT
 * short names are populated. The GMT format should be pre-filled here
 * once COMPAT is gone.
 */
private static String convertGMTName(String f) {
    try {
        ZoneOffset.of(f);
    } catch (DateTimeException dte) {
        // not an offset, ie, a textual representation
        return f;
    }
    // a valid offset; no name is generated at build time
    return null;
}
// for debug
static void dumpMap(Map<String, Object> map) {
map.entrySet().stream()

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2014, 2023, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@ -36,6 +36,7 @@ TARGETS += $(GENSRC_LOCALEDATA)
CLDR_DATA_DIR := $(TOPDIR)/make/data/cldr/common
GENSRC_DIR := $(SUPPORT_OUTPUTDIR)/gensrc/jdk.localedata
CLDR_GEN_DONE := $(GENSRC_DIR)/_cldr-gensrc.marker
TZ_DATA_DIR := $(TOPDIR)/src/java.base/share/data/tzdata
$(CLDR_GEN_DONE): $(wildcard $(CLDR_DATA_DIR)/dtd/*.dtd) \
$(wildcard $(CLDR_DATA_DIR)/main/*.xml) \
@ -47,7 +48,8 @@ $(CLDR_GEN_DONE): $(wildcard $(CLDR_DATA_DIR)/dtd/*.dtd) \
$(TOOL_CLDRCONVERTER) -base $(CLDR_DATA_DIR) \
-baselocales "en-US" \
-year $(COPYRIGHT_YEAR) \
-o $(GENSRC_DIR))
-o $(GENSRC_DIR) \
-tzdatadir $(TZ_DATA_DIR))
$(TOUCH) $@
TARGETS += $(CLDR_GEN_DONE)

View File

@ -23,7 +23,7 @@
/*
* @test
* @modules jdk.localedata
* @bug 8303440
* @bug 8303440 8317979
* @summary Test parsing "UTC-XX:XX" text works correctly
*/
package test.java.time.format;
@ -43,8 +43,8 @@ import static org.testng.Assert.assertEquals;
public class TestUTCParse {
static {
// Assuming CLDR's SHORT name for "America/Los_Angeles"
// produces "UTC\u221208:00"
// Assuming CLDR's SHORT name for "America/Juneau"
// produces "UTC\u221209:00"
System.setProperty("java.locale.providers", "CLDR");
}
@ -60,9 +60,9 @@ public class TestUTCParse {
@Test
public void testUTCShortNameRoundTrip() {
var fmt = DateTimeFormatter.ofPattern("z", Locale.FRANCE);
var zdt = ZonedDateTime.of(2023, 3, 3, 0, 0, 0, 0, ZoneId.of("America/Los_Angeles"));
var zdt = ZonedDateTime.of(2023, 3, 3, 0, 0, 0, 0, ZoneId.of("America/Juneau"));
var formatted = fmt.format(zdt);
assertEquals(formatted, "UTC\u221208:00");
assertEquals(formatted, "UTC\u221209:00");
assertEquals(fmt.parse(formatted).query(TemporalQueries.zoneId()), zdt.getZone());
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -24,7 +24,7 @@
/*
* @test
* @bug 8005471 8008577 8129881 8130845 8136518 8181157 8210490 8220037
* 8234347 8236548
* 8234347 8236548 8317979
* @modules jdk.localedata
* @run main/othervm -Djava.locale.providers=CLDR CLDRDisplayNamesTest
* @summary Make sure that localized time zone names of CLDR are used
@ -48,27 +48,27 @@ public class CLDRDisplayNamesTest {
{
"ja-JP",
"\u30a2\u30e1\u30ea\u30ab\u592a\u5e73\u6d0b\u6a19\u6e96\u6642",
"GMT-08:00",
"PST",
"\u30a2\u30e1\u30ea\u30ab\u592a\u5e73\u6d0b\u590f\u6642\u9593",
"GMT-07:00",
"PDT",
//"\u30a2\u30e1\u30ea\u30ab\u592a\u5e73\u6d0b\u6642\u9593",
//"PT"
},
{
"zh-CN",
"\u5317\u7f8e\u592a\u5e73\u6d0b\u6807\u51c6\u65f6\u95f4",
"GMT-08:00",
"PST",
"\u5317\u7f8e\u592a\u5e73\u6d0b\u590f\u4ee4\u65f6\u95f4",
"GMT-07:00",
"PDT",
//"\u5317\u7f8e\u592a\u5e73\u6d0b\u65f6\u95f4",
//"PT",
},
{
"de-DE",
"Nordamerikanische Westk\u00fcsten-Normalzeit",
"GMT-08:00",
"PST",
"Nordamerikanische Westk\u00fcsten-Sommerzeit",
"GMT-07:00",
"PDT",
//"Nordamerikanische Westk\u00fcstenzeit",
//"PT",
},

View File

@ -5240,9 +5240,9 @@ FormatData/de/TimePatterns/0=HH:mm:ss zzzz
FormatData/fi/AmPmMarkers/0=ap.
FormatData/fi/AmPmMarkers/1=ip.
# bug 6507067
# bug 6507067 8317979
TimeZoneNames/zh_TW/Asia\/Taipei/1=\u53f0\u5317\u6a19\u6e96\u6642\u9593
TimeZoneNames/zh_TW/Asia\/Taipei/2=
TimeZoneNames/zh_TW/Asia\/Taipei/2=CST
# bug 6645271
FormatData/hr_HR/DatePatterns/2=d. MMM y.

View File

@ -41,7 +41,7 @@
* 8187946 8195478 8181157 8179071 8193552 8202026 8204269 8202537 8208746
* 8209775 8221432 8227127 8230284 8231273 8233579 8234288 8250665 8255086
* 8251317 8274658 8283277 8283805 8265315 8287868 8295564 8284840 8296715
* 8301206 8303472
* 8301206 8303472 8317979
* @summary Verify locale data
* @modules java.base/sun.util.resources
* @modules jdk.localedata