From ce8ebebc77f4ef73852364f6188b43c482337350 Mon Sep 17 00:00:00 2001 From: Naoto Sato Date: Wed, 18 Oct 2023 18:26:49 +0000 Subject: [PATCH] 8317979: Use TZ database style abbreviations in the CLDR locale provider Reviewed-by: rriggs, joehw, lancea, erikj, iris, ihse --- .../tools/cldrconverter/CLDRConverter.java | 145 +++++++++++++++++- make/modules/jdk.localedata/Gensrc.gmk | 6 +- .../test/java/time/format/TestUTCParse.java | 10 +- .../util/TimeZone/CLDRDisplayNamesTest.java | 16 +- test/jdk/sun/text/resources/LocaleData.cldr | 4 +- .../sun/text/resources/LocaleDataTest.java | 2 +- 6 files changed, 160 insertions(+), 23 deletions(-) diff --git a/make/jdk/src/classes/build/tools/cldrconverter/CLDRConverter.java b/make/jdk/src/classes/build/tools/cldrconverter/CLDRConverter.java index 49ed1c3aaa8..b6298c00c9a 100644 --- a/make/jdk/src/classes/build/tools/cldrconverter/CLDRConverter.java +++ b/make/jdk/src/classes/build/tools/cldrconverter/CLDRConverter.java @@ -30,7 +30,6 @@ import java.io.File; import java.io.IOException; import java.io.UncheckedIOException; import java.nio.file.*; -import java.text.MessageFormat; import java.time.*; import java.util.*; import java.util.ResourceBundle.Control; @@ -92,6 +91,12 @@ public class CLDRConverter { static final String[] EMPTY_ZONE = {"", "", "", "", "", ""}; static final String META_ETCUTC_ZONE_NAME = "ETC_UTC"; + // constants used for TZDB short names + private static final String NBSP = "\u00A0"; + private static final String STD = "std"; + private static final String DST = "dst"; + private static final String NO_SUBST = "-"; + private static SupplementDataParseHandler handlerSuppl; private static LikelySubtagsParseHandler handlerLikelySubtags; private static WinZonesParseHandler handlerWinZones; @@ -123,6 +128,10 @@ public class CLDRConverter { static Map pluralRules; static Map dayPeriodRules; + // TZDB Short Names Map + private static final Map tzdbShortNamesMap = HashMap.newHashMap(512); + private static final Map 
tzdbSubstLetters = HashMap.newHashMap(512); + static enum DraftType { UNCONFIRMED, PROVISIONAL, @@ -284,6 +293,9 @@ public class CLDRConverter { pluralRules = generateRules(handlerPlurals); dayPeriodRules = generateRules(handlerDayPeriodRule); + // TZDB short names map + generateTZDBShortNamesMap(); + List bundles = readBundleList(); convertBundles(bundles); @@ -757,21 +769,25 @@ public class CLDRConverter { .orElse(tzid); Object data = map.get(TIMEZONE_ID_PREFIX + tzKey); - if (data instanceof String[]) { + if (data instanceof String[] tznames) { // Hack for UTC. UTC is an alias to Etc/UTC in CLDR if (tzid.equals("Etc/UTC") && !map.containsKey(TIMEZONE_ID_PREFIX + "UTC")) { - names.put(METAZONE_ID_PREFIX + META_ETCUTC_ZONE_NAME, data); + names.put(METAZONE_ID_PREFIX + META_ETCUTC_ZONE_NAME, tznames); names.put(tzid, META_ETCUTC_ZONE_NAME); names.put("UTC", META_ETCUTC_ZONE_NAME); } else { - names.put(tzid, data); + // TZDB short names + fillTZDBShortNames(tzid, tznames); + names.put(tzid, tznames); } } else { String meta = handlerMetaZones.get(tzKey); if (meta != null) { String metaKey = METAZONE_ID_PREFIX + meta; data = map.get(metaKey); - if (data instanceof String[]) { + if (data instanceof String[] tznames) { + // TZDB short names + fillTZDBShortNames(tzid, tznames); // Keep the metazone prefix here. names.put(metaKey, data); names.put(tzid, meta); @@ -1246,6 +1262,125 @@ public class CLDRConverter { return covMap; } + /* + * Generates two maps from TZ database files, where they have usual abbreviation + * of the time zone names as "FORMAT". + * + * `tzdbShortNamesMap` maps the time zone id, such as "America/Los_Angeles" to + * its FORMAT and Rule which determines the substitution. In "America/Los_Angeles" + * case, its FORMAT is "P%sT" and the Rule is "US". They are concatenated with + * an NBSP, so the eventual mapping will be: + * + * "America/Los_Angeles" -> "P%sT<NBSP>US" + * + * The other map, `tzdbSubstLetters` maps the Rule to its substitution letters. 
+ * The key of the map is the Rule name, appended with "<NBSP>std" or "<NBSP>dst" + * depending on the savings, e.g., + * + * "US<NBSP>std" -> "S" + * "US<NBSP>dst" -> "D" + * + * These two mappings resolve the short names for time zones in each type, + * such as: + * + * Standard short name for "America/Los_Angeles" -> "PST" + * DST short name for "America/Los_Angeles" -> "PDT" + * Generic short name for "America/Los_Angeles" -> "PT" + */ + private static void generateTZDBShortNamesMap() throws IOException { + Files.walk(Path.of(tzDataDir), 1, FileVisitOption.FOLLOW_LINKS) + .filter(p -> p.toFile().isFile()) + .forEach(p -> { + try { + String zone = null; + String rule = null; + String format = null; + for (var line : Files.readAllLines(p)) { + if (line.contains("#STDOFF")) continue; + line = line.replaceAll("[ \t]*#.*", ""); + + // Zone line + if (line.startsWith("Zone")) { + var zl = line.split("[ \t]+", -1); + zone = zl[1]; + rule = zl[3]; + format = zl[4]; + } else { + if (zone != null) { + if (line.isBlank()) { + tzdbShortNamesMap.put(zone, format + NBSP + rule); + zone = null; + rule = null; + format = null; + } else { + var s = line.split("[ \t]+", -1); + rule = s[2]; + format = s[3]; + } + } + } + + // Rule line + if (line.startsWith("Rule")) { + var rl = line.split("[ \t]+", -1); + tzdbSubstLetters.put(rl[1] + NBSP + (rl[8].equals("0") ? 
STD : DST), + rl[9].replace(NO_SUBST, "")); + } + } + } catch (IOException ioe) { + throw new UncheckedIOException(ioe); + } + }); + } + + /* + * Fill the TZDB short names if there is no name provided by the CLDR + */ + private static void fillTZDBShortNames(String tzid, String[] names) { + var val = tzdbShortNamesMap.get(tzid); + if (val != null) { + var format = val.split(NBSP)[0]; + var rule = val.split(NBSP)[1]; + IntStream.of(1, 3, 5).forEach(i -> { + if (names[i] == null) { + if (format.contains("%s")) { + names[i] = switch (i) { + case 1 -> format.formatted(tzdbSubstLetters.get(rule + NBSP + STD)); + case 3 -> format.formatted(tzdbSubstLetters.get(rule + NBSP + DST)); + case 5 -> format.formatted(""); + default -> throw new InternalError(); + }; + } else if (format.contains("/")) { // such as "+08/+09" or "GMT/BST" + names[i] = switch (i) { + case 1, 5 -> convertGMTName(format.substring(0, format.indexOf("/"))); + case 3 -> convertGMTName(format.substring(format.indexOf("/") + 1)); + default -> throw new InternalError(); + }; + } else { + names[i] = convertGMTName(format); + } + } + }); + } + } + + /* + * Map a TZDB numeric offset name, eg, "-08", to null so that the GMT format + * is used at runtime. Any other (textual) name is returned as is. + */ + private static String convertGMTName(String f) { + try { + // Should pre-fill GMT format once COMPAT is gone. + // Till then, fall back to GMT format at runtime, after COMPAT short + // names are populated + ZoneOffset.of(f); + return null; + } catch (DateTimeException dte) { + // textual representation. return as is + } + return f; + } + // for debug static void dumpMap(Map map) { map.entrySet().stream() diff --git a/make/modules/jdk.localedata/Gensrc.gmk b/make/modules/jdk.localedata/Gensrc.gmk index fc0e09dd8bb..df6400b16b1 100644 --- a/make/modules/jdk.localedata/Gensrc.gmk +++ b/make/modules/jdk.localedata/Gensrc.gmk @@ -1,5 +1,5 @@ # -# Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. 
+# Copyright (c) 2014, 2023, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -36,6 +36,7 @@ TARGETS += $(GENSRC_LOCALEDATA) CLDR_DATA_DIR := $(TOPDIR)/make/data/cldr/common GENSRC_DIR := $(SUPPORT_OUTPUTDIR)/gensrc/jdk.localedata CLDR_GEN_DONE := $(GENSRC_DIR)/_cldr-gensrc.marker +TZ_DATA_DIR := $(TOPDIR)/src/java.base/share/data/tzdata $(CLDR_GEN_DONE): $(wildcard $(CLDR_DATA_DIR)/dtd/*.dtd) \ $(wildcard $(CLDR_DATA_DIR)/main/*.xml) \ @@ -47,7 +48,8 @@ $(CLDR_GEN_DONE): $(wildcard $(CLDR_DATA_DIR)/dtd/*.dtd) \ $(TOOL_CLDRCONVERTER) -base $(CLDR_DATA_DIR) \ -baselocales "en-US" \ -year $(COPYRIGHT_YEAR) \ - -o $(GENSRC_DIR)) + -o $(GENSRC_DIR) \ + -tzdatadir $(TZ_DATA_DIR)) $(TOUCH) $@ TARGETS += $(CLDR_GEN_DONE) diff --git a/test/jdk/java/time/test/java/time/format/TestUTCParse.java b/test/jdk/java/time/test/java/time/format/TestUTCParse.java index 3f991810f8b..6da94f04f04 100644 --- a/test/jdk/java/time/test/java/time/format/TestUTCParse.java +++ b/test/jdk/java/time/test/java/time/format/TestUTCParse.java @@ -23,7 +23,7 @@ /* * @test * @modules jdk.localedata - * @bug 8303440 + * @bug 8303440 8317979 * @summary Test parsing "UTC-XX:XX" text works correctly */ package test.java.time.format; @@ -43,8 +43,8 @@ import static org.testng.Assert.assertEquals; public class TestUTCParse { static { - // Assuming CLDR's SHORT name for "America/Los_Angeles" - // produces "UTC\u212208:00" + // Assuming CLDR's SHORT name for "America/Juneau" + // produces "UTC\u221209:00" System.setProperty("java.locale.providers", "CLDR"); } @@ -60,9 +60,9 @@ public class TestUTCParse { @Test public void testUTCShortNameRoundTrip() { var fmt = DateTimeFormatter.ofPattern("z", Locale.FRANCE); - var zdt = ZonedDateTime.of(2023, 3, 3, 0, 0, 0, 0, ZoneId.of("America/Los_Angeles")); + var zdt = ZonedDateTime.of(2023, 3, 3, 0, 0, 0, 0, 
ZoneId.of("America/Juneau")); var formatted = fmt.format(zdt); - assertEquals(formatted, "UTC\u221208:00"); + assertEquals(formatted, "UTC\u221209:00"); assertEquals(fmt.parse(formatted).query(TemporalQueries.zoneId()), zdt.getZone()); } diff --git a/test/jdk/java/util/TimeZone/CLDRDisplayNamesTest.java b/test/jdk/java/util/TimeZone/CLDRDisplayNamesTest.java index 67a082410ec..2ff278a583c 100644 --- a/test/jdk/java/util/TimeZone/CLDRDisplayNamesTest.java +++ b/test/jdk/java/util/TimeZone/CLDRDisplayNamesTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -24,7 +24,7 @@ /* * @test * @bug 8005471 8008577 8129881 8130845 8136518 8181157 8210490 8220037 - * 8234347 8236548 + * 8234347 8236548 8317979 * @modules jdk.localedata * @run main/othervm -Djava.locale.providers=CLDR CLDRDisplayNamesTest * @summary Make sure that localized time zone names of CLDR are used @@ -48,27 +48,27 @@ public class CLDRDisplayNamesTest { { "ja-JP", "\u30a2\u30e1\u30ea\u30ab\u592a\u5e73\u6d0b\u6a19\u6e96\u6642", - "GMT-08:00", + "PST", "\u30a2\u30e1\u30ea\u30ab\u592a\u5e73\u6d0b\u590f\u6642\u9593", - "GMT-07:00", + "PDT", //"\u30a2\u30e1\u30ea\u30ab\u592a\u5e73\u6d0b\u6642\u9593", //"PT" }, { "zh-CN", "\u5317\u7f8e\u592a\u5e73\u6d0b\u6807\u51c6\u65f6\u95f4", - "GMT-08:00", + "PST", "\u5317\u7f8e\u592a\u5e73\u6d0b\u590f\u4ee4\u65f6\u95f4", - "GMT-07:00", + "PDT", //"\u5317\u7f8e\u592a\u5e73\u6d0b\u65f6\u95f4", //"PT", }, { "de-DE", "Nordamerikanische Westk\u00fcsten-Normalzeit", - "GMT-08:00", + "PST", "Nordamerikanische Westk\u00fcsten-Sommerzeit", - "GMT-07:00", + "PDT", //"Nordamerikanische Westk\u00fcstenzeit", //"PT", }, diff --git a/test/jdk/sun/text/resources/LocaleData.cldr 
b/test/jdk/sun/text/resources/LocaleData.cldr index 21f1d016cfc..a6b62961a07 100644 --- a/test/jdk/sun/text/resources/LocaleData.cldr +++ b/test/jdk/sun/text/resources/LocaleData.cldr @@ -5240,9 +5240,9 @@ FormatData/de/TimePatterns/0=HH:mm:ss zzzz FormatData/fi/AmPmMarkers/0=ap. FormatData/fi/AmPmMarkers/1=ip. -# bug 6507067 +# bug 6507067 8317979 TimeZoneNames/zh_TW/Asia\/Taipei/1=\u53f0\u5317\u6a19\u6e96\u6642\u9593 -TimeZoneNames/zh_TW/Asia\/Taipei/2= +TimeZoneNames/zh_TW/Asia\/Taipei/2=CST # bug 6645271 FormatData/hr_HR/DatePatterns/2=d. MMM y. diff --git a/test/jdk/sun/text/resources/LocaleDataTest.java b/test/jdk/sun/text/resources/LocaleDataTest.java index 2d76ca9e2ea..0d40ef9bde0 100644 --- a/test/jdk/sun/text/resources/LocaleDataTest.java +++ b/test/jdk/sun/text/resources/LocaleDataTest.java @@ -41,7 +41,7 @@ * 8187946 8195478 8181157 8179071 8193552 8202026 8204269 8202537 8208746 * 8209775 8221432 8227127 8230284 8231273 8233579 8234288 8250665 8255086 * 8251317 8274658 8283277 8283805 8265315 8287868 8295564 8284840 8296715 - * 8301206 8303472 + * 8301206 8303472 8317979 * @summary Verify locale data * @modules java.base/sun.util.resources * @modules jdk.localedata