Reviewed-by: jlu, joehw, coffeys
1529 lines
61 KiB
1529 lines
61 KiB
* Copyright (c) 2012, 2024, Oracle and/or its affiliates. All rights reserved.
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
package build.tools.cldrconverter;
import build.tools.cldrconverter.BundleGenerator.BundleType;
import java.io.File;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.*;
import java.time.*;
import java.util.*;
import java.util.ResourceBundle.Control;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
/**
 * Converts locale data from "Locale Data Markup Language" format to
 * JRE resource bundle format. LDML is the format used by the Common
 * Locale Data Repository maintained by the Unicode Consortium.
 */
public class CLDRConverter {
static final String LDML_DTD_SYSTEM_ID = "http://www.unicode.org/cldr/dtd/2.0/ldml.dtd";
static final String SPPL_LDML_DTD_SYSTEM_ID = "http://www.unicode.org/cldr/dtd/2.0/ldmlSupplemental.dtd";
static final String BCP47_LDML_DTD_SYSTEM_ID = "http://www.unicode.org/cldr/dtd/2.0/ldmlBCP47.dtd";
private static String CLDR_BASE;
static String LOCAL_LDML_DTD;
static String LOCAL_SPPL_LDML_DTD;
static String LOCAL_BCP47_LDML_DTD;
private static String SOURCE_FILE_DIR;
private static String SPPL_SOURCE_FILE;
private static String SPPL_META_SOURCE_FILE;
private static String NUMBERING_SOURCE_FILE;
private static String METAZONES_SOURCE_FILE;
private static String LIKELYSUBTAGS_SOURCE_FILE;
private static String TIMEZONE_SOURCE_FILE;
private static String WINZONES_SOURCE_FILE;
private static String PLURALS_SOURCE_FILE;
private static String DAYPERIODRULE_SOURCE_FILE;
private static String COVERAGELEVELS_FILE;
static String DESTINATION_DIR = "build/gensrc";
static final String LOCALE_NAME_PREFIX = "locale.displayname.";
static final String LOCALE_SEPARATOR = LOCALE_NAME_PREFIX + "separator";
static final String LOCALE_KEYTYPE = LOCALE_NAME_PREFIX + "keytype";
static final String LOCALE_KEY_PREFIX = LOCALE_NAME_PREFIX + "key.";
static final String LOCALE_TYPE_PREFIX = LOCALE_NAME_PREFIX + "type.";
static final String LOCALE_TYPE_PREFIX_CA = LOCALE_TYPE_PREFIX + "ca.";
static final String CURRENCY_SYMBOL_PREFIX = "currency.symbol.";
static final String CURRENCY_NAME_PREFIX = "currency.displayname.";
static final String CALENDAR_NAME_PREFIX = "calendarname.";
static final String CALENDAR_FIRSTDAY_PREFIX = "firstDay.";
static final String CALENDAR_MINDAYS_PREFIX = "minDays.";
static final String TIMEZONE_ID_PREFIX = "timezone.id.";
static final String EXEMPLAR_CITY_PREFIX = "timezone.excity.";
static final String ZONE_NAME_PREFIX = "timezone.displayname.";
static final String METAZONE_ID_PREFIX = "metazone.id.";
static final String PARENT_LOCALE_PREFIX = "parentLocale.";
static final String LIKELY_SCRIPT_PREFIX = "likelyScript.";
static final String META_EMPTY_ZONE_NAME = "EMPTY_ZONE";
static final String[] EMPTY_ZONE = {"", "", "", "", "", ""};
static final String META_ETCUTC_ZONE_NAME = "ETC_UTC";
// constants used for TZDB short names
private static final String NBSP = "\u00A0";
private static final String STD = "std";
private static final String DST = "dst";
private static final String NO_SUBST = "-";
private static SupplementalDataParseHandler handlerSuppl;
private static LikelySubtagsParseHandler handlerLikelySubtags;
private static WinZonesParseHandler handlerWinZones;
static PluralsParseHandler handlerPlurals;
static SupplementalMetadataParseHandler handlerSupplMeta;
static NumberingSystemsParseHandler handlerNumbering;
static MetaZonesParseHandler handlerMetaZones;
static TimeZoneParseHandler handlerTimeZone;
static DayPeriodRuleParseHandler handlerDayPeriodRule;
private static BundleGenerator bundleGenerator;
// java.base module related
static boolean isBaseModule = false;
static final Set<Locale> BASE_LOCALES = new HashSet<>();
// "parentLocales" map
private static final Map<String, SortedSet<String>> parentLocalesMap = new HashMap<>();
static boolean nonlikelyScript;
private static final ResourceBundle.Control defCon =
// "likelyScript" map
private static final Map<String, SortedSet<String>> likelyScriptMap = new HashMap<>();
private static Set<String> AVAILABLE_TZIDS;
static int copyrightYear;
static String jdkHeaderTemplate;
private static String zoneNameTempFile;
private static String tzDataDir;
private static final Map<String, String> canonicalTZMap = new HashMap<>();
// rules maps
static Map<String, String> pluralRules;
static Map<String, String> dayPeriodRules;
// TZDB maps
private static final Map<String, String> tzdbShortNamesMap = HashMap.newHashMap(512);
private static final Map<String, String> tzdbSubstLetters = HashMap.newHashMap(512);
private static final Map<String, String> tzdbLinks = HashMap.newHashMap(512);
static enum DraftType {
private static final Map<String, DraftType> map = new HashMap<>();
static {
for (DraftType dt : values()) {
map.put(dt.getKeyword(), dt);
static private DraftType defaultType = CONTRIBUTED;
private final String keyword;
private DraftType() {
keyword = this.name().toLowerCase(Locale.ROOT);
static DraftType forKeyword(String keyword) {
return map.get(keyword);
static DraftType getDefault() {
return defaultType;
static void setDefault(String keyword) {
defaultType = Objects.requireNonNull(forKeyword(keyword));
String getKeyword() {
return keyword;
static boolean USE_UTF8 = false;
private static boolean verbose;
private CLDRConverter() {
// no instantiation
public static void main(String[] args) throws Exception {
if (args.length != 0) {
String currentArg = null;
try {
for (int i = 0; i < args.length; i++) {
currentArg = args[i];
switch (currentArg) {
case "-draft":
String draftDataType = args[++i];
try {
} catch (NullPointerException e) {
severe("Error: incorrect draft value: %s%n", draftDataType);
info("Using the specified data type: %s%n", draftDataType);
case "-base":
// base directory for input files
CLDR_BASE = args[++i];
if (!CLDR_BASE.endsWith("/")) {
CLDR_BASE += "/";
case "-baselocales":
// base locales
case "-basemodule":
// indicates java.base module resource generation
isBaseModule = true;
case "-o":
// output directory
DESTINATION_DIR = args[++i];
case "-utf8":
USE_UTF8 = true;
case "-verbose":
verbose = true;
case "-year":
copyrightYear = Integer.parseInt(args[++i]);
case "-zntempfile":
zoneNameTempFile = args[++i];
case "-tzdatadir":
tzDataDir = args[++i];
case "-jdk-header-template":
jdkHeaderTemplate = Files.readString(Paths.get(args[++i]));
case "-help":
throw new RuntimeException();
} catch (RuntimeException e) {
severe("unknown or incomplete arg(s): " + currentArg);
// Set up path names
LOCAL_LDML_DTD = CLDR_BASE + "/dtd/ldml.dtd";
LOCAL_SPPL_LDML_DTD = CLDR_BASE + "/dtd/ldmlSupplemental.dtd";
LOCAL_BCP47_LDML_DTD = CLDR_BASE + "/dtd/ldmlBCP47.dtd";
SPPL_SOURCE_FILE = CLDR_BASE + "/supplemental/supplementalData.xml";
LIKELYSUBTAGS_SOURCE_FILE = CLDR_BASE + "/supplemental/likelySubtags.xml";
NUMBERING_SOURCE_FILE = CLDR_BASE + "/supplemental/numberingSystems.xml";
METAZONES_SOURCE_FILE = CLDR_BASE + "/supplemental/metaZones.xml";
TIMEZONE_SOURCE_FILE = CLDR_BASE + "/bcp47/timezone.xml";
SPPL_META_SOURCE_FILE = CLDR_BASE + "/supplemental/supplementalMetadata.xml";
WINZONES_SOURCE_FILE = CLDR_BASE + "/supplemental/windowsZones.xml";
PLURALS_SOURCE_FILE = CLDR_BASE + "/supplemental/plurals.xml";
DAYPERIODRULE_SOURCE_FILE = CLDR_BASE + "/supplemental/dayPeriods.xml";
COVERAGELEVELS_FILE = CLDR_BASE + "/properties/coverageLevels.txt";
if (BASE_LOCALES.isEmpty()) {
if (copyrightYear == 0) {
copyrightYear = ZonedDateTime.now(ZoneId.of("America/Los_Angeles")).getYear();
bundleGenerator = new ResourceBundleGenerator();
// Parse data independent of locales
// rules maps
pluralRules = generateRules(handlerPlurals);
dayPeriodRules = generateRules(handlerDayPeriodRule);
// TZDB short names map
List<Bundle> bundles = readBundleList();
if (isBaseModule) {
// Generate java.time.format.ZoneName.java
// Generate Windows tzmappings
// Prints the command-line option summary for CLDRConverter by handing it,
// as one concatenated string, to errout(). Line breaks are spelled "%n"
// rather than "\n"; errout() checks its argument for "%n".
private static void usage() {
errout("Usage: java CLDRConverter [options]%n"
+ "\t-help output this usage message and exit%n"
+ "\t-verbose output information%n"
+ "\t-draft [contributed | approved | provisional | unconfirmed]%n"
+ "\t\t draft level for using data (default: contributed)%n"
+ "\t-base dir base directory for CLDR input files%n"
+ "\t-basemodule generates bundles that go into java.base module%n"
+ "\t-baselocales loc(,loc)* locales that go into the base module%n"
+ "\t-o dir output directory (default: ./build/gensrc)%n"
+ "\t-year year copyright year in output%n"
+ "\t-zntempfile template file for java.time.format.ZoneName.java%n"
+ "\t-tzdatadir tzdata directory for java.time.format.ZoneName.java%n"
+ "\t-utf8 use UTF-8 rather than \\uxxxx (for debug)%n"
+ "\t-jdk-header-template <file>%n"
+ "\t\t override default GPL header with contents of file%n");
static void info(String fmt, Object... args) {
if (verbose) {
System.out.printf(fmt, args);
static void info(String msg) {
if (verbose) {
static void warning(String fmt, Object... args) {
System.err.print("Warning: ");
System.err.printf(fmt, args);
static void warning(String msg) {
System.err.print("Warning: ");
static void severe(String fmt, Object... args) {
System.err.print("Error: ");
System.err.printf(fmt, args);
static void severe(String msg) {
System.err.print("Error: ");
private static void errout(String msg) {
if (msg.contains("%n")) {
} else {
* Configure the parser to allow access to DTDs on the file system.
private static void enableFileAccess(SAXParser parser) throws SAXNotSupportedException {
try {
parser.setProperty("http://javax.xml.XMLConstants/property/accessExternalDTD", "file");
} catch (SAXNotRecognizedException ignore) {
// property requires >= JAXP 1.5
private static List<Bundle> readBundleList() throws Exception {
List<Bundle> retList = new ArrayList<>();
Path path = FileSystems.getDefault().getPath(SOURCE_FILE_DIR);
var coverageMap = coverageLevelsMap();
try (DirectoryStream<Path> dirStr = Files.newDirectoryStream(path)) {
for (Path entry : dirStr) {
String fileName = entry.getFileName().toString();
if (fileName.endsWith(".xml")) {
String id = fileName.substring(0, fileName.indexOf('.'));
Locale cldrLoc = Locale.forLanguageTag(toLanguageTag(id));
List<Locale> candList = getCandidateLocales(cldrLoc);
if (!"root".equals(id) && candList.stream().noneMatch(coverageMap::containsKey)) {
StringBuilder sb = getCandLocales(candList);
if (sb.indexOf("root") == -1) {
retList.add(new Bundle(id, sb.toString(), null, null));
// Sort the bundles based on id. This will make sure all the parent bundles are
// processed first, e.g., for en_GB bundle, en_001, and "root" comes before
// en_GB. In order for "root" to come at the beginning, "root" is replaced with
// empty string on comparison.
retList.sort((o1, o2) -> {
String id1 = o1.getID();
String id2 = o2.getID();
if(id1.equals("root")) {
id1 = "";
if(id2.equals("root")) {
id2 = "";
return id1.compareTo(id2);
return retList;
private static final Map<String, Map<String, Object>> cldrBundles = new HashMap<>();
private static Map<String, SortedSet<String>> metaInfo = new HashMap<>();
static {
// For generating information on supported locales.
metaInfo.put("AvailableLocales", new TreeSet<>());
static Map<String, Object> getCLDRBundle(String id) throws Exception {
Map<String, Object> bundle = cldrBundles.get(id);
if (bundle != null) {
return bundle;
File file = new File(SOURCE_FILE_DIR + File.separator + id + ".xml");
if (!file.exists()) {
// Skip if the file doesn't exist.
return Collections.emptyMap();
info("..... main directory .....");
LDMLParseHandler handler = new LDMLParseHandler(id);
parseLDMLFile(file, handler);
bundle = handler.getData();
cldrBundles.put(id, bundle);
if (id.equals("root")) {
// Calendar data (firstDayOfWeek & minDaysInFirstWeek)
bundle = handlerSuppl.getData("root");
if (bundle != null) {
//merge two maps into one map
Map<String, Object> temp = cldrBundles.remove(id);
cldrBundles.put(id, bundle);
return bundle;
// Parsers for data in "supplemental" directory
private static void parseSupplemental() throws Exception {
// Parse SupplementalData file and store the information in the HashMap
// Calendar information such as firstDay and minDay are stored in
// supplementalData.xml as of CLDR1.4. Individual territory is listed
// with its ISO 3166 country code while default is listed using UNM49
// region and composition numerical code (001 for World.)
// SupplementalData file also provides the "parent" locales, which
// would otherwise not be used as fallbacks. Process them here as well.
handlerSuppl = new SupplementalDataParseHandler();
parseLDMLFile(new File(SPPL_SOURCE_FILE), handlerSuppl);
Map<String, Object> parentData = handlerSuppl.getData("root");
.filter(key -> key.startsWith(PARENT_LOCALE_PREFIX))
.forEach(key -> {
parentLocalesMap.put(key, new TreeSet<String>(
Arrays.asList(((String)parentData.get(key)).split(" "))));
// Parse numberingSystems to get digit zero character information.
handlerNumbering = new NumberingSystemsParseHandler();
parseLDMLFile(new File(NUMBERING_SOURCE_FILE), handlerNumbering);
// Parse metaZones to create mappings between Olson tzids and CLDR meta zone names
handlerMetaZones = new MetaZonesParseHandler();
parseLDMLFile(new File(METAZONES_SOURCE_FILE), handlerMetaZones);
// Parse likelySubtags
handlerLikelySubtags = new LikelySubtagsParseHandler();
parseLDMLFile(new File(LIKELYSUBTAGS_SOURCE_FILE), handlerLikelySubtags);
handlerLikelySubtags.getData().forEach((from, to) -> {
if (!from.contains("-")) { // look for language-only tag
var script = to.split("-")[1];
var key = LIKELY_SCRIPT_PREFIX + script;
var prev = likelyScriptMap.putIfAbsent(key, new TreeSet<String>(Set.of(from)));
if (prev != null) {
// Parse supplementalMetadata
// Currently interested in deprecated time zone ids and language aliases.
handlerSupplMeta = new SupplementalMetadataParseHandler();
parseLDMLFile(new File(SPPL_META_SOURCE_FILE), handlerSupplMeta);
// Parse windowsZones
handlerWinZones = new WinZonesParseHandler();
parseLDMLFile(new File(WINZONES_SOURCE_FILE), handlerWinZones);
// Parse plurals
handlerPlurals = new PluralsParseHandler();
parseLDMLFile(new File(PLURALS_SOURCE_FILE), handlerPlurals);
// Parse day period rules
handlerDayPeriodRule = new DayPeriodRuleParseHandler();
parseLDMLFile(new File(DAYPERIODRULE_SOURCE_FILE), handlerDayPeriodRule);
// Parsers for data in "bcp47" directory
private static void parseBCP47() throws Exception {
// Parse timezone
handlerTimeZone = new TimeZoneParseHandler();
parseLDMLFile(new File(TIMEZONE_SOURCE_FILE), handlerTimeZone);
// canonical tz name map
// alias -> primary
handlerTimeZone.getData().forEach((k, v) -> {
String[] ids = ((String)v).split("\\s");
for (int i = 1; i < ids.length; i++) {
canonicalTZMap.put(ids[i], ids[0]);
private static void parseLDMLFile(File srcfile, AbstractLDMLHandler<?> handler) throws Exception {
info("..... Parsing " + srcfile.getName() + " .....");
SAXParserFactory pf = SAXParserFactory.newInstance();
SAXParser parser = pf.newSAXParser();
parser.parse(srcfile, handler);
private static StringBuilder getCandLocales(List<Locale> candList) {
StringBuilder sb = new StringBuilder();
for (Locale loc : candList) {
if (!loc.equals(Locale.ROOT)) {
return sb;
private static List<Locale> getCandidateLocales(Locale cldrLoc) {
List<Locale> candList = new ArrayList<>();
candList = applyParentLocales("", defCon.getCandidateLocales("", cldrLoc));
return candList;
private static void convertBundles(List<Bundle> bundles) throws Exception {
var availableLangTags = metaInfo.get("AvailableLocales");
// parent locales map. The mappings are put in base metaInfo file
// for now.
if (isBaseModule) {
for (Bundle bundle : bundles) {
// Get the target map, which contains all the data that should be
// visible for the bundle's locale
Map<String, Object> targetMap = bundle.getTargetMap();
EnumSet<Bundle.Type> bundleTypes = bundle.getBundleTypes();
var id = bundle.getID();
if (bundle.isRoot()) {
// Add DateTimePatternChars because CLDR no longer supports localized patterns.
targetMap.put("DateTimePatternChars", "GyMdkHmsSEDFwWahKzZ");
// Now the map contains just the entries that need to be in the resources bundles.
// Go ahead and generate them.
if (bundleTypes.contains(Bundle.Type.LOCALENAMES)) {
Map<String, Object> localeNamesMap = extractLocaleNames(targetMap, id);
if (!localeNamesMap.isEmpty() || bundle.isRoot()) {
bundleGenerator.generateBundle("util", "LocaleNames", id, true, localeNamesMap, BundleType.OPEN);
if (bundleTypes.contains(Bundle.Type.CURRENCYNAMES)) {
Map<String, Object> currencyNamesMap = extractCurrencyNames(targetMap, id, bundle.getCurrencies());
if (!currencyNamesMap.isEmpty() || bundle.isRoot()) {
bundleGenerator.generateBundle("util", "CurrencyNames", id, true, currencyNamesMap, BundleType.OPEN);
if (bundleTypes.contains(Bundle.Type.TIMEZONENAMES)) {
Map<String, Object> zoneNamesMap = extractZoneNames(targetMap, id);
if (!zoneNamesMap.isEmpty() || bundle.isRoot()) {
bundleGenerator.generateBundle("util", "TimeZoneNames", id, true, zoneNamesMap, BundleType.TIMEZONE);
if (bundleTypes.contains(Bundle.Type.CALENDARDATA)) {
Map<String, Object> calendarDataMap = extractCalendarData(targetMap, id);
if (!calendarDataMap.isEmpty() || bundle.isRoot()) {
bundleGenerator.generateBundle("util", "CalendarData", id, true, calendarDataMap, BundleType.PLAIN);
if (bundleTypes.contains(Bundle.Type.FORMATDATA)) {
Map<String, Object> formatDataMap = extractFormatData(targetMap, id);
if (!formatDataMap.isEmpty() || bundle.isRoot()) {
bundleGenerator.generateBundle("text", "FormatData", id, true, formatDataMap, BundleType.PLAIN);
// For AvailableLocales
var langTag = toLanguageTag(id);
// Add extra language tags from likely subtags that meet the following conditions
// 1. Its likely subtag is supported (already in the available langtag set)
// 2. Neither of old obsolete ones (in/iw/ji)
.filter(e -> availableLangTags.contains(e.getValue()))
.filter(t -> !t.equals("in") && !t.equals("iw") && !t.equals("ji"))
static final Map<String, String> aliases = new HashMap<>();
* Translate the aliases into the real entries in the bundle map.
static void handleAliases(Map<String, Object> bundleMap) {
for (String key : aliases.keySet()) {
var sourceKey = aliases.get(key);
if (key.startsWith("ListPatterns_")) {
String k;
while ((k = aliases.get(sourceKey)) != null) {
sourceKey = k;
var source = bundleMap.get(sourceKey);
if (source != null) {
if (bundleMap.get(key) instanceof String[] sa) {
// fill missing elements in case of String array
for (int i = 0; i < sa.length; i++) {
if (sa[i] == null && ((String[])source)[i] != null) {
sa[i] = ((String[])source)[i];
bundleMap.putIfAbsent(key, source);
* Returns the language portion of the given id.
* If id is "root", "" is returned.
static String getLanguageCode(String id) {
return "root".equals(id) ? "" : Locale.forLanguageTag(id.replaceAll("_", "-")).getLanguage();
* Examine if the id includes the country (territory) code. If it does, it returns
* the country code.
* Otherwise, it returns null. eg. when the id is "zh_Hans_SG", it returns "SG".
* It does NOT return UN M.49 code, e.g., '001', as those three digit numbers cannot
* be translated into package names.
static String getCountryCode(String id) {
String rgn = getRegionCode(id);
return rgn.length() == 2 ? rgn: null;
* Examine if the id includes the region code. If it does, it returns
* the region code.
* Otherwise, it returns null. eg. when the id is "zh_Hans_SG", it returns "SG".
* It DOES return UN M.49 code, e.g., '001', as well as ISO 3166 two letter country codes.
static String getRegionCode(String id) {
return Locale.forLanguageTag(id.replaceAll("_", "-")).getCountry();
* Examine if the id includes the script code. If it does, it returns
* the script code.
static String getScriptCode(String id) {
return Locale.forLanguageTag(id.replaceAll("_", "-")).getScript();
private static class KeyComparator implements Comparator<String> {
static KeyComparator INSTANCE = new KeyComparator();
private KeyComparator() {
public int compare(String o1, String o2) {
int len1 = o1.length();
int len2 = o2.length();
if (!isDigit(o1.charAt(0)) && !isDigit(o2.charAt(0))) {
// Shorter string comes first unless either starts with a digit.
if (len1 < len2) {
return -1;
if (len1 > len2) {
return 1;
return o1.compareTo(o2);
private boolean isDigit(char c) {
return c >= '0' && c <= '9';
private static Map<String, Object> extractLocaleNames(Map<String, Object> map, String id) {
Map<String, Object> localeNames = new TreeMap<>(KeyComparator.INSTANCE);
for (String key : map.keySet()) {
if (key.startsWith(LOCALE_NAME_PREFIX)) {
switch (key) {
localeNames.put("ListCompositionPattern", map.get(key));
localeNames.put("ListKeyTypePattern", map.get(key));
localeNames.put(key.substring(LOCALE_NAME_PREFIX.length()), map.get(key));
if (id.equals("root")) {
// Add display name pattern, which is not in CLDR
localeNames.put("DisplayNamePattern", "{0,choice,0#|1#{1}|2#{1} ({2})}");
return localeNames;
private static Map<String, Object> extractCurrencyNames(Map<String, Object> map, String id, String names)
throws Exception {
Map<String, Object> currencyNames = new TreeMap<>(KeyComparator.INSTANCE);
for (String key : map.keySet()) {
if (key.startsWith(CURRENCY_NAME_PREFIX)) {
currencyNames.put(key.substring(CURRENCY_NAME_PREFIX.length()), map.get(key));
} else if (key.startsWith(CURRENCY_SYMBOL_PREFIX)) {
currencyNames.put(key.substring(CURRENCY_SYMBOL_PREFIX.length()), map.get(key));
return currencyNames;
private static Map<String, Object> extractZoneNames(Map<String, Object> map, String id) {
Map<String, Object> names = new TreeMap<>(KeyComparator.INSTANCE);
var availableIds = getAvailableZoneIds();
availableIds.forEach(tzid -> {
// If the tzid is deprecated, get the data for the replacement id
String tzKey = Optional.ofNullable((String)handlerSupplMeta.get(tzid))
// Follow link, if needed
String tzLink = null;
for (var k = tzKey; tzdbLinks.containsKey(k);) {
k = tzLink = tzdbLinks.get(k);
if (tzLink == null && tzdbLinks.containsValue(tzKey)) {
// reverse link search
// this is needed as in tzdb, "America/Buenos_Aires" links to
// "America/Argentina/Buenos_Aires", but CLDR contains metaZone
// "Argentina" only for "America/Buenos_Aires" (as of CLDR 44)
// Both tzids should have "Argentina" meta zone names
tzLink = tzdbLinks.entrySet().stream()
.filter(e -> e.getValue().equals(tzKey))
Object data = map.get(TIMEZONE_ID_PREFIX + tzKey);
if (data == null && tzLink != null) {
// data for tzLink
data = map.get(TIMEZONE_ID_PREFIX + tzLink);
if (data instanceof String[] tznames) {
// Hack for UTC. UTC is an alias to Etc/UTC in CLDR
if (tzid.equals("Etc/UTC") && !map.containsKey(TIMEZONE_ID_PREFIX + "UTC")) {
names.put(tzid, META_ETCUTC_ZONE_NAME);
} else {
// TZDB short names
tznames = Arrays.copyOf(tznames, tznames.length);
fillTZDBShortNames(tzid, tznames);
names.put(tzid, tznames);
} else {
String meta = handlerMetaZones.get(tzKey);
if (meta == null && tzLink != null) {
// Check for tzLink
meta = handlerMetaZones.get(tzLink);
if (meta != null) {
String metaKey = METAZONE_ID_PREFIX + meta;
data = map.get(metaKey);
if (data instanceof String[] tznames) {
// TZDB short names
tznames = Arrays.copyOf((String[])names.getOrDefault(metaKey, tznames), 6);
fillTZDBShortNames(tzid, tznames);
// Keep the metazone prefix here.
names.putIfAbsent(metaKey, tznames);
names.put(tzid, meta);
if (tzLink != null && availableIds.contains(tzLink)) {
names.put(tzLink, meta);
} else if (id.equals("root")) {
// supply TZDB short names if available
if (tzdbShortNamesMap.containsKey(tzid)) {
var tznames = new String[6];
fillTZDBShortNames(tzid, tznames);
names.put(tzid, tznames);
// exemplar cities.
Map<String, Object> exCities = map.entrySet().stream()
.filter(e -> e.getKey().startsWith(CLDRConverter.EXEMPLAR_CITY_PREFIX))
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
// If there's no UTC entry at this point, add an empty one
if (!names.isEmpty() && !names.containsKey("UTC")) {
// Finally some compatibility stuff
.filter(e -> !names.containsKey(e.getKey()) && names.containsKey(e.getValue()))
.forEach(e -> {
names.put(e.getKey(), names.get(e.getValue()));
return names;
* Extracts the language independent calendar data. Each of the two keys,
* "firstDayOfWeek" and "minimalDaysInFirstWeek" has a string value consists of
* one or multiple occurrences of:
* i: rg1 rg2 ... rgn;
* where "i" is the data for the following regions (delimited by a space) after
* ":", and ends with a ";".
private static Map<String, Object> extractCalendarData(Map<String, Object> map, String id) {
Map<String, Object> calendarData = new LinkedHashMap<>();
if (id.equals("root")) {
IntStream.range(1, 8)
.filter(d -> map.keySet().contains(CALENDAR_FIRSTDAY_PREFIX + d))
.map(d -> d + ": " + map.get(CALENDAR_FIRSTDAY_PREFIX + d))
IntStream.range(0, 7)
.filter(d -> map.keySet().contains(CALENDAR_MINDAYS_PREFIX + d))
.map(d -> d + ": " + map.get(CALENDAR_MINDAYS_PREFIX + d))
return calendarData;
static final String[] FORMAT_DATA_ELEMENTS = {
static final Set<String> availableSkeletons = new HashSet<>();
private static Map<String, Object> extractFormatData(Map<String, Object> map, String id) {
Map<String, Object> formatData = new LinkedHashMap<>();
for (CalendarType calendarType : CalendarType.values()) {
String prefix = calendarType.keyElementName();
.forEach(elem -> {
var key = prefix + elem;
copyIfPresent(map, "java.time." + key, formatData);
copyIfPresent(map, key, formatData);
availableSkeletons.forEach(s ->
copyIfPresent(map, prefix + "DateFormatItem." + s, formatData));
for (String key : map.keySet()) {
// Copy available calendar names
if (key.startsWith(CLDRConverter.LOCALE_TYPE_PREFIX_CA)) {
String type = key.substring(CLDRConverter.LOCALE_TYPE_PREFIX_CA.length());
for (CalendarType calendarType : CalendarType.values()) {
if (type.equals(calendarType.lname())) {
Object value = map.get(key);
String dataKey = key.replace(LOCALE_TYPE_PREFIX_CA,
formatData.put(dataKey, value);
String ukey = CALENDAR_NAME_PREFIX + calendarType.uname();
if (!dataKey.equals(ukey)) {
formatData.put(ukey, value);
copyIfPresent(map, "DefaultNumberingSystem", formatData);
List<String> numberingScripts = (List<String>) map.remove("numberingScripts");
if (numberingScripts != null) {
for (String script : numberingScripts) {
copyIfPresent(map, script + ".NumberElements", formatData);
copyIfPresent(map, script + ".NumberPatterns", formatData);
} else {
copyIfPresent(map, "NumberElements", formatData);
copyIfPresent(map, "NumberPatterns", formatData);
copyIfPresent(map, "short.CompactNumberPatterns", formatData);
copyIfPresent(map, "long.CompactNumberPatterns", formatData);
// put extra number elements for available scripts into formatData, if it is "root"
if (id.equals("root")) {
.filter(k -> !numberingScripts.contains(k))
.forEach(k -> {
String[] ne = (String[])map.get("latn.NumberElements");
String[] neNew = Arrays.copyOf(ne, ne.length);
neNew[4] = handlerNumbering.get(k).substring(0, 1);
formatData.put(k + ".NumberElements", neNew);
// ListPatterns
for (var lpKey : Bundle.LIST_PATTERN_KEYS) {
copyIfPresent(map, lpKey, formatData);
copyIfPresent(map, lpKey + "-short", formatData);
copyIfPresent(map, lpKey + "-narrow", formatData);
return formatData;
private static void copyIfPresent(Map<String, Object> src, String key, Map<String, Object> dest) {
Object value = src.get(key);
if (value != null) {
dest.put(key, value);
// --- code below here is adapted from java.util.Properties ---
private static final String specialSaveCharsJava = "\"";
private static final String specialSaveCharsProperties = "=: \t\r\n\f#!";
* Converts unicodes to encoded \uxxxx
* and writes out any of the characters in specialSaveChars
* with a preceding slash
static String saveConvert(String theString, boolean useJava) {
if (theString == null) {
return "";
String specialSaveChars;
if (useJava) {
specialSaveChars = specialSaveCharsJava;
} else {
specialSaveChars = specialSaveCharsProperties;
boolean escapeSpace = false;
int len = theString.length();
StringBuilder outBuffer = new StringBuilder(len * 2);
Formatter formatter = new Formatter(outBuffer, Locale.ROOT);
for (int x = 0; x < len; x++) {
char aChar = theString.charAt(x);
switch (aChar) {
case ' ':
if (x == 0 || escapeSpace) {
outBuffer.append(' ');
case '\\':
case '\t':
case '\n':
case '\r':
case '\f':
if (aChar < 0x0020 || (!USE_UTF8 && aChar > 0x007e)) {
formatter.format("\\u%04x", (int)aChar);
} else {
if (specialSaveChars.indexOf(aChar) != -1) {
return outBuffer.toString();
static String toLanguageTag(String locName) {
if (locName.indexOf('_') == -1) {
return locName;
String tag = locName.replaceAll("_", "-");
Locale loc = Locale.forLanguageTag(tag);
return loc.toLanguageTag();
private static void addLikelySubtags(String langTag) {
String likelySubtag = handlerLikelySubtags.get(langTag);
if (likelySubtag != null) {
var availableLangTags = metaInfo.get("AvailableLocales");
availableLangTags.add(likelySubtag.replaceFirst("-[A-Z][a-z]{3}", ""));
private static String toLocaleName(String tag) {
if (tag.indexOf('-') == -1) {
return tag;
return tag.replaceAll("-", "_");
private static void setupBaseLocales(String localeList) {
.map(l -> new Locale.Builder().setLocale(l).setScript("Latn").build())
.map(l -> Control.getControl(Control.FORMAT_DEFAULT)
.getCandidateLocales("", l))
// applying parent locale rules to the passed candidates list
// This has to match with the one in sun.util.cldr.CLDRLocaleProviderAdapter
private static Map<Locale, Locale> childToParentLocaleMap = null;
private static List<Locale> applyParentLocales(String baseName, List<Locale> candidates) {
if (Objects.isNull(childToParentLocaleMap)) {
childToParentLocaleMap = new HashMap<>();
parentLocalesMap.keySet().forEach(key -> {
String parent = key.substring(PARENT_LOCALE_PREFIX.length()).replaceAll("_", "-");
parentLocalesMap.get(key).stream().forEach(child -> {
"root".equals(parent) ? Locale.ROOT : Locale.forLanguageTag(parent));
// check irregular parents
for (int i = 0; i < candidates.size(); i++) {
Locale l = candidates.get(i);
Locale p = getParentLocale(l);
if (!l.equals(Locale.ROOT) &&
Objects.nonNull(p) &&
!candidates.get(i+1).equals(p)) {
List<Locale> applied = candidates.subList(0, i+1);
if (applied.contains(p)) {
// avoid circular recursion (could happen with nb/no case)
applied.addAll(applyParentLocales(baseName, defCon.getCandidateLocales(baseName, p)));
return applied;
return candidates;
// Returns the parent locale recorded for child in childToParentLocaleMap.
// When none is recorded, nonlikelyScript is set and child has no country, the
// child's script is additionally checked against likelyScriptMap (see below).
// May return null when no parent applies.
// NOTE(review): childToParentLocaleMap must already be initialized (it is created
// in applyParentLocales); the terminal steps of the stream below are not visible
// in this excerpt.
private static Locale getParentLocale(Locale child) {
Locale parent = childToParentLocaleMap.get(child);
// check non-likely script for root
if (nonlikelyScript && parent == null && child.getCountry().isEmpty()) {
// the language is padded with spaces so that contains() matches a whole space-delimited entry
var lang = " " + child.getLanguage() + " ";
var script = child.getScript();
if (!script.isEmpty()) {
parent = likelyScriptMap.entrySet().stream()
// entries whose value lists this language
.filter(e -> e.getValue().contains(lang))
// a script equal to the likely one yields no parent; any other script yields Locale.ROOT
.map(likely -> likely.equals(script) ? null : Locale.ROOT)
return parent;
// Writes java/time/format/ZoneName.java under DESTINATION_DIR, replacing each
// placeholder line with generated entries.
// NOTE(review): the origin of the lines fed into flatMap (the second argument of
// Files.write) is not visible in this excerpt.
private static void generateZoneName() throws Exception {
Files.createDirectories(Paths.get(DESTINATION_DIR, "java", "time", "format"));
Files.write(Paths.get(DESTINATION_DIR, "java", "time", "format", "ZoneName.java"),
// a line that is exactly one of the placeholders expands to the matching entry
// stream; every other line is passed through unchanged
.flatMap(l -> {
if (l.equals("%%%%ZIDMAP%%%%")) {
return zidMapEntry();
} else if (l.equals("%%%%MZONEMAP%%%%")) {
return handlerMetaZones.mzoneMapEntry();
} else if (l.equals("%%%%DEPRECATED%%%%")) {
return handlerSupplMeta.deprecatedMap();
} else if (l.equals("%%%%TZDATALINK%%%%")) {
return tzDataLinkEntry();
} else {
return Stream.of(l);
// create the file when absent, otherwise truncate and overwrite it
StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
// This method assumes handlerMetaZones is already initialized
// The zone ID set is cached in AVAILABLE_TZIDS and, on first use, seeded with the
// IDs reported by TimeZone.getAvailableIDs().
// NOTE(review): the remainder of the method, including its return statement, is
// not visible in this excerpt.
private static Set<String> getAvailableZoneIds() {
assert handlerMetaZones != null;
if (AVAILABLE_TZIDS == null) {
AVAILABLE_TZIDS = new HashSet<>(Arrays.asList(TimeZone.getAvailableIDs()));
// Produces the entry lines that replace the %%%%ZIDMAP%%%% placeholder in
// generateZoneName(): one formatted line per available zone ID for which a
// zidMap() entry can be resolved.
// NOTE(review): the end of the lambda and of the pipeline is not visible in this excerpt.
private static Stream<String> zidMapEntry() {
return getAvailableZoneIds().stream()
.map(id -> {
// use the canonical ID for the lookup when one is registered, otherwise the ID itself
String canonId = canonicalTZMap.getOrDefault(id, id);
String meta = handlerMetaZones.get(canonId);
String zone001 = handlerMetaZones.zidMap().get(meta);
// no mapping -> empty string, removed by the filter below
return zone001 == null ? "" :
String.format(" \"%s\", \"%s\", \"%s\",",
id, meta, zone001);
.filter(s -> !s.isEmpty())
// Produces the entry lines that replace the %%%%TZDATALINK%%%% placeholder in
// generateZoneName(), starting from the non-directory entries found directly
// under tzDataDir (walk depth 1).
// An IOException from the walk is rethrown as UncheckedIOException.
// NOTE(review): the remainder of the pipeline is not visible in this excerpt.
// Files.walk returns a stream that holds an open directory handle; confirm it is
// closed by the consumer of the returned stream.
private static Stream<String> tzDataLinkEntry() {
try {
return Files.walk(Paths.get(tzDataDir), 1)
.filter(p -> !Files.isDirectory(p))
} catch (IOException e) {
throw new UncheckedIOException(e);
private static Stream<String> extractLinks(Path tzFile) {
try {
return Files.lines(tzFile)
.filter(l -> l.startsWith("Link"))
.map(l -> l.replaceFirst("^Link[\\s]+(\\S+)\\s+(\\S+).*",
" \"$2\", \"$1\","));
} catch (IOException e) {
throw new UncheckedIOException(e);
// Generate tzmappings for Windows. The format is:
// (Windows Zone Name):(REGION):(Java TZID)
// where:
// Windows Zone Name: arbitrary time zone name string used in Windows
// REGION: ISO3166 or UN M.49 code
// Java TZID: Java's time zone ID
// Note: the entries are alphabetically sorted, *except* the "world" region
// code, i.e., "001". It should be the last entry for the same windows time
// zone name entries. (cf. TimeZone_md.c)
// Writes the windows/conf/tzmappings file under DESTINATION_DIR.
// NOTE(review): the source of the key stream and the step that turns the sorted
// entries into the written lines are not visible in this excerpt; the keys appear
// to have the form "<Windows Zone Name>:<REGION>".
private static void generateWindowsTZMappings() throws Exception {
Files.createDirectories(Paths.get(DESTINATION_DIR, "windows", "conf"));
Files.write(Paths.get(DESTINATION_DIR, "windows", "conf", "tzmappings"),
// keep every ":001" key, plus the keys whose value differs from the value of
// the ":001" key obtained by replacing their trailing 2-3 character region
.filter(k -> k.endsWith(":001") ||
!handlerWinZones.get(k).equals(handlerWinZones.get(k.replaceFirst(":\\w{2,3}$", ":001"))))
// entry text: key, ':', mapped value, ':'
.map(k -> k + ":" + handlerWinZones.get(k) + ":")
.sorted(new Comparator<String>() {
// Orders by the first ':'-separated field; among entries sharing it, "001"
// sorts last and the others by their second field.
// NOTE(review): two entries with the same first field and both "001" would
// compare as 1 in either order - presumably keys are unique so this cannot occur.
public int compare(String t1, String t2) {
String[] s1 = t1.split(":");
String[] s2 = t2.split(":");
if (s1[0].equals(s2[0])) {
if (s1[1].equals("001")) {
return 1;
} else if (s2[1].equals("001")) {
return -1;
} else {
return s1[1].compareTo(s2[1]);
} else {
return s1[0].compareTo(s2[0]);
// create the file when absent, otherwise truncate and overwrite it
StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
/**
 * Generates rules map for Plural rules and DayPeriod rules. The key is the locale id,
 * and the value is the rules, defined by the LDML spec. Each rule consists of {@code type:rule}
 * notation, concatenated with a ";" as a delimiter.
 * @param handler handler containing rules
 * @return the map
 */
// Builds a map with the same keys as handler.getData(), whose values are derived
// from each entry's rule map as "type:rule" items.
// NOTE(review): the terminal operations of the inner and outer pipelines are not
// visible in this excerpt.
private static Map<String, String> generateRules(AbstractLDMLHandler<Map<String, String>> handler) {
return handler.getData().entrySet().stream()
.collect(Collectors.toMap(Map.Entry::getKey, e -> {
Map<String, String> rules = e.getValue();
return rules.entrySet().stream()
// "type:rule", with the rule text cut from its first '@' to the end of the line
.map(rule -> rule.getKey() + ":" + rule.getValue().replaceFirst("@.*", ""))
// Builds a map from locale to CLDR coverage level out of COVERAGELEVELS_FILE, then
// adds the locales of the OtherCommonLocales bundle with an empty level string.
// NOTE(review): the collector opening/closing lines and the accessor applied to the
// resource bundle are not visible in this excerpt.
private static Map<Locale, String> coverageLevelsMap() throws Exception {
// First, parse `coverageLevels.txt` file
var covMap = Files.readAllLines(Path.of(COVERAGELEVELS_FILE)).stream()
// skip blank lines and lines starting with '#'
.filter(line -> !line.isBlank() && !line.startsWith("#"))
// split on ';' surrounded by optional spaces/tabs ("\s" is the Java string escape
// for a space), into at most 3 fields
.map(line -> line.split("[\s\t]*;[\s\t]*", 3))
// NOTE(review): a[1] assumes every remaining line has at least two ';'-separated fields
.filter(a -> a[1].matches("basic|moderate|modern|comprehensive"))
// key: first field as a locale, with '_' converted to '-' for forLanguageTag
a -> Locale.forLanguageTag(a[0].replaceAll("_", "-")),
// value: the coverage level name
a -> a[1],
(v1, v2) -> v2, // should never happen
// Add other common (non-seed) locales (below `basic` coverage level) as of v42
ResourceBundle.getBundle(CLDRConverter.class.getPackageName() + ".OtherCommonLocales")
.forEach(k -> covMap.put(Locale.forLanguageTag(k), ""));
return covMap;
/**
 * Generates three maps from TZ database files, where they have usual abbreviation
 * of the time zone names as "FORMAT".
 *
 * `tzdbShortNamesMap` maps the time zone id, such as "America/Los_Angeles" to
 * its FORMAT and Rule which determines the substitution. In "America/Los_Angeles"
 * case, its FORMAT is "P%sT" and the Rule is "US". They are concatenated with
 * an NBSP, so the eventual mapping will be:
 *
 * "America/Los_Angeles" -> "P%sT<NBSP>US"
 *
 * The map, `tzdbSubstLetters` maps the Rule to its substitution letters.
 * The key of the map is the Rule name, appended with "<NBSP>std" or "<NBSP>dst"
 * depending on the savings, e.g.,
 *
 * "US<NBSP>std" -> "S"
 * "US<NBSP>dst" -> "D"
 *
 * These mappings resolve the short names for time zones in each type,
 * such as:
 *
 * Standard short name for "America/Los_Angeles" -> "PST"
 * DST short name for "America/Los_Angeles" -> "PDT"
 * Generic short name for "America/Los_Angeles" -> "PT"
 *
 * The map, `tzdbLinks` retains `Link`s of time zones. For example,
 * the mapping:
 *
 * "US/Hawaii" -> "Pacific/Honolulu"
 *
 * resolves names for "US/Hawaii" correctly with "Pacific/Honolulu"
 * names.
 */
// Parses the tz database region files found directly under tzDataDir and fills
// tzdbShortNamesMap (zone -> FORMAT + NBSP + rule), tzdbSubstLetters and tzdbLinks.
// An IOException raised while reading a file is rethrown as UncheckedIOException.
// NOTE(review): closing braces, and possibly the statements that end some of the
// guards below (e.g. after the blank/comment check), are not visible in this
// excerpt. The stream returned by Files.walk is not closed in the visible code.
private static void generateTZDBShortNamesMap() throws IOException {
// depth 1: only entries directly inside tzDataDir, following symbolic links
Files.walk(Path.of(tzDataDir), 1, FileVisitOption.FOLLOW_LINKS)
.filter(p -> p.toFile().isFile())
// only files with one of these exact names are parsed
.filter(p -> p.getFileName().toString().matches("africa|antarctica|asia|australasia|backward|etcetera|europe|northamerica|southamerica"))
.forEach(p -> {
try {
// state of the Zone entry currently being read; zone == null means none is open
String zone = null;
String rule = null;
String format = null;
// true while the current line lies between the Vanguard and Rearguard markers
boolean inVanguard = false;
for (var line : Files.readAllLines(p)) {
// check for Vanguard lines
if (line.startsWith("# Vanguard section")) {
inVanguard = true;
if (inVanguard && line.startsWith("# Rearguard section")) {
inVanguard = false;
if (line.isBlank() || line.matches("^[ \t]*#.*")) {
// ignore blank/comment lines
// remove comments in-line
line = line.replaceAll("[ \t]*#.*", "");
// limit -1 keeps empty tokens, so a line starting with whitespace has an empty tokens[0]
var tokens = line.split("[ \t]+", -1);
var token0len = tokens.length > 0 ? tokens[0].length() : 0;
// Zone line
// regionMatches(true, ...) over token0len characters accepts tokens[0] when it is a
// case-insensitive prefix of the keyword (e.g. "Z" or "Zone"); same for Rule/Link below
if (token0len > 0 && tokens[0].regionMatches(true, 0, "Zone", 0, token0len)) {
// a new Zone line starts: record the previous zone with its last seen format and rule
if (zone != null) {
tzdbShortNamesMap.put(zone, format + NBSP + rule);
// fields used from a Zone line: [1] zone name, [3] rule, [4] format
zone = tokens[1];
rule = tokens[3];
format = flipIfNeeded(inVanguard, tokens[4]);
} else {
if (zone != null) {
// a Rule or Link line ends the open zone: record it and reset the state
if (token0len > 0 &&
(tokens[0].regionMatches(true, 0, "Rule", 0, token0len) ||
tokens[0].regionMatches(true, 0, "Link", 0, token0len))) {
tzdbShortNamesMap.put(zone, format + NBSP + rule);
zone = null;
rule = null;
format = null;
} else {
// any other line while a zone is open (presumably a continuation line of that
// zone) overrides rule and format with fields [2] and [3]
rule = tokens[2];
format = flipIfNeeded(inVanguard, tokens[3]);
// Rule line
if (token0len > 0 && tokens[0].regionMatches(true, 0, "Rule", 0, token0len)) {
// key: field [1] + NBSP + STD when field [8] is "0", otherwise DST;
// value: field [9] with NO_SUBST removed
tzdbSubstLetters.put(tokens[1] + NBSP + (tokens[8].equals("0") ? STD : DST),
tokens[9].replace(NO_SUBST, ""));
// Link line
if (token0len > 0 && tokens[0].regionMatches(true, 0, "Link", 0, token0len)) {
// maps field [2] to field [1]
tzdbLinks.put(tokens[2], tokens[1]);
// Last entry
if (zone != null) {
tzdbShortNamesMap.put(zone, format + NBSP + rule);
} catch (IOException ioe) {
throw new UncheckedIOException(ioe);
// Reverse the std/dst FORMAT in Vanguard so that it
// correctly handles negative DST cases, such as "GMT/IST"
// vs. "IST/GMT" case for Europe/Dublin
private static String flipIfNeeded(boolean inVanguard, String format) {
if (inVanguard) {
var stddst = format.split("/");
if (stddst.length == 2) {
return stddst[1] + "/" + stddst[0];
return format;
* Fill the TZDB short names if there is no name provided by the CLDR
private static void fillTZDBShortNames(String tzid, String[] names) {
var val = tzdbShortNamesMap.get(tzdbLinks.getOrDefault(tzid, tzid));
if (val != null) {
var format = val.split(NBSP)[0];
var rule = val.split(NBSP)[1];
IntStream.of(1, 3, 5).forEach(i -> {
if (names[i] == null) {
if (format.contains("%s")) {
names[i] = switch (i) {
case 1 -> format.formatted(tzdbSubstLetters.get(rule + NBSP + STD));
case 3 -> format.formatted(tzdbSubstLetters.get(rule + NBSP + DST));
case 5 -> format.formatted("");
default -> throw new InternalError();
} else if (format.contains("/")) { // such as "+08/+09" or "GMT/BST"
names[i] = switch (i) {
case 1, 5 -> convertGMTName(format.substring(0, format.indexOf("/")));
case 3 -> convertGMTName(format.substring(format.indexOf("/") + 1));
default -> throw new InternalError();
} else {
names[i] = convertGMTName(format);
/**
 * Convert TZDB offsets to JDK's offsets, e.g., "-08" to "GMT-08:00".
 * If it cannot recognize the pattern, return the argument as is.
 * Returning null results in generating the GMT format at runtime.
 */
// Returns null for the formats handled inside the try block, and the argument
// itself when a DateTimeException is raised.
// NOTE(review): no statement that can throw DateTimeException is visible inside
// the "%z" guard; the validation announced by the comment below appears to be
// missing from this excerpt - confirm against the full file.
private static String convertGMTName(String f) {
try {
// "%z" is exempt from the validation
if (!f.equals("%z")) {
// Validate if the format is an offset
return null;
} catch (DateTimeException dte) {
// textual representation. return as is
return f;
// for debug
static void dumpMap(Map<String, Object> map) {
.map(e -> {
Object val = e.getValue();
String valStr = null;
if (val instanceof String[]) {
valStr = Arrays.asList((String[])val).toString();
} else if (val != null) {
valStr = val.toString();
return e.getKey() + " = " + valStr;