8303039: Utilize coverageLevels.txt

Reviewed-by: iris, joehw
This commit is contained in:
Naoto Sato 2023-03-02 18:31:53 +00:00
parent 32247c336a
commit 0b63557941
4 changed files with 318 additions and 4 deletions
make
CompileToolsJdk.gmk
data/cldr/common/properties
jdk/src/classes/build/tools/cldrconverter

@ -1,5 +1,5 @@
#
# Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2011, 2023, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@ -69,6 +69,13 @@ $(eval $(call SetupCopyFiles,COPY_NIMBUS_TEMPLATES, \
TARGETS += $(COPY_NIMBUS_TEMPLATES)
# Copy every *.properties file from the cldrconverter build tool's source
# directory into the corresponding package directory under the build tools
# class output, and register the copy as a target of this makefile.
# NOTE(review): presumably needed so that CLDRConverter can load these files
# as resource bundles from its own package when it runs - confirm.
$(eval $(call SetupCopyFiles,COPY_CLDRCONVERTER_PROPERTIES, \
SRC := $(TOPDIR)/make/jdk/src/classes/build/tools/cldrconverter, \
DEST := $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes/build/tools/cldrconverter, \
FILES := $(wildcard $(TOPDIR)/make/jdk/src/classes/build/tools/cldrconverter/*.properties)))
TARGETS += $(COPY_CLDRCONVERTER_PROPERTIES)
################################################################################
$(eval $(call SetupJavaCompilation, COMPILE_DEPEND, \

@ -0,0 +1,141 @@
# coverageLevels.txt
# Copyright © 2022 Unicode, Inc.
# CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
# For terms of use, see http://www.unicode.org/copyright.html
#
# Provides the Coverage Level of locales at Basic or Above.
# For more info,see the Locale Coverage Chart for this version.
# Generated by ShowLocaleCoverage.
#
#Locale ; Level
af ; modern
am ; modern
ar ; modern
as ; modern
ast ; basic
az ; modern
be ; modern
bg ; modern
bgc ; basic
bho ; basic
bn ; modern
br ; moderate
brx ; basic
bs ; modern
bs_Cyrl ; basic
ca ; modern
ceb ; moderate
chr ; modern
cs ; modern
cv ; moderate
cy ; modern
da ; modern
de ; modern
doi ; basic
dsb ; modern
el ; modern
en ; modern
es ; modern
et ; modern
eu ; modern
fa ; modern
ff_Adlm ; basic
fi ; modern
fil ; modern
fo ; basic
fr ; modern
ga ; modern
gd ; modern
gl ; modern
gu ; modern
ha ; modern
he ; modern
hi ; modern
hi_Latn ; modern
hr ; modern
hsb ; modern
hu ; modern
hy ; modern
ia ; basic
id ; modern
ig ; modern
is ; modern
it ; modern
ja ; modern
jv ; modern
ka ; modern
kea ; basic
kgp ; basic
kk ; modern
km ; modern
kn ; modern
ko ; modern
kok ; modern
ks ; basic
ks_Deva ; basic
ky ; modern
lo ; modern
lt ; modern
lv ; modern
mai ; basic
mi ; basic
mk ; modern
ml ; modern
mn ; modern
mni ; basic
mr ; modern
ms ; modern
my ; modern
ne ; modern
nl ; modern
nn ; modern
no ; modern
or ; modern
pa ; modern
pcm ; modern
pl ; modern
ps ; modern
pt ; modern
qu ; moderate
raj ; basic
rm ; basic
ro ; modern
ru ; modern
sa ; basic
sat ; basic
sc ; moderate
sd ; modern
sd_Deva ; basic
si ; modern
sk ; modern
sl ; modern
so ; modern
sq ; modern
sr ; modern
su ; basic
sv ; modern
sw ; modern
ta ; modern
te ; modern
tg ; basic
th ; modern
ti ; basic
tk ; modern
to ; basic
tr ; modern
tt ; basic
uk ; modern
ur ; modern
uz ; modern
uz_Cyrl ; basic
vi ; modern
wo ; basic
xh ; moderate
yo ; modern
yrl ; basic
yue ; modern
yue_Hans ; modern
zh ; modern
zh_Hant ; modern
zu ; modern
#EOF

@ -69,6 +69,7 @@ public class CLDRConverter {
private static String WINZONES_SOURCE_FILE;
private static String PLURALS_SOURCE_FILE;
private static String DAYPERIODRULE_SOURCE_FILE;
private static String COVERAGELEVELS_FILE;
static String DESTINATION_DIR = "build/gensrc";
static final String LOCALE_NAME_PREFIX = "locale.displayname.";
@ -258,6 +259,7 @@ public class CLDRConverter {
WINZONES_SOURCE_FILE = CLDR_BASE + "/supplemental/windowsZones.xml";
PLURALS_SOURCE_FILE = CLDR_BASE + "/supplemental/plurals.xml";
DAYPERIODRULE_SOURCE_FILE = CLDR_BASE + "/supplemental/dayPeriods.xml";
COVERAGELEVELS_FILE = CLDR_BASE + "/properties/coverageLevels.txt";
if (BASE_LOCALES.isEmpty()) {
setupBaseLocales("en-US");
@ -359,13 +361,18 @@ public class CLDRConverter {
private static List<Bundle> readBundleList() throws Exception {
List<Bundle> retList = new ArrayList<>();
Path path = FileSystems.getDefault().getPath(SOURCE_FILE_DIR);
var coverageMap = coverageLevelsMap();
try (DirectoryStream<Path> dirStr = Files.newDirectoryStream(path)) {
for (Path entry : dirStr) {
String fileName = entry.getFileName().toString();
if (fileName.endsWith(".xml")) {
String id = fileName.substring(0, fileName.indexOf('.'));
Locale cldrLoc = Locale.forLanguageTag(toLanguageTag(id));
StringBuilder sb = getCandLocales(cldrLoc);
List<Locale> candList = getCandidateLocales(cldrLoc);
if (!"root".equals(id) && candList.stream().noneMatch(coverageMap::containsKey)) {
continue;
}
StringBuilder sb = getCandLocales(candList);
if (sb.indexOf("root") == -1) {
sb.append("root");
}
@ -510,8 +517,7 @@ public class CLDRConverter {
parser.parse(srcfile, handler);
}
private static StringBuilder getCandLocales(Locale cldrLoc) {
List<Locale> candList = getCandidateLocales(cldrLoc);
private static StringBuilder getCandLocales(List<Locale> candList) {
StringBuilder sb = new StringBuilder();
for (Locale loc : candList) {
if (!loc.equals(Locale.ROOT)) {
@ -1195,6 +1201,26 @@ public class CLDRConverter {
}));
}
/**
 * Builds a map from locale to CLDR coverage level.
 *
 * The map is populated from two sources:
 * <ol>
 * <li>the file named by {@code COVERAGELEVELS_FILE}, whose data lines have the
 *     form {@code <locale> ; <level>}; blank lines, lines starting with
 *     {@code #}, lines without a {@code ;} separator, and lines whose level is
 *     not one of {@code basic}, {@code moderate}, {@code modern} or
 *     {@code comprehensive} are ignored;</li>
 * <li>the keys of the {@code OtherCommonLocales} resource bundle in this
 *     class's package, each of which is mapped to an empty level string.</li>
 * </ol>
 *
 * @return a mutable map keyed by {@code Locale}; the value is the coverage
 *         level name, or {@code ""} for locales that come from
 *         {@code OtherCommonLocales}
 * @throws Exception if the coverage levels file cannot be read or the
 *         resource bundle cannot be found
 */
private static Map<Locale, String> coverageLevelsMap() throws Exception {
    // First, parse `coverageLevels.txt` file
    var covMap = Files.readAllLines(Path.of(COVERAGELEVELS_FILE)).stream()
        // Strip first so that indented comments/blank lines are recognized and
        // trailing whitespace does not end up in the level field
        .map(String::strip)
        .filter(line -> !line.isEmpty() && !line.startsWith("#"))
        // Fields are separated by `;` with optional surrounding whitespace
        .map(line -> line.split("\\s*;\\s*", 3))
        // Ignore lines lacking a `;` separator rather than failing with an
        // ArrayIndexOutOfBoundsException on a[1]
        .filter(a -> a.length >= 2
                && a[1].matches("basic|moderate|modern|comprehensive"))
        .collect(Collectors.toMap(
            // The file uses `_` as the subtag separator; language tags use `-`
            a -> Locale.forLanguageTag(a[0].replace('_', '-')),
            a -> a[1],
            (v1, v2) -> v2, // should never happen
            HashMap::new));

    // Add other common (non-seed) locales (below `basic` coverage level) as of v42
    ResourceBundle.getBundle(CLDRConverter.class.getPackageName() + ".OtherCommonLocales")
        .keySet()
        .forEach(k -> covMap.put(Locale.forLanguageTag(k), ""));

    return covMap;
}
// for debug
static void dumpMap(Map<String, Object> map) {
map.entrySet().stream()

@ -0,0 +1,140 @@
#
# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation. Oracle designates this
# particular file as subject to the "Classpath" exception as provided
# by Oracle in the LICENSE file that accompanied this code.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#
# Other common (non-seed) locales that are below `Basic` coverage level
# as of CLDR v42 (Extracted from
# https://github.com/unicode-org/cldr-staging/blob/main/docs/charts/42/tsv/locale-coverage.tsv)
agq=Aghem
ak=Akan
ann=Obolo
asa=Asu
az-Cyrl=Azerbaijani (Cyrillic)
bas=Basaa
bem=Bemba
bez=Bena
bm=Bambara
bo=Tibetan
ccp=Chakma
ce=Chechen
cgg=Chiga
ckb=Central Kurdish
dav=Taita
dje=Zarma
dua=Duala
dyo=Jola-Fonyi
dz=Dzongkha
ebu=Embu
ee=Ewe
eo=Esperanto
ewo=Ewondo
ff=Fula
frr=Northern Frisian
fur=Friulian
fy=Western Frisian
gsw=Swiss German
guz=Gusii
gv=Manx
haw=Hawaiian
ii=Sichuan Yi
jgo=Ngomba
jmc=Machame
kab=Kabyle
kam=Kamba
kde=Makonde
khq=Koyra Chiini
ki=Kikuyu
kkj=Kako
kl=Kalaallisut
kln=Kalenjin
ksb=Shambala
ksf=Bafia
ksh=Colognian
ku=Kurdish
kw=Cornish
lag=Langi
lb=Luxembourgish
lg=Ganda
lkt=Lakota
ln=Lingala
lrc=Northern Luri
lu=Luba-Katanga
luo=Luo
luy=Luyia
mas=Masai
mdf=Moksha
mer=Meru
mfe=Morisyen
mg=Malagasy
mgh=Makhuwa-Meetto
mgo=Meta\u02bc
mt=Maltese
mua=Mundang
mzn=Mazanderani
naq=Nama
nd=North Ndebele
nds=Low German
nmg=Kwasio
nnh=Ngiemboon
nus=Nuer
nyn=Nyankole
oc=Occitan
om=Oromo
os=Ossetic
pa-Arab=Punjabi (Arabic)
pis=Pijin
rn=Rundi
rof=Rombo
rw=Kinyarwanda
rwk=Rwa
sah=Yakut
saq=Samburu
sbp=Sangu
se=Northern Sami
seh=Sena
ses=Koyraboro Senni
sg=Sango
shi=Tachelhit
shi-Latn=Tachelhit (Latin)
smn=Inari Sami
sms=Skolt Sami
sn=Shona
teo=Teso
tok=Toki Pona
twq=Tasawaq
tzm=Central Atlas Tamazight
ug=Uyghur
uz-Arab=Uzbek (Arabic)
vai=Vai
vai-Latn=Vai (Latin)
vun=Vunjo
wae=Walser
xog=Soga
yav=Yangben
yi=Yiddish
zgh=Standard Moroccan Tamazight
# Not listed in the TSV referenced above, but the locale exists
sr-Latn=Serbian (Latin)