8303039: Utilize coverageLevels.txt
Reviewed-by: iris, joehw
This commit is contained in:
parent
32247c336a
commit
0b63557941
make
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
# Copyright (c) 2011, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
#
|
||||
# This code is free software; you can redistribute it and/or modify it
|
||||
@ -69,6 +69,13 @@ $(eval $(call SetupCopyFiles,COPY_NIMBUS_TEMPLATES, \
|
||||
|
||||
TARGETS += $(COPY_NIMBUS_TEMPLATES)
|
||||
|
||||
$(eval $(call SetupCopyFiles,COPY_CLDRCONVERTER_PROPERTIES, \
|
||||
SRC := $(TOPDIR)/make/jdk/src/classes/build/tools/cldrconverter, \
|
||||
DEST := $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes/build/tools/cldrconverter, \
|
||||
FILES := $(wildcard $(TOPDIR)/make/jdk/src/classes/build/tools/cldrconverter/*.properties)))
|
||||
|
||||
TARGETS += $(COPY_CLDRCONVERTER_PROPERTIES)
|
||||
|
||||
################################################################################
|
||||
|
||||
$(eval $(call SetupJavaCompilation, COMPILE_DEPEND, \
|
||||
|
141
make/data/cldr/common/properties/coverageLevels.txt
Normal file
141
make/data/cldr/common/properties/coverageLevels.txt
Normal file
@ -0,0 +1,141 @@
|
||||
# coverageLevels.txt
|
||||
# Copyright © 2022 Unicode, Inc.
|
||||
# CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
|
||||
# For terms of use, see http://www.unicode.org/copyright.html
|
||||
#
|
||||
# Provides the Coverage Level of locales at Basic or Above.
|
||||
# For more info, see the Locale Coverage Chart for this version.
|
||||
# Generated by ShowLocaleCoverage.
|
||||
#
|
||||
#Locale ; Level
|
||||
af ; modern
|
||||
am ; modern
|
||||
ar ; modern
|
||||
as ; modern
|
||||
ast ; basic
|
||||
az ; modern
|
||||
be ; modern
|
||||
bg ; modern
|
||||
bgc ; basic
|
||||
bho ; basic
|
||||
bn ; modern
|
||||
br ; moderate
|
||||
brx ; basic
|
||||
bs ; modern
|
||||
bs_Cyrl ; basic
|
||||
ca ; modern
|
||||
ceb ; moderate
|
||||
chr ; modern
|
||||
cs ; modern
|
||||
cv ; moderate
|
||||
cy ; modern
|
||||
da ; modern
|
||||
de ; modern
|
||||
doi ; basic
|
||||
dsb ; modern
|
||||
el ; modern
|
||||
en ; modern
|
||||
es ; modern
|
||||
et ; modern
|
||||
eu ; modern
|
||||
fa ; modern
|
||||
ff_Adlm ; basic
|
||||
fi ; modern
|
||||
fil ; modern
|
||||
fo ; basic
|
||||
fr ; modern
|
||||
ga ; modern
|
||||
gd ; modern
|
||||
gl ; modern
|
||||
gu ; modern
|
||||
ha ; modern
|
||||
he ; modern
|
||||
hi ; modern
|
||||
hi_Latn ; modern
|
||||
hr ; modern
|
||||
hsb ; modern
|
||||
hu ; modern
|
||||
hy ; modern
|
||||
ia ; basic
|
||||
id ; modern
|
||||
ig ; modern
|
||||
is ; modern
|
||||
it ; modern
|
||||
ja ; modern
|
||||
jv ; modern
|
||||
ka ; modern
|
||||
kea ; basic
|
||||
kgp ; basic
|
||||
kk ; modern
|
||||
km ; modern
|
||||
kn ; modern
|
||||
ko ; modern
|
||||
kok ; modern
|
||||
ks ; basic
|
||||
ks_Deva ; basic
|
||||
ky ; modern
|
||||
lo ; modern
|
||||
lt ; modern
|
||||
lv ; modern
|
||||
mai ; basic
|
||||
mi ; basic
|
||||
mk ; modern
|
||||
ml ; modern
|
||||
mn ; modern
|
||||
mni ; basic
|
||||
mr ; modern
|
||||
ms ; modern
|
||||
my ; modern
|
||||
ne ; modern
|
||||
nl ; modern
|
||||
nn ; modern
|
||||
no ; modern
|
||||
or ; modern
|
||||
pa ; modern
|
||||
pcm ; modern
|
||||
pl ; modern
|
||||
ps ; modern
|
||||
pt ; modern
|
||||
qu ; moderate
|
||||
raj ; basic
|
||||
rm ; basic
|
||||
ro ; modern
|
||||
ru ; modern
|
||||
sa ; basic
|
||||
sat ; basic
|
||||
sc ; moderate
|
||||
sd ; modern
|
||||
sd_Deva ; basic
|
||||
si ; modern
|
||||
sk ; modern
|
||||
sl ; modern
|
||||
so ; modern
|
||||
sq ; modern
|
||||
sr ; modern
|
||||
su ; basic
|
||||
sv ; modern
|
||||
sw ; modern
|
||||
ta ; modern
|
||||
te ; modern
|
||||
tg ; basic
|
||||
th ; modern
|
||||
ti ; basic
|
||||
tk ; modern
|
||||
to ; basic
|
||||
tr ; modern
|
||||
tt ; basic
|
||||
uk ; modern
|
||||
ur ; modern
|
||||
uz ; modern
|
||||
uz_Cyrl ; basic
|
||||
vi ; modern
|
||||
wo ; basic
|
||||
xh ; moderate
|
||||
yo ; modern
|
||||
yrl ; basic
|
||||
yue ; modern
|
||||
yue_Hans ; modern
|
||||
zh ; modern
|
||||
zh_Hant ; modern
|
||||
zu ; modern
|
||||
#EOF
|
@ -69,6 +69,7 @@ public class CLDRConverter {
|
||||
private static String WINZONES_SOURCE_FILE;
|
||||
private static String PLURALS_SOURCE_FILE;
|
||||
private static String DAYPERIODRULE_SOURCE_FILE;
|
||||
private static String COVERAGELEVELS_FILE;
|
||||
static String DESTINATION_DIR = "build/gensrc";
|
||||
|
||||
static final String LOCALE_NAME_PREFIX = "locale.displayname.";
|
||||
@ -258,6 +259,7 @@ public class CLDRConverter {
|
||||
WINZONES_SOURCE_FILE = CLDR_BASE + "/supplemental/windowsZones.xml";
|
||||
PLURALS_SOURCE_FILE = CLDR_BASE + "/supplemental/plurals.xml";
|
||||
DAYPERIODRULE_SOURCE_FILE = CLDR_BASE + "/supplemental/dayPeriods.xml";
|
||||
COVERAGELEVELS_FILE = CLDR_BASE + "/properties/coverageLevels.txt";
|
||||
|
||||
if (BASE_LOCALES.isEmpty()) {
|
||||
setupBaseLocales("en-US");
|
||||
@ -359,13 +361,18 @@ public class CLDRConverter {
|
||||
private static List<Bundle> readBundleList() throws Exception {
|
||||
List<Bundle> retList = new ArrayList<>();
|
||||
Path path = FileSystems.getDefault().getPath(SOURCE_FILE_DIR);
|
||||
var coverageMap = coverageLevelsMap();
|
||||
try (DirectoryStream<Path> dirStr = Files.newDirectoryStream(path)) {
|
||||
for (Path entry : dirStr) {
|
||||
String fileName = entry.getFileName().toString();
|
||||
if (fileName.endsWith(".xml")) {
|
||||
String id = fileName.substring(0, fileName.indexOf('.'));
|
||||
Locale cldrLoc = Locale.forLanguageTag(toLanguageTag(id));
|
||||
StringBuilder sb = getCandLocales(cldrLoc);
|
||||
List<Locale> candList = getCandidateLocales(cldrLoc);
|
||||
if (!"root".equals(id) && candList.stream().noneMatch(coverageMap::containsKey)) {
|
||||
continue;
|
||||
}
|
||||
StringBuilder sb = getCandLocales(candList);
|
||||
if (sb.indexOf("root") == -1) {
|
||||
sb.append("root");
|
||||
}
|
||||
@ -510,8 +517,7 @@ public class CLDRConverter {
|
||||
parser.parse(srcfile, handler);
|
||||
}
|
||||
|
||||
private static StringBuilder getCandLocales(Locale cldrLoc) {
|
||||
List<Locale> candList = getCandidateLocales(cldrLoc);
|
||||
private static StringBuilder getCandLocales(List<Locale> candList) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (Locale loc : candList) {
|
||||
if (!loc.equals(Locale.ROOT)) {
|
||||
@ -1195,6 +1201,26 @@ public class CLDRConverter {
|
||||
}));
|
||||
}
|
||||
|
||||
private static Map<Locale, String> coverageLevelsMap() throws Exception {
|
||||
// First, parse `coverageLevels.txt` file
|
||||
var covMap = Files.readAllLines(Path.of(COVERAGELEVELS_FILE)).stream()
|
||||
.filter(line -> !line.isBlank() && !line.startsWith("#"))
|
||||
.map(line -> line.split("[\s\t]*;[\s\t]*", 3))
|
||||
.filter(a -> a[1].matches("basic|moderate|modern|comprehensive"))
|
||||
.collect(Collectors.toMap(
|
||||
a -> Locale.forLanguageTag(a[0].replaceAll("_", "-")),
|
||||
a -> a[1],
|
||||
(v1, v2) -> v2, // should never happen
|
||||
HashMap::new));
|
||||
|
||||
// Add other common (non-seed) locales (below `basic` coverage level) as of v42
|
||||
ResourceBundle.getBundle(CLDRConverter.class.getPackageName() + ".OtherCommonLocales")
|
||||
.keySet()
|
||||
.forEach(k -> covMap.put(Locale.forLanguageTag(k), ""));
|
||||
|
||||
return covMap;
|
||||
}
|
||||
|
||||
// for debug
|
||||
static void dumpMap(Map<String, Object> map) {
|
||||
map.entrySet().stream()
|
||||
|
@ -0,0 +1,140 @@
|
||||
#
|
||||
# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
#
|
||||
# This code is free software; you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License version 2 only, as
|
||||
# published by the Free Software Foundation. Oracle designates this
|
||||
# particular file as subject to the "Classpath" exception as provided
|
||||
# by Oracle in the LICENSE file that accompanied this code.
|
||||
#
|
||||
# This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
# version 2 for more details (a copy is included in the LICENSE file that
|
||||
# accompanied this code).
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License version
|
||||
# 2 along with this work; if not, write to the Free Software Foundation,
|
||||
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
#
|
||||
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
# or visit www.oracle.com if you need additional information or have any
|
||||
# questions.
|
||||
#
|
||||
|
||||
# Other common (non-seed) locales that are below `Basic` coverage level
|
||||
# as of CLDR v42 (Extracted from
|
||||
# https://github.com/unicode-org/cldr-staging/blob/main/docs/charts/42/tsv/locale-coverage.tsv)
|
||||
|
||||
agq=Aghem
|
||||
ak=Akan
|
||||
ann=Obolo
|
||||
asa=Asu
|
||||
az-Cyrl=Azerbaijani (Cyrillic)
|
||||
bas=Basaa
|
||||
bem=Bemba
|
||||
bez=Bena
|
||||
bm=Bambara
|
||||
bo=Tibetan
|
||||
ccp=Chakma
|
||||
ce=Chechen
|
||||
cgg=Chiga
|
||||
ckb=Central Kurdish
|
||||
dav=Taita
|
||||
dje=Zarma
|
||||
dua=Duala
|
||||
dyo=Jola-Fonyi
|
||||
dz=Dzongkha
|
||||
ebu=Embu
|
||||
ee=Ewe
|
||||
eo=Esperanto
|
||||
ewo=Ewondo
|
||||
ff=Fula
|
||||
frr=Northern Frisian
|
||||
fur=Friulian
|
||||
fy=Western Frisian
|
||||
gsw=Swiss German
|
||||
guz=Gusii
|
||||
gv=Manx
|
||||
haw=Hawaiian
|
||||
ii=Sichuan Yi
|
||||
jgo=Ngomba
|
||||
jmc=Machame
|
||||
kab=Kabyle
|
||||
kam=Kamba
|
||||
kde=Makonde
|
||||
khq=Koyra Chiini
|
||||
ki=Kikuyu
|
||||
kkj=Kako
|
||||
kl=Kalaallisut
|
||||
kln=Kalenjin
|
||||
ksb=Shambala
|
||||
ksf=Bafia
|
||||
ksh=Colognian
|
||||
ku=Kurdish
|
||||
kw=Cornish
|
||||
lag=Langi
|
||||
lb=Luxembourgish
|
||||
lg=Ganda
|
||||
lkt=Lakota
|
||||
ln=Lingala
|
||||
lrc=Northern Luri
|
||||
lu=Luba-Katanga
|
||||
luo=Luo
|
||||
luy=Luyia
|
||||
mas=Masai
|
||||
mdf=Moksha
|
||||
mer=Meru
|
||||
mfe=Morisyen
|
||||
mg=Malagasy
|
||||
mgh=Makhuwa-Meetto
|
||||
mgo=Meta º
|
||||
mt=Maltese
|
||||
mua=Mundang
|
||||
mzn=Mazanderani
|
||||
naq=Nama
|
||||
nd=North Ndebele
|
||||
nds=Low German
|
||||
nmg=Kwasio
|
||||
nnh=Ngiemboon
|
||||
nus=Nuer
|
||||
nyn=Nyankole
|
||||
oc=Occitan
|
||||
om=Oromo
|
||||
os=Ossetic
|
||||
pa-Arab=Punjabi (Arabic)
|
||||
pis=Pijin
|
||||
rn=Rundi
|
||||
rof=Rombo
|
||||
rw=Kinyarwanda
|
||||
rwk=Rwa
|
||||
sah=Yakut
|
||||
saq=Samburu
|
||||
sbp=Sangu
|
||||
se=Northern Sami
|
||||
seh=Sena
|
||||
ses=Koyraboro Senni
|
||||
sg=Sango
|
||||
shi=Tachelhit
|
||||
shi-Latn=Tachelhit (Latin)
|
||||
smn=Inari Sami
|
||||
sms=Skolt Sami
|
||||
sn=Shona
|
||||
teo=Teso
|
||||
tok=Toki Pona
|
||||
twq=Tasawaq
|
||||
tzm=Central Atlas Tamazight
|
||||
ug=Uyghur
|
||||
uz-Arab=Uzbek (Arabic)
|
||||
vai=Vai
|
||||
vai-Latn=Vai (Latin)
|
||||
vun=Vunjo
|
||||
wae=Walser
|
||||
xog=Soga
|
||||
yav=Yangben
|
||||
yi=Yiddish
|
||||
zgh=Standard Moroccan Tamazight
|
||||
|
||||
# Not listed, but existed
|
||||
sr-Latn=Serbian (Latin)
|
Loading…
x
Reference in New Issue
Block a user