From d0be73a78038faf9509623bc4ba71eb4385cd645 Mon Sep 17 00:00:00 2001 From: Naoto Sato Date: Mon, 11 Sep 2023 16:48:22 +0000 Subject: [PATCH] 8041488: Locale-Dependent List Patterns Reviewed-by: joehw, rriggs --- .../build/tools/cldrconverter/Bundle.java | 8 +- .../tools/cldrconverter/CLDRConverter.java | 18 +- .../tools/cldrconverter/LDMLParseHandler.java | 52 +- .../share/classes/java/text/Format.java | 11 +- .../share/classes/java/text/ListFormat.java | 611 ++++++++++++++++++ .../util/locale/provider/LocaleResources.java | 28 + .../Format/ListFormat/TestListFormat.java | 299 +++++++++ 7 files changed, 1019 insertions(+), 8 deletions(-) create mode 100644 src/java.base/share/classes/java/text/ListFormat.java create mode 100644 test/jdk/java/text/Format/ListFormat/TestListFormat.java diff --git a/make/jdk/src/classes/build/tools/cldrconverter/Bundle.java b/make/jdk/src/classes/build/tools/cldrconverter/Bundle.java index 64d36544cf3..c2b0c8b83c7 100644 --- a/make/jdk/src/classes/build/tools/cldrconverter/Bundle.java +++ b/make/jdk/src/classes/build/tools/cldrconverter/Bundle.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -108,6 +108,12 @@ class Bundle { "narrow.Eras" }; + static final String[] LIST_PATTERN_KEYS = { + "ListPatterns_standard", + "ListPatterns_or", + "ListPatterns_unit", + }; + // DateFormatItem prefix static final String DATEFORMATITEM_KEY_PREFIX = "DateFormatItem."; static final String DATEFORMATITEM_INPUT_REGIONS_PREFIX = "DateFormatItemInputRegions."; diff --git a/make/jdk/src/classes/build/tools/cldrconverter/CLDRConverter.java b/make/jdk/src/classes/build/tools/cldrconverter/CLDRConverter.java index 614221b26bb..a511ae586f2 100644 --- a/make/jdk/src/classes/build/tools/cldrconverter/CLDRConverter.java +++ b/make/jdk/src/classes/build/tools/cldrconverter/CLDRConverter.java @@ -621,7 +621,14 @@ public class CLDRConverter { */ static void handleAliases(Map bundleMap) { for (String key : aliases.keySet()) { - var source = bundleMap.get(aliases.get(key)); + var sourceKey = aliases.get(key); + if (key.startsWith("ListPatterns_")) { + String k; + while ((k = aliases.get(sourceKey)) != null) { + sourceKey = k; + } + } + var source = bundleMap.get(sourceKey); if (source != null) { if (bundleMap.get(key) instanceof String[] sa) { // fill missing elements in case of String array @@ -871,6 +878,7 @@ public class CLDRConverter { "DayPeriodRules", "DateFormatItemInputRegions.allowed", "DateFormatItemInputRegions.preferred", + "ListPatterns", }; static final Set availableSkeletons = new HashSet<>(); @@ -935,6 +943,14 @@ public class CLDRConverter { formatData.put(k + ".NumberElements", neNew); }); } + + // ListPatterns + for (var lpKey : Bundle.LIST_PATTERN_KEYS) { + copyIfPresent(map, lpKey, formatData); + copyIfPresent(map, lpKey + "-short", formatData); + copyIfPresent(map, lpKey + "-narrow", formatData); + } + return formatData; } diff --git a/make/jdk/src/classes/build/tools/cldrconverter/LDMLParseHandler.java b/make/jdk/src/classes/build/tools/cldrconverter/LDMLParseHandler.java index 
941f45d408e..d2d7e94fc98 100644 --- a/make/jdk/src/classes/build/tools/cldrconverter/LDMLParseHandler.java +++ b/make/jdk/src/classes/build/tools/cldrconverter/LDMLParseHandler.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -35,6 +35,7 @@ import java.util.HashSet; import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Optional; import java.util.Set; import org.xml.sax.Attributes; import org.xml.sax.InputSource; @@ -812,6 +813,7 @@ class LDMLParseHandler extends AbstractLDMLHandler { && ((currentContainer.getqName().equals("decimalFormatLength")) || (currentContainer.getqName().equals("currencyFormat")) || (currentContainer.getqName().equals("percentFormat")) + || (currentContainer.getqName().equals("listPattern")) || (currentCalendarType != null && !currentCalendarType.lname().startsWith("islamic-")))) { // ignore islamic variants pushAliasEntry(qName, attributes, attributes.getValue("path")); } else { @@ -820,6 +822,28 @@ class LDMLParseHandler extends AbstractLDMLHandler { } break; + // ListPatterns + case "listPattern": + currentStyle = Optional.ofNullable(attributes.getValue("type")).orElse("standard"); + pushStringArrayEntry(qName, attributes, "ListPatterns_" + currentStyle, 5); + break; + case "listPatternPart": + type = attributes.getValue("type"); + pushStringArrayElement(qName, attributes, + switch (type) { + case "start" -> 0; + case "middle" -> 1; + case "end" -> 2; + case "2" -> 3; + case "3" -> 4; + default -> throw new IllegalArgumentException( + """ + The "type" attribute value for "listPatternPart" element is not recognized: %s + """.formatted(type) + ); + }); + break; + default: // treat anything else as a container pushContainer(qName, 
attributes); @@ -973,6 +997,9 @@ class LDMLParseHandler extends AbstractLDMLHandler { "NumberPatterns/" + (type.equals("standard") ? containerqName.replaceFirst("Format", "") : type); break; + case "listPattern": + keyName = type; + break; default: keyName = ""; break; @@ -1035,6 +1062,19 @@ class LDMLParseHandler extends AbstractLDMLHandler { return toJDKKey(qName, "", style); } + // listPattern + if (path.indexOf("../listPattern") != -1) { + typeKey = "[@type='"; + start = path.indexOf(typeKey); + String style; + if (start != -1) { + style = "ListPatterns_" + path.substring(start + typeKey.length(), path.indexOf("']", start)); + } else { + style = "ListPatterns_standard"; + } + return toJDKKey(qName, "", style); + } + return calType + "." + toJDKKey(qName, context, width); } @@ -1107,6 +1147,10 @@ class LDMLParseHandler extends AbstractLDMLHandler { case "timeFormatLength": currentStyle = ""; break; + case "listPattern": + currentStyle = ""; + putIfEntry(); + break; default: putIfEntry(); } @@ -1128,6 +1172,12 @@ class LDMLParseHandler extends AbstractLDMLHandler { toJDKKey(containerqName, "", kc.getKey()), getTarget(entry.getKey(), "", "", "") ); + } else if (containerqName.equals("listPattern")) { + var sae = (StringArrayEntry)entry.getParent(); + CLDRConverter.aliases.put( + toJDKKey(containerqName, "", sae.getKey()), + getTarget(entry.getKey(), "", "", "") + ); } else { Set keyNames = populateAliasKeys(containerqName, currentContext, currentWidth); if (!keyNames.isEmpty()) { diff --git a/src/java.base/share/classes/java/text/Format.java b/src/java.base/share/classes/java/text/Format.java index 80352f5bbc5..18b656c2c84 100644 --- a/src/java.base/share/classes/java/text/Format.java +++ b/src/java.base/share/classes/java/text/Format.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 1996, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1996, 2023, Oracle and/or its affiliates. All rights reserved. 
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -42,7 +42,7 @@ import java.io.Serializable; /** * {@code Format} is an abstract base class for formatting locale-sensitive - * information such as dates, messages, and numbers. + * information such as dates, messages, numbers, and lists. * *

* {@code Format} defines the programming interface for formatting @@ -61,9 +61,9 @@ import java.io.Serializable; *

Subclassing

* *

- * The Java Platform provides three specialized subclasses of {@code Format}-- - * {@code DateFormat}, {@code MessageFormat}, and - * {@code NumberFormat}--for formatting dates, messages, and numbers, + * The Java Platform provides specialized subclasses of {@code Format}-- + * {@code DateFormat}, {@code MessageFormat}, {@code NumberFormat}, and + * {@code ListFormat}--for formatting dates, messages, numbers, and lists * respectively. *

* Concrete subclasses must implement three methods: @@ -128,6 +128,7 @@ import java.io.Serializable; * @see java.text.NumberFormat * @see java.text.DateFormat * @see java.text.MessageFormat + * @see java.text.ListFormat * @author Mark Davis * @since 1.1 */ diff --git a/src/java.base/share/classes/java/text/ListFormat.java b/src/java.base/share/classes/java/text/ListFormat.java new file mode 100644 index 00000000000..1f60ffd28bd --- /dev/null +++ b/src/java.base/share/classes/java/text/ListFormat.java @@ -0,0 +1,611 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +package java.text; + +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.Serial; +import java.util.Arrays; +import java.util.List; +import java.util.Locale; +import java.util.Objects; +import java.util.regex.Pattern; +import java.util.stream.IntStream; +import sun.util.locale.provider.LocaleProviderAdapter; + +/** + * {@code ListFormat} formats or parses a list of strings in a locale-sensitive way. + * Use {@code ListFormat} to construct a list of strings displayed for end users. + * For example, displaying a list of 3 weekdays, e.g. "Monday", "Wednesday", "Friday" + * as "Monday, Wednesday, and Friday" in an inclusive list type. This class provides + * the functionality defined in Unicode Consortium's LDML specification for + * + * List Patterns. + *

+ * Three formatting types are provided: {@link Type#STANDARD STANDARD}, {@link Type#OR OR}, + * and {@link Type#UNIT UNIT}, which determines the punctuation + * between the strings and the connecting words if any. Also, three formatting styles for each + * type are provided: {@link Style#FULL FULL}, {@link Style#SHORT SHORT}, and + * {@link Style#NARROW NARROW}, suitable for how the strings are abbreviated (or not). + * The following snippet is an example of formatting + * the list of Strings {@code "Foo", "Bar", "Baz"} in US English with + * {@code STANDARD} type and {@code FULL} style: + * {@snippet lang=java : + * ListFormat.getInstance(Locale.US, ListFormat.Type.STANDARD, ListFormat.Style.FULL) + * .format(List.of("Foo", "Bar", "Baz")) + * } + * This will produce the concatenated list string, "Foo, Bar, and Baz" as seen in + * the following: + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
Formatting examples
FULLSHORTNARROW
STANDARDFoo, Bar, and BazFoo, Bar, & BazFoo, Bar, Baz
ORFoo, Bar, or BazFoo, Bar, or BazFoo, Bar, or Baz
UNITFoo, Bar, BazFoo, Bar, BazFoo Bar Baz
+ * Note: these examples are from CLDR; other locale providers may produce different results. + *

+ * Alternatively, instances that are independent of Locale, Type, and/or Style + * can be created with {@link #getInstance(String[])}. The String array passed to the + * method specifies the delimiting patterns for the start/middle/end portions of + * the formatted string, as well as optional specialized patterns for two or three + * elements. Refer to the method description for more detail. + *

+ * On parsing, if some ambiguity is found in the input string, such as delimiting + * sequences in the input string, the result, when formatted with the same formatting, does not + * re-produce the input string. For example, a two element String list + * "a, b,", "c" will be formatted as "a, b, and c", but may be parsed as three elements + * "a", "b", "c". + * + * @implSpec This class is immutable and thread-safe + * + * @spec https://www.unicode.org/reports/tr35 Unicode Locale Data Markup Language (LDML) + * @since 22 + */ +public final class ListFormat extends Format { + + @Serial + private static final long serialVersionUID = 5272525550078071946L; + + private static final int START = 0; + private static final int MIDDLE = 1; + private static final int END = 2; + private static final int TWO = 3; + private static final int THREE = 4; + private static final int PATTERN_ARRAY_LENGTH = THREE + 1; + + /** + * The locale to use for formatting list patterns. + * @serial + */ + private final Locale locale; + + /** + * The array of five pattern Strings. Each element corresponds to the Unicode LDML's + * `listPatternsPart` type, i.e, start/middle/end/two/three. 
+ * @serial + */ + private final String[] patterns; + + private static final Pattern PARSE_START = Pattern.compile("(.*?)\\{0}(.*?)\\{1}"); + private static final Pattern PARSE_MIDDLE = Pattern.compile("\\{0}(.*?)\\{1}"); + private static final Pattern PARSE_END = Pattern.compile("\\{0}(.*?)\\{1}(.*?)"); + private static final Pattern PARSE_TWO = Pattern.compile("(.*?)\\{0}(.*?)\\{1}(.*?)"); + private static final Pattern PARSE_THREE = Pattern.compile("(.*?)\\{0}(.*?)\\{1}(.*?)\\{2}(.*?)"); + private transient Pattern startPattern; + private transient String middleBetween; + private transient Pattern endPattern; + + private ListFormat(Locale l, String[] patterns) { + locale = l; + this.patterns = patterns; + init(); + } + + private void init() { + // check for null pattern elements + for (String elem : patterns) { + if (elem == null) { + throw new IllegalArgumentException("patterns array contains one or more null elements"); + } + } + + // get pattern strings + var m = PARSE_START.matcher(patterns[START]); + String startBefore; + String startBetween; + if (m.matches()) { + startBefore = m.group(1); + startBetween = m.group(2); + } else { + throw new IllegalArgumentException("start pattern is incorrect: " + patterns[START]); + } + m = PARSE_MIDDLE.matcher(patterns[MIDDLE]); + if (m.matches()) { + middleBetween = m.group(1); + } else { + throw new IllegalArgumentException("middle pattern is incorrect: " + patterns[MIDDLE]); + } + m = PARSE_END.matcher(patterns[END]); + String endBetween; + String endAfter; + if (m.matches()) { + endBetween = m.group(1); + endAfter = m.group(2); + } else { + throw new IllegalArgumentException("end pattern is incorrect: " + patterns[END]); + } + + // Validate two/three patterns, if given. 
Otherwise, generate them + if (!patterns[TWO].isEmpty()) { + if (!PARSE_TWO.matcher(patterns[TWO]).matches()) { + throw new IllegalArgumentException("pattern for two is incorrect: " + patterns[TWO]); + } + } else { + patterns[TWO] = startBefore + "{0}" + endBetween + "{1}" + endAfter; + } + if (!patterns[THREE].isEmpty()) { + if (!PARSE_THREE.matcher(patterns[THREE]).matches()) { + throw new IllegalArgumentException("pattern for three is incorrect: " + patterns[THREE]); + } + } else { + patterns[THREE] = startBefore + "{0}" + startBetween + "{1}" + endBetween + "{2}" + endAfter; + } + + startPattern = Pattern.compile(startBefore + "(.+?)" + startBetween); + endPattern = Pattern.compile(endBetween + "(.+?)" + endAfter); + } + + /** + * {@return the available locales that support ListFormat} + */ + public static Locale[] getAvailableLocales() { + // Same as a typical format class + return DateFormat.getAvailableLocales(); + } + + /** + * {@return the ListFormat object for the default + * {@link Locale.Category#FORMAT FORMAT Locale}, {@link Type#STANDARD STANDARD} type, + * and {@link Style#FULL FULL} style} + */ + public static ListFormat getInstance() { + return getInstance(Locale.getDefault(Locale.Category.FORMAT), Type.STANDARD, Style.FULL); + } + + /** + * {@return the ListFormat object for the specified {@link Locale}, {@link Type Type}, + * and {@link Style Style}} + * @param locale {@code Locale} to be used, not null + * @param type type of the ListFormat. One of {@code STANDARD}, {@code OR}, + * or {@code UNIT}, not null + * @param style style of the ListFormat. 
One of {@code FULL}, {@code SHORT}, + * or {@code NARROW}, not null + * @throws NullPointerException if any of the arguments are null + */ + public static ListFormat getInstance(Locale locale, Type type, Style style) { + Objects.requireNonNull(locale); + Objects.requireNonNull(type); + Objects.requireNonNull(style); + return new ListFormat(locale, LocaleProviderAdapter.forType(LocaleProviderAdapter.Type.CLDR) + .getLocaleResources(locale) + .getListPatterns(type, style)); + } + + /** + * {@return the ListFormat object for the specified patterns} + *

+ * This factory returns an instance based on the customized patterns array, + * instead of letting the runtime provide appropriate patterns for the {@code Locale}, + * {@code Type}, or {@code Style}. + *

+ * The patterns array should contain five String patterns, each corresponding to the Unicode LDML's + * {@code listPatternPart}, i.e., "start", "middle", "end", two element, and three element patterns + * in this order. Each pattern contains "{0}" and "{1}" (and "{2}" for the three element pattern) + * placeholders that are substituted with the passed input strings on formatting. + * If the length of the patterns array is not 5, an {@code IllegalArgumentException} + * is thrown. + *

+ * Each pattern string is first parsed as follows. Literals in parentheses, such as + * "start_before", are optional: + *

+     * start := (start_before){0}start_between{1}
+     * middle := {0}middle_between{1}
+     * end := {0}end_between{1}(end_after)
+     * two := (two_before){0}two_between{1}(two_after)
+     * three := (three_before){0}three_between1{1}three_between2{2}(three_after)
+     * 
+ * If the two or three pattern string is empty, it falls back to + * {@code "(start_before){0}end_between{1}(end_after)"} or + * {@code "(start_before){0}start_between{1}end_between{2}(end_after)"}, respectively. + * If parsing of any pattern string for start, middle, end, two, or three fails, + * an {@code IllegalArgumentException} is thrown. + *

+ * On formatting, the elements of the input string list are substituted for the above + * placeholders, using the pattern selected by the number of elements {@code n}: + *

+     * n = 1: {0}
+     * n = 2: parsed pattern for "two"
+     * n = 3: parsed pattern for "three"
+     * n > 3: (start_before){0}start_between{1}middle_between{2} ... middle_between{m}end_between{n}(end_after)
+     * 
+ * As an example, the following table shows a pattern array which is equivalent to + * {@code STANDARD} type, {@code FULL} style in US English: + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
Standard/Full Patterns in US English
Pattern KindPattern String
start"{0}, {1}"
middle"{0}, {1}"
end"{0}, and {1}"
two"{0} and {1}"
three""
+ * Here are the resulting formatted strings with the above pattern array. + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
Formatting examples
Input String ListFormatted String
"Foo", "Bar", "Baz", "Qux""Foo, Bar, Baz, and Qux"
"Foo", "Bar", "Baz""Foo, Bar, and Baz"
"Foo", "Bar""Foo and Bar"
"Foo""Foo"
+ * + * @param patterns array of patterns, not null + * @throws IllegalArgumentException if the length {@code patterns} array is not 5, or + * any of {@code start}, {@code middle}, {@code end}, {@code two}, or + * {@code three} patterns cannot be parsed. + * @throws NullPointerException if {@code patterns} is null. + */ + public static ListFormat getInstance(String[] patterns) { + Objects.requireNonNull(patterns); + if (patterns.length != PATTERN_ARRAY_LENGTH) { + throw new IllegalArgumentException("Pattern array length should be " + PATTERN_ARRAY_LENGTH); + } + return new ListFormat(Locale.ROOT, Arrays.copyOf(patterns, PATTERN_ARRAY_LENGTH)); + } + + /** + * {@return the string that consists of the input strings, concatenated with the + * patterns of this {@code ListFormat}} + * @apiNote Formatting the string from an excessively long list may exceed memory + * or string sizes. + * @param input The list of input strings to format. There should at least + * one String element in this list, otherwise an {@code IllegalArgumentException} + * is thrown. + * @throws IllegalArgumentException if the length of {@code input} is zero. + * @throws NullPointerException if {@code input} is null. + */ + public String format(List input) { + Objects.requireNonNull(input); + + return format(input, new StringBuffer(), + DontCareFieldPosition.INSTANCE).toString(); + } + + /** + * Formats an object and appends the resulting text to a given string + * buffer. The object should either be a List or an array of Objects. + * + * @apiNote Formatting the string from an excessively long list or array + * may exceed memory or string sizes. + * @param obj The object to format. Must be a List or an array + * of Object. + * @param toAppendTo where the text is to be appended + * @param pos Ignored. Not used in ListFormat. 
May be null + * @return the string buffer passed in as {@code toAppendTo}, + * with formatted text appended + * @throws NullPointerException if {@code obj} or {@code toAppendTo} is null + * @throws IllegalArgumentException if the given object cannot + * be formatted + */ + @Override + public StringBuffer format(Object obj, StringBuffer toAppendTo, FieldPosition pos) { + Objects.requireNonNull(obj); + Objects.requireNonNull(toAppendTo); + + if (obj instanceof Object[] objs) { + return generateMessageFormat(objs).format(objs, toAppendTo, DontCareFieldPosition.INSTANCE); + } else if (obj instanceof List objs) { + var a = objs.toArray(new Object[0]); + return generateMessageFormat(a).format(a, toAppendTo, DontCareFieldPosition.INSTANCE); + } else { + throw new IllegalArgumentException("The object to format should be a List or an Object[]"); + } + } + + /** + * {@return the parsed list of strings from the {@code source} string} + * + * Note that {@link #format(List)} and this method + * may not guarantee a round-trip, if the input strings contain ambiguous + * delimiters. For example, a two element String list {@code "a, b,", "c"} will be + * formatted as {@code "a, b, and c"}, but may be parsed as three elements + * {@code "a", "b", "c"}. + * + * @param source the string to parse, not null. + * @throws ParseException if parse failed + * @throws NullPointerException if source is null + */ + public List parse(String source) throws ParseException { + var pp = new ParsePosition(0); + if (parseObject(source, pp) instanceof List orig) { + // parseObject() should've returned List + return orig.stream().map(o -> (String)o).toList(); + } else { + throw new ParseException("Parse failed", pp.getErrorIndex()); + } + } + + /** + * Parses text from a string to produce a list of strings. + *

+ * The method attempts to parse text starting at the index given by + * {@code parsePos}. + * If parsing succeeds, then the index of {@code parsePos} is updated + * to the index after the last character used (parsing does not necessarily + * use all characters up to the end of the string), and the parsed + * object is returned. The updated {@code parsePos} can be used to + * indicate the starting point for the next call to parse additional text. + * If an error occurs, then the index of {@code parsePos} is not + * changed, the error index of {@code parsePos} is set to the index of + * the character where the error occurred, and null is returned. + * See the {@link #parse(String)} method for more information + * on list parsing. + * + * @param source A string, part of which should be parsed. + * @param parsePos A {@code ParsePosition} object with index and error + * index information as described above. + * @return A list of string parsed from the {@code source}. + * In case of error, returns null. + * @throws NullPointerException if {@code source} or {@code parsePos} is null. 
+ */ + @Override + public Object parseObject(String source, ParsePosition parsePos) { + Objects.requireNonNull(source); + Objects.requireNonNull(parsePos); + var sm = startPattern.matcher(source); + var em = endPattern.matcher(source); + Object parsed = null; + if (sm.find(parsePos.getIndex()) && em.find(parsePos.getIndex())) { + // get em to the last + var c = em.start(); + while (em.find()) { + c = em.start(); + } + em.find(c); + var startEnd = sm.end(); + var endStart = em.start(); + if (startEnd <= endStart) { + var mid = source.substring(startEnd, endStart); + var count = mid.split(middleBetween).length + 2; + parsed = new MessageFormat(createMessageFormatString(count), locale).parseObject(source, parsePos); + } + } + + if (parsed == null) { + // now try exact number patterns + parsed = new MessageFormat(patterns[TWO], locale).parseObject(source, parsePos); + if (parsed == null) { + parsed = new MessageFormat(patterns[THREE], locale).parseObject(source, parsePos); + } + } + + // return the entire source from parsePos if still no match + if (parsed == null) { + parsed = new String[]{source.substring(parsePos.getIndex())}; + parsePos.setIndex(source.length()); + } + + if (parsed instanceof Object[] objs) { + parsePos.setErrorIndex(-1); + return Arrays.asList(objs); + } else { + // MessageFormat.parseObject() failed + return null; + } + } + + @Override + public AttributedCharacterIterator formatToCharacterIterator(Object arguments) { + Objects.requireNonNull(arguments); + + if (arguments instanceof List objs) { + var a = objs.toArray(new Object[0]); + return generateMessageFormat(a).formatToCharacterIterator(a); + } else if (arguments instanceof Object[] objs) { + return generateMessageFormat(objs).formatToCharacterIterator(objs); + } else { + throw new IllegalArgumentException("The arguments should be a List or an Object[]"); + } + } + + /** + * Checks if this {@code ListFormat} is equal to another {@code ListFormat}. 
+ * The comparison is based on the {@code Locale} and formatting patterns, given or + * generated with {@code Locale}, {@code Type}, and {@code Style}. + * @param obj the object to check, {@code null} returns {@code false} + * @return {@code true} if this is equals to the other {@code ListFormat} + */ + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + + if (obj instanceof ListFormat other) { + return locale.equals(other.locale) && + Arrays.equals(patterns, other.patterns); + } + + return false; + } + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() { + return Objects.hash(locale, Arrays.hashCode(patterns)); + } + + /** + * {@inheritDoc} + */ + @Override + public String toString() { + return + """ + ListFormat [locale: "%s", start: "%s", middle: "%s", end: "%s", two: "%s", three: "%s"] + """.formatted(locale.getDisplayName(), patterns[START], patterns[MIDDLE], patterns[END], patterns[TWO], patterns[THREE]); + } + + private MessageFormat generateMessageFormat(Object[] input) { + var len = input.length; + return switch (len) { + case 0 -> throw new IllegalArgumentException("There should at least be one input string"); + case 1 -> new MessageFormat("{0}", locale); + case 2, 3 -> new MessageFormat(patterns[len + 1], locale); + default -> new MessageFormat(createMessageFormatString(len), locale); + }; + } + + private String createMessageFormatString(int count) { + var sb = new StringBuilder(256).append(patterns[START]); + IntStream.range(2, count - 1).forEach(i -> sb.append(middleBetween).append("{").append(i).append("}")); + sb.append(patterns[END].replaceFirst("\\{0}", "").replaceFirst("\\{1}", "\\{" + (count - 1) + "\\}")); + return sb.toString(); + } + + @java.io.Serial + private void readObject(ObjectInputStream stream) + throws IOException, ClassNotFoundException { + stream.defaultReadObject(); + try { + init(); + } catch (IllegalArgumentException iae) { + throw new IOException("Deserialization failed.", 
iae); + } + } + + /** + * A ListFormat type - {@link #STANDARD STANDARD}, {@link #OR OR}, and + * {@link #UNIT UNIT}. + *

+ * {@code Type} is an enum which represents the type for formatting + * a list within a given {@code ListFormat} instance. It determines + * the punctuation and the connecting words in the formatted text. + * + * @since 22 + */ + public enum Type { + + /** + * The {@code STANDARD} ListFormat type. This is the default + * type, which concatenates elements in "and" enumeration. + */ + STANDARD, + + /** + * The {@code OR} ListFormat type. This type concatenates + * elements in "or" enumeration. + */ + OR, + + /** + * The {@code UNIT} ListFormat type. This type concatenates + * elements, useful for enumerating units. + */ + UNIT + } + + /** + * A ListFormat style - {@link #FULL FULL}, {@link #SHORT SHORT}, + * and {@link #NARROW NARROW}. + *

+ * {@code Style} is an enum which represents the style for formatting + * a list within a given {@code ListFormat} instance. + * + * @since 22 + */ + public enum Style { + + /** + * The {@code FULL} ListFormat style. This is the default style, which typically is the + * full description of the text and punctuation that appear between the list elements. + * Suitable for elements, such as "Monday", "Tuesday", "Wednesday", etc. + */ + FULL, + + /** + * The {@code SHORT} ListFormat style. This style is typically an abbreviation + * of the text and punctuation that appear between the list elements. + * Suitable for elements, such as "Mon", "Tue", "Wed", etc. + */ + SHORT, + + /** + * The {@code NARROW} ListFormat style. This style is typically the shortest description + * of the text and punctuation that appear between the list elements. + * Suitable for elements, such as "M", "T", "W", etc. + */ + NARROW + } +} diff --git a/src/java.base/share/classes/sun/util/locale/provider/LocaleResources.java b/src/java.base/share/classes/sun/util/locale/provider/LocaleResources.java index 650add2094c..c023788188b 100644 --- a/src/java.base/share/classes/sun/util/locale/provider/LocaleResources.java +++ b/src/java.base/share/classes/sun/util/locale/provider/LocaleResources.java @@ -42,6 +42,7 @@ package sun.util.locale.provider; import java.lang.ref.ReferenceQueue; import java.lang.ref.SoftReference; +import java.text.ListFormat; import java.text.MessageFormat; import java.text.NumberFormat; import java.util.Arrays; @@ -98,6 +99,7 @@ public class LocaleResources { private static final String DATE_TIME_PATTERN = "DTP."; private static final String RULES_CACHEKEY = "RULE"; private static final String SKELETON_PATTERN = "SP."; + private static final String LIST_PATTERN = "LP."; // ResourceBundle key names for skeletons private static final String SKELETON_INPUT_REGIONS_KEY = "DateFormatItemInputRegions"; @@ -831,6 +833,32 @@ public class LocaleResources { return rules; } + /** + * 
{@return the list patterns for the locale} + * + * @param type a {@link ListFormat.Type} + * @param style a {@link ListFormat.Style} + */ + public String[] getListPatterns(ListFormat.Type type, ListFormat.Style style) { + String typeStr = type.toString().toLowerCase(Locale.ROOT); + String styleStr = style.toString().toLowerCase(Locale.ROOT); + String[] lpArray; + String cacheKey = LIST_PATTERN + typeStr; + + removeEmptyReferences(); + ResourceReference data = cache.get(cacheKey); + + if (data == null || ((lpArray = (String[]) data.get()) == null)) { + ResourceBundle rb = localeData.getDateFormatData(locale); + lpArray = rb.getStringArray("ListPatterns_" + typeStr + (style == ListFormat.Style.FULL ? "" : "-" + styleStr)); + if (lpArray == null) { + cache.put(cacheKey, new ResourceReference(cacheKey, new String[5], referenceQueue)); + } + } + + return lpArray; + } + private static class ResourceReference extends SoftReference { private final String cacheKey; diff --git a/test/jdk/java/text/Format/ListFormat/TestListFormat.java b/test/jdk/java/text/Format/ListFormat/TestListFormat.java new file mode 100644 index 00000000000..b6c3c2ae6fc --- /dev/null +++ b/test/jdk/java/text/Format/ListFormat/TestListFormat.java @@ -0,0 +1,299 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
/*
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * @test
 * @bug 8041488
 * @summary Tests for ListFormat class
 * @run junit TestListFormat
 */

import java.text.DateFormat;
import java.text.FieldPosition;
import java.text.ListFormat;
import java.text.ParseException;
import java.text.ParsePosition;
import java.util.List;
import java.util.Locale;

import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.params.provider.Arguments.arguments;

/**
 * JUnit tests for {@link ListFormat}: instance creation from custom pattern
 * arrays and from locale/type/style, formatting, argument validation and
 * parsing.
 *
 * Each parameterized test below carries a bare {@code @MethodSource}, so its
 * arguments come from the static method with the same name as the test.
 */
public class TestListFormat {
    // Sample inputs of one to four elements.
    private static final List SAMPLE1 = List.of("foo");
    private static final List SAMPLE2 = List.of("foo", "bar");
    private static final List SAMPLE3 = List.of("foo", "bar", "baz");
    private static final List SAMPLE4 = List.of("foo", "bar", "baz", "qux");

    // Custom pattern arrays always have five entries. Going by the error
    // messages paired with the IAE arrays below, the slots are, in order:
    // start, middle, end, pattern for two, pattern for three.

    // All five patterns populated.
    private static final String[] CUSTOM_PATTERNS_FULL = {
        "sbef {0} sbet {1}",
        "{0} mid {1}",
        "{0} ebet {1} eaft",
        "twobef {0} two {1} twoaft",
        "threebef {0} three {1} three {2} threeaft",
    };
    // Only start/middle/end populated; the two- and three-element slots are empty.
    private static final String[] CUSTOM_PATTERNS_MINIMAL = {
        "sbef {0} sbet {1}",
        "{0} mid {1}",
        "{0} ebet {1} eaft",
        "",
        "",
    };
    // Invalid start pattern (slot 0).
    private static final String[] CUSTOM_PATTERNS_IAE_START = {
        "{0}",
        "{0} mid {1}",
        "{0} ebet {1} eaft",
        "",
        "",
    };
    // Invalid middle pattern (slot 1).
    private static final String[] CUSTOM_PATTERNS_IAE_MIDDLE = {
        "{0} sbet {1}",
        "{0} {1} {2}",
        "{0} ebet {1} eaft",
        "",
        "",
    };
    // Invalid end pattern (slot 2).
    private static final String[] CUSTOM_PATTERNS_IAE_END = {
        "{0} sbet {1}",
        "{0} mid {1}",
        "error {0} ebet {1}",
        "",
        "",
    };
    // Invalid pattern for two elements (slot 3).
    private static final String[] CUSTOM_PATTERNS_IAE_TWO = {
        "sbef {0} sbet {1}",
        "{0} mid {1}",
        "{0} ebet {1} eaft",
        "{1}error{0}",
        "",
    };
    // Invalid pattern for three elements (slot 4).
    private static final String[] CUSTOM_PATTERNS_IAE_THREE = {
        "sbef {0} sbet {1}",
        "{0} mid {1}",
        "{0} ebet {1} eaft",
        "",
        "{0}error{1}",
    };
    // Correct length, but every element is null.
    private static final String[] CUSTOM_PATTERNS_IAE_NULL = {
        null,
        null,
        null,
        null,
        null,
    };


    /** Expects ListFormat to report the same available locales as DateFormat. */
    @Test
    void getAvailableLocales() {
        assertArrayEquals(DateFormat.getAvailableLocales(), ListFormat.getAvailableLocales());
    }

    /**
     * Expects the no-arg factory to equal the instance for the default FORMAT
     * locale with STANDARD type and FULL style.
     */
    @Test
    void getInstance_noArg() {
        assertEquals(ListFormat.getInstance(), ListFormat.getInstance(Locale.getDefault(Locale.Category.FORMAT), ListFormat.Type.STANDARD, ListFormat.Style.FULL));
    }

    /** Arguments for {@code getInstance_1Arg}: (patterns, input, expected formatted text). */
    static Arguments[] getInstance_1Arg() {
        return new Arguments[] {
                arguments(CUSTOM_PATTERNS_FULL, SAMPLE1, "foo"),
                arguments(CUSTOM_PATTERNS_FULL, SAMPLE2, "twobef foo two bar twoaft"),
                arguments(CUSTOM_PATTERNS_FULL, SAMPLE3, "threebef foo three bar three baz threeaft"),
                arguments(CUSTOM_PATTERNS_FULL, SAMPLE4, "sbef foo sbet bar mid baz ebet qux eaft"),
                arguments(CUSTOM_PATTERNS_MINIMAL, SAMPLE1, "foo"),
                arguments(CUSTOM_PATTERNS_MINIMAL, SAMPLE2, "sbef foo ebet bar eaft"),
                arguments(CUSTOM_PATTERNS_MINIMAL, SAMPLE3, "sbef foo sbet bar ebet baz eaft"),
                arguments(CUSTOM_PATTERNS_MINIMAL, SAMPLE4, "sbef foo sbet bar mid baz ebet qux eaft"),
        };
    }

    /** Arguments for {@code getInstance_1Arg_IAE}: (invalid patterns, expected exception message). */
    static Arguments[] getInstance_1Arg_IAE() {
        return new Arguments[] {
                arguments(new String[1], "Pattern array length should be 5"),
                arguments(new String[6], "Pattern array length should be 5"),
                arguments(CUSTOM_PATTERNS_IAE_START, "start pattern is incorrect: {0}"),
                arguments(CUSTOM_PATTERNS_IAE_MIDDLE, "middle pattern is incorrect: {0} {1} {2}"),
                arguments(CUSTOM_PATTERNS_IAE_END, "end pattern is incorrect: error {0} ebet {1}"),
                arguments(CUSTOM_PATTERNS_IAE_TWO, "pattern for two is incorrect: {1}error{0}"),
                arguments(CUSTOM_PATTERNS_IAE_THREE, "pattern for three is incorrect: {0}error{1}"),
                arguments(CUSTOM_PATTERNS_IAE_NULL, "patterns array contains one or more null elements"),
        };
    }

    /**
     * Arguments for {@code getInstance_3Arg}: (locale, type, style, expected
     * text for SAMPLE3, whether the formatted text is also parsed back).
     */
    static Arguments[] getInstance_3Arg() {
        return new Arguments[] {
                arguments(Locale.US, ListFormat.Type.STANDARD, ListFormat.Style.FULL,
                        "foo, bar, and baz", true),
                arguments(Locale.US, ListFormat.Type.OR, ListFormat.Style.FULL,
                        "foo, bar, or baz", true),
                arguments(Locale.US, ListFormat.Type.UNIT, ListFormat.Style.FULL,
                        "foo, bar, baz", true),
                arguments(Locale.US, ListFormat.Type.STANDARD, ListFormat.Style.SHORT,
                        "foo, bar, & baz", true),
                arguments(Locale.US, ListFormat.Type.OR, ListFormat.Style.SHORT,
                        "foo, bar, or baz", true),
                arguments(Locale.US, ListFormat.Type.UNIT, ListFormat.Style.SHORT,
                        "foo, bar, baz", true),
                arguments(Locale.US, ListFormat.Type.STANDARD, ListFormat.Style.NARROW,
                        "foo, bar, baz", true),
                arguments(Locale.US, ListFormat.Type.OR, ListFormat.Style.NARROW,
                        "foo, bar, or baz", true),
                arguments(Locale.US, ListFormat.Type.UNIT, ListFormat.Style.NARROW,
                        "foo bar baz", true),

                arguments(Locale.JAPAN, ListFormat.Type.STANDARD, ListFormat.Style.FULL,
                        "foo\u3001bar\u3001baz", true),
                arguments(Locale.JAPAN, ListFormat.Type.OR, ListFormat.Style.FULL,
                        "foo\u3001bar\u3001\u307e\u305f\u306fbaz", true),
                arguments(Locale.JAPAN, ListFormat.Type.UNIT, ListFormat.Style.FULL,
                        "foo bar baz", true),
                arguments(Locale.JAPAN, ListFormat.Type.STANDARD, ListFormat.Style.SHORT,
                        "foo\u3001bar\u3001baz", true),
                arguments(Locale.JAPAN, ListFormat.Type.OR, ListFormat.Style.SHORT,
                        "foo\u3001bar\u3001\u307e\u305f\u306fbaz", true),
                arguments(Locale.JAPAN, ListFormat.Type.UNIT, ListFormat.Style.SHORT,
                        "foo bar baz", true),
                arguments(Locale.JAPAN, ListFormat.Type.STANDARD, ListFormat.Style.NARROW,
                        "foo\u3001bar\u3001baz", true),
                arguments(Locale.JAPAN, ListFormat.Type.OR, ListFormat.Style.NARROW,
                        "foo\u3001bar\u3001\u307e\u305f\u306fbaz", true),
                arguments(Locale.JAPAN, ListFormat.Type.UNIT, ListFormat.Style.NARROW,
                        "foobarbaz", false), // no delimiter, impossible to parse/roundtrip
        };
    }

    /** Arguments for {@code parseObject_parsePos}: (patterns, input list). */
    static Arguments[] parseObject_parsePos() {
        return new Arguments[] {
                arguments(CUSTOM_PATTERNS_FULL, SAMPLE1),
                arguments(CUSTOM_PATTERNS_FULL, SAMPLE2),
                arguments(CUSTOM_PATTERNS_FULL, SAMPLE3),
                arguments(CUSTOM_PATTERNS_FULL, SAMPLE4),
                arguments(CUSTOM_PATTERNS_MINIMAL, SAMPLE1),
                arguments(CUSTOM_PATTERNS_MINIMAL, SAMPLE2),
                arguments(CUSTOM_PATTERNS_MINIMAL, SAMPLE3),
                arguments(CUSTOM_PATTERNS_MINIMAL, SAMPLE4),
        };
    }

    /** Formats {@code input} with a custom-pattern instance and checks text and round trip. */
    @ParameterizedTest
    @MethodSource
    void getInstance_1Arg(String[] patterns, List input, String expected) throws ParseException {
        var f = ListFormat.getInstance(patterns);
        compareResult(f, input, expected, true);
    }

    /** Expects an IllegalArgumentException with the exact message for each invalid pattern array. */
    @ParameterizedTest
    @MethodSource
    void getInstance_1Arg_IAE(String[] invalidPatterns, String errorMsg) {
        var ex = assertThrows(IllegalArgumentException.class,
                () -> ListFormat.getInstance(invalidPatterns));
        assertEquals(errorMsg, ex.getMessage());
    }

    /** Formats SAMPLE3 with a locale/type/style instance; parses it back only when {@code roundTrip} is set. */
    @ParameterizedTest
    @MethodSource
    void getInstance_3Arg(Locale l, ListFormat.Type type, ListFormat.Style style, String expected, boolean roundTrip) throws ParseException {
        var f = ListFormat.getInstance(l, type, style);
        compareResult(f, SAMPLE3, expected, roundTrip);
    }

    /** Exercises the three-argument {@code format} overload: input kinds, NPE and IAE cases. */
    @Test
    void format_3Arg() {
        var f = ListFormat.getInstance();
        // Ensures it accepts both List and []
        assertEquals(f.format(SAMPLE4, new StringBuffer(), null).toString(),
                f.format(SAMPLE4.toArray(), new StringBuffer(), null).toString());

        // Tests NPE
        assertThrows(NullPointerException.class,
                () -> f.format(null, new StringBuffer(), new FieldPosition(0)));
        assertThrows(NullPointerException.class,
                () -> f.format(new Object(), null, new FieldPosition(0)));

        // Tests IAE
        var ex = assertThrows(IllegalArgumentException.class,
                () -> f.format(new Object(), new StringBuffer(), null));
        assertEquals("The object to format should be a List or an Object[]", ex.getMessage());
    }

    /** Exercises {@code formatToCharacterIterator}: input kinds, NPE and IAE cases. */
    @Test
    void formatToCharacterIterator() {
        var f = ListFormat.getInstance();
        // Ensures it accepts both List and []
        assertEquals(f.formatToCharacterIterator(SAMPLE4).toString(),
                f.formatToCharacterIterator(SAMPLE4.toArray()).toString());

        // Tests NPE
        assertThrows(NullPointerException.class,
                () -> f.formatToCharacterIterator(null));

        // Tests IAE
        var ex = assertThrows(IllegalArgumentException.class,
                () -> f.formatToCharacterIterator(new Object()));
        assertEquals("The arguments should be a List or an Object[]", ex.getMessage());
    }

    /** Expects formatting an empty list to be rejected with the exact message below. */
    @Test
    void format_emptyInput() {
        var ex = assertThrows(IllegalArgumentException.class,
                () -> ListFormat.getInstance().format(List.of()));
        assertEquals("There should at least be one input string", ex.getMessage());
    }

    /**
     * Parses a formatted list that has been prefixed with extra text, starting
     * from three different parse positions.
     */
    @ParameterizedTest
    @MethodSource
    void parseObject_parsePos(String[] patterns, List input) {
        var prefix = "prefix";
        var f = ListFormat.getInstance(patterns);
        var testStr = prefix + f.format(input);

        // Starting right after the prefix: the input must be recovered and the
        // position must end up at the end of the string.
        var pp = new ParsePosition(prefix.length());
        var parsed = f.parseObject(testStr, pp);
        assertEquals(input, parsed, pp.toString());
        assertEquals(new ParsePosition(testStr.length()), pp);

        // Starting at 0 (prefix included): the result must differ from the
        // input, and no error index is expected to be recorded.
        pp.setIndex(0);
        parsed = f.parseObject(testStr, pp);
        assertNotEquals(input, parsed);
        assertEquals(-1, pp.getErrorIndex());

        // Starting one character into the formatted text: same expectations.
        pp.setIndex(prefix.length() + 1);
        parsed = f.parseObject(testStr, pp);
        assertNotEquals(input, parsed);
        assertEquals(-1, pp.getErrorIndex());
    }

    /**
     * Asserts that {@code f} formats {@code input} to {@code expected} and,
     * when {@code roundTrip} is true, that parsing the result yields {@code input}.
     */
    private static void compareResult(ListFormat f, List input, String expected, boolean roundTrip) throws ParseException {
        var result = f.format(input);
        assertEquals(expected, result);
        if (roundTrip) {
            assertEquals(input, f.parse(result));
        }
    }
}