8041488: Locale-Dependent List Patterns

Reviewed-by: joehw, rriggs
This commit is contained in:
Naoto Sato 2023-09-11 16:48:22 +00:00
parent dd214d0f95
commit d0be73a780
7 changed files with 1019 additions and 8 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -108,6 +108,12 @@ class Bundle {
"narrow.Eras"
};
// Resource bundle key names for the list pattern types ("standard", "or",
// and "unit"). Each entry is the key of the default-width patterns;
// CLDRConverter also looks up the same key with a "-short" and a "-narrow"
// suffix when copying list patterns into the format data.
static final String[] LIST_PATTERN_KEYS = {
"ListPatterns_standard",
"ListPatterns_or",
"ListPatterns_unit",
};
// DateFormatItem prefix
static final String DATEFORMATITEM_KEY_PREFIX = "DateFormatItem.";
static final String DATEFORMATITEM_INPUT_REGIONS_PREFIX = "DateFormatItemInputRegions.";

View File

@ -621,7 +621,14 @@ public class CLDRConverter {
*/
static void handleAliases(Map<String, Object> bundleMap) {
for (String key : aliases.keySet()) {
var source = bundleMap.get(aliases.get(key));
var sourceKey = aliases.get(key);
if (key.startsWith("ListPatterns_")) {
String k;
while ((k = aliases.get(sourceKey)) != null) {
sourceKey = k;
}
}
var source = bundleMap.get(sourceKey);
if (source != null) {
if (bundleMap.get(key) instanceof String[] sa) {
// fill missing elements in case of String array
@ -871,6 +878,7 @@ public class CLDRConverter {
"DayPeriodRules",
"DateFormatItemInputRegions.allowed",
"DateFormatItemInputRegions.preferred",
"ListPatterns",
};
static final Set<String> availableSkeletons = new HashSet<>();
@ -935,6 +943,14 @@ public class CLDRConverter {
formatData.put(k + ".NumberElements", neNew);
});
}
// ListPatterns
for (var lpKey : Bundle.LIST_PATTERN_KEYS) {
copyIfPresent(map, lpKey, formatData);
copyIfPresent(map, lpKey + "-short", formatData);
copyIfPresent(map, lpKey + "-narrow", formatData);
}
return formatData;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -35,6 +35,7 @@ import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
@ -812,6 +813,7 @@ class LDMLParseHandler extends AbstractLDMLHandler<Object> {
&& ((currentContainer.getqName().equals("decimalFormatLength"))
|| (currentContainer.getqName().equals("currencyFormat"))
|| (currentContainer.getqName().equals("percentFormat"))
|| (currentContainer.getqName().equals("listPattern"))
|| (currentCalendarType != null && !currentCalendarType.lname().startsWith("islamic-")))) { // ignore islamic variants
pushAliasEntry(qName, attributes, attributes.getValue("path"));
} else {
@ -820,6 +822,28 @@ class LDMLParseHandler extends AbstractLDMLHandler<Object> {
}
break;
// ListPatterns
case "listPattern":
currentStyle = Optional.ofNullable(attributes.getValue("type")).orElse("standard");
pushStringArrayEntry(qName, attributes, "ListPatterns_" + currentStyle, 5);
break;
case "listPatternPart":
type = attributes.getValue("type");
pushStringArrayElement(qName, attributes,
switch (type) {
case "start" -> 0;
case "middle" -> 1;
case "end" -> 2;
case "2" -> 3;
case "3" -> 4;
default -> throw new IllegalArgumentException(
"""
The "type" attribute value for "listPatternPart" element is not recognized: %s
""".formatted(type)
);
});
break;
default:
// treat anything else as a container
pushContainer(qName, attributes);
@ -973,6 +997,9 @@ class LDMLParseHandler extends AbstractLDMLHandler<Object> {
"NumberPatterns/" +
(type.equals("standard") ? containerqName.replaceFirst("Format", "") : type);
break;
case "listPattern":
keyName = type;
break;
default:
keyName = "";
break;
@ -1035,6 +1062,19 @@ class LDMLParseHandler extends AbstractLDMLHandler<Object> {
return toJDKKey(qName, "", style);
}
// listPattern
if (path.indexOf("../listPattern") != -1) {
typeKey = "[@type='";
start = path.indexOf(typeKey);
String style;
if (start != -1) {
style = "ListPatterns_" + path.substring(start + typeKey.length(), path.indexOf("']", start));
} else {
style = "ListPatterns_standard";
}
return toJDKKey(qName, "", style);
}
return calType + "." + toJDKKey(qName, context, width);
}
@ -1107,6 +1147,10 @@ class LDMLParseHandler extends AbstractLDMLHandler<Object> {
case "timeFormatLength":
currentStyle = "";
break;
case "listPattern":
currentStyle = "";
putIfEntry();
break;
default:
putIfEntry();
}
@ -1128,6 +1172,12 @@ class LDMLParseHandler extends AbstractLDMLHandler<Object> {
toJDKKey(containerqName, "", kc.getKey()),
getTarget(entry.getKey(), "", "", "")
);
} else if (containerqName.equals("listPattern")) {
var sae = (StringArrayEntry)entry.getParent();
CLDRConverter.aliases.put(
toJDKKey(containerqName, "", sae.getKey()),
getTarget(entry.getKey(), "", "", "")
);
} else {
Set<String> keyNames = populateAliasKeys(containerqName, currentContext, currentWidth);
if (!keyNames.isEmpty()) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1996, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1996, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -42,7 +42,7 @@ import java.io.Serializable;
/**
* {@code Format} is an abstract base class for formatting locale-sensitive
* information such as dates, messages, and numbers.
* information such as dates, messages, numbers, and lists.
*
* <p>
* {@code Format} defines the programming interface for formatting
@ -61,9 +61,9 @@ import java.io.Serializable;
* <h2>Subclassing</h2>
*
* <p>
* The Java Platform provides three specialized subclasses of {@code Format}--
* {@code DateFormat}, {@code MessageFormat}, and
* {@code NumberFormat}--for formatting dates, messages, and numbers,
* The Java Platform provides specialized subclasses of {@code Format}--
* {@code DateFormat}, {@code MessageFormat}, {@code NumberFormat}, and
* {@code ListFormat}--for formatting dates, messages, numbers, and lists
* respectively.
* <p>
* Concrete subclasses must implement three methods:
@ -128,6 +128,7 @@ import java.io.Serializable;
* @see java.text.NumberFormat
* @see java.text.DateFormat
* @see java.text.MessageFormat
* @see java.text.ListFormat
* @author Mark Davis
* @since 1.1
*/

View File

@ -0,0 +1,611 @@
/*
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package java.text;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.Serial;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.regex.Pattern;
import java.util.stream.IntStream;
import sun.util.locale.provider.LocaleProviderAdapter;
/**
* {@code ListFormat} formats or parses a list of strings in a locale-sensitive way.
* Use {@code ListFormat} to construct a list of strings displayed for end users.
* For example, displaying a list of 3 weekdays, e.g. "Monday", "Wednesday", "Friday"
* as "Monday, Wednesday, and Friday" in an inclusive list type. This class provides
* the functionality defined in Unicode Consortium's LDML specification for
* <a href="https://www.unicode.org/reports/tr35/tr35-general.html#ListPatterns">
* List Patterns</a>.
* <p>
* Three formatting types are provided: {@link Type#STANDARD STANDARD}, {@link Type#OR OR},
* and {@link Type#UNIT UNIT}, which determine the punctuation
* between the strings and the connecting words, if any. Also, three formatting styles for each
* type are provided: {@link Style#FULL FULL}, {@link Style#SHORT SHORT}, and
* {@link Style#NARROW NARROW}, suitable for how the strings are abbreviated (or not).
* The following snippet is an example of formatting
* the list of Strings {@code "Foo", "Bar", "Baz"} in US English with
* {@code STANDARD} type and {@code FULL} style:
* {@snippet lang=java :
* ListFormat.getInstance(Locale.US, ListFormat.Type.STANDARD, ListFormat.Style.FULL)
* .format(List.of("Foo", "Bar", "Baz"))
* }
* This will produce the concatenated list string, "Foo, Bar, and Baz" as seen in
* the following:
* <table class="striped">
* <caption style="display:none">Formatting examples</caption>
* <thead>
* <tr><th scope="col"></th>
* <th scope="col">FULL</th>
* <th scope="col">SHORT</th>
* <th scope="col">NARROW</th></tr>
* </thead>
* <tbody>
* <tr><th scope="row" style="text-align:left">STANDARD</th>
* <td>Foo, Bar, and Baz</td>
* <td>Foo, Bar, &amp; Baz</td>
* <td>Foo, Bar, Baz</td>
* <tr><th scope="row" style="text-align:left">OR</th>
* <td>Foo, Bar, or Baz</td>
* <td>Foo, Bar, or Baz</td>
* <td>Foo, Bar, or Baz</td>
* <tr><th scope="row" style="text-align:left">UNIT</th>
* <td>Foo, Bar, Baz</td>
* <td>Foo, Bar, Baz</td>
* <td>Foo Bar Baz</td>
* </tbody>
* </table>
* Note: these examples are from CLDR, there could be different results from other locale providers.
* <p>
* Alternatively, Locale, Type, and/or Style independent instances
* can be created with {@link #getInstance(String[])}. The String array to the
* method specifies the delimiting patterns for the start/middle/end portion of
* the formatted string, as well as optional specialized patterns for two or three
* elements. Refer to the method description for more detail.
* <p>
* On parsing, if the input string is ambiguous, for example because an element
* itself contains a delimiting sequence, formatting the parsed result with the same
* {@code ListFormat} may not reproduce the input string. For example, a two element String list
* "a, b,", "c" will be formatted as "a, b, and c", but may be parsed as three elements
* "a", "b", "c".
*
* @implSpec This class is immutable and thread-safe
*
* @spec https://www.unicode.org/reports/tr35 Unicode Locale Data Markup Language (LDML)
* @since 22
*/
public final class ListFormat extends Format {

    @Serial
    private static final long serialVersionUID = 5272525550078071946L;

    // Indices into the patterns array, in the order of LDML's listPatternPart types
    private static final int START = 0;
    private static final int MIDDLE = 1;
    private static final int END = 2;
    private static final int TWO = 3;
    private static final int THREE = 4;
    private static final int PATTERN_ARRAY_LENGTH = THREE + 1;

    /**
     * The locale to use for formatting list patterns.
     * @serial
     */
    private final Locale locale;

    /**
     * The array of five pattern Strings. Each element corresponds to the Unicode LDML's
     * {@code listPatternPart} type, i.e., start/middle/end/two/three.
     * @serial
     */
    private final String[] patterns;

    // Regular expressions that split each pattern string into its literal parts
    private static final Pattern PARSE_START = Pattern.compile("(.*?)\\{0}(.*?)\\{1}");
    private static final Pattern PARSE_MIDDLE = Pattern.compile("\\{0}(.*?)\\{1}");
    private static final Pattern PARSE_END = Pattern.compile("\\{0}(.*?)\\{1}(.*?)");
    private static final Pattern PARSE_TWO = Pattern.compile("(.*?)\\{0}(.*?)\\{1}(.*?)");
    private static final Pattern PARSE_THREE = Pattern.compile("(.*?)\\{0}(.*?)\\{1}(.*?)\\{2}(.*?)");

    // Derived state, rebuilt by init() on construction and on deserialization
    private transient Pattern startPattern;
    private transient String middleBetween;
    private transient Pattern endPattern;

    /**
     * Creates an instance. The {@code patterns} array is retained (and possibly
     * completed in place by {@link #init()}), so callers must pass an array that
     * is not shared with anyone else.
     */
    private ListFormat(Locale l, String[] patterns) {
        locale = l;
        this.patterns = patterns;
        init();
    }

    /**
     * Validates the pattern array, fills in the "two" and "three" patterns if they
     * are empty, and builds the derived state used on parsing.
     *
     * @throws IllegalArgumentException if the array does not have exactly five
     *      elements, contains a null element, or any pattern cannot be parsed
     */
    private void init() {
        // The array may come from a provider or a serialized stream; never trust its length
        if (patterns.length != PATTERN_ARRAY_LENGTH) {
            throw new IllegalArgumentException("Pattern array length should be " + PATTERN_ARRAY_LENGTH);
        }

        // check for null pattern elements
        for (String elem : patterns) {
            if (elem == null) {
                throw new IllegalArgumentException("patterns array contains one or more null elements");
            }
        }

        // get pattern strings
        var m = PARSE_START.matcher(patterns[START]);
        if (!m.matches()) {
            throw new IllegalArgumentException("start pattern is incorrect: " + patterns[START]);
        }
        String startBefore = m.group(1);
        String startBetween = m.group(2);

        m = PARSE_MIDDLE.matcher(patterns[MIDDLE]);
        if (!m.matches()) {
            throw new IllegalArgumentException("middle pattern is incorrect: " + patterns[MIDDLE]);
        }
        middleBetween = m.group(1);

        m = PARSE_END.matcher(patterns[END]);
        if (!m.matches()) {
            throw new IllegalArgumentException("end pattern is incorrect: " + patterns[END]);
        }
        String endBetween = m.group(1);
        String endAfter = m.group(2);

        // Validate two/three patterns, if given. Otherwise, generate them
        if (!patterns[TWO].isEmpty()) {
            if (!PARSE_TWO.matcher(patterns[TWO]).matches()) {
                throw new IllegalArgumentException("pattern for two is incorrect: " + patterns[TWO]);
            }
        } else {
            patterns[TWO] = startBefore + "{0}" + endBetween + "{1}" + endAfter;
        }
        if (!patterns[THREE].isEmpty()) {
            if (!PARSE_THREE.matcher(patterns[THREE]).matches()) {
                throw new IllegalArgumentException("pattern for three is incorrect: " + patterns[THREE]);
            }
        } else {
            patterns[THREE] = startBefore + "{0}" + startBetween + "{1}" + endBetween + "{2}" + endAfter;
        }

        // The delimiters are literal text. Quote them so that characters such as
        // '.', '|' or '(' are not interpreted as regular expression syntax.
        startPattern = Pattern.compile(Pattern.quote(startBefore) + "(.+?)" + Pattern.quote(startBetween));
        endPattern = Pattern.compile(Pattern.quote(endBetween) + "(.+?)" + Pattern.quote(endAfter));
    }

    /**
     * {@return the available locales that support ListFormat}
     */
    public static Locale[] getAvailableLocales() {
        // Same as a typical format class
        return DateFormat.getAvailableLocales();
    }

    /**
     * {@return the ListFormat object for the default
     * {@link Locale.Category#FORMAT FORMAT Locale}, {@link Type#STANDARD STANDARD} type,
     * and {@link Style#FULL FULL} style}
     */
    public static ListFormat getInstance() {
        return getInstance(Locale.getDefault(Locale.Category.FORMAT), Type.STANDARD, Style.FULL);
    }

    /**
     * {@return the ListFormat object for the specified {@link Locale}, {@link Type Type},
     * and {@link Style Style}}
     * @param locale {@code Locale} to be used, not null
     * @param type type of the ListFormat. One of {@code STANDARD}, {@code OR},
     *      or {@code UNIT}, not null
     * @param style style of the ListFormat. One of {@code FULL}, {@code SHORT},
     *      or {@code NARROW}, not null
     * @throws NullPointerException if any of the arguments are null
     */
    public static ListFormat getInstance(Locale locale, Type type, Style style) {
        Objects.requireNonNull(locale);
        Objects.requireNonNull(type);
        Objects.requireNonNull(style);
        String[] localePatterns = LocaleProviderAdapter.forType(LocaleProviderAdapter.Type.CLDR)
                .getLocaleResources(locale)
                .getListPatterns(type, style);
        // init() completes empty two/three patterns in place, so work on a private
        // copy rather than on the array owned by the locale resources.
        return new ListFormat(locale, localePatterns.clone());
    }

    /**
     * {@return the ListFormat object for the specified patterns}
     * <p>
     * This factory returns an instance based on the customized patterns array,
     * instead of letting the runtime provide appropriate patterns for the {@code Locale},
     * {@code Type}, or {@code Style}.
     * <p>
     * The patterns array should contain five String patterns, each corresponding to the Unicode LDML's
     * {@code listPatternPart}, i.e., "start", "middle", "end", two element, and three element patterns
     * in this order. Each pattern contains "{0}" and "{1}" (and "{2}" for the three element pattern)
     * placeholders that are substituted with the passed input strings on formatting.
     * If the length of the patterns array is not 5, an {@code IllegalArgumentException}
     * is thrown.
     * <p>
     * Each pattern string is first parsed as follows. Literals in parentheses, such as
     * "start_before", are optional:
     * <blockquote><pre>
     * start := (start_before){0}start_between{1}
     * middle := {0}middle_between{1}
     * end := {0}end_between{1}(end_after)
     * two := (two_before){0}two_between{1}(two_after)
     * three := (three_before){0}three_between1{1}three_between2{2}(three_after)
     * </pre></blockquote>
     * If two or three pattern string is empty, it falls back to
     * {@code "(start_before){0}end_between{1}(end_after)"},
     * {@code "(start_before){0}start_between{1}end_between{2}(end_after)"} respectively.
     * If parsing of any pattern string for start, middle, end, two, or three fails,
     * it throws an {@code IllegalArgumentException}.
     * <p>
     * On formatting, the input string list with {@code n} elements substitutes above
     * placeholders based on the number of elements:
     * <blockquote><pre>
     * n = 1: {0}
     * n = 2: parsed pattern for "two"
     * n = 3: parsed pattern for "three"
     * n > 3: (start_before){0}start_between{1}middle_between{2} ... middle_between{m}end_between{n}(end_after)
     * </pre></blockquote>
     * As an example, the following table shows a pattern array which is equivalent to
     * {@code STANDARD} type, {@code FULL} style in US English:
     * <table class="striped">
     * <caption style="display:none">Standard/Full Patterns in US English</caption>
     * <thead>
     * <tr><th scope="col">Pattern Kind</th>
     *     <th scope="col">Pattern String</th></tr>
     * </thead>
     * <tbody>
     * <tr><th scope="row" style="text-align:left">start</th>
     *     <td>"{0}, {1}"</td>
     * <tr><th scope="row" style="text-align:left">middle</th>
     *     <td>"{0}, {1}"</td>
     * <tr><th scope="row" style="text-align:left">end</th>
     *     <td>"{0}, and {1}"</td>
     * <tr><th scope="row" style="text-align:left">two</th>
     *     <td>"{0} and {1}"</td>
     * <tr><th scope="row" style="text-align:left">three</th>
     *     <td>""</td>
     * </tbody>
     * </table>
     * Here are the resulting formatted strings with the above pattern array.
     * <table class="striped">
     * <caption style="display:none">Formatting examples</caption>
     * <thead>
     * <tr><th scope="col">Input String List</th>
     *     <th scope="col">Formatted String</th></tr>
     * </thead>
     * <tbody>
     * <tr><th scope="row" style="text-align:left">"Foo", "Bar", "Baz", "Qux"</th>
     *     <td>"Foo, Bar, Baz, and Qux"</td>
     * <tr><th scope="row" style="text-align:left">"Foo", "Bar", "Baz"</th>
     *     <td>"Foo, Bar, and Baz"</td>
     * <tr><th scope="row" style="text-align:left">"Foo", "Bar"</th>
     *     <td>"Foo and Bar"</td>
     * <tr><th scope="row" style="text-align:left">"Foo"</th>
     *     <td>"Foo"</td>
     * </tbody>
     * </table>
     *
     * @param patterns array of patterns, not null
     * @throws IllegalArgumentException if the length of the {@code patterns} array is not 5, or
     *      any of {@code start}, {@code middle}, {@code end}, {@code two}, or
     *      {@code three} patterns cannot be parsed.
     * @throws NullPointerException if {@code patterns} is null.
     */
    public static ListFormat getInstance(String[] patterns) {
        Objects.requireNonNull(patterns);
        if (patterns.length != PATTERN_ARRAY_LENGTH) {
            throw new IllegalArgumentException("Pattern array length should be " + PATTERN_ARRAY_LENGTH);
        }
        // Defensive copy: the instance must not observe later changes to the caller's array
        return new ListFormat(Locale.ROOT, Arrays.copyOf(patterns, PATTERN_ARRAY_LENGTH));
    }

    /**
     * {@return the string that consists of the input strings, concatenated with the
     * patterns of this {@code ListFormat}}
     * @apiNote Formatting the string from an excessively long list may exceed memory
     *          or string sizes.
     * @param input The list of input strings to format. There should be at least
     *              one String element in this list, otherwise an {@code IllegalArgumentException}
     *              is thrown.
     * @throws IllegalArgumentException if the length of {@code input} is zero.
     * @throws NullPointerException if {@code input} is null.
     */
    public String format(List<String> input) {
        Objects.requireNonNull(input);
        return format(input, new StringBuffer(),
                DontCareFieldPosition.INSTANCE).toString();
    }

    /**
     * Formats an object and appends the resulting text to a given string
     * buffer. The object should either be a List or an array of Objects.
     *
     * @apiNote Formatting the string from an excessively long list or array
     *          may exceed memory or string sizes.
     * @param obj    The object to format. Must be a List or an array
     *               of Object.
     * @param toAppendTo    where the text is to be appended
     * @param pos    Ignored. Not used in ListFormat. May be null
     * @return       the string buffer passed in as {@code toAppendTo},
     *               with formatted text appended
     * @throws    NullPointerException if {@code obj} or {@code toAppendTo} is null
     * @throws    IllegalArgumentException if the given object cannot
     *               be formatted
     */
    @Override
    public StringBuffer format(Object obj, StringBuffer toAppendTo, FieldPosition pos) {
        Objects.requireNonNull(obj);
        Objects.requireNonNull(toAppendTo);

        if (obj instanceof Object[] objs) {
            return generateMessageFormat(objs).format(objs, toAppendTo, DontCareFieldPosition.INSTANCE);
        } else if (obj instanceof List<?> objs) {
            var a = objs.toArray(new Object[0]);
            return generateMessageFormat(a).format(a, toAppendTo, DontCareFieldPosition.INSTANCE);
        } else {
            throw new IllegalArgumentException("The object to format should be a List<Object> or an Object[]");
        }
    }

    /**
     * {@return the parsed list of strings from the {@code source} string}
     *
     * Note that {@link #format(List)} and this method
     * may not guarantee a round-trip, if the input strings contain ambiguous
     * delimiters. For example, a two element String list {@code "a, b,", "c"} will be
     * formatted as {@code "a, b, and c"}, but may be parsed as three elements
     * {@code "a", "b", "c"}.
     *
     * @param source the string to parse, not null.
     * @throws ParseException if parse failed
     * @throws NullPointerException if source is null
     */
    public List<String> parse(String source) throws ParseException {
        var pp = new ParsePosition(0);
        if (parseObject(source, pp) instanceof List<?> orig) {
            // parseObject() should've returned List<String>
            return orig.stream().map(o -> (String)o).toList();
        } else {
            throw new ParseException("Parse failed", pp.getErrorIndex());
        }
    }

    /**
     * Parses text from a string to produce a list of strings.
     * <p>
     * The method attempts to parse text starting at the index given by
     * {@code parsePos}.
     * If parsing succeeds, then the index of {@code parsePos} is updated
     * to the index after the last character used (parsing does not necessarily
     * use all characters up to the end of the string), and the parsed
     * object is returned. The updated {@code parsePos} can be used to
     * indicate the starting point for the next call to parse additional text.
     * If an error occurs, then the index of {@code parsePos} is not
     * changed, the error index of {@code parsePos} is set to the index of
     * the character where the error occurred, and null is returned.
     * See the {@link #parse(String)} method for more information
     * on list parsing.
     *
     * @param source A string, part of which should be parsed.
     * @param parsePos A {@code ParsePosition} object with index and error
     *            index information as described above.
     * @return A list of string parsed from the {@code source}.
     *      In case of error, returns null.
     * @throws NullPointerException if {@code source} or {@code parsePos} is null.
     */
    @Override
    public Object parseObject(String source, ParsePosition parsePos) {
        Objects.requireNonNull(source);
        Objects.requireNonNull(parsePos);
        var sm = startPattern.matcher(source);
        var em = endPattern.matcher(source);
        Object parsed = null;
        if (sm.find(parsePos.getIndex()) && em.find(parsePos.getIndex())) {
            // get em to the last occurrence of the end delimiter
            var c = em.start();
            while (em.find()) {
                c = em.start();
            }
            em.find(c);
            var startEnd = sm.end();
            var endStart = em.start();
            if (startEnd <= endStart) {
                // Count the elements between the start and end portions; the
                // delimiter is literal text, so quote it before splitting.
                var mid = source.substring(startEnd, endStart);
                var count = mid.split(Pattern.quote(middleBetween)).length + 2;
                parsed = new MessageFormat(createMessageFormatString(count), locale).parseObject(source, parsePos);
            }
        }

        if (parsed == null) {
            // now try exact number patterns
            parsed = new MessageFormat(patterns[TWO], locale).parseObject(source, parsePos);
            if (parsed == null) {
                parsed = new MessageFormat(patterns[THREE], locale).parseObject(source, parsePos);
            }
        }

        // return the entire source from parsePos if still no match
        if (parsed == null) {
            parsed = new String[]{source.substring(parsePos.getIndex())};
            parsePos.setIndex(source.length());
        }

        if (parsed instanceof Object[] objs) {
            parsePos.setErrorIndex(-1);
            return Arrays.asList(objs);
        } else {
            // MessageFormat.parseObject() failed
            return null;
        }
    }

    /**
     * Formats the given List or Object array and returns the result as an
     * {@code AttributedCharacterIterator}, as produced by the underlying
     * {@code MessageFormat}.
     *
     * @param arguments the object to format. Must be a List or an array of Object.
     * @throws NullPointerException if {@code arguments} is null
     * @throws IllegalArgumentException if {@code arguments} is neither a List
     *      nor an Object array, or it has no elements
     */
    @Override
    public AttributedCharacterIterator formatToCharacterIterator(Object arguments) {
        Objects.requireNonNull(arguments);

        if (arguments instanceof List<?> objs) {
            var a = objs.toArray(new Object[0]);
            return generateMessageFormat(a).formatToCharacterIterator(a);
        } else if (arguments instanceof Object[] objs) {
            return generateMessageFormat(objs).formatToCharacterIterator(objs);
        } else {
            throw new IllegalArgumentException("The arguments should be a List<Object> or an Object[]");
        }
    }

    /**
     * Checks if this {@code ListFormat} is equal to another {@code ListFormat}.
     * The comparison is based on the {@code Locale} and formatting patterns, given or
     * generated with {@code Locale}, {@code Type}, and {@code Style}.
     * @param obj the object to check, {@code null} returns {@code false}
     * @return {@code true} if this is equals to the other {@code ListFormat}
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj instanceof ListFormat other) {
            return locale.equals(other.locale) &&
                Arrays.equals(patterns, other.patterns);
        }
        return false;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public int hashCode() {
        return Objects.hash(locale, Arrays.hashCode(patterns));
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public String toString() {
        return
            """
            ListFormat [locale: "%s", start: "%s", middle: "%s", end: "%s", two: "%s", three: "%s"]
            """.formatted(locale.getDisplayName(), patterns[START], patterns[MIDDLE], patterns[END], patterns[TWO], patterns[THREE]);
    }

    /**
     * Returns the MessageFormat that formats exactly {@code input.length} elements.
     *
     * @throws IllegalArgumentException if {@code input} is empty
     */
    private MessageFormat generateMessageFormat(Object[] input) {
        return switch (input.length) {
            case 0 -> throw new IllegalArgumentException("There should at least be one input string");
            case 1 -> new MessageFormat("{0}", locale);
            case 2 -> new MessageFormat(patterns[TWO], locale);
            case 3 -> new MessageFormat(patterns[THREE], locale);
            default -> new MessageFormat(createMessageFormatString(input.length), locale);
        };
    }

    /**
     * Builds the MessageFormat pattern for {@code count} elements out of the
     * start pattern, repeated middle delimiters, and the end pattern.
     */
    private String createMessageFormatString(int count) {
        var sb = new StringBuilder(256).append(patterns[START]);
        // The start pattern already holds {0} and {1}; the end pattern supplies the last one
        for (int i = 2; i < count - 1; i++) {
            sb.append(middleBetween).append("{").append(i).append("}");
        }
        // Drop the end pattern's {0} (it is the previous element) and renumber its {1}
        sb.append(patterns[END].replaceFirst("\\{0}", "").replaceFirst("\\{1}", "\\{" + (count - 1) + "\\}"));
        return sb.toString();
    }

    /**
     * Restores the serialized fields, then validates them and rebuilds the
     * transient state.
     *
     * @throws IOException if the stream does not describe a valid {@code ListFormat}
     */
    @Serial
    private void readObject(ObjectInputStream stream)
            throws IOException, ClassNotFoundException {
        stream.defaultReadObject();
        // A corrupted or crafted stream may omit either field
        if (locale == null || patterns == null) {
            throw new java.io.InvalidObjectException("locale or patterns is null");
        }
        try {
            init();
        } catch (IllegalArgumentException iae) {
            throw new IOException("Deserialization failed.", iae);
        }
    }

    /**
     * A ListFormat type - {@link #STANDARD STANDARD}, {@link #OR OR}, and
     * {@link #UNIT UNIT}.
     * <p>
     * {@code Type} is an enum which represents the type for formatting
     * a list within a given {@code ListFormat} instance. It determines
     * the punctuation and the connecting words in the formatted text.
     *
     * @since 22
     */
    public enum Type {

        /**
         * The {@code STANDARD} ListFormat type. This is the default
         * type, which concatenates elements in "and" enumeration.
         */
        STANDARD,

        /**
         * The {@code OR} ListFormat type. This type concatenates
         * elements in "or" enumeration.
         */
        OR,

        /**
         * The {@code UNIT} ListFormat type. This type concatenates
         * elements, useful for enumerating units.
         */
        UNIT
    }

    /**
     * A ListFormat style - {@link #FULL FULL}, {@link #SHORT SHORT},
     * and {@link #NARROW NARROW}.
     * <p>
     * {@code Style} is an enum which represents the style for formatting
     * a list within a given {@code ListFormat} instance.
     *
     * @since 22
     */
    public enum Style {

        /**
         * The {@code FULL} ListFormat style. This is the default style, which typically is the
         * full description of the text and punctuation that appear between the list elements.
         * Suitable for elements, such as "Monday", "Tuesday", "Wednesday", etc.
         */
        FULL,

        /**
         * The {@code SHORT} ListFormat style. This style is typically an abbreviation
         * of the text and punctuation that appear between the list elements.
         * Suitable for elements, such as "Mon", "Tue", "Wed", etc.
         */
        SHORT,

        /**
         * The {@code NARROW} ListFormat style. This style is typically the shortest description
         * of the text and punctuation that appear between the list elements.
         * Suitable for elements, such as "M", "T", "W", etc.
         */
        NARROW
    }
}

View File

@ -42,6 +42,7 @@ package sun.util.locale.provider;
import java.lang.ref.ReferenceQueue;
import java.lang.ref.SoftReference;
import java.text.ListFormat;
import java.text.MessageFormat;
import java.text.NumberFormat;
import java.util.Arrays;
@ -98,6 +99,7 @@ public class LocaleResources {
private static final String DATE_TIME_PATTERN = "DTP.";
private static final String RULES_CACHEKEY = "RULE";
private static final String SKELETON_PATTERN = "SP.";
private static final String LIST_PATTERN = "LP.";
// ResourceBundle key names for skeletons
private static final String SKELETON_INPUT_REGIONS_KEY = "DateFormatItemInputRegions";
@ -831,6 +833,32 @@ public class LocaleResources {
return rules;
}
/**
 * {@return the list patterns for the locale}
 *
 * The patterns are looked up in the date format data bundle under the key
 * {@code "ListPatterns_<type>"}, with a {@code "-<style>"} suffix for styles
 * other than {@code FULL}, and are cached per type and style. The returned
 * array is a copy, so callers may modify it freely.
 *
 * @param type a {@link ListFormat.Type}
 * @param style a {@link ListFormat.Style}
 */
public String[] getListPatterns(ListFormat.Type type, ListFormat.Style style) {
    String typeStr = type.toString().toLowerCase(Locale.ROOT);
    String styleStr = style.toString().toLowerCase(Locale.ROOT);
    String[] lpArray;
    // The style is part of the key: each style has its own set of patterns
    String cacheKey = LIST_PATTERN + typeStr + "." + styleStr;

    removeEmptyReferences();
    ResourceReference data = cache.get(cacheKey);

    if (data == null || ((lpArray = (String[]) data.get()) == null)) {
        ResourceBundle rb = localeData.getDateFormatData(locale);
        // getStringArray() throws MissingResourceException rather than returning
        // null, so the result can be cached unconditionally.
        lpArray = rb.getStringArray("ListPatterns_" + typeStr + (style == ListFormat.Style.FULL ? "" : "-" + styleStr));
        cache.put(cacheKey, new ResourceReference(cacheKey, lpArray, referenceQueue));
    }

    // Hand out a copy so that neither the cache nor the bundle can be altered by callers
    return lpArray.clone();
}
private static class ResourceReference extends SoftReference<Object> {
private final String cacheKey;

View File

@ -0,0 +1,299 @@
/*
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @bug 8041488
* @summary Tests for ListFormat class
* @run junit TestListFormat
*/
import java.text.CharacterIterator;
import java.text.DateFormat;
import java.text.FieldPosition;
import java.text.ListFormat;
import java.text.ParseException;
import java.text.ParsePosition;
import java.util.List;
import java.util.Locale;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.params.provider.Arguments.arguments;
/**
 * Tests for {@link ListFormat}: the factory methods, formatting of lists and
 * arrays, argument validation, and parsing (including round-trips).
 *
 * <p>Custom pattern arrays used below always have five elements. Judging by the
 * error messages asserted in {@link #getInstance_1Arg_IAE()}, the elements are,
 * in order: the start pattern, the middle pattern, the end pattern, the pattern
 * for exactly two elements, and the pattern for exactly three elements.
 */
public class TestListFormat {
    // Sample inputs of one to four elements.
    private static final List<String> SAMPLE1 = List.of("foo");
    private static final List<String> SAMPLE2 = List.of("foo", "bar");
    private static final List<String> SAMPLE3 = List.of("foo", "bar", "baz");
    private static final List<String> SAMPLE4 = List.of("foo", "bar", "baz", "qux");

    // Valid patterns with dedicated "two" and "three" patterns supplied.
    private static final String[] CUSTOM_PATTERNS_FULL = {
        "sbef {0} sbet {1}",
        "{0} mid {1}",
        "{0} ebet {1} eaft",
        "twobef {0} two {1} twoaft",
        "threebef {0} three {1} three {2} threeaft",
    };

    // Valid patterns with empty "two"/"three" patterns; the expected results in
    // getInstance_1Arg() show the start/middle/end patterns being used instead.
    private static final String[] CUSTOM_PATTERNS_MINIMAL = {
        "sbef {0} sbet {1}",
        "{0} mid {1}",
        "{0} ebet {1} eaft",
        "",
        "",
    };

    // Invalid: start pattern lacks the {1} placeholder.
    private static final String[] CUSTOM_PATTERNS_IAE_START = {
        "{0}",
        "{0} mid {1}",
        "{0} ebet {1} eaft",
        "",
        "",
    };

    // Invalid: middle pattern carries an extra {2} placeholder.
    private static final String[] CUSTOM_PATTERNS_IAE_MIDDLE = {
        "{0} sbet {1}",
        "{0} {1} {2}",
        "{0} ebet {1} eaft",
        "",
        "",
    };

    // Invalid: end pattern has text before {0}.
    private static final String[] CUSTOM_PATTERNS_IAE_END = {
        "{0} sbet {1}",
        "{0} mid {1}",
        "error {0} ebet {1}",
        "",
        "",
    };

    // Invalid: "two" pattern has its placeholders in reverse order.
    private static final String[] CUSTOM_PATTERNS_IAE_TWO = {
        "sbef {0} sbet {1}",
        "{0} mid {1}",
        "{0} ebet {1} eaft",
        "{1}error{0}",
        "",
    };

    // Invalid: "three" pattern lacks the {2} placeholder.
    private static final String[] CUSTOM_PATTERNS_IAE_THREE = {
        "sbef {0} sbet {1}",
        "{0} mid {1}",
        "{0} ebet {1} eaft",
        "",
        "{0}error{1}",
    };

    // Invalid: correct length, but every element is null.
    private static final String[] CUSTOM_PATTERNS_IAE_NULL = {
        null,
        null,
        null,
        null,
        null,
    };

    /** The available locales are expected to match those of {@link DateFormat}. */
    @Test
    void getAvailableLocales() {
        assertArrayEquals(DateFormat.getAvailableLocales(), ListFormat.getAvailableLocales());
    }

    /**
     * The no-arg factory is expected to equal the instance for the default
     * FORMAT locale with STANDARD type and FULL style.
     */
    @Test
    void getInstance_noArg() {
        assertEquals(ListFormat.getInstance(), ListFormat.getInstance(Locale.getDefault(Locale.Category.FORMAT), ListFormat.Type.STANDARD, ListFormat.Style.FULL));
    }

    /** Data: custom patterns, input list, expected formatted string. */
    static Arguments[] getInstance_1Arg() {
        return new Arguments[] {
            arguments(CUSTOM_PATTERNS_FULL, SAMPLE1, "foo"),
            arguments(CUSTOM_PATTERNS_FULL, SAMPLE2, "twobef foo two bar twoaft"),
            arguments(CUSTOM_PATTERNS_FULL, SAMPLE3, "threebef foo three bar three baz threeaft"),
            arguments(CUSTOM_PATTERNS_FULL, SAMPLE4, "sbef foo sbet bar mid baz ebet qux eaft"),
            arguments(CUSTOM_PATTERNS_MINIMAL, SAMPLE1, "foo"),
            arguments(CUSTOM_PATTERNS_MINIMAL, SAMPLE2, "sbef foo ebet bar eaft"),
            arguments(CUSTOM_PATTERNS_MINIMAL, SAMPLE3, "sbef foo sbet bar ebet baz eaft"),
            arguments(CUSTOM_PATTERNS_MINIMAL, SAMPLE4, "sbef foo sbet bar mid baz ebet qux eaft"),
        };
    }

    /** Data: invalid pattern array, expected IllegalArgumentException message. */
    static Arguments[] getInstance_1Arg_IAE() {
        return new Arguments[] {
            arguments(new String[1], "Pattern array length should be 5"),
            arguments(new String[6], "Pattern array length should be 5"),
            arguments(CUSTOM_PATTERNS_IAE_START, "start pattern is incorrect: {0}"),
            arguments(CUSTOM_PATTERNS_IAE_MIDDLE, "middle pattern is incorrect: {0} {1} {2}"),
            arguments(CUSTOM_PATTERNS_IAE_END, "end pattern is incorrect: error {0} ebet {1}"),
            arguments(CUSTOM_PATTERNS_IAE_TWO, "pattern for two is incorrect: {1}error{0}"),
            arguments(CUSTOM_PATTERNS_IAE_THREE, "pattern for three is incorrect: {0}error{1}"),
            arguments(CUSTOM_PATTERNS_IAE_NULL, "patterns array contains one or more null elements"),
        };
    }

    /**
     * Data: locale, type, style, expected formatting of {@code SAMPLE3}, and
     * whether the formatted result should be parsed back and compared.
     */
    static Arguments[] getInstance_3Arg() {
        return new Arguments[] {
            arguments(Locale.US, ListFormat.Type.STANDARD, ListFormat.Style.FULL,
                    "foo, bar, and baz", true),
            arguments(Locale.US, ListFormat.Type.OR, ListFormat.Style.FULL,
                    "foo, bar, or baz", true),
            arguments(Locale.US, ListFormat.Type.UNIT, ListFormat.Style.FULL,
                    "foo, bar, baz", true),
            arguments(Locale.US, ListFormat.Type.STANDARD, ListFormat.Style.SHORT,
                    "foo, bar, & baz", true),
            arguments(Locale.US, ListFormat.Type.OR, ListFormat.Style.SHORT,
                    "foo, bar, or baz", true),
            arguments(Locale.US, ListFormat.Type.UNIT, ListFormat.Style.SHORT,
                    "foo, bar, baz", true),
            arguments(Locale.US, ListFormat.Type.STANDARD, ListFormat.Style.NARROW,
                    "foo, bar, baz", true),
            arguments(Locale.US, ListFormat.Type.OR, ListFormat.Style.NARROW,
                    "foo, bar, or baz", true),
            arguments(Locale.US, ListFormat.Type.UNIT, ListFormat.Style.NARROW,
                    "foo bar baz", true),
            arguments(Locale.JAPAN, ListFormat.Type.STANDARD, ListFormat.Style.FULL,
                    "foo\u3001bar\u3001baz", true),
            arguments(Locale.JAPAN, ListFormat.Type.OR, ListFormat.Style.FULL,
                    "foo\u3001bar\u3001\u307e\u305f\u306fbaz", true),
            arguments(Locale.JAPAN, ListFormat.Type.UNIT, ListFormat.Style.FULL,
                    "foo bar baz", true),
            arguments(Locale.JAPAN, ListFormat.Type.STANDARD, ListFormat.Style.SHORT,
                    "foo\u3001bar\u3001baz", true),
            arguments(Locale.JAPAN, ListFormat.Type.OR, ListFormat.Style.SHORT,
                    "foo\u3001bar\u3001\u307e\u305f\u306fbaz", true),
            arguments(Locale.JAPAN, ListFormat.Type.UNIT, ListFormat.Style.SHORT,
                    "foo bar baz", true),
            arguments(Locale.JAPAN, ListFormat.Type.STANDARD, ListFormat.Style.NARROW,
                    "foo\u3001bar\u3001baz", true),
            arguments(Locale.JAPAN, ListFormat.Type.OR, ListFormat.Style.NARROW,
                    "foo\u3001bar\u3001\u307e\u305f\u306fbaz", true),
            arguments(Locale.JAPAN, ListFormat.Type.UNIT, ListFormat.Style.NARROW,
                    "foobarbaz", false), // no delimiter, impossible to parse/roundtrip
        };
    }

    /** Data: custom patterns and the input list to format and then parse. */
    static Arguments[] parseObject_parsePos() {
        return new Arguments[] {
            arguments(CUSTOM_PATTERNS_FULL, SAMPLE1),
            arguments(CUSTOM_PATTERNS_FULL, SAMPLE2),
            arguments(CUSTOM_PATTERNS_FULL, SAMPLE3),
            arguments(CUSTOM_PATTERNS_FULL, SAMPLE4),
            arguments(CUSTOM_PATTERNS_MINIMAL, SAMPLE1),
            arguments(CUSTOM_PATTERNS_MINIMAL, SAMPLE2),
            arguments(CUSTOM_PATTERNS_MINIMAL, SAMPLE3),
            arguments(CUSTOM_PATTERNS_MINIMAL, SAMPLE4),
        };
    }

    /** Formats with custom patterns and checks the result and its round-trip. */
    @ParameterizedTest
    @MethodSource
    void getInstance_1Arg(String[] patterns, List<String> input, String expected) throws ParseException {
        var f = ListFormat.getInstance(patterns);
        compareResult(f, input, expected, true);
    }

    /** Invalid custom patterns must be rejected with the exact expected message. */
    @ParameterizedTest
    @MethodSource
    void getInstance_1Arg_IAE(String[] invalidPatterns, String errorMsg) {
        var ex = assertThrows(IllegalArgumentException.class,
                () -> ListFormat.getInstance(invalidPatterns));
        assertEquals(errorMsg, ex.getMessage());
    }

    /** Formats {@code SAMPLE3} with locale-provided patterns for each type/style. */
    @ParameterizedTest
    @MethodSource
    void getInstance_3Arg(Locale l, ListFormat.Type type, ListFormat.Style style, String expected, boolean roundTrip) throws ParseException {
        var f = ListFormat.getInstance(l, type, style);
        compareResult(f, SAMPLE3, expected, roundTrip);
    }

    /** Exercises {@code format(Object, StringBuffer, FieldPosition)}. */
    @Test
    void format_3Arg() {
        var f = ListFormat.getInstance();
        // Ensures it accepts both List and []
        assertEquals(f.format(SAMPLE4, new StringBuffer(), null).toString(),
                f.format(SAMPLE4.toArray(), new StringBuffer(), null).toString());
        // Tests NPE
        assertThrows(NullPointerException.class,
                () -> f.format(null, new StringBuffer(), new FieldPosition(0)));
        assertThrows(NullPointerException.class,
                () -> f.format(new Object(), null, new FieldPosition(0)));
        // Tests IAE
        var ex = assertThrows(IllegalArgumentException.class,
                () -> f.format(new Object(), new StringBuffer(), null));
        assertEquals("The object to format should be a List<Object> or an Object[]", ex.getMessage());
    }

    /** Exercises {@code formatToCharacterIterator(Object)}. */
    @Test
    void formatToCharacterIterator() {
        var f = ListFormat.getInstance();
        // Ensures it accepts both List and []. Compare the characters the
        // iterators produce: the iterator type specifies no toString() contract,
        // so comparing toString() output would not compare the formatted text.
        assertEquals(textOf(f.formatToCharacterIterator(SAMPLE4)),
                textOf(f.formatToCharacterIterator(SAMPLE4.toArray())));
        // Tests NPE
        assertThrows(NullPointerException.class,
                () -> f.formatToCharacterIterator(null));
        // Tests IAE
        var ex = assertThrows(IllegalArgumentException.class,
                () -> f.formatToCharacterIterator(new Object()));
        assertEquals("The arguments should be a List<Object> or an Object[]", ex.getMessage());
    }

    /** Formatting an empty list must be rejected. */
    @Test
    void format_emptyInput() {
        var ex = assertThrows(IllegalArgumentException.class,
                () -> ListFormat.getInstance().format(List.of()));
        assertEquals("There should at least be one input string", ex.getMessage());
    }

    /**
     * Exercises {@code parseObject(String, ParsePosition)} at several start
     * offsets of a string made of a fixed prefix followed by the formatted input.
     */
    @ParameterizedTest
    @MethodSource
    void parseObject_parsePos(String[] patterns, List<String> input) {
        var prefix = "prefix";
        var f = ListFormat.getInstance(patterns);
        var testStr = prefix + f.format(input);

        // Starting right after the prefix: the input is recovered and the
        // position ends at the end of the string.
        var pp = new ParsePosition(prefix.length());
        var parsed = f.parseObject(testStr, pp);
        assertEquals(input, parsed, pp.toString());
        assertEquals(new ParsePosition(testStr.length()), pp);

        // Starting at 0 (prefix included): a different result, no error index set.
        pp.setIndex(0);
        parsed = f.parseObject(testStr, pp);
        assertNotEquals(input, parsed);
        assertEquals(-1, pp.getErrorIndex());

        // Starting one character into the formatted text: likewise.
        pp.setIndex(prefix.length() + 1);
        parsed = f.parseObject(testStr, pp);
        assertNotEquals(input, parsed);
        assertEquals(-1, pp.getErrorIndex());
    }

    /**
     * Asserts that {@code f} formats {@code input} as {@code expected} and,
     * when {@code roundTrip} is set, that parsing the result yields {@code input}.
     */
    private static void compareResult(ListFormat f, List<String> input, String expected, boolean roundTrip) throws ParseException {
        var result = f.format(input);
        assertEquals(expected, result);
        if (roundTrip) {
            assertEquals(input, f.parse(result));
        }
    }

    /** Collects every character produced by {@code it}, from first to last, into a String. */
    private static String textOf(CharacterIterator it) {
        var sb = new StringBuilder();
        for (char c = it.first(); c != CharacterIterator.DONE; c = it.next()) {
            sb.append(c);
        }
        return sb.toString();
    }
}