8324665: Loose matching of space separators in the lenient date/time parsing mode

Reviewed-by: joehw, jlu
This commit is contained in:
Naoto Sato 2024-02-06 17:43:12 +00:00
parent 2d252ee06e
commit 96eb0390d6
4 changed files with 177 additions and 6 deletions
src/java.base/share/classes/java
test/jdk/java/text/Format/DateFormat

@ -1,5 +1,5 @@
/*
* Copyright (c) 1996, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1996, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -746,6 +746,10 @@ public abstract class DateFormat extends Format {
* <p>This leniency value is overwritten by a call to {@link
* #setCalendar(java.util.Calendar) setCalendar()}.
*
* @implSpec A {@link Character#SPACE_SEPARATOR SPACE_SEPARATOR} in the input
* text will match any other {@link Character#SPACE_SEPARATOR SPACE_SEPARATOR}s
* in the pattern with lenient parsing; otherwise, it will not match.
*
* @param lenient when {@code true}, parsing is lenient
* @see java.util.Calendar#setLenient(boolean)
*/

@ -1487,7 +1487,8 @@ public class SimpleDateFormat extends DateFormat {
switch (tag) {
case TAG_QUOTE_ASCII_CHAR:
if (start >= textLength || text.charAt(start) != (char)count) {
if (start >= textLength ||
!charEquals(text.charAt(start), (char)count)) {
pos.index = oldStart;
pos.errorIndex = start;
return null;
@ -1497,7 +1498,8 @@ public class SimpleDateFormat extends DateFormat {
case TAG_QUOTE_CHARS:
while (count-- > 0) {
if (start >= textLength || text.charAt(start) != compiledPattern[i++]) {
if (start >= textLength ||
!charEquals(text.charAt(start), compiledPattern[i++])) {
pos.index = oldStart;
pos.errorIndex = start;
return null;
@ -1580,6 +1582,13 @@ public class SimpleDateFormat extends DateFormat {
return parsedDate;
}
/**
 * Decides whether two characters count as equal while matching literal
 * pattern text against the input.
 *
 * <p>Identical characters always match. Two different characters match
 * only when {@code isLenient()} returns {@code true} and
 * {@link Character#getType(char)} reports
 * {@link Character#SPACE_SEPARATOR} for both of them.
 *
 * @param ch1 the first character to compare
 * @param ch2 the second character to compare
 * @return {@code true} if the two characters are considered equal
 */
private boolean charEquals(char ch1, char ch2) {
    if (ch1 == ch2) {
        return true;
    }
    // Differing characters can only be reconciled in lenient mode.
    if (!isLenient()) {
        return false;
    }
    return Character.getType(ch1) == Character.SPACE_SEPARATOR
            && Character.getType(ch2) == Character.SPACE_SEPARATOR;
}
/* If the next tag/pattern is a <Numeric_Field> then the parser
* should consider the count of digits while parsing the contiguous digits
* for the current tag/pattern

@ -1,5 +1,5 @@
/*
* Copyright (c) 2012, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -353,6 +353,10 @@ public final class DateTimeFormatterBuilder {
* The change will remain in force until the end of the formatter that is eventually
* constructed or until {@code parseLenient} is called.
*
* @implSpec A {@link Character#SPACE_SEPARATOR SPACE_SEPARATOR} in the input
* text will not match any other {@link Character#SPACE_SEPARATOR SPACE_SEPARATOR}s
* in the pattern with the strict parse style.
*
* @return this, for chaining, not null
*/
public DateTimeFormatterBuilder parseStrict() {
@ -372,6 +376,10 @@ public final class DateTimeFormatterBuilder {
* The change will remain in force until the end of the formatter that is eventually
* constructed or until {@code parseStrict} is called.
*
* @implSpec A {@link Character#SPACE_SEPARATOR SPACE_SEPARATOR} in the input
* text will match any other {@link Character#SPACE_SEPARATOR SPACE_SEPARATOR}s
* in the pattern with the lenient parse style.
*
* @return this, for chaining, not null
*/
public DateTimeFormatterBuilder parseLenient() {
@ -2731,9 +2739,11 @@ public final class DateTimeFormatterBuilder {
*/
static final class CharLiteralPrinterParser implements DateTimePrinterParser {
private final char literal;
private final boolean isSpaceSeparator;
/**
 * Creates a printer-parser for a single literal character and records
 * whether {@link Character#getType(char)} classifies that character as a
 * {@link Character#SPACE_SEPARATOR}.
 *
 * @param literal the literal character handled by this printer-parser
 */
private CharLiteralPrinterParser(char literal) {
    this.literal = literal;
    // Computed once here so the category lookup is not repeated later.
    this.isSpaceSeparator =
            (Character.getType(literal) == Character.SPACE_SEPARATOR);
}
@Override
@ -2750,9 +2760,10 @@ public final class DateTimeFormatterBuilder {
}
char ch = text.charAt(position);
if (ch != literal) {
if (context.isCaseSensitive() ||
if ((context.isCaseSensitive() ||
(Character.toUpperCase(ch) != Character.toUpperCase(literal) &&
Character.toLowerCase(ch) != Character.toLowerCase(literal))) {
Character.toLowerCase(ch) != Character.toLowerCase(literal))) &&
!spaceEquals(context, ch)) {
return ~position;
}
}
@ -2766,6 +2777,12 @@ public final class DateTimeFormatterBuilder {
}
return "'" + literal + "'";
}
/**
 * Tells whether the given character may stand in for this parser's
 * literal under the space-separator rule.
 *
 * <p>Returns {@code true} only when the context is not strict, the
 * literal itself is a {@link Character#SPACE_SEPARATOR}, and
 * {@link Character#getType(char)} reports
 * {@link Character#SPACE_SEPARATOR} for {@code ch} as well.
 *
 * @param context the parse context, queried for strictness
 * @param ch the character to test
 * @return {@code true} if {@code ch} is accepted as a match for the literal
 */
private boolean spaceEquals(DateTimeParseContext context, char ch) {
    if (context.isStrict() || !isSpaceSeparator) {
        return false;
    }
    return Character.getType(ch) == Character.SPACE_SEPARATOR;
}
}
//-----------------------------------------------------------------------

@ -0,0 +1,141 @@
/*
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @bug 8324665
* @summary Checks if SPACE_SEPARATOR are correctly parsed in lenient mode
* @run junit LenientSpaceParsingTest
*/
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import static org.junit.jupiter.api.Assertions.assertThrows;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.format.DateTimeFormatterBuilder;
import java.time.format.DateTimeParseException;
import java.util.stream.Stream;
/**
 * Checks how {@code DateTimeFormatterBuilder} and {@code SimpleDateFormat}
 * match space characters between the hour and minute fields, in both the
 * lenient and the strict parsing modes.
 *
 * <p>Three data sets are used, each supplying {@code (input, pattern)} pairs:
 * <ul>
 *   <li>{@code strictSpaces}: input and pattern use identical space
 *       characters; expected to parse in either mode.</li>
 *   <li>{@code lenientSpaces}: input and pattern use different space
 *       characters (U+0020, U+00A0, U+202F); expected to parse only in
 *       lenient mode.</li>
 *   <li>{@code nonSpaces}: the input has a letter where the pattern has a
 *       space; expected to fail in either mode.</li>
 * </ul>
 *
 * <p>A "success" test passes when parsing completes without throwing.
 */
public class LenientSpaceParsingTest {

    // Provider: input and pattern carry exactly the same space characters.
    private static Stream<Arguments> strictSpaces() {
        // input, pattern
        return Stream.of(
            Arguments.of("00\u002000", "H\u0020m"),
            Arguments.of("00\u202f00", "H\u202fm"),
            Arguments.of("00\u00a000", "H\u00a0m"),
            Arguments.of("00\u0020\u202f\u0020\u00a000", "H\u0020\u202f\u0020\u00a0m")
        );
    }

    // Provider: input and pattern carry different space characters.
    private static Stream<Arguments> lenientSpaces() {
        // input, pattern
        return Stream.of(
            Arguments.of("00\u002000", "H\u202fm"),
            Arguments.of("00\u202f00", "H\u0020m"),
            Arguments.of("00\u00a000", "H\u0020m"),
            Arguments.of("00\u002000", "H\u00a0m"),
            Arguments.of("00\u0020\u202f\u0020\u00a000", "H\u0020\u0020\u0020\u0020m"),
            Arguments.of("00\u0020\u202f\u0020\u00a000", "H\u202f\u00a0\u202f\u00a0m")
        );
    }

    // Provider: input has a letter where the pattern expects a space.
    private static Stream<Arguments> nonSpaces() {
        // input, pattern
        return Stream.of(
            Arguments.of("00a00", "H\u202fm"),
            Arguments.of("00a00", "H\u00a0m"),
            Arguments.of("00a00", "H\u0020m"),
            Arguments.of("00aa00", "H\u0020\u0020m"),
            Arguments.of("00aa00", "H\u00a0\u202fm")
        );
    }

    // Builds a formatter for the pattern in the requested parse style and
    // parses the input with it.
    private static void parseWithFormatter(String input, String pattern, boolean lenient) {
        var builder = new DateTimeFormatterBuilder();
        if (lenient) {
            builder.parseLenient();
        } else {
            builder.parseStrict();
        }
        builder.appendPattern(pattern).toFormatter().parse(input);
    }

    // Builds a SimpleDateFormat for the pattern, sets its leniency
    // explicitly (rather than relying on the default), and parses the input.
    private static void parseWithSimpleDateFormat(String input, String pattern, boolean lenient)
            throws ParseException {
        var sdf = new SimpleDateFormat(pattern);
        sdf.setLenient(lenient);
        sdf.parse(input);
    }

    @ParameterizedTest
    @MethodSource({"strictSpaces", "lenientSpaces"})
    public void checkDateTimeFormatter_Lenient(String input, String pattern) {
        parseWithFormatter(input, pattern, true);
    }

    @ParameterizedTest
    @MethodSource("nonSpaces")
    public void checkDateTimeFormatter_Lenient_Exception(String input, String pattern) {
        assertThrows(DateTimeParseException.class, () -> {
            parseWithFormatter(input, pattern, true);
        });
    }

    @ParameterizedTest
    @MethodSource("strictSpaces")
    public void checkDateTimeFormatter_Strict(String input, String pattern) {
        parseWithFormatter(input, pattern, false);
    }

    @ParameterizedTest
    @MethodSource({"lenientSpaces", "nonSpaces"})
    public void checkDateTimeFormatter_Strict_Exception(String input, String pattern) {
        assertThrows(DateTimeParseException.class, () -> {
            parseWithFormatter(input, pattern, false);
        });
    }

    @ParameterizedTest
    @MethodSource({"strictSpaces", "lenientSpaces"})
    public void checkSimpleDateFormat_Lenient(String input, String pattern) throws ParseException {
        parseWithSimpleDateFormat(input, pattern, true);
    }

    @ParameterizedTest
    @MethodSource("nonSpaces")
    public void checkSimpleDateFormat_Lenient_Exception(String input, String pattern) {
        assertThrows(ParseException.class, () -> {
            parseWithSimpleDateFormat(input, pattern, true);
        });
    }

    @ParameterizedTest
    @MethodSource("strictSpaces")
    public void checkSimpleDateFormat_Strict(String input, String pattern) throws ParseException {
        parseWithSimpleDateFormat(input, pattern, false);
    }

    @ParameterizedTest
    @MethodSource({"lenientSpaces", "nonSpaces"})
    public void checkSimpleDateFormat_Strict_Exception(String input, String pattern) {
        assertThrows(ParseException.class, () -> {
            parseWithSimpleDateFormat(input, pattern, false);
        });
    }
}