From ffb0867e2c07b41cb7124e11fe6cf63d9471f0d2 Mon Sep 17 00:00:00 2001 From: Justin Lu Date: Thu, 30 May 2024 20:05:22 +0000 Subject: [PATCH] 8331485: Odd Results when Parsing Scientific Notation with Large Exponent 8331680: NumberFormat is missing some bad exponent strict parse cases Reviewed-by: naoto --- .../classes/java/text/DecimalFormat.java | 60 ++++-- .../DecimalFormat/LargeExponentsTest.java | 193 ++++++++++++++++++ .../Format/NumberFormat/LenientParseTest.java | 15 +- .../Format/NumberFormat/StrictParseTest.java | 14 +- 4 files changed, 267 insertions(+), 15 deletions(-) create mode 100644 test/jdk/java/text/Format/DecimalFormat/LargeExponentsTest.java diff --git a/src/java.base/share/classes/java/text/DecimalFormat.java b/src/java.base/share/classes/java/text/DecimalFormat.java index 2fc64d8e63e..f575e86eced 100644 --- a/src/java.base/share/classes/java/text/DecimalFormat.java +++ b/src/java.base/share/classes/java/text/DecimalFormat.java @@ -2479,9 +2479,12 @@ public class DecimalFormat extends NumberFormat { symbols.getGroupingSeparator(); String exponentString = symbols.getExponentSeparator(); boolean sawDecimal = false; - boolean sawExponent = false; boolean sawDigit = false; - int exponent = 0; // Set to the exponent value, if any + // Storing as long allows us to maintain accuracy of exponent + // when the exponent value as well as the decimalAt nears + // Integer.MAX/MIN value. However, the final expressed value is an int + long exponent = 0; + boolean[] expStat = new boolean[STATUS_LENGTH]; // We have to track digitCount ourselves, because digits.count will // pin when the maximum allowable digits is reached. @@ -2579,21 +2582,27 @@ public class DecimalFormat extends NumberFormat { // require that they be followed by a digit. Otherwise // we backup and reprocess them. backup = position; - } else if (checkExponent && !isExponent && text.regionMatches(position, exponentString, 0, exponentString.length()) - && !sawExponent) { + } else if (checkExponent && !isExponent + && text.regionMatches(position, exponentString, 0, exponentString.length())) { // Process the exponent by recursively calling this method. ParsePosition pos = new ParsePosition(position + exponentString.length()); - boolean[] stat = new boolean[STATUS_LENGTH]; DigitList exponentDigits = new DigitList(); - if (subparse(text, pos, "", symbols.getMinusSignText(), exponentDigits, true, stat) && - exponentDigits.fitsIntoLong(stat[STATUS_POSITIVE], true)) { - position = pos.index; // Advance past the exponent - exponent = (int)exponentDigits.getLong(); - if (!stat[STATUS_POSITIVE]) { - exponent = -exponent; + if (subparse(text, pos, "", symbols.getMinusSignText(), exponentDigits, true, expStat)) { + // We parse the exponent with isExponent == true, thus fitsIntoLong() + // only returns false here if the exponent DigitList value exceeds + // Long.MAX_VALUE. We do not need to worry about false being + // returned for faulty values as they are ignored by DigitList. + if (exponentDigits.fitsIntoLong(expStat[STATUS_POSITIVE], true)) { + exponent = exponentDigits.getLong(); + if (!expStat[STATUS_POSITIVE]) { + exponent = -exponent; + } + } else { + exponent = expStat[STATUS_POSITIVE] ? + Long.MAX_VALUE : Long.MIN_VALUE; } - sawExponent = true; + position = pos.index; // Advance past the exponent } break; // Whether we fail or succeed, we exit this loop } else { @@ -2628,7 +2637,9 @@ public class DecimalFormat extends NumberFormat { } // Adjust for exponent, if any - digits.decimalAt += exponent; + if (exponent != 0) { + digits.decimalAt = shiftDecimalAt(digits.decimalAt, exponent); + } // If none of the text string was recognized. For example, parse // "x" with pattern "#0.00" (return index and error index both 0) @@ -2641,6 +2652,29 @@ public class DecimalFormat extends NumberFormat { return position; } + // Calculate the final decimal position based off the exponent value + // and the existing decimalAt position. If overflow/underflow, the value + // should be set as either Integer.MAX/MIN + private int shiftDecimalAt(int decimalAt, long exponent) { + try { + exponent = Math.addExact(decimalAt, exponent); + } catch (ArithmeticException ex) { + // If we under/overflow a Long do not bother with the decimalAt + // As it can only shift up to Integer.MAX/MIN which has no affect + if (exponent > 0 && decimalAt > 0) { + return Integer.MAX_VALUE; + } else { + return Integer.MIN_VALUE; + } + } + try { + decimalAt = Math.toIntExact(exponent); + } catch (ArithmeticException ex) { + decimalAt = exponent > 0 ? Integer.MAX_VALUE : Integer.MIN_VALUE; + } + return decimalAt; + } + // Checks to make sure grouping size is not violated. Used when strict. private boolean isGroupingViolation(int pos, int prevGroupingPos) { assert parseStrict : "Grouping violations should only occur when strict"; diff --git a/test/jdk/java/text/Format/DecimalFormat/LargeExponentsTest.java b/test/jdk/java/text/Format/DecimalFormat/LargeExponentsTest.java new file mode 100644 index 00000000000..c49bad2edef --- /dev/null +++ b/test/jdk/java/text/Format/DecimalFormat/LargeExponentsTest.java @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8331485 + * @summary Ensure correctness when parsing large (+/-) exponent values that + * exceed Integer.MAX_VALUE and Long.MAX_VALUE. + * @run junit/othervm --add-opens java.base/java.text=ALL-UNNAMED LargeExponentsTest + */ + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.text.DecimalFormat; +import java.text.NumberFormat; +import java.text.ParseException; +import java.text.ParsePosition; +import java.util.ArrayList; +import java.util.List; +import java.util.Locale; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +// We prevent odd results when parsing large exponent values by ensuring +// that we properly handle overflow in the implementation of DigitList +public class LargeExponentsTest { + + // Exponent symbol is 'E' + private static final NumberFormat FMT = NumberFormat.getInstance(Locale.US); + + // Check that the parsed value and parse position index are both equal to the expected values. + // We are mainly checking that an exponent > Integer.MAX_VALUE no longer + // parses to 0 and that an exponent > Long.MAX_VALUE no longer parses to the mantissa. + @ParameterizedTest + @MethodSource({"largeExponentValues", "smallExponentValues", "bugReportValues", "edgeCases"}) + public void overflowTest(String parseString, Double expectedValue) throws ParseException { + checkParse(parseString, expectedValue); + checkParseWithPP(parseString, expectedValue); + } + + // A separate white-box test to avoid the memory consumption of testing cases + // when the String is near Integer.MAX_LENGTH + @ParameterizedTest + @MethodSource + public void largeDecimalAtExponentTest(int expected, int decimalAt, long expVal) + throws NoSuchMethodException, InvocationTargetException, IllegalAccessException { + DecimalFormat df = new DecimalFormat(); + Method m = df.getClass().getDeclaredMethod( + "shiftDecimalAt", int.class, long.class); + m.setAccessible(true); + assertEquals(expected, m.invoke(df, decimalAt, expVal)); + } + + // Cases where we can test behavior when the String is near Integer.MAX_LENGTH + private static Stream largeDecimalAtExponentTest() { + return Stream.of( + // Equivalent to testing Arguments.of("0."+"0".repeat(Integer.MAX_VALUE-20)+"1"+"E2147483650", 1.0E22) + // This is an absurdly long decimal string with length close to Integer.MAX_VALUE + // where the decimal position correctly negates the exponent value, even if it exceeds Integer.MAX_VALUE + Arguments.of(23, -(Integer.MAX_VALUE-20), 3L+Integer.MAX_VALUE), + Arguments.of(-23, Integer.MAX_VALUE-20, -(3L+Integer.MAX_VALUE)), + Arguments.of(Integer.MIN_VALUE, -(Integer.MAX_VALUE-20), -(3L+Integer.MAX_VALUE)), + Arguments.of(Integer.MAX_VALUE, Integer.MAX_VALUE-20, 3L+Integer.MAX_VALUE), + Arguments.of(Integer.MAX_VALUE, -(Integer.MAX_VALUE-20), Long.MAX_VALUE), + Arguments.of(Integer.MIN_VALUE, Integer.MAX_VALUE-20, Long.MIN_VALUE) + ); + } + + // Checks the parse(String, ParsePosition) method + public void checkParse(String parseString, Double expectedValue) { + ParsePosition pp = new ParsePosition(0); + Number actualValue = FMT.parse(parseString, pp); + assertEquals(expectedValue, (double)actualValue); + assertEquals(parseString.length(), pp.getIndex()); + } + + // Checks the parse(String) method + public void checkParseWithPP(String parseString, Double expectedValue) + throws ParseException { + Number actualValue = FMT.parse(parseString); + assertEquals(expectedValue, (double)actualValue); + } + + // Generate large enough exponents that should all be parsed as infinity + // when positive. This includes exponents that exceed Long.MAX_VALUE + private static List largeExponentValues() { + return createExponentValues(false); + } + + // Same as previous provider but for negative exponent values, so expecting + // a parsed value of 0. + private static List smallExponentValues() { + return createExponentValues(true); + } + + // Programmatically generate some large parse values that are expected + // to be parsed as infinity or 0 + private static List createExponentValues(boolean negative) { + List args = new ArrayList<>(); + // Start with a base value that should be parsed as infinity + String baseValue = "12234.123E1100"; + // Continuously add to the String until we trigger the overflow condition + for (int i = 0; i < 100; i++) { + StringBuilder bldr = new StringBuilder(); + // Add to exponent + bldr.append(baseValue).append("1".repeat(i)); + // Add to mantissa + bldr.insert(0, "1".repeat(i)); + args.add(Arguments.of( + // Prepend "-" to exponent if negative + negative ? bldr.insert(bldr.indexOf("E")+1, "-").toString() : bldr.toString(), + // Expect 0 if negative, else infinity + negative ? 0.0 : Double.POSITIVE_INFINITY)); + } + return args; + } + + // The provided values are all from the JBS issue + // These contain exponents that exceed Integer.MAX_VALUE, but not Long.MAX_VALUE + private static Stream bugReportValues() { + return Stream.of( + Arguments.of("0.123E1", 1.23), + Arguments.of("0.123E309", 1.23E308), + Arguments.of("0.123E310", Double.POSITIVE_INFINITY), + Arguments.of("0.123E2147483647", Double.POSITIVE_INFINITY), + Arguments.of("0.123E2147483648", Double.POSITIVE_INFINITY), + Arguments.of("0.0123E2147483648", Double.POSITIVE_INFINITY), + Arguments.of("0.0123E2147483649", Double.POSITIVE_INFINITY), + Arguments.of("1.23E2147483646", Double.POSITIVE_INFINITY), + Arguments.of("1.23E2147483647", Double.POSITIVE_INFINITY), + Arguments.of("0.123E4294967296", Double.POSITIVE_INFINITY), + Arguments.of("0.123E-322", 9.9E-324), + Arguments.of("0.123E-323", 0.0), + Arguments.of("0.123E-2147483647", 0.0), + Arguments.of("0.123E-2147483648", 0.0), + Arguments.of("0.123E-2147483649", 0.0) + ); + } + + // Some other edge case values to ensure parse correctness + private static Stream edgeCases() { + return Stream.of( + // Exponent itself does not cause underflow, but decimalAt adjustment + // based off mantissa should. decimalAt(-1) + exponent(Integer.MIN_VALUE) = underflow + Arguments.of("0.0123E-2147483648", 0.0), + // 0 exponent + Arguments.of("1.23E0", 1.23), + // Leading zeroes + Arguments.of("1.23E0000123", 1.23E123), + // Leading zeroes - Past Long.MAX_VALUE length + Arguments.of("1.23E00000000000000000000000000000000000000000123", 1.23E123), + // Trailing zeroes + Arguments.of("1.23E100", 1.23E100), + // Long.MAX_VALUE length + Arguments.of("1.23E1234567891234567800", Double.POSITIVE_INFINITY), + // Long.MAX_VALUE with trailing zeroes + Arguments.of("1.23E9223372036854775807000", Double.POSITIVE_INFINITY), + // Long.MIN_VALUE + Arguments.of("1.23E-9223372036854775808", 0.0), + // Exponent value smaller than Long.MIN_VALUE + Arguments.of("1.23E-9223372036854775809", 0.0), + // Exponent value equal to Long.MAX_VALUE + Arguments.of("1.23E9223372036854775807", Double.POSITIVE_INFINITY), + // Exponent value larger than Long.MAX_VALUE + Arguments.of("1.23E9223372036854775808", Double.POSITIVE_INFINITY) + ); + } +} diff --git a/test/jdk/java/text/Format/NumberFormat/LenientParseTest.java b/test/jdk/java/text/Format/NumberFormat/LenientParseTest.java index 948ccb247e8..2f07521ff23 100644 --- a/test/jdk/java/text/Format/NumberFormat/LenientParseTest.java +++ b/test/jdk/java/text/Format/NumberFormat/LenientParseTest.java @@ -23,7 +23,7 @@ /* * @test - * @bug 8327640 + * @bug 8327640 8331485 * @summary Test suite for NumberFormat parsing when lenient. * @run junit/othervm -Duser.language=en -Duser.country=US LenientParseTest * @run junit/othervm -Duser.language=ja -Duser.country=JP LenientParseTest @@ -34,6 +34,7 @@ * @run junit/othervm -Duser.language=ar -Duser.country=AR LenientParseTest */ +import org.junit.jupiter.api.Test; import org.junit.jupiter.api.condition.EnabledIfSystemProperty; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; @@ -127,6 +128,18 @@ public class LenientParseTest { dFmt.setParseIntegerOnly(false); } + @Test // Non-localized, only run once + @EnabledIfSystemProperty(named = "user.language", matches = "en") + public void badExponentParseNumberFormatTest() { + // Some fmt, with an "E" exponent string + DecimalFormat fmt = (DecimalFormat) NumberFormat.getNumberInstance(Locale.US); + // Upon non-numeric in exponent, parse will still successfully complete + // but index should end on the last valid char in exponent + assertEquals(1.23E45, successParse(fmt, "1.23E45.123", 7)); + assertEquals(1.23E45, successParse(fmt, "1.23E45.", 7)); + assertEquals(1.23E45, successParse(fmt, "1.23E45FOO3222", 7)); + } + // ---- CurrencyFormat tests ---- // All input Strings should pass and return expected value. @ParameterizedTest diff --git a/test/jdk/java/text/Format/NumberFormat/StrictParseTest.java b/test/jdk/java/text/Format/NumberFormat/StrictParseTest.java index 3fa1fc70faf..d62a867e573 100644 --- a/test/jdk/java/text/Format/NumberFormat/StrictParseTest.java +++ b/test/jdk/java/text/Format/NumberFormat/StrictParseTest.java @@ -23,7 +23,7 @@ /* * @test - * @bug 8327640 + * @bug 8327640 8331485 * @summary Test suite for NumberFormat parsing with strict leniency * @run junit/othervm -Duser.language=en -Duser.country=US StrictParseTest * @run junit/othervm -Duser.language=ja -Duser.country=JP StrictParseTest @@ -112,7 +112,19 @@ public class StrictParseTest { successParse(nonLocalizedDFmt, "a12345,67890b"); successParse(nonLocalizedDFmt, "a1234,67890b"); failParse(nonLocalizedDFmt, "a123456,7890b", 6); + } + @Test // Non-localized, only run once + @EnabledIfSystemProperty(named = "user.language", matches = "en") + public void badExponentParseNumberFormatTest() { + // Some fmt, with an "E" exponent string + DecimalFormat fmt = (DecimalFormat) NumberFormat.getNumberInstance(Locale.US); + fmt.setStrict(true); + // Upon non-numeric in exponent, parse will exit early and suffix will not + // exactly match, causing failure + failParse(fmt, "1.23E45.1", 7); + failParse(fmt, "1.23E45.", 7); + failParse(fmt, "1.23E45FOO3222", 7); } // All input Strings should fail