8316704: Regex-free parsing of Formatter and FormatProcessor specifiers

Reviewed-by: redestad, rgiulietti
This commit is contained in:
Shaojin Wen 2024-02-06 15:16:40 +00:00 committed by Raffaello Giulietti
parent 51d7169bd9
commit 50b17d9846
5 changed files with 294 additions and 39 deletions

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2023, Alibaba Group Holding Limited. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -30,7 +31,6 @@ import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;
import java.lang.StringTemplate.Processor;
import java.lang.StringTemplate.Processor.Linkage;
import java.util.regex.Matcher;
import jdk.internal.javac.PreviewFeature;
@ -218,22 +218,35 @@ public final class FormatProcessor implements Processor<String, RuntimeException
* @throws MissingFormatArgumentException if not at end or found and not needed
*/
private static boolean findFormat(String fragment, boolean needed) {
Matcher matcher = Formatter.FORMAT_SPECIFIER_PATTERN.matcher(fragment);
String group;
while (matcher.find()) {
group = matcher.group();
if (!group.equals("%%") && !group.equals("%n")) {
if (matcher.end() == fragment.length() && needed) {
return true;
}
throw new MissingFormatArgumentException(group +
" is not immediately followed by an embedded expression");
int max = fragment.length();
for (int i = 0; i < max;) {
int n = fragment.indexOf('%', i);
if (n < 0) {
return false;
}
}
i = n + 1;
if (i >= max) {
return false;
}
char c = fragment.charAt(i);
if (c == '%' || c == 'n') {
i++;
continue;
}
int off = new Formatter.FormatSpecifierParser(null, c, i, fragment, max)
.parse();
if (off == 0) {
return false;
}
if (i + off == max && needed) {
return true;
}
throw new MissingFormatArgumentException(
fragment.substring(i - 1, i + off)
+ " is not immediately followed by an embedded expression");
}
return false;
}

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2023, Alibaba Group Holding Limited. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -49,8 +50,6 @@ import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.text.NumberFormat;
import java.text.spi.NumberFormatProvider;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.time.DateTimeException;
import java.time.Instant;
@ -2810,20 +2809,14 @@ public final class Formatter implements Closeable, Flushable {
return this;
}
// %[argument_index$][flags][width][.precision][t]conversion
static final String FORMAT_SPECIFIER
= "%(\\d+\\$)?([-#+ 0,(\\<]*)?(\\d+)?(\\.\\d+)?([tT])?([a-zA-Z%])";
static final Pattern FORMAT_SPECIFIER_PATTERN = Pattern.compile(FORMAT_SPECIFIER);
/**
* Finds format specifiers in the format string.
*/
static List<FormatString> parse(String s) {
FormatSpecifierParser parser = null;
ArrayList<FormatString> al = new ArrayList<>();
int i = 0;
int max = s.length();
Matcher m = null; // create if needed
while (i < max) {
int n = s.indexOf('%', i);
if (n < 0) {
@ -2846,14 +2839,16 @@ public final class Formatter implements Closeable, Flushable {
al.add(new FormatSpecifier(c));
i++;
} else {
if (m == null) {
m = FORMAT_SPECIFIER_PATTERN.matcher(s);
}
// We have already parsed a '%' at n, so we either have a
// match or the specifier at n is invalid
if (m.find(n) && m.start() == n) {
al.add(new FormatSpecifier(s, m));
i = m.end();
if (parser == null) {
parser = new FormatSpecifierParser(al, c, i, s, max);
} else {
parser.reset(c, i);
}
int off = parser.parse();
if (off > 0) {
i += off;
} else {
throw new UnknownFormatConversionException(String.valueOf(c));
}
@ -2862,6 +2857,159 @@ public final class Formatter implements Closeable, Flushable {
return al;
}
/**
 * Hand-written (regex-free) parser for a single format specifier.
 *
 * <p>An instance is positioned on the character that follows a {@code '%'}
 * in {@link #s} and can be re-positioned with {@link #reset(char, int)} so
 * that one parser can be reused for every specifier of a format string.
 *
 * <p>Parsing state: {@link #off} is the index of the next unconsumed
 * character and {@link #c} is the most recently read character. While
 * {@code off < max}, {@code c} equals {@code s.charAt(off)}; once the input
 * is exhausted {@code c} keeps the last digit, flag or {@code '$'} that was
 * read, none of which is a conversion character.
 */
static final class FormatSpecifierParser {
    /** Destination for parsed specifiers; {@code null} means "validate only". */
    final ArrayList<FormatString> al;
    /** The string being parsed. */
    final String s;
    /** Exclusive upper bound for indexes into {@link #s}. */
    final int max;
    /** The character at {@link #start}. */
    char first;
    /** Index at which the current specifier begins. */
    int start;
    /** Index of the next unconsumed character. */
    int off;
    /** The most recently read character. */
    char c;
    /** Length of the {@code argument_index$} part, including the {@code '$'}. */
    int argSize;
    /** Length of the flags part. */
    int flagSize;
    /** Length of the width part. */
    int widthSize;

    /**
     * Creates a parser positioned at {@code start}.
     *
     * @param al    list that receives each parsed specifier, or {@code null}
     *              if the caller only needs the specifier length
     * @param first the character at index {@code start} of {@code s}
     * @param start index at which the specifier begins
     * @param s     the string to parse
     * @param max   exclusive upper bound for indexes into {@code s}
     */
    FormatSpecifierParser(ArrayList<FormatString> al, char first, int start, String s, int max) {
        this.al = al;
        this.first = first;
        this.c = first;
        this.start = start;
        this.off = start;
        this.s = s;
        this.max = max;
    }

    /**
     * Re-positions this parser at another specifier of the same string and
     * clears the sizes recorded by the previous {@link #parse()} call.
     *
     * @param first the character at index {@code start}
     * @param start index at which the next specifier begins
     */
    void reset(char first, int start) {
        this.first = first;
        this.c = first;
        this.start = start;
        this.off = start;
        argSize = 0;
        flagSize = 0;
        widthSize = 0;
    }

    /**
     * If a valid format specifier is found, construct a FormatString and add it to {@link #al}.
     * The format specifiers for general, character, and numeric types have
     * the following syntax:
     *
     * <blockquote><pre>
     * %[argument_index$][flags][width][.precision]conversion
     * </pre></blockquote>
     *
     * As described by the following regular expression:
     *
     * <blockquote><pre>
     * %(\d+\$)?([-#+ 0,(\<]*)?(\d+)?(\.\d+)?([tT])?([a-zA-Z%])
     * </pre></blockquote>
     *
     * @return the number of characters consumed, counted from {@link #start}.
     *         If no valid format specifier is found, 0 is returned.
     */
    int parse() {
        int precisionSize = 0;

        // (\d+\$)?
        parseArgument();

        // ([-#+ 0,(\<]*)?
        parseFlag();

        // (\d+)?
        parseWidth();

        if (c == '.') {
            // (\.\d+)?
            precisionSize = parsePrecision();
            if (precisionSize == -1) {
                return 0;
            }
        }

        // ([tT])?([a-zA-Z%])
        char t = '\0';
        char conversion;
        if ((c == 't' || c == 'T') && off + 1 < max && isConversion(s.charAt(off + 1))) {
            // date/time prefix followed by its conversion character
            t = c;
            conversion = s.charAt(off + 1);
            off += 2;
        } else if (isConversion(c)) {
            // Also reached for a 't'/'T' that is NOT followed by a conversion:
            // like the regular expression above, the optional [tT] group then
            // matches nothing and the letter itself is taken as the conversion,
            // instead of producing a specifier with a '\0' conversion.
            conversion = c;
            ++off;
        } else {
            return 0;
        }

        // A conversion was consumed, so the specifier is never empty here.
        if (al != null) {
            FormatSpecifier formatSpecifier
                    = new FormatSpecifier(s, start, argSize, flagSize, widthSize, precisionSize, t, conversion);
            al.add(formatSpecifier);
        }
        return off - start;
    }

    /**
     * Consumes an optional {@code argument_index$}. Leading digits that are
     * not terminated by {@code '$'} are left unconsumed (they may be flags or
     * a width) and {@link #c} is restored to {@link #first}.
     */
    private void parseArgument() {
        // (\d+\$)?
        int i = off;
        for (; i < max && isDigit(c = s.charAt(i)); ++i); // empty body
        if (i == off || c != '$') {
            c = first;
            return;
        }

        i++; // skip '$'
        if (i < max) {
            c = s.charAt(i);
        }

        argSize = i - off;
        off = i;
    }

    /** Consumes zero or more flag characters and records their count. */
    private void parseFlag() {
        // ([-#+ 0,(\<]*)?
        int i = off;
        for (; i < max && Flags.isFlag(c = s.charAt(i)); ++i); // empty body
        flagSize = i - off;
        off = i;
    }

    /** Consumes zero or more width digits and records their count. */
    private void parseWidth() {
        // (\d+)?
        int i = off;
        for (; i < max && isDigit(c = s.charAt(i)); ++i); // empty body
        widthSize = i - off;
        off = i;
    }

    /**
     * Consumes a precision. Must only be called when {@link #c} is the
     * {@code '.'} at {@link #off}.
     *
     * @return the length of the precision including the leading {@code '.'},
     *         or -1 if the {@code '.'} is not followed by at least one digit
     */
    private int parsePrecision() {
        int i = ++off; // step over '.'
        for (; i < max && isDigit(c = s.charAt(i)); ++i); // empty body
        if (i != off) {
            int size = i - off + 1; // +1 accounts for the '.'
            off = i;
            return size;
        }
        return -1;
    }
}
/**
 * Tells whether {@code c} may appear in the conversion position of a
 * format specifier, i.e. whether it matches {@code [a-zA-Z%]}.
 *
 * @param c the character to classify
 * @return {@code true} for an ASCII letter or {@code '%'}
 */
static boolean isConversion(char c) {
    if (c == '%') {
        return true;
    }
    boolean lowerCase = 'a' <= c && c <= 'z';
    boolean upperCase = 'A' <= c && c <= 'Z';
    return lowerCase || upperCase;
}
/**
 * Tells whether {@code c} is one of the ASCII digits {@code '0'} to {@code '9'}.
 *
 * @param c the character to classify
 * @return {@code true} if {@code c} is an ASCII decimal digit
 */
private static boolean isDigit(char c) {
    int distanceFromZero = c - '0';
    return distanceFromZero >= 0 && distanceFromZero <= 9;
}
interface FormatString {
int index();
void print(Formatter fmt, Object arg, Locale l) throws IOException;
@ -2984,21 +3132,44 @@ public final class Formatter implements Closeable, Flushable {
}
}
FormatSpecifier(String s, Matcher m) {
index(s, m.start(1), m.end(1));
flags(s, m.start(2), m.end(2));
width(s, m.start(3), m.end(3));
precision(s, m.start(4), m.end(4));
FormatSpecifier(
String s,
int i,
int argSize,
int flagSize,
int widthSize,
int precisionSize,
char t,
char conversion
) {
int argEnd = i + argSize;
int flagEnd = argEnd + flagSize;
int widthEnd = flagEnd + widthSize;
int precisionEnd = widthEnd + precisionSize;
int tTStart = m.start(5);
if (tTStart >= 0) {
if (argSize > 0) {
index(s, i, argEnd);
}
if (flagSize > 0) {
flags(s, argEnd, flagEnd);
}
if (widthSize > 0) {
width(s, flagEnd, widthEnd);
}
if (precisionSize > 0) {
precision(s, widthEnd, precisionEnd);
}
if (t != '\0') {
dt = true;
if (s.charAt(tTStart) == 'T') {
if (t == 'T') {
flags = Flags.add(flags, Flags.UPPERCASE);
}
}
conversion(s.charAt(m.start(6)));
conversion(conversion);
check();
}
private void check() {
if (dt)
checkDateTime();
else if (Conversion.isGeneral(c))
@ -4705,6 +4876,13 @@ public final class Formatter implements Closeable, Flushable {
};
}
/**
 * Tells whether {@code c} is one of the flag characters
 * {@code - # + (space) 0 , ( <}.
 *
 * @param c the character to classify
 * @return {@code true} if {@code c} is a flag character
 */
private static boolean isFlag(char c) {
    return "-#+ 0,(<".indexOf(c) >= 0;
}
// Returns a string representation of the current {@code Flags}.
public static String toString(int f) {
StringBuilder sb = new StringBuilder();

View File

@ -31,6 +31,8 @@
import java.util.FormatProcessor;
import java.util.Objects;
import java.util.Locale;
import java.util.MissingFormatArgumentException;
import java.util.UnknownFormatConversionException;
import static java.util.FormatProcessor.FMT;
@ -50,6 +52,28 @@ public class FormatterBuilder {
}
}
/**
 * A block of code that may throw anything; used as the lambda target of
 * {@code assertThrows}.
 */
@FunctionalInterface
public interface Executable {
    /**
     * Runs the code under test.
     *
     * @throws Throwable whatever the code under test throws
     */
    void execute() throws Throwable;
}
/**
 * Runs {@code executable} and verifies that it throws an instance of
 * {@code expectedType}, optionally with an exact message.
 *
 * @param expectedType the exception class (or a superclass) that must be thrown
 * @param executable   the code expected to fail
 * @param message      the exact expected {@code getMessage()} value, or
 *                     {@code null} to skip the message check
 * @throws RuntimeException if nothing is thrown, if the thrown object is not
 *         an instance of {@code expectedType}, or if its message differs from
 *         {@code message}; in the latter two cases the thrown object is
 *         attached as the cause
 */
static <T extends Throwable> void assertThrows(Class<T> expectedType, Executable executable, String message) {
    Throwable actualException = null;
    try {
        executable.execute();
    } catch (Throwable e) {
        // Deliberately broad: any failure is captured and checked below.
        actualException = e;
    }
    if (actualException == null) {
        throw new RuntimeException("Expected " + expectedType + " to be thrown, but nothing was thrown.");
    }
    if (!expectedType.isInstance(actualException)) {
        // Chain the unexpected exception so its stack trace is not lost.
        throw new RuntimeException(
                "Expected " + expectedType + " to be thrown, but was thrown " + actualException.getClass(),
                actualException);
    }
    if (message != null && !message.equals(actualException.getMessage())) {
        throw new RuntimeException(
                "Expected " + message + " to be thrown, but was thrown " + actualException.getMessage(),
                actualException);
    }
}
static void suite(FormatProcessor fmt) {
Object nullObject = null;
test(String.format("%b", false), fmt."%b\{false}");
@ -911,5 +935,27 @@ public class FormatterBuilder {
test(String.format("%-10A", -12345.6), fmt."%-10A\{-12345.6}");
test(String.format("%-10A", 0.0), fmt."%-10A\{0.0}");
test(String.format("%-10A", 12345.6), fmt."%-10A\{12345.6}");
test("aaa%false", fmt."aaa%%%b\{false}");
test("aaa" + System.lineSeparator() + "false", fmt."aaa%n%b\{false}");
assertThrows(
MissingFormatArgumentException.class,
() -> fmt. "%10ba\{ false }",
"Format specifier '%10b is not immediately followed by an embedded expression'");
assertThrows(
MissingFormatArgumentException.class,
() ->fmt. "%ba\{ false }",
"Format specifier '%b is not immediately followed by an embedded expression'");
assertThrows(
MissingFormatArgumentException.class,
() ->fmt. "%b",
"Format specifier '%b is not immediately followed by an embedded expression'");
assertThrows(
UnknownFormatConversionException.class,
() ->fmt. "%0",
"Conversion = '0'");
}
}

View File

@ -24,6 +24,7 @@
import java.io.*;
import java.util.Formatter;
import java.util.Locale;
import java.util.UnknownFormatConversionException;
public class Basic {
@ -168,6 +169,8 @@ public class Basic {
}
public static void main(String[] args) {
common();
BasicBoolean.test();
BasicBooleanObject.test();
BasicByte.test();
@ -197,4 +200,12 @@ public class Basic {
System.out.printf("All %d tests passed", pass);
}
}
/**
 * Checks that format strings ending before any conversion character are
 * rejected with {@link UnknownFormatConversionException}.
 */
private static void common() {
    // non-conversion: width only, flag only, flag only, flags + width + precision
    String[] formatsWithoutConversion = {"%12", "% ", "%,", "%03.2"};
    for (String format : formatsWithoutConversion) {
        tryCatch(format, UnknownFormatConversionException.class);
    }
}
}

View File

@ -32,6 +32,7 @@ import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import java.math.BigDecimal;
import java.util.concurrent.TimeUnit;
/*
@ -47,6 +48,12 @@ public class StringFormat {
public String s = "str";
public int i = 17;
public static final BigDecimal pi = new BigDecimal(Math.PI);
/**
 * Formats the {@code pi} constant with {@code "%010.3f"}, a specifier that
 * carries a flag, a width and a precision in addition to the conversion.
 *
 * @return the formatted string
 */
@Benchmark
public String decimalFormat() {
return "%010.3f".formatted(pi);
}
@Benchmark
public String stringFormat() {