8316704: Regex-free parsing of Formatter and FormatProcessor specifiers
Reviewed-by: redestad, rgiulietti
This commit is contained in:
parent
51d7169bd9
commit
50b17d9846
@ -1,5 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2023, Alibaba Group Holding Limited. All Rights Reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -30,7 +31,6 @@ import java.lang.invoke.MethodHandles;
|
||||
import java.lang.invoke.MethodType;
|
||||
import java.lang.StringTemplate.Processor;
|
||||
import java.lang.StringTemplate.Processor.Linkage;
|
||||
import java.util.regex.Matcher;
|
||||
|
||||
import jdk.internal.javac.PreviewFeature;
|
||||
|
||||
@ -218,22 +218,35 @@ public final class FormatProcessor implements Processor<String, RuntimeException
|
||||
* @throws MissingFormatArgumentException if not at end or found and not needed
|
||||
*/
|
||||
private static boolean findFormat(String fragment, boolean needed) {
|
||||
Matcher matcher = Formatter.FORMAT_SPECIFIER_PATTERN.matcher(fragment);
|
||||
String group;
|
||||
|
||||
while (matcher.find()) {
|
||||
group = matcher.group();
|
||||
|
||||
if (!group.equals("%%") && !group.equals("%n")) {
|
||||
if (matcher.end() == fragment.length() && needed) {
|
||||
return true;
|
||||
}
|
||||
|
||||
throw new MissingFormatArgumentException(group +
|
||||
" is not immediately followed by an embedded expression");
|
||||
int max = fragment.length();
|
||||
for (int i = 0; i < max;) {
|
||||
int n = fragment.indexOf('%', i);
|
||||
if (n < 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
i = n + 1;
|
||||
if (i >= max) {
|
||||
return false;
|
||||
}
|
||||
|
||||
char c = fragment.charAt(i);
|
||||
if (c == '%' || c == 'n') {
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
int off = new Formatter.FormatSpecifierParser(null, c, i, fragment, max)
|
||||
.parse();
|
||||
if (off == 0) {
|
||||
return false;
|
||||
}
|
||||
if (i + off == max && needed) {
|
||||
return true;
|
||||
}
|
||||
throw new MissingFormatArgumentException(
|
||||
fragment.substring(i - 1, i + off)
|
||||
+ " is not immediately followed by an embedded expression");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2023, Alibaba Group Holding Limited. All Rights Reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -49,8 +50,6 @@ import java.text.DecimalFormat;
|
||||
import java.text.DecimalFormatSymbols;
|
||||
import java.text.NumberFormat;
|
||||
import java.text.spi.NumberFormatProvider;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import java.time.DateTimeException;
|
||||
import java.time.Instant;
|
||||
@ -2810,20 +2809,14 @@ public final class Formatter implements Closeable, Flushable {
|
||||
return this;
|
||||
}
|
||||
|
||||
// %[argument_index$][flags][width][.precision][t]conversion
|
||||
static final String FORMAT_SPECIFIER
|
||||
= "%(\\d+\\$)?([-#+ 0,(\\<]*)?(\\d+)?(\\.\\d+)?([tT])?([a-zA-Z%])";
|
||||
|
||||
static final Pattern FORMAT_SPECIFIER_PATTERN = Pattern.compile(FORMAT_SPECIFIER);
|
||||
|
||||
/**
|
||||
* Finds format specifiers in the format string.
|
||||
*/
|
||||
static List<FormatString> parse(String s) {
|
||||
FormatSpecifierParser parser = null;
|
||||
ArrayList<FormatString> al = new ArrayList<>();
|
||||
int i = 0;
|
||||
int max = s.length();
|
||||
Matcher m = null; // create if needed
|
||||
while (i < max) {
|
||||
int n = s.indexOf('%', i);
|
||||
if (n < 0) {
|
||||
@ -2846,14 +2839,16 @@ public final class Formatter implements Closeable, Flushable {
|
||||
al.add(new FormatSpecifier(c));
|
||||
i++;
|
||||
} else {
|
||||
if (m == null) {
|
||||
m = FORMAT_SPECIFIER_PATTERN.matcher(s);
|
||||
}
|
||||
// We have already parsed a '%' at n, so we either have a
|
||||
// match or the specifier at n is invalid
|
||||
if (m.find(n) && m.start() == n) {
|
||||
al.add(new FormatSpecifier(s, m));
|
||||
i = m.end();
|
||||
if (parser == null) {
|
||||
parser = new FormatSpecifierParser(al, c, i, s, max);
|
||||
} else {
|
||||
parser.reset(c, i);
|
||||
}
|
||||
int off = parser.parse();
|
||||
if (off > 0) {
|
||||
i += off;
|
||||
} else {
|
||||
throw new UnknownFormatConversionException(String.valueOf(c));
|
||||
}
|
||||
@ -2862,6 +2857,159 @@ public final class Formatter implements Closeable, Flushable {
|
||||
return al;
|
||||
}
|
||||
|
||||
static final class FormatSpecifierParser {
|
||||
final ArrayList<FormatString> al;
|
||||
final String s;
|
||||
final int max;
|
||||
char first;
|
||||
int start;
|
||||
int off;
|
||||
char c;
|
||||
int argSize;
|
||||
int flagSize;
|
||||
int widthSize;
|
||||
|
||||
FormatSpecifierParser(ArrayList<FormatString> al, char first, int start, String s, int max) {
|
||||
this.al = al;
|
||||
|
||||
this.first = first;
|
||||
this.c = first;
|
||||
this.start = start;
|
||||
this.off = start;
|
||||
|
||||
this.s = s;
|
||||
this.max = max;
|
||||
}
|
||||
|
||||
void reset(char first, int start) {
|
||||
this.first = first;
|
||||
this.c = first;
|
||||
this.start = start;
|
||||
this.off = start;
|
||||
|
||||
argSize = 0;
|
||||
flagSize = 0;
|
||||
widthSize = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* If a valid format specifier is found, construct a FormatString and add it to {@link #al}.
|
||||
* The format specifiers for general, character, and numeric types have
|
||||
* the following syntax:
|
||||
*
|
||||
* <blockquote><pre>
|
||||
* %[argument_index$][flags][width][.precision]conversion
|
||||
* </pre></blockquote>
|
||||
*
|
||||
* As described by the following regular expression:
|
||||
*
|
||||
* <blockquote><pre>
|
||||
* %(\d+\$)?([-#+ 0,(\<]*)?(\d+)?(\.\d+)?([tT])?([a-zA-Z%])
|
||||
* </pre></blockquote>
|
||||
*
|
||||
* @return the length of the format specifier. If no valid format specifier is found, 0 is returned.
|
||||
*/
|
||||
int parse() {
|
||||
int precisionSize = 0;
|
||||
|
||||
// (\d+\$)?
|
||||
parseArgument();
|
||||
|
||||
// ([-#+ 0,(\<]*)?
|
||||
parseFlag();
|
||||
|
||||
// (\d+)?
|
||||
parseWidth();
|
||||
|
||||
if (c == '.') {
|
||||
// (\.\d+)?
|
||||
precisionSize = parsePrecision();
|
||||
if (precisionSize == -1) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// ([tT])?([a-zA-Z%])
|
||||
char t = '\0', conversion = '\0';
|
||||
if ((c == 't' || c == 'T') && off + 1 < max) {
|
||||
char c1 = s.charAt(off + 1);
|
||||
if (isConversion(c1)) {
|
||||
t = c;
|
||||
conversion = c1;
|
||||
off += 2;
|
||||
}
|
||||
} else if (isConversion(c)) {
|
||||
conversion = c;
|
||||
++off;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (argSize + flagSize + widthSize + precisionSize + t + conversion != 0) {
|
||||
if (al != null) {
|
||||
FormatSpecifier formatSpecifier
|
||||
= new FormatSpecifier(s, start, argSize, flagSize, widthSize, precisionSize, t, conversion);
|
||||
al.add(formatSpecifier);
|
||||
}
|
||||
return off - start;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
private void parseArgument() {
|
||||
// (\d+\$)?
|
||||
int i = off;
|
||||
for (; i < max && isDigit(c = s.charAt(i)); ++i); // empty body
|
||||
if (i == off || c != '$') {
|
||||
c = first;
|
||||
return;
|
||||
}
|
||||
|
||||
i++; // skip '$'
|
||||
if (i < max) {
|
||||
c = s.charAt(i);
|
||||
}
|
||||
|
||||
argSize = i - off;
|
||||
off = i;
|
||||
}
|
||||
|
||||
private void parseFlag() {
|
||||
// ([-#+ 0,(\<]*)?
|
||||
int i = off;
|
||||
for (; i < max && Flags.isFlag(c = s.charAt(i)); ++i); // empty body
|
||||
flagSize = i - off;
|
||||
off = i;
|
||||
}
|
||||
|
||||
private void parseWidth() {
|
||||
// (\d+)?
|
||||
int i = off;
|
||||
for (; i < max && isDigit(c = s.charAt(i)); ++i); // empty body
|
||||
widthSize = i - off;
|
||||
off = i;
|
||||
}
|
||||
|
||||
private int parsePrecision() {
|
||||
int i = ++off;
|
||||
for (; i < max && isDigit(c = s.charAt(i)); ++i); // empty body
|
||||
if (i != off) {
|
||||
int size = i - off + 1;
|
||||
off = i;
|
||||
return size;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
static boolean isConversion(char c) {
|
||||
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '%';
|
||||
}
|
||||
|
||||
private static boolean isDigit(char c) {
|
||||
return c >= '0' && c <= '9';
|
||||
}
|
||||
|
||||
interface FormatString {
|
||||
int index();
|
||||
void print(Formatter fmt, Object arg, Locale l) throws IOException;
|
||||
@ -2984,21 +3132,44 @@ public final class Formatter implements Closeable, Flushable {
|
||||
}
|
||||
}
|
||||
|
||||
FormatSpecifier(String s, Matcher m) {
|
||||
index(s, m.start(1), m.end(1));
|
||||
flags(s, m.start(2), m.end(2));
|
||||
width(s, m.start(3), m.end(3));
|
||||
precision(s, m.start(4), m.end(4));
|
||||
FormatSpecifier(
|
||||
String s,
|
||||
int i,
|
||||
int argSize,
|
||||
int flagSize,
|
||||
int widthSize,
|
||||
int precisionSize,
|
||||
char t,
|
||||
char conversion
|
||||
) {
|
||||
int argEnd = i + argSize;
|
||||
int flagEnd = argEnd + flagSize;
|
||||
int widthEnd = flagEnd + widthSize;
|
||||
int precisionEnd = widthEnd + precisionSize;
|
||||
|
||||
int tTStart = m.start(5);
|
||||
if (tTStart >= 0) {
|
||||
if (argSize > 0) {
|
||||
index(s, i, argEnd);
|
||||
}
|
||||
if (flagSize > 0) {
|
||||
flags(s, argEnd, flagEnd);
|
||||
}
|
||||
if (widthSize > 0) {
|
||||
width(s, flagEnd, widthEnd);
|
||||
}
|
||||
if (precisionSize > 0) {
|
||||
precision(s, widthEnd, precisionEnd);
|
||||
}
|
||||
if (t != '\0') {
|
||||
dt = true;
|
||||
if (s.charAt(tTStart) == 'T') {
|
||||
if (t == 'T') {
|
||||
flags = Flags.add(flags, Flags.UPPERCASE);
|
||||
}
|
||||
}
|
||||
conversion(s.charAt(m.start(6)));
|
||||
conversion(conversion);
|
||||
check();
|
||||
}
|
||||
|
||||
private void check() {
|
||||
if (dt)
|
||||
checkDateTime();
|
||||
else if (Conversion.isGeneral(c))
|
||||
@ -4705,6 +4876,13 @@ public final class Formatter implements Closeable, Flushable {
|
||||
};
|
||||
}
|
||||
|
||||
private static boolean isFlag(char c) {
|
||||
return switch (c) {
|
||||
case '-', '#', '+', ' ', '0', ',', '(', '<' -> true;
|
||||
default -> false;
|
||||
};
|
||||
}
|
||||
|
||||
// Returns a string representation of the current {@code Flags}.
|
||||
public static String toString(int f) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
@ -31,6 +31,8 @@
|
||||
import java.util.FormatProcessor;
|
||||
import java.util.Objects;
|
||||
import java.util.Locale;
|
||||
import java.util.MissingFormatArgumentException;
|
||||
import java.util.UnknownFormatConversionException;
|
||||
|
||||
import static java.util.FormatProcessor.FMT;
|
||||
|
||||
@ -50,6 +52,28 @@ public class FormatterBuilder {
|
||||
}
|
||||
}
|
||||
|
||||
public interface Executable {
|
||||
void execute() throws Throwable;
|
||||
}
|
||||
|
||||
static <T extends Throwable> void assertThrows(Class<T> expectedType, Executable executable, String message) {
|
||||
Throwable actualException = null;
|
||||
try {
|
||||
executable.execute();
|
||||
} catch (Throwable e) {
|
||||
actualException = e;
|
||||
}
|
||||
if (actualException == null) {
|
||||
throw new RuntimeException("Expected " + expectedType + " to be thrown, but nothing was thrown.");
|
||||
}
|
||||
if (!expectedType.isInstance(actualException)) {
|
||||
throw new RuntimeException("Expected " + expectedType + " to be thrown, but was thrown " + actualException.getClass());
|
||||
}
|
||||
if (message != null && !message.equals(actualException.getMessage())) {
|
||||
throw new RuntimeException("Expected " + message + " to be thrown, but was thrown " + actualException.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
static void suite(FormatProcessor fmt) {
|
||||
Object nullObject = null;
|
||||
test(String.format("%b", false), fmt."%b\{false}");
|
||||
@ -911,5 +935,27 @@ public class FormatterBuilder {
|
||||
test(String.format("%-10A", -12345.6), fmt."%-10A\{-12345.6}");
|
||||
test(String.format("%-10A", 0.0), fmt."%-10A\{0.0}");
|
||||
test(String.format("%-10A", 12345.6), fmt."%-10A\{12345.6}");
|
||||
|
||||
test("aaa%false", fmt."aaa%%%b\{false}");
|
||||
test("aaa" + System.lineSeparator() + "false", fmt."aaa%n%b\{false}");
|
||||
|
||||
assertThrows(
|
||||
MissingFormatArgumentException.class,
|
||||
() -> fmt. "%10ba\{ false }",
|
||||
"Format specifier '%10b is not immediately followed by an embedded expression'");
|
||||
|
||||
assertThrows(
|
||||
MissingFormatArgumentException.class,
|
||||
() ->fmt. "%ba\{ false }",
|
||||
"Format specifier '%b is not immediately followed by an embedded expression'");
|
||||
|
||||
assertThrows(
|
||||
MissingFormatArgumentException.class,
|
||||
() ->fmt. "%b",
|
||||
"Format specifier '%b is not immediately followed by an embedded expression'");
|
||||
assertThrows(
|
||||
UnknownFormatConversionException.class,
|
||||
() ->fmt. "%0",
|
||||
"Conversion = '0'");
|
||||
}
|
||||
}
|
||||
|
@ -24,6 +24,7 @@
|
||||
import java.io.*;
|
||||
import java.util.Formatter;
|
||||
import java.util.Locale;
|
||||
import java.util.UnknownFormatConversionException;
|
||||
|
||||
public class Basic {
|
||||
|
||||
@ -168,6 +169,8 @@ public class Basic {
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
common();
|
||||
|
||||
BasicBoolean.test();
|
||||
BasicBooleanObject.test();
|
||||
BasicByte.test();
|
||||
@ -197,4 +200,12 @@ public class Basic {
|
||||
System.out.printf("All %d tests passed", pass);
|
||||
}
|
||||
}
|
||||
|
||||
private static void common() {
|
||||
// non-conversion
|
||||
tryCatch("%12", UnknownFormatConversionException.class);
|
||||
tryCatch("% ", UnknownFormatConversionException.class);
|
||||
tryCatch("%,", UnknownFormatConversionException.class);
|
||||
tryCatch("%03.2", UnknownFormatConversionException.class);
|
||||
}
|
||||
}
|
||||
|
@ -32,6 +32,7 @@ import org.openjdk.jmh.annotations.Scope;
|
||||
import org.openjdk.jmh.annotations.State;
|
||||
import org.openjdk.jmh.annotations.Warmup;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/*
|
||||
@ -47,6 +48,12 @@ public class StringFormat {
|
||||
|
||||
public String s = "str";
|
||||
public int i = 17;
|
||||
public static final BigDecimal pi = new BigDecimal(Math.PI);
|
||||
|
||||
@Benchmark
|
||||
public String decimalFormat() {
|
||||
return "%010.3f".formatted(pi);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public String stringFormat() {
|
||||
|
Loading…
x
Reference in New Issue
Block a user