8316681: Rewrite URLEncoder.encode to use small reusable buffers

Reviewed-by: dfuchs, rriggs
This commit is contained in:
Claes Redestad 2023-09-22 09:41:01 +00:00
parent bd2439f3fc
commit c24c66db97
4 changed files with 82 additions and 19 deletions

View File

@ -26,8 +26,13 @@
package java.net;
import java.io.UnsupportedEncodingException;
import java.io.CharArrayWriter;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException ;
import java.util.BitSet;
@ -138,11 +143,6 @@ public class URLEncoder {
DEFAULT_ENCODING_NAME = StaticProperty.fileEncoding();
}
/**
 * Appends the percent-escaped form of a single byte to {@code out}:
 * a {@code '%'} character followed by the two upper-case hexadecimal
 * digits of {@code b}.
 *
 * @param out the builder receiving the escaped byte
 * @param b   the byte value to escape
 */
private static void encodeByte(StringBuilder out, byte b) {
    final HexFormat upperCaseHex = HexFormat.of().withUpperCase();
    // append('%') returns the same builder, so the digits land right after it
    upperCaseHex.toHexDigits(out.append('%'), b);
}
/**
* You can't call the constructor.
*/
@ -205,6 +205,8 @@ public class URLEncoder {
}
}
private static final int ENCODING_CHUNK_SIZE = 8;
/**
* Translates a string into {@code application/x-www-form-urlencoded}
* format using a specific {@linkplain Charset Charset}.
@ -239,11 +241,16 @@ public class URLEncoder {
}
StringBuilder out = new StringBuilder(s.length() << 1);
CharArrayWriter charArrayWriter = new CharArrayWriter();
if (i > 0) {
out.append(s, 0, i);
}
CharsetEncoder ce = charset.newEncoder()
.onMalformedInput(CodingErrorAction.REPLACE)
.onUnmappableCharacter(CodingErrorAction.REPLACE);
CharBuffer cb = CharBuffer.allocate(ENCODING_CHUNK_SIZE);
ByteBuffer bb = ByteBuffer.allocate((int)(ENCODING_CHUNK_SIZE * ce.maxBytesPerChar()));
while (i < s.length()) {
char c = s.charAt(i);
if (DONT_NEED_ENCODING.test(c)) {
@ -255,7 +262,7 @@ public class URLEncoder {
} else {
// convert to external encoding before hex conversion
do {
charArrayWriter.write(c);
cb.put(c);
/*
* If this character represents the start of a Unicode
* surrogate pair, then pass in two characters. It's not
@ -268,23 +275,63 @@ public class URLEncoder {
if ((i + 1) < s.length()) {
char d = s.charAt(i + 1);
if (Character.isLowSurrogate(d)) {
charArrayWriter.write(d);
cb.put(d);
i++;
}
}
}
// Limit to ENCODING_CHUNK_SIZE - 1 so that we can always fit in
// a surrogate pair on the next iteration
if (cb.position() >= ENCODING_CHUNK_SIZE - 1) {
flushToStringBuilder(out, ce, cb, bb, false);
}
i++;
} while (i < s.length() && !DONT_NEED_ENCODING.test((c = s.charAt(i))));
String str = charArrayWriter.toString();
byte[] ba = str.getBytes(charset);
for (byte b : ba) {
encodeByte(out, b);
}
charArrayWriter.reset();
flushToStringBuilder(out, ce, cb, bb, true);
}
}
return out.toString();
}
/**
 * Encodes input chars in {@code cb} and appends the byte values in an escaped
 * format ({@code "%XX"}) to {@code out}. The temporary byte buffer, {@code bb},
 * must be able to accept {@code cb.position() * ce.maxBytesPerChar()} bytes.
 *
 * <p>When {@code endOfInput} is {@code false} the encoder is allowed to leave
 * trailing chars unread (for instance a high surrogate whose partner has not
 * been supplied yet). Such chars are kept at the start of {@code cb} so that
 * they take part in the next invocation instead of being silently dropped.
 *
 * @param out the StringBuilder to output encoded and escaped bytes to
 * @param ce charset encoder. Will be reset if endOfInput is true
 * @param cb input buffer, will be compacted: chars consumed by the encoder
 *           are discarded, chars it has not read yet are retained
 * @param bb output buffer, will be cleared
 * @param endOfInput true if this is the last flush for an encoding chunk,
 *           so that all bytes pending in ce are flushed to out and ce is reset
 */
private static void flushToStringBuilder(StringBuilder out,
                                         CharsetEncoder ce,
                                         CharBuffer cb,
                                         ByteBuffer bb,
                                         boolean endOfInput) {
    cb.flip();
    try {
        CoderResult cr = ce.encode(cb, bb, endOfInput);
        if (!cr.isUnderflow()) {
            cr.throwException();
        }
        if (endOfInput) {
            cr = ce.flush(bb);
            if (!cr.isUnderflow()) {
                cr.throwException();
            }
            ce.reset();
        }
    } catch (CharacterCodingException x) {
        throw new Error(x); // Can't happen
    }
    HexFormat hex = HexFormat.of().withUpperCase();
    byte[] bytes = bb.array();
    int len = bb.position();
    for (int i = 0; i < len; i++) {
        out.append('%');
        hex.toHexDigits(out, bytes[i]);
    }
    // compact() rather than clear(): preserves any chars the encoder left
    // unread; identical to clear() when everything was consumed.
    cb.compact();
    bb.clear();
}
}

View File

@ -679,9 +679,8 @@ public final class HexFormat {
* @throws UncheckedIOException if an I/O exception occurs appending to the output
*/
public <A extends Appendable> A toHexDigits(A out, byte value) {
Objects.requireNonNull(out, "out");
try {
out.append(toHighHexDigit(value));
out.append(toHighHexDigit(value)); // implicit null-check
out.append(toLowHexDigit(value));
return out;
} catch (IOException ioe) {

View File

@ -58,6 +58,9 @@ public class SurrogatePairs {
{"\uDBFF\uDC001", "%F4%8F%B0%801"},
{"\uDBFF\uDC00@", "%F4%8F%B0%80%40"},
{"\u0101\uDBFF\uDC00", "%C4%81%F4%8F%B0%80"},
{"\u0101\u0101\u0101\u0101\u0101\u0101\uDBFF\uDC00\u0101", "%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%F4%8F%B0%80%C4%81"},
{"\u0101\u0101\u0101\u0101\u0101\u0101\u0101\uDBFF\uDC00\u0101", "%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%F4%8F%B0%80%C4%81"},
{"\u0101\u0101\u0101\u0101\u0101\u0101\u0101\u0101\uDBFF\uDC00\u0101", "%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%F4%8F%B0%80%C4%81"},
{"\uDBFF\uDC00\u0101", "%F4%8F%B0%80%C4%81"},
{"\uDE0A\uD83D", "%3F%3F"},
{"1\uDE0A\uD83D", "1%3F%3F"},

View File

@ -190,4 +190,18 @@ public class URLEncodeDecode {
}
/**
 * Benchmarks {@code URLEncoder.encode} with the ISO-8859-1 charset over
 * every entry of {@code testStringsEncode}, handing each result to the
 * blackhole.
 */
@Benchmark
public void testEncodeLatin1(Blackhole bh) throws UnsupportedEncodingException {
    for (String plain : testStringsEncode) {
        String encoded = java.net.URLEncoder.encode(plain, StandardCharsets.ISO_8859_1);
        bh.consume(encoded);
    }
}
/**
 * Benchmarks {@code URLDecoder.decode} with the ISO-8859-1 charset over
 * every entry of {@code testStringsDecode}, handing each result to the
 * blackhole.
 */
@Benchmark
public void testDecodeLatin1(Blackhole bh) throws UnsupportedEncodingException {
    for (String escaped : testStringsDecode) {
        String decoded = URLDecoder.decode(escaped, StandardCharsets.ISO_8859_1);
        bh.consume(decoded);
    }
}
}