8316681: Rewrite URLEncoder.encode to use small reusable buffers
Reviewed-by: dfuchs, rriggs
This commit is contained in:
parent
bd2439f3fc
commit
c24c66db97
@ -26,8 +26,13 @@
|
||||
package java.net;
|
||||
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.io.CharArrayWriter;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.CharacterCodingException;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
import java.nio.charset.CodingErrorAction;
|
||||
import java.nio.charset.IllegalCharsetNameException;
|
||||
import java.nio.charset.UnsupportedCharsetException ;
|
||||
import java.util.BitSet;
|
||||
@ -138,11 +143,6 @@ public class URLEncoder {
|
||||
DEFAULT_ENCODING_NAME = StaticProperty.fileEncoding();
|
||||
}
|
||||
|
||||
private static void encodeByte(StringBuilder out, byte b) {
|
||||
out.append('%');
|
||||
HexFormat.of().withUpperCase().toHexDigits(out, b);
|
||||
}
|
||||
|
||||
/**
|
||||
* You can't call the constructor.
|
||||
*/
|
||||
@ -205,6 +205,8 @@ public class URLEncoder {
|
||||
}
|
||||
}
|
||||
|
||||
private static final int ENCODING_CHUNK_SIZE = 8;
|
||||
|
||||
/**
|
||||
* Translates a string into {@code application/x-www-form-urlencoded}
|
||||
* format using a specific {@linkplain Charset Charset}.
|
||||
@ -239,11 +241,16 @@ public class URLEncoder {
|
||||
}
|
||||
|
||||
StringBuilder out = new StringBuilder(s.length() << 1);
|
||||
CharArrayWriter charArrayWriter = new CharArrayWriter();
|
||||
if (i > 0) {
|
||||
out.append(s, 0, i);
|
||||
}
|
||||
|
||||
CharsetEncoder ce = charset.newEncoder()
|
||||
.onMalformedInput(CodingErrorAction.REPLACE)
|
||||
.onUnmappableCharacter(CodingErrorAction.REPLACE);
|
||||
CharBuffer cb = CharBuffer.allocate(ENCODING_CHUNK_SIZE);
|
||||
ByteBuffer bb = ByteBuffer.allocate((int)(ENCODING_CHUNK_SIZE * ce.maxBytesPerChar()));
|
||||
|
||||
while (i < s.length()) {
|
||||
char c = s.charAt(i);
|
||||
if (DONT_NEED_ENCODING.test(c)) {
|
||||
@ -255,7 +262,7 @@ public class URLEncoder {
|
||||
} else {
|
||||
// convert to external encoding before hex conversion
|
||||
do {
|
||||
charArrayWriter.write(c);
|
||||
cb.put(c);
|
||||
/*
|
||||
* If this character represents the start of a Unicode
|
||||
* surrogate pair, then pass in two characters. It's not
|
||||
@ -268,23 +275,63 @@ public class URLEncoder {
|
||||
if ((i + 1) < s.length()) {
|
||||
char d = s.charAt(i + 1);
|
||||
if (Character.isLowSurrogate(d)) {
|
||||
charArrayWriter.write(d);
|
||||
cb.put(d);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Limit to ENCODING_CHUNK_SIZE - 1 so that we can always fit in
|
||||
// a surrogate pair on the next iteration
|
||||
if (cb.position() >= ENCODING_CHUNK_SIZE - 1) {
|
||||
flushToStringBuilder(out, ce, cb, bb, false);
|
||||
}
|
||||
i++;
|
||||
} while (i < s.length() && !DONT_NEED_ENCODING.test((c = s.charAt(i))));
|
||||
|
||||
String str = charArrayWriter.toString();
|
||||
byte[] ba = str.getBytes(charset);
|
||||
for (byte b : ba) {
|
||||
encodeByte(out, b);
|
||||
}
|
||||
charArrayWriter.reset();
|
||||
flushToStringBuilder(out, ce, cb, bb, true);
|
||||
}
|
||||
}
|
||||
|
||||
return out.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes input chars in {@code cb} and appends the byte values in an escaped
|
||||
* format ({@code "%XX"}) to {@code out}. The temporary byte buffer, {@code bb},
|
||||
* must be able to accept {@code cb.position() * ce.maxBytesPerChar()} bytes.
|
||||
*
|
||||
* @param out the StringBuilder to output encoded and escaped bytes to
|
||||
* @param ce charset encoder. Will be reset if endOfInput is true
|
||||
* @param cb input buffer, will be cleared
|
||||
* @param bb output buffer, will be cleared
|
||||
* @param endOfInput true if this is the last flush for an encoding chunk,
|
||||
* to all bytes in ce is flushed to out and reset
|
||||
*/
|
||||
private static void flushToStringBuilder(StringBuilder out,
|
||||
CharsetEncoder ce,
|
||||
CharBuffer cb,
|
||||
ByteBuffer bb,
|
||||
boolean endOfInput) {
|
||||
cb.flip();
|
||||
try {
|
||||
CoderResult cr = ce.encode(cb, bb, endOfInput);
|
||||
if (!cr.isUnderflow())
|
||||
cr.throwException();
|
||||
if (endOfInput) {
|
||||
cr = ce.flush(bb);
|
||||
if (!cr.isUnderflow())
|
||||
cr.throwException();
|
||||
ce.reset();
|
||||
}
|
||||
} catch (CharacterCodingException x) {
|
||||
throw new Error(x); // Can't happen
|
||||
}
|
||||
HexFormat hex = HexFormat.of().withUpperCase();
|
||||
byte[] bytes = bb.array();
|
||||
int len = bb.position();
|
||||
for (int i = 0; i < len; i++) {
|
||||
out.append('%');
|
||||
hex.toHexDigits(out, bytes[i]);
|
||||
}
|
||||
cb.clear();
|
||||
bb.clear();
|
||||
}
|
||||
}
|
||||
|
@ -679,9 +679,8 @@ public final class HexFormat {
|
||||
* @throws UncheckedIOException if an I/O exception occurs appending to the output
|
||||
*/
|
||||
public <A extends Appendable> A toHexDigits(A out, byte value) {
|
||||
Objects.requireNonNull(out, "out");
|
||||
try {
|
||||
out.append(toHighHexDigit(value));
|
||||
out.append(toHighHexDigit(value)); // implicit null-check
|
||||
out.append(toLowHexDigit(value));
|
||||
return out;
|
||||
} catch (IOException ioe) {
|
||||
|
@ -58,6 +58,9 @@ public class SurrogatePairs {
|
||||
{"\uDBFF\uDC001", "%F4%8F%B0%801"},
|
||||
{"\uDBFF\uDC00@", "%F4%8F%B0%80%40"},
|
||||
{"\u0101\uDBFF\uDC00", "%C4%81%F4%8F%B0%80"},
|
||||
{"\u0101\u0101\u0101\u0101\u0101\u0101\uDBFF\uDC00\u0101", "%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%F4%8F%B0%80%C4%81"},
|
||||
{"\u0101\u0101\u0101\u0101\u0101\u0101\u0101\uDBFF\uDC00\u0101", "%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%F4%8F%B0%80%C4%81"},
|
||||
{"\u0101\u0101\u0101\u0101\u0101\u0101\u0101\u0101\uDBFF\uDC00\u0101", "%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%F4%8F%B0%80%C4%81"},
|
||||
{"\uDBFF\uDC00\u0101", "%F4%8F%B0%80%C4%81"},
|
||||
{"\uDE0A\uD83D", "%3F%3F"},
|
||||
{"1\uDE0A\uD83D", "1%3F%3F"},
|
||||
|
@ -190,4 +190,18 @@ public class URLEncodeDecode {
|
||||
}
|
||||
|
||||
|
||||
@Benchmark
|
||||
public void testEncodeLatin1(Blackhole bh) throws UnsupportedEncodingException {
|
||||
for (String s : testStringsEncode) {
|
||||
bh.consume(java.net.URLEncoder.encode(s, StandardCharsets.ISO_8859_1));
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void testDecodeLatin1(Blackhole bh) throws UnsupportedEncodingException {
|
||||
for (String s : testStringsDecode) {
|
||||
bh.consume(URLDecoder.decode(s, StandardCharsets.ISO_8859_1));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user