From c24c66db97a52371875a63862f85ea5c2010d5a7 Mon Sep 17 00:00:00 2001 From: Claes Redestad Date: Fri, 22 Sep 2023 09:41:01 +0000 Subject: [PATCH] 8316681: Rewrite URLEncoder.encode to use small reusable buffers Reviewed-by: dfuchs, rriggs --- .../share/classes/java/net/URLEncoder.java | 81 +++++++++++++++---- .../share/classes/java/util/HexFormat.java | 3 +- .../java/net/URLEncoder/SurrogatePairs.java | 3 + .../bench/java/net/URLEncodeDecode.java | 14 ++++ 4 files changed, 82 insertions(+), 19 deletions(-) diff --git a/src/java.base/share/classes/java/net/URLEncoder.java b/src/java.base/share/classes/java/net/URLEncoder.java index 2f4c855af8d..46b5ee37605 100644 --- a/src/java.base/share/classes/java/net/URLEncoder.java +++ b/src/java.base/share/classes/java/net/URLEncoder.java @@ -26,8 +26,13 @@ package java.net; import java.io.UnsupportedEncodingException; -import java.io.CharArrayWriter; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; +import java.nio.charset.CodingErrorAction; import java.nio.charset.IllegalCharsetNameException; import java.nio.charset.UnsupportedCharsetException ; import java.util.BitSet; @@ -138,11 +143,6 @@ public class URLEncoder { DEFAULT_ENCODING_NAME = StaticProperty.fileEncoding(); } - private static void encodeByte(StringBuilder out, byte b) { - out.append('%'); - HexFormat.of().withUpperCase().toHexDigits(out, b); - } - /** * You can't call the constructor. */ @@ -205,6 +205,8 @@ public class URLEncoder { } } + private static final int ENCODING_CHUNK_SIZE = 8; + /** * Translates a string into {@code application/x-www-form-urlencoded} * format using a specific {@linkplain Charset Charset}. 
@@ -239,11 +241,16 @@ public class URLEncoder { } StringBuilder out = new StringBuilder(s.length() << 1); - CharArrayWriter charArrayWriter = new CharArrayWriter(); if (i > 0) { out.append(s, 0, i); } + CharsetEncoder ce = charset.newEncoder() + .onMalformedInput(CodingErrorAction.REPLACE) + .onUnmappableCharacter(CodingErrorAction.REPLACE); + CharBuffer cb = CharBuffer.allocate(ENCODING_CHUNK_SIZE); + ByteBuffer bb = ByteBuffer.allocate((int)(ENCODING_CHUNK_SIZE * ce.maxBytesPerChar())); + while (i < s.length()) { char c = s.charAt(i); if (DONT_NEED_ENCODING.test(c)) { @@ -255,7 +262,7 @@ public class URLEncoder { } else { // convert to external encoding before hex conversion do { - charArrayWriter.write(c); + cb.put(c); /* * If this character represents the start of a Unicode * surrogate pair, then pass in two characters. It's not @@ -268,23 +275,63 @@ public class URLEncoder { if ((i + 1) < s.length()) { char d = s.charAt(i + 1); if (Character.isLowSurrogate(d)) { - charArrayWriter.write(d); + cb.put(d); i++; } } } + // Limit to ENCODING_CHUNK_SIZE - 1 so that we can always fit in + // a surrogate pair on the next iteration + if (cb.position() >= ENCODING_CHUNK_SIZE - 1) { + flushToStringBuilder(out, ce, cb, bb, false); + } i++; } while (i < s.length() && !DONT_NEED_ENCODING.test((c = s.charAt(i)))); - - String str = charArrayWriter.toString(); - byte[] ba = str.getBytes(charset); - for (byte b : ba) { - encodeByte(out, b); - } - charArrayWriter.reset(); + flushToStringBuilder(out, ce, cb, bb, true); } } - return out.toString(); } + + /** + * Encodes input chars in {@code cb} and appends the byte values in an escaped + * format ({@code "%XX"}) to {@code out}. The temporary byte buffer, {@code bb}, + * must be able to accept {@code cb.position() * ce.maxBytesPerChar()} bytes. + * + * @param out the StringBuilder to output encoded and escaped bytes to + * @param ce charset encoder. 
Will be reset if endOfInput is true + * @param cb input buffer, will be cleared + * @param bb output buffer, will be cleared + * @param endOfInput true if this is the last flush for an encoding chunk, + * so that all bytes in ce are flushed to out and ce is reset + */ + private static void flushToStringBuilder(StringBuilder out, + CharsetEncoder ce, + CharBuffer cb, + ByteBuffer bb, + boolean endOfInput) { + cb.flip(); + try { + CoderResult cr = ce.encode(cb, bb, endOfInput); + if (!cr.isUnderflow()) + cr.throwException(); + if (endOfInput) { + cr = ce.flush(bb); + if (!cr.isUnderflow()) + cr.throwException(); + ce.reset(); + } + } catch (CharacterCodingException x) { + throw new Error(x); // Can't happen + } + HexFormat hex = HexFormat.of().withUpperCase(); + byte[] bytes = bb.array(); + int len = bb.position(); + for (int i = 0; i < len; i++) { + out.append('%'); + hex.toHexDigits(out, bytes[i]); + } + cb.clear(); + bb.clear(); + } } diff --git a/src/java.base/share/classes/java/util/HexFormat.java b/src/java.base/share/classes/java/util/HexFormat.java index 107c362cbc2..d54d491f3ab 100644 --- a/src/java.base/share/classes/java/util/HexFormat.java +++ b/src/java.base/share/classes/java/util/HexFormat.java @@ -679,9 +679,8 @@ public final class HexFormat { * @throws UncheckedIOException if an I/O exception occurs appending to the output */ public <A extends Appendable> A toHexDigits(A out, byte value) { - Objects.requireNonNull(out, "out"); try { - out.append(toHighHexDigit(value)); + out.append(toHighHexDigit(value)); // implicit null-check out.append(toLowHexDigit(value)); return out; } catch (IOException ioe) { diff --git a/test/jdk/java/net/URLEncoder/SurrogatePairs.java b/test/jdk/java/net/URLEncoder/SurrogatePairs.java index da68f394507..6ffe8f30e11 100644 --- a/test/jdk/java/net/URLEncoder/SurrogatePairs.java +++ b/test/jdk/java/net/URLEncoder/SurrogatePairs.java @@ -58,6 +58,9 @@ public class SurrogatePairs { {"\uDBFF\uDC001", "%F4%8F%B0%801"}, {"\uDBFF\uDC00@", "%F4%8F%B0%80%40"}, 
{"\u0101\uDBFF\uDC00", "%C4%81%F4%8F%B0%80"}, + {"\u0101\u0101\u0101\u0101\u0101\u0101\uDBFF\uDC00\u0101", "%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%F4%8F%B0%80%C4%81"}, + {"\u0101\u0101\u0101\u0101\u0101\u0101\u0101\uDBFF\uDC00\u0101", "%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%F4%8F%B0%80%C4%81"}, + {"\u0101\u0101\u0101\u0101\u0101\u0101\u0101\u0101\uDBFF\uDC00\u0101", "%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%F4%8F%B0%80%C4%81"}, {"\uDBFF\uDC00\u0101", "%F4%8F%B0%80%C4%81"}, {"\uDE0A\uD83D", "%3F%3F"}, {"1\uDE0A\uD83D", "1%3F%3F"}, diff --git a/test/micro/org/openjdk/bench/java/net/URLEncodeDecode.java b/test/micro/org/openjdk/bench/java/net/URLEncodeDecode.java index a599a68f924..a002a0a0320 100644 --- a/test/micro/org/openjdk/bench/java/net/URLEncodeDecode.java +++ b/test/micro/org/openjdk/bench/java/net/URLEncodeDecode.java @@ -190,4 +190,18 @@ public class URLEncodeDecode { } + @Benchmark + public void testEncodeLatin1(Blackhole bh) throws UnsupportedEncodingException { + for (String s : testStringsEncode) { + bh.consume(java.net.URLEncoder.encode(s, StandardCharsets.ISO_8859_1)); + } + } + + @Benchmark + public void testDecodeLatin1(Blackhole bh) throws UnsupportedEncodingException { + for (String s : testStringsDecode) { + bh.consume(URLDecoder.decode(s, StandardCharsets.ISO_8859_1)); + } + } + }