8325340: Add ASCII fast-path to Data-/ObjectInputStream.readUTF

Reviewed-by: rgiulietti, bpb, rriggs
This commit is contained in:
Claes Redestad 2024-02-26 16:05:18 +00:00
parent 3780ad3133
commit 9a9cfbe0ba
4 changed files with 362 additions and 24 deletions

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 1994, 2023, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 1994, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -571,10 +571,10 @@ loop: while (true) {
* valid modified UTF-8 encoding of a Unicode string. * valid modified UTF-8 encoding of a Unicode string.
* @see java.io.DataInputStream#readUnsignedShort() * @see java.io.DataInputStream#readUnsignedShort()
*/ */
public static final String readUTF(DataInput in) throws IOException { public static String readUTF(DataInput in) throws IOException {
int utflen = in.readUnsignedShort(); int utflen = in.readUnsignedShort();
byte[] bytearr = null; byte[] bytearr;
char[] chararr = null; char[] chararr;
if (in instanceof DataInputStream dis) { if (in instanceof DataInputStream dis) {
if (dis.bytearr.length < utflen) { if (dis.bytearr.length < utflen) {
dis.bytearr = new byte[utflen*2]; dis.bytearr = new byte[utflen*2];

View File

@ -33,6 +33,7 @@ import java.lang.reflect.Array;
import java.lang.reflect.InvocationHandler; import java.lang.reflect.InvocationHandler;
import java.lang.reflect.Modifier; import java.lang.reflect.Modifier;
import java.lang.reflect.Proxy; import java.lang.reflect.Proxy;
import java.nio.charset.StandardCharsets;
import java.security.AccessControlContext; import java.security.AccessControlContext;
import java.security.AccessController; import java.security.AccessController;
import java.security.PrivilegedAction; import java.security.PrivilegedAction;
@ -42,6 +43,7 @@ import java.util.Arrays;
import java.util.Map; import java.util.Map;
import java.util.Objects; import java.util.Objects;
import jdk.internal.access.JavaLangAccess;
import jdk.internal.access.SharedSecrets; import jdk.internal.access.SharedSecrets;
import jdk.internal.event.DeserializationEvent; import jdk.internal.event.DeserializationEvent;
import jdk.internal.misc.Unsafe; import jdk.internal.misc.Unsafe;
@ -2995,6 +2997,8 @@ public class ObjectInputStream
private static final int CHAR_BUF_SIZE = 256; private static final int CHAR_BUF_SIZE = 256;
/** readBlockHeader() return value indicating header read may block */ /** readBlockHeader() return value indicating header read may block */
private static final int HEADER_BLOCKED = -2; private static final int HEADER_BLOCKED = -2;
/** access to internal methods to count ASCII and inflate latin1/ASCII bytes to char */
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
/** buffer for reading general/block data */ /** buffer for reading general/block data */
private final byte[] buf = new byte[MAX_BLOCK_SIZE]; private final byte[] buf = new byte[MAX_BLOCK_SIZE];
@ -3671,8 +3675,32 @@ public class ObjectInputStream
* utflen bytes. * utflen bytes.
*/ */
private String readUTFBody(long utflen) throws IOException { private String readUTFBody(long utflen) throws IOException {
if (!blkmode) {
end = pos = 0;
}
StringBuilder sbuf; StringBuilder sbuf;
if (utflen > 0 && utflen < Integer.MAX_VALUE) { if (utflen > 0 && utflen < Integer.MAX_VALUE) {
// Scan for leading ASCII chars
int avail = end - pos;
int ascii = JLA.countPositives(buf, pos, Math.min(avail, (int)utflen));
if (ascii == utflen) {
// Complete match, consume the buf[pos ... pos + ascii] range and return.
// Modified UTF-8 and ISO-8859-1 are both ASCII-compatible encodings,
// so an all-ASCII range can be decoded as ISO-8859-1, which avoids a
// redundant scan in the String constructor
String utf = new String(buf, pos, ascii, StandardCharsets.ISO_8859_1);
pos += ascii;
return utf;
}
// Avoid allocating a StringBuilder if there's enough data in buf and
// cbuf is large enough
if (avail >= utflen && utflen <= CHAR_BUF_SIZE) {
JLA.inflateBytesToChars(buf, pos, cbuf, 0, ascii);
pos += ascii;
int cbufPos = readUTFSpan(ascii, utflen - ascii);
return new String(cbuf, 0, cbufPos);
}
// a reasonable initial capacity based on the UTF length // a reasonable initial capacity based on the UTF length
int initialCapacity = Math.min((int)utflen, 0xFFFF); int initialCapacity = Math.min((int)utflen, 0xFFFF);
sbuf = new StringBuilder(initialCapacity); sbuf = new StringBuilder(initialCapacity);
@ -3680,14 +3708,14 @@ public class ObjectInputStream
sbuf = new StringBuilder(); sbuf = new StringBuilder();
} }
if (!blkmode) {
end = pos = 0;
}
while (utflen > 0) { while (utflen > 0) {
int avail = end - pos; int avail = end - pos;
if (avail >= 3 || (long) avail == utflen) { if (avail >= 3 || (long) avail == utflen) {
utflen -= readUTFSpan(sbuf, utflen); int cbufPos = readUTFSpan(0, utflen);
// pos has advanced: adjust utflen by the difference in
// available bytes
utflen -= avail - (end - pos);
sbuf.append(cbuf, 0, cbufPos);
} else { } else {
if (blkmode) { if (blkmode) {
// near block boundary, read one byte at a time // near block boundary, read one byte at a time
@ -3709,18 +3737,17 @@ public class ObjectInputStream
/** /**
* Reads span of UTF-encoded characters out of internal buffer * Reads span of UTF-encoded characters out of internal buffer
* (starting at offset pos and ending at or before offset end), * (starting at offset pos), consuming no more than utflen bytes.
* consuming no more than utflen bytes. Appends read characters to * Appends read characters to cbuf. Returns the current position
* sbuf. Returns the number of bytes consumed. * in cbuf.
*/ */
private long readUTFSpan(StringBuilder sbuf, long utflen) private int readUTFSpan(int cpos, long utflen)
throws IOException throws IOException
{ {
int cpos = 0;
int start = pos; int start = pos;
int avail = Math.min(end - pos, CHAR_BUF_SIZE); int avail = Math.min(end - pos, CHAR_BUF_SIZE);
// stop short of last char unless all of utf bytes in buffer // stop short of last char unless all of utf bytes in buffer
int stop = pos + ((utflen > avail) ? avail - 2 : (int) utflen); int stop = start + ((utflen > avail) ? avail - 2 : (int) utflen);
boolean outOfBounds = false; boolean outOfBounds = false;
try { try {
@ -3765,9 +3792,7 @@ public class ObjectInputStream
throw new UTFDataFormatException(); throw new UTFDataFormatException();
} }
} }
return cpos;
sbuf.append(cbuf, 0, cpos);
return pos - start;
} }
/** /**

View File

@ -1,5 +1,6 @@
/* /*
* Copyright (c) 2020, 2022, Red Hat Inc. All rights reserved. * Copyright (c) 2020, Red Hat Inc. All rights reserved.
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -27,7 +28,10 @@ import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.Blackhole; import org.openjdk.jmh.infra.Blackhole;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream; import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
@ -38,22 +42,85 @@ import java.util.concurrent.TimeUnit;
@Warmup(iterations = 2, time = 2) @Warmup(iterations = 2, time = 2)
@State(Scope.Thread) @State(Scope.Thread)
public class DataInputStreamTest { public class DataInputStreamTest {
private final int size = 1024; private static final int SIZE = 1024;
private ByteArrayInputStream bais; private ByteArrayInputStream bais;
private ByteArrayInputStream utfDataAsciiMixed;
private ByteArrayInputStream utfDataMixed;
private ByteArrayInputStream utfDataAsciiSmall;
private ByteArrayInputStream utfDataSmall;
private ByteArrayInputStream utfDataAsciiLarge;
private ByteArrayInputStream utfDataLarge;
private static final int REPEATS = 20;
@Setup(Level.Iteration) @Setup(Level.Iteration)
public void setup() { public void setup() throws IOException, ClassNotFoundException, NoSuchMethodException, IllegalAccessException {
byte[] bytes = new byte[size]; byte[] bytes = new byte[SIZE];
ThreadLocalRandom.current().nextBytes(bytes); ThreadLocalRandom.current().nextBytes(bytes);
bais = new ByteArrayInputStream(bytes); bais = new ByteArrayInputStream(bytes);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
DataOutputStream dataOut = new DataOutputStream(baos);
for (int i = 0; i < REPEATS; i++) {
dataOut.writeUTF("small");
dataOut.writeUTF("slightly longer string that is more likely to trigger use of simd intrinsics");
}
dataOut.flush();
utfDataAsciiMixed = new ByteArrayInputStream(baos.toByteArray());
baos = new ByteArrayOutputStream();
dataOut = new DataOutputStream(baos);
for (int i = 0; i < REPEATS; i++) {
dataOut.writeUTF("slightly longer string that is more likely to trigger use of simd intrinsics");
dataOut.writeUTF("slightly longer string that is more likely to trigger use of simd intrinsics");
}
dataOut.flush();
utfDataAsciiLarge = new ByteArrayInputStream(baos.toByteArray());
baos = new ByteArrayOutputStream();
dataOut = new DataOutputStream(baos);
for (int i = 0; i < REPEATS; i++) {
dataOut.writeUTF("smol");
dataOut.writeUTF("smally");
}
dataOut.flush();
utfDataAsciiSmall = new ByteArrayInputStream(baos.toByteArray());
baos = new ByteArrayOutputStream();
dataOut = new DataOutputStream(baos);
for (int i = 0; i < REPEATS; i++) {
dataOut.writeUTF("sm\u00FFll");
dataOut.writeUTF("slightly longer string th\u01F3t is more likely to trigger use of simd intrinsics");
}
dataOut.flush();
utfDataMixed = new ByteArrayInputStream(baos.toByteArray());
baos = new ByteArrayOutputStream();
dataOut = new DataOutputStream(baos);
for (int i = 0; i < REPEATS; i++) {
dataOut.writeUTF("sm\u00F3l");
dataOut.writeUTF("small\u0132");
}
dataOut.flush();
utfDataSmall = new ByteArrayInputStream(baos.toByteArray());
baos = new ByteArrayOutputStream();
dataOut = new DataOutputStream(baos);
for (int i = 0; i < REPEATS; i++) {
dataOut.writeUTF("slightly longer string that is more likely to trigg\u0131r use of simd intrinsics");
dataOut.writeUTF("slightly longer string th\u0131t is more likely to trigger use of simd intrinsics");
}
dataOut.flush();
utfDataLarge = new ByteArrayInputStream(baos.toByteArray());
} }
@Benchmark @Benchmark
public void readChar(Blackhole bh) throws Exception { public void readChar(Blackhole bh) throws Exception {
bais.reset(); bais.reset();
DataInputStream dis = new DataInputStream(bais); DataInputStream dis = new DataInputStream(bais);
for (int i = 0; i < size / 2; i++) { for (int i = 0; i < SIZE / 2; i++) {
bh.consume(dis.readChar()); bh.consume(dis.readChar());
} }
} }
@ -62,8 +129,68 @@ public class DataInputStreamTest {
public void readInt(Blackhole bh) throws Exception { public void readInt(Blackhole bh) throws Exception {
bais.reset(); bais.reset();
DataInputStream dis = new DataInputStream(bais); DataInputStream dis = new DataInputStream(bais);
for (int i = 0; i < size / 4; i++) { for (int i = 0; i < SIZE / 4; i++) {
bh.consume(dis.readInt()); bh.consume(dis.readInt());
} }
} }
@Benchmark
public void readUTFAsciiMixed(Blackhole bh) throws Exception {
utfDataAsciiMixed.reset();
DataInputStream dis = new DataInputStream(utfDataAsciiMixed);
for (int i = 0; i < REPEATS; i++) {
bh.consume(dis.readUTF());
bh.consume(dis.readUTF());
}
}
@Benchmark
public void readUTFAsciiSmall(Blackhole bh) throws Exception {
utfDataAsciiSmall.reset();
DataInputStream dis = new DataInputStream(utfDataAsciiSmall);
for (int i = 0; i < REPEATS; i++) {
bh.consume(dis.readUTF());
bh.consume(dis.readUTF());
}
}
@Benchmark
public void readUTFAsciiLarge(Blackhole bh) throws Exception {
utfDataAsciiLarge.reset();
DataInputStream dis = new DataInputStream(utfDataAsciiLarge);
for (int i = 0; i < REPEATS; i++) {
bh.consume(dis.readUTF());
bh.consume(dis.readUTF());
}
}
@Benchmark
public void readUTFMixed(Blackhole bh) throws Exception {
utfDataMixed.reset();
DataInputStream dis = new DataInputStream(utfDataMixed);
for (int i = 0; i < REPEATS; i++) {
bh.consume(dis.readUTF());
bh.consume(dis.readUTF());
}
}
@Benchmark
public void readUTFSmall(Blackhole bh) throws Exception {
utfDataSmall.reset();
DataInputStream dis = new DataInputStream(utfDataSmall);
for (int i = 0; i < REPEATS; i++) {
bh.consume(dis.readUTF());
bh.consume(dis.readUTF());
}
}
@Benchmark
public void readUTFLarge(Blackhole bh) throws Exception {
utfDataLarge.reset();
DataInputStream dis = new DataInputStream(utfDataLarge);
for (int i = 0; i < REPEATS; i++) {
bh.consume(dis.readUTF());
bh.consume(dis.readUTF());
}
}
} }

View File

@ -0,0 +1,186 @@
/*
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.bench.java.io;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
/**
 * Benchmarks {@link ObjectInputStream#readUTF()} over pre-serialized payloads.
 *
 * <p>Six payloads are prepared, covering ASCII-only and non-ASCII strings in
 * short, long and mixed lengths. Each payload holds {@code REPEATS} pairs of
 * strings, and each benchmark reads one payload back in full.
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@Fork(value = 3, warmups = 0)
@Measurement(iterations = 5, time = 1)
@Warmup(iterations = 2, time = 2)
@State(Scope.Thread)
public class ObjectInputStreamTest {

    // Overhead of creating an ObjectInputStream is significant, need to increase the number of data elements
    // to balance work
    private static final int REPEATS = 20;

    /** ASCII only: one short and one long string per pair. */
    private ByteArrayInputStream utfDataAsciiMixed;
    /** Contains non-ASCII chars: one short and one long string per pair. */
    private ByteArrayInputStream utfDataMixed;
    /** ASCII only: two short strings per pair. */
    private ByteArrayInputStream utfDataAsciiSmall;
    /** Contains non-ASCII chars: two short strings per pair. */
    private ByteArrayInputStream utfDataSmall;
    /** ASCII only: two long strings per pair. */
    private ByteArrayInputStream utfDataAsciiLarge;
    /** Contains non-ASCII chars: two long strings per pair. */
    private ByteArrayInputStream utfDataLarge;

    /**
     * Serializes all payloads anew before every iteration.
     *
     * @throws IOException if writing to the in-memory stream fails
     */
    @Setup(Level.Iteration)
    public void setup() throws IOException {
        utfDataAsciiMixed = utfData(
                "small",
                "slightly longer string that is more likely to trigger use of simd intrinsics");
        utfDataAsciiLarge = utfData(
                "slightly longer string that is more likely to trigger use of simd intrinsics",
                "slightly longer string that is more likely to trigger use of simd intrinsics");
        utfDataAsciiSmall = utfData(
                "smol",
                "smally");
        utfDataMixed = utfData(
                "sm\u00FFll",
                "slightly longer string th\u01F3t is more likely to trigger use of simd intrinsics");
        utfDataSmall = utfData(
                "sm\u00F3l",
                "small\u0132");
        utfDataLarge = utfData(
                "slightly longer string that is more likely to trigg\u0131r use of simd intrinsics",
                "slightly longer string th\u0131t is more likely to trigger use of simd intrinsics");
    }

    /**
     * Writes {@code REPEATS} pairs of ({@code first}, {@code second}) with
     * {@link ObjectOutputStream#writeUTF(String)} and wraps the resulting
     * bytes in a resettable in-memory input stream.
     */
    private static ByteArrayInputStream utfData(String first, String second) throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        try (ObjectOutputStream dataOut = new ObjectOutputStream(baos)) {
            for (int i = 0; i < REPEATS; i++) {
                dataOut.writeUTF(first);
                dataOut.writeUTF(second);
            }
            // push any buffered data into baos before the bytes are snapshotted
            dataOut.flush();
        }
        return new ByteArrayInputStream(baos.toByteArray());
    }

    /**
     * Rewinds {@code data}, opens a new ObjectInputStream over it and reads
     * back {@code REPEATS} pairs of strings, handing each one to {@code bh}.
     */
    private static void readAll(ByteArrayInputStream data, Blackhole bh) throws IOException {
        data.reset();
        ObjectInputStream ois = new ObjectInputStream(data);
        for (int i = 0; i < REPEATS; i++) {
            bh.consume(ois.readUTF());
            bh.consume(ois.readUTF());
        }
    }

    @Benchmark
    public void readUTFAsciiMixed(Blackhole bh) throws Exception {
        readAll(utfDataAsciiMixed, bh);
    }

    @Benchmark
    public void readUTFAsciiSmall(Blackhole bh) throws Exception {
        readAll(utfDataAsciiSmall, bh);
    }

    @Benchmark
    public void readUTFAsciiLarge(Blackhole bh) throws Exception {
        readAll(utfDataAsciiLarge, bh);
    }

    @Benchmark
    public void readUTFMixed(Blackhole bh) throws Exception {
        readAll(utfDataMixed, bh);
    }

    @Benchmark
    public void readUTFSmall(Blackhole bh) throws Exception {
        readAll(utfDataSmall, bh);
    }

    @Benchmark
    public void readUTFLarge(Blackhole bh) throws Exception {
        readAll(utfDataLarge, bh);
    }
}