8325340: Add ASCII fast-path to Data-/ObjectInputStream.readUTF

Reviewed-by: rgiulietti, bpb, rriggs
This commit is contained in:
Claes Redestad 2024-02-26 16:05:18 +00:00
parent 3780ad3133
commit 9a9cfbe0ba
4 changed files with 362 additions and 24 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1994, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1994, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -571,10 +571,10 @@ loop: while (true) {
* valid modified UTF-8 encoding of a Unicode string.
* @see java.io.DataInputStream#readUnsignedShort()
*/
public static final String readUTF(DataInput in) throws IOException {
public static String readUTF(DataInput in) throws IOException {
int utflen = in.readUnsignedShort();
byte[] bytearr = null;
char[] chararr = null;
byte[] bytearr;
char[] chararr;
if (in instanceof DataInputStream dis) {
if (dis.bytearr.length < utflen) {
dis.bytearr = new byte[utflen*2];

View File

@ -33,6 +33,7 @@ import java.lang.reflect.Array;
import java.lang.reflect.InvocationHandler;
import java.lang.reflect.Modifier;
import java.lang.reflect.Proxy;
import java.nio.charset.StandardCharsets;
import java.security.AccessControlContext;
import java.security.AccessController;
import java.security.PrivilegedAction;
@ -42,6 +43,7 @@ import java.util.Arrays;
import java.util.Map;
import java.util.Objects;
import jdk.internal.access.JavaLangAccess;
import jdk.internal.access.SharedSecrets;
import jdk.internal.event.DeserializationEvent;
import jdk.internal.misc.Unsafe;
@ -2995,6 +2997,8 @@ public class ObjectInputStream
private static final int CHAR_BUF_SIZE = 256;
/** readBlockHeader() return value indicating header read may block */
private static final int HEADER_BLOCKED = -2;
/** access to internal methods to count ASCII and inflate latin1/ASCII bytes to char */
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
/** buffer for reading general/block data */
private final byte[] buf = new byte[MAX_BLOCK_SIZE];
@ -3671,8 +3675,32 @@ public class ObjectInputStream
* utflen bytes.
*/
private String readUTFBody(long utflen) throws IOException {
if (!blkmode) {
end = pos = 0;
}
StringBuilder sbuf;
if (utflen > 0 && utflen < Integer.MAX_VALUE) {
// Scan for leading ASCII chars
int avail = end - pos;
int ascii = JLA.countPositives(buf, pos, Math.min(avail, (int)utflen));
if (ascii == utflen) {
// Complete match, consume the buf[pos ... pos + ascii] range and return.
// Modified UTF-8 and ISO-8859-1 are both ASCII-compatible encodings,
// so we can treat the range as ISO-8859-1 and avoid a redundant scan
// in the String constructor.
String utf = new String(buf, pos, ascii, StandardCharsets.ISO_8859_1);
pos += ascii;
return utf;
}
// Avoid allocating a StringBuilder if there's enough data in buf and
// cbuf is large enough
if (avail >= utflen && utflen <= CHAR_BUF_SIZE) {
JLA.inflateBytesToChars(buf, pos, cbuf, 0, ascii);
pos += ascii;
int cbufPos = readUTFSpan(ascii, utflen - ascii);
return new String(cbuf, 0, cbufPos);
}
// a reasonable initial capacity based on the UTF length
int initialCapacity = Math.min((int)utflen, 0xFFFF);
sbuf = new StringBuilder(initialCapacity);
@ -3680,14 +3708,14 @@ public class ObjectInputStream
sbuf = new StringBuilder();
}
if (!blkmode) {
end = pos = 0;
}
while (utflen > 0) {
int avail = end - pos;
if (avail >= 3 || (long) avail == utflen) {
utflen -= readUTFSpan(sbuf, utflen);
int cbufPos = readUTFSpan(0, utflen);
// pos has advanced: adjust utflen by the difference in
// available bytes
utflen -= avail - (end - pos);
sbuf.append(cbuf, 0, cbufPos);
} else {
if (blkmode) {
// near block boundary, read one byte at a time
@ -3709,18 +3737,17 @@ public class ObjectInputStream
/**
* Reads span of UTF-encoded characters out of internal buffer
* (starting at offset pos and ending at or before offset end),
* consuming no more than utflen bytes. Appends read characters to
* sbuf. Returns the number of bytes consumed.
* (starting at offset pos), consuming no more than utflen bytes.
* Appends read characters to cbuf. Returns the current position
* in cbuf.
*/
private long readUTFSpan(StringBuilder sbuf, long utflen)
private int readUTFSpan(int cpos, long utflen)
throws IOException
{
int cpos = 0;
int start = pos;
int avail = Math.min(end - pos, CHAR_BUF_SIZE);
// stop short of last char unless all of utf bytes in buffer
int stop = pos + ((utflen > avail) ? avail - 2 : (int) utflen);
int stop = start + ((utflen > avail) ? avail - 2 : (int) utflen);
boolean outOfBounds = false;
try {
@ -3765,9 +3792,7 @@ public class ObjectInputStream
throw new UTFDataFormatException();
}
}
sbuf.append(cbuf, 0, cpos);
return pos - start;
return cpos;
}
/**

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2020, 2022, Red Hat Inc. All rights reserved.
* Copyright (c) 2020, Red Hat Inc. All rights reserved.
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -27,7 +28,10 @@ import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.Blackhole;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
@ -38,22 +42,85 @@ import java.util.concurrent.TimeUnit;
@Warmup(iterations = 2, time = 2)
@State(Scope.Thread)
public class DataInputStreamTest {
private final int size = 1024;
private static final int SIZE = 1024;
private ByteArrayInputStream bais;
private ByteArrayInputStream utfDataAsciiMixed;
private ByteArrayInputStream utfDataMixed;
private ByteArrayInputStream utfDataAsciiSmall;
private ByteArrayInputStream utfDataSmall;
private ByteArrayInputStream utfDataAsciiLarge;
private ByteArrayInputStream utfDataLarge;
private static final int REPEATS = 20;
@Setup(Level.Iteration)
public void setup() {
byte[] bytes = new byte[size];
public void setup() throws IOException, ClassNotFoundException, NoSuchMethodException, IllegalAccessException {
byte[] bytes = new byte[SIZE];
ThreadLocalRandom.current().nextBytes(bytes);
bais = new ByteArrayInputStream(bytes);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
DataOutputStream dataOut = new DataOutputStream(baos);
for (int i = 0; i < REPEATS; i++) {
dataOut.writeUTF("small");
dataOut.writeUTF("slightly longer string that is more likely to trigger use of simd intrinsics");
}
dataOut.flush();
utfDataAsciiMixed = new ByteArrayInputStream(baos.toByteArray());
baos = new ByteArrayOutputStream();
dataOut = new DataOutputStream(baos);
for (int i = 0; i < REPEATS; i++) {
dataOut.writeUTF("slightly longer string that is more likely to trigger use of simd intrinsics");
dataOut.writeUTF("slightly longer string that is more likely to trigger use of simd intrinsics");
}
dataOut.flush();
utfDataAsciiLarge = new ByteArrayInputStream(baos.toByteArray());
baos = new ByteArrayOutputStream();
dataOut = new DataOutputStream(baos);
for (int i = 0; i < REPEATS; i++) {
dataOut.writeUTF("smol");
dataOut.writeUTF("smally");
}
dataOut.flush();
utfDataAsciiSmall = new ByteArrayInputStream(baos.toByteArray());
baos = new ByteArrayOutputStream();
dataOut = new DataOutputStream(baos);
for (int i = 0; i < REPEATS; i++) {
dataOut.writeUTF("sm\u00FFll");
dataOut.writeUTF("slightly longer string th\u01F3t is more likely to trigger use of simd intrinsics");
}
dataOut.flush();
utfDataMixed = new ByteArrayInputStream(baos.toByteArray());
baos = new ByteArrayOutputStream();
dataOut = new DataOutputStream(baos);
for (int i = 0; i < REPEATS; i++) {
dataOut.writeUTF("sm\u00F3l");
dataOut.writeUTF("small\u0132");
}
dataOut.flush();
utfDataSmall = new ByteArrayInputStream(baos.toByteArray());
baos = new ByteArrayOutputStream();
dataOut = new DataOutputStream(baos);
for (int i = 0; i < REPEATS; i++) {
dataOut.writeUTF("slightly longer string that is more likely to trigg\u0131r use of simd intrinsics");
dataOut.writeUTF("slightly longer string th\u0131t is more likely to trigger use of simd intrinsics");
}
dataOut.flush();
utfDataLarge = new ByteArrayInputStream(baos.toByteArray());
}
@Benchmark
public void readChar(Blackhole bh) throws Exception {
bais.reset();
DataInputStream dis = new DataInputStream(bais);
for (int i = 0; i < size / 2; i++) {
for (int i = 0; i < SIZE / 2; i++) {
bh.consume(dis.readChar());
}
}
@ -62,8 +129,68 @@ public class DataInputStreamTest {
public void readInt(Blackhole bh) throws Exception {
bais.reset();
DataInputStream dis = new DataInputStream(bais);
for (int i = 0; i < size / 4; i++) {
for (int i = 0; i < SIZE / 4; i++) {
bh.consume(dis.readInt());
}
}
/**
 * Measures {@code DataInputStream.readUTF} over the stream that setup fills
 * with alternating short and long ASCII-only strings.
 */
@Benchmark
public void readUTFAsciiMixed(Blackhole bh) throws Exception {
    // Rewind so every invocation decodes the same bytes from the start.
    utfDataAsciiMixed.reset();
    final DataInputStream in = new DataInputStream(utfDataAsciiMixed);
    int pairsLeft = REPEATS;
    while (pairsLeft-- > 0) {
        // setup wrote the strings in pairs, so read them back in pairs.
        bh.consume(in.readUTF());
        bh.consume(in.readUTF());
    }
}
/**
 * Measures {@code DataInputStream.readUTF} over the stream that setup fills
 * with pairs of short ASCII-only strings.
 */
@Benchmark
public void readUTFAsciiSmall(Blackhole bh) throws Exception {
    // Rewind so every invocation decodes the same bytes from the start.
    utfDataAsciiSmall.reset();
    final DataInputStream in = new DataInputStream(utfDataAsciiSmall);
    int pairsLeft = REPEATS;
    while (pairsLeft-- > 0) {
        // setup wrote the strings in pairs, so read them back in pairs.
        bh.consume(in.readUTF());
        bh.consume(in.readUTF());
    }
}
/**
 * Measures {@code DataInputStream.readUTF} over the stream that setup fills
 * with pairs of long ASCII-only strings.
 */
@Benchmark
public void readUTFAsciiLarge(Blackhole bh) throws Exception {
    // Rewind so every invocation decodes the same bytes from the start.
    utfDataAsciiLarge.reset();
    final DataInputStream in = new DataInputStream(utfDataAsciiLarge);
    int pairsLeft = REPEATS;
    while (pairsLeft-- > 0) {
        // setup wrote the strings in pairs, so read them back in pairs.
        bh.consume(in.readUTF());
        bh.consume(in.readUTF());
    }
}
/**
 * Measures {@code DataInputStream.readUTF} over the stream that setup fills
 * with alternating short and long strings, each containing one non-ASCII
 * character.
 */
@Benchmark
public void readUTFMixed(Blackhole bh) throws Exception {
    // Rewind so every invocation decodes the same bytes from the start.
    utfDataMixed.reset();
    final DataInputStream in = new DataInputStream(utfDataMixed);
    int pairsLeft = REPEATS;
    while (pairsLeft-- > 0) {
        // setup wrote the strings in pairs, so read them back in pairs.
        bh.consume(in.readUTF());
        bh.consume(in.readUTF());
    }
}
/**
 * Measures {@code DataInputStream.readUTF} over the stream that setup fills
 * with pairs of short strings, each containing one non-ASCII character.
 */
@Benchmark
public void readUTFSmall(Blackhole bh) throws Exception {
    // Rewind so every invocation decodes the same bytes from the start.
    utfDataSmall.reset();
    final DataInputStream in = new DataInputStream(utfDataSmall);
    int pairsLeft = REPEATS;
    while (pairsLeft-- > 0) {
        // setup wrote the strings in pairs, so read them back in pairs.
        bh.consume(in.readUTF());
        bh.consume(in.readUTF());
    }
}
/**
 * Measures {@code DataInputStream.readUTF} over the stream that setup fills
 * with pairs of long strings, each containing one non-ASCII character.
 */
@Benchmark
public void readUTFLarge(Blackhole bh) throws Exception {
    // Rewind so every invocation decodes the same bytes from the start.
    utfDataLarge.reset();
    final DataInputStream in = new DataInputStream(utfDataLarge);
    int pairsLeft = REPEATS;
    while (pairsLeft-- > 0) {
        // setup wrote the strings in pairs, so read them back in pairs.
        bh.consume(in.readUTF());
        bh.consume(in.readUTF());
    }
}
}

View File

@ -0,0 +1,186 @@
/*
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.bench.java.io;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
/**
 * JMH benchmarks for {@link ObjectInputStream#readUTF()}.
 *
 * <p>Each benchmark reads back {@code 2 * REPEATS} strings that were written
 * with {@link ObjectOutputStream#writeUTF(String)}. The data sets differ in
 * string length (short, long, or a mix of both) and in whether the strings
 * are ASCII-only or contain a single non-ASCII character.
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@Fork(value = 3, warmups = 0)
@Measurement(iterations = 5, time = 1)
@Warmup(iterations = 2, time = 2)
@State(Scope.Thread)
public class ObjectInputStreamTest {

    /** Long ASCII-only payload shared by the "mixed" and "large" ASCII data sets. */
    private static final String LONG_ASCII =
            "slightly longer string that is more likely to trigger use of simd intrinsics";

    // Creating an ObjectInputStream has significant overhead and happens inside
    // every benchmark invocation, so each data set holds enough strings to
    // balance that cost against the readUTF work being measured.
    private static final int REPEATS = 20;

    private ByteArrayInputStream utfDataAsciiMixed;
    private ByteArrayInputStream utfDataMixed;

    private ByteArrayInputStream utfDataAsciiSmall;
    private ByteArrayInputStream utfDataSmall;

    private ByteArrayInputStream utfDataAsciiLarge;
    private ByteArrayInputStream utfDataLarge;

    /**
     * Builds the six serialized data sets read by the benchmarks.
     *
     * @throws IOException if writing to the in-memory streams fails
     */
    @Setup(Level.Iteration)
    public void setup() throws IOException {
        utfDataAsciiMixed = utfPairs("small", LONG_ASCII);
        utfDataAsciiLarge = utfPairs(LONG_ASCII, LONG_ASCII);
        utfDataAsciiSmall = utfPairs("smol", "smally");

        utfDataMixed = utfPairs(
                "sm\u00FFll",
                "slightly longer string th\u01F3t is more likely to trigger use of simd intrinsics");
        utfDataSmall = utfPairs("sm\u00F3l", "small\u0132");
        utfDataLarge = utfPairs(
                "slightly longer string that is more likely to trigg\u0131r use of simd intrinsics",
                "slightly longer string th\u0131t is more likely to trigger use of simd intrinsics");
    }

    /**
     * Serializes {@code first} and {@code second} alternately, {@code REPEATS}
     * times, through an {@link ObjectOutputStream} and returns a stream over
     * the resulting bytes.
     */
    private static ByteArrayInputStream utfPairs(String first, String second) throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        try (ObjectOutputStream out = new ObjectOutputStream(baos)) {
            for (int i = 0; i < REPEATS; i++) {
                out.writeUTF(first);
                out.writeUTF(second);
            }
            // Flush before snapshotting so the buffered data reaches baos.
            out.flush();
            return new ByteArrayInputStream(baos.toByteArray());
        }
    }

    /**
     * Rewinds {@code data}, wraps it in a fresh {@link ObjectInputStream} and
     * reads {@code REPEATS} pairs of strings into the blackhole.
     */
    private static void readPairs(ByteArrayInputStream data, Blackhole bh) throws IOException {
        data.reset();
        ObjectInputStream ois = new ObjectInputStream(data);
        for (int i = 0; i < REPEATS; i++) {
            bh.consume(ois.readUTF());
            bh.consume(ois.readUTF());
        }
    }

    /** Alternating short and long ASCII-only strings. */
    @Benchmark
    public void readUTFAsciiMixed(Blackhole bh) throws Exception {
        readPairs(utfDataAsciiMixed, bh);
    }

    /** Short ASCII-only strings. */
    @Benchmark
    public void readUTFAsciiSmall(Blackhole bh) throws Exception {
        readPairs(utfDataAsciiSmall, bh);
    }

    /** Long ASCII-only strings. */
    @Benchmark
    public void readUTFAsciiLarge(Blackhole bh) throws Exception {
        readPairs(utfDataAsciiLarge, bh);
    }

    /** Alternating short and long strings, each with one non-ASCII character. */
    @Benchmark
    public void readUTFMixed(Blackhole bh) throws Exception {
        readPairs(utfDataMixed, bh);
    }

    /** Short strings, each with one non-ASCII character. */
    @Benchmark
    public void readUTFSmall(Blackhole bh) throws Exception {
        readPairs(utfDataSmall, bh);
    }

    /** Long strings, each with one non-ASCII character. */
    @Benchmark
    public void readUTFLarge(Blackhole bh) throws Exception {
        readPairs(utfDataLarge, bh);
    }
}