From 9a9cfbe0ba18084bbeae212c9e0da2715a3086e7 Mon Sep 17 00:00:00 2001 From: Claes Redestad Date: Mon, 26 Feb 2024 16:05:18 +0000 Subject: [PATCH] 8325340: Add ASCII fast-path to Data-/ObjectInputStream.readUTF Reviewed-by: rgiulietti, bpb, rriggs --- .../classes/java/io/DataInputStream.java | 8 +- .../classes/java/io/ObjectInputStream.java | 53 +++-- .../bench/java/io/DataInputStreamTest.java | 139 ++++++++++++- .../bench/java/io/ObjectInputStreamTest.java | 186 ++++++++++++++++++ 4 files changed, 362 insertions(+), 24 deletions(-) create mode 100644 test/micro/org/openjdk/bench/java/io/ObjectInputStreamTest.java diff --git a/src/java.base/share/classes/java/io/DataInputStream.java b/src/java.base/share/classes/java/io/DataInputStream.java index 7192b30d5f2..88cd4edc411 100644 --- a/src/java.base/share/classes/java/io/DataInputStream.java +++ b/src/java.base/share/classes/java/io/DataInputStream.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 1994, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1994, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -571,10 +571,10 @@ loop: while (true) { * valid modified UTF-8 encoding of a Unicode string. 
* @see java.io.DataInputStream#readUnsignedShort() */ - public static final String readUTF(DataInput in) throws IOException { + public static String readUTF(DataInput in) throws IOException { int utflen = in.readUnsignedShort(); - byte[] bytearr = null; - char[] chararr = null; + byte[] bytearr; + char[] chararr; if (in instanceof DataInputStream dis) { if (dis.bytearr.length < utflen) { dis.bytearr = new byte[utflen*2]; diff --git a/src/java.base/share/classes/java/io/ObjectInputStream.java b/src/java.base/share/classes/java/io/ObjectInputStream.java index d2fe57c14ef..502db6a1a21 100644 --- a/src/java.base/share/classes/java/io/ObjectInputStream.java +++ b/src/java.base/share/classes/java/io/ObjectInputStream.java @@ -33,6 +33,7 @@ import java.lang.reflect.Array; import java.lang.reflect.InvocationHandler; import java.lang.reflect.Modifier; import java.lang.reflect.Proxy; +import java.nio.charset.StandardCharsets; import java.security.AccessControlContext; import java.security.AccessController; import java.security.PrivilegedAction; @@ -42,6 +43,7 @@ import java.util.Arrays; import java.util.Map; import java.util.Objects; +import jdk.internal.access.JavaLangAccess; import jdk.internal.access.SharedSecrets; import jdk.internal.event.DeserializationEvent; import jdk.internal.misc.Unsafe; @@ -2995,6 +2997,8 @@ public class ObjectInputStream private static final int CHAR_BUF_SIZE = 256; /** readBlockHeader() return value indicating header read may block */ private static final int HEADER_BLOCKED = -2; + /** access to internal methods to count ASCII and inflate latin1/ASCII bytes to char */ + private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess(); /** buffer for reading general/block data */ private final byte[] buf = new byte[MAX_BLOCK_SIZE]; @@ -3671,8 +3675,32 @@ public class ObjectInputStream * utflen bytes. 
*/ private String readUTFBody(long utflen) throws IOException { + if (!blkmode) { + end = pos = 0; + } + StringBuilder sbuf; if (utflen > 0 && utflen < Integer.MAX_VALUE) { + // Scan for leading ASCII chars + int avail = end - pos; + int ascii = JLA.countPositives(buf, pos, Math.min(avail, (int)utflen)); + if (ascii == utflen) { + // Complete match, consume the buf[pos ... pos + ascii] range and return. + // Modified UTF-8 and ISO-8859-1 are both ASCII-compatible encodings, + // thus we can treat the range as ISO-8859-1 and avoid a redundant scan + // in the String constructor + String utf = new String(buf, pos, ascii, StandardCharsets.ISO_8859_1); + pos += ascii; + return utf; + } + // Avoid allocating a StringBuilder if there's enough data in buf and + // cbuf is large enough + if (avail >= utflen && utflen <= CHAR_BUF_SIZE) { + JLA.inflateBytesToChars(buf, pos, cbuf, 0, ascii); + pos += ascii; + int cbufPos = readUTFSpan(ascii, utflen - ascii); + return new String(cbuf, 0, cbufPos); + } // a reasonable initial capacity based on the UTF length int initialCapacity = Math.min((int)utflen, 0xFFFF); sbuf = new StringBuilder(initialCapacity); @@ -3680,14 +3708,14 @@ public class ObjectInputStream sbuf = new StringBuilder(); } - if (!blkmode) { - end = pos = 0; - } - while (utflen > 0) { int avail = end - pos; if (avail >= 3 || (long) avail == utflen) { - utflen -= readUTFSpan(sbuf, utflen); + int cbufPos = readUTFSpan(0, utflen); + // pos has advanced: adjust utflen by the difference in + // available bytes + utflen -= avail - (end - pos); + sbuf.append(cbuf, 0, cbufPos); } else { if (blkmode) { // near block boundary, read one byte at a time @@ -3709,18 +3737,17 @@ public class ObjectInputStream /** * Reads span of UTF-encoded characters out of internal buffer - * (starting at offset pos and ending at or before offset end), - * consuming no more than utflen bytes. Appends read characters to - * sbuf. Returns the number of bytes consumed. 
+ * (starting at offset pos), consuming no more than utflen bytes. + * Appends read characters to cbuf. Returns the current position + * in cbuf. */ - private long readUTFSpan(StringBuilder sbuf, long utflen) + private int readUTFSpan(int cpos, long utflen) throws IOException { - int cpos = 0; int start = pos; int avail = Math.min(end - pos, CHAR_BUF_SIZE); // stop short of last char unless all of utf bytes in buffer - int stop = pos + ((utflen > avail) ? avail - 2 : (int) utflen); + int stop = start + ((utflen > avail) ? avail - 2 : (int) utflen); boolean outOfBounds = false; try { @@ -3765,9 +3792,7 @@ public class ObjectInputStream throw new UTFDataFormatException(); } } - - sbuf.append(cbuf, 0, cpos); - return pos - start; + return cpos; } /** diff --git a/test/micro/org/openjdk/bench/java/io/DataInputStreamTest.java b/test/micro/org/openjdk/bench/java/io/DataInputStreamTest.java index 3856cea22ef..287a6a88967 100644 --- a/test/micro/org/openjdk/bench/java/io/DataInputStreamTest.java +++ b/test/micro/org/openjdk/bench/java/io/DataInputStreamTest.java @@ -1,5 +1,6 @@ /* - * Copyright (c) 2020, 2022, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -27,7 +28,10 @@ import org.openjdk.jmh.annotations.*; import org.openjdk.jmh.infra.Blackhole; import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; @@ -38,22 +42,85 @@ import java.util.concurrent.TimeUnit; @Warmup(iterations = 2, time = 2) @State(Scope.Thread) public class DataInputStreamTest { - private final int size = 1024; + private static final int SIZE = 1024; private ByteArrayInputStream bais; + private ByteArrayInputStream utfDataAsciiMixed; + private ByteArrayInputStream utfDataMixed; + + private ByteArrayInputStream utfDataAsciiSmall; + private ByteArrayInputStream utfDataSmall; + + private ByteArrayInputStream utfDataAsciiLarge; + private ByteArrayInputStream utfDataLarge; + + private static final int REPEATS = 20; @Setup(Level.Iteration) - public void setup() { - byte[] bytes = new byte[size]; + public void setup() throws IOException, ClassNotFoundException, NoSuchMethodException, IllegalAccessException { + byte[] bytes = new byte[SIZE]; ThreadLocalRandom.current().nextBytes(bytes); bais = new ByteArrayInputStream(bytes); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + DataOutputStream dataOut = new DataOutputStream(baos); + for (int i = 0; i < REPEATS; i++) { + dataOut.writeUTF("small"); + dataOut.writeUTF("slightly longer string that is more likely to trigger use of simd intrinsics"); + } + dataOut.flush(); + utfDataAsciiMixed = new ByteArrayInputStream(baos.toByteArray()); + + baos = new ByteArrayOutputStream(); + dataOut = new DataOutputStream(baos); + for (int i = 0; i < REPEATS; i++) { + dataOut.writeUTF("slightly longer string that is more likely to trigger use of simd intrinsics"); + dataOut.writeUTF("slightly longer string that is more likely to trigger use 
of simd intrinsics"); + } + dataOut.flush(); + utfDataAsciiLarge = new ByteArrayInputStream(baos.toByteArray()); + + baos = new ByteArrayOutputStream(); + dataOut = new DataOutputStream(baos); + for (int i = 0; i < REPEATS; i++) { + dataOut.writeUTF("smol"); + dataOut.writeUTF("smally"); + } + dataOut.flush(); + utfDataAsciiSmall = new ByteArrayInputStream(baos.toByteArray()); + + baos = new ByteArrayOutputStream(); + dataOut = new DataOutputStream(baos); + for (int i = 0; i < REPEATS; i++) { + dataOut.writeUTF("sm\u00FFll"); + dataOut.writeUTF("slightly longer string th\u01F3t is more likely to trigger use of simd intrinsics"); + } + dataOut.flush(); + utfDataMixed = new ByteArrayInputStream(baos.toByteArray()); + + baos = new ByteArrayOutputStream(); + dataOut = new DataOutputStream(baos); + for (int i = 0; i < REPEATS; i++) { + dataOut.writeUTF("sm\u00F3l"); + dataOut.writeUTF("small\u0132"); + } + dataOut.flush(); + utfDataSmall = new ByteArrayInputStream(baos.toByteArray()); + + baos = new ByteArrayOutputStream(); + dataOut = new DataOutputStream(baos); + for (int i = 0; i < REPEATS; i++) { + dataOut.writeUTF("slightly longer string that is more likely to trigg\u0131r use of simd intrinsics"); + dataOut.writeUTF("slightly longer string th\u0131t is more likely to trigger use of simd intrinsics"); + } + dataOut.flush(); + utfDataLarge = new ByteArrayInputStream(baos.toByteArray()); } @Benchmark public void readChar(Blackhole bh) throws Exception { bais.reset(); DataInputStream dis = new DataInputStream(bais); - for (int i = 0; i < size / 2; i++) { + for (int i = 0; i < SIZE / 2; i++) { bh.consume(dis.readChar()); } } @@ -62,8 +129,68 @@ public class DataInputStreamTest { public void readInt(Blackhole bh) throws Exception { bais.reset(); DataInputStream dis = new DataInputStream(bais); - for (int i = 0; i < size / 4; i++) { + for (int i = 0; i < SIZE / 4; i++) { bh.consume(dis.readInt()); } } + + @Benchmark + public void readUTFAsciiMixed(Blackhole bh) throws 
Exception { + utfDataAsciiMixed.reset(); + DataInputStream dis = new DataInputStream(utfDataAsciiMixed); + for (int i = 0; i < REPEATS; i++) { + bh.consume(dis.readUTF()); + bh.consume(dis.readUTF()); + } + } + + @Benchmark + public void readUTFAsciiSmall(Blackhole bh) throws Exception { + utfDataAsciiSmall.reset(); + DataInputStream dis = new DataInputStream(utfDataAsciiSmall); + for (int i = 0; i < REPEATS; i++) { + bh.consume(dis.readUTF()); + bh.consume(dis.readUTF()); + } + } + + @Benchmark + public void readUTFAsciiLarge(Blackhole bh) throws Exception { + utfDataAsciiLarge.reset(); + DataInputStream dis = new DataInputStream(utfDataAsciiLarge); + for (int i = 0; i < REPEATS; i++) { + bh.consume(dis.readUTF()); + bh.consume(dis.readUTF()); + } + } + + @Benchmark + public void readUTFMixed(Blackhole bh) throws Exception { + utfDataMixed.reset(); + DataInputStream dis = new DataInputStream(utfDataMixed); + for (int i = 0; i < REPEATS; i++) { + bh.consume(dis.readUTF()); + bh.consume(dis.readUTF()); + } + } + + @Benchmark + public void readUTFSmall(Blackhole bh) throws Exception { + utfDataSmall.reset(); + DataInputStream dis = new DataInputStream(utfDataSmall); + for (int i = 0; i < REPEATS; i++) { + bh.consume(dis.readUTF()); + bh.consume(dis.readUTF()); + } + } + + @Benchmark + public void readUTFLarge(Blackhole bh) throws Exception { + utfDataLarge.reset(); + DataInputStream dis = new DataInputStream(utfDataLarge); + for (int i = 0; i < REPEATS; i++) { + bh.consume(dis.readUTF()); + bh.consume(dis.readUTF()); + } + } } diff --git a/test/micro/org/openjdk/bench/java/io/ObjectInputStreamTest.java b/test/micro/org/openjdk/bench/java/io/ObjectInputStreamTest.java new file mode 100644 index 00000000000..76f4cd62efe --- /dev/null +++ b/test/micro/org/openjdk/bench/java/io/ObjectInputStreamTest.java @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package org.openjdk.bench.java.io; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.TimeUnit; + +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +@Fork(value = 3, warmups = 0) +@Measurement(iterations = 5, time = 1) 
+@Warmup(iterations = 2, time = 2) +@State(Scope.Thread) +public class ObjectInputStreamTest { + private ByteArrayInputStream utfDataAsciiMixed; + private ByteArrayInputStream utfDataMixed; + + private ByteArrayInputStream utfDataAsciiSmall; + private ByteArrayInputStream utfDataSmall; + + private ByteArrayInputStream utfDataAsciiLarge; + private ByteArrayInputStream utfDataLarge; + + // Overhead of creating an ObjectInputStream is significant, need to increase the number of data elements + // to balance work + private static final int REPEATS = 20; + + + @Setup(Level.Iteration) + public void setup() throws IOException, ClassNotFoundException, NoSuchMethodException, IllegalAccessException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + ObjectOutputStream dataOut = new ObjectOutputStream(baos); + for (int i = 0; i < REPEATS; i++) { + dataOut.writeUTF("small"); + dataOut.writeUTF("slightly longer string that is more likely to trigger use of simd intrinsics"); + } + dataOut.flush(); + utfDataAsciiMixed = new ByteArrayInputStream(baos.toByteArray()); + + baos = new ByteArrayOutputStream(); + dataOut = new ObjectOutputStream(baos); + for (int i = 0; i < REPEATS; i++) { + dataOut.writeUTF("slightly longer string that is more likely to trigger use of simd intrinsics"); + dataOut.writeUTF("slightly longer string that is more likely to trigger use of simd intrinsics"); + } + dataOut.flush(); + utfDataAsciiLarge = new ByteArrayInputStream(baos.toByteArray()); + + baos = new ByteArrayOutputStream(); + dataOut = new ObjectOutputStream(baos); + for (int i = 0; i < REPEATS; i++) { + dataOut.writeUTF("smol"); + dataOut.writeUTF("smally"); + } + dataOut.flush(); + utfDataAsciiSmall = new ByteArrayInputStream(baos.toByteArray()); + + baos = new ByteArrayOutputStream(); + dataOut = new ObjectOutputStream(baos); + for (int i = 0; i < REPEATS; i++) { + dataOut.writeUTF("sm\u00FFll"); + dataOut.writeUTF("slightly longer string th\u01F3t is more likely to trigger use of 
simd intrinsics"); + } + dataOut.flush(); + utfDataMixed = new ByteArrayInputStream(baos.toByteArray()); + + baos = new ByteArrayOutputStream(); + dataOut = new ObjectOutputStream(baos); + for (int i = 0; i < REPEATS; i++) { + dataOut.writeUTF("sm\u00F3l"); + dataOut.writeUTF("small\u0132"); + } + dataOut.flush(); + utfDataSmall = new ByteArrayInputStream(baos.toByteArray()); + + baos = new ByteArrayOutputStream(); + dataOut = new ObjectOutputStream(baos); + for (int i = 0; i < REPEATS; i++) { + dataOut.writeUTF("slightly longer string that is more likely to trigg\u0131r use of simd intrinsics"); + dataOut.writeUTF("slightly longer string th\u0131t is more likely to trigger use of simd intrinsics"); + } + dataOut.flush(); + utfDataLarge = new ByteArrayInputStream(baos.toByteArray()); + } + + @Benchmark + public void readUTFAsciiMixed(Blackhole bh) throws Exception { + utfDataAsciiMixed.reset(); + ObjectInputStream ois = new ObjectInputStream(utfDataAsciiMixed); + for (int i = 0; i < REPEATS; i++) { + bh.consume(ois.readUTF()); + bh.consume(ois.readUTF()); + } + } + + @Benchmark + public void readUTFAsciiSmall(Blackhole bh) throws Exception { + utfDataAsciiSmall.reset(); + ObjectInputStream ois = new ObjectInputStream(utfDataAsciiSmall); + for (int i = 0; i < REPEATS; i++) { + bh.consume(ois.readUTF()); + bh.consume(ois.readUTF()); + } + } + + @Benchmark + public void readUTFAsciiLarge(Blackhole bh) throws Exception { + utfDataAsciiLarge.reset(); + ObjectInputStream ois = new ObjectInputStream(utfDataAsciiLarge); + for (int i = 0; i < REPEATS; i++) { + bh.consume(ois.readUTF()); + bh.consume(ois.readUTF()); + } + } + + @Benchmark + public void readUTFMixed(Blackhole bh) throws Exception { + utfDataMixed.reset(); + ObjectInputStream ois = new ObjectInputStream(utfDataMixed); + for (int i = 0; i < REPEATS; i++) { + bh.consume(ois.readUTF()); + bh.consume(ois.readUTF()); + } + } + + @Benchmark + public void readUTFSmall(Blackhole bh) throws Exception { + 
utfDataSmall.reset(); + ObjectInputStream ois = new ObjectInputStream(utfDataSmall); + for (int i = 0; i < REPEATS; i++) { + bh.consume(ois.readUTF()); + bh.consume(ois.readUTF()); + } + } + + @Benchmark + public void readUTFLarge(Blackhole bh) throws Exception { + utfDataLarge.reset(); + ObjectInputStream ois = new ObjectInputStream(utfDataLarge); + for (int i = 0; i < REPEATS; i++) { + bh.consume(ois.readUTF()); + bh.consume(ois.readUTF()); + } + } +}