diff --git a/src/java.base/share/classes/java/io/DataOutputStream.java b/src/java.base/share/classes/java/io/DataOutputStream.java index d16ae73f913..4b22d65bd39 100644 --- a/src/java.base/share/classes/java/io/DataOutputStream.java +++ b/src/java.base/share/classes/java/io/DataOutputStream.java @@ -1,5 +1,6 @@ /* - * Copyright (c) 1994, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1994, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,8 +26,13 @@ package java.io; +import jdk.internal.access.JavaLangAccess; +import jdk.internal.access.SharedSecrets; import jdk.internal.util.ByteArray; +import static jdk.internal.util.ModifiedUtf.putChar; +import static jdk.internal.util.ModifiedUtf.utfLen; + /** * A data output stream lets an application write primitive Java data * types to an output stream in a portable way. An application can @@ -44,6 +50,8 @@ import jdk.internal.util.ByteArray; * @since 1.0 */ public class DataOutputStream extends FilterOutputStream implements DataOutput { + private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess(); + /** * The number of bytes written to the data output stream so far. * If this counter overflows, it will be wrapped to Integer.MAX_VALUE. @@ -352,15 +360,11 @@ public class DataOutputStream extends FilterOutputStream implements DataOutput { * {@code str} would exceed 65535 bytes in length * @throws IOException if some other I/O error occurs. */ + @SuppressWarnings("deprecation") static int writeUTF(String str, DataOutput out) throws IOException { final int strlen = str.length(); - int utflen = strlen; // optimized for ASCII - - for (int i = 0; i < strlen; i++) { - int c = str.charAt(i); - if (c >= 0x80 || c == 0) - utflen += (c >= 0x800) ? 2 : 1; - } + int countNonZeroAscii = JLA.countNonZeroAscii(str); + int utflen = utfLen(str, countNonZeroAscii); if (utflen > 65535 || /* overflow */ utflen < strlen) throw new UTFDataFormatException(tooLongMsg(str, utflen)); @@ -377,25 +381,11 @@ public class DataOutputStream extends FilterOutputStream implements DataOutput { int count = 0; ByteArray.setUnsignedShort(bytearr, count, utflen); count += 2; - int i = 0; - for (i = 0; i < strlen; i++) { // optimized for initial run of ASCII - int c = str.charAt(i); - if (c >= 0x80 || c == 0) break; - bytearr[count++] = (byte) c; - } + str.getBytes(0, countNonZeroAscii, bytearr, count); + count += countNonZeroAscii; - for (; i < strlen; i++) { - int c = str.charAt(i); - if (c < 0x80 && c != 0) { - bytearr[count++] = (byte) c; - } else if (c >= 0x800) { - bytearr[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F)); - bytearr[count++] = (byte) (0x80 | ((c >> 6) & 0x3F)); - bytearr[count++] = (byte) (0x80 | ((c >> 0) & 0x3F)); - } else { - bytearr[count++] = (byte) (0xC0 | ((c >> 6) & 0x1F)); - bytearr[count++] = (byte) (0x80 | ((c >> 0) & 0x3F)); - } + for (int i = countNonZeroAscii; i < strlen;) { + count = putChar(bytearr, count, str.charAt(i++)); } out.write(bytearr, 0, utflen + 2); return utflen + 2; diff --git a/src/java.base/share/classes/java/io/ObjectOutputStream.java b/src/java.base/share/classes/java/io/ObjectOutputStream.java index 3650b101353..bde069a1774 100644 --- a/src/java.base/share/classes/java/io/ObjectOutputStream.java +++ b/src/java.base/share/classes/java/io/ObjectOutputStream.java @@ -1,5 +1,6 @@ /* * Copyright (c) 1996, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -34,8 +35,13 @@ import java.util.Objects; import java.util.StringJoiner; import jdk.internal.util.ByteArray; +import jdk.internal.access.JavaLangAccess; +import jdk.internal.access.SharedSecrets; import sun.reflect.misc.ReflectUtil; +import static jdk.internal.util.ModifiedUtf.putChar; +import static jdk.internal.util.ModifiedUtf.utfLen; + /** * An ObjectOutputStream writes primitive data types and graphs of Java objects * to an OutputStream. The objects can be read (reconstituted) using an @@ -169,6 +175,7 @@ import sun.reflect.misc.ReflectUtil; public class ObjectOutputStream extends OutputStream implements ObjectOutput, ObjectStreamConstants { + private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess(); private static class Caches { /** cache of subclass security audit results */ @@ -885,7 +892,7 @@ public class ObjectOutputStream * stream */ public void writeUTF(String str) throws IOException { - bout.writeUTF(str); + bout.writeUTFInternal(str, false); } /** @@ -1317,14 +1324,7 @@ public class ObjectOutputStream */ private void writeString(String str, boolean unshared) throws IOException { handles.assign(unshared ? null : str); - long utflen = bout.getUTFLength(str); - if (utflen <= 0xFFFF) { - bout.writeByte(TC_STRING); - bout.writeUTF(str, utflen); - } else { - bout.writeByte(TC_LONGSTRING); - bout.writeLongUTF(str, utflen); - } + bout.writeUTFInternal(str, true); } /** @@ -1994,26 +1994,27 @@ public class ObjectOutputStream } } - public void writeBytes(String s) throws IOException { - int endoff = s.length(); - int cpos = 0; - int csize = 0; - for (int off = 0; off < endoff; ) { - if (cpos >= csize) { - cpos = 0; - csize = Math.min(endoff - off, CHAR_BUF_SIZE); - s.getChars(off, off + csize, cbuf, 0); - } - if (pos >= MAX_BLOCK_SIZE) { + @SuppressWarnings("deprecation") + void writeBytes(String s, int len) throws IOException { + int pos = this.pos; + for (int strpos = 0; strpos < len;) { + int rem = MAX_BLOCK_SIZE - pos; + int csize = Math.min(len - strpos, rem); + s.getBytes(strpos, strpos + csize, buf, pos); + pos += csize; + strpos += csize; + + if (pos == MAX_BLOCK_SIZE) { + this.pos = pos; drain(); + pos = 0; } - int n = Math.min(csize - cpos, MAX_BLOCK_SIZE - pos); - int stop = pos + n; - while (pos < stop) { - buf[pos++] = (byte) cbuf[cpos++]; - } - off += n; } + this.pos = pos; + } + + public void writeBytes(String s) throws IOException { + writeBytes(s, s.length()); } public void writeChars(String s) throws IOException { @@ -2026,8 +2027,47 @@ public class ObjectOutputStream } } - public void writeUTF(String s) throws IOException { - writeUTF(s, getUTFLength(s)); + public void writeUTF(String str) throws IOException { + writeUTFInternal(str, false); + } + + private void writeUTFInternal(String str, boolean writeHeader) throws IOException { + int strlen = str.length(); + int countNonZeroAscii = JLA.countNonZeroAscii(str); + int utflen = utfLen(str, countNonZeroAscii); + if (utflen <= 0xFFFF) { + if(writeHeader) { + writeByte(TC_STRING); + } + writeShort(utflen); + } else { + if(writeHeader) { + writeByte(TC_LONGSTRING); + } + writeLong(utflen); + } + + if (countNonZeroAscii != 0) { + writeBytes(str, countNonZeroAscii); + } + if (countNonZeroAscii != strlen) { + writeMoreUTF(str, countNonZeroAscii); + } + } + + private void writeMoreUTF(String str, int stroff) throws IOException { + int pos = this.pos; + for (int strlen = str.length(); stroff < strlen;) { + char c = str.charAt(stroff++); + int csize = c != 0 && c < 0x80 ? 1 : c >= 0x800 ? 3 : 2; + if (pos + csize >= MAX_BLOCK_SIZE) { + this.pos = pos; + drain(); + pos = 0; + } + pos = putChar(buf, pos, c); + } + this.pos = pos; } @@ -2153,112 +2193,6 @@ public class ObjectOutputStream } } } - - /** - * Returns the length in bytes of the UTF encoding of the given string. - */ - long getUTFLength(String s) { - int len = s.length(); - long utflen = 0; - for (int off = 0; off < len; ) { - int csize = Math.min(len - off, CHAR_BUF_SIZE); - s.getChars(off, off + csize, cbuf, 0); - for (int cpos = 0; cpos < csize; cpos++) { - char c = cbuf[cpos]; - if (c >= 0x0001 && c <= 0x007F) { - utflen++; - } else if (c > 0x07FF) { - utflen += 3; - } else { - utflen += 2; - } - } - off += csize; - } - return utflen; - } - - /** - * Writes the given string in UTF format. This method is used in - * situations where the UTF encoding length of the string is already - * known; specifying it explicitly avoids a prescan of the string to - * determine its UTF length. - */ - void writeUTF(String s, long utflen) throws IOException { - if (utflen > 0xFFFFL) { - throw new UTFDataFormatException(); - } - writeShort((int) utflen); - if (utflen == (long) s.length()) { - writeBytes(s); - } else { - writeUTFBody(s); - } - } - - /** - * Writes given string in "long" UTF format. "Long" UTF format is - * identical to standard UTF, except that it uses an 8 byte header - * (instead of the standard 2 bytes) to convey the UTF encoding length. - */ - void writeLongUTF(String s) throws IOException { - writeLongUTF(s, getUTFLength(s)); - } - - /** - * Writes given string in "long" UTF format, where the UTF encoding - * length of the string is already known. - */ - void writeLongUTF(String s, long utflen) throws IOException { - writeLong(utflen); - if (utflen == (long) s.length()) { - writeBytes(s); - } else { - writeUTFBody(s); - } - } - - /** - * Writes the "body" (i.e., the UTF representation minus the 2-byte or - * 8-byte length header) of the UTF encoding for the given string. - */ - private void writeUTFBody(String s) throws IOException { - int limit = MAX_BLOCK_SIZE - 3; - int len = s.length(); - for (int off = 0; off < len; ) { - int csize = Math.min(len - off, CHAR_BUF_SIZE); - s.getChars(off, off + csize, cbuf, 0); - for (int cpos = 0; cpos < csize; cpos++) { - char c = cbuf[cpos]; - if (pos <= limit) { - if (c <= 0x007F && c != 0) { - buf[pos++] = (byte) c; - } else if (c > 0x07FF) { - buf[pos + 2] = (byte) (0x80 | ((c >> 0) & 0x3F)); - buf[pos + 1] = (byte) (0x80 | ((c >> 6) & 0x3F)); - buf[pos + 0] = (byte) (0xE0 | ((c >> 12) & 0x0F)); - pos += 3; - } else { - buf[pos + 1] = (byte) (0x80 | ((c >> 0) & 0x3F)); - buf[pos + 0] = (byte) (0xC0 | ((c >> 6) & 0x1F)); - pos += 2; - } - } else { // write one byte at a time to normalize block - if (c <= 0x007F && c != 0) { - write(c); - } else if (c > 0x07FF) { - write(0xE0 | ((c >> 12) & 0x0F)); - write(0x80 | ((c >> 6) & 0x3F)); - write(0x80 | ((c >> 0) & 0x3F)); - } else { - write(0xC0 | ((c >> 6) & 0x1F)); - write(0x80 | ((c >> 0) & 0x3F)); - } - } - } - off += csize; - } - } } /** diff --git a/src/java.base/share/classes/jdk/internal/classfile/impl/BufWriterImpl.java b/src/java.base/share/classes/jdk/internal/classfile/impl/BufWriterImpl.java index 4cc6c205fe4..1a7c9a36c44 100644 --- a/src/java.base/share/classes/jdk/internal/classfile/impl/BufWriterImpl.java +++ b/src/java.base/share/classes/jdk/internal/classfile/impl/BufWriterImpl.java @@ -38,6 +38,9 @@ import jdk.internal.access.JavaLangAccess; import jdk.internal.access.SharedSecrets; import jdk.internal.vm.annotation.ForceInline; +import static jdk.internal.util.ModifiedUtf.putChar; +import static jdk.internal.util.ModifiedUtf.utfLen; + public final class BufWriterImpl implements BufWriter { private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess(); @@ -162,14 +165,7 @@ public final class BufWriterImpl implements BufWriter { void writeUTF(String str) { int strlen = str.length(); int countNonZeroAscii = JLA.countNonZeroAscii(str); - int utflen = strlen; - if (countNonZeroAscii != strlen) { - for (int i = countNonZeroAscii; i < strlen; i++) { - int c = str.charAt(i); - if (c >= 0x80 || c == 0) - utflen += (c >= 0x800) ? 2 : 1; - } - } + int utflen = utfLen(str, countNonZeroAscii); if (utflen > 65535) { throw new IllegalArgumentException("string too long"); } @@ -185,20 +181,8 @@ public final class BufWriterImpl implements BufWriter { str.getBytes(0, countNonZeroAscii, elems, offset); offset += countNonZeroAscii; - for (int i = countNonZeroAscii; i < strlen; ++i) { - char c = str.charAt(i); - if (c >= '\001' && c <= '\177') { - elems[offset++] = (byte) c; - } else if (c > '\u07FF') { - elems[offset ] = (byte) (0xE0 | c >> 12 & 0xF); - elems[offset + 1] = (byte) (0x80 | c >> 6 & 0x3F); - elems[offset + 2] = (byte) (0x80 | c & 0x3F); - offset += 3; - } else { - elems[offset ] = (byte) (0xC0 | c >> 6 & 0x1F); - elems[offset + 1] = (byte) (0x80 | c & 0x3F); - offset += 2; - } + for (int i = countNonZeroAscii; i < strlen; i++) { + offset = putChar(elems, offset, str.charAt(i)); } this.offset = offset; diff --git a/src/java.base/share/classes/jdk/internal/util/ModifiedUtf.java b/src/java.base/share/classes/jdk/internal/util/ModifiedUtf.java new file mode 100644 index 00000000000..a8d2fe8bb74 --- /dev/null +++ b/src/java.base/share/classes/jdk/internal/util/ModifiedUtf.java @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package jdk.internal.util; + +import jdk.internal.vm.annotation.ForceInline; + +/** + * Helper to JDK UTF putChar and Calculate length + * + * @since 24 + */ +public abstract class ModifiedUtf { + private ModifiedUtf() { + } + + @ForceInline + public static int putChar(byte[] buf, int offset, char c) { + if (c != 0 && c < 0x80) { + buf[offset++] = (byte) c; + } else if (c >= 0x800) { + buf[offset ] = (byte) (0xE0 | c >> 12 & 0x0F); + buf[offset + 1] = (byte) (0x80 | c >> 6 & 0x3F); + buf[offset + 2] = (byte) (0x80 | c & 0x3F); + offset += 3; + } else { + buf[offset ] = (byte) (0xC0 | c >> 6 & 0x1F); + buf[offset + 1] = (byte) (0x80 | c & 0x3F); + offset += 2; + } + return offset; + } + + /** + * Calculate the utf length of a string + * @param str input string + * @param countNonZeroAscii the number of non-zero ascii characters in the prefix calculated by JLA.countNonZeroAscii(str) + */ + @ForceInline + public static int utfLen(String str, int countNonZeroAscii) { + int utflen = str.length(); + for (int i = utflen - 1; i >= countNonZeroAscii; i--) { + int c = str.charAt(i); + if (c >= 0x80 || c == 0) + utflen += (c >= 0x800) ? 2 : 1; + } + return utflen; + } +} diff --git a/test/micro/org/openjdk/bench/java/io/DataOutputStreamBench.java b/test/micro/org/openjdk/bench/java/io/DataOutputStreamBench.java new file mode 100644 index 00000000000..34568110dde --- /dev/null +++ b/test/micro/org/openjdk/bench/java/io/DataOutputStreamBench.java @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package org.openjdk.bench.java.io; + +import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.infra.Blackhole; + +import java.io.*; +import java.nio.charset.StandardCharsets; +import java.util.HexFormat; +import java.util.concurrent.TimeUnit; + +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +@Fork(2) +@Measurement(iterations = 6, time = 1) +@Warmup(iterations = 4, time = 2) +@State(Scope.Thread) +public class DataOutputStreamBench { + + @Param({"ascii", "utf8_2_bytes", "utf8_3_bytes", "emoji"}) + public String charType; + + ByteArrayOutputStream bytesOutput; + DataOutputStream dataOutput; + ObjectOutputStream objectOutput; + String[] strings; + + @Setup(Level.Trial) + public void setup() throws Exception { + byte[] bytes = HexFormat.of().parseHex( + switch (charType) { + case "ascii" -> "78"; + case "utf8_2_bytes" -> "c2a9"; + case "utf8_3_bytes" -> "e6b8a9"; + case "emoji" -> "e29da3efb88f"; + default -> throw new IllegalArgumentException("bad charType: " + charType); + } + ); + String s = new String(bytes, 0, bytes.length, StandardCharsets.UTF_8); + strings = new String[128]; + for (int i = 0; i < strings.length; i++) { + strings[i] = "A".repeat(i).concat(s.repeat(i)); + } + + bytesOutput = new ByteArrayOutputStream(1024 * 64); + dataOutput = new DataOutputStream(bytesOutput); + objectOutput = new ObjectOutputStream(bytesOutput); + } + + @Benchmark + public void dataOutwriteUTF(Blackhole bh) throws Exception { + bytesOutput.reset(); + for (var s : strings) { + dataOutput.writeUTF(s); + } + dataOutput.flush(); + bh.consume(bytesOutput.size()); + } + + @Benchmark + public void objectWriteUTF(Blackhole bh) throws Exception { + bytesOutput.reset(); + for (var s : strings) { + objectOutput.writeUTF(s); + } + objectOutput.flush(); + bh.consume(bytesOutput.size()); + } +}