8339290: Optimize ClassFile Utf8EntryImpl#writeTo

Reviewed-by: redestad, liach
This commit is contained in:
Shaojin Wen 2024-09-05 11:45:49 +00:00 committed by Claes Redestad
parent 340e131d61
commit cb9f5c5791
7 changed files with 268 additions and 49 deletions

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -34,6 +35,45 @@ class StringCoding {
private StringCoding() { }
/**
 * Returns the length of the leading run of chars in {@code s} that are
 * non-zero ASCII, i.e. in the range {@code 1..0x7F}.
 *
 * @param s the string to scan
 * @return the number of leading non-zero ASCII chars; equals
 *         {@code s.length()} when every char qualifies
 */
public static int countNonZeroAscii(String s) {
    final byte[] bytes = s.value();
    // Dispatch on the string's internal coder: Latin1 is scanned byte-wise,
    // UTF16 char-wise.
    return s.isLatin1()
            ? countNonZeroAsciiLatin1(bytes, 0, bytes.length)
            : countNonZeroAsciiUTF16(bytes, 0, s.length());
}
/**
 * Counts the leading bytes in {@code ba[off .. off+len)} that are non-zero
 * ASCII. A byte qualifies when its signed value is strictly positive, which
 * excludes both {@code 0} and every value with the high bit set.
 *
 * @param ba  the bytes to scan
 * @param off index of the first byte to examine
 * @param len number of bytes in the range
 * @return the offset, relative to {@code off}, of the first byte that is zero
 *         or negative, or {@code len} if there is none
 */
public static int countNonZeroAsciiLatin1(byte[] ba, int off, int len) {
    final int end = off + len;
    int pos = off;
    while (pos < end && ba[pos] > 0) {
        pos++;
    }
    // Stopped early => pos is the first disqualifying byte; otherwise the
    // whole range qualified.
    return pos < end ? pos - off : len;
}
/**
 * Counts the leading chars, read via {@code StringUTF16.charAt}, that are
 * non-zero ASCII ({@code 1..0x7F}).
 *
 * @param ba     the backing bytes, read as chars through {@code StringUTF16.charAt}
 * @param off    char index of the first char to examine
 * @param strlen number of chars in the range
 * @return the offset, relative to {@code off}, of the first char that is
 *         {@code 0} or greater than {@code 0x7F}, or {@code strlen} if
 *         there is none
 */
public static int countNonZeroAsciiUTF16(byte[] ba, int off, int strlen) {
    final int end = off + strlen;
    int pos = off;
    while (pos < end) {
        char ch = StringUTF16.charAt(ba, pos);
        if (ch < 1 || ch >= 0x80) {
            break;
        }
        pos++;
    }
    return pos < end ? pos - off : strlen;
}
/**
 * Reports whether {@code countPositives} stops short of {@code len} for the
 * given range.
 *
 * @return {@code true} if {@code countPositives(ba, off, len)} differs from
 *         {@code len}
 */
public static boolean hasNegatives(byte[] ba, int off, int len) {
    final int positives = countPositives(ba, off, len);
    return positives != len;
}

View File

@ -2569,6 +2569,9 @@ public final class System {
// Bridge: forwards unchanged to StringCoding.countPositives.
public int countPositives(byte[] bytes, int offset, int length) {
    final int positives = StringCoding.countPositives(bytes, offset, length);
    return positives;
}
// Bridge: forwards unchanged to StringCoding.countNonZeroAscii.
public int countNonZeroAscii(String s) {
    final int asciiPrefix = StringCoding.countNonZeroAscii(s);
    return asciiPrefix;
}
// Bridge: forwards unchanged to String.newStringNoRepl, propagating its
// CharacterCodingException.
public String newStringNoRepl(byte[] bytes, Charset cs) throws CharacterCodingException {
    final String decoded = String.newStringNoRepl(bytes, cs);
    return decoded;
}

View File

@ -318,6 +318,11 @@ public interface JavaLangAccess {
*/
int countPositives(byte[] ba, int off, int len);
/**
 * Count the number of leading non-zero ascii chars in the String.
 * A char counts as non-zero ascii when it lies in the range
 * {@code 1..0x7F}; counting stops at the first char outside that range.
 *
 * @param s the String to scan
 * @return the number of leading non-zero ascii chars, which equals
 *         {@code s.length()} when every char is in range
 */
int countNonZeroAscii(String s);
/**
* Constructs a new {@code String} by decoding the specified subarray of
* bytes using the specified {@linkplain java.nio.charset.Charset charset}.

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -409,60 +410,14 @@ public abstract sealed class AbstractPoolEntry {
/**
 * Writes this Utf8 entry to {@code pool}: the tag byte, followed by a u2
 * byte length and the encoded bytes.
 *
 * <p>The tag is written exactly once. When the raw encoded bytes are
 * available they are copied as-is; otherwise the string value is handed to
 * {@code BufWriterImpl.writeUTF}, which computes the encoded length, writes
 * the u2 length and the bytes, and rejects over-long strings.
 *
 * @param pool the buffer to append to
 * @throws IllegalArgumentException from {@code writeUTF} when the encoded
 *         string is longer than 65535 bytes
 */
@Override
void writeTo(BufWriterImpl pool) {
    pool.writeU1(tag);
    if (rawBytes != null) {
        // Already-encoded bytes are available: emit length + bytes verbatim.
        pool.writeU2(rawLen);
        pool.writeBytes(rawBytes, offset, rawLen);
    } else {
        // state == STRING and no raw bytes: writeUTF performs the length
        // check, writes the u2 length and encodes the chars in one pass, so
        // nothing else may be written here.
        pool.writeUTF(stringValue);
    }
}
}

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -34,7 +35,11 @@ import java.lang.classfile.constantpool.ConstantPool;
import java.lang.classfile.constantpool.ConstantPoolBuilder;
import java.lang.classfile.constantpool.PoolEntry;
import jdk.internal.access.JavaLangAccess;
import jdk.internal.access.SharedSecrets;
public final class BufWriterImpl implements BufWriter {
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
private final ConstantPoolBuilder constantPool;
private final ClassFileImpl context;
@ -152,6 +157,52 @@ public final class BufWriterImpl implements BufWriter {
writeBytes(other.elems, 0, other.offset);
}
/**
 * Appends {@code str} as a u2 byte length followed by its encoded bytes.
 *
 * <p>Encoding per char: {@code 1..0x7F} takes one byte, {@code 0} and
 * {@code 0x80..0x7FF} take two bytes, everything above {@code 0x7FF} takes
 * three bytes. Each char is encoded on its own.
 *
 * @param str the string to write
 * @throws IllegalArgumentException if the encoded form exceeds 65535 bytes
 */
@SuppressWarnings("deprecation")
void writeUTF(String str) {
    final int charCount = str.length();
    // Leading run of chars in 1..0x7F; each of these encodes to one byte.
    final int asciiPrefix = JLA.countNonZeroAscii(str);

    // Every char costs at least one byte; only chars past the ASCII prefix
    // can cost more, so the sizing pass starts there.
    int encodedLength = charCount;
    for (int i = asciiPrefix; i < charCount; i++) {
        int ch = str.charAt(i);
        if (ch == 0 || ch >= 0x80) {
            encodedLength += (ch >= 0x800) ? 2 : 1;
        }
    }
    if (encodedLength > 65535) {
        throw new IllegalArgumentException("string too long");
    }

    reserveSpace(encodedLength + 2);
    // Fields are read only after reserveSpace, as in the original ordering
    // (presumably because it may replace the backing array -- confirm).
    final byte[] buf = this.elems;
    int pos = this.offset;

    // u2 length, high byte first.
    buf[pos++] = (byte) (encodedLength >> 8);
    buf[pos++] = (byte) encodedLength;

    // Bulk-copy the ASCII prefix: the deprecated getBytes stores the low
    // byte of each char, which is exact for chars in 1..0x7F.
    str.getBytes(0, asciiPrefix, buf, pos);
    pos += asciiPrefix;

    for (int i = asciiPrefix; i < charCount; i++) {
        final char ch = str.charAt(i);
        if (ch >= 0x01 && ch <= 0x7F) {
            buf[pos++] = (byte) ch;
        } else if (ch > 0x07FF) {
            buf[pos++] = (byte) (0xE0 | ((ch >> 12) & 0x0F));
            buf[pos++] = (byte) (0x80 | ((ch >> 6) & 0x3F));
            buf[pos++] = (byte) (0x80 | (ch & 0x3F));
        } else {
            // 0 and 0x80..0x7FF
            buf[pos++] = (byte) (0xC0 | ((ch >> 6) & 0x1F));
            buf[pos++] = (byte) (0x80 | (ch & 0x3F));
        }
    }
    this.offset = pos;
}
@Override
public void writeBytes(byte[] arr, int start, int length) {
reserveSpace(length);

View File

@ -0,0 +1,63 @@
/*
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
import jdk.internal.access.JavaLangAccess;
import jdk.internal.access.SharedSecrets;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
/*
* @test
* @modules java.base/jdk.internal.access
* @summary test latin1 String countNonZeroAscii
* @run main/othervm -XX:+CompactStrings CountNonZeroAscii
* @run main/othervm -XX:-CompactStrings CountNonZeroAscii
*/
public class CountNonZeroAscii {
    private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();

    /**
     * Starts from 1000 'A' bytes, then for each position substitutes every
     * byte value from Byte.MIN_VALUE through 0 and checks that the count of
     * leading non-zero ascii chars equals that position.
     */
    public static void main(String [] args) {
        final int size = 1000;
        byte[] data = new byte[size];
        Arrays.fill(data, (byte) 'A');

        // Baseline: every char qualifies, so the count is the full length.
        assertEquals(size, JLA.countNonZeroAscii(decodeLatin1(data)));

        for (int pos = 0; pos < size; pos++) {
            for (int value = Byte.MIN_VALUE; value <= 0; value++) {
                data[pos] = (byte) value;
                assertEquals(pos, JLA.countNonZeroAscii(decodeLatin1(data)));
            }
            // Restore so later positions see an all-'A' prefix.
            data[pos] = (byte) 'A';
        }
    }

    /** Decodes the bytes one-to-one into chars via ISO-8859-1. */
    private static String decodeLatin1(byte[] data) {
        return new String(data, StandardCharsets.ISO_8859_1);
    }

    static void assertEquals(int expected, int actual) {
        if (expected == actual) {
            return;
        }
        throw new AssertionError("Expected " + expected + " but got " + actual);
    }
}

View File

@ -0,0 +1,102 @@
/*
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.bench.java.lang.classfile;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;
import java.lang.classfile.constantpool.ConstantPoolBuilder;
import java.lang.classfile.constantpool.ClassEntry;
import java.lang.classfile.*;
import java.lang.constant.*;
import java.nio.charset.StandardCharsets;
import java.util.HexFormat;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;
import static java.lang.classfile.ClassFile.*;
import static java.lang.constant.ConstantDescs.*;
import jdk.internal.classfile.impl.*;
/**
 * Measures building a class file from a constant pool pre-populated with
 * 128 Utf8 entries. Entry {@code i} consists of {@code i} 'A' chars followed
 * by a sample suffix selected by {@link #charType}.
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Warmup(iterations = 1, time = 2)
@Measurement(iterations = 3, time = 1)
@Fork(jvmArgsAppend = "--enable-preview", value = 3)
@State(Scope.Thread)
public class Utf8EntryWriteTo {
    static final ClassDesc STRING_BUILDER = ClassDesc.ofDescriptor("Ljava/lang/StringBuilder;");
    static final MethodTypeDesc MTD_append = MethodTypeDesc.of(STRING_BUILDER, CD_String);
    static final MethodTypeDesc MTD_String = MethodTypeDesc.of(CD_String);
    static final ClassDesc CLASS_DESC = ClassDesc.ofDescriptor("Lorg/openjdk/bench/java/lang/classfile/String$$StringConcat;");

    /** Number of Utf8 constants added to the pool. */
    private static final int CONSTANT_COUNT = 128;

    @Param({"ascii", "utf8_2_bytes", "utf8_3_bytes", "emoji"})
    public String charType;

    ConstantPoolBuilder poolBuilder;
    ClassEntry thisClass;

    @Setup
    public void setup() throws Exception {
        // Resolve the suffix first so a bad charType fails before any pool work.
        byte[] suffixBytes = HexFormat.of().parseHex(suffixHex(charType));
        String suffix = new String(suffixBytes, StandardCharsets.UTF_8);

        poolBuilder = ConstantPoolBuilder.of();
        thisClass = poolBuilder.classEntry(CLASS_DESC);
        for (int prefixLength = 0; prefixLength < CONSTANT_COUNT; prefixLength++) {
            poolBuilder.utf8Entry("A".repeat(prefixLength) + suffix);
        }
    }

    /** Maps a charType parameter value to the hex of its UTF-8 encoded suffix. */
    private static String suffixHex(String charType) {
        switch (charType) {
            case "ascii":
                return "78";
            case "utf8_2_bytes":
                return "c2a9";
            case "utf8_3_bytes":
                return "e6b8a9";
            case "emoji":
                return "e29da3efb88f";
            default:
                throw new IllegalArgumentException("bad charType: " + charType);
        }
    }

    @Benchmark
    public void writeTo(Blackhole bh) {
        byte[] classBytes = ClassFile.of()
                .build(thisClass, poolBuilder, (ClassBuilder clb) -> {});
        bh.consume(classBytes);
    }
}