8339290: Optimize ClassFile Utf8EntryImpl#writeTo
Reviewed-by: redestad, liach
This commit is contained in:
parent
340e131d61
commit
cb9f5c5791
@ -1,5 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2000, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -34,6 +35,45 @@ class StringCoding {
|
||||
|
||||
private StringCoding() { }
|
||||
|
||||
/**
|
||||
* Count the number of leading non-zero ascii chars in the range.
|
||||
*/
|
||||
public static int countNonZeroAscii(String s) {
|
||||
byte[] value = s.value();
|
||||
if (s.isLatin1()) {
|
||||
return countNonZeroAsciiLatin1(value, 0, value.length);
|
||||
} else {
|
||||
return countNonZeroAsciiUTF16(value, 0, s.length());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Count the number of non-zero ascii chars in the range.
|
||||
*/
|
||||
public static int countNonZeroAsciiLatin1(byte[] ba, int off, int len) {
|
||||
int limit = off + len;
|
||||
for (int i = off; i < limit; i++) {
|
||||
if (ba[i] <= 0) {
|
||||
return i - off;
|
||||
}
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
/**
|
||||
* Count the number of leading non-zero ascii chars in the range.
|
||||
*/
|
||||
public static int countNonZeroAsciiUTF16(byte[] ba, int off, int strlen) {
|
||||
int limit = off + strlen;
|
||||
for (int i = off; i < limit; i++) {
|
||||
char c = StringUTF16.charAt(ba, i);
|
||||
if (c == 0 || c > 0x7F) {
|
||||
return i - off;
|
||||
}
|
||||
}
|
||||
return strlen;
|
||||
}
|
||||
|
||||
public static boolean hasNegatives(byte[] ba, int off, int len) {
|
||||
return countPositives(ba, off, len) != len;
|
||||
}
|
||||
|
@ -2569,6 +2569,9 @@ public final class System {
|
||||
public int countPositives(byte[] bytes, int offset, int length) {
|
||||
return StringCoding.countPositives(bytes, offset, length);
|
||||
}
|
||||
public int countNonZeroAscii(String s) {
|
||||
return StringCoding.countNonZeroAscii(s);
|
||||
}
|
||||
public String newStringNoRepl(byte[] bytes, Charset cs) throws CharacterCodingException {
|
||||
return String.newStringNoRepl(bytes, cs);
|
||||
}
|
||||
|
@ -318,6 +318,11 @@ public interface JavaLangAccess {
|
||||
*/
|
||||
int countPositives(byte[] ba, int off, int len);
|
||||
|
||||
/**
|
||||
* Count the number of leading non-zero ascii chars in the String.
|
||||
*/
|
||||
int countNonZeroAscii(String s);
|
||||
|
||||
/**
|
||||
* Constructs a new {@code String} by decoding the specified subarray of
|
||||
* bytes using the specified {@linkplain java.nio.charset.Charset charset}.
|
||||
|
@ -1,5 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -409,60 +410,14 @@ public abstract sealed class AbstractPoolEntry {
|
||||
|
||||
@Override
|
||||
void writeTo(BufWriterImpl pool) {
|
||||
pool.writeU1(tag);
|
||||
if (rawBytes != null) {
|
||||
pool.writeU1(tag);
|
||||
pool.writeU2(rawLen);
|
||||
pool.writeBytes(rawBytes, offset, rawLen);
|
||||
}
|
||||
else {
|
||||
// state == STRING and no raw bytes
|
||||
if (stringValue.length() > 65535) {
|
||||
throw new IllegalArgumentException("string too long");
|
||||
}
|
||||
pool.writeU1(tag);
|
||||
pool.writeU2(charLen);
|
||||
for (int i = 0; i < charLen; ++i) {
|
||||
char c = stringValue.charAt(i);
|
||||
if (c >= '\001' && c <= '\177') {
|
||||
// Optimistic writing -- hope everything is bytes
|
||||
// If not, we bail out, and alternate path patches the length
|
||||
pool.writeU1((byte) c);
|
||||
}
|
||||
else {
|
||||
int charLength = stringValue.length();
|
||||
int byteLength = i;
|
||||
char c1;
|
||||
for (int j = i; j < charLength; ++j) {
|
||||
c1 = (stringValue).charAt(j);
|
||||
if (c1 >= '\001' && c1 <= '\177') {
|
||||
byteLength++;
|
||||
} else if (c1 > '\u07FF') {
|
||||
byteLength += 3;
|
||||
} else {
|
||||
byteLength += 2;
|
||||
}
|
||||
}
|
||||
if (byteLength > 65535) {
|
||||
throw new IllegalArgumentException();
|
||||
}
|
||||
int byteLengthFinal = byteLength;
|
||||
pool.patchInt(pool.size() - i - 2, 2, byteLengthFinal);
|
||||
for (int j = i; j < charLength; ++j) {
|
||||
c1 = (stringValue).charAt(j);
|
||||
if (c1 >= '\001' && c1 <= '\177') {
|
||||
pool.writeU1((byte) c1);
|
||||
} else if (c1 > '\u07FF') {
|
||||
pool.writeU1((byte) (0xE0 | c1 >> 12 & 0xF));
|
||||
pool.writeU1((byte) (0x80 | c1 >> 6 & 0x3F));
|
||||
pool.writeU1((byte) (0x80 | c1 & 0x3F));
|
||||
} else {
|
||||
pool.writeU1((byte) (0xC0 | c1 >> 6 & 0x1F));
|
||||
pool.writeU1((byte) (0x80 | c1 & 0x3F));
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
pool.writeUTF(stringValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -34,7 +35,11 @@ import java.lang.classfile.constantpool.ConstantPool;
|
||||
import java.lang.classfile.constantpool.ConstantPoolBuilder;
|
||||
import java.lang.classfile.constantpool.PoolEntry;
|
||||
|
||||
import jdk.internal.access.JavaLangAccess;
|
||||
import jdk.internal.access.SharedSecrets;
|
||||
|
||||
public final class BufWriterImpl implements BufWriter {
|
||||
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
|
||||
|
||||
private final ConstantPoolBuilder constantPool;
|
||||
private final ClassFileImpl context;
|
||||
@ -152,6 +157,52 @@ public final class BufWriterImpl implements BufWriter {
|
||||
writeBytes(other.elems, 0, other.offset);
|
||||
}
|
||||
|
||||
@SuppressWarnings("deprecation")
|
||||
void writeUTF(String str) {
|
||||
int strlen = str.length();
|
||||
int countNonZeroAscii = JLA.countNonZeroAscii(str);
|
||||
int utflen = strlen;
|
||||
if (countNonZeroAscii != strlen) {
|
||||
for (int i = countNonZeroAscii; i < strlen; i++) {
|
||||
int c = str.charAt(i);
|
||||
if (c >= 0x80 || c == 0)
|
||||
utflen += (c >= 0x800) ? 2 : 1;
|
||||
}
|
||||
}
|
||||
if (utflen > 65535) {
|
||||
throw new IllegalArgumentException("string too long");
|
||||
}
|
||||
reserveSpace(utflen + 2);
|
||||
|
||||
int offset = this.offset;
|
||||
byte[] elems = this.elems;
|
||||
|
||||
elems[offset ] = (byte) (utflen >> 8);
|
||||
elems[offset + 1] = (byte) utflen;
|
||||
offset += 2;
|
||||
|
||||
str.getBytes(0, countNonZeroAscii, elems, offset);
|
||||
offset += countNonZeroAscii;
|
||||
|
||||
for (int i = countNonZeroAscii; i < strlen; ++i) {
|
||||
char c = str.charAt(i);
|
||||
if (c >= '\001' && c <= '\177') {
|
||||
elems[offset++] = (byte) c;
|
||||
} else if (c > '\u07FF') {
|
||||
elems[offset ] = (byte) (0xE0 | c >> 12 & 0xF);
|
||||
elems[offset + 1] = (byte) (0x80 | c >> 6 & 0x3F);
|
||||
elems[offset + 2] = (byte) (0x80 | c & 0x3F);
|
||||
offset += 3;
|
||||
} else {
|
||||
elems[offset ] = (byte) (0xC0 | c >> 6 & 0x1F);
|
||||
elems[offset + 1] = (byte) (0x80 | c & 0x3F);
|
||||
offset += 2;
|
||||
}
|
||||
}
|
||||
|
||||
this.offset = offset;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeBytes(byte[] arr, int start, int length) {
|
||||
reserveSpace(length);
|
||||
|
63
test/jdk/java/lang/String/CountNonZeroAscii.java
Normal file
63
test/jdk/java/lang/String/CountNonZeroAscii.java
Normal file
@ -0,0 +1,63 @@
|
||||
/*
|
||||
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
import jdk.internal.access.JavaLangAccess;
|
||||
import jdk.internal.access.SharedSecrets;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Arrays;
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @modules java.base/jdk.internal.access
|
||||
* @summary test latin1 String countNonZeroAscii
|
||||
* @run main/othervm -XX:+CompactStrings CountNonZeroAscii
|
||||
* @run main/othervm -XX:-CompactStrings CountNonZeroAscii
|
||||
*/
|
||||
public class CountNonZeroAscii {
|
||||
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
|
||||
|
||||
public static void main(String [] args) {
|
||||
byte[] bytes = new byte[1000];
|
||||
|
||||
Arrays.fill(bytes, (byte) 'A');
|
||||
String s = new String(bytes, StandardCharsets.ISO_8859_1);
|
||||
assertEquals(bytes.length, JLA.countNonZeroAscii(s));
|
||||
|
||||
for (int i = 0; i < bytes.length; i++) {
|
||||
for (int j = Byte.MIN_VALUE; j <= 0; j++) {
|
||||
bytes[i] = (byte) j;
|
||||
s = new String(bytes, StandardCharsets.ISO_8859_1);
|
||||
assertEquals(i, JLA.countNonZeroAscii(s));
|
||||
}
|
||||
bytes[i] = (byte) 'A';
|
||||
}
|
||||
}
|
||||
|
||||
static void assertEquals(int expected, int actual) {
|
||||
if (expected != actual) {
|
||||
throw new AssertionError("Expected " + expected + " but got " + actual);
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,102 @@
|
||||
/*
|
||||
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
package org.openjdk.bench.java.lang.classfile;
|
||||
|
||||
import org.openjdk.jmh.annotations.Benchmark;
|
||||
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||
import org.openjdk.jmh.annotations.Fork;
|
||||
import org.openjdk.jmh.annotations.Measurement;
|
||||
import org.openjdk.jmh.annotations.Mode;
|
||||
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||
import org.openjdk.jmh.annotations.Param;
|
||||
import org.openjdk.jmh.annotations.Scope;
|
||||
import org.openjdk.jmh.annotations.Setup;
|
||||
import org.openjdk.jmh.annotations.State;
|
||||
import org.openjdk.jmh.annotations.Warmup;
|
||||
import org.openjdk.jmh.infra.Blackhole;
|
||||
|
||||
import java.lang.classfile.constantpool.ConstantPoolBuilder;
|
||||
import java.lang.classfile.constantpool.ClassEntry;
|
||||
import java.lang.classfile.*;
|
||||
import java.lang.constant.*;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.HexFormat;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import static java.lang.classfile.ClassFile.*;
|
||||
import static java.lang.constant.ConstantDescs.*;
|
||||
|
||||
import jdk.internal.classfile.impl.*;
|
||||
/**
|
||||
* Measures writing Utf8 constant pool entries whose strings end in characters of different encoded widths.
|
||||
*/
|
||||
@BenchmarkMode(Mode.AverageTime)
|
||||
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||
@Warmup(iterations = 1, time = 2)
|
||||
@Measurement(iterations = 3, time = 1)
|
||||
@Fork(jvmArgsAppend = "--enable-preview", value = 3)
|
||||
@State(Scope.Thread)
|
||||
public class Utf8EntryWriteTo {
|
||||
static final ClassDesc STRING_BUILDER = ClassDesc.ofDescriptor("Ljava/lang/StringBuilder;");
|
||||
static final MethodTypeDesc MTD_append = MethodTypeDesc.of(STRING_BUILDER, CD_String);
|
||||
static final MethodTypeDesc MTD_String = MethodTypeDesc.of(CD_String);
|
||||
static final ClassDesc CLASS_DESC = ClassDesc.ofDescriptor("Lorg/openjdk/bench/java/lang/classfile/String$$StringConcat;");
|
||||
|
||||
@Param({"ascii", "utf8_2_bytes", "utf8_3_bytes", "emoji"})
|
||||
public String charType;
|
||||
ConstantPoolBuilder poolBuilder;
|
||||
ClassEntry thisClass;
|
||||
|
||||
@Setup
|
||||
public void setup() throws Exception {
|
||||
byte[] bytes = HexFormat.of().parseHex(
|
||||
switch (charType) {
|
||||
case "ascii" -> "78";
|
||||
case "utf8_2_bytes" -> "c2a9";
|
||||
case "utf8_3_bytes" -> "e6b8a9";
|
||||
case "emoji" -> "e29da3efb88f";
|
||||
default -> throw new IllegalArgumentException("bad charType: " + charType);
|
||||
}
|
||||
);
|
||||
String s = new String(bytes, 0, bytes.length, StandardCharsets.UTF_8);
|
||||
String[] constants = new String[128];
|
||||
for (int i = 0; i < constants.length; i++) {
|
||||
constants[i] = "A".repeat(i).concat(s);
|
||||
}
|
||||
|
||||
poolBuilder = ConstantPoolBuilder.of();
|
||||
thisClass = poolBuilder.classEntry(CLASS_DESC);
|
||||
for (var c : constants) {
|
||||
poolBuilder.utf8Entry(c);
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void writeTo(Blackhole bh) {
|
||||
bh.consume(ClassFile
|
||||
.of()
|
||||
.build(thisClass, poolBuilder, (ClassBuilder clb) -> {}));
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user