8280124: Reduce branches decoding latin-1 chars from UTF-8 encoded bytes
Reviewed-by: rriggs, alanb, naoto
This commit is contained in:
parent
bdfa15d92c
commit
e314a4cfda
@@ -541,8 +541,7 @@ public final class String
|
|||||||
offset++;
|
offset++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if ((b1 == (byte)0xc2 || b1 == (byte)0xc3) &&
|
if ((b1 & 0xfe) == 0xc2 && offset + 1 < sl) { // b1 either 0xc2 or 0xc3
|
||||||
offset + 1 < sl) {
|
|
||||||
int b2 = bytes[offset + 1];
|
int b2 = bytes[offset + 1];
|
||||||
if (!isNotContinuation(b2)) {
|
if (!isNotContinuation(b2)) {
|
||||||
dst[dp++] = (byte)decode2(b1, b2);
|
dst[dp++] = (byte)decode2(b1, b2);
|
||||||
@@ -698,8 +697,7 @@ public final class String
|
|||||||
offset++;
|
offset++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if ((b1 == (byte) 0xc2 || b1 == (byte) 0xc3) &&
|
if ((b1 & 0xfe) == 0xc2 && offset + 1 < sl) { // b1 either 0xc2 or 0xc3
|
||||||
offset + 1 < sl) {
|
|
||||||
int b2 = bytes[offset + 1];
|
int b2 = bytes[offset + 1];
|
||||||
if (!isNotContinuation(b2)) {
|
if (!isNotContinuation(b2)) {
|
||||||
dst[dp++] = (byte) decode2(b1, b2);
|
dst[dp++] = (byte) decode2(b1, b2);
|
||||||
|
@@ -40,59 +40,120 @@ import java.util.concurrent.TimeUnit;
|
|||||||
|
|
||||||
@BenchmarkMode(Mode.AverageTime)
|
@BenchmarkMode(Mode.AverageTime)
|
||||||
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||||
@Fork(value = 3, jvmArgs = "-Xmx1g")
|
@Fork(value = 3)
|
||||||
@Warmup(iterations = 5, time = 2)
|
@Warmup(iterations = 5, time = 2)
|
||||||
@Measurement(iterations = 5, time = 3)
|
@Measurement(iterations = 5, time = 3)
|
||||||
@State(Scope.Thread)
|
@State(Scope.Thread)
|
||||||
public class StringDecode {
|
public class StringDecode {
|
||||||
|
|
||||||
@BenchmarkMode(Mode.AverageTime)
|
@Param({"US-ASCII", "ISO-8859-1", "UTF-8", "MS932", "ISO-8859-6", "ISO-2022-KR"})
|
||||||
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
private String charsetName;
|
||||||
@Fork(value = 3, jvmArgs = "-Xmx1g")
|
|
||||||
@Warmup(iterations = 5, time = 2)
|
|
||||||
@Measurement(iterations = 5, time = 2)
|
|
||||||
@State(Scope.Thread)
|
|
||||||
public static class WithCharset {
|
|
||||||
|
|
||||||
@Param({"US-ASCII", "ISO-8859-1", "UTF-8", "MS932", "ISO-8859-6", "ISO-2022-KR"})
|
private Charset charset;
|
||||||
private String charsetName;
|
private byte[] asciiString;
|
||||||
|
private byte[] utf16String;
|
||||||
private Charset charset;
|
private byte[] longUtf16String;
|
||||||
private byte[] asciiString;
|
private byte[] longUtf16StartString;
|
||||||
private byte[] utf16String;
|
private byte[] longLatin1String;
|
||||||
|
|
||||||
@Setup
|
|
||||||
public void setup() {
|
|
||||||
charset = Charset.forName(charsetName);
|
|
||||||
asciiString = "ascii string".getBytes(charset);
|
|
||||||
utf16String = "UTF-\uFF11\uFF16 string".getBytes(charset);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Benchmark
|
|
||||||
public void decodeCharsetName(Blackhole bh) throws Exception {
|
|
||||||
bh.consume(new String(asciiString, charsetName));
|
|
||||||
bh.consume(new String(utf16String, charsetName));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Benchmark
|
|
||||||
public void decodeCharset(Blackhole bh) throws Exception {
|
|
||||||
bh.consume(new String(asciiString, charset));
|
|
||||||
bh.consume(new String(utf16String, charset));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private byte[] asciiDefaultString;
|
|
||||||
private byte[] utf16DefaultString;
|
|
||||||
|
|
||||||
@Setup
|
@Setup
|
||||||
public void setup() {
|
public void setup() {
|
||||||
asciiDefaultString = "ascii string".getBytes();
|
charset = Charset.forName(charsetName);
|
||||||
utf16DefaultString = "UTF-\uFF11\uFF16 string".getBytes();
|
asciiString = "ascii string".getBytes(charset);
|
||||||
|
utf16String = "UTF-\uFF11\uFF16 string".getBytes(charset);
|
||||||
|
longUtf16String = """
|
||||||
|
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aliquam ac sem eu
|
||||||
|
urna egestas placerat. Etiam finibus ipsum nulla, non mattis dolor cursus a.
|
||||||
|
Nulla nec nisl consectetur, lacinia neque id, accumsan ante. Curabitur et
|
||||||
|
sapien in magna porta ultricies. Sed vel pellentesque nibh. Pellentesque dictum
|
||||||
|
dignissim diam eu ultricies. Class aptent taciti sociosqu ad litora torquent
|
||||||
|
per conubia nostra, per inceptos himenaeos. Suspendisse erat diam, fringilla
|
||||||
|
sed massa sed, posuere viverra orci. Suspendisse tempor libero non gravida
|
||||||
|
efficitur. Vivamus lacinia risus non orci viverra, at consectetur odio laoreet.
|
||||||
|
Suspendisse potenti.
|
||||||
|
|
||||||
|
Phasellus vel nisi iaculis, accumsan quam sed, bibendum eros. Sed venenatis
|
||||||
|
nulla tortor, et eleifend urna sodales id. Nullam tempus ac metus sit amet
|
||||||
|
sollicitudin. Nam sed ex diam. Praesent vitae eros et neque condimentum
|
||||||
|
consectetur eget non tortor. Praesent bibendum vel felis nec dignissim.
|
||||||
|
Maecenas a enim diam. Suspendisse quis ligula at nisi accumsan lacinia id
|
||||||
|
hendrerit sapien. \uFF11Donec aliquam mattis lectus eu ultrices. Duis eu nisl\uFF11
|
||||||
|
euismod, blandit mauris vel, \uFF11placerat urna. Etiam malesuada enim purus,
|
||||||
|
tristique mollis odio blandit quis.\uFF11 Vivamus posuere. \uFF11
|
||||||
|
\uFF11
|
||||||
|
""".getBytes(charset);
|
||||||
|
longUtf16StartString = """
|
||||||
|
\uFF11
|
||||||
|
Lorem ipsum dolor sit amet, \uFF11consectetur adipiscing elit. Aliquam ac sem eu
|
||||||
|
urna egestas \uFF11placerat. Etiam finibus ipsum nulla, non mattis dolor cursus a.
|
||||||
|
Nulla \uFF11nec nisl consectetur, lacinia neque id, accumsan ante. Curabitur et
|
||||||
|
sapien in \uFF11magna porta ultricies. \uFF11Sed vel pellentesque nibh. Pellentesque dictum
|
||||||
|
dignissim diam eu ultricies. Class aptent taciti sociosqu ad litora torquent
|
||||||
|
per conubia nostra, per inceptos himenaeos. Suspendisse erat diam, fringilla
|
||||||
|
sed massa sed, posuere viverra orci. Suspendisse tempor libero non gravida
|
||||||
|
efficitur. Vivamus lacinia risus non orci viverra, at consectetur odio laoreet.
|
||||||
|
Suspendisse potenti.
|
||||||
|
|
||||||
|
Phasellus vel nisi iaculis, accumsan quam sed, bibendum eros. Sed venenatis
|
||||||
|
nulla tortor, et eleifend urna sodales id. Nullam tempus ac metus sit amet
|
||||||
|
sollicitudin. Nam sed ex diam. Praesent vitae eros et neque condimentum
|
||||||
|
consectetur eget non tortor. Praesent bibendum vel felis nec dignissim.
|
||||||
|
Maecenas a enim diam. Suspendisse quis ligula at nisi accumsan lacinia id
|
||||||
|
hendrerit sapien. Donec aliquam mattis lectus eu ultrices. Duis eu nisl
|
||||||
|
euismod, blandit mauris vel, placerat urna. Etiam malesuada enim purus,
|
||||||
|
tristique mollis odio blandit quis. Vivamus posuere.
|
||||||
|
""".getBytes(charset);
|
||||||
|
|
||||||
|
longLatin1String = """
|
||||||
|
a\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6
|
||||||
|
b\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6
|
||||||
|
c\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6
|
||||||
|
d\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6
|
||||||
|
e\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6
|
||||||
|
f\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6
|
||||||
|
g\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6
|
||||||
|
h\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6
|
||||||
|
i\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6
|
||||||
|
j\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6
|
||||||
|
k\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6
|
||||||
|
l\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6
|
||||||
|
m\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6
|
||||||
|
""".getBytes(charset);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Benchmark
|
@Benchmark
|
||||||
public void decodeDefault(Blackhole bh) throws Exception {
|
public String decodeAsciiCharsetName() throws Exception {
|
||||||
bh.consume(new String(asciiDefaultString));
|
return new String(asciiString, charsetName);
|
||||||
bh.consume(new String(utf16DefaultString));
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public String decodeAscii() throws Exception {
|
||||||
|
return new String(asciiString, charset);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public String decodeLatin1Long() throws Exception {
|
||||||
|
return new String(longLatin1String, charset);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public String decodeUTF16Short() throws Exception {
|
||||||
|
return new String(utf16String, charset);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public String decodeUTF16LongEnd() throws Exception {
|
||||||
|
return new String(longUtf16String, charset);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public String decodeUTF16LongStart() throws Exception {
|
||||||
|
return new String(longUtf16StartString, charset);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void decodeUTF16LongMixed(Blackhole bh) throws Exception {
|
||||||
|
bh.consume(new String(longUtf16StartString, charset));
|
||||||
|
bh.consume(new String(longUtf16String, charset));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user