8280124: Reduce branches decoding latin-1 chars from UTF-8 encoded bytes
Reviewed-by: rriggs, alanb, naoto
This commit is contained in:
parent
bdfa15d92c
commit
e314a4cfda
@ -541,8 +541,7 @@ public final class String
|
||||
offset++;
|
||||
continue;
|
||||
}
|
||||
if ((b1 == (byte)0xc2 || b1 == (byte)0xc3) &&
|
||||
offset + 1 < sl) {
|
||||
if ((b1 & 0xfe) == 0xc2 && offset + 1 < sl) { // b1 either 0xc2 or 0xc3
|
||||
int b2 = bytes[offset + 1];
|
||||
if (!isNotContinuation(b2)) {
|
||||
dst[dp++] = (byte)decode2(b1, b2);
|
||||
@ -698,8 +697,7 @@ public final class String
|
||||
offset++;
|
||||
continue;
|
||||
}
|
||||
if ((b1 == (byte) 0xc2 || b1 == (byte) 0xc3) &&
|
||||
offset + 1 < sl) {
|
||||
if ((b1 & 0xfe) == 0xc2 && offset + 1 < sl) { // b1 either 0xc2 or 0xc3
|
||||
int b2 = bytes[offset + 1];
|
||||
if (!isNotContinuation(b2)) {
|
||||
dst[dp++] = (byte) decode2(b1, b2);
|
||||
|
@ -40,59 +40,120 @@ import java.util.concurrent.TimeUnit;
|
||||
|
||||
@BenchmarkMode(Mode.AverageTime)
|
||||
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||
@Fork(value = 3, jvmArgs = "-Xmx1g")
|
||||
@Fork(value = 3)
|
||||
@Warmup(iterations = 5, time = 2)
|
||||
@Measurement(iterations = 5, time = 3)
|
||||
@State(Scope.Thread)
|
||||
public class StringDecode {
|
||||
|
||||
@BenchmarkMode(Mode.AverageTime)
|
||||
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||
@Fork(value = 3, jvmArgs = "-Xmx1g")
|
||||
@Warmup(iterations = 5, time = 2)
|
||||
@Measurement(iterations = 5, time = 2)
|
||||
@State(Scope.Thread)
|
||||
public static class WithCharset {
|
||||
|
||||
@Param({"US-ASCII", "ISO-8859-1", "UTF-8", "MS932", "ISO-8859-6", "ISO-2022-KR"})
|
||||
private String charsetName;
|
||||
|
||||
private Charset charset;
|
||||
private byte[] asciiString;
|
||||
private byte[] utf16String;
|
||||
private byte[] longUtf16String;
|
||||
private byte[] longUtf16StartString;
|
||||
private byte[] longLatin1String;
|
||||
|
||||
@Setup
|
||||
public void setup() {
|
||||
charset = Charset.forName(charsetName);
|
||||
asciiString = "ascii string".getBytes(charset);
|
||||
utf16String = "UTF-\uFF11\uFF16 string".getBytes(charset);
|
||||
longUtf16String = """
|
||||
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aliquam ac sem eu
|
||||
urna egestas placerat. Etiam finibus ipsum nulla, non mattis dolor cursus a.
|
||||
Nulla nec nisl consectetur, lacinia neque id, accumsan ante. Curabitur et
|
||||
sapien in magna porta ultricies. Sed vel pellentesque nibh. Pellentesque dictum
|
||||
dignissim diam eu ultricies. Class aptent taciti sociosqu ad litora torquent
|
||||
per conubia nostra, per inceptos himenaeos. Suspendisse erat diam, fringilla
|
||||
sed massa sed, posuere viverra orci. Suspendisse tempor libero non gravida
|
||||
efficitur. Vivamus lacinia risus non orci viverra, at consectetur odio laoreet.
|
||||
Suspendisse potenti.
|
||||
|
||||
Phasellus vel nisi iaculis, accumsan quam sed, bibendum eros. Sed venenatis
|
||||
nulla tortor, et eleifend urna sodales id. Nullam tempus ac metus sit amet
|
||||
sollicitudin. Nam sed ex diam. Praesent vitae eros et neque condimentum
|
||||
consectetur eget non tortor. Praesent bibendum vel felis nec dignissim.
|
||||
Maecenas a enim diam. Suspendisse quis ligula at nisi accumsan lacinia id
|
||||
hendrerit sapien. \uFF11Donec aliquam mattis lectus eu ultrices. Duis eu nisl\uFF11
|
||||
euismod, blandit mauris vel, \uFF11placerat urna. Etiam malesuada enim purus,
|
||||
tristique mollis odio blandit quis.\uFF11 Vivamus posuere. \uFF11
|
||||
\uFF11
|
||||
""".getBytes(charset);
|
||||
longUtf16StartString = """
|
||||
\uFF11
|
||||
Lorem ipsum dolor sit amet, \uFF11consectetur adipiscing elit. Aliquam ac sem eu
|
||||
urna egestas \uFF11placerat. Etiam finibus ipsum nulla, non mattis dolor cursus a.
|
||||
Nulla \uFF11nec nisl consectetur, lacinia neque id, accumsan ante. Curabitur et
|
||||
sapien in \uFF11magna porta ultricies. \uFF11Sed vel pellentesque nibh. Pellentesque dictum
|
||||
dignissim diam eu ultricies. Class aptent taciti sociosqu ad litora torquent
|
||||
per conubia nostra, per inceptos himenaeos. Suspendisse erat diam, fringilla
|
||||
sed massa sed, posuere viverra orci. Suspendisse tempor libero non gravida
|
||||
efficitur. Vivamus lacinia risus non orci viverra, at consectetur odio laoreet.
|
||||
Suspendisse potenti.
|
||||
|
||||
Phasellus vel nisi iaculis, accumsan quam sed, bibendum eros. Sed venenatis
|
||||
nulla tortor, et eleifend urna sodales id. Nullam tempus ac metus sit amet
|
||||
sollicitudin. Nam sed ex diam. Praesent vitae eros et neque condimentum
|
||||
consectetur eget non tortor. Praesent bibendum vel felis nec dignissim.
|
||||
Maecenas a enim diam. Suspendisse quis ligula at nisi accumsan lacinia id
|
||||
hendrerit sapien. Donec aliquam mattis lectus eu ultrices. Duis eu nisl
|
||||
euismod, blandit mauris vel, placerat urna. Etiam malesuada enim purus,
|
||||
tristique mollis odio blandit quis. Vivamus posuere.
|
||||
""".getBytes(charset);
|
||||
|
||||
longLatin1String = """
|
||||
a\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6
|
||||
b\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6
|
||||
c\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6
|
||||
d\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6
|
||||
e\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6
|
||||
f\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6
|
||||
g\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6
|
||||
h\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6
|
||||
i\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6
|
||||
j\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6
|
||||
k\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6
|
||||
l\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6
|
||||
m\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6
|
||||
""".getBytes(charset);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void decodeCharsetName(Blackhole bh) throws Exception {
|
||||
bh.consume(new String(asciiString, charsetName));
|
||||
bh.consume(new String(utf16String, charsetName));
|
||||
public String decodeAsciiCharsetName() throws Exception {
|
||||
return new String(asciiString, charsetName);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void decodeCharset(Blackhole bh) throws Exception {
|
||||
bh.consume(new String(asciiString, charset));
|
||||
bh.consume(new String(utf16String, charset));
|
||||
}
|
||||
}
|
||||
|
||||
private byte[] asciiDefaultString;
|
||||
private byte[] utf16DefaultString;
|
||||
|
||||
@Setup
|
||||
public void setup() {
|
||||
asciiDefaultString = "ascii string".getBytes();
|
||||
utf16DefaultString = "UTF-\uFF11\uFF16 string".getBytes();
|
||||
public String decodeAscii() throws Exception {
|
||||
return new String(asciiString, charset);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void decodeDefault(Blackhole bh) throws Exception {
|
||||
bh.consume(new String(asciiDefaultString));
|
||||
bh.consume(new String(utf16DefaultString));
|
||||
public String decodeLatin1Long() throws Exception {
|
||||
return new String(longLatin1String, charset);
|
||||
}
|
||||
|
||||
/**
 * Benchmark: constructs a String from the {@code utf16String} byte array
 * using the {@code charset} field.
 *
 * NOTE(review): {@code utf16String} appears to be the short
 * "UTF-\uFF11\uFF16 string" sample encoded in setup() - confirm against the
 * un-interleaved source, since this view mixes old and new diff lines.
 *
 * @return the newly constructed String
 */
@Benchmark
|
||||
public String decodeUTF16Short() throws Exception {
|
||||
return new String(utf16String, charset);
|
||||
}
|
||||
|
||||
/**
 * Benchmark: constructs a String from the {@code longUtf16String} byte array
 * using the {@code charset} field.
 *
 * NOTE(review): in the setup() shown in this view, {@code longUtf16String}
 * is a multi-paragraph text whose U+FF11 characters occur only in its last
 * few lines - confirm against the un-interleaved source.
 *
 * @return the newly constructed String
 */
@Benchmark
|
||||
public String decodeUTF16LongEnd() throws Exception {
|
||||
return new String(longUtf16String, charset);
|
||||
}
|
||||
|
||||
/**
 * Benchmark: constructs a String from the {@code longUtf16StartString} byte
 * array using the {@code charset} field.
 *
 * NOTE(review): in the setup() shown in this view,
 * {@code longUtf16StartString} is a multi-paragraph text that begins with
 * U+FF11 and has further U+FF11 characters in its first few lines - confirm
 * against the un-interleaved source.
 *
 * @return the newly constructed String
 */
@Benchmark
|
||||
public String decodeUTF16LongStart() throws Exception {
|
||||
return new String(longUtf16StartString, charset);
|
||||
}
|
||||
|
||||
/**
 * Benchmark: constructs two Strings in one invocation, first from
 * {@code longUtf16StartString} and then from {@code longUtf16String}, both
 * with the {@code charset} field, and hands each result to {@code bh}.
 *
 * @param bh sink that receives both constructed Strings via
 *           {@code consume} (presumably the JMH Blackhole - the import is
 *           not visible in this view)
 */
@Benchmark
|
||||
public void decodeUTF16LongMixed(Blackhole bh) throws Exception {
|
||||
// Start-heavy input first, then the end-heavy input.
bh.consume(new String(longUtf16StartString, charset));
|
||||
bh.consume(new String(longUtf16String, charset));
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user