From e314a4cfda30cc680b3f0aef8c62b75ff81bdbb1 Mon Sep 17 00:00:00 2001 From: Claes Redestad Date: Tue, 18 Jan 2022 19:28:12 +0000 Subject: [PATCH] 8280124: Reduce branches decoding latin-1 chars from UTF-8 encoded bytes Reviewed-by: rriggs, alanb, naoto --- .../share/classes/java/lang/String.java | 6 +- .../openjdk/bench/java/lang/StringDecode.java | 145 +++++++++++++----- 2 files changed, 105 insertions(+), 46 deletions(-) diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java index 756b8efacda..43c76d5eb3e 100644 --- a/src/java.base/share/classes/java/lang/String.java +++ b/src/java.base/share/classes/java/lang/String.java @@ -541,8 +541,7 @@ public final class String offset++; continue; } - if ((b1 == (byte)0xc2 || b1 == (byte)0xc3) && - offset + 1 < sl) { + if ((b1 & 0xfe) == 0xc2 && offset + 1 < sl) { // b1 either 0xc2 or 0xc3 int b2 = bytes[offset + 1]; if (!isNotContinuation(b2)) { dst[dp++] = (byte)decode2(b1, b2); @@ -698,8 +697,7 @@ public final class String offset++; continue; } - if ((b1 == (byte) 0xc2 || b1 == (byte) 0xc3) && - offset + 1 < sl) { + if ((b1 & 0xfe) == 0xc2 && offset + 1 < sl) { // b1 either 0xc2 or 0xc3 int b2 = bytes[offset + 1]; if (!isNotContinuation(b2)) { dst[dp++] = (byte) decode2(b1, b2); diff --git a/test/micro/org/openjdk/bench/java/lang/StringDecode.java b/test/micro/org/openjdk/bench/java/lang/StringDecode.java index ace4252c7a3..186d2aed6d9 100644 --- a/test/micro/org/openjdk/bench/java/lang/StringDecode.java +++ b/test/micro/org/openjdk/bench/java/lang/StringDecode.java @@ -40,59 +40,120 @@ import java.util.concurrent.TimeUnit; @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) -@Fork(value = 3, jvmArgs = "-Xmx1g") +@Fork(value = 3) @Warmup(iterations = 5, time = 2) @Measurement(iterations = 5, time = 3) @State(Scope.Thread) public class StringDecode { - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.NANOSECONDS) - @Fork(value = 3, jvmArgs = "-Xmx1g") - @Warmup(iterations = 5, time = 2) - @Measurement(iterations = 5, time = 2) - @State(Scope.Thread) - public static class WithCharset { + @Param({"US-ASCII", "ISO-8859-1", "UTF-8", "MS932", "ISO-8859-6", "ISO-2022-KR"}) + private String charsetName; - @Param({"US-ASCII", "ISO-8859-1", "UTF-8", "MS932", "ISO-8859-6", "ISO-2022-KR"}) - private String charsetName; - - private Charset charset; - private byte[] asciiString; - private byte[] utf16String; - - @Setup - public void setup() { - charset = Charset.forName(charsetName); - asciiString = "ascii string".getBytes(charset); - utf16String = "UTF-\uFF11\uFF16 string".getBytes(charset); - } - - @Benchmark - public void decodeCharsetName(Blackhole bh) throws Exception { - bh.consume(new String(asciiString, charsetName)); - bh.consume(new String(utf16String, charsetName)); - } - - @Benchmark - public void decodeCharset(Blackhole bh) throws Exception { - bh.consume(new String(asciiString, charset)); - bh.consume(new String(utf16String, charset)); - } - } - - private byte[] asciiDefaultString; - private byte[] utf16DefaultString; + private Charset charset; + private byte[] asciiString; + private byte[] utf16String; + private byte[] longUtf16String; + private byte[] longUtf16StartString; + private byte[] longLatin1String; @Setup public void setup() { - asciiDefaultString = "ascii string".getBytes(); - utf16DefaultString = "UTF-\uFF11\uFF16 string".getBytes(); + charset = Charset.forName(charsetName); + asciiString = "ascii string".getBytes(charset); + utf16String = "UTF-\uFF11\uFF16 string".getBytes(charset); + longUtf16String = """ + Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aliquam ac sem eu + urna egestas placerat. Etiam finibus ipsum nulla, non mattis dolor cursus a. + Nulla nec nisl consectetur, lacinia neque id, accumsan ante. Curabitur et + sapien in magna porta ultricies. Sed vel pellentesque nibh. Pellentesque dictum + dignissim diam eu ultricies. Class aptent taciti sociosqu ad litora torquent + per conubia nostra, per inceptos himenaeos. Suspendisse erat diam, fringilla + sed massa sed, posuere viverra orci. Suspendisse tempor libero non gravida + efficitur. Vivamus lacinia risus non orci viverra, at consectetur odio laoreet. + Suspendisse potenti. + + Phasellus vel nisi iaculis, accumsan quam sed, bibendum eros. Sed venenatis + nulla tortor, et eleifend urna sodales id. Nullam tempus ac metus sit amet + sollicitudin. Nam sed ex diam. Praesent vitae eros et neque condimentum + consectetur eget non tortor. Praesent bibendum vel felis nec dignissim. + Maecenas a enim diam. Suspendisse quis ligula at nisi accumsan lacinia id + hendrerit sapien. \uFF11Donec aliquam mattis lectus eu ultrices. Duis eu nisl\uFF11 + euismod, blandit mauris vel, \uFF11placerat urna. Etiam malesuada enim purus, + tristique mollis odio blandit quis.\uFF11 Vivamus posuere. \uFF11 + \uFF11 + """.getBytes(charset); + longUtf16StartString = """ + \uFF11 + Lorem ipsum dolor sit amet, \uFF11consectetur adipiscing elit. Aliquam ac sem eu + urna egestas \uFF11placerat. Etiam finibus ipsum nulla, non mattis dolor cursus a. + Nulla \uFF11nec nisl consectetur, lacinia neque id, accumsan ante. Curabitur et + sapien in \uFF11magna porta ultricies. \uFF11Sed vel pellentesque nibh. Pellentesque dictum + dignissim diam eu ultricies. Class aptent taciti sociosqu ad litora torquent + per conubia nostra, per inceptos himenaeos. Suspendisse erat diam, fringilla + sed massa sed, posuere viverra orci. Suspendisse tempor libero non gravida + efficitur. Vivamus lacinia risus non orci viverra, at consectetur odio laoreet. + Suspendisse potenti. + + Phasellus vel nisi iaculis, accumsan quam sed, bibendum eros. Sed venenatis + nulla tortor, et eleifend urna sodales id. Nullam tempus ac metus sit amet + sollicitudin. Nam sed ex diam. Praesent vitae eros et neque condimentum + consectetur eget non tortor. Praesent bibendum vel felis nec dignissim. + Maecenas a enim diam. Suspendisse quis ligula at nisi accumsan lacinia id + hendrerit sapien. Donec aliquam mattis lectus eu ultrices. Duis eu nisl + euismod, blandit mauris vel, placerat urna. Etiam malesuada enim purus, + tristique mollis odio blandit quis. Vivamus posuere. + """.getBytes(charset); + + longLatin1String = """ + a\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6 + b\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6 + c\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6 + d\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6 + e\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6 + f\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6 + g\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6 + h\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6 + i\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6 + j\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6\u00F6 + k\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6\u00F6 + l\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6\u00F6 + m\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00F6\u00B6\u00F6\u00F6 + """.getBytes(charset); } @Benchmark - public void decodeDefault(Blackhole bh) throws Exception { - bh.consume(new String(asciiDefaultString)); - bh.consume(new String(utf16DefaultString)); + public String decodeAsciiCharsetName() throws Exception { + return new String(asciiString, charsetName); + } + + @Benchmark + public String decodeAscii() throws Exception { + return new String(asciiString, charset); + } + + @Benchmark + public String decodeLatin1Long() throws Exception { + return new String(longLatin1String, charset); + } + + @Benchmark + public String decodeUTF16Short() throws Exception { + return new String(utf16String, charset); + } + + @Benchmark + public String decodeUTF16LongEnd() throws Exception { + return new String(longUtf16String, charset); + } + + @Benchmark + public String decodeUTF16LongStart() throws Exception { + return new String(longUtf16StartString, charset); + } + + @Benchmark + public void decodeUTF16LongMixed(Blackhole bh) throws Exception { + bh.consume(new String(longUtf16StartString, charset)); + bh.consume(new String(longUtf16String, charset)); } }