From ff8565931115d581afff679ea85b1a2d80c03b99 Mon Sep 17 00:00:00 2001 From: Claes Redestad Date: Thu, 13 Jan 2022 15:25:16 +0000 Subject: [PATCH] 8279833: Loop optimization issue in String.encodeUTF8_UTF16 Reviewed-by: rriggs Backport-of: c3d0a94040d9bd0f4b99da97b89fbfce252a41c0 --- .../share/classes/java/lang/String.java | 11 +- .../openjdk/bench/java/lang/StringEncode.java | 126 ++++++++++++------ 2 files changed, 89 insertions(+), 48 deletions(-) diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java index abb35ebaeb1..756b8efacda 100644 --- a/src/java.base/share/classes/java/lang/String.java +++ b/src/java.base/share/classes/java/lang/String.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 1994, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1994, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1284,14 +1284,17 @@ public final class String int sp = 0; int sl = val.length >> 1; byte[] dst = new byte[sl * 3]; - char c; - while (sp < sl && (c = StringUTF16.getChar(val, sp)) < '\u0080') { + while (sp < sl) { // ascii fast loop; + char c = StringUTF16.getChar(val, sp); + if (c >= '\u0080') { + break; + } dst[dp++] = (byte)c; sp++; } while (sp < sl) { - c = StringUTF16.getChar(val, sp++); + char c = StringUTF16.getChar(val, sp++); if (c < 0x80) { dst[dp++] = (byte)c; } else if (c < 0x800) { diff --git a/test/micro/org/openjdk/bench/java/lang/StringEncode.java b/test/micro/org/openjdk/bench/java/lang/StringEncode.java index 4cf5032a0da..6e67d3e8cee 100644 --- a/test/micro/org/openjdk/bench/java/lang/StringEncode.java +++ b/test/micro/org/openjdk/bench/java/lang/StringEncode.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2022, Oracle and/or its affiliates. 
All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -30,59 +30,97 @@ import java.util.concurrent.TimeUnit; @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) -@Fork(value = 3, jvmArgs = "-Xmx1g") +@Fork(value = 3) @Warmup(iterations = 5, time = 2) @Measurement(iterations = 5, time = 3) @State(Scope.Thread) public class StringEncode { - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.NANOSECONDS) - @Fork(value = 3, jvmArgs = "-Xmx1g") - @Warmup(iterations = 5, time = 2) - @Measurement(iterations = 5, time = 2) - @State(Scope.Thread) - public static class WithCharset { - - @Param({"US-ASCII", "ISO-8859-1", "UTF-8", "MS932", "ISO-8859-6"}) - private String charsetName; - - private Charset charset; - private String asciiString; - private String utf16String; - - @Setup - public void setup() { - charset = Charset.forName(charsetName); - asciiString = "ascii string"; - utf16String = "UTF-\uFF11\uFF16 string"; - } - - @Benchmark - public void encodeCharsetName(Blackhole bh) throws Exception { - bh.consume(asciiString.getBytes(charsetName)); - bh.consume(utf16String.getBytes(charsetName)); - } - - @Benchmark - public void encodeCharset(Blackhole bh) throws Exception { - bh.consume(asciiString.getBytes(charset)); - bh.consume(utf16String.getBytes(charset)); - } - } - - private String asciiDefaultString; - private String utf16DefaultString; + @Param({"US-ASCII", "ISO-8859-1", "UTF-8", "MS932", "ISO-8859-6"}) + private String charsetName; + private Charset charset; + private String asciiString; + private String utf16String; + private String longUtf16String; + private String longUtf16StartString; @Setup public void setup() { - asciiDefaultString = "ascii string"; - utf16DefaultString = "UTF-\uFF11\uFF16 string"; + charset = Charset.forName(charsetName); + asciiString = "ascii string"; + utf16String = "UTF-\uFF11\uFF16 string"; + 
longUtf16String = """ + Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aliquam ac sem eu + urna egestas placerat. Etiam finibus ipsum nulla, non mattis dolor cursus a. + Nulla nec nisl consectetur, lacinia neque id, accumsan ante. Curabitur et + sapien in magna porta ultricies. Sed vel pellentesque nibh. Pellentesque dictum + dignissim diam eu ultricies. Class aptent taciti sociosqu ad litora torquent + per conubia nostra, per inceptos himenaeos. Suspendisse erat diam, fringilla + sed massa sed, posuere viverra orci. Suspendisse tempor libero non gravida + efficitur. Vivamus lacinia risus non orci viverra, at consectetur odio laoreet. + Suspendisse potenti. + + Phasellus vel nisi iaculis, accumsan quam sed, bibendum eros. Sed venenatis + nulla tortor, et eleifend urna sodales id. Nullam tempus ac metus sit amet + sollicitudin. Nam sed ex diam. Praesent vitae eros et neque condimentum + consectetur eget non tortor. Praesent bibendum vel felis nec dignissim. + Maecenas a enim diam. Suspendisse quis ligula at nisi accumsan lacinia id + hendrerit sapien. Donec aliquam mattis lectus eu ultrices. Duis eu nisl + euismod, blandit mauris vel, placerat urna. Etiam malesuada enim purus, + tristique mollis odio blandit quis. Vivamus posuere. + \uFF11 + """; + longUtf16StartString = """ + \uFF11 + Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aliquam ac sem eu + urna egestas placerat. Etiam finibus ipsum nulla, non mattis dolor cursus a. + Nulla nec nisl consectetur, lacinia neque id, accumsan ante. Curabitur et + sapien in magna porta ultricies. Sed vel pellentesque nibh. Pellentesque dictum + dignissim diam eu ultricies. Class aptent taciti sociosqu ad litora torquent + per conubia nostra, per inceptos himenaeos. Suspendisse erat diam, fringilla + sed massa sed, posuere viverra orci. Suspendisse tempor libero non gravida + efficitur. Vivamus lacinia risus non orci viverra, at consectetur odio laoreet. + Suspendisse potenti. 
+ + Phasellus vel nisi iaculis, accumsan quam sed, bibendum eros. Sed venenatis + nulla tortor, et eleifend urna sodales id. Nullam tempus ac metus sit amet + sollicitudin. Nam sed ex diam. Praesent vitae eros et neque condimentum + consectetur eget non tortor. Praesent bibendum vel felis nec dignissim. + Maecenas a enim diam. Suspendisse quis ligula at nisi accumsan lacinia id + hendrerit sapien. Donec aliquam mattis lectus eu ultrices. Duis eu nisl + euismod, blandit mauris vel, placerat urna. Etiam malesuada enim purus, + tristique mollis odio blandit quis. Vivamus posuere. + """; } @Benchmark - public void encodeDefault(Blackhole bh) throws Exception { - bh.consume(asciiDefaultString.getBytes()); - bh.consume(utf16DefaultString.getBytes()); + public byte[] encodeAsciiCharsetName() throws Exception { + return asciiString.getBytes(charsetName); // encode by charset name, unlike encodeAscii (Charset overload) + } + + @Benchmark + public byte[] encodeAscii() throws Exception { + return asciiString.getBytes(charset); + } + + @Benchmark + public void encodeMix(Blackhole bh) throws Exception { + bh.consume(asciiString.getBytes(charset)); + bh.consume(utf16String.getBytes(charset)); + } + + @Benchmark + public byte[] encodeUTF16LongEnd() throws Exception { + return longUtf16String.getBytes(charset); + } + + @Benchmark + public byte[] encodeUTF16LongStart() throws Exception { + return longUtf16StartString.getBytes(charset); + } + + @Benchmark + public byte[] encodeUTF16() throws Exception { + return utf16String.getBytes(charset); } }