From bb42e61a6176a7f4f9485efa47a248b23b09a16d Mon Sep 17 00:00:00 2001
From: Claes Redestad
Date: Sat, 21 Jan 2023 11:54:51 +0000
Subject: [PATCH] 8300493: Use ArraysSupport.vectorizedHashCode in j.u.zip.ZipCoder

Reviewed-by: alanb, lancea
---
Notes (ignored by git am):
  In the ZipCoder hunk the two bare "-" lines delete blank lines only.
  "int end = off + len;" stays as context because the retained check
  "a[end - 1] != '/'" still reads it after the hash is computed.

 .../share/classes/java/lang/System.java       |  3 +
 .../share/classes/java/util/zip/ZipCoder.java | 28 +++---
 .../jdk/internal/access/JavaLangAccess.java   |  5 +
 .../bench/java/util/zip/ZipFileOpen.java      | 93 +++++++++++++++++++
 4 files changed, 112 insertions(+), 17 deletions(-)
 create mode 100644 test/micro/org/openjdk/bench/java/util/zip/ZipFileOpen.java

diff --git a/src/java.base/share/classes/java/lang/System.java b/src/java.base/share/classes/java/lang/System.java
index 08881286d6c..51d688e0d4a 100644
--- a/src/java.base/share/classes/java/lang/System.java
+++ b/src/java.base/share/classes/java/lang/System.java
@@ -2461,6 +2461,9 @@ public final class System {
                 return ModuleLayer.layers(loader);
             }
 
+            public int countPositives(byte[] bytes, int offset, int length) {
+                return StringCoding.countPositives(bytes, offset, length);
+            }
             public String newStringNoRepl(byte[] bytes, Charset cs) throws CharacterCodingException {
                 return String.newStringNoRepl(bytes, cs);
             }
diff --git a/src/java.base/share/classes/java/util/zip/ZipCoder.java b/src/java.base/share/classes/java/util/zip/ZipCoder.java
index 621b0b0bc8b..d9d228c2ce9 100644
--- a/src/java.base/share/classes/java/util/zip/ZipCoder.java
+++ b/src/java.base/share/classes/java/util/zip/ZipCoder.java
@@ -34,6 +34,7 @@ import java.nio.charset.CharacterCodingException;
 import java.nio.charset.CodingErrorAction;
 import java.util.Arrays;
 
+import jdk.internal.util.ArraysSupport;
 import sun.nio.cs.UTF_8;
 
 /**
@@ -209,25 +210,18 @@ class ZipCoder {
             if (len == 0) {
                 return 0;
             }
-
             int end = off + len;
-            int h = 0;
-            while (off < end) {
-                byte b = a[off];
-                if (b >= 0) {
-                    // ASCII, keep going
-                    h = 31 * h + b;
-                    off++;
-                } else {
-                    // Non-ASCII, fall back to decoding a String
-                    // We avoid using decoder() here since the UTF8ZipCoder is
-                    // shared and that decoder is not thread safe.
-                    // We use the JLA.newStringUTF8NoRepl variant to throw
-                    // exceptions eagerly when opening ZipFiles
-                    return hash(JLA.newStringUTF8NoRepl(a, end - len, len));
-                }
+            int asciiLen = JLA.countPositives(a, off, len);
+            if (asciiLen != len) {
+                // Non-ASCII, fall back to decoding a String
+                // We avoid using decoder() here since the UTF8ZipCoder is
+                // shared and that decoder is not thread safe.
+                // We use the JLA.newStringUTF8NoRepl variant to throw
+                // exceptions eagerly when opening ZipFiles
+                return hash(JLA.newStringUTF8NoRepl(a, off, len));
             }
-
+            // T_BOOLEAN to treat the array as unsigned bytes, in line with StringLatin1.hashCode
+            int h = ArraysSupport.vectorizedHashCode(a, off, len, 0, ArraysSupport.T_BOOLEAN);
             if (a[end - 1] != '/') {
                 h = 31 * h + '/';
             }
diff --git a/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java b/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java
index ba08b281ccf..39adfb2130a 100644
--- a/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java
+++ b/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java
@@ -302,6 +302,11 @@ public interface JavaLangAccess {
      */
     Stream<ModuleLayer> layers(ClassLoader loader);
 
+    /**
+     * Count the number of leading positive bytes in the range.
+     */
+    int countPositives(byte[] ba, int off, int len);
+
     /**
      * Constructs a new {@code String} by decoding the specified subarray of
      * bytes using the specified {@linkplain java.nio.charset.Charset charset}.
diff --git a/test/micro/org/openjdk/bench/java/util/zip/ZipFileOpen.java b/test/micro/org/openjdk/bench/java/util/zip/ZipFileOpen.java
new file mode 100644
index 00000000000..ffbbc3d245f
--- /dev/null
+++ b/test/micro/org/openjdk/bench/java/util/zip/ZipFileOpen.java
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package org.openjdk.bench.java.util.zip;
+
+import org.openjdk.jmh.annotations.*;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.util.concurrent.TimeUnit;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipFile;
+import java.util.zip.ZipOutputStream;
+
+/**
+ * Simple benchmark measuring cost of opening zip files, parsing CEN
+ * entries.
+ */
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@State(Scope.Thread)
+@Warmup(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
+@Measurement(iterations = 5, time = 1000, timeUnit = TimeUnit.MILLISECONDS)
+@Fork(3)
+public class ZipFileOpen {
+
+    @Param({"512", "1024"})
+    private int size;
+
+    public File zipFile;
+
+    @Setup(Level.Trial)
+    public void beforeRun() throws IOException {
+        // Create a test Zip file with the number of entries.
+        File tempFile = Files.createTempFile("zip-micro", ".zip").toFile();
+        tempFile.deleteOnExit();
+        try (FileOutputStream fos = new FileOutputStream(tempFile);
+             ZipOutputStream zos = new ZipOutputStream(fos)) {
+
+            // Vary dir and entry sizes, with a bias towards shorter entries.
+            String[] dirPrefixes = new String[] { "dir1", "dir2", "dir3",
+                    "longer-directory-name-", "ridiculously-long-pathname-to-help-exercize-vectorized-subroutines-"};
+            String[] entryPrefixes = new String[] { "e", "long-entry-name-",
+                    "ridiculously-long-entry-name-to-help-exercize-vectorized-subroutines-"};
+
+            for (int i = 0; i < size; i++) {
+                String ename = dirPrefixes[i % dirPrefixes.length] + i + "/";
+                zos.putNextEntry(new ZipEntry(ename));
+
+                ename += entryPrefixes[i % entryPrefixes.length] + "-" + i;
+                zos.putNextEntry(new ZipEntry(ename));
+            }
+        }
+        zipFile = tempFile;
+    }
+
+    @Benchmark
+    public ZipFile openCloseZipFile() throws Exception {
+        // Some shared resources in ZipFile are cached in a shared structure
+        // and needs to be cleaned up to properly capture overhead of creating
+        // a ZipFile - otherwise opening the same zip file again will reuse the
+        // cached data and look artificially fast. By including the ZipFile.close()
+        // we aggressively clear those resources pre-emptively. The operations
+        // appears to be complex enough to not be subject to DCE but care needs
+        // to be taken to check that things like initCEN is properly accounted
+        // for if/when the ZipFile setup improves.
+        ZipFile zf = new ZipFile(zipFile);
+        zf.close();
+        return zf;
+    }
+}