8243254: Examine ZipFile slash optimization for non-ASCII compatible charsets

Reviewed-by: lancea, naoto, martin
This commit is contained in:
Claes Redestad 2020-04-22 21:13:10 +02:00
parent 72446bb0dc
commit 268ea904ec
3 changed files with 362 additions and 67 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2009, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2009, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -38,15 +38,17 @@ import sun.nio.cs.UTF_8;
/**
* Utility class for zipfile name and comment decoding and encoding
*/
class ZipCoder {
private static final jdk.internal.access.JavaLangAccess JLA =
jdk.internal.access.SharedSecrets.getJavaLangAccess();
static final class UTF8 extends ZipCoder {
static final class UTF8ZipCoder extends ZipCoder {
UTF8(Charset utf8) {
// Encoding/decoding is stateless, so make it singleton.
static final ZipCoder INSTANCE = new UTF8ZipCoder(UTF_8.INSTANCE);
private UTF8ZipCoder(Charset utf8) {
super(utf8);
}
@ -64,14 +66,28 @@ class ZipCoder {
byte[] getBytes(String s) {
return JLA.getBytesUTF8NoRepl(s);
}
}
// UTF_8.ArrayEn/Decoder is stateless, so make it singleton.
private static ZipCoder utf8 = new UTF8(UTF_8.INSTANCE);
@Override
int hashN(byte[] a, int off, int len) {
// Performance optimization: when UTF8-encoded, ZipFile.getEntryPos
// assume that the hash of a name remains unchanged when appending a
// trailing '/', which allows lookups to avoid rehashing
int end = off + len;
if (len > 0 && a[end - 1] == '/') {
end--;
}
int h = 1;
for (int i = off; i < end; i++) {
h = 31 * h + a[i];
}
return h;
}
}
public static ZipCoder get(Charset charset) {
if (charset == UTF_8.INSTANCE)
return utf8;
return UTF8ZipCoder.INSTANCE;
return new ZipCoder(charset);
}
@ -109,21 +125,29 @@ class ZipCoder {
// assume invoked only if "this" is not utf8
byte[] getBytesUTF8(String s) {
return utf8.getBytes(s);
return UTF8ZipCoder.INSTANCE.getBytes(s);
}
String toStringUTF8(byte[] ba, int len) {
return utf8.toString(ba, 0, len);
return UTF8ZipCoder.INSTANCE.toString(ba, 0, len);
}
String toStringUTF8(byte[] ba, int off, int len) {
return utf8.toString(ba, off, len);
return UTF8ZipCoder.INSTANCE.toString(ba, off, len);
}
boolean isUTF8() {
return false;
}
int hashN(byte[] a, int off, int len) {
int h = 1;
while (len-- > 0) {
h = 31 * h + a[off++];
}
return h;
}
private Charset cs;
private CharsetDecoder dec;
private CharsetEncoder enc;
@ -132,7 +156,7 @@ class ZipCoder {
this.cs = cs;
}
protected CharsetDecoder decoder() {
private CharsetDecoder decoder() {
if (dec == null) {
dec = cs.newDecoder()
.onMalformedInput(CodingErrorAction.REPORT)
@ -141,7 +165,7 @@ class ZipCoder {
return dec;
}
protected CharsetEncoder encoder() {
private CharsetEncoder encoder() {
if (enc == null) {
enc = cs.newEncoder()
.onMalformedInput(CodingErrorAction.REPORT)

View File

@ -335,15 +335,24 @@ public class ZipFile implements ZipConstants, Closeable {
*/
private ZipEntry getEntry(String name, Function<String, ? extends ZipEntry> func) {
Objects.requireNonNull(name, "name");
ZipEntry entry = null;
synchronized (this) {
ensureOpen();
byte[] bname = zc.getBytes(name);
int pos = res.zsrc.getEntryPos(bname, true);
if (pos != -1) {
return getZipEntry(name, bname, pos, func);
entry = getZipEntry(name, bname, pos, func);
} else if (!zc.isUTF8() && !name.isEmpty() && !name.endsWith("/")) {
// non-UTF-8 charsets need to lookup again with added slash
name = name + '/';
bname = zc.getBytes(name);
pos = res.zsrc.getEntryPos(bname, false);
if (pos != -1) {
entry = getZipEntry(name, bname, pos, func);
}
}
}
return null;
return entry;
}
/**
@ -716,7 +725,7 @@ public class ZipFile implements ZipConstants, Closeable {
this.cleanable = CleanerFactory.cleaner().register(zf, this);
this.istreams = Collections.newSetFromMap(new WeakHashMap<>());
this.inflaterCache = new ArrayDeque<>();
this.zsrc = Source.get(file, (mode & OPEN_DELETE) != 0);
this.zsrc = Source.get(file, (mode & OPEN_DELETE) != 0, zf.zc);
}
void clean() {
@ -809,14 +818,6 @@ public class ZipFile implements ZipConstants, Closeable {
}
}
CleanableResource(File file, int mode)
throws IOException {
this.cleanable = null;
this.istreams = Collections.newSetFromMap(new WeakHashMap<>());
this.inflaterCache = new ArrayDeque<>();
this.zsrc = Source.get(file, (mode & OPEN_DELETE) != 0);
}
}
/**
@ -1042,6 +1043,8 @@ public class ZipFile implements ZipConstants, Closeable {
byte[] cen = zsrc.cen;
for (int i = 0; i < names.length; i++) {
int pos = zsrc.metanames[i];
// This will only be invoked on JarFile, which is guaranteed
// to use (or be compatible with) UTF-8 encoding.
names[i] = new String(cen, pos + CENHDR, CENNAM(cen, pos),
UTF_8.INSTANCE);
}
@ -1110,6 +1113,8 @@ public class ZipFile implements ZipConstants, Closeable {
private static final int[] EMPTY_META_VERSIONS = new int[0];
private final Key key; // the key in files
private final ZipCoder zc; // zip coder used to decode/encode
private int refs = 1;
private RandomAccessFile zfile; // zfile of the underlying zip file
@ -1155,22 +1160,28 @@ public class ZipFile implements ZipConstants, Closeable {
private int tablelen; // number of hash heads
private static class Key {
BasicFileAttributes attrs;
final BasicFileAttributes attrs;
File file;
final boolean utf8;
public Key(File file, BasicFileAttributes attrs) {
public Key(File file, BasicFileAttributes attrs, ZipCoder zc) {
this.attrs = attrs;
this.file = file;
this.utf8 = zc.isUTF8();
}
public int hashCode() {
long t = attrs.lastModifiedTime().toMillis();
long t = utf8 ? 0 : Long.MAX_VALUE;
t += attrs.lastModifiedTime().toMillis();
return ((int)(t ^ (t >>> 32))) + file.hashCode();
}
public boolean equals(Object obj) {
if (obj instanceof Key) {
Key key = (Key)obj;
if (key.utf8 != utf8) {
return false;
}
if (!attrs.lastModifiedTime().equals(key.attrs.lastModifiedTime())) {
return false;
}
@ -1187,11 +1198,12 @@ public class ZipFile implements ZipConstants, Closeable {
private static final HashMap<Key, Source> files = new HashMap<>();
static Source get(File file, boolean toDelete) throws IOException {
static Source get(File file, boolean toDelete, ZipCoder zc) throws IOException {
final Key key;
try {
key = new Key(file,
Files.readAttributes(file.toPath(), BasicFileAttributes.class));
Files.readAttributes(file.toPath(), BasicFileAttributes.class),
zc);
} catch (InvalidPathException ipe) {
throw new IOException(ipe);
}
@ -1203,7 +1215,7 @@ public class ZipFile implements ZipConstants, Closeable {
return src;
}
}
src = new Source(key, toDelete);
src = new Source(key, toDelete, zc);
synchronized (files) {
if (files.containsKey(key)) { // someone else put in first
@ -1226,7 +1238,8 @@ public class ZipFile implements ZipConstants, Closeable {
}
}
private Source(Key key, boolean toDelete) throws IOException {
private Source(Key key, boolean toDelete, ZipCoder zc) throws IOException {
this.zc = zc;
this.key = key;
if (toDelete) {
if (isWindows) {
@ -1288,20 +1301,6 @@ public class ZipFile implements ZipConstants, Closeable {
}
}
private static final int hashN(byte[] a, int off, int len) {
// Performance optimization: getEntryPos assumes that the hash
// of a name remains unchanged when appending a trailing '/'.
if (len > 0 && a[off + len - 1] == '/') {
len--;
}
int h = 1;
while (len-- > 0) {
h = 31 * h + a[off++];
}
return h;
}
private static class End {
int centot; // 4 bytes
long cenlen; // 4 bytes
@ -1411,13 +1410,17 @@ public class ZipFile implements ZipConstants, Closeable {
// Reads zip file central directory.
private void initCEN(int knownTotal) throws IOException {
// Prefer locals for better performance during startup
byte[] cen;
ZipCoder zc = this.zc;
if (knownTotal == -1) {
End end = findEND();
if (end.endpos == 0) {
locpos = 0;
total = 0;
entries = new int[0];
cen = null;
entries = new int[0];
this.cen = null;
return; // only END header present
}
if (end.cenlen > end.endpos)
@ -1430,22 +1433,28 @@ public class ZipFile implements ZipConstants, Closeable {
zerror("invalid END header (bad central directory offset)");
}
// read in the CEN and END
cen = new byte[(int)(end.cenlen + ENDHDR)];
cen = this.cen = new byte[(int)(end.cenlen + ENDHDR)];
if (readFullyAt(cen, 0, cen.length, cenpos) != end.cenlen + ENDHDR) {
zerror("read CEN tables failed");
}
total = end.centot;
} else {
cen = this.cen;
total = knownTotal;
}
// hash table for entries
entries = new int[total * 3];
tablelen = ((total/2) | 1); // Odd -> fewer collisions
table = new int[tablelen];
this.tablelen = ((total/2) | 1); // Odd -> fewer collisions
int tablelen = this.tablelen;
this.table = new int[tablelen];
int[] table = this.table;
Arrays.fill(table, ZIP_ENDCHAIN);
int idx = 0;
int hash = 0;
int next = -1;
int hash;
int next;
// list for all meta entries
ArrayList<Integer> metanamesList = null;
@ -1454,10 +1463,11 @@ public class ZipFile implements ZipConstants, Closeable {
// Iterate through the entries in the central directory
int i = 0;
int hsh = 0;
int hsh;
int pos = 0;
int entryPos = CENHDR;
int limit = cen.length - ENDHDR;
while (pos + CENHDR <= limit) {
while (entryPos <= limit) {
if (i >= total) {
// This will only happen if the zip file has an incorrect
// ENDTOT field, which usually means it contains more than
@ -1475,16 +1485,16 @@ public class ZipFile implements ZipConstants, Closeable {
zerror("invalid CEN header (encrypted entry)");
if (method != STORED && method != DEFLATED)
zerror("invalid CEN header (bad compression method: " + method + ")");
if (pos + CENHDR + nlen > limit)
if (entryPos + nlen > limit)
zerror("invalid CEN header (bad header size)");
// Record the CEN offset and the name hash in our hash cell.
hash = hashN(cen, pos + CENHDR, nlen);
hash = zc.hashN(cen, entryPos, nlen);
hsh = (hash & 0x7fffffff) % tablelen;
next = table[hsh];
table[hsh] = idx;
idx = addEntry(idx, hash, next, pos);
// Adds name to metanames.
if (isMetaName(cen, pos + CENHDR, nlen)) {
if (isMetaName(cen, entryPos, nlen)) {
if (metanamesList == null)
metanamesList = new ArrayList<>(4);
metanamesList.add(pos);
@ -1493,7 +1503,7 @@ public class ZipFile implements ZipConstants, Closeable {
// and store it for later. This optimizes lookup
// performance in multi-release jar files
int version = getMetaVersion(cen,
pos + CENHDR + META_INF_LENGTH, nlen - META_INF_LENGTH);
entryPos + META_INF_LENGTH, nlen - META_INF_LENGTH);
if (version > 0) {
if (metaVersionsSet == null)
metaVersionsSet = new TreeSet<>();
@ -1501,7 +1511,8 @@ public class ZipFile implements ZipConstants, Closeable {
}
}
// skip ext and comment
pos += (CENHDR + nlen + elen + clen);
pos = entryPos + nlen + elen + clen;
entryPos = pos + CENHDR;
i++;
}
total = i;
@ -1537,7 +1548,7 @@ public class ZipFile implements ZipConstants, Closeable {
if (total == 0) {
return -1;
}
int hsh = hashN(name, 0, name.length);
int hsh = zc.hashN(name, 0, name.length);
int idx = table[(hsh & 0x7fffffff) % tablelen];
// Search down the target hash chain for a entry whose
@ -1546,20 +1557,22 @@ public class ZipFile implements ZipConstants, Closeable {
if (getEntryHash(idx) == hsh) {
// The CEN name must match the specfied one
int pos = getEntryPos(idx);
byte[] cen = this.cen;
final int nlen = CENNAM(cen, pos);
final int nstart = pos + CENHDR;
int nameoff = pos + CENHDR;
// If addSlash is true, we're testing for name+/ in
// addition to name, unless name is the empty string
// or already ends with a slash
// If addSlash is true and we're using the UTF-8 zip coder,
// we'll directly test for name+/ in addition to name,
// unless name is the empty string or already ends with a
// slash
if (name.length == nlen ||
(addSlash &&
zc.isUTF8() &&
name.length > 0 &&
name.length + 1 == nlen &&
cen[nstart + nlen - 1] == '/' &&
cen[nameoff + nlen - 1] == '/' &&
name[name.length - 1] != '/')) {
boolean matched = true;
int nameoff = pos + CENHDR;
for (int i = 0; i < name.length; i++) {
if (name[i] != cen[nameoff++]) {
matched = false;
@ -1613,7 +1626,7 @@ public class ZipFile implements ZipConstants, Closeable {
&& (name[off++] | 0x20) == 'o'
&& (name[off++] | 0x20) == 'n'
&& (name[off++] | 0x20) == 's'
&& (name[off++] ) == '/')) {
&& (name[off++] ) == '/')) {
return 0;
}
int version = 0;

View File

@ -0,0 +1,258 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @bug 8243254
* @summary Tests a simple set of operations on Zip files in various encodings
* focusing on ensuring metadata is properly encoded and read.
* @run testng TestZipFileEncodings
*/
import org.testng.annotations.AfterClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.zip.CRC32;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipOutputStream;
import static org.testng.Assert.*;
public class TestZipFileEncodings {
private static int NUM_ENTRIES = 100;
private static int METAINF_ENTRIES = 5;
private static int ENTRY_SIZE = 100;
private static final AtomicInteger SEQUENCE = new AtomicInteger(0);
private static Set<Path> paths = new HashSet<>();
private static Random random() {
return ThreadLocalRandom.current();
}
@DataProvider(name = "non-unicode-charsets")
public Object[][] nonUnicodeCharsets() {
return new Object[][] {
{ "ISO-8859-1" },
{ "IBM01149" },
{ "IBM037" },
{ "IBM-Thai" }
};
}
@DataProvider(name = "unicode-charsets")
public Object[][] unicodeCharsets() {
return new Object[][] {
{ "UTF-8" },
{ "UTF-16" },
{ "UTF-16LE" },
{ "UTF-16BE" },
{ "UTF-32" }
};
}
@Test(dataProvider = "non-unicode-charsets")
public void testNonUnicode(String charsetName) throws Throwable {
test(NUM_ENTRIES, 100 + random().nextInt(ENTRY_SIZE), false, Charset.forName(charsetName));
}
@Test(dataProvider = "unicode-charsets")
public void testUnicode(String charsetName) throws Throwable {
test(NUM_ENTRIES, 100 + random().nextInt(ENTRY_SIZE), true, Charset.forName(charsetName));
}
@Test(dataProvider = "non-unicode-charsets")
public void testNonUnicodeManyEntries(String charsetName) throws Throwable {
test(70000, 10, false, Charset.forName(charsetName));
}
@Test(dataProvider = "unicode-charsets")
public void testUnicodeManyEntries(String charsetName) throws Throwable {
test(70000, 10, true, Charset.forName(charsetName));
}
@AfterClass
public void tearDown() {
for (Path path : paths) {
path.toFile().deleteOnExit();
}
}
static void test(int numEntry, int szMax, boolean unicode, Charset cs) throws Throwable {
String name = "zfenc-" + SEQUENCE.incrementAndGet() + ".zip";
Zip zip = new Zip(name, numEntry, szMax, unicode, cs);
doTest(zip);
}
static void checkEqual(ZipEntry x, ZipEntry y) {
assertEquals(x.getName(), y.getName());
assertEquals(x.isDirectory(), y.isDirectory());
assertEquals(x.getMethod(), y.getMethod());
assertEquals((x.getTime() / 2000), y.getTime() / 2000);
assertEquals(x.getSize(), y.getSize());
assertEquals(x.getCompressedSize(), y.getCompressedSize());
assertEquals(x.getCrc(), y.getCrc());
assertEquals(x.getComment(), y.getComment());
}
static void doTest(Zip zip) throws Throwable {
try (ZipFile zf = new ZipFile(zip.name, zip.cs)) {
doTest0(zip, zf);
}
}
static void doTest0(Zip zip, ZipFile zf) throws Throwable {
// (0) check zero-length entry name, no AIOOBE
assertEquals(zf.getEntry(""), null);
List<ZipEntry> list = new ArrayList(zip.entries.keySet());
// check each entry and its bytes
for (ZipEntry ze : list) {
byte[] data = zip.entries.get(ze);
String name = ze.getName();
ZipEntry e = zf.getEntry(name);
checkEqual(e, ze);
if (!e.isDirectory()) {
// check with readAllBytes
try (InputStream is = zf.getInputStream(e)) {
assertEquals(data, is.readAllBytes());
}
int slash = name.indexOf('/');
if (slash > 0) {
ZipEntry dir1 = zf.getEntry(name.substring(0, slash));
ZipEntry dir2 = zf.getEntry(name.substring(0, slash + 1));
assertNotNull(dir1);
assertNotNull(dir2);
assertTrue(dir1.isDirectory());
assertTrue(dir2.isDirectory());
checkEqual(dir1, dir2);
}
} else {
ZipEntry unslashLookup = zf.getEntry(name.substring(0, name.length() - 1));
checkEqual(e, unslashLookup);
}
}
}
private static class Zip {
String name;
Charset cs;
Map<ZipEntry, byte[]> entries;
BasicFileAttributes attrs;
long lastModified;
Zip(String name, int num, int szMax, boolean unicode, Charset cs) {
this.cs = cs;
this.name = name;
entries = new LinkedHashMap<>(num);
try {
Path p = Paths.get(name);
Files.deleteIfExists(p);
paths.add(p);
} catch (Exception x) {
throw (RuntimeException)x;
}
try (FileOutputStream fos = new FileOutputStream(name);
BufferedOutputStream bos = new BufferedOutputStream(fos);
ZipOutputStream zos = new ZipOutputStream(bos, cs))
{
CRC32 crc = new CRC32();
for (int i = 0; i < num; i++) {
String ename = "entry-" + i + "-name-" + random().nextLong();
if (unicode) {
// Provokes compatibility issue with slash handling for
// non-ASCII compatible Unicode encodings
ename = ename + '\u2F2F';
zos.putNextEntry(new ZipEntry(ename + '/'));
ename = ename + '/' + ename;
}
ZipEntry ze = new ZipEntry(ename);
assertTrue(!ze.isDirectory());
writeEntry(zos, crc, ze, ZipEntry.STORED, szMax);
}
// add some manifest entries
zos.putNextEntry(new ZipEntry("META-INF/"));
for (int i = 0; i < METAINF_ENTRIES; i++) {
String meta = "META-INF/" + "entry-" + i + "-metainf-" + random().nextLong();
ZipEntry ze = new ZipEntry(meta);
writeEntry(zos, crc, ze, ZipEntry.STORED, szMax);
}
} catch (Exception x) {
throw (RuntimeException)x;
}
try {
this.attrs = Files.readAttributes(Paths.get(name), BasicFileAttributes.class);
this.lastModified = new File(name).lastModified();
} catch (Exception x) {
throw (RuntimeException)x;
}
}
private void writeEntry(ZipOutputStream zos, CRC32 crc,
ZipEntry ze, int method, int szMax)
throws IOException
{
ze.setMethod(method);
byte[] data = new byte[random().nextInt(szMax + 1)];
random().nextBytes(data);
if (method == ZipEntry.STORED) { // must set size/csize/crc
ze.setSize(data.length);
ze.setCompressedSize(data.length);
crc.reset();
crc.update(data);
ze.setCrc(crc.getValue());
}
ze.setTime(System.currentTimeMillis());
ze.setComment(ze.getName());
zos.putNextEntry(ze);
zos.write(data);
zos.closeEntry();
entries.put(ze, data);
}
}
}