8260617: Merge ZipFile encoding check with the initial hash calculation

Reviewed-by: lancea
This commit is contained in:
Claes Redestad 2021-02-03 11:43:17 +00:00
parent ae2c5f07ce
commit c8de943c1f
2 changed files with 109 additions and 125 deletions

View File

@ -54,14 +54,6 @@ class ZipCoder {
return new ZipCoder(charset);
}
// Verifies that the nlen bytes of a starting at pos can be decoded as an
// entry name. The decoded String is discarded; only success matters.
//
// Throws ZipException ("invalid CEN header (bad entry name)") when decoding
// fails for any reason. The original failure is attached as the cause so
// that the underlying decoding error is not lost.
void checkEncoding(byte[] a, int pos, int nlen) throws ZipException {
    try {
        toString(a, pos, nlen);
    } catch (Exception e) {
        // ZipException has no (message, cause) constructor, so chain the
        // cause explicitly before throwing.
        ZipException ze = new ZipException("invalid CEN header (bad entry name)");
        ze.initCause(e);
        throw ze;
    }
}
String toString(byte[] ba, int off, int length) {
try {
return decoder().decode(ByteBuffer.wrap(ba, off, length)).toString();
@ -98,10 +90,6 @@ class ZipCoder {
return UTF8.toString(ba, 0, len);
}
// Decodes len bytes of ba, starting at off, by delegating to UTF8.toString.
static String toStringUTF8(byte[] ba, int off, int len) {
return UTF8.toString(ba, off, len);
}
// Reports whether this coder is a UTF-8 coder. This implementation always
// returns false.
boolean isUTF8() {
return false;
}
@ -110,15 +98,33 @@ class ZipCoder {
// we first decoded the byte sequence to a String, then appended '/' if no
// trailing slash was found, then called String.hashCode(). This
// normalization ensures we can simplify and speed up lookups.
int normalizedHash(byte[] a, int off, int len) {
//
// Does encoding error checking and hashing in a single pass for efficiency.
// On an error, this function will throw CharacterCodingException while the
// UTF8ZipCoder override will throw IllegalArgumentException, so we declare
// throws Exception to keep things simple.
// Decodes len bytes of a starting at off and returns the normalized hash of
// the decoded characters. Returns 0 for an empty range. Declared to throw
// Exception because decoding failures are propagated to the caller.
int checkedHash(byte[] a, int off, int len) throws Exception {
if (len == 0) {
return 0;
}
// NOTE(review): the return below looks like the pre-change line of this diff
// shown next to its replacement (the statements that follow) - confirm which
// side is current before relying on this span as compilable code.
return normalizedHashDecode(0, a, off, off + len);
int h = 0;
// cb will be a newly allocated CharBuffer with pos == 0,
// arrayOffset == 0, backed by an array.
CharBuffer cb = decoder().decode(ByteBuffer.wrap(a, off, len));
int limit = cb.limit();
char[] decoded = cb.array();
// Same recurrence as String.hashCode(): h = 31 * h + c for each decoded char.
// Only the first limit chars of the backing array are hashed.
for (int i = 0; i < limit; i++) {
h = 31 * h + decoded[i];
}
// Normalization: hash as if the name ended with '/' when it does not.
if (limit > 0 && decoded[limit - 1] != '/') {
h = 31 * h + '/';
}
return h;
}
// Matching normalized hash code function for Strings
static int normalizedHash(String name) {
// Hash function equivalent of checkedHash for String inputs
static int hash(String name) {
int hsh = name.hashCode();
int len = name.length();
if (len > 0 && name.charAt(len - 1) != '/') {
@ -133,29 +139,6 @@ class ZipCoder {
Arrays.mismatch(a, end - slashBytes.length, end, slashBytes, 0, slashBytes.length) == -1;
}
// Implements normalizedHash by decoding byte[] to char[] and then computing
// the hash. This is a slow-path used for non-UTF8 charsets and also when
// aborting the ASCII fast-path in the UTF8 implementation, so {@code h}
// might be a partially calculated hash code
// Continues the normalized hash h over the bytes a[off, end) by decoding them
// to chars with decoder() and folding each char in as h = 31 * h + c. If the
// decoded text does not end with '/', one is folded in as well.
// When the bytes cannot be decoded, h is returned unchanged.
int normalizedHashDecode(int h, byte[] a, int off, int end) {
    final CharBuffer chars;
    try {
        // The decoder returns a newly allocated, array-backed CharBuffer
        // with pos == 0 and arrayOffset == 0.
        chars = decoder().decode(ByteBuffer.wrap(a, off, end - off));
    } catch (CharacterCodingException ignored) {
        // Deliberately best-effort: keep the hash computed so far.
        return h;
    }
    final int count = chars.limit();
    final char[] text = chars.array();
    int hash = h;
    int i = 0;
    while (i < count) {
        hash = 31 * hash + text[i];
        i++;
    }
    if (count > 0 && text[count - 1] != '/') {
        hash = 31 * hash + '/';
    }
    return hash;
}
private byte[] slashBytes;
private final Charset cs;
protected CharsetDecoder dec;
@ -211,25 +194,6 @@ class ZipCoder {
return true;
}
// Verifies that the len bytes of a starting at pos can be decoded as an entry
// name. Bytes in the 0-127 range are skipped without allocating; at the first
// byte outside that range the remainder of the range is decoded through
// ZipCoder.toStringUTF8 and the scan stops.
//
// Throws ZipException ("invalid CEN header (bad entry name)") when the check
// fails for any reason. The original failure is attached as the cause so
// that the underlying decoding error is not lost.
@Override
void checkEncoding(byte[] a, int pos, int len) throws ZipException {
    try {
        int end = pos + len;
        for (int i = pos; i < end; i++) {
            // ASCII fast-path: a negative byte is the first non-ASCII one.
            if (a[i] < 0) {
                ZipCoder.toStringUTF8(a, i, end - i);
                break;
            }
        }
    } catch (Exception e) {
        // ZipException has no (message, cause) constructor, so chain the
        // cause explicitly before throwing.
        ZipException ze = new ZipException("invalid CEN header (bad entry name)");
        ze.initCause(e);
        throw ze;
    }
}
@Override
String toString(byte[] ba, int off, int length) {
return JLA.newStringUTF8NoRepl(ba, off, length);
@ -241,7 +205,7 @@ class ZipCoder {
}
@Override
int normalizedHash(byte[] a, int off, int len) {
int checkedHash(byte[] a, int off, int len) throws Exception {
if (len == 0) {
return 0;
}
@ -250,18 +214,17 @@ class ZipCoder {
int h = 0;
while (off < end) {
byte b = a[off];
if (b < 0) {
if (b >= 0) {
// ASCII, keep going
h = 31 * h + b;
off++;
} else {
// Non-ASCII, fall back to decoding a String
// We avoid using decoder() here since the UTF8ZipCoder is
// shared and that decoder is not thread safe.
// We also avoid the JLA.newStringUTF8NoRepl variant at
// this point to avoid throwing exceptions eagerly when
// opening ZipFiles (exceptions are expected when accessing
// malformed entries.)
return normalizedHash(new String(a, end - len, len, UTF_8.INSTANCE));
} else {
h = 31 * h + b;
off++;
// We use the JLA.newStringUTF8NoRepl variant to throw
// exceptions eagerly when opening ZipFiles
return hash(JLA.newStringUTF8NoRepl(a, end - len, len));
}
}

View File

@ -33,6 +33,7 @@ import java.io.File;
import java.io.RandomAccessFile;
import java.io.UncheckedIOException;
import java.lang.ref.Cleaner.Cleanable;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.file.InvalidPathException;
import java.nio.file.attribute.BasicFileAttributes;
@ -1115,7 +1116,7 @@ public class ZipFile implements ZipConstants, Closeable {
// a bootstrap cycle that would leave this initialized as null
private static final JavaUtilJarAccess JUJA = SharedSecrets.javaUtilJarAccess();
// "META-INF/".length()
private static final int META_INF_LENGTH = 9;
private static final int META_INF_LEN = 9;
private static final int[] EMPTY_META_VERSIONS = new int[0];
private final Key key; // the key in files
@ -1148,16 +1149,49 @@ public class ZipFile implements ZipConstants, Closeable {
// private Entry[] entries; // array of hashed cen entry
//
// To reduce the total size of entries further, we use a int[] here to store 3 "int"
// {@code hash}, {@code next and {@code "pos for each entry. The entry can then be
// {@code hash}, {@code next} and {@code pos} for each entry. The entry can then be
// referred to by the index of its position in {@code entries}.
//
private int[] entries; // array of hashed cen entry
private int addEntry(int index, int hash, int next, int pos) {
entries[index++] = hash;
entries[index++] = next;
entries[index++] = pos;
return index;
// Validates the CEN entry that starts at offset pos and records it in the
// hash table: entries[index], entries[index + 1] and entries[index + 2]
// receive the name hash, the previous head of the hash chain and pos.
// Returns the length of the entry name.
//
// Throws ZipException if the signature is wrong, the entry is encrypted, the
// compression method is neither STORED nor DEFLATED, the name runs past the
// end of the CEN, or hashing/recording the name fails.
private int checkAndAddEntry(int pos, int index)
    throws ZipException
{
    final byte[] cenBytes = this.cen;
    if (CENSIG(cenBytes, pos) != CENSIG) {
        zerror("invalid CEN header (bad signature)");
    }
    final int method = CENHOW(cenBytes, pos);
    final boolean encrypted = (CENFLG(cenBytes, pos) & 1) != 0;
    if (encrypted) {
        zerror("invalid CEN header (encrypted entry)");
    }
    if (!(method == STORED || method == DEFLATED)) {
        zerror("invalid CEN header (bad compression method: " + method + ")");
    }
    final int nameStart = pos + CENHDR;
    final int nlen = CENNAM(cenBytes, pos);
    if (nameStart + nlen > cenBytes.length - ENDHDR) {
        zerror("invalid CEN header (bad header size)");
    }
    try {
        final int hash = zipCoderForPos(pos).checkedHash(cenBytes, nameStart, nlen);
        // Push this entry onto the front of its bucket's chain.
        final int bucket = (hash & 0x7fffffff) % tablelen;
        final int next = table[bucket];
        table[bucket] = index;
        // Record the name hash, the chain link and the CEN offset.
        entries[index] = hash;
        entries[index + 1] = next;
        entries[index + 2] = pos;
    } catch (Exception e) {
        zerror("invalid CEN header (bad entry name)");
    }
    return nlen;
}
// Accessors for the three ints stored per entry in entries, where index is
// the position of the entry's first slot:
// [index] = hash, [index + 1] = next, [index + 2] = pos.
private int getEntryHash(int index) { return entries[index]; }
private int getEntryNext(int index) { return entries[index + 1]; }
private int getEntryPos(int index) { return entries[index + 2]; }
@ -1413,8 +1447,7 @@ public class ZipFile implements ZipConstants, Closeable {
}
}
}
zerror("zip END header not found");
return null; //make compiler happy
throw new ZipException("zip END header not found");
}
// Reads zip file central directory.
@ -1444,24 +1477,22 @@ public class ZipFile implements ZipConstants, Closeable {
if (readFullyAt(cen, 0, cen.length, cenpos) != end.cenlen + ENDHDR) {
zerror("read CEN tables failed");
}
total = end.centot;
this.total = end.centot;
} else {
cen = this.cen;
total = knownTotal;
this.total = knownTotal;
}
// hash table for entries
entries = new int[total * 3];
int entriesLength = this.total * 3;
entries = new int[entriesLength];
this.tablelen = ((total/2) | 1); // Odd -> fewer collisions
int tablelen = this.tablelen;
int tablelen = ((total/2) | 1); // Odd -> fewer collisions
this.tablelen = tablelen;
this.table = new int[tablelen];
int[] table = this.table;
int[] table = new int[tablelen];
this.table = table;
Arrays.fill(table, ZIP_ENDCHAIN);
int idx = 0;
int hash;
int next;
// list for all meta entries
ArrayList<Integer> signatureNames = null;
@ -1469,48 +1500,30 @@ public class ZipFile implements ZipConstants, Closeable {
Set<Integer> metaVersionsSet = null;
// Iterate through the entries in the central directory
int i = 0;
int hsh;
int idx = 0; // Index into the entries array
int pos = 0;
int entryPos = CENHDR;
int limit = cen.length - ENDHDR;
while (entryPos <= limit) {
if (i >= total) {
if (idx >= entriesLength) {
// This will only happen if the zip file has an incorrect
// ENDTOT field, which usually means it contains more than
// 65535 entries.
initCEN(countCENHeaders(cen, limit));
return;
}
if (CENSIG(cen, pos) != CENSIG)
zerror("invalid CEN header (bad signature)");
int method = CENHOW(cen, pos);
int nlen = CENNAM(cen, pos);
int elen = CENEXT(cen, pos);
int clen = CENCOM(cen, pos);
int flag = CENFLG(cen, pos);
if ((flag & 1) != 0)
zerror("invalid CEN header (encrypted entry)");
if (method != STORED && method != DEFLATED)
zerror("invalid CEN header (bad compression method: " + method + ")");
if (entryPos + nlen > limit)
zerror("invalid CEN header (bad header size)");
ZipCoder zcp = zipCoderForPos(pos);
zcp.checkEncoding(cen, pos + CENHDR, nlen);
// Record the CEN offset and the name hash in our hash cell.
hash = zcp.normalizedHash(cen, entryPos, nlen);
hsh = (hash & 0x7fffffff) % tablelen;
next = table[hsh];
table[hsh] = idx;
idx = addEntry(idx, hash, next, pos);
// Checks the entry and adds values to entries[idx ... idx+2]
int nlen = checkAndAddEntry(pos, idx);
idx += 3;
// Adds name to metanames.
if (isMetaName(cen, entryPos, nlen)) {
// nlen is greater than META_INF_LEN (guaranteed by isMetaName)
if (isManifestName(cen, entryPos + META_INF_LENGTH,
nlen - META_INF_LENGTH)) {
if (isManifestName(entryPos + META_INF_LEN, nlen - META_INF_LEN)) {
manifestPos = pos;
} else {
if (isSignatureRelated(cen, entryPos, nlen)) {
if (isSignatureRelated(entryPos, nlen)) {
if (signatureNames == null)
signatureNames = new ArrayList<>(4);
signatureNames.add(pos);
@ -1519,8 +1532,7 @@ public class ZipFile implements ZipConstants, Closeable {
// If this is a versioned entry, parse the version
// and store it for later. This optimizes lookup
// performance in multi-release jar files
int version = getMetaVersion(cen,
entryPos + META_INF_LENGTH, nlen - META_INF_LENGTH);
int version = getMetaVersion(entryPos + META_INF_LEN, nlen - META_INF_LEN);
if (version > 0) {
if (metaVersionsSet == null)
metaVersionsSet = new TreeSet<>();
@ -1528,12 +1540,14 @@ public class ZipFile implements ZipConstants, Closeable {
}
}
}
// skip ext and comment
pos = entryPos + nlen + elen + clen;
// skip to the start of the next entry
pos = nextEntryPos(pos, entryPos, nlen);
entryPos = pos + CENHDR;
i++;
}
total = i;
// Adjust the total entries
this.total = idx / 3;
if (signatureNames != null) {
int len = signatureNames.size();
signatureMetaNames = new int[len];
@ -1555,6 +1569,10 @@ public class ZipFile implements ZipConstants, Closeable {
}
}
// Returns the CEN offset of the entry that follows the one at pos: the
// offset of this entry's name (entryPos) plus the name length (nlen) and the
// CENCOM and CENEXT lengths read from this entry's header.
private int nextEntryPos(int pos, int entryPos, int nlen) {
    final int commentLen = CENCOM(cen, pos);
    final int extraLen = CENEXT(cen, pos);
    return entryPos + nlen + commentLen + extraLen;
}
// Always throws a ZipException carrying msg; never returns normally.
private static void zerror(String msg) throws ZipException {
throw new ZipException(msg);
}
@ -1568,7 +1586,7 @@ public class ZipFile implements ZipConstants, Closeable {
return -1;
}
int hsh = ZipCoder.normalizedHash(name);
int hsh = ZipCoder.hash(name);
int idx = table[(hsh & 0x7fffffff) % tablelen];
// Search down the target hash chain for an entry whose
@ -1620,7 +1638,7 @@ public class ZipFile implements ZipConstants, Closeable {
private static boolean isMetaName(byte[] name, int off, int len) {
// Use the "oldest ASCII trick in the book":
// ch | 0x20 == Character.toLowerCase(ch)
return len > META_INF_LENGTH // "META-INF/".length()
return len > META_INF_LEN // "META-INF/".length()
&& name[off + len - 1] != '/' // non-directory
&& (name[off++] | 0x20) == 'm'
&& (name[off++] | 0x20) == 'e'
@ -1636,7 +1654,8 @@ public class ZipFile implements ZipConstants, Closeable {
/*
* Checks whether the bytes represent a name equal to MANIFEST.MF, ignoring ASCII case
*/
private static boolean isManifestName(byte[] name, int off, int len) {
private boolean isManifestName(int off, int len) {
byte[] name = cen;
return (len == 11 // "MANIFEST.MF".length()
&& (name[off++] | 0x20) == 'm'
&& (name[off++] | 0x20) == 'a'
@ -1651,11 +1670,12 @@ public class ZipFile implements ZipConstants, Closeable {
&& (name[off] | 0x20) == 'f');
}
private static boolean isSignatureRelated(byte[] name, int off, int len) {
private boolean isSignatureRelated(int off, int len) {
// Only called when isMetaName(name, off, len) is true, which means
// len is greater than META_INF_LEN
// assert isMetaName(name, off, len)
boolean signatureRelated = false;
byte[] name = cen;
if (name[off + len - 3] == '.') {
// Check if entry ends with .EC or .SF
int b1 = name[off + len - 2] | 0x20;
@ -1685,7 +1705,8 @@ public class ZipFile implements ZipConstants, Closeable {
* followed by a '/', then return that integer value.
* Otherwise, return 0
*/
private static int getMetaVersion(byte[] name, int off, int len) {
private int getMetaVersion(int off, int len) {
byte[] name = cen;
int nend = off + len;
if (!(len > 10 // "versions//".length()
&& name[off + len - 1] != '/' // non-directory