8260617: Merge ZipFile encoding check with the initial hash calculation
Reviewed-by: lancea
This commit is contained in:
parent
ae2c5f07ce
commit
c8de943c1f
@ -54,14 +54,6 @@ class ZipCoder {
|
||||
return new ZipCoder(charset);
|
||||
}
|
||||
|
||||
void checkEncoding(byte[] a, int pos, int nlen) throws ZipException {
|
||||
try {
|
||||
toString(a, pos, nlen);
|
||||
} catch(Exception e) {
|
||||
throw new ZipException("invalid CEN header (bad entry name)");
|
||||
}
|
||||
}
|
||||
|
||||
String toString(byte[] ba, int off, int length) {
|
||||
try {
|
||||
return decoder().decode(ByteBuffer.wrap(ba, off, length)).toString();
|
||||
@ -98,10 +90,6 @@ class ZipCoder {
|
||||
return UTF8.toString(ba, 0, len);
|
||||
}
|
||||
|
||||
// Decodes len bytes of ba, starting at off, by delegating to UTF8.toString.
static String toStringUTF8(byte[] ba, int off, int len) {
|
||||
return UTF8.toString(ba, off, len);
|
||||
}
|
||||
|
||||
// Reports whether this coder is UTF-8; this implementation always answers false.
boolean isUTF8() {
|
||||
return false;
|
||||
}
|
||||
@ -110,15 +98,33 @@ class ZipCoder {
|
||||
// we first decoded the byte sequence to a String, then appended '/' if no
|
||||
// trailing slash was found, then called String.hashCode(). This
|
||||
// normalization ensures we can simplify and speed up lookups.
|
||||
int normalizedHash(byte[] a, int off, int len) {
|
||||
//
|
||||
// Does encoding error checking and hashing in a single pass for efficiency.
|
||||
// On an error, this function will throw CharacterCodingException while the
|
||||
// UTF8ZipCoder override will throw IllegalArgumentException, so we declare
|
||||
// throws Exception to keep things simple.
|
||||
int checkedHash(byte[] a, int off, int len) throws Exception {
|
||||
if (len == 0) {
|
||||
return 0;
|
||||
}
|
||||
return normalizedHashDecode(0, a, off, off + len);
|
||||
|
||||
int h = 0;
|
||||
// cb will be a newly allocated CharBuffer with pos == 0,
|
||||
// arrayOffset == 0, backed by an array.
|
||||
CharBuffer cb = decoder().decode(ByteBuffer.wrap(a, off, len));
|
||||
int limit = cb.limit();
|
||||
char[] decoded = cb.array();
|
||||
for (int i = 0; i < limit; i++) {
|
||||
h = 31 * h + decoded[i];
|
||||
}
|
||||
if (limit > 0 && decoded[limit - 1] != '/') {
|
||||
h = 31 * h + '/';
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
||||
// Matching normalized hash code function for Strings
|
||||
static int normalizedHash(String name) {
|
||||
// Hash function equivalent of checkedHash for String inputs
|
||||
static int hash(String name) {
|
||||
int hsh = name.hashCode();
|
||||
int len = name.length();
|
||||
if (len > 0 && name.charAt(len - 1) != '/') {
|
||||
@ -133,29 +139,6 @@ class ZipCoder {
|
||||
Arrays.mismatch(a, end - slashBytes.length, end, slashBytes, 0, slashBytes.length) == -1;
|
||||
}
|
||||
|
||||
// Implements normalizedHash by decoding byte[] to char[] and then computing
|
||||
// the hash. This is a slow-path used for non-UTF8 charsets and also when
|
||||
// aborting the ASCII fast-path in the UTF8 implementation, so {@code h}
|
||||
// might be a partially calculated hash code
|
||||
int normalizedHashDecode(int h, byte[] a, int off, int end) {
|
||||
try {
|
||||
// cb will be a newly allocated CharBuffer with pos == 0,
|
||||
// arrayOffset == 0, backed by an array.
|
||||
CharBuffer cb = decoder().decode(ByteBuffer.wrap(a, off, end - off));
|
||||
int limit = cb.limit();
|
||||
char[] decoded = cb.array();
|
||||
for (int i = 0; i < limit; i++) {
|
||||
h = 31 * h + decoded[i];
|
||||
}
|
||||
if (limit > 0 && decoded[limit - 1] != '/') {
|
||||
h = 31 * h + '/';
|
||||
}
|
||||
} catch (CharacterCodingException cce) {
|
||||
// Ignore - return the hash code generated so far.
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
||||
private byte[] slashBytes;
|
||||
private final Charset cs;
|
||||
protected CharsetDecoder dec;
|
||||
@ -211,25 +194,6 @@ class ZipCoder {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
void checkEncoding(byte[] a, int pos, int len) throws ZipException {
|
||||
try {
|
||||
int end = pos + len;
|
||||
while (pos < end) {
|
||||
// ASCII fast-path: When checking that a range of bytes is
|
||||
// valid UTF-8, we can avoid some allocation by skipping
|
||||
// past bytes in the 0-127 range
|
||||
if (a[pos] < 0) {
|
||||
ZipCoder.toStringUTF8(a, pos, end - pos);
|
||||
break;
|
||||
}
|
||||
pos++;
|
||||
}
|
||||
} catch(Exception e) {
|
||||
throw new ZipException("invalid CEN header (bad entry name)");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
String toString(byte[] ba, int off, int length) {
|
||||
return JLA.newStringUTF8NoRepl(ba, off, length);
|
||||
@ -241,7 +205,7 @@ class ZipCoder {
|
||||
}
|
||||
|
||||
@Override
|
||||
int normalizedHash(byte[] a, int off, int len) {
|
||||
int checkedHash(byte[] a, int off, int len) throws Exception {
|
||||
if (len == 0) {
|
||||
return 0;
|
||||
}
|
||||
@ -250,18 +214,17 @@ class ZipCoder {
|
||||
int h = 0;
|
||||
while (off < end) {
|
||||
byte b = a[off];
|
||||
if (b < 0) {
|
||||
if (b >= 0) {
|
||||
// ASCII, keep going
|
||||
h = 31 * h + b;
|
||||
off++;
|
||||
} else {
|
||||
// Non-ASCII, fall back to decoding a String
|
||||
// We avoid using decoder() here since the UTF8ZipCoder is
|
||||
// shared and that decoder is not thread safe.
|
||||
// We also avoid the JLA.newStringUTF8NoRepl variant at
|
||||
// this point to avoid throwing exceptions eagerly when
|
||||
// opening ZipFiles (exceptions are expected when accessing
|
||||
// malformed entries.)
|
||||
return normalizedHash(new String(a, end - len, len, UTF_8.INSTANCE));
|
||||
} else {
|
||||
h = 31 * h + b;
|
||||
off++;
|
||||
// We use the JLA.newStringUTF8NoRepl variant to throw
|
||||
// exceptions eagerly when opening ZipFiles
|
||||
return hash(JLA.newStringUTF8NoRepl(a, end - len, len));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -33,6 +33,7 @@ import java.io.File;
|
||||
import java.io.RandomAccessFile;
|
||||
import java.io.UncheckedIOException;
|
||||
import java.lang.ref.Cleaner.Cleanable;
|
||||
import java.nio.charset.CharacterCodingException;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.file.InvalidPathException;
|
||||
import java.nio.file.attribute.BasicFileAttributes;
|
||||
@ -1115,7 +1116,7 @@ public class ZipFile implements ZipConstants, Closeable {
|
||||
// a bootstrap cycle that would leave this initialized as null
|
||||
private static final JavaUtilJarAccess JUJA = SharedSecrets.javaUtilJarAccess();
|
||||
// "META-INF/".length()
|
||||
private static final int META_INF_LENGTH = 9;
|
||||
private static final int META_INF_LEN = 9;
|
||||
private static final int[] EMPTY_META_VERSIONS = new int[0];
|
||||
|
||||
private final Key key; // the key in files
|
||||
@ -1148,16 +1149,49 @@ public class ZipFile implements ZipConstants, Closeable {
|
||||
// private Entry[] entries; // array of hashed cen entry
|
||||
//
|
||||
// To reduce the total size of entries further, we use a int[] here to store 3 "int"
|
||||
// {@code hash}, {@code next and {@code "pos for each entry. The entry can then be
|
||||
// {@code hash}, {@code next} and {@code pos} for each entry. The entry can then be
|
||||
// referred by their index of their positions in the {@code entries}.
|
||||
//
|
||||
private int[] entries; // array of hashed cen entry
|
||||
private int addEntry(int index, int hash, int next, int pos) {
|
||||
entries[index++] = hash;
|
||||
entries[index++] = next;
|
||||
entries[index++] = pos;
|
||||
return index;
|
||||
|
||||
// Checks the entry at offset pos in the CEN, calculates the Entry values as per above,
|
||||
// then returns the length of the entry name.
|
||||
private int checkAndAddEntry(int pos, int index)
|
||||
throws ZipException
|
||||
{
|
||||
byte[] cen = this.cen;
|
||||
if (CENSIG(cen, pos) != CENSIG) {
|
||||
zerror("invalid CEN header (bad signature)");
|
||||
}
|
||||
int method = CENHOW(cen, pos);
|
||||
int flag = CENFLG(cen, pos);
|
||||
if ((flag & 1) != 0) {
|
||||
zerror("invalid CEN header (encrypted entry)");
|
||||
}
|
||||
if (method != STORED && method != DEFLATED) {
|
||||
zerror("invalid CEN header (bad compression method: " + method + ")");
|
||||
}
|
||||
int entryPos = pos + CENHDR;
|
||||
int nlen = CENNAM(cen, pos);
|
||||
if (entryPos + nlen > cen.length - ENDHDR) {
|
||||
zerror("invalid CEN header (bad header size)");
|
||||
}
|
||||
try {
|
||||
ZipCoder zcp = zipCoderForPos(pos);
|
||||
int hash = zcp.checkedHash(cen, entryPos, nlen);
|
||||
int hsh = (hash & 0x7fffffff) % tablelen;
|
||||
int next = table[hsh];
|
||||
table[hsh] = index;
|
||||
// Record the CEN offset and the name hash in our hash cell.
|
||||
entries[index++] = hash;
|
||||
entries[index++] = next;
|
||||
entries[index ] = pos;
|
||||
} catch (Exception e) {
|
||||
zerror("invalid CEN header (bad entry name)");
|
||||
}
|
||||
return nlen;
|
||||
}
|
||||
|
||||
// Returns the name hash, kept in the first of the three slots of the entry starting at index.
private int getEntryHash(int index) { return entries[index]; }
|
||||
// Returns the next-in-chain value, kept in the second of the three slots of the entry starting at index.
private int getEntryNext(int index) { return entries[index + 1]; }
|
||||
// Returns the CEN offset, kept in the third of the three slots of the entry starting at index.
private int getEntryPos(int index) { return entries[index + 2]; }
|
||||
@ -1413,8 +1447,7 @@ public class ZipFile implements ZipConstants, Closeable {
|
||||
}
|
||||
}
|
||||
}
|
||||
zerror("zip END header not found");
|
||||
return null; //make compiler happy
|
||||
throw new ZipException("zip END header not found");
|
||||
}
|
||||
|
||||
// Reads zip file central directory.
|
||||
@ -1444,24 +1477,22 @@ public class ZipFile implements ZipConstants, Closeable {
|
||||
if (readFullyAt(cen, 0, cen.length, cenpos) != end.cenlen + ENDHDR) {
|
||||
zerror("read CEN tables failed");
|
||||
}
|
||||
total = end.centot;
|
||||
this.total = end.centot;
|
||||
} else {
|
||||
cen = this.cen;
|
||||
total = knownTotal;
|
||||
this.total = knownTotal;
|
||||
}
|
||||
// hash table for entries
|
||||
entries = new int[total * 3];
|
||||
int entriesLength = this.total * 3;
|
||||
entries = new int[entriesLength];
|
||||
|
||||
this.tablelen = ((total/2) | 1); // Odd -> fewer collisions
|
||||
int tablelen = this.tablelen;
|
||||
int tablelen = ((total/2) | 1); // Odd -> fewer collisions
|
||||
this.tablelen = tablelen;
|
||||
|
||||
this.table = new int[tablelen];
|
||||
int[] table = this.table;
|
||||
int[] table = new int[tablelen];
|
||||
this.table = table;
|
||||
|
||||
Arrays.fill(table, ZIP_ENDCHAIN);
|
||||
int idx = 0;
|
||||
int hash;
|
||||
int next;
|
||||
|
||||
// list for all meta entries
|
||||
ArrayList<Integer> signatureNames = null;
|
||||
@ -1469,48 +1500,30 @@ public class ZipFile implements ZipConstants, Closeable {
|
||||
Set<Integer> metaVersionsSet = null;
|
||||
|
||||
// Iterate through the entries in the central directory
|
||||
int i = 0;
|
||||
int hsh;
|
||||
int idx = 0; // Index into the entries array
|
||||
int pos = 0;
|
||||
int entryPos = CENHDR;
|
||||
int limit = cen.length - ENDHDR;
|
||||
while (entryPos <= limit) {
|
||||
if (i >= total) {
|
||||
if (idx >= entriesLength) {
|
||||
// This will only happen if the zip file has an incorrect
|
||||
// ENDTOT field, which usually means it contains more than
|
||||
// 65535 entries.
|
||||
initCEN(countCENHeaders(cen, limit));
|
||||
return;
|
||||
}
|
||||
if (CENSIG(cen, pos) != CENSIG)
|
||||
zerror("invalid CEN header (bad signature)");
|
||||
int method = CENHOW(cen, pos);
|
||||
int nlen = CENNAM(cen, pos);
|
||||
int elen = CENEXT(cen, pos);
|
||||
int clen = CENCOM(cen, pos);
|
||||
int flag = CENFLG(cen, pos);
|
||||
if ((flag & 1) != 0)
|
||||
zerror("invalid CEN header (encrypted entry)");
|
||||
if (method != STORED && method != DEFLATED)
|
||||
zerror("invalid CEN header (bad compression method: " + method + ")");
|
||||
if (entryPos + nlen > limit)
|
||||
zerror("invalid CEN header (bad header size)");
|
||||
ZipCoder zcp = zipCoderForPos(pos);
|
||||
zcp.checkEncoding(cen, pos + CENHDR, nlen);
|
||||
// Record the CEN offset and the name hash in our hash cell.
|
||||
hash = zcp.normalizedHash(cen, entryPos, nlen);
|
||||
hsh = (hash & 0x7fffffff) % tablelen;
|
||||
next = table[hsh];
|
||||
table[hsh] = idx;
|
||||
idx = addEntry(idx, hash, next, pos);
|
||||
|
||||
// Checks the entry and adds values to entries[idx ... idx+2]
|
||||
int nlen = checkAndAddEntry(pos, idx);
|
||||
idx += 3;
|
||||
|
||||
// Adds name to metanames.
|
||||
if (isMetaName(cen, entryPos, nlen)) {
|
||||
// nlen is at least META_INF_LENGTH
|
||||
if (isManifestName(cen, entryPos + META_INF_LENGTH,
|
||||
nlen - META_INF_LENGTH)) {
|
||||
if (isManifestName(entryPos + META_INF_LEN, nlen - META_INF_LEN)) {
|
||||
manifestPos = pos;
|
||||
} else {
|
||||
if (isSignatureRelated(cen, entryPos, nlen)) {
|
||||
if (isSignatureRelated(entryPos, nlen)) {
|
||||
if (signatureNames == null)
|
||||
signatureNames = new ArrayList<>(4);
|
||||
signatureNames.add(pos);
|
||||
@ -1519,8 +1532,7 @@ public class ZipFile implements ZipConstants, Closeable {
|
||||
// If this is a versioned entry, parse the version
|
||||
// and store it for later. This optimizes lookup
|
||||
// performance in multi-release jar files
|
||||
int version = getMetaVersion(cen,
|
||||
entryPos + META_INF_LENGTH, nlen - META_INF_LENGTH);
|
||||
int version = getMetaVersion(entryPos + META_INF_LEN, nlen - META_INF_LEN);
|
||||
if (version > 0) {
|
||||
if (metaVersionsSet == null)
|
||||
metaVersionsSet = new TreeSet<>();
|
||||
@ -1528,12 +1540,14 @@ public class ZipFile implements ZipConstants, Closeable {
|
||||
}
|
||||
}
|
||||
}
|
||||
// skip ext and comment
|
||||
pos = entryPos + nlen + elen + clen;
|
||||
// skip to the start of the next entry
|
||||
pos = nextEntryPos(pos, entryPos, nlen);
|
||||
entryPos = pos + CENHDR;
|
||||
i++;
|
||||
}
|
||||
total = i;
|
||||
|
||||
// Adjust the total entries
|
||||
this.total = idx / 3;
|
||||
|
||||
if (signatureNames != null) {
|
||||
int len = signatureNames.size();
|
||||
signatureMetaNames = new int[len];
|
||||
@ -1555,6 +1569,10 @@ public class ZipFile implements ZipConstants, Closeable {
|
||||
}
|
||||
}
|
||||
|
||||
private int nextEntryPos(int pos, int entryPos, int nlen) {
|
||||
return entryPos + nlen + CENCOM(cen, pos) + CENEXT(cen, pos);
|
||||
}
|
||||
|
||||
// Always throws a ZipException carrying msg; this method never returns normally.
private static void zerror(String msg) throws ZipException {
|
||||
throw new ZipException(msg);
|
||||
}
|
||||
@ -1568,7 +1586,7 @@ public class ZipFile implements ZipConstants, Closeable {
|
||||
return -1;
|
||||
}
|
||||
|
||||
int hsh = ZipCoder.normalizedHash(name);
|
||||
int hsh = ZipCoder.hash(name);
|
||||
int idx = table[(hsh & 0x7fffffff) % tablelen];
|
||||
|
||||
// Search down the target hash chain for a entry whose
|
||||
@ -1620,7 +1638,7 @@ public class ZipFile implements ZipConstants, Closeable {
|
||||
private static boolean isMetaName(byte[] name, int off, int len) {
|
||||
// Use the "oldest ASCII trick in the book":
|
||||
// ch | 0x20 == Character.toLowerCase(ch)
|
||||
return len > META_INF_LENGTH // "META-INF/".length()
|
||||
return len > META_INF_LEN // "META-INF/".length()
|
||||
&& name[off + len - 1] != '/' // non-directory
|
||||
&& (name[off++] | 0x20) == 'm'
|
||||
&& (name[off++] | 0x20) == 'e'
|
||||
@ -1636,7 +1654,8 @@ public class ZipFile implements ZipConstants, Closeable {
|
||||
/*
|
||||
* Check if the bytes represents a name equals to MANIFEST.MF
|
||||
*/
|
||||
private static boolean isManifestName(byte[] name, int off, int len) {
|
||||
private boolean isManifestName(int off, int len) {
|
||||
byte[] name = cen;
|
||||
return (len == 11 // "MANIFEST.MF".length()
|
||||
&& (name[off++] | 0x20) == 'm'
|
||||
&& (name[off++] | 0x20) == 'a'
|
||||
@ -1651,11 +1670,12 @@ public class ZipFile implements ZipConstants, Closeable {
|
||||
&& (name[off] | 0x20) == 'f');
|
||||
}
|
||||
|
||||
private static boolean isSignatureRelated(byte[] name, int off, int len) {
|
||||
private boolean isSignatureRelated(int off, int len) {
|
||||
// Only called when isMetaName(name, off, len) is true, which means
|
||||
// len is at least META_INF_LENGTH
|
||||
// assert isMetaName(name, off, len)
|
||||
boolean signatureRelated = false;
|
||||
byte[] name = cen;
|
||||
if (name[off + len - 3] == '.') {
|
||||
// Check if entry ends with .EC and .SF
|
||||
int b1 = name[off + len - 2] | 0x20;
|
||||
@ -1685,7 +1705,8 @@ public class ZipFile implements ZipConstants, Closeable {
|
||||
* followed by a '/', then return that integer value.
|
||||
* Otherwise, return 0
|
||||
*/
|
||||
private static int getMetaVersion(byte[] name, int off, int len) {
|
||||
private int getMetaVersion(int off, int len) {
|
||||
byte[] name = cen;
|
||||
int nend = off + len;
|
||||
if (!(len > 10 // "versions//".length()
|
||||
&& name[off + len - 1] != '/' // non-directory
|
||||
|
Loading…
Reference in New Issue
Block a user