From 8ab93c84ecc491e7b98f10a7fd58406b203f82a6 Mon Sep 17 00:00:00 2001 From: Martin Buchholz Date: Wed, 25 Mar 2015 15:36:43 -0700 Subject: [PATCH] 8073158: zip files with total entry count 0xFFFF need not be ZIP64 files Minor rewrite of crufty zip implementation in parse_manifest.c Reviewed-by: sherman --- .../share/native/libjli/manifest_info.h | 2 + .../share/native/libjli/parse_manifest.c | 249 ++++++++++-------- jdk/test/java/util/zip/EntryCount64k.java | 75 +++++- 3 files changed, 197 insertions(+), 129 deletions(-) diff --git a/jdk/src/java.base/share/native/libjli/manifest_info.h b/jdk/src/java.base/share/native/libjli/manifest_info.h index 5f9773e0bca..1e04058f87e 100644 --- a/jdk/src/java.base/share/native/libjli/manifest_info.h +++ b/jdk/src/java.base/share/native/libjli/manifest_info.h @@ -109,6 +109,8 @@ /* * Macros for getting end of central directory header (END) fields */ +#define ENDNMD(b) SH(b, 4) /* number of this disk */ +#define ENDDSK(b) SH(b, 6) /* disk number of start */ #define ENDSUB(b) SH(b, 8) /* number of entries on this disk */ #define ENDTOT(b) SH(b, 10) /* total number of entries */ #define ENDSIZ(b) LG(b, 12) /* central directory size */ diff --git a/jdk/src/java.base/share/native/libjli/parse_manifest.c b/jdk/src/java.base/share/native/libjli/parse_manifest.c index 9ce82c1a1d6..cc0bc370f2f 100644 --- a/jdk/src/java.base/share/native/libjli/parse_manifest.c +++ b/jdk/src/java.base/share/native/libjli/parse_manifest.c @@ -111,52 +111,127 @@ inflate_file(int fd, zentry *entry, int *size_out) return (NULL); } -static jboolean zip64_present = JNI_FALSE; +/* + * Implementation notes: + * + * This is a zip format reader for seekable files, that tolerates + * leading and trailing garbage, and tolerates having had internal + * offsets adjusted for leading garbage (as with Info-Zip's zip -A). + * + * We find the end header by scanning backwards from the end of the + * file for the end signature. This may fail in the presence of + * trailing garbage or a ZIP file comment that contains binary data. + * Similarly, the ZIP64 end header may need to be located by scanning + * backwards from the end header. It may be misidentified, but this + * is very unlikely to happen in practice without adversarial input. + * + * The zip file format is documented at: + * https://www.pkware.com/documents/casestudies/APPNOTE.TXT + * + * TODO: more informative error messages + */ + +/** Reads count bytes from fd at position pos into given buffer. */ +static jboolean +readAt(int fd, jlong pos, size_t count, void *buf) { + return (pos >= 0 + && JLI_Lseek(fd, pos, SEEK_SET) == pos + && read(fd, buf, count) == (jlong) count); +} + /* - * Checks to see if we have ZIP64 archive, and save - * the check for later use + * Tells whether given header values (obtained from either ZIP64 or + * non-ZIP64 header) appear to be correct, by checking the first LOC + * and CEN headers. + */ +static jboolean +is_valid_end_header(int fd, jlong endpos, + jlong censiz, jlong cenoff, jlong entries) { + Byte cenhdr[CENHDR]; + Byte lochdr[LOCHDR]; + // Expected offset of the first central directory header + jlong censtart = endpos - censiz; + // Expected position within the file that offsets are relative to + jlong base_offset = endpos - (censiz + cenoff); + return censtart >= 0 && cenoff >= 0 && + (censiz == 0 || + // Validate first CEN and LOC header signatures. + // Central directory must come directly before the end header. + (readAt(fd, censtart, CENHDR, cenhdr) + && CENSIG_AT(cenhdr) + && readAt(fd, base_offset + CENOFF(cenhdr), LOCHDR, lochdr) + && LOCSIG_AT(lochdr) + && CENNAM(cenhdr) == LOCNAM(lochdr))); +} + +/* + * Tells whether p appears to be pointing at a valid ZIP64 end header. + * Values censiz, cenoff, and entries are the corresponding values + * from the non-ZIP64 end header. We perform extra checks to avoid + * misidentifying data from the last entry as a ZIP64 end header. + */ +static jboolean +is_zip64_endhdr(int fd, const Byte *p, jlong end64pos, + jlong censiz, jlong cenoff, jlong entries) { + if (ZIP64_ENDSIG_AT(p)) { + jlong censiz64 = ZIP64_ENDSIZ(p); + jlong cenoff64 = ZIP64_ENDOFF(p); + jlong entries64 = ZIP64_ENDTOT(p); + return (censiz64 == censiz || censiz == ZIP64_MAGICVAL) + && (cenoff64 == cenoff || cenoff == ZIP64_MAGICVAL) + && (entries64 == entries || entries == ZIP64_MAGICCOUNT) + && is_valid_end_header(fd, end64pos, censiz64, cenoff64, entries64); + } + return JNI_FALSE; +} + +/* + * Given a non-ZIP64 end header located at endhdr and endpos, look for + * an adjacent ZIP64 end header, finding the base offset and censtart + * from the ZIP64 header if available, else from the non-ZIP64 header. + * @return 0 if successful, -1 in case of failure */ static int -haveZIP64(Byte *p) { - jlong cenlen, cenoff, centot; - cenlen = ENDSIZ(p); - cenoff = ENDOFF(p); - centot = ENDTOT(p); - zip64_present = (cenlen == ZIP64_MAGICVAL || - cenoff == ZIP64_MAGICVAL || - centot == ZIP64_MAGICCOUNT); - return zip64_present; -} - -static jlong -find_end64(int fd, Byte *ep, jlong pos) +find_positions64(int fd, const Byte * const endhdr, const jlong endpos, + jlong* base_offset, jlong* censtart) { + jlong censiz = ENDSIZ(endhdr); + jlong cenoff = ENDOFF(endhdr); + jlong entries = ENDTOT(endhdr); jlong end64pos; - jlong bytes; - if ((end64pos = JLI_Lseek(fd, pos - ZIP64_LOCHDR, SEEK_SET)) < (jlong)0) - return -1; - if ((bytes = read(fd, ep, ZIP64_LOCHDR)) < 0) - return -1; - if (ZIP64_LOCSIG_AT(ep)) - return end64pos; - return -1; + Byte buf[ZIP64_ENDHDR + ZIP64_LOCHDR]; + if (censiz + cenoff != endpos + && (end64pos = endpos - sizeof(buf)) >= (jlong)0 + && readAt(fd, end64pos, sizeof(buf), buf) + && ZIP64_LOCSIG_AT(buf + ZIP64_ENDHDR) + && (jlong) ZIP64_LOCDSK(buf + ZIP64_ENDHDR) == ENDDSK(endhdr) + && (is_zip64_endhdr(fd, buf, end64pos, censiz, cenoff, entries) + || // A variable sized "zip64 extensible data sector" ? + ((end64pos = ZIP64_LOCOFF(buf + ZIP64_ENDHDR)) >= (jlong)0 + && readAt(fd, end64pos, ZIP64_ENDHDR, buf) + && is_zip64_endhdr(fd, buf, end64pos, censiz, cenoff, entries))) + ) { + *censtart = end64pos - ZIP64_ENDSIZ(buf); + *base_offset = *censtart - ZIP64_ENDOFF(buf); + } else { + if (!is_valid_end_header(fd, endpos, censiz, cenoff, entries)) + return -1; + *censtart = endpos - censiz; + *base_offset = *censtart - cenoff; + } + return 0; } /* - * A very little used routine to handle the case that zip file has - * a comment at the end. Believe it or not, the only way to find the - * END record is to walk backwards, byte by bloody byte looking for - * the END record signature. + * Finds the base offset and censtart of the zip file. * - * fd: File descriptor of the jar file. - * eb: Pointer to a buffer to receive a copy of the END header. - * - * Returns the offset of the END record in the file on success, - * -1 on failure. + * @param fd file descriptor of the jar file + * @param eb scratch buffer + * @return 0 if successful, -1 in case of failure */ -static jlong -find_end(int fd, Byte *eb) +static int +find_positions(int fd, Byte *eb, jlong* base_offset, jlong* censtart) { jlong len; jlong pos; @@ -177,7 +252,7 @@ find_end(int fd, Byte *eb) if ((bytes = read(fd, eb, ENDHDR)) < 0) return (-1); if (ENDSIG_AT(eb)) { - return haveZIP64(eb) ? find_end64(fd, eb, pos) : pos; + return find_positions64(fd, eb, pos, base_offset, censtart); } /* @@ -208,7 +283,7 @@ find_end(int fd, Byte *eb) (void) memcpy(eb, cp, ENDHDR); free(buffer); pos = flen - (endpos - cp); - return haveZIP64(eb) ? find_end64(fd, eb, pos) : pos; + return find_positions64(fd, eb, pos, base_offset, censtart); } free(buffer); return (-1); @@ -217,82 +292,6 @@ find_end(int fd, Byte *eb) #define BUFSIZE (3 * 65536 + CENHDR + SIGSIZ) #define MINREAD 1024 -/* - * Computes and positions at the start of the CEN header, ie. the central - * directory, this will also return the offset if there is a zip file comment - * at the end of the archive, for most cases this would be 0. - */ -static jlong -compute_cen(int fd, Byte *bp) -{ - int bytes; - Byte *p; - jlong base_offset; - jlong offset; - char buffer[MINREAD]; - p = (Byte*) buffer; - /* - * Read the END Header, which is the starting point for ZIP files. - * (Clearly designed to make writing a zip file easier than reading - * one. Now isn't that precious...) - */ - if ((base_offset = find_end(fd, bp)) == -1) { - return (-1); - } - p = bp; - /* - * There is a historical, but undocumented, ability to allow for - * additional "stuff" to be prepended to the zip/jar file. It seems - * that this has been used to prepend an actual java launcher - * executable to the jar on Windows. Although this is just another - * form of statically linking a small piece of the JVM to the - * application, we choose to continue to support it. Note that no - * guarantees have been made (or should be made) to the customer that - * this will continue to work. - * - * Therefore, calculate the base offset of the zip file (within the - * expanded file) by assuming that the central directory is followed - * immediately by the end record. - */ - if (zip64_present) { - if ((offset = ZIP64_LOCOFF(p)) < (jlong)0) { - return -1; - } - if (JLI_Lseek(fd, offset, SEEK_SET) < (jlong) 0) { - return (-1); - } - if ((bytes = read(fd, buffer, MINREAD)) < 0) { - return (-1); - } - if (!ZIP64_ENDSIG_AT(buffer)) { - return -1; - } - if ((offset = ZIP64_ENDOFF(buffer)) < (jlong)0) { - return -1; - } - if (JLI_Lseek(fd, offset, SEEK_SET) < (jlong)0) { - return (-1); - } - p = (Byte*) buffer; - base_offset = base_offset - ZIP64_ENDSIZ(p) - ZIP64_ENDOFF(p) - ZIP64_ENDHDR; - } else { - base_offset = base_offset - ENDSIZ(p) - ENDOFF(p); - /* - * The END Header indicates the start of the Central Directory - * Headers. Remember that the desired Central Directory Header (CEN) - * will almost always be the second one and the first one is a small - * directory entry ("META-INF/"). Keep the code optimized for - * that case. - * - * Seek to the beginning of the Central Directory. - */ - if (JLI_Lseek(fd, base_offset + ENDOFF(p), SEEK_SET) < (jlong) 0) { - return (-1); - } - } - return base_offset; -} - /* * Locate the manifest file with the zip/jar file. * @@ -327,7 +326,23 @@ find_file(int fd, zentry *entry, const char *file_name) int res; int entry_size; int read_size; + + /* + * The (imaginary) position within the file relative to which + * offsets within the zip file refer. This is usually the + * location of the first local header (the start of the zip data) + * (which in turn is usually 0), but if the zip file has content + * prepended, then it will be either 0 or the length of the + * prepended content, depending on whether or not internal offsets + * have been adjusted (via e.g. zip -A). May be negative if + * content is prepended, zip -A is run, then the prefix is + * detached! + */ jlong base_offset; + + /** The position within the file of the start of the central directory. */ + jlong censtart; + Byte *p; Byte *bp; Byte *buffer; @@ -338,9 +353,11 @@ find_file(int fd, zentry *entry, const char *file_name) } bp = buffer; - base_offset = compute_cen(fd, bp); - if (base_offset == -1) { - free(buffer); + + if (find_positions(fd, bp, &base_offset, &censtart) == -1) { + return -1; + } + if (JLI_Lseek(fd, censtart, SEEK_SET) < (jlong) 0) { return -1; } diff --git a/jdk/test/java/util/zip/EntryCount64k.java b/jdk/test/java/util/zip/EntryCount64k.java index c815efdded6..b391ea99a7c 100644 --- a/jdk/test/java/util/zip/EntryCount64k.java +++ b/jdk/test/java/util/zip/EntryCount64k.java @@ -24,31 +24,64 @@ /** * @test * @summary Test java.util.zip behavior with ~64k entries + * @library /lib/testlibrary * @run main/othervm EntryCount64k * @run main/othervm -Djdk.util.zip.inhibitZip64=true EntryCount64k * @run main/othervm -Djdk.util.zip.inhibitZip64=false EntryCount64k */ import java.io.*; +import java.nio.file.Paths; import java.util.*; import java.util.zip.*; -public class EntryCount64k { +import jdk.testlibrary.OutputAnalyzer; +import jdk.testlibrary.ProcessTools; - public static void main(String[] args) throws Exception { - for (int i = (1 << 16) - 2; i < (1 << 16) + 2; i++) +public class EntryCount64k { + public static class Main { + public static void main(String[] args) { + System.out.println("Main"); + } + } + + static final String mainClass = "EntryCount64k$Main"; + + public static void main(String[] args) throws Throwable { + for (int i = (1 << 16) - 3; i < (1 << 16) + 2; i++) test(i); } - static void test(int entryCount) throws Exception { + static void test(int entryCount) throws Throwable { File zipFile = new File("EntryCount64k-tmp.zip"); zipFile.delete(); - try (ZipOutputStream zos = - new ZipOutputStream( - new BufferedOutputStream( - new FileOutputStream(zipFile)))) { - for (int i = 0; i < entryCount; i++) { + try (FileOutputStream fos = new FileOutputStream(zipFile); + BufferedOutputStream bos = new BufferedOutputStream(fos); + ZipOutputStream zos = new ZipOutputStream(bos)) { + + // Add 2 special entries, manifest and main class, + // to allow the zip file to be used with "java -jar" + ZipEntry man = new ZipEntry("META-INF/MANIFEST.MF"); + zos.putNextEntry(man); + zos.write("Manifest-Version: 1.0\n".getBytes("US-ASCII")); + zos.write(("Main-Class: " + mainClass + "\n").getBytes("US-ASCII")); + zos.closeEntry(); + + String mainName = mainClass + ".class"; + ZipEntry mainEntry = new ZipEntry(mainName); + String testClasses = System.getProperty("test.classes"); + File mainFile = new File(testClasses, mainName); + zos.putNextEntry(mainEntry); + try (FileInputStream fis = new FileInputStream(mainFile)) { + byte[] buf = new byte[4096]; + int n; + while ((n = fis.read(buf)) > 0) + zos.write(buf, 0, n); + } + zos.closeEntry(); + + for (int i = 2; i < entryCount; i++) { ZipEntry e = new ZipEntry(Integer.toString(i)); zos.putNextEntry(e); zos.closeEntry(); @@ -86,16 +119,19 @@ public class EntryCount64k { return false; } - static void checkCanRead(File zipFile, int entryCount) throws Exception { + static void checkCanRead(File zipFile, int entryCount) throws Throwable { // Check ZipInputStream API try (ZipInputStream zis = new ZipInputStream( new BufferedInputStream( new FileInputStream(zipFile)))) { - for (int i = 0; i < entryCount; i++) { + // skip over first two entries + for (int i = 0; i < 2; i++) + zis.getNextEntry(); + for (int i = 2; i < entryCount; i++) { ZipEntry e = zis.getNextEntry(); if (Integer.parseInt(e.getName()) != i) - throw new AssertionError(); + throw new AssertionError(e.getName()); } if (zis.getNextEntry() != null) throw new AssertionError(); @@ -104,7 +140,10 @@ public class EntryCount64k { // Check ZipFile API try (ZipFile zf = new ZipFile(zipFile)) { Enumeration en = zf.entries(); - for (int i = 0; i < entryCount; i++) { + // skip over first two entries + for (int i = 0; i < 2; i++) + en.nextElement(); + for (int i = 2; i < entryCount; i++) { ZipEntry e = en.nextElement(); if (Integer.parseInt(e.getName()) != i) throw new AssertionError(); @@ -115,5 +154,15 @@ public class EntryCount64k { || (zf.getEntry(Integer.toString(entryCount)) != null)) throw new AssertionError(); } + + // Check java -jar + String javaHome = System.getProperty("java.home"); + String java = Paths.get(javaHome, "bin", "java").toString(); + String[] cmd = { java, "-jar", zipFile.getName() }; + ProcessBuilder pb = new ProcessBuilder(cmd); + OutputAnalyzer a = ProcessTools.executeProcess(pb); + a.shouldHaveExitValue(0); + a.stdoutShouldMatch("\\AMain\n\\Z"); + a.stderrShouldMatch("\\A\\Z"); } }