From 628cd8a489fd54db18204c3bbaf4339d7ab5e9d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eirik=20Bj=C3=B8rsn=C3=B8s?= Date: Tue, 13 Feb 2024 16:26:37 +0000 Subject: [PATCH] 8303866: Allow ZipInputStream.readEnd to parse small Zip64 ZIP files Reviewed-by: lancea, jpai --- .../classes/java/util/zip/ZipInputStream.java | 62 +++- .../ZipInputStream/Zip64DataDescriptor.java | 282 ++++++++++++++++++ 2 files changed, 341 insertions(+), 3 deletions(-) create mode 100644 test/jdk/java/util/zip/ZipInputStream/Zip64DataDescriptor.java diff --git a/src/java.base/share/classes/java/util/zip/ZipInputStream.java b/src/java.base/share/classes/java/util/zip/ZipInputStream.java index 216a914f023..89fe159d575 100644 --- a/src/java.base/share/classes/java/util/zip/ZipInputStream.java +++ b/src/java.base/share/classes/java/util/zip/ZipInputStream.java @@ -92,6 +92,9 @@ public class ZipInputStream extends InflaterInputStream implements ZipConstants private ZipCoder zc; + // Flag to indicate readEnd should expect 64 bit Data Descriptor size fields + private boolean expect64BitDataDescriptor; + /** * Check to make sure that this stream has not been closed */ @@ -521,6 +524,13 @@ public class ZipInputStream extends InflaterInputStream implements ZipConstants } e.method = get16(tmpbuf, LOCHOW); e.xdostime = get32(tmpbuf, LOCTIM); + + // Expect 32-bit Data Descriptor size fields by default + expect64BitDataDescriptor = false; + + long csize = get32(tmpbuf, LOCSIZ); + long size = get32(tmpbuf, LOCLEN); + if ((flag & 8) == 8) { /* "Data Descriptor" present */ if (e.method != DEFLATED) { @@ -529,8 +539,8 @@ public class ZipInputStream extends InflaterInputStream implements ZipConstants } } else { e.crc = get32(tmpbuf, LOCCRC); - e.csize = get32(tmpbuf, LOCSIZ); - e.size = get32(tmpbuf, LOCLEN); + e.csize = csize; + e.size = size; } len = get16(tmpbuf, LOCEXT); if (len > 0) { @@ -538,6 +548,8 @@ public class ZipInputStream extends InflaterInputStream implements ZipConstants readFully(extra, 0, 
len); e.setExtra0(extra, e.csize == ZIP64_MAGICVAL || e.size == ZIP64_MAGICVAL, true); + // Determine if readEnd should expect 64-bit size fields in the Data Descriptor + expect64BitDataDescriptor = expect64BitDataDescriptor(extra, flag, csize, size); } return e; } @@ -577,7 +589,8 @@ public class ZipInputStream extends InflaterInputStream implements ZipConstants if ((flag & 8) == 8) { /* "Data Descriptor" present */ if (inf.getBytesWritten() > ZIP64_MAGICVAL || - inf.getBytesRead() > ZIP64_MAGICVAL) { + inf.getBytesRead() > ZIP64_MAGICVAL || + expect64BitDataDescriptor) { // ZIP64 format readFully(tmpbuf, 0, ZIP64_EXTHDR); long sig = get32(tmpbuf, 0); @@ -625,6 +638,49 @@ public class ZipInputStream extends InflaterInputStream implements ZipConstants } } + /** + * Determine whether the {@link #readEnd(ZipEntry)} method should interpret the + * 'compressed size' and 'uncompressed size' fields of the Data Descriptor record + * as 64-bit numbers instead of the regular 32-bit numbers. + * + * Returns true if the LOC has the 'streaming mode' flag set, at least one of the + * 'compressed size' and 'uncompressed size' are set to the Zip64 magic value + * 0xFFFFFFFF, and the LOC's extra field contains a Zip64 Extended Information Field. 
+ * + * @param extra the LOC extra field to look for a Zip64 field in + * @param flag the value of the 'general purpose bit flag' field in the LOC + * @param csize the value of the 'compressed size' field in the LOC + * @param size the value of the 'uncompressed size' field in the LOC + */ + private boolean expect64BitDataDescriptor(byte[] extra, int flag, long csize, long size) { + // The LOC's 'general purpose bit flag' 3 must indicate use of a Data Descriptor + if ((flag & 8) == 0) { + return false; + } + + // At least one LOC size field must be marked for Zip64 + if (csize != ZIP64_MAGICVAL && size != ZIP64_MAGICVAL) { + return false; + } + + // Look for a Zip64 field + int headerSize = 2 * Short.BYTES; // id + size + if (extra != null) { + for (int i = 0; i + headerSize < extra.length;) { + int id = get16(extra, i); + int dsize = get16(extra, i + Short.BYTES); + if (i + headerSize + dsize > extra.length) { + return false; // Invalid size + } + if (id == ZIP64_EXTID) { + return true; + } + i += headerSize + dsize; + } + } + return false; + } + /* * Reads bytes, blocking until all bytes are read. */ diff --git a/test/jdk/java/util/zip/ZipInputStream/Zip64DataDescriptor.java b/test/jdk/java/util/zip/ZipInputStream/Zip64DataDescriptor.java new file mode 100644 index 00000000000..35815610084 --- /dev/null +++ b/test/jdk/java/util/zip/ZipInputStream/Zip64DataDescriptor.java @@ -0,0 +1,282 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8303866 + * @summary ZipInputStream should read 8-byte data descriptors if the LOC has + * a ZIP64 extended information extra field + * @run junit Zip64DataDescriptor + */ + + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.BeforeEach; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.charset.StandardCharsets; +import java.util.HexFormat; +import java.util.zip.*; + +import static org.junit.jupiter.api.Assertions.*; + +public class Zip64DataDescriptor { + + // A byte array holding a small-sized Zip64 ZIP file, described below + private byte[] zip64File; + + // A byte array holding a ZIP used for testing invalid Zip64 extra fields + private byte[] invalidZip64; + + @BeforeEach + public void setup() throws IOException { + /* + * Structure of the ZIP64 file used below. Note the presence + * of a Zip64 extended information extra field and the + * Data Descriptor having 8-byte values for csize and size. + * + * The file was produced using the zip command on MacOS + * (zip 3.0, by Info-ZIP), in streaming mode (to enable Zip64), + * using the -fd option (to force the use of data descriptors) + * + * The following command was used: + *
echo hello | zip -fd > hello.zip
+ * + * ------ Local File Header ------ + * 000000 signature 0x04034b50 + * 000004 version 45 + * 000006 flags 0x0008 + * 000008 method 8 Deflated + * 000010 time 0xb180 22:12 + * 000012 date 0x565c 2023-02-28 + * 000014 crc 0x00000000 + * 000018 csize -1 + * 000022 size -1 + * 000026 nlen 1 + * 000028 elen 20 + * 000030 name 1 bytes '-' + * 000031 ext id 0x0001 Zip64 extended information extra field + * 000033 ext size 16 + * 000035 z64 size 0 + * 000043 z64 csize 0 + * + * ------ File Data ------ + * 000051 data 8 bytes + * + * ------ Data Descriptor ------ + * 000059 signature 0x08074b50 + * 000063 crc 0x363a3020 + * 000067 csize 8 + * 000075 size 6 + * 000083 ... + */ + + String hex = """ + 504b03042d000800080080b15c5600000000ffffffffffffffff01001400 + 2d0100100000000000000000000000000000000000cb48cdc9c9e7020050 + 4b070820303a3608000000000000000600000000000000504b01021e032d + 000800080080b15c5620303a360800000006000000010000000000000001 + 000000b011000000002d504b050600000000010001002f00000053000000 + 0000"""; + + zip64File = HexFormat.of().parseHex(hex.replaceAll("\n", "")); + + // Create the ZIP file used for testing that invalid Zip64 extra fields are ignored + // This ZIP has the regular 4-byte data descriptor + + byte[] extra = new byte[Long.BYTES + Long.BYTES + Short.BYTES * 2]; // Size of a regular Zip64 extra field + ByteBuffer buffer = ByteBuffer.wrap(extra).order(ByteOrder.LITTLE_ENDIAN); + buffer.putShort(0, (short) 123); // Not processed by ZipEntry.setExtra + buffer.putShort(Short.BYTES, (short) (extra.length - 4)); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (ZipOutputStream zo = new ZipOutputStream(baos)) { + ZipEntry ze = new ZipEntry("-"); + ze.setExtra(extra); + zo.putNextEntry(ze); + zo.write("hello\n".getBytes(StandardCharsets.UTF_8)); + } + + invalidZip64 = baos.toByteArray(); + + // Set Zip64 magic values on compressed and uncompressed size fields + ByteBuffer.wrap(invalidZip64).order(ByteOrder.LITTLE_ENDIAN) +
.putInt(ZipFile.LOCSIZ, 0xFFFFFFFF) + .putInt(ZipFile.LOCLEN, 0xFFFFFFFF); + + // Set the Zip64 header ID 0x1 on the extra field in the invalid file + setExtraHeaderId((short) 0x1); + } + + /* + * Verify that small-sized Zip64 entries can be parsed by ZipInputStream + */ + @Test + public void shouldReadZip64Descriptor() throws IOException { + readZipInputStream(zip64File); + } + + /* + * For maximal backward compatibility when reading Zip64 descriptors, invalid + * Zip64 extra data sizes should be ignored + */ + @Test + public void shouldIgnoreInvalidExtraSize() throws IOException { + setExtraSize((short) 42); + readZipInputStream(invalidZip64); + } + + /* + * Files with Zip64 magic values but no Zip64 field should be ignored + * when considering 8 byte data descriptors + */ + @Test + public void shouldIgnoreNoZip64Header() throws IOException { + setExtraSize((short) 123); + readZipInputStream(invalidZip64); + } + + /* + * Theoretically, ZIP files may exist with ZIP64 format, but with 4-byte + * data descriptors. Such files will fail to parse, as demonstrated by this test. + */ + @Test + public void shouldFailParsingZip64With4ByteDataDescriptor() throws IOException { + ZipException ex = assertThrows(ZipException.class, () -> { + readZipInputStream(invalidZip64); + }); + + String msg = String.format("Expected exeption message to contain 'invalid entry size', was %s", + ex.getMessage()); + assertTrue(ex.getMessage().contains("invalid entry size"), msg); + } + + /* + * Validate that an extra data size exceeding the length of the extra field is ignored + */ + @Test + public void shouldIgnoreExcessiveExtraSize() throws IOException { + + setExtraSize(Short.MAX_VALUE); + + + readZipInputStream(invalidZip64); + } + + /* + * Validate that the Data Descriptor is read with 32-bit fields if neither the + * LOC's 'uncompressed size' or 'compressed size' fields have the Zip64 magic value, + * even when there is a Zip64 field in the extra field. 
+ */ + @Test + public void shouldIgnoreNoMagicMarkers() throws IOException { + // Set compressed and uncompressed size fields to zero + ByteBuffer.wrap(invalidZip64).order(ByteOrder.LITTLE_ENDIAN) + .putInt(ZipFile.LOCSIZ, 0) + .putInt(ZipFile.LOCLEN, 0); + + + readZipInputStream(invalidZip64); + } + + /* + * Validate that a truncated Zip64 extra field (just the tag, with no size or data) is ignored + */ + @Test + public void shouldIgnoreTrucatedZip64Extra() throws IOException { + + truncateZip64(); + + readZipInputStream(invalidZip64); + } + + /** + * Update the Extra field header ID of the invalid file + */ + private void setExtraHeaderId(short id) { + // Set the header ID on the extra field + ByteBuffer buffer = ByteBuffer.wrap(invalidZip64).order(ByteOrder.LITTLE_ENDIAN); + int nlen = buffer.getShort(ZipFile.LOCNAM); + buffer.putShort(ZipFile.LOCHDR + nlen, id); + } + + /** + * Updates the 16-bit 'data size' field of the Zip64 extended information field, + * potentially to an invalid value. + * @param size the value to set in the 'data size' field. + */ + private void setExtraSize(short size) { + ByteBuffer buffer = ByteBuffer.wrap(invalidZip64).order(ByteOrder.LITTLE_ENDIAN); + // Compute the offset to the Zip64 data block size field + short nlen = buffer.getShort(ZipFile.LOCNAM); + int dataSizeOffset = ZipFile.LOCHDR + nlen + Short.BYTES; + buffer.putShort(dataSizeOffset, size); + } + + /** + * Puts a truncated Zip64 field (just the tag) at the end of the LOC extra field. + * The beginning of the extra field is filled with a generic extra field containing + * just zeros.
+ */ + private void truncateZip64() { + ByteBuffer buffer = ByteBuffer.wrap(invalidZip64).order(ByteOrder.LITTLE_ENDIAN); + // Get the LOC name and extra sizes + short nlen = buffer.getShort(ZipFile.LOCNAM); + short elen = buffer.getShort(ZipFile.LOCEXT); + int cenOffset = ZipFile.LOCHDR + nlen + elen; + + // Zero out the extra field + int estart = ZipFile.LOCHDR + nlen; + buffer.put(estart, new byte[elen]); + // Put a generic extra field in the start + buffer.putShort(estart, (short) 42); + buffer.putShort(estart + Short.BYTES, (short) (elen - 4 - 2)); + // Put a truncated (just the tag) Zip64 field at the end + buffer.putShort(cenOffset - Short.BYTES, (short) 0x0001); + } + + /* + * Consume and verify the ZIP file using ZipInputStream + */ + private void readZipInputStream(byte[] zip) throws IOException { + try (ZipInputStream in = new ZipInputStream(new ByteArrayInputStream(zip))) { + // Read the ZIP entry, this calls readLOC + ZipEntry e = in.getNextEntry(); + + // Sanity check the zip entry + assertNotNull(e, "Missing zip entry"); + assertEquals("-", e.getName()); + + // Read the entry data, this causes readEnd to parse the data descriptor + assertEquals("hello\n", new String(in.readAllBytes(), StandardCharsets.UTF_8)); + + // There should only be a single zip entry + assertNull(in.getNextEntry(), "Unexpected additional zip entry"); + } + } +}