From 628cd8a489fd54db18204c3bbaf4339d7ab5e9d6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eirik=20Bj=C3=B8rsn=C3=B8s?= <eirbjo@openjdk.org>
Date: Tue, 13 Feb 2024 16:26:37 +0000
Subject: [PATCH] 8303866: Allow ZipInputStream.readEnd to parse small Zip64
 ZIP files

Reviewed-by: lancea, jpai
---
 .../classes/java/util/zip/ZipInputStream.java |  62 +++-
 .../ZipInputStream/Zip64DataDescriptor.java   | 282 ++++++++++++++++++
 2 files changed, 341 insertions(+), 3 deletions(-)
 create mode 100644 test/jdk/java/util/zip/ZipInputStream/Zip64DataDescriptor.java

diff --git a/src/java.base/share/classes/java/util/zip/ZipInputStream.java b/src/java.base/share/classes/java/util/zip/ZipInputStream.java
index 216a914f023..89fe159d575 100644
--- a/src/java.base/share/classes/java/util/zip/ZipInputStream.java
+++ b/src/java.base/share/classes/java/util/zip/ZipInputStream.java
@@ -92,6 +92,9 @@ public class ZipInputStream extends InflaterInputStream implements ZipConstants
 
     private ZipCoder zc;
 
+    // Flag to indicate readEnd should expect 64 bit Data Descriptor size fields
+    private boolean expect64BitDataDescriptor;
+
     /**
      * Check to make sure that this stream has not been closed
      */
@@ -521,6 +524,13 @@ public class ZipInputStream extends InflaterInputStream implements ZipConstants
         }
         e.method = get16(tmpbuf, LOCHOW);
         e.xdostime = get32(tmpbuf, LOCTIM);
+
+        // Expect 32-bit Data Descriptor size fields by default
+        expect64BitDataDescriptor = false;
+
+        long csize = get32(tmpbuf, LOCSIZ);
+        long size = get32(tmpbuf, LOCLEN);
+
         if ((flag & 8) == 8) {
             /* "Data Descriptor" present */
             if (e.method != DEFLATED) {
@@ -529,8 +539,8 @@ public class ZipInputStream extends InflaterInputStream implements ZipConstants
             }
         } else {
             e.crc = get32(tmpbuf, LOCCRC);
-            e.csize = get32(tmpbuf, LOCSIZ);
-            e.size = get32(tmpbuf, LOCLEN);
+            e.csize = csize;
+            e.size = size;
         }
         len = get16(tmpbuf, LOCEXT);
         if (len > 0) {
@@ -538,6 +548,8 @@ public class ZipInputStream extends InflaterInputStream implements ZipConstants
             readFully(extra, 0, len);
             e.setExtra0(extra,
                         e.csize == ZIP64_MAGICVAL || e.size == ZIP64_MAGICVAL, true);
+            // Determine if readEnd should expect 64-bit size fields in the Data Descriptor
+            expect64BitDataDescriptor = expect64BitDataDescriptor(extra, flag, csize, size);
         }
         return e;
     }
@@ -577,7 +589,8 @@ public class ZipInputStream extends InflaterInputStream implements ZipConstants
         if ((flag & 8) == 8) {
             /* "Data Descriptor" present */
             if (inf.getBytesWritten() > ZIP64_MAGICVAL ||
-                inf.getBytesRead() > ZIP64_MAGICVAL) {
+                inf.getBytesRead() > ZIP64_MAGICVAL ||
+                    expect64BitDataDescriptor) {
                 // ZIP64 format
                 readFully(tmpbuf, 0, ZIP64_EXTHDR);
                 long sig = get32(tmpbuf, 0);
@@ -625,6 +638,49 @@ public class ZipInputStream extends InflaterInputStream implements ZipConstants
         }
     }
 
+    /**
+     * Determine whether the {@link #readEnd(ZipEntry)} method should interpret the
+     * 'compressed size' and 'uncompressed size' fields of the Data Descriptor record
+     * as 64-bit numbers instead of the regular 32-bit numbers.
+     *
+     * Returns true if the LOC has the 'streaming mode' flag set, at least one of the
+     * 'compressed size' and 'uncompressed size' are set to the Zip64 magic value
+     * 0xFFFFFFFF, and the LOC's extra field contains a Zip64 Extended Information Field.
+     *
+     * @param extra the LOC extra field to look for a Zip64 field in
+     * @param flag the value of the 'general purpose bit flag' field in the LOC
+     * @param csize the value of the 'compressed size' field in the LOC
+     * @param size  the value of the 'uncompressed size' field in the LOC
+     */
+    private boolean expect64BitDataDescriptor(byte[] extra, int flag, long csize, long size) {
+        // Bit 3 of the LOC's 'general purpose bit flag' must indicate use of a Data Descriptor
+        if ((flag & 8) == 0) {
+            return false;
+        }
+
+        // At least one LOC size field must be marked for Zip64
+        if (csize != ZIP64_MAGICVAL && size != ZIP64_MAGICVAL) {
+            return false;
+        }
+
+        // Look for a Zip64 field
+        int headerSize = 2 * Short.BYTES; // id + size
+        if (extra != null) {
+            for (int i = 0; i + headerSize < extra.length;) {
+                int id = get16(extra, i);
+                int dsize = get16(extra, i + Short.BYTES);
+                if (i + headerSize + dsize > extra.length) {
+                    return false; // Invalid size
+                }
+                if (id == ZIP64_EXTID) {
+                    return true;
+                }
+                i += headerSize + dsize;
+            }
+        }
+        return false;
+    }
+
     /*
      * Reads bytes, blocking until all bytes are read.
      */
diff --git a/test/jdk/java/util/zip/ZipInputStream/Zip64DataDescriptor.java b/test/jdk/java/util/zip/ZipInputStream/Zip64DataDescriptor.java
new file mode 100644
index 00000000000..35815610084
--- /dev/null
+++ b/test/jdk/java/util/zip/ZipInputStream/Zip64DataDescriptor.java
@@ -0,0 +1,282 @@
+/*
+ * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8303866
+ * @summary ZipInputStream should read 8-byte data descriptors if the LOC has
+ *   a ZIP64 extended information extra field
+ * @run junit Zip64DataDescriptor
+ */
+
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.BeforeEach;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.charset.StandardCharsets;
+import java.util.HexFormat;
+import java.util.zip.*;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+public class Zip64DataDescriptor {
+
+    // A byte array holding a small-sized Zip64 ZIP file, described below
+    private byte[] zip64File;
+
+    // A byte array holding a ZIP used for testing invalid Zip64 extra fields
+    private byte[] invalidZip64;
+
+    @BeforeEach
+    public void setup() throws IOException {
+        /*
+         * Structure of the ZIP64 file used below. Note the presence
+         * of a Zip64 extended information extra field and the
+         * Data Descriptor having 8-byte values for csize and size.
+         *
+         * The file was produced using the zip command on MacOS
+         * (zip 3.0, by Info-ZIP), in streaming mode (to enable Zip64),
+         * using the -fd option (to force the use of data descriptors)
+         *
+         * The following command was used:
+         * <pre>echo hello | zip -fd > hello.zip</pre>
+         *
+         * ------  Local File Header  ------
+         * 000000  signature          0x04034b50
+         * 000004  version            45
+         * 000006  flags              0x0008
+         * 000008  method             8              Deflated
+         * 000010  time               0xb180         22:12
+         * 000012  date               0x565c         2023-02-28
+         * 000014  crc                0x00000000
+         * 000018  csize              -1
+         * 000022  size               -1
+         * 000026  nlen               1
+         * 000028  elen               20
+         * 000030  name               1 bytes        '-'
+         * 000031  ext id             0x0001         Zip64 extended information extra field
+         * 000033  ext size           16
+         * 000035  z64 size           0
+         * 000043  z64 csize          0
+         *
+         * ------  File Data  ------
+         * 000051  data               8 bytes
+         *
+         * ------  Data Descriptor  ------
+         * 000059  signature          0x08074b50
+         * 000063  crc                0x363a3020
+         * 000067  csize              8
+         * 000075  size               6
+         * 000083  ...
+         */
+
+        String hex = """
+                504b03042d000800080080b15c5600000000ffffffffffffffff01001400
+                2d0100100000000000000000000000000000000000cb48cdc9c9e7020050
+                4b070820303a3608000000000000000600000000000000504b01021e032d
+                000800080080b15c5620303a360800000006000000010000000000000001
+                000000b011000000002d504b050600000000010001002f00000053000000
+                0000""";
+
+        zip64File = HexFormat.of().parseHex(hex.replaceAll("\n", ""));
+
+        // Create the ZIP file used for testing that invalid Zip64 extra fields are ignored
+        // This ZIP has the regular 4-byte data descriptor
+
+        byte[] extra = new byte[Long.BYTES + Long.BYTES + Short.BYTES * 2]; // Size of a regular Zip64 extra field
+        ByteBuffer buffer = ByteBuffer.wrap(extra).order(ByteOrder.LITTLE_ENDIAN);
+        buffer.putShort(0, (short) 123); // Not processed by ZipEntry.setExtra
+        buffer.putShort(Short.BYTES, (short) (extra.length - 4));
+
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        try (ZipOutputStream zo = new ZipOutputStream(baos)) {
+            ZipEntry ze = new ZipEntry("-");
+            ze.setExtra(extra);
+            zo.putNextEntry(ze);
+            zo.write("hello\n".getBytes(StandardCharsets.UTF_8));
+        }
+
+        invalidZip64 = baos.toByteArray();
+
+        // Set Zip64 magic values on compressed and uncompressed size fields
+        ByteBuffer.wrap(invalidZip64).order(ByteOrder.LITTLE_ENDIAN)
+                .putInt(ZipFile.LOCSIZ, 0xFFFFFFFF)
+                .putInt(ZipFile.LOCLEN, 0xFFFFFFFF);
+
+        // Set the Zip64 header ID 0x1 on the extra field in the invalid file
+        setExtraHeaderId((short) 0x1);
+    }
+
+    /*
+     * Verify that small-sized Zip64 entries can be parsed by ZipInputStream
+     */
+    @Test
+    public void shouldReadZip64Descriptor() throws IOException {
+        readZipInputStream(zip64File);
+    }
+
+    /*
+     * For maximal backward compatibility when reading Zip64 descriptors, invalid
+     * Zip64 extra data sizes should be ignored
+     */
+    @Test
+    public void shouldIgnoreInvalidExtraSize() throws IOException {
+        setExtraSize((short) 42);
+        readZipInputStream(invalidZip64);
+    }
+
+    /*
+     * Entries with Zip64 magic values in the LOC but no Zip64 field in the extra
+     * field should be read with the regular 4-byte data descriptor
+     */
+    @Test
+    public void shouldIgnoreNoZip64Header() throws IOException {
+        setExtraHeaderId((short) 123); // Overwrite the Zip64 header ID (0x1) set in setup()
+        readZipInputStream(invalidZip64);
+    }
+
+    /*
+     * Theoretically, ZIP files may exist with ZIP64 format, but with 4-byte
+     * data descriptors. Such files will fail to parse, as demonstrated by this test.
+     */
+    @Test
+    public void shouldFailParsingZip64With4ByteDataDescriptor() throws IOException {
+        ZipException ex = assertThrows(ZipException.class, () -> {
+            readZipInputStream(invalidZip64);
+        });
+
+        String msg = String.format("Expected exception message to contain 'invalid entry size', was %s",
+                ex.getMessage());
+        assertTrue(ex.getMessage().contains("invalid entry size"), msg);
+    }
+
+    /*
+     * Validate that an extra data size exceeding the length of the extra field is ignored
+     */
+    @Test
+    public void shouldIgnoreExcessiveExtraSize() throws IOException {
+
+        setExtraSize(Short.MAX_VALUE);
+
+
+        readZipInputStream(invalidZip64);
+    }
+
+    /*
+     * Validate that the Data Descriptor is read with 32-bit fields if neither the
+     * LOC's 'uncompressed size' nor 'compressed size' fields have the Zip64 magic value,
+     * even when there is a Zip64 field in the extra field.
+     */
+    @Test
+    public void shouldIgnoreNoMagicMarkers() throws IOException {
+        // Set compressed and uncompressed size fields to zero
+        ByteBuffer.wrap(invalidZip64).order(ByteOrder.LITTLE_ENDIAN)
+                .putInt(ZipFile.LOCSIZ, 0)
+                .putInt(ZipFile.LOCLEN, 0);
+
+
+        readZipInputStream(invalidZip64);
+    }
+
+    /*
+     * Validate that a truncated Zip64 field (just the tag) at the end of the extra field is ignored
+     */
+    @Test
+    public void shouldIgnoreTrucatedZip64Extra() throws IOException {
+
+        truncateZip64();
+
+        readZipInputStream(invalidZip64);
+    }
+
+    /**
+     * Update the Extra field header ID of the invalid file
+     */
+    private void setExtraHeaderId(short id) {
+        // Set the header ID on the extra field
+        ByteBuffer buffer = ByteBuffer.wrap(invalidZip64).order(ByteOrder.LITTLE_ENDIAN);
+        int nlen = buffer.getShort(ZipFile.LOCNAM);
+        buffer.putShort(ZipFile.LOCHDR + nlen, id);
+    }
+
+    /**
+     * Updates the 16-bit 'data size' field of the Zip64 extended information field,
+     * potentially to an invalid value.
+     * @param size the value to set in the 'data size' field.
+     */
+    private void setExtraSize(short size) {
+        ByteBuffer buffer = ByteBuffer.wrap(invalidZip64).order(ByteOrder.LITTLE_ENDIAN);
+        // Compute the offset to the Zip64 data block size field
+        short nlen = buffer.getShort(ZipFile.LOCNAM);
+        int dataSizeOffset = ZipFile.LOCHDR + nlen + Short.BYTES;
+        buffer.putShort(dataSizeOffset, size);
+    }
+
+    /**
+     * Puts a truncated Zip64 field (just the tag) at the end of the LOC extra field.
+     * The beginning of the extra field is filled with a generic extra field containing
+     * just zeros.
+     */
+    private void truncateZip64() {
+        ByteBuffer buffer = ByteBuffer.wrap(invalidZip64).order(ByteOrder.LITTLE_ENDIAN);
+        // Get the LOC name and extra sizes
+        short nlen = buffer.getShort(ZipFile.LOCNAM);
+        short elen = buffer.getShort(ZipFile.LOCEXT);
+        int cenOffset = ZipFile.LOCHDR + nlen + elen;
+
+        // Zero out the extra field
+        int estart = ZipFile.LOCHDR + nlen;
+        buffer.put(estart, new byte[elen]);
+        // Put a generic extra field in the start
+        buffer.putShort(estart, (short) 42);
+        buffer.putShort(estart + Short.BYTES, (short) (elen - 4 - 2));
+        // Put a truncated (just the tag) Zip64 field at the end
+        buffer.putShort(cenOffset - Short.BYTES, (short) 0x0001);
+    }
+
+    /*
+     * Consume and verify the ZIP file using ZipInputStream
+     */
+    private void readZipInputStream(byte[] zip) throws IOException {
+        try (ZipInputStream in = new ZipInputStream(new ByteArrayInputStream(zip))) {
+            // Read the ZIP entry, this calls readLOC
+            ZipEntry e = in.getNextEntry();
+
+            // Sanity check the zip entry
+            assertNotNull(e, "Missing zip entry");
+            assertEquals("-", e.getName());
+
+            // Read the entry data, this causes readEnd to parse the data descriptor
+            assertEquals("hello\n", new String(in.readAllBytes(), StandardCharsets.UTF_8));
+
+            // There should only be a single zip entry
+            assertNull(in.getNextEntry(), "Unexpected additional zip entry");
+        }
+    }
+}