From 78f71b4d41f8682ea10b1573106b11c00f150a1c Mon Sep 17 00:00:00 2001 From: Eirik Bjorsnos Date: Sat, 18 Feb 2023 12:39:19 +0000 Subject: [PATCH] 8301873: Avoid string decoding in ZipFile.Source.getEntryPos Reviewed-by: redestad, lancea --- .../share/classes/java/util/zip/ZipCoder.java | 89 ++++++++- .../share/classes/java/util/zip/ZipFile.java | 45 ++--- .../InvalidBytesInEntryNameOrComment.java | 183 ++++++++++++++++++ .../zip/ZipFile/TestZipFileEncodings.java | 81 +++++++- 4 files changed, 374 insertions(+), 24 deletions(-) create mode 100644 test/jdk/java/util/zip/ZipFile/InvalidBytesInEntryNameOrComment.java diff --git a/src/java.base/share/classes/java/util/zip/ZipCoder.java b/src/java.base/share/classes/java/util/zip/ZipCoder.java index d9d228c2ce9..8696d2a797e 100644 --- a/src/java.base/share/classes/java/util/zip/ZipCoder.java +++ b/src/java.base/share/classes/java/util/zip/ZipCoder.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2009, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -55,6 +55,30 @@ class ZipCoder { return new ZipCoder(charset); } + /** + * This enum represents the three possible return values for + * {@link #compare(String, byte[], int, int, boolean)} when + * this method compares a lookup name to a string encoded in the + * CEN byte array. + */ + enum Comparison { + /** + * The lookup string is exactly equal + * to the encoded string. + */ + EXACT_MATCH, + /** + * The lookup string and the encoded string differs only + * by the encoded string having a trailing '/' character. + */ + DIRECTORY_MATCH, + /** + * The lookup string and the encoded string do not match. + * (They are neither an exact match or a directory match.) + */ + NO_MATCH + } + String toString(byte[] ba, int off, int length) { try { return decoder().decode(ByteBuffer.wrap(ba, off, length)).toString(); @@ -184,6 +208,52 @@ class ZipCoder { return slashBytes; } + /** + * This method is used by ZipFile.Source.getEntryPos when comparing the + * name being looked up to candidate names encoded in the CEN byte + * array. + * + * Since ZipCode.getEntry supports looking up a "dir/" entry by + * the name "dir", this method can optionally distinguish an + * exact match from a partial "directory match" (where names only + * differ by the encoded name having an additional trailing '/') + * + * The return values of this method are as follows: + * + * If the lookup name is exactly equal to the encoded string, return + * {@link Comparison#EXACT_MATCH}. + * + * If the parameter {@code matchDirectory} is {@code true} and the + * two strings differ only by the encoded string having an extra + * trailing '/' character, then return {@link Comparison#DIRECTORY_MATCH}. + * + * Otherwise, return {@link Comparison#NO_MATCH} + * + * While a general implementation will need to decode bytes into a + * String for comparison, this can be avoided if the String coder + * and this ZipCoder are known to encode strings to the same bytes. + * + * @param str The lookup string to compare with the encoded string. + * @param b The byte array holding the encoded string + * @param off The offset into the array where the encoded string starts + * @param len The length of the encoded string in bytes + * @param matchDirectory If {@code true} and the strings do not match exactly, + * a directory match will also be tested + * + */ + Comparison compare(String str, byte[] b, int off, int len, boolean matchDirectory) { + String decoded = toString(b, off, len); + if (decoded.startsWith(str)) { + if (decoded.length() == str.length()) { + return Comparison.EXACT_MATCH; + } else if (matchDirectory + && decoded.length() == str.length() + 1 + && decoded.endsWith("/") ) { + return Comparison.DIRECTORY_MATCH; + } + } + return Comparison.NO_MATCH; + } static final class UTF8ZipCoder extends ZipCoder { private UTF8ZipCoder(Charset utf8) { @@ -232,5 +302,22 @@ class ZipCoder { boolean hasTrailingSlash(byte[] a, int end) { return end > 0 && a[end - 1] == '/'; } + + @Override + Comparison compare(String str, byte[] b, int off, int len, boolean matchDirectory) { + try { + byte[] encoded = JLA.getBytesNoRepl(str, UTF_8.INSTANCE); + int mismatch = Arrays.mismatch(encoded, 0, encoded.length, b, off, off+len); + if (mismatch == -1) { + return Comparison.EXACT_MATCH; + } else if (matchDirectory && len == mismatch + 1 && hasTrailingSlash(b, off + len)) { + return Comparison.DIRECTORY_MATCH; + } else { + return Comparison.NO_MATCH; + } + } catch (CharacterCodingException e) { + return Comparison.NO_MATCH; + } + } } } diff --git a/src/java.base/share/classes/java/util/zip/ZipFile.java b/src/java.base/share/classes/java/util/zip/ZipFile.java index aa19de461ec..f6b3588c738 100644 --- a/src/java.base/share/classes/java/util/zip/ZipFile.java +++ b/src/java.base/share/classes/java/util/zip/ZipFile.java @@ -1633,40 +1633,41 @@ public class ZipFile implements ZipConstants, Closeable { int hsh = ZipCoder.hash(name); int idx = table[(hsh & 0x7fffffff) % tablelen]; + int dirPos = -1; // Position of secondary match "name/" + // Search down the target hash chain for a entry whose // 32 bit hash matches the hashed name. while (idx != ZIP_ENDCHAIN) { if (getEntryHash(idx) == hsh) { - // The CEN name must match the specified one + int pos = getEntryPos(idx); + int noff = pos + CENHDR; + int nlen = CENNAM(cen, pos); - try { - ZipCoder zc = zipCoderForPos(pos); - String entry = zc.toString(cen, pos + CENHDR, CENNAM(cen, pos)); + ZipCoder zc = zipCoderForPos(pos); - // If addSlash is true we'll test for name+/ in addition to - // name, unless name is the empty string or already ends with a - // slash - int entryLen = entry.length(); - int nameLen = name.length(); - if (entryLen == nameLen && entry.equals(name)) { - // Found our match + // Compare the lookup name with the name encoded in the CEN + switch (zc.compare(name, cen, noff, nlen, addSlash)) { + case EXACT_MATCH: + // We found an exact match for "name" return pos; - } - // If addSlash is true we'll now test for name+/ providing - if (addSlash && nameLen + 1 == entryLen - && entry.startsWith(name) && - entry.charAt(entryLen - 1) == '/') { - // Found the entry "name+/", now find the CEN entry pos - int exactPos = getEntryPos(name, false); - return exactPos == -1 ? pos : exactPos; - } - } catch (IllegalArgumentException iae) { - // Ignore + case DIRECTORY_MATCH: + // We found the directory "name/" + // Track its position, then continue the search for "name" + dirPos = pos; + break; + case NO_MATCH: + // Hash collision, continue searching } } idx = getEntryNext(idx); } + // Reaching this point means we did not find "name". + // Return the position of "name/" if we found it + if (dirPos != -1) { + return dirPos; + } + // No entry found return -1; } diff --git a/test/jdk/java/util/zip/ZipFile/InvalidBytesInEntryNameOrComment.java b/test/jdk/java/util/zip/ZipFile/InvalidBytesInEntryNameOrComment.java new file mode 100644 index 00000000000..abe69b69374 --- /dev/null +++ b/test/jdk/java/util/zip/ZipFile/InvalidBytesInEntryNameOrComment.java @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +import java.io.ByteArrayOutputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.FileChannel; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.zip.ZipEntry; +import java.util.zip.ZipException; +import java.util.zip.ZipFile; +import java.util.zip.ZipOutputStream; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.expectThrows; + +/** + * @test + * @summary Validate that opening ZIP files files with invalid UTF-8 + * byte sequences in the name or comment fields fails with ZipException + * @run testng/othervm InvalidBytesInEntryNameOrComment + */ +public class InvalidBytesInEntryNameOrComment { + + // Offsets for navigating the CEN fields + private static final int EOC_OFF = 6; // Offset from EOF to find CEN offset + private static final int CEN_HDR = 45; // Size of a CEN header + private static final int NLEN = 28; // Name length + private static final int ELEN = 30; // Extra length + private static final int CLEN = 32; // Comment length + + // Example invalid UTF-8 byte sequence + private static final byte[] INVALID_UTF8_BYTE_SEQUENCE = {(byte) 0xF0, (byte) 0xA4, (byte) 0xAD}; + + // Expected ZipException regex + private static final String BAD_ENTRY_NAME_OR_COMMENT = "invalid CEN header (bad entry name or comment)"; + + // ZIP file with invalid name field + private Path invalidName; + + // ZIP file with invalid comment field + private Path invalidComment; + + @BeforeTest + public void setup() throws IOException { + // Create a ZIP file with valid name and comment fields + byte[] templateZip = templateZIP(); + + // Create a ZIP with a CEN name field containing an invalid byte sequence + invalidName = invalidName("invalid-name.zip", templateZip); + + // Create a ZIP with a CEN comment field containing an invalid byte sequence + invalidComment = invalidComment("invalid-comment.zip", templateZip); + } + + /** + * Opening a ZipFile with an invalid UTF-8 byte sequence in + * the name field of a CEN file header should throw a + * ZipException with "bad entry name or comment" + */ + @Test + public void shouldRejectInvalidName() throws IOException { + ZipException ex = expectThrows(ZipException.class, () -> { + new ZipFile(invalidName.toFile()); + }); + assertEquals(ex.getMessage(), BAD_ENTRY_NAME_OR_COMMENT); + } + + /** + * Opening a ZipFile with an invalid UTF-8 byte sequence in + * the comment field of a CEN file header should throw a + * ZipException with "bad entry name or comment" + */ + @Test + public void shouldRejectInvalidComment() throws IOException { + ZipException ex = expectThrows(ZipException.class, () -> { + new ZipFile(invalidComment.toFile()); + }); + assertEquals(ex.getMessage(), BAD_ENTRY_NAME_OR_COMMENT); + } + + /** + * Make a valid ZIP file used as a template for invalid files + */ + private byte[] templateZIP() throws IOException { + ByteArrayOutputStream bout = new ByteArrayOutputStream(); + try (ZipOutputStream zo = new ZipOutputStream(bout)) { + ZipEntry commentEntry = new ZipEntry("file"); + commentEntry.setComment("Comment"); + zo.putNextEntry(commentEntry); + } + return bout.toByteArray(); + } + + /** + * Make a ZIP with invalid bytes in the CEN name field + */ + private Path invalidName(String name, byte[] template) throws IOException { + ByteBuffer buffer = copyTemplate(template); + int off = cenStart(buffer); + // Name field starts here + int noff = off + CEN_HDR; + + // Write invalid bytes + buffer.put(noff, INVALID_UTF8_BYTE_SEQUENCE, 0, INVALID_UTF8_BYTE_SEQUENCE.length); + return writeFile(name, buffer); + + } + + /** + * Make a copy of the ZIP template and wrap it in a little-endian + * ByteBuffer + */ + private ByteBuffer copyTemplate(byte[] template) { + return ByteBuffer.wrap(Arrays.copyOf(template, template.length)) + .order(ByteOrder.LITTLE_ENDIAN); + } + + /** + * Make a ZIP with invalid bytes in the CEN comment field + */ + private Path invalidComment(String name, byte[] template) throws IOException { + ByteBuffer buffer = copyTemplate(template); + int off = cenStart(buffer); + // Need to skip past the length of the name and extra fields + int nlen = buffer.getShort(off + NLEN); + int elen = buffer.getShort(off + ELEN); + + // Comment field starts here + int coff = off + CEN_HDR + nlen + elen; + + // Write invalid bytes + buffer.put(coff, INVALID_UTF8_BYTE_SEQUENCE, 0, INVALID_UTF8_BYTE_SEQUENCE.length); + return writeFile(name, buffer); + } + + + /** + * Finds the offset of the start of the CEN directory + */ + private int cenStart(ByteBuffer buffer) { + return buffer.getInt(buffer.capacity() - EOC_OFF); + } + + /** + * Utility to write a ByteBuffer to disk + */ + private Path writeFile(String name, ByteBuffer buffer) throws IOException { + Path zip = Path.of(name); + try (FileChannel ch = new FileOutputStream(zip.toFile()).getChannel()) { + buffer.rewind(); + ch.write(buffer); + } + return zip; + } +} diff --git a/test/jdk/java/util/zip/ZipFile/TestZipFileEncodings.java b/test/jdk/java/util/zip/ZipFile/TestZipFileEncodings.java index d181d025903..d8d474ae8d4 100644 --- a/test/jdk/java/util/zip/ZipFile/TestZipFileEncodings.java +++ b/test/jdk/java/util/zip/ZipFile/TestZipFileEncodings.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -38,6 +38,7 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -53,6 +54,7 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Stream; import java.util.zip.CRC32; import java.util.zip.ZipEntry; import java.util.zip.ZipFile; @@ -95,6 +97,13 @@ public class TestZipFileEncodings { }; } + @DataProvider(name = "all-charsets") + public Object[][] allCharsets() { + return Stream.concat(Stream.of(nonUnicodeCharsets()), + Stream.of(unicodeCharsets())) + .toArray(Object[][]::new); + } + @Test(dataProvider = "non-unicode-charsets") public void testNonUnicode(String charsetName) throws Throwable { test(NUM_ENTRIES, 100 + random().nextInt(ENTRY_SIZE), false, Charset.forName(charsetName)); @@ -115,6 +124,76 @@ public class TestZipFileEncodings { test(70000, 10, true, Charset.forName(charsetName)); } + /** + * This test was added to catch a regression where UTFZipCoder incorrectly + * treated latin1-encoded Strings as UTF8-compatible, while this actually only + * holds for ASCII strings. + * + * The implementation of UTFZipCoder.compare was later changed to not depend on + * the String's coder. Let's keep this test around anyway, since it provokes + * a corner case which could be easily missed. + */ + @Test + public void latin1NotAscii() throws IOException { + + Path zip = Path.of("latin1-not-ascii.zip"); + + // latin1, but not ASCII + String entryName = "smörgåsbord"; + + try (ZipOutputStream z = new ZipOutputStream(Files.newOutputStream(zip))) { + z.putNextEntry(new ZipEntry(entryName)); + } + + try (ZipFile z = new ZipFile(zip.toFile())) { + assertNotNull(z.getEntry(entryName)); + } + } + /** + * This test was added to catch a regression where ZipCoder.compare did not + * properly verify that the lookup name is a prefix of the entry name. Because of + * this regression, any candidate name with identical lengths and a trailing + * '/' would be incorrectly considered a "directory match". + * + * Since this regression depends on both a hash collision and that the length of names + * are equal, it is rarely found in the wild. Let's keep this test around + * since it explicity provokes this rare condition. + * + */ + @Test(dataProvider = "all-charsets") + public void sameHashAndLengthDirLookup(String charsetName) throws IOException { + // Two directory names with colliding hash codes and same length + // (found in a brute force search) + String one = "_____1637461950/"; + String two = "_____-408231241/"; + + // Create a ZIP containing the two directories + Charset charset = Charset.forName(charsetName); + Path zip = Path.of("hash-collision-slashmatch-utf16.zip"); + try (ZipOutputStream z = new ZipOutputStream(Files.newOutputStream(zip), charset)) { + + // Give the names different comments so they we can distinguish them + ZipEntry first = new ZipEntry(one); + first.setComment("Entry one"); + z.putNextEntry(first); + + ZipEntry second = new ZipEntry(two); + second.setComment("Entry two"); + z.putNextEntry(second); + } + + // Assert that "slashless" lookups returns the correct entry even + // when the directory names have colliding hash codes and equal lengths + try (ZipFile z = new ZipFile(zip.toFile(), charset)) { + + ZipEntry second = z.getEntry("_____-408231241"); + assertEquals(second.getComment(), "Entry two"); + + ZipEntry first = z.getEntry("_____1637461950"); + assertEquals(first.getComment(), "Entry one"); + } + } + @AfterClass public void tearDown() { for (Path path : paths) {