8301873: Avoid string decoding in ZipFile.Source.getEntryPos
Reviewed-by: redestad, lancea
This commit is contained in:
parent
f82385e587
commit
78f71b4d41
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2009, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2009, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -55,6 +55,30 @@ class ZipCoder {
|
||||
return new ZipCoder(charset);
|
||||
}
|
||||
|
||||
/**
|
||||
* This enum represents the three possible return values for
|
||||
* {@link #compare(String, byte[], int, int, boolean)} when
|
||||
* this method compares a lookup name to a string encoded in the
|
||||
* CEN byte array.
|
||||
*/
|
||||
enum Comparison {
|
||||
/**
|
||||
* The lookup string is exactly equal
|
||||
* to the encoded string.
|
||||
*/
|
||||
EXACT_MATCH,
|
||||
/**
|
||||
* The lookup string and the encoded string differs only
|
||||
* by the encoded string having a trailing '/' character.
|
||||
*/
|
||||
DIRECTORY_MATCH,
|
||||
/**
|
||||
* The lookup string and the encoded string do not match.
|
||||
* (They are neither an exact match or a directory match.)
|
||||
*/
|
||||
NO_MATCH
|
||||
}
|
||||
|
||||
String toString(byte[] ba, int off, int length) {
|
||||
try {
|
||||
return decoder().decode(ByteBuffer.wrap(ba, off, length)).toString();
|
||||
@ -184,6 +208,52 @@ class ZipCoder {
|
||||
return slashBytes;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is used by ZipFile.Source.getEntryPos when comparing the
|
||||
* name being looked up to candidate names encoded in the CEN byte
|
||||
* array.
|
||||
*
|
||||
* Since ZipCode.getEntry supports looking up a "dir/" entry by
|
||||
* the name "dir", this method can optionally distinguish an
|
||||
* exact match from a partial "directory match" (where names only
|
||||
* differ by the encoded name having an additional trailing '/')
|
||||
*
|
||||
* The return values of this method are as follows:
|
||||
*
|
||||
* If the lookup name is exactly equal to the encoded string, return
|
||||
* {@link Comparison#EXACT_MATCH}.
|
||||
*
|
||||
* If the parameter {@code matchDirectory} is {@code true} and the
|
||||
* two strings differ only by the encoded string having an extra
|
||||
* trailing '/' character, then return {@link Comparison#DIRECTORY_MATCH}.
|
||||
*
|
||||
* Otherwise, return {@link Comparison#NO_MATCH}
|
||||
*
|
||||
* While a general implementation will need to decode bytes into a
|
||||
* String for comparison, this can be avoided if the String coder
|
||||
* and this ZipCoder are known to encode strings to the same bytes.
|
||||
*
|
||||
* @param str The lookup string to compare with the encoded string.
|
||||
* @param b The byte array holding the encoded string
|
||||
* @param off The offset into the array where the encoded string starts
|
||||
* @param len The length of the encoded string in bytes
|
||||
* @param matchDirectory If {@code true} and the strings do not match exactly,
|
||||
* a directory match will also be tested
|
||||
*
|
||||
*/
|
||||
Comparison compare(String str, byte[] b, int off, int len, boolean matchDirectory) {
|
||||
String decoded = toString(b, off, len);
|
||||
if (decoded.startsWith(str)) {
|
||||
if (decoded.length() == str.length()) {
|
||||
return Comparison.EXACT_MATCH;
|
||||
} else if (matchDirectory
|
||||
&& decoded.length() == str.length() + 1
|
||||
&& decoded.endsWith("/") ) {
|
||||
return Comparison.DIRECTORY_MATCH;
|
||||
}
|
||||
}
|
||||
return Comparison.NO_MATCH;
|
||||
}
|
||||
static final class UTF8ZipCoder extends ZipCoder {
|
||||
|
||||
private UTF8ZipCoder(Charset utf8) {
|
||||
@ -232,5 +302,22 @@ class ZipCoder {
|
||||
boolean hasTrailingSlash(byte[] a, int end) {
|
||||
return end > 0 && a[end - 1] == '/';
|
||||
}
|
||||
|
||||
@Override
|
||||
Comparison compare(String str, byte[] b, int off, int len, boolean matchDirectory) {
|
||||
try {
|
||||
byte[] encoded = JLA.getBytesNoRepl(str, UTF_8.INSTANCE);
|
||||
int mismatch = Arrays.mismatch(encoded, 0, encoded.length, b, off, off+len);
|
||||
if (mismatch == -1) {
|
||||
return Comparison.EXACT_MATCH;
|
||||
} else if (matchDirectory && len == mismatch + 1 && hasTrailingSlash(b, off + len)) {
|
||||
return Comparison.DIRECTORY_MATCH;
|
||||
} else {
|
||||
return Comparison.NO_MATCH;
|
||||
}
|
||||
} catch (CharacterCodingException e) {
|
||||
return Comparison.NO_MATCH;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1633,40 +1633,41 @@ public class ZipFile implements ZipConstants, Closeable {
|
||||
int hsh = ZipCoder.hash(name);
|
||||
int idx = table[(hsh & 0x7fffffff) % tablelen];
|
||||
|
||||
int dirPos = -1; // Position of secondary match "name/"
|
||||
|
||||
// Search down the target hash chain for a entry whose
|
||||
// 32 bit hash matches the hashed name.
|
||||
while (idx != ZIP_ENDCHAIN) {
|
||||
if (getEntryHash(idx) == hsh) {
|
||||
// The CEN name must match the specified one
|
||||
|
||||
int pos = getEntryPos(idx);
|
||||
int noff = pos + CENHDR;
|
||||
int nlen = CENNAM(cen, pos);
|
||||
|
||||
try {
|
||||
ZipCoder zc = zipCoderForPos(pos);
|
||||
String entry = zc.toString(cen, pos + CENHDR, CENNAM(cen, pos));
|
||||
ZipCoder zc = zipCoderForPos(pos);
|
||||
|
||||
// If addSlash is true we'll test for name+/ in addition to
|
||||
// name, unless name is the empty string or already ends with a
|
||||
// slash
|
||||
int entryLen = entry.length();
|
||||
int nameLen = name.length();
|
||||
if (entryLen == nameLen && entry.equals(name)) {
|
||||
// Found our match
|
||||
// Compare the lookup name with the name encoded in the CEN
|
||||
switch (zc.compare(name, cen, noff, nlen, addSlash)) {
|
||||
case EXACT_MATCH:
|
||||
// We found an exact match for "name"
|
||||
return pos;
|
||||
}
|
||||
// If addSlash is true we'll now test for name+/ providing
|
||||
if (addSlash && nameLen + 1 == entryLen
|
||||
&& entry.startsWith(name) &&
|
||||
entry.charAt(entryLen - 1) == '/') {
|
||||
// Found the entry "name+/", now find the CEN entry pos
|
||||
int exactPos = getEntryPos(name, false);
|
||||
return exactPos == -1 ? pos : exactPos;
|
||||
}
|
||||
} catch (IllegalArgumentException iae) {
|
||||
// Ignore
|
||||
case DIRECTORY_MATCH:
|
||||
// We found the directory "name/"
|
||||
// Track its position, then continue the search for "name"
|
||||
dirPos = pos;
|
||||
break;
|
||||
case NO_MATCH:
|
||||
// Hash collision, continue searching
|
||||
}
|
||||
}
|
||||
idx = getEntryNext(idx);
|
||||
}
|
||||
// Reaching this point means we did not find "name".
|
||||
// Return the position of "name/" if we found it
|
||||
if (dirPos != -1) {
|
||||
return dirPos;
|
||||
}
|
||||
// No entry found
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,183 @@
|
||||
/*
|
||||
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
import org.testng.annotations.BeforeTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Arrays;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipException;
|
||||
import java.util.zip.ZipFile;
|
||||
import java.util.zip.ZipOutputStream;
|
||||
|
||||
import static org.testng.Assert.assertEquals;
|
||||
import static org.testng.Assert.expectThrows;
|
||||
|
||||
/**
|
||||
* @test
|
||||
* @summary Validate that opening ZIP files files with invalid UTF-8
|
||||
* byte sequences in the name or comment fields fails with ZipException
|
||||
* @run testng/othervm InvalidBytesInEntryNameOrComment
|
||||
*/
|
||||
public class InvalidBytesInEntryNameOrComment {
|
||||
|
||||
// Offsets for navigating the CEN fields
|
||||
private static final int EOC_OFF = 6; // Offset from EOF to find CEN offset
|
||||
private static final int CEN_HDR = 45; // Size of a CEN header
|
||||
private static final int NLEN = 28; // Name length
|
||||
private static final int ELEN = 30; // Extra length
|
||||
private static final int CLEN = 32; // Comment length
|
||||
|
||||
// Example invalid UTF-8 byte sequence
|
||||
private static final byte[] INVALID_UTF8_BYTE_SEQUENCE = {(byte) 0xF0, (byte) 0xA4, (byte) 0xAD};
|
||||
|
||||
// Expected ZipException regex
|
||||
private static final String BAD_ENTRY_NAME_OR_COMMENT = "invalid CEN header (bad entry name or comment)";
|
||||
|
||||
// ZIP file with invalid name field
|
||||
private Path invalidName;
|
||||
|
||||
// ZIP file with invalid comment field
|
||||
private Path invalidComment;
|
||||
|
||||
@BeforeTest
|
||||
public void setup() throws IOException {
|
||||
// Create a ZIP file with valid name and comment fields
|
||||
byte[] templateZip = templateZIP();
|
||||
|
||||
// Create a ZIP with a CEN name field containing an invalid byte sequence
|
||||
invalidName = invalidName("invalid-name.zip", templateZip);
|
||||
|
||||
// Create a ZIP with a CEN comment field containing an invalid byte sequence
|
||||
invalidComment = invalidComment("invalid-comment.zip", templateZip);
|
||||
}
|
||||
|
||||
/**
|
||||
* Opening a ZipFile with an invalid UTF-8 byte sequence in
|
||||
* the name field of a CEN file header should throw a
|
||||
* ZipException with "bad entry name or comment"
|
||||
*/
|
||||
@Test
|
||||
public void shouldRejectInvalidName() throws IOException {
|
||||
ZipException ex = expectThrows(ZipException.class, () -> {
|
||||
new ZipFile(invalidName.toFile());
|
||||
});
|
||||
assertEquals(ex.getMessage(), BAD_ENTRY_NAME_OR_COMMENT);
|
||||
}
|
||||
|
||||
/**
|
||||
* Opening a ZipFile with an invalid UTF-8 byte sequence in
|
||||
* the comment field of a CEN file header should throw a
|
||||
* ZipException with "bad entry name or comment"
|
||||
*/
|
||||
@Test
|
||||
public void shouldRejectInvalidComment() throws IOException {
|
||||
ZipException ex = expectThrows(ZipException.class, () -> {
|
||||
new ZipFile(invalidComment.toFile());
|
||||
});
|
||||
assertEquals(ex.getMessage(), BAD_ENTRY_NAME_OR_COMMENT);
|
||||
}
|
||||
|
||||
/**
|
||||
* Make a valid ZIP file used as a template for invalid files
|
||||
*/
|
||||
private byte[] templateZIP() throws IOException {
|
||||
ByteArrayOutputStream bout = new ByteArrayOutputStream();
|
||||
try (ZipOutputStream zo = new ZipOutputStream(bout)) {
|
||||
ZipEntry commentEntry = new ZipEntry("file");
|
||||
commentEntry.setComment("Comment");
|
||||
zo.putNextEntry(commentEntry);
|
||||
}
|
||||
return bout.toByteArray();
|
||||
}
|
||||
|
||||
/**
|
||||
* Make a ZIP with invalid bytes in the CEN name field
|
||||
*/
|
||||
private Path invalidName(String name, byte[] template) throws IOException {
|
||||
ByteBuffer buffer = copyTemplate(template);
|
||||
int off = cenStart(buffer);
|
||||
// Name field starts here
|
||||
int noff = off + CEN_HDR;
|
||||
|
||||
// Write invalid bytes
|
||||
buffer.put(noff, INVALID_UTF8_BYTE_SEQUENCE, 0, INVALID_UTF8_BYTE_SEQUENCE.length);
|
||||
return writeFile(name, buffer);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Make a copy of the ZIP template and wrap it in a little-endian
|
||||
* ByteBuffer
|
||||
*/
|
||||
private ByteBuffer copyTemplate(byte[] template) {
|
||||
return ByteBuffer.wrap(Arrays.copyOf(template, template.length))
|
||||
.order(ByteOrder.LITTLE_ENDIAN);
|
||||
}
|
||||
|
||||
/**
|
||||
* Make a ZIP with invalid bytes in the CEN comment field
|
||||
*/
|
||||
private Path invalidComment(String name, byte[] template) throws IOException {
|
||||
ByteBuffer buffer = copyTemplate(template);
|
||||
int off = cenStart(buffer);
|
||||
// Need to skip past the length of the name and extra fields
|
||||
int nlen = buffer.getShort(off + NLEN);
|
||||
int elen = buffer.getShort(off + ELEN);
|
||||
|
||||
// Comment field starts here
|
||||
int coff = off + CEN_HDR + nlen + elen;
|
||||
|
||||
// Write invalid bytes
|
||||
buffer.put(coff, INVALID_UTF8_BYTE_SEQUENCE, 0, INVALID_UTF8_BYTE_SEQUENCE.length);
|
||||
return writeFile(name, buffer);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Finds the offset of the start of the CEN directory
|
||||
*/
|
||||
private int cenStart(ByteBuffer buffer) {
|
||||
return buffer.getInt(buffer.capacity() - EOC_OFF);
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility to write a ByteBuffer to disk
|
||||
*/
|
||||
private Path writeFile(String name, ByteBuffer buffer) throws IOException {
|
||||
Path zip = Path.of(name);
|
||||
try (FileChannel ch = new FileOutputStream(zip.toFile()).getChannel()) {
|
||||
buffer.rewind();
|
||||
ch.write(buffer);
|
||||
}
|
||||
return zip;
|
||||
}
|
||||
}
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -38,6 +38,7 @@ import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
@ -53,6 +54,7 @@ import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.stream.Stream;
|
||||
import java.util.zip.CRC32;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipFile;
|
||||
@ -95,6 +97,13 @@ public class TestZipFileEncodings {
|
||||
};
|
||||
}
|
||||
|
||||
@DataProvider(name = "all-charsets")
|
||||
public Object[][] allCharsets() {
|
||||
return Stream.concat(Stream.of(nonUnicodeCharsets()),
|
||||
Stream.of(unicodeCharsets()))
|
||||
.toArray(Object[][]::new);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "non-unicode-charsets")
|
||||
public void testNonUnicode(String charsetName) throws Throwable {
|
||||
test(NUM_ENTRIES, 100 + random().nextInt(ENTRY_SIZE), false, Charset.forName(charsetName));
|
||||
@ -115,6 +124,76 @@ public class TestZipFileEncodings {
|
||||
test(70000, 10, true, Charset.forName(charsetName));
|
||||
}
|
||||
|
||||
/**
|
||||
* This test was added to catch a regression where UTFZipCoder incorrectly
|
||||
* treated latin1-encoded Strings as UTF8-compatible, while this actually only
|
||||
* holds for ASCII strings.
|
||||
*
|
||||
* The implementation of UTFZipCoder.compare was later changed to not depend on
|
||||
* the String's coder. Let's keep this test around anyway, since it provokes
|
||||
* a corner case which could be easily missed.
|
||||
*/
|
||||
@Test
|
||||
public void latin1NotAscii() throws IOException {
|
||||
|
||||
Path zip = Path.of("latin1-not-ascii.zip");
|
||||
|
||||
// latin1, but not ASCII
|
||||
String entryName = "smörgåsbord";
|
||||
|
||||
try (ZipOutputStream z = new ZipOutputStream(Files.newOutputStream(zip))) {
|
||||
z.putNextEntry(new ZipEntry(entryName));
|
||||
}
|
||||
|
||||
try (ZipFile z = new ZipFile(zip.toFile())) {
|
||||
assertNotNull(z.getEntry(entryName));
|
||||
}
|
||||
}
|
||||
/**
|
||||
* This test was added to catch a regression where ZipCoder.compare did not
|
||||
* properly verify that the lookup name is a prefix of the entry name. Because of
|
||||
* this regression, any candidate name with identical lengths and a trailing
|
||||
* '/' would be incorrectly considered a "directory match".
|
||||
*
|
||||
* Since this regression depends on both a hash collision and that the length of names
|
||||
* are equal, it is rarely found in the wild. Let's keep this test around
|
||||
* since it explicity provokes this rare condition.
|
||||
*
|
||||
*/
|
||||
@Test(dataProvider = "all-charsets")
|
||||
public void sameHashAndLengthDirLookup(String charsetName) throws IOException {
|
||||
// Two directory names with colliding hash codes and same length
|
||||
// (found in a brute force search)
|
||||
String one = "_____1637461950/";
|
||||
String two = "_____-408231241/";
|
||||
|
||||
// Create a ZIP containing the two directories
|
||||
Charset charset = Charset.forName(charsetName);
|
||||
Path zip = Path.of("hash-collision-slashmatch-utf16.zip");
|
||||
try (ZipOutputStream z = new ZipOutputStream(Files.newOutputStream(zip), charset)) {
|
||||
|
||||
// Give the names different comments so they we can distinguish them
|
||||
ZipEntry first = new ZipEntry(one);
|
||||
first.setComment("Entry one");
|
||||
z.putNextEntry(first);
|
||||
|
||||
ZipEntry second = new ZipEntry(two);
|
||||
second.setComment("Entry two");
|
||||
z.putNextEntry(second);
|
||||
}
|
||||
|
||||
// Assert that "slashless" lookups returns the correct entry even
|
||||
// when the directory names have colliding hash codes and equal lengths
|
||||
try (ZipFile z = new ZipFile(zip.toFile(), charset)) {
|
||||
|
||||
ZipEntry second = z.getEntry("_____-408231241");
|
||||
assertEquals(second.getComment(), "Entry two");
|
||||
|
||||
ZipEntry first = z.getEntry("_____1637461950");
|
||||
assertEquals(first.getComment(), "Entry one");
|
||||
}
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public void tearDown() {
|
||||
for (Path path : paths) {
|
||||
|
Loading…
Reference in New Issue
Block a user