8329623: NegativeArraySizeException encoding large String to UTF-8
Reviewed-by: naoto, rgiulietti
This commit is contained in:
parent
dd930c573b
commit
212a253697
src/java.base/share/classes/java/lang
test/jdk/java/lang/String/CompactString
@ -1335,7 +1335,13 @@ public final class String
|
||||
int dp = 0;
|
||||
int sp = 0;
|
||||
int sl = val.length >> 1;
|
||||
byte[] dst = new byte[sl * 3];
|
||||
// UTF-8 encoded can be as much as 3 times the string length
|
||||
// For very large estimate, (as in overflow of 32 bit int), precompute the exact size
|
||||
long allocLen = (sl * 3 < 0) ? computeSizeUTF8_UTF16(val, doReplace) : sl * 3;
|
||||
if (allocLen > (long)Integer.MAX_VALUE) {
|
||||
throw new OutOfMemoryError("Required length exceeds implementation limit");
|
||||
}
|
||||
byte[] dst = new byte[(int) allocLen];
|
||||
while (sp < sl) {
|
||||
// ascii fast loop;
|
||||
char c = StringUTF16.getChar(val, sp);
|
||||
@ -1385,6 +1391,47 @@ public final class String
|
||||
return Arrays.copyOf(dst, dp);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@return the exact size required to UTF_8 encode this UTF16 string}
|
||||
* @param val UTF16 encoded byte array
|
||||
* @param doReplace true to replace unmappable characters
|
||||
*/
|
||||
private static long computeSizeUTF8_UTF16(byte[] val, boolean doReplace) {
|
||||
long dp = 0L;
|
||||
int sp = 0;
|
||||
int sl = val.length >> 1;
|
||||
|
||||
while (sp < sl) {
|
||||
char c = StringUTF16.getChar(val, sp++);
|
||||
if (c < 0x80) {
|
||||
dp++;
|
||||
} else if (c < 0x800) {
|
||||
dp += 2;
|
||||
} else if (Character.isSurrogate(c)) {
|
||||
int uc = -1;
|
||||
char c2;
|
||||
if (Character.isHighSurrogate(c) && sp < sl &&
|
||||
Character.isLowSurrogate(c2 = StringUTF16.getChar(val, sp))) {
|
||||
uc = Character.toCodePoint(c, c2);
|
||||
}
|
||||
if (uc < 0) {
|
||||
if (doReplace) {
|
||||
dp++;
|
||||
} else {
|
||||
throwUnmappable(sp - 1);
|
||||
}
|
||||
} else {
|
||||
dp += 4;
|
||||
sp++; // 2 chars
|
||||
}
|
||||
} else {
|
||||
// 3 bytes, 16 bits
|
||||
dp += 3;
|
||||
}
|
||||
}
|
||||
return dp;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a new {@code String} by decoding the specified array of bytes
|
||||
* using the specified {@linkplain java.nio.charset.Charset charset}. The
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -118,4 +118,33 @@ public class MaxSizeUTF16String {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Test that UTF-8 of too large strings throws OOME, (not NegativeArraySizeException).
|
||||
*/
|
||||
@Test
|
||||
public void testMaxUTF8_UTF16Encode() {
|
||||
String s = "\uFFFF";
|
||||
final byte[] bytes1 = s.getBytes(StandardCharsets.UTF_8);
|
||||
assertEquals(3, bytes1.length, "UTF_8 encoded length of 0xffff");
|
||||
|
||||
int min = Integer.MAX_VALUE / bytes1.length - 1;
|
||||
int max = min + 3;
|
||||
|
||||
// String of size min can be UTF_8 encoded.
|
||||
System.out.println("testing size: " + min);
|
||||
String s1 = s.repeat(min);
|
||||
byte[] bytes = s1.getBytes(StandardCharsets.UTF_8);
|
||||
int remaining = Integer.MAX_VALUE - bytes.length;
|
||||
assertTrue(remaining >= bytes1.length, "remainder too large: " + remaining);
|
||||
|
||||
// Strings of size min+1...min+2, throw OOME
|
||||
// The resulting byte array would exceed implementation limits
|
||||
for (int count = min + 1; count < max; count++) {
|
||||
System.out.println("testing size: " + count);
|
||||
final String s2 = s.repeat(count);
|
||||
OutOfMemoryError ex = assertThrows(OutOfMemoryError.class, () -> s2.getBytes(StandardCharsets.UTF_8));
|
||||
ex.printStackTrace();
|
||||
};
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user