8329623: NegativeArraySizeException encoding large String to UTF-8

Reviewed-by: naoto, rgiulietti
This commit is contained in:
Roger Riggs 2024-04-08 17:58:21 +00:00
parent dd930c573b
commit 212a253697
2 changed files with 78 additions and 2 deletions
src/java.base/share/classes/java/lang
test/jdk/java/lang/String/CompactString

@ -1335,7 +1335,13 @@ public final class String
int dp = 0;
int sp = 0;
int sl = val.length >> 1;
byte[] dst = new byte[sl * 3];
// The UTF-8 encoding can be as much as 3 times the string length.
// If that estimate overflows a 32-bit int, precompute the exact size instead.
long allocLen = (sl * 3 < 0) ? computeSizeUTF8_UTF16(val, doReplace) : sl * 3;
if (allocLen > (long)Integer.MAX_VALUE) {
throw new OutOfMemoryError("Required length exceeds implementation limit");
}
byte[] dst = new byte[(int) allocLen];
while (sp < sl) {
// ascii fast loop;
char c = StringUTF16.getChar(val, sp);
@ -1385,6 +1391,47 @@ public final class String
return Arrays.copyOf(dst, dp);
}
/**
 * {@return the exact number of bytes needed to UTF_8 encode this UTF16 string}
 * No output is produced; only the length is accumulated, as a {@code long}.
 * A well-formed surrogate pair counts as 4 bytes. A surrogate that is not part
 * of a well-formed pair counts as 1 byte when {@code doReplace} is true and is
 * otherwise reported through {@code throwUnmappable}.
 * @param val UTF16 encoded byte array
 * @param doReplace true to replace unmappable characters
 */
private static long computeSizeUTF8_UTF16(byte[] val, boolean doReplace) {
    final int charCount = val.length >> 1;
    long size = 0L;
    int idx = 0;
    while (idx < charCount) {
        final char ch = StringUTF16.getChar(val, idx++);
        if (!Character.isSurrogate(ch)) {
            // Non-surrogate char: 1, 2 or 3 bytes depending on its magnitude
            size += (ch < 0x80) ? 1 : (ch < 0x800) ? 2 : 3;
            continue;
        }
        // idx already points at the char following ch
        final boolean wellFormedPair = Character.isHighSurrogate(ch)
                && idx < charCount
                && Character.isLowSurrogate(StringUTF16.getChar(val, idx));
        if (wellFormedPair) {
            size += 4;
            idx++;  // the low surrogate has been consumed as well
        } else if (doReplace) {
            size++;
        } else {
            // report the index of the offending surrogate itself
            throwUnmappable(idx - 1);
        }
    }
    return size;
}
/**
* Constructs a new {@code String} by decoding the specified array of bytes
* using the specified {@linkplain java.nio.charset.Charset charset}. The

@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -118,4 +118,33 @@ public class MaxSizeUTF16String {
}
}
}
/*
 * Verify that UTF-8 encoding a string that is too large fails with
 * OutOfMemoryError rather than NegativeArraySizeException.
 */
@Test
public void testMaxUTF8_UTF16Encode() {
    final String unit = "\uFFFF";
    final byte[] unitBytes = unit.getBytes(StandardCharsets.UTF_8);
    assertEquals(3, unitBytes.length, "UTF_8 encoded length of 0xffff");

    final int largestOk = Integer.MAX_VALUE / unitBytes.length - 1;
    final int limit = largestOk + 3;

    // A string of largestOk chars can still be UTF_8 encoded.
    System.out.println("testing size: " + largestOk);
    final String encodable = unit.repeat(largestOk);
    final byte[] encoded = encodable.getBytes(StandardCharsets.UTF_8);
    final int headroom = Integer.MAX_VALUE - encoded.length;
    assertTrue(headroom >= unitBytes.length, "remainder too large: " + headroom);

    // Strings of largestOk+1 and largestOk+2 chars must throw OOME:
    // the resulting byte array would exceed implementation limits.
    int count = largestOk + 1;
    while (count < limit) {
        System.out.println("testing size: " + count);
        final String tooLong = unit.repeat(count);
        OutOfMemoryError oome = assertThrows(OutOfMemoryError.class,
                () -> tooLong.getBytes(StandardCharsets.UTF_8));
        oome.printStackTrace();
        count++;
    }
}
}