8311906: Improve robustness of String constructors with mutable array inputs
Co-authored-by: Damon Fenacci <dfenacci@openjdk.org> Co-authored-by: Claes Redestad <redestad@openjdk.org> Co-authored-by: Amit Kumar <amitkumar@openjdk.org> Co-authored-by: Martin Doerr <mdoerr@openjdk.org> Reviewed-by: rgiulietti, thartmann, redestad, dfenacci
This commit is contained in:
parent
316b78336c
commit
155abc576a
@ -5651,7 +5651,7 @@ void MacroAssembler::fill_words(Register base, Register cnt, Register value)
|
||||
// - sun/nio/cs/ISO_8859_1$Encoder.implEncodeISOArray
|
||||
// return the number of characters copied.
|
||||
// - java/lang/StringUTF16.compress
|
||||
// return zero (0) if copy fails, otherwise 'len'.
|
||||
// return index of non-latin1 character if copy fails, otherwise 'len'.
|
||||
//
|
||||
// This version always returns the number of characters copied, and does not
|
||||
// clobber the 'len' register. A successful copy will complete with the post-
|
||||
@ -5868,15 +5868,15 @@ address MacroAssembler::byte_array_inflate(Register src, Register dst, Register
|
||||
}
|
||||
|
||||
// Compress char[] array to byte[].
|
||||
// Intrinsic for java.lang.StringUTF16.compress(char[] src, int srcOff, byte[] dst, int dstOff, int len)
|
||||
// Return the array length if every element in array can be encoded,
|
||||
// otherwise, the index of first non-latin1 (> 0xff) character.
|
||||
void MacroAssembler::char_array_compress(Register src, Register dst, Register len,
|
||||
Register res,
|
||||
FloatRegister tmp0, FloatRegister tmp1,
|
||||
FloatRegister tmp2, FloatRegister tmp3,
|
||||
FloatRegister tmp4, FloatRegister tmp5) {
|
||||
encode_iso_array(src, dst, len, res, false, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
|
||||
// Adjust result: res == len ? len : 0
|
||||
cmp(len, res);
|
||||
csel(res, res, zr, EQ);
|
||||
}
|
||||
|
||||
// java.math.round(double a)
|
||||
|
@ -12727,16 +12727,8 @@ instruct string_compress(rarg1RegP src, rarg2RegP dst, iRegIsrc len, iRegIdst re
|
||||
ins_cost(300);
|
||||
format %{ "String Compress $src,$dst,$len -> $result \t// KILL $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %}
|
||||
ins_encode %{
|
||||
Label Lskip, Ldone;
|
||||
__ li($result$$Register, 0);
|
||||
__ string_compress_16($src$$Register, $dst$$Register, $len$$Register, $tmp1$$Register,
|
||||
$tmp2$$Register, $tmp3$$Register, $tmp4$$Register, $tmp5$$Register, Ldone);
|
||||
__ rldicl_($tmp1$$Register, $len$$Register, 0, 64-3); // Remaining characters.
|
||||
__ beq(CCR0, Lskip);
|
||||
__ string_compress($src$$Register, $dst$$Register, $tmp1$$Register, $tmp2$$Register, Ldone);
|
||||
__ bind(Lskip);
|
||||
__ mr($result$$Register, $len$$Register);
|
||||
__ bind(Ldone);
|
||||
__ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, $tmp1$$Register, $tmp2$$Register,
|
||||
$tmp3$$Register, $tmp4$$Register, $tmp5$$Register, $result$$Register, false);
|
||||
%}
|
||||
ins_pipe(pipe_class_default);
|
||||
%}
|
||||
|
@ -1933,14 +1933,12 @@ void C2_MacroAssembler::byte_array_inflate_v(Register src, Register dst, Registe
|
||||
}
|
||||
|
||||
// Compress char[] array to byte[].
|
||||
// result: the array length if every element in array can be encoded; 0, otherwise.
|
||||
// Intrinsic for java.lang.StringUTF16.compress(char[] src, int srcOff, byte[] dst, int dstOff, int len)
|
||||
// result: the array length if every element in array can be encoded,
|
||||
// otherwise, the index of first non-latin1 (> 0xff) character.
|
||||
void C2_MacroAssembler::char_array_compress_v(Register src, Register dst, Register len,
|
||||
Register result, Register tmp) {
|
||||
Label done;
|
||||
encode_iso_array_v(src, dst, len, result, tmp, false);
|
||||
beqz(len, done);
|
||||
mv(result, zr);
|
||||
bind(done);
|
||||
}
|
||||
|
||||
// Intrinsic for
|
||||
@ -1948,7 +1946,7 @@ void C2_MacroAssembler::char_array_compress_v(Register src, Register dst, Regist
|
||||
// - sun/nio/cs/ISO_8859_1$Encoder.implEncodeISOArray
|
||||
// return the number of characters copied.
|
||||
// - java/lang/StringUTF16.compress
|
||||
// return zero (0) if copy fails, otherwise 'len'.
|
||||
// return index of non-latin1 character if copy fails, otherwise 'len'.
|
||||
//
|
||||
// This version always returns the number of characters copied. A successful
|
||||
// copy will complete with the post-condition: 'res' == 'len', while an
|
||||
|
@ -10190,7 +10190,7 @@ instruct string_compress(iRegP src, iRegP dst, iRegI result, iRegI len, iRegI tm
|
||||
format %{ "String Compress $src->$dst($len) -> $result" %}
|
||||
ins_encode %{
|
||||
__ string_compress($result$$Register, $src$$Register, $dst$$Register, $len$$Register,
|
||||
$tmp$$Register, false, false);
|
||||
$tmp$$Register, true, false);
|
||||
%}
|
||||
ins_pipe(pipe_class_dummy);
|
||||
%}
|
||||
|
@ -8628,15 +8628,19 @@ void MacroAssembler::crc32c_ipl_alg2_alt2(Register in_out, Register in1, Registe
|
||||
#undef BLOCK_COMMENT
|
||||
|
||||
// Compress char[] array to byte[].
|
||||
// ..\jdk\src\java.base\share\classes\java\lang\StringUTF16.java
|
||||
// Intrinsic for java.lang.StringUTF16.compress(char[] src, int srcOff, byte[] dst, int dstOff, int len)
|
||||
// Return the array length if every element in array can be encoded,
|
||||
// otherwise, the index of first non-latin1 (> 0xff) character.
|
||||
// @IntrinsicCandidate
|
||||
// private static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) {
|
||||
// public static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) {
|
||||
// for (int i = 0; i < len; i++) {
|
||||
// int c = src[srcOff++];
|
||||
// if (c >>> 8 != 0) {
|
||||
// return 0;
|
||||
// char c = src[srcOff];
|
||||
// if (c > 0xff) {
|
||||
// return i; // return index of non-latin1 char
|
||||
// }
|
||||
// dst[dstOff++] = (byte)c;
|
||||
// dst[dstOff] = (byte)c;
|
||||
// srcOff++;
|
||||
// dstOff++;
|
||||
// }
|
||||
// return len;
|
||||
// }
|
||||
@ -8644,7 +8648,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
|
||||
XMMRegister tmp1Reg, XMMRegister tmp2Reg,
|
||||
XMMRegister tmp3Reg, XMMRegister tmp4Reg,
|
||||
Register tmp5, Register result, KRegister mask1, KRegister mask2) {
|
||||
Label copy_chars_loop, return_length, return_zero, done;
|
||||
Label copy_chars_loop, done, reset_sp, copy_tail;
|
||||
|
||||
// rsi: src
|
||||
// rdi: dst
|
||||
@ -8659,28 +8663,28 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
|
||||
assert(len != result, "");
|
||||
|
||||
// save length for return
|
||||
push(len);
|
||||
movl(result, len);
|
||||
|
||||
if ((AVX3Threshold == 0) && (UseAVX > 2) && // AVX512
|
||||
VM_Version::supports_avx512vlbw() &&
|
||||
VM_Version::supports_bmi2()) {
|
||||
|
||||
Label copy_32_loop, copy_loop_tail, below_threshold;
|
||||
Label copy_32_loop, copy_loop_tail, below_threshold, reset_for_copy_tail;
|
||||
|
||||
// alignment
|
||||
Label post_alignment;
|
||||
|
||||
// if length of the string is less than 16, handle it in an old fashioned way
|
||||
// if length of the string is less than 32, handle it the old fashioned way
|
||||
testl(len, -32);
|
||||
jcc(Assembler::zero, below_threshold);
|
||||
|
||||
// First check whether a character is compressible ( <= 0xFF).
|
||||
// Create mask to test for Unicode chars inside zmm vector
|
||||
movl(result, 0x00FF);
|
||||
evpbroadcastw(tmp2Reg, result, Assembler::AVX_512bit);
|
||||
movl(tmp5, 0x00FF);
|
||||
evpbroadcastw(tmp2Reg, tmp5, Assembler::AVX_512bit);
|
||||
|
||||
testl(len, -64);
|
||||
jcc(Assembler::zero, post_alignment);
|
||||
jccb(Assembler::zero, post_alignment);
|
||||
|
||||
movl(tmp5, dst);
|
||||
andl(tmp5, (32 - 1));
|
||||
@ -8689,18 +8693,19 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
|
||||
|
||||
// bail out when there is nothing to be done
|
||||
testl(tmp5, 0xFFFFFFFF);
|
||||
jcc(Assembler::zero, post_alignment);
|
||||
jccb(Assembler::zero, post_alignment);
|
||||
|
||||
// ~(~0 << len), where len is the # of remaining elements to process
|
||||
movl(result, 0xFFFFFFFF);
|
||||
shlxl(result, result, tmp5);
|
||||
notl(result);
|
||||
kmovdl(mask2, result);
|
||||
movl(len, 0xFFFFFFFF);
|
||||
shlxl(len, len, tmp5);
|
||||
notl(len);
|
||||
kmovdl(mask2, len);
|
||||
movl(len, result);
|
||||
|
||||
evmovdquw(tmp1Reg, mask2, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
|
||||
evpcmpw(mask1, mask2, tmp1Reg, tmp2Reg, Assembler::le, /*signed*/ false, Assembler::AVX_512bit);
|
||||
ktestd(mask1, mask2);
|
||||
jcc(Assembler::carryClear, return_zero);
|
||||
jcc(Assembler::carryClear, copy_tail);
|
||||
|
||||
evpmovwb(Address(dst, 0), mask2, tmp1Reg, Assembler::AVX_512bit);
|
||||
|
||||
@ -8715,7 +8720,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
|
||||
movl(tmp5, len);
|
||||
andl(tmp5, (32 - 1)); // tail count (in chars)
|
||||
andl(len, ~(32 - 1)); // vector count (in chars)
|
||||
jcc(Assembler::zero, copy_loop_tail);
|
||||
jccb(Assembler::zero, copy_loop_tail);
|
||||
|
||||
lea(src, Address(src, len, Address::times_2));
|
||||
lea(dst, Address(dst, len, Address::times_1));
|
||||
@ -8725,55 +8730,60 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
|
||||
evmovdquw(tmp1Reg, Address(src, len, Address::times_2), Assembler::AVX_512bit);
|
||||
evpcmpuw(mask1, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
|
||||
kortestdl(mask1, mask1);
|
||||
jcc(Assembler::carryClear, return_zero);
|
||||
jccb(Assembler::carryClear, reset_for_copy_tail);
|
||||
|
||||
// All elements in current processed chunk are valid candidates for
|
||||
// compression. Write the truncated byte elements to memory.
|
||||
evpmovwb(Address(dst, len, Address::times_1), tmp1Reg, Assembler::AVX_512bit);
|
||||
addptr(len, 32);
|
||||
jcc(Assembler::notZero, copy_32_loop);
|
||||
jccb(Assembler::notZero, copy_32_loop);
|
||||
|
||||
bind(copy_loop_tail);
|
||||
// bail out when there is nothing to be done
|
||||
testl(tmp5, 0xFFFFFFFF);
|
||||
jcc(Assembler::zero, return_length);
|
||||
jcc(Assembler::zero, done);
|
||||
|
||||
movl(len, tmp5);
|
||||
|
||||
// ~(~0 << len), where len is the # of remaining elements to process
|
||||
movl(result, 0xFFFFFFFF);
|
||||
shlxl(result, result, len);
|
||||
notl(result);
|
||||
movl(tmp5, 0xFFFFFFFF);
|
||||
shlxl(tmp5, tmp5, len);
|
||||
notl(tmp5);
|
||||
|
||||
kmovdl(mask2, result);
|
||||
kmovdl(mask2, tmp5);
|
||||
|
||||
evmovdquw(tmp1Reg, mask2, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
|
||||
evpcmpw(mask1, mask2, tmp1Reg, tmp2Reg, Assembler::le, /*signed*/ false, Assembler::AVX_512bit);
|
||||
ktestd(mask1, mask2);
|
||||
jcc(Assembler::carryClear, return_zero);
|
||||
jcc(Assembler::carryClear, copy_tail);
|
||||
|
||||
evpmovwb(Address(dst, 0), mask2, tmp1Reg, Assembler::AVX_512bit);
|
||||
jmp(return_length);
|
||||
jmp(done);
|
||||
|
||||
bind(reset_for_copy_tail);
|
||||
lea(src, Address(src, tmp5, Address::times_2));
|
||||
lea(dst, Address(dst, tmp5, Address::times_1));
|
||||
subptr(len, tmp5);
|
||||
jmp(copy_chars_loop);
|
||||
|
||||
bind(below_threshold);
|
||||
}
|
||||
|
||||
if (UseSSE42Intrinsics) {
|
||||
Label copy_32_loop, copy_16, copy_tail;
|
||||
|
||||
movl(result, len);
|
||||
|
||||
movl(tmp5, 0xff00ff00); // create mask to test for Unicode chars in vectors
|
||||
Label copy_32_loop, copy_16, copy_tail_sse, reset_for_copy_tail;
|
||||
|
||||
// vectored compression
|
||||
andl(len, 0xfffffff0); // vector count (in chars)
|
||||
andl(result, 0x0000000f); // tail count (in chars)
|
||||
testl(len, len);
|
||||
jcc(Assembler::zero, copy_16);
|
||||
testl(len, 0xfffffff8);
|
||||
jcc(Assembler::zero, copy_tail);
|
||||
|
||||
// compress 16 chars per iter
|
||||
movl(tmp5, 0xff00ff00); // create mask to test for Unicode chars in vectors
|
||||
movdl(tmp1Reg, tmp5);
|
||||
pshufd(tmp1Reg, tmp1Reg, 0); // store Unicode mask in tmp1Reg
|
||||
|
||||
andl(len, 0xfffffff0);
|
||||
jccb(Assembler::zero, copy_16);
|
||||
|
||||
// compress 16 chars per iter
|
||||
pxor(tmp4Reg, tmp4Reg);
|
||||
|
||||
lea(src, Address(src, len, Address::times_2));
|
||||
@ -8786,59 +8796,60 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
|
||||
movdqu(tmp3Reg, Address(src, len, Address::times_2, 16)); // load next 8 characters
|
||||
por(tmp4Reg, tmp3Reg);
|
||||
ptest(tmp4Reg, tmp1Reg); // check for Unicode chars in next vector
|
||||
jcc(Assembler::notZero, return_zero);
|
||||
jccb(Assembler::notZero, reset_for_copy_tail);
|
||||
packuswb(tmp2Reg, tmp3Reg); // only ASCII chars; compress each to 1 byte
|
||||
movdqu(Address(dst, len, Address::times_1), tmp2Reg);
|
||||
addptr(len, 16);
|
||||
jcc(Assembler::notZero, copy_32_loop);
|
||||
jccb(Assembler::notZero, copy_32_loop);
|
||||
|
||||
// compress next vector of 8 chars (if any)
|
||||
bind(copy_16);
|
||||
movl(len, result);
|
||||
andl(len, 0xfffffff8); // vector count (in chars)
|
||||
andl(result, 0x00000007); // tail count (in chars)
|
||||
testl(len, len);
|
||||
jccb(Assembler::zero, copy_tail);
|
||||
// len = 0
|
||||
testl(result, 0x00000008); // check if there's a block of 8 chars to compress
|
||||
jccb(Assembler::zero, copy_tail_sse);
|
||||
|
||||
movdl(tmp1Reg, tmp5);
|
||||
pshufd(tmp1Reg, tmp1Reg, 0); // store Unicode mask in tmp1Reg
|
||||
pxor(tmp3Reg, tmp3Reg);
|
||||
|
||||
movdqu(tmp2Reg, Address(src, 0));
|
||||
ptest(tmp2Reg, tmp1Reg); // check for Unicode chars in vector
|
||||
jccb(Assembler::notZero, return_zero);
|
||||
jccb(Assembler::notZero, reset_for_copy_tail);
|
||||
packuswb(tmp2Reg, tmp3Reg); // only LATIN1 chars; compress each to 1 byte
|
||||
movq(Address(dst, 0), tmp2Reg);
|
||||
addptr(src, 16);
|
||||
addptr(dst, 8);
|
||||
jmpb(copy_tail_sse);
|
||||
|
||||
bind(copy_tail);
|
||||
bind(reset_for_copy_tail);
|
||||
movl(tmp5, result);
|
||||
andl(tmp5, 0x0000000f);
|
||||
lea(src, Address(src, tmp5, Address::times_2));
|
||||
lea(dst, Address(dst, tmp5, Address::times_1));
|
||||
subptr(len, tmp5);
|
||||
jmpb(copy_chars_loop);
|
||||
|
||||
bind(copy_tail_sse);
|
||||
movl(len, result);
|
||||
andl(len, 0x00000007); // tail count (in chars)
|
||||
}
|
||||
// compress 1 char per iter
|
||||
bind(copy_tail);
|
||||
testl(len, len);
|
||||
jccb(Assembler::zero, return_length);
|
||||
jccb(Assembler::zero, done);
|
||||
lea(src, Address(src, len, Address::times_2));
|
||||
lea(dst, Address(dst, len, Address::times_1));
|
||||
negptr(len);
|
||||
|
||||
bind(copy_chars_loop);
|
||||
load_unsigned_short(result, Address(src, len, Address::times_2));
|
||||
testl(result, 0xff00); // check if Unicode char
|
||||
jccb(Assembler::notZero, return_zero);
|
||||
movb(Address(dst, len, Address::times_1), result); // ASCII char; compress to 1 byte
|
||||
load_unsigned_short(tmp5, Address(src, len, Address::times_2));
|
||||
testl(tmp5, 0xff00); // check if Unicode char
|
||||
jccb(Assembler::notZero, reset_sp);
|
||||
movb(Address(dst, len, Address::times_1), tmp5); // ASCII char; compress to 1 byte
|
||||
increment(len);
|
||||
jcc(Assembler::notZero, copy_chars_loop);
|
||||
jccb(Assembler::notZero, copy_chars_loop);
|
||||
|
||||
// if compression succeeded, return length
|
||||
bind(return_length);
|
||||
pop(result);
|
||||
jmpb(done);
|
||||
|
||||
// if compression failed, return 0
|
||||
bind(return_zero);
|
||||
xorl(result, result);
|
||||
addptr(rsp, wordSize);
|
||||
// add len then return (len will be zero if compress succeeded, otherwise negative)
|
||||
bind(reset_sp);
|
||||
addl(result, len);
|
||||
|
||||
bind(done);
|
||||
}
|
||||
|
@ -130,6 +130,9 @@ abstract sealed class AbstractStringBuilder implements Appendable, CharSequence
|
||||
* as the specified {@code CharSequence}. The initial capacity of
|
||||
* the string builder is {@code 16} plus the length of the
|
||||
* {@code CharSequence} argument.
|
||||
* <p>
|
||||
* The contents are unspecified if the {@code CharSequence}
|
||||
* is modified during string construction.
|
||||
*
|
||||
* @param seq the sequence to copy.
|
||||
*/
|
||||
@ -666,6 +669,10 @@ abstract sealed class AbstractStringBuilder implements Appendable, CharSequence
|
||||
* If {@code s} is {@code null}, then this method appends
|
||||
* characters as if the s parameter was a sequence containing the four
|
||||
* characters {@code "null"}.
|
||||
* <p>
|
||||
* The contents are unspecified if the {@code CharSequence}
|
||||
* is modified during the method call or an exception is thrown
|
||||
* when accessing the {@code CharSequence}.
|
||||
*
|
||||
* @param s the sequence to append.
|
||||
* @param start the starting index of the subsequence to be appended.
|
||||
@ -1241,6 +1248,10 @@ abstract sealed class AbstractStringBuilder implements Appendable, CharSequence
|
||||
* invocation of this object's
|
||||
* {@link #insert(int,CharSequence,int,int) insert}(dstOffset, s, 0, s.length())
|
||||
* method.
|
||||
* <p>
|
||||
* The contents are unspecified if the {@code CharSequence}
|
||||
* is modified during the method call or an exception is thrown
|
||||
* when accessing the {@code CharSequence}.
|
||||
*
|
||||
* <p>If {@code s} is {@code null}, then the four characters
|
||||
* {@code "null"} are inserted into this sequence.
|
||||
@ -1289,6 +1300,10 @@ abstract sealed class AbstractStringBuilder implements Appendable, CharSequence
|
||||
* <p>If {@code s} is {@code null}, then this method inserts
|
||||
* characters as if the s parameter was a sequence containing the four
|
||||
* characters {@code "null"}.
|
||||
* <p>
|
||||
* The contents are unspecified if the {@code CharSequence}
|
||||
* is modified during the method call or an exception is thrown
|
||||
* when accessing the {@code CharSequence}.
|
||||
*
|
||||
* @param dstOffset the offset in this sequence.
|
||||
* @param s the sequence to be inserted.
|
||||
@ -1675,11 +1690,10 @@ abstract sealed class AbstractStringBuilder implements Appendable, CharSequence
|
||||
/* for readObject() */
|
||||
void initBytes(char[] value, int off, int len) {
|
||||
if (String.COMPACT_STRINGS) {
|
||||
this.value = StringUTF16.compress(value, off, len);
|
||||
if (this.value != null) {
|
||||
this.coder = LATIN1;
|
||||
return;
|
||||
}
|
||||
byte[] val = StringUTF16.compress(value, off, len);
|
||||
this.coder = StringUTF16.coderFromArrayLen(val, len);
|
||||
this.value = val;
|
||||
return;
|
||||
}
|
||||
this.coder = UTF16;
|
||||
this.value = StringUTF16.toBytes(value, off, len);
|
||||
@ -1720,6 +1734,9 @@ abstract sealed class AbstractStringBuilder implements Appendable, CharSequence
|
||||
val[j++] = (byte)c;
|
||||
} else {
|
||||
inflate();
|
||||
// store c to make sure it has a UTF16 char
|
||||
StringUTF16.putChar(this.value, j++, c);
|
||||
i++;
|
||||
StringUTF16.putCharsSB(this.value, j, s, i, end);
|
||||
return;
|
||||
}
|
||||
@ -1812,6 +1829,10 @@ abstract sealed class AbstractStringBuilder implements Appendable, CharSequence
|
||||
} else {
|
||||
count = j;
|
||||
inflate();
|
||||
// Store c to make sure sb has a UTF16 char
|
||||
StringUTF16.putChar(this.value, j++, c);
|
||||
count = j;
|
||||
i++;
|
||||
StringUTF16.putCharsSB(this.value, j, s, i, end);
|
||||
count += end - i;
|
||||
return;
|
||||
@ -1923,6 +1944,10 @@ abstract sealed class AbstractStringBuilder implements Appendable, CharSequence
|
||||
* <p>
|
||||
* If {@code cs} is {@code null}, then the four characters
|
||||
* {@code "null"} are repeated into this sequence.
|
||||
* <p>
|
||||
* The contents are unspecified if the {@code CharSequence}
|
||||
* is modified during the method call or an exception is thrown
|
||||
* when accessing the {@code CharSequence}.
|
||||
*
|
||||
* @param cs a {@code CharSequence}
|
||||
* @param count number of times to copy
|
||||
|
@ -57,6 +57,10 @@ public interface Appendable {
|
||||
* {@code csq}, the entire sequence may not be appended. For
|
||||
* instance, if {@code csq} is a {@link java.nio.CharBuffer} then
|
||||
* the subsequence to append is defined by the buffer's position and limit.
|
||||
* <p>
|
||||
* The contents of this {@code Appendable} are unspecified if the {@code CharSequence}
|
||||
* is modified during the method call or an exception is thrown
|
||||
* when accessing the {@code CharSequence}.
|
||||
*
|
||||
* @param csq
|
||||
* The character sequence to append. If {@code csq} is
|
||||
@ -81,6 +85,10 @@ public interface Appendable {
|
||||
* <pre>
|
||||
* out.append(csq.subSequence(start, end)) </pre>
|
||||
*
|
||||
* <p>
|
||||
* The contents of this {@code Appendable} are unspecified if the {@code CharSequence}
|
||||
* is modified during the method call or an exception is thrown
|
||||
* when accessing the {@code CharSequence}.
|
||||
* @param csq
|
||||
* The character sequence from which a subsequence will be
|
||||
* appended. If {@code csq} is {@code null}, then characters
|
||||
|
@ -273,6 +273,9 @@ public final class String
|
||||
* contents of the character array are copied; subsequent modification of
|
||||
* the character array does not affect the newly created string.
|
||||
*
|
||||
* <p> The contents of the string are unspecified if the character array
|
||||
* is modified during string construction.
|
||||
*
|
||||
* @param value
|
||||
* The initial value of the string
|
||||
*/
|
||||
@ -288,6 +291,9 @@ public final class String
|
||||
* subarray are copied; subsequent modification of the character array does
|
||||
* not affect the newly created string.
|
||||
*
|
||||
* <p> The contents of the string are unspecified if the character array
|
||||
* is modified during string construction.
|
||||
*
|
||||
* @param value
|
||||
* Array that is the source of characters
|
||||
*
|
||||
@ -319,6 +325,9 @@ public final class String
|
||||
* {@code char}s; subsequent modification of the {@code int} array does not
|
||||
* affect the newly created string.
|
||||
*
|
||||
* <p> The contents of the string are unspecified if the codepoints array
|
||||
* is modified during string construction.
|
||||
*
|
||||
* @param codePoints
|
||||
* Array that is the source of Unicode code points
|
||||
*
|
||||
@ -346,12 +355,10 @@ public final class String
|
||||
return;
|
||||
}
|
||||
if (COMPACT_STRINGS) {
|
||||
byte[] val = StringLatin1.toBytes(codePoints, offset, count);
|
||||
if (val != null) {
|
||||
this.coder = LATIN1;
|
||||
this.value = val;
|
||||
return;
|
||||
}
|
||||
byte[] val = StringUTF16.compress(codePoints, offset, count);
|
||||
this.coder = StringUTF16.coderFromArrayLen(val, count);
|
||||
this.value = val;
|
||||
return;
|
||||
}
|
||||
this.coder = UTF16;
|
||||
this.value = StringUTF16.toBytes(codePoints, offset, count);
|
||||
@ -368,6 +375,9 @@ public final class String
|
||||
* <p> Each {@code byte} in the subarray is converted to a {@code char} as
|
||||
* specified in the {@link #String(byte[],int) String(byte[],int)} constructor.
|
||||
*
|
||||
* <p> The contents of the string are unspecified if the byte array
|
||||
* is modified during string construction.
|
||||
*
|
||||
* @deprecated This method does not properly convert bytes into characters.
|
||||
* As of JDK 1.1, the preferred way to do this is via the
|
||||
* {@code String} constructors that take a {@link Charset}, charset name,
|
||||
@ -429,6 +439,9 @@ public final class String
|
||||
* | (<b><i>b</i></b> & 0xff))
|
||||
* </pre></blockquote>
|
||||
*
|
||||
* <p> The contents of the string are unspecified if the byte array
|
||||
* is modified during string construction.
|
||||
*
|
||||
* @deprecated This method does not properly convert bytes into
|
||||
* characters. As of JDK 1.1, the preferred way to do this is via the
|
||||
* {@code String} constructors that take a {@link Charset}, charset name,
|
||||
@ -463,6 +476,9 @@ public final class String
|
||||
* java.nio.charset.CharsetDecoder} class should be used when more control
|
||||
* over the decoding process is required.
|
||||
*
|
||||
* <p> The contents of the string are unspecified if the byte array
|
||||
* is modified during string construction.
|
||||
*
|
||||
* @param bytes
|
||||
* The bytes to be decoded into characters
|
||||
*
|
||||
@ -501,6 +517,9 @@ public final class String
|
||||
* java.nio.charset.CharsetDecoder} class should be used when more control
|
||||
* over the decoding process is required.
|
||||
*
|
||||
* <p> The contents of the string are unspecified if the byte array
|
||||
* is modified during string construction.
|
||||
*
|
||||
* @param bytes
|
||||
* The bytes to be decoded into characters
|
||||
*
|
||||
@ -543,47 +562,43 @@ public final class String
|
||||
this.coder = LATIN1;
|
||||
return;
|
||||
}
|
||||
int sl = offset + length;
|
||||
byte[] dst = new byte[length];
|
||||
if (dp > 0) {
|
||||
System.arraycopy(bytes, offset, dst, 0, dp);
|
||||
offset += dp;
|
||||
}
|
||||
while (offset < sl) {
|
||||
int b1 = bytes[offset++];
|
||||
// Decode with a stable copy, to be the result if the decoded length is the same
|
||||
byte[] latin1 = Arrays.copyOfRange(bytes, offset, offset + length);
|
||||
int sp = dp; // first dp bytes are already in the copy
|
||||
while (sp < length) {
|
||||
int b1 = latin1[sp++];
|
||||
if (b1 >= 0) {
|
||||
dst[dp++] = (byte)b1;
|
||||
latin1[dp++] = (byte)b1;
|
||||
continue;
|
||||
}
|
||||
if ((b1 & 0xfe) == 0xc2 && offset < sl) { // b1 either 0xc2 or 0xc3
|
||||
int b2 = bytes[offset];
|
||||
if ((b1 & 0xfe) == 0xc2 && sp < length) { // b1 either 0xc2 or 0xc3
|
||||
int b2 = latin1[sp];
|
||||
if (b2 < -64) { // continuation bytes are always negative values in the range -128 to -65
|
||||
dst[dp++] = (byte)decode2(b1, b2);
|
||||
offset++;
|
||||
latin1[dp++] = (byte)decode2(b1, b2);
|
||||
sp++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// anything not a latin1, including the REPL
|
||||
// we have to go with the utf16
|
||||
offset--;
|
||||
sp--;
|
||||
break;
|
||||
}
|
||||
if (offset == sl) {
|
||||
if (dp != dst.length) {
|
||||
dst = Arrays.copyOf(dst, dp);
|
||||
if (sp == length) {
|
||||
if (dp != latin1.length) {
|
||||
latin1 = Arrays.copyOf(latin1, dp);
|
||||
}
|
||||
this.value = dst;
|
||||
this.value = latin1;
|
||||
this.coder = LATIN1;
|
||||
return;
|
||||
}
|
||||
byte[] buf = new byte[length << 1];
|
||||
StringLatin1.inflate(dst, 0, buf, 0, dp);
|
||||
dst = buf;
|
||||
dp = decodeUTF8_UTF16(bytes, offset, sl, dst, dp, true);
|
||||
byte[] utf16 = new byte[length << 1];
|
||||
StringLatin1.inflate(latin1, 0, utf16, 0, dp);
|
||||
dp = decodeUTF8_UTF16(latin1, sp, length, utf16, dp, true);
|
||||
if (dp != length) {
|
||||
dst = Arrays.copyOf(dst, dp << 1);
|
||||
utf16 = Arrays.copyOf(utf16, dp << 1);
|
||||
}
|
||||
this.value = dst;
|
||||
this.value = utf16;
|
||||
this.coder = UTF16;
|
||||
} else { // !COMPACT_STRINGS
|
||||
byte[] dst = new byte[length << 1];
|
||||
@ -655,12 +670,10 @@ public final class String
|
||||
char[] ca = new char[en];
|
||||
int clen = ad.decode(bytes, offset, length, ca);
|
||||
if (COMPACT_STRINGS) {
|
||||
byte[] bs = StringUTF16.compress(ca, 0, clen);
|
||||
if (bs != null) {
|
||||
value = bs;
|
||||
coder = LATIN1;
|
||||
return;
|
||||
}
|
||||
// val's length relative to clen is what the following coderFromArrayLen call uses to pick the coder.
byte[] val = StringUTF16.compress(ca, 0, clen);
|
||||
this.coder = StringUTF16.coderFromArrayLen(val, clen);
|
||||
this.value = val;
|
||||
return;
|
||||
}
|
||||
coder = UTF16;
|
||||
value = StringUTF16.toBytes(ca, 0, clen);
|
||||
@ -686,12 +699,10 @@ public final class String
|
||||
throw new Error(x);
|
||||
}
|
||||
if (COMPACT_STRINGS) {
|
||||
byte[] bs = StringUTF16.compress(ca, 0, caLen);
|
||||
if (bs != null) {
|
||||
value = bs;
|
||||
coder = LATIN1;
|
||||
return;
|
||||
}
|
||||
byte[] val = StringUTF16.compress(ca, 0, caLen);
|
||||
this.coder = StringUTF16.coderFromArrayLen(val, caLen);
|
||||
this.value = val;
|
||||
return;
|
||||
}
|
||||
coder = UTF16;
|
||||
value = StringUTF16.toBytes(ca, 0, caLen);
|
||||
@ -829,10 +840,9 @@ public final class String
|
||||
throw new IllegalArgumentException(x);
|
||||
}
|
||||
if (COMPACT_STRINGS) {
|
||||
byte[] bs = StringUTF16.compress(ca, 0, caLen);
|
||||
if (bs != null) {
|
||||
return new String(bs, LATIN1);
|
||||
}
|
||||
// The coder is derived from val's length relative to the number of chars that were
// compressed (caLen), so the same count must be passed to both calls.
byte[] val = StringUTF16.compress(ca, 0, caLen);
|
||||
// Keep the coder a byte (as in the 'this.coder = coderFromArrayLen(...)' assignments):
// an int argument would select the deprecated public String(byte[] ascii, int hibyte) overload.
byte coder = StringUTF16.coderFromArrayLen(val, caLen);
|
||||
return new String(val, coder);
|
||||
}
|
||||
return new String(StringUTF16.toBytes(ca, 0, caLen), UTF16);
|
||||
}
|
||||
@ -1386,6 +1396,9 @@ public final class String
|
||||
* java.nio.charset.CharsetDecoder} class should be used when more control
|
||||
* over the decoding process is required.
|
||||
*
|
||||
* <p> The contents of the string are unspecified if the byte array
|
||||
* is modified during string construction.
|
||||
*
|
||||
* @param bytes
|
||||
* The bytes to be decoded into characters
|
||||
*
|
||||
@ -1414,6 +1427,9 @@ public final class String
|
||||
* java.nio.charset.CharsetDecoder} class should be used when more control
|
||||
* over the decoding process is required.
|
||||
*
|
||||
* <p> The contents of the string are unspecified if the byte array
|
||||
* is modified during string construction.
|
||||
*
|
||||
* @param bytes
|
||||
* The bytes to be decoded into characters
|
||||
*
|
||||
@ -1438,6 +1454,9 @@ public final class String
|
||||
* java.nio.charset.CharsetDecoder} class should be used when more control
|
||||
* over the decoding process is required.
|
||||
*
|
||||
* <p> The contents of the string are unspecified if the byte array
|
||||
* is modified during string construction.
|
||||
*
|
||||
* @param bytes
|
||||
* The bytes to be decoded into characters
|
||||
*
|
||||
@ -1468,6 +1487,9 @@ public final class String
|
||||
* java.nio.charset.CharsetDecoder} class should be used when more control
|
||||
* over the decoding process is required.
|
||||
*
|
||||
* <p> The contents of the string are unspecified if the byte array
|
||||
* is modified during string construction.
|
||||
*
|
||||
* @param bytes
|
||||
* The bytes to be decoded into characters
|
||||
*
|
||||
@ -1496,6 +1518,9 @@ public final class String
|
||||
* string builder are copied; subsequent modification of the string builder
|
||||
* does not affect the newly created string.
|
||||
*
|
||||
* <p> The contents of the string are unspecified if the {@code StringBuilder}
|
||||
* is modified during string construction.
|
||||
*
|
||||
* <p> This constructor is provided to ease migration to {@code
|
||||
* StringBuilder}. Obtaining a string from a string builder via the {@code
|
||||
* toString} method is likely to run faster and is generally preferred.
|
||||
@ -4488,6 +4513,9 @@ public final class String
|
||||
* modification of the character array does not affect the returned
|
||||
* string.
|
||||
*
|
||||
* <p> The contents of the string are unspecified if the character array
|
||||
* is modified during string construction.
|
||||
*
|
||||
* @param data the character array.
|
||||
* @return a {@code String} that contains the characters of the
|
||||
* character array.
|
||||
@ -4506,6 +4534,9 @@ public final class String
|
||||
* are copied; subsequent modification of the character array does not
|
||||
* affect the returned string.
|
||||
*
|
||||
* <p> The contents of the string are unspecified if the character array
|
||||
* is modified during string construction.
|
||||
*
|
||||
* @param data the character array.
|
||||
* @param offset initial offset of the subarray.
|
||||
* @param count length of the subarray.
|
||||
@ -4767,15 +4798,18 @@ public final class String
|
||||
}
|
||||
|
||||
/*
|
||||
* Package private constructor. Trailing Void argument is there for
|
||||
* Private constructor. Trailing Void argument is there for
|
||||
* disambiguating it against other (public) constructors.
|
||||
*
|
||||
* Stores the char[] value into a byte[] in which each byte represents
|
||||
* the 8 low-order bits of the corresponding character, if the char[]
|
||||
* contains only latin1 characters. Otherwise, into a byte[] that stores all
|
||||
* characters in their byte sequences as defined by {@code StringUTF16}.
|
||||
*
|
||||
* <p> The contents of the string are unspecified if the character array
|
||||
* is modified during string construction.
|
||||
*/
|
||||
String(char[] value, int off, int len, Void sig) {
|
||||
private String(char[] value, int off, int len, Void sig) {
|
||||
if (len == 0) {
|
||||
this.value = "".value;
|
||||
this.coder = "".coder;
|
||||
@ -4783,11 +4817,9 @@ public final class String
|
||||
}
|
||||
if (COMPACT_STRINGS) {
|
||||
byte[] val = StringUTF16.compress(value, off, len);
|
||||
if (val != null) {
|
||||
this.value = val;
|
||||
this.coder = LATIN1;
|
||||
return;
|
||||
}
|
||||
this.coder = StringUTF16.coderFromArrayLen(val, len);
|
||||
this.value = val;
|
||||
return;
|
||||
}
|
||||
this.coder = UTF16;
|
||||
this.value = StringUTF16.toBytes(value, off, len);
|
||||
@ -4796,6 +4828,9 @@ public final class String
|
||||
/*
|
||||
* Package private constructor. Trailing Void argument is there for
|
||||
* disambiguating it against other (public) constructors.
|
||||
*
|
||||
* <p> The contents of the string are unspecified if the {@code StringBuilder}
|
||||
* is modified during string construction.
|
||||
*/
|
||||
String(AbstractStringBuilder asb, Void sig) {
|
||||
byte[] val = asb.getValue();
|
||||
@ -4806,12 +4841,9 @@ public final class String
|
||||
} else {
|
||||
// only try to compress val if some characters were deleted.
|
||||
if (COMPACT_STRINGS && asb.maybeLatin1) {
|
||||
byte[] buf = StringUTF16.compress(val, 0, length);
|
||||
if (buf != null) {
|
||||
this.coder = LATIN1;
|
||||
this.value = buf;
|
||||
return;
|
||||
}
|
||||
this.value = StringUTF16.compress(val, 0, length);
|
||||
this.coder = StringUTF16.coderFromArrayLen(this.value, length);
|
||||
return;
|
||||
}
|
||||
this.coder = UTF16;
|
||||
this.value = Arrays.copyOfRange(val, 0, length << 1);
|
||||
|
@ -47,8 +47,12 @@ final class StringLatin1 {
|
||||
return (char)(value[index] & 0xff);
|
||||
}
|
||||
|
||||
public static boolean canEncode(char cp) {
|
||||
return cp <= 0xff;
|
||||
}
|
||||
|
||||
public static boolean canEncode(int cp) {
|
||||
return cp >>> 8 == 0;
|
||||
return cp >=0 && cp <= 0xff;
|
||||
}
|
||||
|
||||
public static int length(byte[] value) {
|
||||
|
@ -34,7 +34,6 @@ import java.util.stream.Stream;
|
||||
import java.util.stream.StreamSupport;
|
||||
import jdk.internal.util.ArraysSupport;
|
||||
import jdk.internal.util.DecimalDigits;
|
||||
import jdk.internal.vm.annotation.DontInline;
|
||||
import jdk.internal.vm.annotation.ForceInline;
|
||||
import jdk.internal.vm.annotation.IntrinsicCandidate;
|
||||
|
||||
@ -54,6 +53,19 @@ final class StringUTF16 {
|
||||
return new byte[len << 1];
|
||||
}
|
||||
|
||||
// Check the size of a UTF16-coded string
|
||||
// Throw an exception if out of range
|
||||
public static int newBytesLength(int len) {
|
||||
if (len < 0) {
|
||||
throw new NegativeArraySizeException();
|
||||
}
|
||||
if (len > MAX_LENGTH) {
|
||||
throw new OutOfMemoryError("UTF16 String size is " + len +
|
||||
", should be less than " + MAX_LENGTH);
|
||||
}
|
||||
return len << 1;
|
||||
}
|
||||
|
||||
@IntrinsicCandidate
|
||||
// intrinsic performs no bounds checks
|
||||
static void putChar(byte[] val, int index, int c) {
|
||||
@ -148,6 +160,13 @@ final class StringUTF16 {
|
||||
return dst;
|
||||
}
|
||||
|
||||
/**
|
||||
* {@return an encoded byte[] for the UTF16 characters in char[]}
|
||||
* No checking is done on the characters, some may or may not be latin1.
|
||||
* @param value a char array
|
||||
* @param off an offset
|
||||
* @param len a length
|
||||
*/
|
||||
@IntrinsicCandidate
|
||||
public static byte[] toBytes(char[] value, int off, int len) {
|
||||
byte[] val = newBytesFor(len);
|
||||
@ -158,20 +177,209 @@ final class StringUTF16 {
|
||||
return val;
|
||||
}
|
||||
|
||||
public static byte[] compress(char[] val, int off, int len) {
|
||||
byte[] ret = new byte[len];
|
||||
if (compress(val, off, ret, 0, len) == len) {
|
||||
return ret;
|
||||
}
|
||||
return null;
|
||||
// Clever way to get the coder from a byte array returned from compress
|
||||
// that maybe either latin1 or UTF16-coded
|
||||
// Equivalent to (len == val.length) ? LATIN1 : UTF16
|
||||
@ForceInline
|
||||
static byte coderFromArrayLen(byte[] value, int len) {
|
||||
return (byte) ((len - value.length) >>> Integer.SIZE - 1);
|
||||
}
|
||||
|
||||
public static byte[] compress(byte[] val, int off, int len) {
|
||||
byte[] ret = new byte[len];
|
||||
if (compress(val, off, ret, 0, len) == len) {
|
||||
return ret;
|
||||
/**
|
||||
* {@return Compress the char array (containing UTF16) into a compact strings byte array}
|
||||
* If all the chars are LATIN1, it returns an array with len == count,
|
||||
* otherwise, it contains UTF16 characters.
|
||||
* <p>
|
||||
* A UTF16 array is returned *only* if at least 1 non-latin1 character is present.
|
||||
* This must be true even if the input array is modified while this method is executing.
|
||||
* This is assured by copying the characters while checking for latin1.
|
||||
* If all characters are latin1, a byte array with length equals count is returned,
|
||||
* indicating all latin1 chars. The scan may be implemented as an intrinsic,
|
||||
* which returns the index of the first non-latin1 character.
|
||||
* When the first non-latin1 character is found, it switches to creating a new
|
||||
* buffer; the saved prefix of latin1 characters is copied to the new buffer;
|
||||
* and the remaining input characters are copied to the buffer.
|
||||
* The index of the known non-latin1 character is checked, if it is latin1,
|
||||
* the input has been changed. In this case, a second attempt is made to compress to
|
||||
* latin1 from the copy made in the first pass to the originally allocated latin1 buffer.
|
||||
* If it succeeds the return value is latin1, otherwise, the utf16 value is returned.
|
||||
* In this unusual case, the result is correct for the snapshot of the value.
|
||||
* The resulting string contents are unspecified if the input array is modified during this
|
||||
* operation, but it is ensured that at least 1 non-latin1 character is present in
|
||||
* the non-latin1 buffer.
|
||||
*
|
||||
* @param val a char array
|
||||
* @param off starting offset
|
||||
* @param count count of chars to be compressed, {@code count} > 0
|
||||
*/
|
||||
@ForceInline
|
||||
public static byte[] compress(final char[] val, final int off, final int count) {
|
||||
byte[] latin1 = new byte[count];
|
||||
int ndx = compress(val, off, latin1, 0, count);
|
||||
if (ndx != count) {
|
||||
// Switch to UTF16
|
||||
byte[] utf16 = toBytes(val, off, count);
|
||||
// If the original character that was found to be non-latin1 is latin1 in the copy
|
||||
// try to make a latin1 string from the copy
|
||||
if (getChar(utf16, ndx) > 0xff
|
||||
|| compress(utf16, 0, latin1, 0, count) != count) {
|
||||
return utf16;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
return latin1; // latin1 success
|
||||
}
|
||||
|
||||
/**
|
||||
* {@return Compress the internal byte array (containing UTF16) into a compact strings byte array}
|
||||
* If all the chars are LATIN1, it returns an array with len == count,
|
||||
* otherwise, it contains UTF16 characters.
|
||||
* <p>
|
||||
* Refer to the description of the algorithm in {@link #compress(char[], int, int)}.
|
||||
*
|
||||
* @param val a byte array with UTF16 coding
|
||||
* @param off starting offset
|
||||
* @param count count of chars to be compressed, {@code count} > 0
|
||||
*/
|
||||
public static byte[] compress(final byte[] val, final int off, final int count) {
|
||||
byte[] latin1 = new byte[count];
|
||||
int ndx = compress(val, off, latin1, 0, count);
|
||||
if (ndx != count) {// Switch to UTF16
|
||||
byte[] utf16 = Arrays.copyOfRange(val, off << 1, newBytesLength(off + count));
|
||||
// If the original character that was found to be non-latin1 is latin1 in the copy
|
||||
// try to make a latin1 string from the copy
|
||||
if (getChar(utf16, ndx) > 0xff
|
||||
|| compress(utf16, 0, latin1, 0, count) != count) {
|
||||
return utf16;
|
||||
}
|
||||
}
|
||||
return latin1; // latin1 success
|
||||
}
|
||||
|
||||
/**
|
||||
* {@return compress the code points into a compact strings byte array}
|
||||
* If all the chars are LATIN1, returns an array with len == count.
|
||||
* If not, a new byte array is allocated and code points converted to UTF16.
|
||||
* The algorithm is similar to that of {@link #compress(char[], int, int)}.
|
||||
* <p>
|
||||
* The resulting encoding is attempted in several steps:
|
||||
* <UL>
|
||||
* <LI>If no non-latin1 characters are found, the encoding is latin1</LI>
|
||||
* <LI>If an estimate of the number of characters needed to represent the codepoints is
|
||||
* equal to the string length, they are all BMP with at least 1 UTF16 character
|
||||
* and are copied to the result. </LI>
|
||||
* <LI>The extractCodePoints method is called to carefully expand surrogates. </LI>
|
||||
* </UL>
|
||||
*
|
||||
* @param val an int array of code points
|
||||
* @param off starting offset
|
||||
* @param count length of code points to be compressed, length > 0
|
||||
*/
|
||||
public static byte[] compress(final int[] val, int off, final int count) {
|
||||
// Optimistically copy all latin1 code points to the destination
|
||||
byte[] latin1 = new byte[count];
|
||||
final int end = off + count;
|
||||
for (int ndx = 0; ndx < count; ndx++, off++) {
|
||||
int cp = val[off];
|
||||
if (cp >= 0 && cp <= 0xff) {
|
||||
latin1[ndx] = (byte)cp;
|
||||
} else {
|
||||
// Pass 1: Compute precise size of char[]; see extractCodePoints for caveat
|
||||
int estSize = ndx + computeCodePointSize(val, off, end);
|
||||
|
||||
// Pass 2: Switch to UTF16
|
||||
// cp = val[ndx] is at least one code point known to be UTF16
|
||||
byte[] utf16 = newBytesFor(estSize);
|
||||
if (ndx > 0) {
|
||||
StringLatin1.inflate(latin1, 0, utf16, 0, ndx); // inflate latin1 bytes
|
||||
}
|
||||
|
||||
if (estSize == count) {
|
||||
// Based on the computed size, all remaining code points are BMP and
|
||||
// can be copied without checking again
|
||||
putChar(utf16, ndx, cp); // ensure utf16 has a UTF16 char
|
||||
off++;
|
||||
for (int i = ndx + 1; i < count; i++, off++) {
|
||||
putChar(utf16, i, val[off]);
|
||||
}
|
||||
} else {
|
||||
// Some codepoint is a surrogate pair
|
||||
utf16 = extractCodepoints(val, off, end, utf16, ndx);
|
||||
|
||||
// The original character that was found to be UTF16 is not UTF16 in the copy
|
||||
// Try to make a latin1 string from the copy
|
||||
if (getChar(utf16, ndx) <= 0xff &&
|
||||
compress(utf16, 0, latin1, 0, count) == count) {
|
||||
return latin1; // latin1 success
|
||||
}
|
||||
}
|
||||
return utf16;
|
||||
}
|
||||
}
|
||||
return latin1; // Latin1 success
|
||||
}
|
||||
|
||||
// Extract code points into chars in the byte array
|
||||
//
|
||||
// Guard against possible races with the input array changing between the previous
|
||||
// computation of the required output size and storing the bmp or surrogates.
|
||||
// If a BMP code point is changed to a supplementary code point it would require 2 chars
|
||||
// in the output. Changing a supplementary char to BMP would reduce the size.
|
||||
// If the utf16 destination is not large enough, it is resized to fit the
|
||||
// remaining codepoints assuming they occupy 2 characters.
|
||||
// The destination may be copied to return exactly the final length.
|
||||
// The additional allocations and compression only occur if the input array is modified.
|
||||
private static byte[] extractCodepoints(int[] val, int off, int end, byte[] dst, int dstOff) {
|
||||
while (off < end) {
|
||||
// Compute a minimum estimate on the number of characters can be put into the dst
|
||||
// given the current codepoint and the number of remaining codepoints
|
||||
int codePoint = val[off]; // read each codepoint from val only once
|
||||
int dstLimit = dstOff
|
||||
+ Character.charCount(codePoint)
|
||||
+ (end - off - 1);
|
||||
if (dstLimit > (dst.length >> 1)) {
|
||||
// Resize to hold the remaining codepoints assuming they are all surrogates.
|
||||
// By resizing to the maximum that might be needed, only a single resize will occur.
|
||||
// dstLimit includes only a single char per codepoint, pad with an additional for each.
|
||||
int maxRemaining = dstLimit + (end - off - 1);
|
||||
dst = Arrays.copyOf(dst, newBytesLength(maxRemaining));
|
||||
}
|
||||
// Efficiently copy as many codepoints as fit within the current estimated limit
|
||||
// The dst at least enough space for the current codepoint.
|
||||
while (true) {
|
||||
if (Character.isBmpCodePoint(codePoint)) {
|
||||
putChar(dst, dstOff++, codePoint);
|
||||
} else {
|
||||
putChar(dst, dstOff++, Character.highSurrogate(codePoint));
|
||||
putChar(dst, dstOff++, Character.lowSurrogate(codePoint));
|
||||
}
|
||||
off++;
|
||||
if (dstOff + 2 > dstLimit)
|
||||
break; // no space for another surrogate; recompute limit
|
||||
codePoint = val[off];
|
||||
}
|
||||
}
|
||||
if (dstOff != (dst.length >> 1)) {
|
||||
// Truncate to actual length; should only occur if a codepoint was racily
|
||||
// changed from a surrogate to a BMP character.
|
||||
return Arrays.copyOf(dst, newBytesLength(dstOff));
|
||||
}
|
||||
return dst;
|
||||
}
|
||||
|
||||
// Compute the number of chars needed to represent the code points from off to end-1
|
||||
private static int computeCodePointSize(int[] val, int off, int end) {
|
||||
int n = end - off;
|
||||
while (off < end) {
|
||||
int codePoint = val[off++];
|
||||
if (Character.isBmpCodePoint(codePoint)) {
|
||||
continue;
|
||||
} else if (Character.isValidCodePoint(codePoint)) {
|
||||
n++;
|
||||
} else {
|
||||
throw new IllegalArgumentException(Integer.toString(codePoint));
|
||||
}
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
// compressedCopy char[] -> byte[]
|
||||
@ -179,9 +387,8 @@ final class StringUTF16 {
|
||||
public static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) {
|
||||
for (int i = 0; i < len; i++) {
|
||||
char c = src[srcOff];
|
||||
if (c > 0xFF) {
|
||||
len = 0;
|
||||
break;
|
||||
if (c > 0xff) {
|
||||
return i; // return index of non-latin1 char
|
||||
}
|
||||
dst[dstOff] = (byte)c;
|
||||
srcOff++;
|
||||
@ -197,9 +404,8 @@ final class StringUTF16 {
|
||||
checkBoundsOffCount(srcOff, len, src);
|
||||
for (int i = 0; i < len; i++) {
|
||||
char c = getChar(src, srcOff);
|
||||
if (c > 0xFF) {
|
||||
len = 0;
|
||||
break;
|
||||
if (c > 0xff) {
|
||||
return i; // return index of non-latin1 char
|
||||
}
|
||||
dst[dstOff] = (byte)c;
|
||||
srcOff++;
|
||||
@ -208,31 +414,14 @@ final class StringUTF16 {
|
||||
return len;
|
||||
}
|
||||
|
||||
// Create the UTF16 buffer for !COMPACT_STRINGS
|
||||
public static byte[] toBytes(int[] val, int index, int len) {
|
||||
final int end = index + len;
|
||||
// Pass 1: Compute precise size of char[]
|
||||
int n = len;
|
||||
for (int i = index; i < end; i++) {
|
||||
int cp = val[i];
|
||||
if (Character.isBmpCodePoint(cp))
|
||||
continue;
|
||||
else if (Character.isValidCodePoint(cp))
|
||||
n++;
|
||||
else throw new IllegalArgumentException(Integer.toString(cp));
|
||||
}
|
||||
// Pass 2: Allocate and fill in <high, low> pair
|
||||
int n = computeCodePointSize(val, index, end);
|
||||
|
||||
byte[] buf = newBytesFor(n);
|
||||
for (int i = index, j = 0; i < end; i++, j++) {
|
||||
int cp = val[i];
|
||||
if (Character.isBmpCodePoint(cp)) {
|
||||
putChar(buf, j, cp);
|
||||
} else {
|
||||
putChar(buf, j++, Character.highSurrogate(cp));
|
||||
putChar(buf, j, Character.lowSurrogate(cp));
|
||||
}
|
||||
}
|
||||
return buf;
|
||||
}
|
||||
return extractCodepoints(val, index, len, buf, 0);
|
||||
}
|
||||
|
||||
public static byte[] toBytes(char c) {
|
||||
byte[] result = new byte[2];
|
||||
@ -653,10 +842,9 @@ final class StringUTF16 {
|
||||
if (String.COMPACT_STRINGS &&
|
||||
!StringLatin1.canEncode(oldChar) &&
|
||||
StringLatin1.canEncode(newChar)) {
|
||||
byte[] val = compress(buf, 0, len);
|
||||
if (val != null) {
|
||||
return new String(val, LATIN1);
|
||||
}
|
||||
byte[] res = StringUTF16.compress(buf, 0, len);
|
||||
byte coder = StringUTF16.coderFromArrayLen(res, len);
|
||||
return new String(res, coder);
|
||||
}
|
||||
return new String(buf, UTF16);
|
||||
}
|
||||
@ -771,10 +959,9 @@ final class StringUTF16 {
|
||||
|
||||
if (String.COMPACT_STRINGS && replLat1 && !targLat1) {
|
||||
// combination 6
|
||||
byte[] lat1Result = compress(result, 0, resultLen);
|
||||
if (lat1Result != null) {
|
||||
return new String(lat1Result, LATIN1);
|
||||
}
|
||||
byte[] res = StringUTF16.compress(result, 0, resultLen);
|
||||
byte coder = StringUTF16.coderFromArrayLen(res, resultLen);
|
||||
return new String(res, coder); // combination 6
|
||||
}
|
||||
return new String(result, UTF16);
|
||||
}
|
||||
@ -838,7 +1025,7 @@ final class StringUTF16 {
|
||||
bits |= cp;
|
||||
putChar(result, i, cp);
|
||||
}
|
||||
if (bits > 0xFF) {
|
||||
if (bits < 0 || bits > 0xff) {
|
||||
return new String(result, UTF16);
|
||||
} else {
|
||||
return newString(result, 0, len);
|
||||
@ -939,7 +1126,7 @@ final class StringUTF16 {
|
||||
bits |= cp;
|
||||
putChar(result, i, cp);
|
||||
}
|
||||
if (bits > 0xFF) {
|
||||
if (bits < 0 || bits > 0xff) {
|
||||
return new String(result, UTF16);
|
||||
} else {
|
||||
return newString(result, 0, len);
|
||||
@ -1168,10 +1355,9 @@ final class StringUTF16 {
|
||||
return "";
|
||||
}
|
||||
if (String.COMPACT_STRINGS) {
|
||||
byte[] buf = compress(val, index, len);
|
||||
if (buf != null) {
|
||||
return new String(buf, LATIN1);
|
||||
}
|
||||
byte[] res = StringUTF16.compress(val, index, len);
|
||||
byte coder = StringUTF16.coderFromArrayLen(res, len);
|
||||
return new String(res, coder);
|
||||
}
|
||||
int last = index + len;
|
||||
return new String(Arrays.copyOfRange(val, index << 1, last << 1), UTF16);
|
||||
@ -1502,8 +1688,8 @@ final class StringUTF16 {
|
||||
|
||||
private static native boolean isBigEndian();
|
||||
|
||||
static final int HI_BYTE_SHIFT;
|
||||
static final int LO_BYTE_SHIFT;
|
||||
private static final int HI_BYTE_SHIFT;
|
||||
private static final int LO_BYTE_SHIFT;
|
||||
static {
|
||||
if (isBigEndian()) {
|
||||
HI_BYTE_SHIFT = 8;
|
||||
|
@ -0,0 +1,253 @@
|
||||
/*
|
||||
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @bug 8311906
|
||||
* @summary Validates String constructor intrinsics using varied input data.
|
||||
* @key randomness
|
||||
* @library /compiler/patches /test/lib
|
||||
* @build java.base/java.lang.Helper
|
||||
* @run main/othervm/timeout=1200 -Xbatch -XX:CompileThreshold=100 compiler.intrinsics.string.TestStringConstructionIntrinsics
|
||||
*/
|
||||
/*
|
||||
* @test
|
||||
* @bug 8311906
|
||||
* @summary Validates String constructor intrinsic for AVX3 works with and without
|
||||
* AVX3Threshold=0
|
||||
* @key randomness
|
||||
* @library /compiler/patches /test/lib
|
||||
* @build java.base/java.lang.Helper
|
||||
* @requires vm.cpu.features ~= ".*avx512.*"
|
||||
* @run main/othervm/timeout=1200 -Xbatch -XX:CompileThreshold=100 -XX:UseAVX=3 compiler.intrinsics.string.TestStringConstructionIntrinsics
|
||||
* @run main/othervm/timeout=1200 -Xbatch -XX:CompileThreshold=100 -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:AVX3Threshold=0 compiler.intrinsics.string.TestStringConstructionIntrinsics
|
||||
*/
|
||||
|
||||
package compiler.intrinsics.string;
|
||||
|
||||
import java.lang.Helper;
|
||||
import java.util.Random;
|
||||
|
||||
import jdk.test.lib.Utils;
|
||||
|
||||
public class TestStringConstructionIntrinsics {
|
||||
|
||||
private static byte[] bytes = new byte[2 * (4096 + 32)];
|
||||
|
||||
private static char[] chars = new char[4096 + 32];
|
||||
|
||||
// Used a scratch buffer, sized to accommodate inflated
|
||||
private static byte[] dst = new byte[bytes.length * 2];
|
||||
|
||||
private static final Random RANDOM = Utils.getRandomInstance();
|
||||
|
||||
/**
|
||||
* Completely initialize the bytes test array. The lowest index that will be
|
||||
* non-latin1 is marked by nlOffset
|
||||
*/
|
||||
public static void initializeBytes(int off, int len, int nonLatin1, int nlOffset) {
|
||||
int maxLen = bytes.length >> 1;
|
||||
assert (len + off < maxLen);
|
||||
// insert "canary" (non-latin1) values before offset
|
||||
for (int i = 0; i < off; i++) {
|
||||
Helper.putCharSB(bytes, i, ((i + 15) & 0x7F) | 0x180);
|
||||
}
|
||||
// fill the array segment
|
||||
for (int i = off; i < len + off; i++) {
|
||||
Helper.putCharSB(bytes, i, ((i - off + 15) & 0xFF));
|
||||
}
|
||||
if (nonLatin1 != 0) {
|
||||
// modify a number disparate indexes to be non-latin1
|
||||
for (int i = 0; i < nonLatin1; i++) {
|
||||
int idx = off + RANDOM.nextInt(len - nlOffset) + nlOffset;
|
||||
Helper.putCharSB(bytes, i, ((i + 15) & 0x7F) | 0x180);
|
||||
}
|
||||
}
|
||||
// insert "canary" non-latin1 values after array segment
|
||||
for (int i = len + off; i < maxLen; i++) {
|
||||
Helper.putCharSB(bytes, i, ((i + 15) & 0x7F) | 0x180);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Completely initialize the char test array. The lowest index that will be
|
||||
* non-latin1 is marked by nlOffset
|
||||
*/
|
||||
public static void initializeChars(int off, int len, int nonLatin1, int nlOffset) {
|
||||
assert (len + off <= chars.length);
|
||||
// insert "canary" non-latin1 values before offset
|
||||
for (int i = 0; i < off; ++i) {
|
||||
chars[i] = (char) (((i + 15) & 0x7F) | 0x180);
|
||||
}
|
||||
// fill the array segment
|
||||
for (int i = off; i < len + off; ++i) {
|
||||
chars[i] = (char) (((i - off + 15) & 0xFF));
|
||||
}
|
||||
if (nonLatin1 != 0) {
|
||||
// modify a number disparate chars inside
|
||||
// segment to be non-latin1.
|
||||
for (int i = 0; i < nonLatin1; i++) {
|
||||
int idx = off + RANDOM.nextInt(len - nlOffset) + nlOffset;
|
||||
chars[idx] = (char) (0x180 | chars[idx]);
|
||||
}
|
||||
}
|
||||
// insert "canary" non-latin1 values after array segment
|
||||
for (int i = len + off; i < chars.length; ++i) {
|
||||
chars[i] = (char) (((i + 15) & 0x7F) | 0x180);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test different array segment sizes, offsets, and number of non-latin1
|
||||
* chars.
|
||||
*/
|
||||
public static void testConstructBytes() throws Exception {
|
||||
for (int off = 0; off < 16; off++) { // starting offset of array segment
|
||||
// Test all array segment sizes 1-63
|
||||
for (int len = 1; len < 64; len++) {
|
||||
testConstructBytes(off, len, 0, 0);
|
||||
testConstructBytes(off, len, 1, 0);
|
||||
testConstructBytes(off, len, RANDOM.nextInt(30) + 2, 0);
|
||||
}
|
||||
// Test a random selection of sizes between 64 and 4099, inclusive
|
||||
for (int i = 0; i < 20; i++) {
|
||||
int len = 64 + RANDOM.nextInt(4100 - 64);
|
||||
testConstructBytes(off, len, 0, 0);
|
||||
testConstructBytes(off, len, 1, 0);
|
||||
testConstructBytes(off, len, RANDOM.nextInt(len) + 2, 0);
|
||||
}
|
||||
for (int len : new int[] { 128, 2048 }) {
|
||||
// test with negatives only in a 1-63 byte tail
|
||||
int tail = RANDOM.nextInt(63) + 1;
|
||||
int ng = RANDOM.nextInt(tail) + 1;
|
||||
testConstructBytes(off, len + tail, ng, len);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void testConstructBytes(int off, int len, int ng, int ngOffset) throws Exception {
|
||||
assert (len + off < bytes.length);
|
||||
initializeBytes(off, len, ng, ngOffset);
|
||||
byte[] dst = new byte[bytes.length];
|
||||
|
||||
int calculated = Helper.compress(bytes, off, dst, 0, len);
|
||||
int expected = compress(bytes, off, dst, 0, len);
|
||||
if (calculated != expected) {
|
||||
if (expected != len && ng >= 0 && calculated >= 0 && calculated < expected) {
|
||||
// allow intrinsics to return early with a lower value,
|
||||
// but only if we're not expecting the full length (no
|
||||
// negative bytes)
|
||||
return;
|
||||
}
|
||||
throw new Exception("Failed testConstructBytes: " + "offset: " + off + " "
|
||||
+ "length: " + len + " " + "return: " + calculated + " expected: " + expected + " negatives: "
|
||||
+ ng + " offset: " + ngOffset);
|
||||
}
|
||||
}
|
||||
|
||||
private static int compress(byte[] src, int srcOff, byte[] dst, int dstOff, int len) {
|
||||
for (int i = 0; i < len; i++) {
|
||||
char c = Helper.charAt(src, srcOff);
|
||||
if (c > 0xff) {
|
||||
return i; // return index of non-latin1 char
|
||||
}
|
||||
dst[dstOff] = (byte)c;
|
||||
srcOff++;
|
||||
dstOff++;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
/**
|
||||
* Test different array segment sizes, offsets, and number of non-latin1
|
||||
* chars.
|
||||
*/
|
||||
public static void testConstructChars() throws Exception {
|
||||
for (int off = 0; off < 16; off++) { // starting offset of array segment
|
||||
// Test all array segment sizes 1-63
|
||||
for (int len = 1; len < 64; len++) {
|
||||
testConstructChars(off, len, 0, 0);
|
||||
testConstructChars(off, len, 1, 0);
|
||||
testConstructChars(off, len, RANDOM.nextInt(30) + 2, 0);
|
||||
}
|
||||
// Test a random selection of sizes between 64 and 4099, inclusive
|
||||
for (int i = 0; i < 20; i++) {
|
||||
int len = 64 + RANDOM.nextInt(4100 - 64);
|
||||
testConstructChars(off, len, 0, 0);
|
||||
testConstructChars(off, len, 1, 0);
|
||||
testConstructChars(off, len, RANDOM.nextInt(len) + 2, 0);
|
||||
}
|
||||
for (int len : new int[] { 128, 2048 }) {
|
||||
// test with negatives only in a 1-63 byte tail
|
||||
int tail = RANDOM.nextInt(63) + 1;
|
||||
int ng = RANDOM.nextInt(tail) + 1;
|
||||
testConstructChars(off, len + tail, ng, len);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void testConstructChars(int off, int len, int nonLatin1, int nlOffset) throws Exception {
|
||||
assert (len + off < bytes.length);
|
||||
initializeChars(off, len, nonLatin1, nlOffset);
|
||||
|
||||
int calculated = Helper.compress(chars, off, dst, 0, len);
|
||||
int expected = compress(chars, off, dst, 0, len);
|
||||
if (calculated != expected) {
|
||||
if (expected != len && nonLatin1 >= 0 && calculated >= 0 && calculated < expected) {
|
||||
// allow intrinsics to return early with a lower value,
|
||||
// but only if we're not expecting the full length (no
|
||||
// negative bytes)
|
||||
return;
|
||||
}
|
||||
throw new Exception("Failed testConstructChars: " + "offset: " + off + " "
|
||||
+ "length: " + len + " " + "return: " + calculated + " expected: " + expected + " non-latin1: "
|
||||
+ nonLatin1 + " offset: " + nlOffset);
|
||||
}
|
||||
}
|
||||
|
||||
private static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) {
|
||||
for (int i = 0; i < len; i++) {
|
||||
char c = src[srcOff];
|
||||
if (c > 0xff) {
|
||||
return i; // return index of non-latin1 char
|
||||
}
|
||||
dst[dstOff] = (byte)c;
|
||||
srcOff++;
|
||||
dstOff++;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
public void run() throws Exception {
|
||||
// iterate to eventually get intrinsic inlined
|
||||
for (int j = 0; j < 200; ++j) {
|
||||
testConstructBytes();
|
||||
testConstructChars();
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
(new TestStringConstructionIntrinsics()).run();
|
||||
System.out.println("string construction intrinsics validated");
|
||||
}
|
||||
}
|
@ -44,6 +44,11 @@ public class Helper {
|
||||
return dst;
|
||||
}
|
||||
|
||||
@jdk.internal.vm.annotation.ForceInline
|
||||
public static int compress(byte[] src, int srcOff, byte[] dst, int dstOff, int len) {
|
||||
return StringUTF16.compress(src, srcOff, dst, dstOff, len);
|
||||
}
|
||||
|
||||
@jdk.internal.vm.annotation.ForceInline
|
||||
public static byte[] compressChar(char[] src, int srcOff, int dstSize, int dstOff, int len) {
|
||||
byte[] dst = new byte[dstSize];
|
||||
@ -51,6 +56,11 @@ public class Helper {
|
||||
return dst;
|
||||
}
|
||||
|
||||
@jdk.internal.vm.annotation.ForceInline
|
||||
public static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) {
|
||||
return StringUTF16.compress(src, srcOff, dst, dstOff, len);
|
||||
}
|
||||
|
||||
@jdk.internal.vm.annotation.ForceInline
|
||||
public static byte[] inflateByte(byte[] src, int srcOff, int dstSize, int dstOff, int len) {
|
||||
byte[] dst = new byte[dstSize];
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2015, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -22,10 +22,10 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
@test
|
||||
@bug 8054307
|
||||
@summary test chars() and codePoints()
|
||||
*/
|
||||
* @test
|
||||
* @bug 8054307 8311906
|
||||
* @summary test String chars() and codePoints()
|
||||
*/
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Random;
|
||||
@ -44,6 +44,7 @@ public class Chars {
|
||||
cc[j] = (char)(ccExp[j] = cpExp[j] = r.nextInt(0x80));
|
||||
}
|
||||
testChars(cc, ccExp);
|
||||
testCharsSubrange(cc, ccExp);
|
||||
testCPs(cc, cpExp);
|
||||
|
||||
// bmp without surrogates
|
||||
@ -51,6 +52,7 @@ public class Chars {
|
||||
cc[j] = (char)(ccExp[j] = cpExp[j] = r.nextInt(0x8000));
|
||||
}
|
||||
testChars(cc, ccExp);
|
||||
testCharsSubrange(cc, ccExp);
|
||||
testCPs(cc, cpExp);
|
||||
|
||||
// bmp with surrogates
|
||||
@ -69,6 +71,7 @@ public class Chars {
|
||||
}
|
||||
cpExp = Arrays.copyOf(cpExp, k);
|
||||
testChars(cc, ccExp);
|
||||
testCharsSubrange(cc, ccExp);
|
||||
testCPs(cc, cpExp);
|
||||
}
|
||||
}
|
||||
@ -76,14 +79,35 @@ public class Chars {
|
||||
static void testChars(char[] cc, int[] expected) {
|
||||
String str = new String(cc);
|
||||
if (!Arrays.equals(expected, str.chars().toArray())) {
|
||||
throw new RuntimeException("chars/codePoints() failed!");
|
||||
throw new RuntimeException("testChars failed!");
|
||||
}
|
||||
}
|
||||
|
||||
static void testCharsSubrange(char[] cc, int[] expected) {
|
||||
int[] offsets = { 7, 31 }; // offsets to test
|
||||
int LENGTH = 13;
|
||||
for (int i = 0; i < offsets.length; i++) {
|
||||
int offset = Math.max(0, offsets[i]); // confine to the input array
|
||||
int count = Math.min(LENGTH, cc.length - offset);
|
||||
String str = new String(cc, offset, count);
|
||||
int[] actual = str.chars().toArray();
|
||||
int errOffset = Arrays.mismatch(actual, 0, actual.length,
|
||||
expected, offset, offset + count);
|
||||
if (errOffset >= 0) {
|
||||
System.err.printf("expected[%d] (%d) != actual[%d] (%d)%n",
|
||||
offset + errOffset, expected[offset + errOffset],
|
||||
errOffset, actual[errOffset]);
|
||||
System.err.println("expected: " + Arrays.toString(expected));
|
||||
System.err.println("actual: " + Arrays.toString(actual));
|
||||
throw new RuntimeException("testCharsSubrange failed!");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void testCPs(char[] cc, int[] expected) {
|
||||
String str = new String(cc);
|
||||
if (!Arrays.equals(expected, str.codePoints().toArray())) {
|
||||
throw new RuntimeException("chars/codePoints() failed!");
|
||||
throw new RuntimeException("testCPs failed!");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
437
test/jdk/java/lang/String/StringRacyConstructor.java
Normal file
437
test/jdk/java/lang/String/StringRacyConstructor.java
Normal file
@ -0,0 +1,437 @@
|
||||
/*
|
||||
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package test.java.lang.String;
|
||||
|
||||
import java.lang.reflect.Field;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.lang.reflect.Method;
|
||||
import java.util.Arrays;
|
||||
import java.util.ConcurrentModificationException;
|
||||
import java.util.List;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.condition.EnabledIf;
|
||||
import org.junit.jupiter.params.ParameterizedTest;
|
||||
import org.junit.jupiter.params.provider.MethodSource;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
import static org.junit.jupiter.api.Assertions.fail;
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @bug 8311906
|
||||
* @modules java.base/java.lang:open
|
||||
* @summary check String's racy constructors
|
||||
* @run junit/othervm -XX:+CompactStrings test.java.lang.String.StringRacyConstructor
|
||||
* @run junit/othervm -XX:-CompactStrings test.java.lang.String.StringRacyConstructor
|
||||
*/
|
||||
|
||||
public class StringRacyConstructor {
|
||||
private static final byte LATIN1 = 0;
|
||||
private static final byte UTF16 = 1;
|
||||
|
||||
private static final Field STRING_CODER_FIELD;
|
||||
private static final Field SB_CODER_FIELD;
|
||||
private static final boolean COMPACT_STRINGS;
|
||||
|
||||
// Resolve the reflective handles once, at class initialization:
// the "coder" fields of String and java.lang.AbstractStringBuilder, and the
// CompactStrings setting. Both lookup failures are reported the same way,
// as an ExceptionInInitializerError carrying the cause.
static {
    try {
        STRING_CODER_FIELD = String.class.getDeclaredField("coder");
        STRING_CODER_FIELD.setAccessible(true);
        SB_CODER_FIELD = Class.forName("java.lang.AbstractStringBuilder").getDeclaredField("coder");
        SB_CODER_FIELD.setAccessible(true);
        COMPACT_STRINGS = isCompactStrings();
    } catch (NoSuchFieldException | ClassNotFoundException ex) {
        throw new ExceptionInInitializerError(ex);
    }
}
|
||||
|
||||
/* {@return true iff CompactStrings are enabled}
|
||||
*/
|
||||
public static boolean isCompactStrings() {
|
||||
try {
|
||||
Field compactStringField = String.class.getDeclaredField("COMPACT_STRINGS");
|
||||
compactStringField.setAccessible(true);
|
||||
return compactStringField.getBoolean(null);
|
||||
} catch (NoSuchFieldException ex) {
|
||||
throw new ExceptionInInitializerError(ex);
|
||||
} catch (IllegalAccessException iae) {
|
||||
throw new AssertionError(iae);
|
||||
}
|
||||
}
|
||||
|
||||
// Return the coder for the String
|
||||
private static int coder(String s) {
|
||||
try {
|
||||
return STRING_CODER_FIELD.getByte(s);
|
||||
} catch (IllegalAccessException iae) {
|
||||
throw new AssertionError(iae);
|
||||
}
|
||||
}
|
||||
|
||||
// Return the coder for the StringBuilder
|
||||
private static int sbCoder(StringBuilder sb) {
|
||||
try {
|
||||
return SB_CODER_FIELD.getByte(sb);
|
||||
} catch (IllegalAccessException iae) {
|
||||
throw new AssertionError(iae);
|
||||
}
|
||||
}
|
||||
|
||||
// Return a summary of the internals of the String
|
||||
// The coder and indicate if the coder matches the string contents
|
||||
private static String inspectString(String s) {
|
||||
try {
|
||||
char[] chars = s.toCharArray();
|
||||
String r = new String(chars);
|
||||
|
||||
boolean invalidCoder = coder(s) != coder(r);
|
||||
String coder = STRING_CODER_FIELD.getByte(s) == 0 ? "isLatin1" : "utf16";
|
||||
return (invalidCoder ? "INVALID CODER" : "" ) + " \"" + s + "\", coder: " + coder;
|
||||
} catch (IllegalAccessException ex ) {
|
||||
return "EXCEPTION: " + ex.getMessage();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* {@return true if the coder matches the presence/lack of UTF16 characters}
|
||||
* If it returns false, the coder and the contents have failed the precondition for string.
|
||||
* @param orig a string
|
||||
*/
|
||||
private static boolean validCoder(String orig) {
|
||||
if (!COMPACT_STRINGS) {
|
||||
assertEquals(UTF16, coder(orig), "Non-COMPACT STRINGS coder must be UTF16");
|
||||
}
|
||||
int accum = 0;
|
||||
for (int i = 0; i < orig.length(); i++)
|
||||
accum |= orig.charAt(i);
|
||||
byte expectedCoder = (accum < 256) ? LATIN1 : UTF16;
|
||||
return expectedCoder == coder(orig);
|
||||
}
|
||||
|
||||
// Check a StringBuilder for consistency of coder and latin1 vs UTF16
|
||||
private static boolean validCoder(StringBuilder orig) {
|
||||
int accum = 0;
|
||||
for (int i = 0; i < orig.length(); i++)
|
||||
accum |= orig.charAt(i);
|
||||
byte expectedCoder = (accum < 256) ? LATIN1 : UTF16;
|
||||
return expectedCoder == sbCoder(orig);
|
||||
}
|
||||
|
||||
@Test
|
||||
@EnabledIf("test.java.lang.String.StringRacyConstructor#isCompactStrings")
|
||||
public void checkStringRange() {
|
||||
char[] chars = {'a', 'b', 'c', 0xff21, 0xff22, 0xff23};
|
||||
String orig = new String(chars);
|
||||
char[] xx = orig.toCharArray();
|
||||
String stringFromChars = new String(xx);
|
||||
assertEquals(orig, stringFromChars, "mixed chars");
|
||||
assertTrue(validCoder(stringFromChars), "invalid coder"
|
||||
+ ", invalid coder: " + inspectString(stringFromChars));
|
||||
}
|
||||
|
||||
private static List<String> strings() {
|
||||
return List.of("01234", " ");
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@MethodSource("strings")
|
||||
@EnabledIf("test.java.lang.String.StringRacyConstructor#isCompactStrings")
|
||||
public void racyString(String orig) {
|
||||
String racyString = racyStringConstruction(orig);
|
||||
// The contents are indeterminate due to the race
|
||||
assertTrue(validCoder(racyString), orig + " string invalid"
|
||||
+ ", racyString: " + inspectString(racyString));
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@MethodSource("strings")
|
||||
@EnabledIf("test.java.lang.String.StringRacyConstructor#isCompactStrings")
|
||||
public void racyCodePoint(String orig) {
|
||||
String iffyString = racyStringConstructionCodepoints(orig);
|
||||
// The contents are indeterminate due to the race
|
||||
assertTrue(validCoder(iffyString), "invalid coder in non-deterministic string"
|
||||
+ ", orig:" + inspectString(orig)
|
||||
+ ", iffyString: " + inspectString(iffyString));
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@MethodSource("strings")
|
||||
@EnabledIf("test.java.lang.String.StringRacyConstructor#isCompactStrings")
|
||||
public void racyCodePointSurrogates(String orig) {
|
||||
String iffyString = racyStringConstructionCodepointsSurrogates(orig);
|
||||
// The contents are indeterminate due to the race
|
||||
if (!orig.equals(iffyString))
|
||||
System.err.println("orig: " + orig + ", iffy: " + iffyString + Arrays.toString(iffyString.codePoints().toArray()));
|
||||
assertTrue(validCoder(iffyString), "invalid coder in non-deterministic string"
|
||||
+ ", orig:" + inspectString(orig)
|
||||
+ ", iffyString: " + inspectString(iffyString));
|
||||
}
|
||||
|
||||
// Test the private methods of StringUTF16 that compress and copy COMPRESSED_STRING
|
||||
// encoded byte arrays.
|
||||
@Test
|
||||
public void verifyUTF16CopyBytes()
|
||||
throws ClassNotFoundException, NoSuchMethodException, InvocationTargetException, IllegalAccessException {
|
||||
Class<?> stringUTF16 = Class.forName("java.lang.StringUTF16");
|
||||
Method mCompressChars = stringUTF16.getDeclaredMethod("compress",
|
||||
char[].class, int.class, byte[].class, int.class, int.class);
|
||||
mCompressChars.setAccessible(true);
|
||||
|
||||
// First warmup the intrinsic and check 1 case
|
||||
char[] chars = {'a', 'b', 'c', 0xff21, 0xff22, 0xff23};
|
||||
byte[] bytes = new byte[chars.length];
|
||||
int printWarningCount = 0;
|
||||
|
||||
for (int i = 0; i < 1_000_000; i++) { // repeat to get C2 to kick in
|
||||
// Copy only latin1 chars from UTF-16 converted prefix (3 chars -> 3 bytes)
|
||||
int intResult = (int) mCompressChars.invoke(null, chars, 0, bytes, 0, chars.length);
|
||||
if (intResult == 0) {
|
||||
if (printWarningCount == 0) {
|
||||
printWarningCount = 1;
|
||||
System.err.println("Intrinsic for StringUTF16.compress returned 0, may not have been updated.");
|
||||
}
|
||||
} else {
|
||||
assertEquals(3, intResult, "return length not-equal, iteration: " + i);
|
||||
}
|
||||
}
|
||||
|
||||
// Exhaustively check compress returning the correct index of the non-latin1 char.
|
||||
final int SIZE = 48;
|
||||
final byte FILL_BYTE = 'R';
|
||||
chars = new char[SIZE];
|
||||
bytes = new byte[chars.length];
|
||||
for (int i = 0; i < SIZE; i++) { // Every starting index
|
||||
for (int j = i; j < SIZE; j++) { // Every location of non-latin1
|
||||
Arrays.fill(chars, 'A');
|
||||
Arrays.fill(bytes, FILL_BYTE);
|
||||
chars[j] = 0xFF21;
|
||||
int intResult = (int) mCompressChars.invoke(null, chars, i, bytes, 0, chars.length - i);
|
||||
assertEquals(j - i, intResult, "compress found wrong index");
|
||||
assertEquals(FILL_BYTE, bytes[j], "extra character stored");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Check that a concatenated "hello" has a valid coder
|
||||
@Test
|
||||
@EnabledIf("test.java.lang.String.StringRacyConstructor#isCompactStrings")
|
||||
public void checkConcatAndIntern() {
|
||||
var helloWorld = "hello world";
|
||||
String helloToo = racyStringConstruction("hell".concat("o"));
|
||||
String o = helloToo.intern();
|
||||
var hello = "hello";
|
||||
assertTrue(validCoder(helloToo), "startsWith: "
|
||||
+ ", hell: " + inspectString(helloToo)
|
||||
+ ", o: " + inspectString(o)
|
||||
+ ", hello: " + inspectString(hello)
|
||||
+ ", hello world: " + inspectString(helloWorld));
|
||||
}
|
||||
|
||||
// Check that an empty string with racy construction has a valid coder
|
||||
@Test
|
||||
@EnabledIf("test.java.lang.String.StringRacyConstructor#isCompactStrings")
|
||||
public void racyEmptyString() {
|
||||
var space = racyStringConstruction(" ");
|
||||
var trimmed = space.trim();
|
||||
assertTrue(validCoder(trimmed), "empty string invalid coder"
|
||||
+ ", trimmed: " + inspectString(trimmed));
|
||||
}
|
||||
|
||||
// Check that an exception in a user implemented CharSequence doesn't result in
|
||||
// an invalid coder when appended to a StringBuilder
|
||||
@Test
|
||||
@EnabledIf("test.java.lang.String.StringRacyConstructor#isCompactStrings")
|
||||
void charSequenceException() {
|
||||
ThrowingCharSequence bs = new ThrowingCharSequence("A\u2030\uFFFD");
|
||||
var sb = new StringBuilder();
|
||||
try {
|
||||
sb.append(bs);
|
||||
fail("An IllegalArgumentException should have been thrown");
|
||||
} catch (IllegalArgumentException ex) {
|
||||
// ignore expected
|
||||
}
|
||||
assertTrue(validCoder(sb), "invalid coder in StringBuilder");
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a latin-1 String, attempt to create a copy that is
|
||||
* incorrectly encoded as UTF-16.
|
||||
*/
|
||||
public static String racyStringConstruction(String original) throws ConcurrentModificationException {
|
||||
if (original.chars().max().getAsInt() >= 256) {
|
||||
throw new IllegalArgumentException(
|
||||
"Only work with latin-1 Strings");
|
||||
}
|
||||
|
||||
char[] chars = original.toCharArray();
|
||||
|
||||
// In another thread, flip the first character back
|
||||
// and forth between being latin-1 or not
|
||||
Thread thread = new Thread(() -> {
|
||||
while (!Thread.interrupted()) {
|
||||
chars[0] ^= 256;
|
||||
}
|
||||
});
|
||||
thread.start();
|
||||
|
||||
// at the same time call the String constructor,
|
||||
// until we hit the race condition
|
||||
int i = 0;
|
||||
while (true) {
|
||||
i++;
|
||||
String s = new String(chars);
|
||||
if ((s.charAt(0) < 256 && !original.equals(s)) || i > 1_000_000) {
|
||||
thread.interrupt();
|
||||
try {
|
||||
thread.join();
|
||||
} catch (InterruptedException ie) {
|
||||
// ignore interrupt
|
||||
}
|
||||
return s;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a latin-1 String, creates a copy that is
|
||||
* incorrectly encoded as UTF-16 using the APIs for Codepoints.
|
||||
*/
|
||||
public static String racyStringConstructionCodepoints(String original) throws ConcurrentModificationException {
|
||||
if (original.chars().max().getAsInt() >= 256) {
|
||||
throw new IllegalArgumentException(
|
||||
"Can only work with latin-1 Strings");
|
||||
}
|
||||
|
||||
int len = original.length();
|
||||
int[] codePoints = new int[len];
|
||||
for (int i = 0; i < len; i++) {
|
||||
codePoints[i] = original.charAt(i);
|
||||
}
|
||||
|
||||
// In another thread, flip the first character back
|
||||
// and forth between being latin-1 or not
|
||||
Thread thread = new Thread(() -> {
|
||||
while (!Thread.interrupted()) {
|
||||
codePoints[0] ^= 256;
|
||||
}
|
||||
});
|
||||
thread.start();
|
||||
|
||||
// at the same time call the String constructor,
|
||||
// until we hit the race condition
|
||||
int i = 0;
|
||||
while (true) {
|
||||
i++;
|
||||
String s = new String(codePoints, 0, len);
|
||||
if ((s.charAt(0) < 256 && !original.equals(s)) || i > 1_000_000) {
|
||||
thread.interrupt();
|
||||
try {
|
||||
thread.join();
|
||||
} catch (InterruptedException ie) {
|
||||
// ignore interrupt
|
||||
}
|
||||
return s;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a string created from a codepoint array that has been racily
|
||||
* modified to contain high and low surrogates. The string is a different length
|
||||
* than the original due to the surrogate encoding.
|
||||
*/
|
||||
public static String racyStringConstructionCodepointsSurrogates(String original) throws ConcurrentModificationException {
|
||||
if (original.chars().max().getAsInt() >= 256) {
|
||||
throw new IllegalArgumentException(
|
||||
"Can only work with latin-1 Strings");
|
||||
}
|
||||
|
||||
int len = original.length();
|
||||
int[] codePoints = new int[len];
|
||||
for (int i = 0; i < len; i++) {
|
||||
codePoints[i] = original.charAt(i);
|
||||
}
|
||||
|
||||
// In another thread, flip the first character back
|
||||
// and forth between being latin-1 or as a surrogate pair.
|
||||
Thread thread = new Thread(() -> {
|
||||
while (!Thread.interrupted()) {
|
||||
codePoints[0] ^= 0x10000;
|
||||
}
|
||||
});
|
||||
thread.start();
|
||||
|
||||
// at the same time call the String constructor,
|
||||
// until we hit the race condition
|
||||
int i = 0;
|
||||
while (true) {
|
||||
i++;
|
||||
String s = new String(codePoints, 0, len);
|
||||
if ((s.length() != original.length()) || i > 1_000_000) {
|
||||
thread.interrupt();
|
||||
try {
|
||||
thread.join();
|
||||
} catch (InterruptedException ie) {
|
||||
// ignore interrupt
|
||||
}
|
||||
return s;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// A CharSequence that returns characters from a string and throws IllegalArgumentException
|
||||
// when the character requested is 0xFFFD (the replacement character)
|
||||
// The string contents determine when the exception is thrown.
|
||||
static class ThrowingCharSequence implements CharSequence {
|
||||
private final String aString;
|
||||
|
||||
ThrowingCharSequence(String aString) {
|
||||
this.aString = aString;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int length() {
|
||||
return aString.length();
|
||||
}
|
||||
|
||||
@Override
|
||||
public char charAt(int index) {
|
||||
char ch = aString.charAt(index);
|
||||
if (ch == 0xFFFD) {
|
||||
throw new IllegalArgumentException("Replacement character at index " + index);
|
||||
}
|
||||
return ch;
|
||||
}
|
||||
|
||||
@Override
|
||||
// Not used; returns the entire string
|
||||
public CharSequence subSequence(int start, int end) {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
}
|
@ -21,11 +21,13 @@
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package micro.org.openjdk.bench.java.lang;
|
||||
package org.openjdk.bench.java.lang;
|
||||
|
||||
import org.openjdk.jmh.annotations.*;
|
||||
import org.openjdk.jmh.infra.Blackhole;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Arrays;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
@State(Scope.Thread)
|
||||
@ -36,45 +38,115 @@ import java.util.concurrent.TimeUnit;
|
||||
@Fork(3)
|
||||
public class StringConstructor {
|
||||
|
||||
@Param({"7", "64"})
|
||||
public int size;
|
||||
private static final char INTEROBANG = 0x2030;
|
||||
|
||||
// Offset to use for ranged newStrings
|
||||
@Param("1")
|
||||
public int offset;
|
||||
private byte[] array;
|
||||
// Fixed offset to use for ranged newStrings
|
||||
public final int offset = 1;
|
||||
|
||||
@Setup
|
||||
public void setup() {
|
||||
if (offset > size) {
|
||||
offset = size;
|
||||
}
|
||||
array = "a".repeat(size).getBytes(StandardCharsets.UTF_8);
|
||||
}
|
||||
@Param({"7", "64"})
|
||||
public int size;
|
||||
|
||||
@Benchmark
|
||||
public String newStringFromArray() {
|
||||
return new String(array);
|
||||
}
|
||||
private byte[] array;
|
||||
private char[] chars;
|
||||
private char[] charsMixedBegin;
|
||||
private char[] charsMixedSmall;
|
||||
private char[] charsMixedEnd;
|
||||
private int[] codePointsLatin1;
|
||||
private int[] codePointsMixedBegin;
|
||||
private int[] codePointsMixedSmall;
|
||||
|
||||
@Benchmark
|
||||
public String newStringFromArrayWithCharset() {
|
||||
return new String(array, StandardCharsets.UTF_8);
|
||||
}
|
||||
private static int[] intCopyOfChars(char[] chars, int newLength) {
|
||||
int[] res = new int[newLength];
|
||||
for (int i = 0; i < Math.min(chars.length, newLength); i++)
|
||||
res[i] = chars[i];
|
||||
return res;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public String newStringFromArrayWithCharsetName() throws Exception {
|
||||
return new String(array, StandardCharsets.UTF_8.name());
|
||||
}
|
||||
@Setup
|
||||
public void setup() {
|
||||
String s = "a".repeat(size);
|
||||
array = s.getBytes(StandardCharsets.UTF_8);
|
||||
chars = s.toCharArray();
|
||||
charsMixedBegin = Arrays.copyOf(chars, array.length);
|
||||
charsMixedBegin[0] = INTEROBANG;
|
||||
charsMixedSmall = Arrays.copyOf(chars, array.length);
|
||||
charsMixedSmall[Math.min(charsMixedSmall.length - 1, 7)] = INTEROBANG;
|
||||
charsMixedEnd = new char[size + 7];
|
||||
Arrays.fill(charsMixedEnd, 'a');
|
||||
charsMixedEnd[charsMixedEnd.length - 1] = INTEROBANG;
|
||||
|
||||
@Benchmark
|
||||
public String newStringFromRangedArray() {
|
||||
return new String(array, offset, array.length - offset);
|
||||
}
|
||||
codePointsLatin1 = intCopyOfChars(chars, array.length);
|
||||
codePointsMixedBegin = intCopyOfChars(chars, array.length);
|
||||
codePointsMixedBegin[0] = INTEROBANG;
|
||||
codePointsMixedSmall = intCopyOfChars(chars, array.length);
|
||||
codePointsMixedSmall[Math.min(codePointsMixedSmall.length - 1, 7)] = INTEROBANG;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public String newStringFromRangedArrayWithCharset() {
|
||||
return new String(array, offset, array.length - offset, StandardCharsets.UTF_8);
|
||||
}
|
||||
@Benchmark
|
||||
public String newStringFromBytes() {
|
||||
return new String(array);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public String newStringFromBytesRanged() {
|
||||
return new String(array, offset, array.length - offset);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public String newStringFromBytesRangedWithCharsetUTF8() {
|
||||
return new String(array, offset, array.length - offset, StandardCharsets.UTF_8);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public String newStringFromBytesWithCharsetUTF8() {
|
||||
return new String(array, StandardCharsets.UTF_8);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public String newStringFromBytesWithCharsetNameUTF8() throws Exception {
|
||||
return new String(array, StandardCharsets.UTF_8.name());
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public String newStringFromCharsLatin1() {
|
||||
return new String(chars);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public String newStringFromCharsMixedBegin() {
|
||||
return new String(charsMixedBegin);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public String newStringFromCharsMixedSmall() {
|
||||
return new String(charsMixedSmall);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public String newStringFromCharsMixedEnd() {
|
||||
return new String(charsMixedEnd);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
|
||||
public void newStringFromCharsMixedAll(Blackhole bh) {
|
||||
bh.consume(new String(charsMixedBegin));
|
||||
bh.consume(new String(charsMixedSmall));
|
||||
bh.consume(new String(chars));
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public String newStringFromCodePointRangedLatin1() {
|
||||
return new String(codePointsLatin1, 0, codePointsLatin1.length);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public String newStringFromCodePointRangedMixedBegin() {
|
||||
return new String(codePointsMixedBegin, 0, codePointsMixedBegin.length);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public String newStringFromCodePointRangedMixedSmall() {
|
||||
return new String(codePointsMixedSmall, 0, codePointsMixedSmall.length);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user