8259842: Remove Result cache from StringCoding

Reviewed-by: naoto, plevart, rriggs
This commit is contained in:
Claes Redestad 2021-01-22 11:27:13 +00:00
parent d066f2b06c
commit 58ceb25443
5 changed files with 1000 additions and 1122 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1994, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1994, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -31,7 +31,9 @@ import java.lang.annotation.Native;
import java.lang.invoke.MethodHandles;
import java.lang.constant.Constable;
import java.lang.constant.ConstantDesc;
import java.nio.charset.Charset;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
@ -51,8 +53,12 @@ import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import jdk.internal.vm.annotation.IntrinsicCandidate;
import jdk.internal.vm.annotation.Stable;
import sun.nio.cs.ArrayDecoder;
import sun.nio.cs.ArrayEncoder;
import static java.util.function.Predicate.not;
import sun.nio.cs.ISO_8859_1;
import sun.nio.cs.US_ASCII;
import sun.nio.cs.UTF_8;
/**
* The {@code String} class represents character strings. All
@ -475,15 +481,9 @@ public final class String
*
* @since 1.1
*/
public String(byte bytes[], int offset, int length, String charsetName)
public String(byte[] bytes, int offset, int length, String charsetName)
throws UnsupportedEncodingException {
if (charsetName == null)
throw new NullPointerException("charsetName");
checkBoundsOffCount(offset, length, bytes.length);
StringCoding.Result ret =
StringCoding.decode(charsetName, bytes, offset, length);
this.value = ret.value;
this.coder = ret.coder;
this(bytes, offset, length, lookupCharset(charsetName));
}
/**
@ -516,14 +516,793 @@ public final class String
*
* @since 1.6
*/
public String(byte bytes[], int offset, int length, Charset charset) {
if (charset == null)
throw new NullPointerException("charset");
public String(byte[] bytes, int offset, int length, Charset charset) {
Objects.requireNonNull(charset);
checkBoundsOffCount(offset, length, bytes.length);
StringCoding.Result ret =
StringCoding.decode(charset, bytes, offset, length);
this.value = ret.value;
this.coder = ret.coder;
if (length == 0) {
this.value = "".value;
this.coder = "".coder;
} else if (charset == UTF_8.INSTANCE) {
if (COMPACT_STRINGS && !StringCoding.hasNegatives(bytes, offset, length)) {
this.value = Arrays.copyOfRange(bytes, offset, offset + length);
this.coder = LATIN1;
} else {
int sl = offset + length;
int dp = 0;
byte[] dst = null;
if (COMPACT_STRINGS) {
dst = new byte[length];
while (offset < sl) {
int b1 = bytes[offset];
if (b1 >= 0) {
dst[dp++] = (byte)b1;
offset++;
continue;
}
if ((b1 == (byte)0xc2 || b1 == (byte)0xc3) &&
offset + 1 < sl) {
int b2 = bytes[offset + 1];
if (!isNotContinuation(b2)) {
dst[dp++] = (byte)decode2(b1, b2);
offset += 2;
continue;
}
}
// anything not a latin1, including the repl
// we have to go with the utf16
break;
}
if (offset == sl) {
if (dp != dst.length) {
dst = Arrays.copyOf(dst, dp);
}
this.value = dst;
this.coder = LATIN1;
return;
}
}
if (dp == 0 || dst == null) {
dst = new byte[length << 1];
} else {
byte[] buf = new byte[length << 1];
StringLatin1.inflate(dst, 0, buf, 0, dp);
dst = buf;
}
dp = decodeUTF8_UTF16(bytes, offset, sl, dst, dp, true);
if (dp != length) {
dst = Arrays.copyOf(dst, dp << 1);
}
this.value = dst;
this.coder = UTF16;
}
} else if (charset == ISO_8859_1.INSTANCE) {
if (COMPACT_STRINGS) {
this.value = Arrays.copyOfRange(bytes, offset, offset + length);
this.coder = LATIN1;
} else {
this.value = StringLatin1.inflate(bytes, offset, length);
this.coder = UTF16;
}
} else if (charset == US_ASCII.INSTANCE) {
if (COMPACT_STRINGS && !StringCoding.hasNegatives(bytes, offset, length)) {
this.value = Arrays.copyOfRange(bytes, offset, offset + length);
this.coder = LATIN1;
} else {
byte[] dst = new byte[length << 1];
int dp = 0;
while (dp < length) {
int b = bytes[offset++];
StringUTF16.putChar(dst, dp++, (b >= 0) ? (char) b : REPL);
}
this.value = dst;
this.coder = UTF16;
}
} else {
// (1)We never cache the "external" cs, the only benefit of creating
// an additional StringDe/Encoder object to wrap it is to share the
// de/encode() method. These SD/E objects are short-lived, the young-gen
// gc should be able to take care of them well. But the best approach
// is still not to generate them if not really necessary.
// (2)The defensive copy of the input byte/char[] has a big performance
// impact, as well as the outgoing result byte/char[]. Need to do the
// optimization check of (sm==null && classLoader0==null) for both.
CharsetDecoder cd = charset.newDecoder();
// ArrayDecoder fastpaths
if (cd instanceof ArrayDecoder ad) {
// ascii
if (ad.isASCIICompatible() && !StringCoding.hasNegatives(bytes, offset, length)) {
if (COMPACT_STRINGS) {
this.value = Arrays.copyOfRange(bytes, offset, offset + length);
this.coder = LATIN1;
return;
}
this.value = StringLatin1.inflate(bytes, offset, length);
this.coder = UTF16;
return;
}
// fastpath for always Latin1 decodable single byte
if (COMPACT_STRINGS && ad.isLatin1Decodable()) {
byte[] dst = new byte[length];
ad.decodeToLatin1(bytes, offset, length, dst);
this.value = dst;
this.coder = LATIN1;
return;
}
int en = scale(length, cd.maxCharsPerByte());
cd.onMalformedInput(CodingErrorAction.REPLACE)
.onUnmappableCharacter(CodingErrorAction.REPLACE);
char[] ca = new char[en];
int clen = ad.decode(bytes, offset, length, ca);
if (COMPACT_STRINGS) {
byte[] bs = StringUTF16.compress(ca, 0, clen);
if (bs != null) {
value = bs;
coder = LATIN1;
return;
}
}
coder = UTF16;
value = StringUTF16.toBytes(ca, 0, clen);
return;
}
// decode using CharsetDecoder
int en = scale(length, cd.maxCharsPerByte());
cd.onMalformedInput(CodingErrorAction.REPLACE)
.onUnmappableCharacter(CodingErrorAction.REPLACE);
char[] ca = new char[en];
if (charset.getClass().getClassLoader0() != null &&
System.getSecurityManager() != null) {
bytes = Arrays.copyOfRange(bytes, offset, offset + length);
offset = 0;
}
int caLen = decodeWithDecoder(cd, ca, bytes, offset, length);
if (COMPACT_STRINGS) {
byte[] bs = StringUTF16.compress(ca, 0, caLen);
if (bs != null) {
value = bs;
coder = LATIN1;
return;
}
}
coder = UTF16;
value = StringUTF16.toBytes(ca, 0, caLen);
}
}
/*
 * Decodes the UTF-8 bytes in bytes[offset, offset + length) into a String
 * without ever substituting the replacement character.
 *
 * Throws IllegalArgumentException, instead of replacing, if the input is
 * malformed: decodeUTF8_UTF16 is invoked with doReplace == false, so bad
 * sequences are reported through throwMalformed.
 */
static String newStringUTF8NoRepl(byte[] bytes, int offset, int length) {
checkBoundsOffCount(offset, length, bytes.length);
if (length == 0) {
return "";
}
// Fast path: hasNegatives reports no negative byte, so every byte is a
// 7-bit value and the range can be copied unchanged as a LATIN1 string.
if (COMPACT_STRINGS && !StringCoding.hasNegatives(bytes, offset, length)) {
return new String(Arrays.copyOfRange(bytes, offset, offset + length), LATIN1);
} else {
int sl = offset + length;
int dp = 0;
byte[] dst = null;
if (COMPACT_STRINGS) {
// Optimistically decode into one byte per char; this only works while
// every decoded char fits in a single byte.
dst = new byte[length];
while (offset < sl) {
int b1 = bytes[offset];
if (b1 >= 0) {
dst[dp++] = (byte) b1;
offset++;
continue;
}
// Only the lead bytes 0xC2 and 0xC3 are accepted here; their decoded
// value is stored as a single byte.
if ((b1 == (byte) 0xc2 || b1 == (byte) 0xc3) &&
offset + 1 < sl) {
int b2 = bytes[offset + 1];
if (!isNotContinuation(b2)) {
dst[dp++] = (byte) decode2(b1, b2);
offset += 2;
continue;
}
}
// anything not a latin1, including the REPL
// we have to go with the utf16
break;
}
// The whole input was consumed by the loop above: the result is LATIN1.
if (offset == sl) {
if (dp != dst.length) {
dst = Arrays.copyOf(dst, dp);
}
return new String(dst, LATIN1);
}
}
// Switch to two bytes per char. Chars already decoded above (dp of them)
// are carried over with StringLatin1.inflate before decoding continues.
if (dp == 0 || dst == null) {
dst = new byte[length << 1];
} else {
byte[] buf = new byte[length << 1];
StringLatin1.inflate(dst, 0, buf, 0, dp);
dst = buf;
}
// doReplace == false: malformed input is thrown rather than replaced.
dp = decodeUTF8_UTF16(bytes, offset, sl, dst, dp, false);
// dp counts chars; trim the array when fewer than 'length' chars were produced.
if (dp != length) {
dst = Arrays.copyOf(dst, dp << 1);
}
return new String(dst, UTF16);
}
}
/**
 * Decodes {@code src} using {@code cs} without substituting replacement
 * characters, reporting coding failures as a checked exception.
 *
 * @param src the bytes to decode
 * @param cs  the charset to decode with
 * @return the decoded string
 * @throws CharacterCodingException if {@code newStringNoRepl1} reported a
 *         coding error (tunnelled as the cause of an
 *         {@code IllegalArgumentException})
 * @throws IllegalArgumentException if an {@code IllegalArgumentException}
 *         without a {@code CharacterCodingException} cause was raised
 */
static String newStringNoRepl(byte[] src, Charset cs) throws CharacterCodingException {
    try {
        return newStringNoRepl1(src, cs);
    } catch (IllegalArgumentException e) {
        // newStringNoRepl1 throws IAE with MalformedInputException or another
        // CharacterCodingException as the cause; unwrap it for the caller.
        if (e.getCause() instanceof CharacterCodingException cce) {
            throw cce;
        }
        // Not a tunnelled coding error (cause is null or of another type).
        // An unconditional cast would fail with NullPointerException or
        // ClassCastException and hide the real problem, so rethrow as-is.
        throw e;
    }
}
/**
 * Decodes all of {@code src} using {@code cs} without substituting
 * replacement characters.
 *
 * <p>Note that on the ISO-8859-1 and US-ASCII paths with compact strings
 * the resulting String is built directly on {@code src} (no copy is made).
 *
 * @param src the bytes to decode
 * @param cs  the charset to decode with
 * @return the decoded string
 * @throws IllegalArgumentException if the input cannot be decoded; the
 *         cause is the {@code CharacterCodingException} describing the failure
 */
private static String newStringNoRepl1(byte[] src, Charset cs) {
    int len = src.length;
    if (len == 0) {
        return "";
    }
    if (cs == UTF_8.INSTANCE) {
        return newStringUTF8NoRepl(src, 0, src.length);
    }
    if (cs == ISO_8859_1.INSTANCE) {
        if (COMPACT_STRINGS)
            return new String(src, LATIN1);
        return new String(StringLatin1.inflate(src, 0, src.length), UTF16);
    }
    if (cs == US_ASCII.INSTANCE) {
        if (!StringCoding.hasNegatives(src, 0, src.length)) {
            if (COMPACT_STRINGS)
                return new String(src, LATIN1);
            return new String(StringLatin1.inflate(src, 0, src.length), UTF16);
        } else {
            // always throws
            throwMalformed(src);
        }
    }

    CharsetDecoder cd = cs.newDecoder();
    // ascii fastpath
    if (cd instanceof ArrayDecoder ad &&
            ad.isASCIICompatible() &&
            !StringCoding.hasNegatives(src, 0, src.length)) {
        return new String(src, 0, src.length, ISO_8859_1.INSTANCE);
    }
    int en = scale(len, cd.maxCharsPerByte());
    char[] ca = new char[en];
    if (cs.getClass().getClassLoader0() != null &&
            System.getSecurityManager() != null) {
        src = Arrays.copyOf(src, len);
    }

    // Decode here rather than through decodeWithDecoder: that helper turns a
    // CharacterCodingException into an Error, which is only appropriate when
    // replacement is enabled. No replacement is configured on this decoder,
    // so a coding error is a legitimate outcome and must reach the caller as
    // an IllegalArgumentException carrying the CharacterCodingException.
    ByteBuffer bb = ByteBuffer.wrap(src, 0, src.length);
    CharBuffer cb = CharBuffer.wrap(ca, 0, ca.length);
    try {
        CoderResult cr = cd.decode(bb, cb, true);
        if (!cr.isUnderflow())
            cr.throwException();
        cr = cd.flush(cb);
        if (!cr.isUnderflow())
            cr.throwException();
    } catch (CharacterCodingException x) {
        throw new IllegalArgumentException(x);
    }
    int caLen = cb.position();

    if (COMPACT_STRINGS) {
        byte[] bs = StringUTF16.compress(ca, 0, caLen);
        if (bs != null) {
            return new String(bs, LATIN1);
        }
    }
    return new String(StringUTF16.toBytes(ca, 0, caLen), UTF16);
}
// U+FFFD; the decoders in this file write it in place of input they cannot
// decode when replacement is enabled.
private static final char REPL = '\ufffd';
/**
 * Returns the first {@code len} bytes of {@code ba}.
 *
 * <p>The array itself is handed back (no copy) only when it already has
 * exactly {@code len} elements and either the caller is trusted or no
 * security manager is installed; otherwise a copy of length {@code len}
 * is returned.
 */
private static byte[] safeTrim(byte[] ba, int len, boolean isTrusted) {
    if (len != ba.length) {
        return Arrays.copyOf(ba, len);
    }
    if (!isTrusted && System.getSecurityManager() != null) {
        return Arrays.copyOf(ba, len);
    }
    return ba;
}
/**
 * Multiplies {@code len} by {@code expansionFactor} and truncates the
 * product to an {@code int}.
 */
private static int scale(int len, float expansionFactor) {
    // Widen the factor first so the product is computed in double, not
    // float, arithmetic; float would lose the low order bits of len once
    // len exceeds 2**24.
    final double factor = expansionFactor;
    final double product = len * factor;
    return (int) product;
}
/**
 * Resolves a charset name to a {@code Charset}.
 *
 * @param csn the charset name
 * @return the charset returned by {@code Charset.forName(csn)}
 * @throws NullPointerException if {@code csn} is {@code null}
 * @throws UnsupportedEncodingException if the name is illegal or the charset
 *         is not supported; the message is {@code csn} and the cause is the
 *         exception thrown by {@code Charset.forName}
 */
private static Charset lookupCharset(String csn) throws UnsupportedEncodingException {
    Objects.requireNonNull(csn);
    try {
        return Charset.forName(csn);
    } catch (UnsupportedCharsetException | IllegalCharsetNameException x) {
        // UnsupportedEncodingException has no cause-taking constructor, so
        // attach the original failure explicitly instead of dropping it.
        UnsupportedEncodingException uee = new UnsupportedEncodingException(csn);
        uee.initCause(x);
        throw uee;
    }
}
/**
 * Encodes a string's internal bytes ({@code val}, interpreted according to
 * {@code coder}) with the given charset, in replacing mode.
 *
 * <p>UTF-8, ISO-8859-1 and US-ASCII are dispatched to dedicated encoders;
 * every other charset goes through {@code encodeWithEncoder}.
 */
private static byte[] encode(Charset cs, byte coder, byte[] val) {
    final byte[] encoded;
    if (cs == UTF_8.INSTANCE) {
        encoded = encodeUTF8(coder, val, true);
    } else if (cs == ISO_8859_1.INSTANCE) {
        encoded = encode8859_1(coder, val);
    } else if (cs == US_ASCII.INSTANCE) {
        encoded = encodeASCII(coder, val);
    } else {
        encoded = encodeWithEncoder(cs, coder, val, true);
    }
    return encoded;
}
/**
 * Encodes a string's internal bytes with a freshly created encoder of
 * {@code cs}.
 *
 * <p>When the encoder is an {@code ArrayEncoder} its array-based entry
 * points are tried first; if they report {@code -1}, or the encoder is not
 * an {@code ArrayEncoder}, the generic buffer-based {@code CharsetEncoder}
 * API is used.
 *
 * @param cs        charset to encode with
 * @param coder     {@code LATIN1} or {@code UTF16}, describing {@code val}
 * @param val       the string's internal bytes
 * @param doReplace whether the encoder is switched to REPLACE for malformed
 *                  and unmappable input
 * @return the encoded bytes
 * @throws IllegalArgumentException if {@code doReplace} is {@code false} and
 *         the buffer-based encode fails with a {@code CharacterCodingException}
 */
private static byte[] encodeWithEncoder(Charset cs, byte coder, byte[] val, boolean doReplace) {
    final CharsetEncoder encoder = cs.newEncoder();
    final int charCount = val.length >> coder;  // assume LATIN1=0/UTF16=1;
    final int capacity = scale(charCount, encoder.maxBytesPerChar());
    final boolean latin1 = (coder == LATIN1);

    if (encoder instanceof ArrayEncoder arrayEncoder) {
        // fastpath for ascii compatible
        if (latin1
                && arrayEncoder.isASCIICompatible()
                && !StringCoding.hasNegatives(val, 0, val.length)) {
            return Arrays.copyOf(val, val.length);
        }
        byte[] out = new byte[capacity];
        if (charCount == 0) {
            return out;
        }
        if (doReplace) {
            encoder.onMalformedInput(CodingErrorAction.REPLACE)
                   .onUnmappableCharacter(CodingErrorAction.REPLACE);
        }
        int written;
        if (latin1) {
            written = arrayEncoder.encodeFromLatin1(val, 0, charCount, out);
        } else {
            written = arrayEncoder.encodeFromUTF16(val, 0, charCount, out);
        }
        if (written != -1) {
            return safeTrim(out, written, true);
        }
        // -1: fall through to the generic path below
    }

    byte[] out = new byte[capacity];
    if (charCount == 0) {
        return out;
    }
    if (doReplace) {
        encoder.onMalformedInput(CodingErrorAction.REPLACE)
               .onUnmappableCharacter(CodingErrorAction.REPLACE);
    }
    char[] chars = latin1 ? StringLatin1.toChars(val) : StringUTF16.toChars(val);
    ByteBuffer target = ByteBuffer.wrap(out);
    CharBuffer source = CharBuffer.wrap(chars, 0, charCount);
    try {
        CoderResult result = encoder.encode(source, target, true);
        if (!result.isUnderflow()) {
            result.throwException();
        }
        result = encoder.flush(target);
        if (!result.isUnderflow()) {
            result.throwException();
        }
    } catch (CharacterCodingException x) {
        if (doReplace) {
            throw new Error(x);
        }
        throw new IllegalArgumentException(x);
    }
    int produced = target.position();
    boolean trusted = cs.getClass().getClassLoader0() == null;
    return safeTrim(out, produced, trusted);
}
/*
 * Encodes s as UTF-8 with replacement disabled (doReplace == false), so an
 * unencodable char is reported by an exception instead of being replaced.
 */
static byte[] getBytesUTF8NoRepl(String s) {
    final byte coder = s.coder();
    final byte[] val = s.value();
    return encodeUTF8(coder, val, false);
}
/**
 * Returns {@code true} when {@code StringCoding.hasNegatives} finds no
 * negative byte anywhere in {@code src}.
 */
private static boolean isASCII(byte[] src) {
    final boolean anyNegative = StringCoding.hasNegatives(src, 0, src.length);
    return !anyNegative;
}
/**
 * Encodes {@code s} using {@code cs} without substituting replacement bytes,
 * reporting coding failures as a checked exception.
 *
 * @param s  the string to encode
 * @param cs the charset to encode with
 * @return the encoded bytes
 * @throws CharacterCodingException if {@code getBytesNoRepl1} reported a
 *         coding error (tunnelled as the cause of an
 *         {@code IllegalArgumentException})
 * @throws IllegalArgumentException if an {@code IllegalArgumentException}
 *         without a {@code CharacterCodingException} cause was raised
 */
static byte[] getBytesNoRepl(String s, Charset cs) throws CharacterCodingException {
    try {
        return getBytesNoRepl1(s, cs);
    } catch (IllegalArgumentException e) {
        // getBytesNoRepl1 throws IAE with UnmappableCharacterException or
        // another CharacterCodingException as the cause; unwrap it.
        if (e.getCause() instanceof CharacterCodingException cce) {
            throw cce;
        }
        // Not a tunnelled coding error (cause is null or of another type).
        // An unconditional cast would fail with NullPointerException or
        // ClassCastException and hide the real problem, so rethrow as-is.
        throw e;
    }
}
/**
 * Encodes {@code s} using {@code cs} with replacement disabled.
 *
 * <p>On several paths (LATIN1-coded ASCII content for UTF-8 and US-ASCII,
 * any LATIN1-coded content for ISO-8859-1) the string's own backing array
 * is returned without copying.
 *
 * @throws IllegalArgumentException if a char cannot be encoded
 */
private static byte[] getBytesNoRepl1(String s, Charset cs) {
    final byte[] val = s.value();
    final byte coder = s.coder();
    final boolean latin1 = (coder == LATIN1);

    if (cs == UTF_8.INSTANCE) {
        return (latin1 && isASCII(val)) ? val : encodeUTF8(coder, val, false);
    }
    if (cs == ISO_8859_1.INSTANCE) {
        return latin1 ? val : encode8859_1(coder, val, false);
    }
    if (cs == US_ASCII.INSTANCE && latin1) {
        if (!isASCII(val)) {
            throwUnmappable(val);   // always throws
        }
        return val;
    }
    // Everything else, including UTF16-coded strings for US-ASCII.
    return encodeWithEncoder(cs, coder, val, false);
}
/**
 * Encodes a string's internal bytes as US-ASCII, writing {@code '?'} for
 * every char outside the 7-bit range. A high surrogate immediately followed
 * by a low surrogate yields a single {@code '?'}.
 */
private static byte[] encodeASCII(byte coder, byte[] val) {
    if (coder == LATIN1) {
        byte[] out = new byte[val.length];
        for (int i = 0; i < val.length; i++) {
            byte b = val[i];
            out[i] = (b < 0) ? (byte) '?' : b;
        }
        return out;
    }

    final int charCount = val.length >> 1;
    byte[] out = new byte[charCount];
    int written = 0;
    int i = 0;
    while (i < charCount) {
        char c = StringUTF16.getChar(val, i++);
        if (c < 0x80) {
            out[written++] = (byte) c;
        } else {
            // consume the second half of a surrogate pair together with the first
            if (Character.isHighSurrogate(c) && i < charCount
                    && Character.isLowSurrogate(StringUTF16.getChar(val, i))) {
                i++;
            }
            out[written++] = '?';
        }
    }
    return (written == charCount) ? out : Arrays.copyOf(out, written);
}
/**
 * Encodes a string's internal bytes as ISO-8859-1 in replacing mode.
 */
private static byte[] encode8859_1(byte coder, byte[] val) {
    final boolean doReplace = true;
    return encode8859_1(coder, val, doReplace);
}
/**
 * Encodes a string's internal bytes as ISO-8859-1.
 *
 * <p>LATIN1-coded input is simply copied. UTF16-coded input is pushed
 * through {@code StringCoding.implEncodeISOArray} in runs; whenever a run
 * stops short, the char at the stop position is either reported
 * ({@code doReplace == false}) or written as {@code '?'}, with a surrogate
 * pair producing a single {@code '?'}.
 *
 * @throws IllegalArgumentException if {@code doReplace} is {@code false} and
 *         a char cannot be encoded
 */
private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) {
    if (coder == LATIN1) {
        return Arrays.copyOf(val, val.length);
    }
    final int charCount = val.length >> 1;
    final byte[] out = new byte[charCount];
    int written = 0;
    int read = 0;
    int remaining = charCount;
    while (read < charCount) {
        int copied = StringCoding.implEncodeISOArray(val, read, out, written, remaining);
        read += copied;
        written += copied;
        if (copied == remaining) {
            continue;
        }
        // presumably the bulk encoder stopped at the first char it cannot encode
        if (!doReplace) {
            throwUnmappable(read);
        }
        char c = StringUTF16.getChar(val, read++);
        if (Character.isHighSurrogate(c) && read < charCount
                && Character.isLowSurrogate(StringUTF16.getChar(val, read))) {
            read++;
        }
        out[written++] = '?';
        remaining = charCount - read;
    }
    return (written == out.length) ? out : Arrays.copyOf(out, written);
}
//////////////////////////////// utf8 ////////////////////////////////////
/**
 * Returns {@code true} unless the two top bits of the low byte of {@code b}
 * are {@code 10}, the marker of a UTF-8 continuation byte.
 */
private static boolean isNotContinuation(int b) {
    final int topTwoBits = b & 0xc0;
    return topTwoBits != 0x80;
}
/**
 * Checks a complete three-byte sequence. It is rejected when the lead byte
 * is 0xE0 and the second byte lies in 0x80..0x9F, or when the second or
 * third byte is not a continuation byte.
 */
private static boolean isMalformed3(int b1, int b2, int b3) {
    if (b1 == (byte) 0xe0 && (b2 & 0xe0) == 0x80) {
        return true;
    }
    final boolean secondOk = (b2 & 0xc0) == 0x80;
    final boolean thirdOk = (b3 & 0xc0) == 0x80;
    return !(secondOk && thirdOk);
}
/**
 * Checks the first two bytes of a three-byte sequence. They are rejected
 * when the lead byte is 0xE0 and the second byte lies in 0x80..0x9F, or
 * when the second byte is not a continuation byte.
 */
private static boolean isMalformed3_2(int b1, int b2) {
    if (b1 == (byte) 0xe0 && (b2 & 0xe0) == 0x80) {
        return true;
    }
    final boolean secondIsContinuation = (b2 & 0xc0) == 0x80;
    return !secondIsContinuation;
}
/**
 * Returns {@code true} if any of the three trailing bytes of a four-byte
 * sequence is not a continuation byte.
 */
private static boolean isMalformed4(int b2, int b3, int b4) {
    if ((b2 & 0xc0) != 0x80) {
        return true;
    }
    if ((b3 & 0xc0) != 0x80) {
        return true;
    }
    return (b4 & 0xc0) != 0x80;
}
/**
 * Checks the first two bytes of a four-byte sequence; the comparisons
 * against 0xf0 / 0xf4 and the range checks work on values in 0..255.
 *
 * <p>Rejected: lead 0xF0 with a second byte outside 0x90..0xBF, lead 0xF4
 * with a second byte outside 0x80..0x8F, and any second byte that is not a
 * continuation byte.
 */
private static boolean isMalformed4_2(int b1, int b2) {
    if (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) {
        return true;
    }
    if (b1 == 0xf4 && (b2 & 0xf0) != 0x80) {
        return true;
    }
    return (b2 & 0xc0) != 0x80;
}
/**
 * Returns {@code true} if the third byte of a four-byte sequence is not a
 * continuation byte.
 */
private static boolean isMalformed4_3(int b3) {
    final int topTwoBits = b3 & 0xc0;
    return topTwoBits != 0x80;
}
/**
 * Combines the two bytes of a two-byte sequence into a char. The bytes are
 * expected as sign-extended ints; XOR-ing with the equally sign-extended
 * marker bits (0xC0 lead, 0x80 continuation) strips both the markers and
 * the sign extension in one step.
 */
private static char decode2(int b1, int b2) {
    final int markers = ((byte) 0xC0 << 6) ^ ((byte) 0x80 << 0);
    final int raw = (b1 << 6) ^ b2;
    return (char) (raw ^ markers);
}
/**
 * Combines the three bytes of a three-byte sequence into a char. The bytes
 * are expected as sign-extended ints; XOR-ing with the equally
 * sign-extended marker bits (0xE0 lead, 0x80 continuations) strips both the
 * markers and the sign extension.
 */
private static char decode3(int b1, int b2, int b3) {
    final int markers = ((byte) 0xE0 << 12)
            ^ ((byte) 0x80 << 6)
            ^ ((byte) 0x80 << 0);
    final int raw = (b1 << 12) ^ (b2 << 6) ^ b3;
    return (char) (raw ^ markers);
}
/**
 * Combines the four bytes of a four-byte sequence into a code point. The
 * bytes are expected as sign-extended ints; XOR-ing with the equally
 * sign-extended marker bits (0xF0 lead, 0x80 continuations) strips both the
 * markers and the sign extension.
 */
private static int decode4(int b1, int b2, int b3, int b4) {
    final int markers = ((byte) 0xF0 << 18)
            ^ ((byte) 0x80 << 12)
            ^ ((byte) 0x80 << 6)
            ^ ((byte) 0x80 << 0);
    final int raw = (b1 << 18) ^ (b2 << 12) ^ (b3 << 6) ^ b4;
    return raw ^ markers;
}
/*
 * Decodes the UTF-8 bytes in src[sp, sl) and stores the resulting chars in
 * dst through StringUTF16.putChar, starting at char index dp.
 *
 * With doReplace == true every malformed sequence is stored as REPL; with
 * doReplace == false it is reported through throwMalformed, i.e. as an
 * IllegalArgumentException.
 *
 * Returns the char index following the last char stored.
 */
private static int decodeUTF8_UTF16(byte[] src, int sp, int sl, byte[] dst, int dp, boolean doReplace) {
while (sp < sl) {
// bytes are read as signed ints, so every non-ASCII byte is negative here
int b1 = src[sp++];
if (b1 >= 0) {
StringUTF16.putChar(dst, dp++, (char) b1);
} else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
// lead byte 0xC2..0xDF: two-byte sequence (0xC0 and 0xC1 fail the mask test)
if (sp < sl) {
int b2 = src[sp++];
if (isNotContinuation(b2)) {
if (!doReplace) {
throwMalformed(sp - 1, 1);
}
StringUTF16.putChar(dst, dp++, REPL);
// step back so the offending byte is examined again as a lead byte
sp--;
} else {
StringUTF16.putChar(dst, dp++, decode2(b1, b2));
}
continue;
}
// input ended after the lead byte
if (!doReplace) {
throwMalformed(sp, 1); // underflow()
}
StringUTF16.putChar(dst, dp++, REPL);
break;
} else if ((b1 >> 4) == -2) {
// lead byte 0xE0..0xEF: three-byte sequence
if (sp + 1 < sl) {
int b2 = src[sp++];
int b3 = src[sp++];
if (isMalformed3(b1, b2, b3)) {
if (!doReplace) {
throwMalformed(sp - 3, 3);
}
StringUTF16.putChar(dst, dp++, REPL);
// rewind to the lead byte, then skip as many bytes as malformed3 reports
sp -= 3;
sp += malformed3(src, sp);
} else {
char c = decode3(b1, b2, b3);
// a surrogate value encoded in three bytes is rejected as well
if (Character.isSurrogate(c)) {
if (!doReplace) {
throwMalformed(sp - 3, 3);
}
StringUTF16.putChar(dst, dp++, REPL);
} else {
StringUTF16.putChar(dst, dp++, c);
}
}
continue;
}
// fewer than two bytes follow the lead byte
if (sp < sl && isMalformed3_2(b1, src[sp])) {
if (!doReplace) {
throwMalformed(sp - 1, 2);
}
StringUTF16.putChar(dst, dp++, REPL);
continue;
}
if (!doReplace) {
throwMalformed(sp, 1);
}
StringUTF16.putChar(dst, dp++, REPL);
break;
} else if ((b1 >> 3) == -2) {
// lead byte 0xF0..0xF7: four-byte sequence
if (sp + 2 < sl) {
int b2 = src[sp++];
int b3 = src[sp++];
int b4 = src[sp++];
int uc = decode4(b1, b2, b3, b4);
if (isMalformed4(b2, b3, b4) ||
!Character.isSupplementaryCodePoint(uc)) { // shortest form check
if (!doReplace) {
throwMalformed(sp - 4, 4);
}
StringUTF16.putChar(dst, dp++, REPL);
// rewind to the lead byte, then skip as many bytes as malformed4 reports
sp -= 4;
sp += malformed4(src, sp);
} else {
// a supplementary code point is stored as a surrogate pair (two chars)
StringUTF16.putChar(dst, dp++, Character.highSurrogate(uc));
StringUTF16.putChar(dst, dp++, Character.lowSurrogate(uc));
}
continue;
}
// fewer than three bytes follow the lead byte; switch b1 to its unsigned value
b1 &= 0xff;
if (b1 > 0xf4 || sp < sl && isMalformed4_2(b1, src[sp] & 0xff)) {
if (!doReplace) {
throwMalformed(sp - 1, 1); // or 2
}
StringUTF16.putChar(dst, dp++, REPL);
continue;
}
if (!doReplace) {
throwMalformed(sp - 1, 1);
}
sp++;
StringUTF16.putChar(dst, dp++, REPL);
if (sp < sl && isMalformed4_3(src[sp])) {
continue;
}
break;
} else {
// any other negative byte (e.g. a stray continuation byte, 0xC0, 0xC1, 0xF8..0xFF)
if (!doReplace) {
throwMalformed(sp - 1, 1);
}
StringUTF16.putChar(dst, dp++, REPL);
}
}
return dp;
}
/**
 * Decodes {@code src[offset, offset + length)} into {@code dst} with the
 * given decoder, including the final flush.
 *
 * @return the number of chars written to {@code dst}
 * @throws Error if the decoder reports a {@code CharacterCodingException};
 *         callers are expected to have enabled substitution, in which case
 *         this should not happen
 */
private static int decodeWithDecoder(CharsetDecoder cd, char[] dst, byte[] src, int offset, int length) {
    final ByteBuffer in = ByteBuffer.wrap(src, offset, length);
    final CharBuffer out = CharBuffer.wrap(dst, 0, dst.length);
    try {
        CoderResult result = cd.decode(in, out, true);
        if (result.isUnderflow()) {
            // all input consumed; let the decoder emit any pending output
            result = cd.flush(out);
        }
        if (!result.isUnderflow()) {
            result.throwException();
        }
    } catch (CharacterCodingException x) {
        // Substitution is always enabled,
        // so this shouldn't happen
        throw new Error(x);
    }
    return out.position();
}
/**
 * Returns how many bytes (1 or 2) to skip for a malformed three-byte
 * sequence whose lead byte is at {@code src[sp]}: 1 when the second byte
 * is itself the problem, otherwise 2.
 */
private static int malformed3(byte[] src, int sp) {
    final int lead = src[sp];
    final int second = src[sp + 1];  // no need to lookup b3
    final boolean secondIsBad =
            (lead == (byte) 0xe0 && (second & 0xe0) == 0x80) || isNotContinuation(second);
    if (secondIsBad) {
        return 1;
    }
    return 2;
}
/**
 * Returns how many bytes (1, 2 or 3) to skip for a malformed four-byte
 * sequence whose lead byte is at {@code src[sp]}.
 */
private static int malformed4(byte[] src, int sp) {
    // we don't care the speed here
    final int lead = src[sp] & 0xff;
    final int second = src[sp + 1] & 0xff;
    final boolean leadOrSecondBad = lead > 0xf4
            || (lead == 0xf0 && (second < 0x90 || second > 0xbf))
            || (lead == 0xf4 && (second & 0xf0) != 0x80)
            || isNotContinuation(second);
    if (leadOrSecondBad) {
        return 1;
    }
    return isNotContinuation(src[sp + 2]) ? 2 : 3;
}
/**
 * Always throws an {@code IllegalArgumentException} describing malformed
 * input of {@code nb} bytes at offset {@code off}; its cause is a
 * {@code MalformedInputException} with input length {@code nb}.
 */
private static void throwMalformed(int off, int nb) {
    final MalformedInputException cause = new MalformedInputException(nb);
    throw new IllegalArgumentException(
            "malformed input off : " + off + ", length : " + nb, cause);
}
/**
 * Always throws: reports one malformed byte at the index of the first
 * negative byte in {@code val} (or at {@code val.length} if there is none).
 */
private static void throwMalformed(byte[] val) {
    int firstNegative = 0;
    for (; firstNegative < val.length; firstNegative++) {
        if (val[firstNegative] < 0) {
            break;
        }
    }
    throwMalformed(firstNegative, 1);
}
/**
 * Always throws an {@code IllegalArgumentException} reporting a single
 * unmappable character at offset {@code off}; its cause is an
 * {@code UnmappableCharacterException} with input length 1.
 */
private static void throwUnmappable(int off) {
    // The message previously read "malformed input", contradicting the
    // UnmappableCharacterException attached as the cause.
    String msg = "unmappable character off : " + off + ", length : 1";
    throw new IllegalArgumentException(msg, new UnmappableCharacterException(1));
}
/**
 * Always throws: reports an unmappable character at the index of the first
 * negative byte in {@code val} (or at {@code val.length} if there is none).
 */
private static void throwUnmappable(byte[] val) {
    int firstNegative = 0;
    for (; firstNegative < val.length; firstNegative++) {
        if (val[firstNegative] < 0) {
            break;
        }
    }
    throwUnmappable(firstNegative);
}
/**
 * Encodes a string's internal bytes as UTF-8.
 *
 * <p>UTF16-coded input is delegated to {@code encodeUTF8_UTF16}. For
 * LATIN1-coded input, content without negative bytes is copied unchanged;
 * otherwise each negative byte is expanded to a two-byte sequence.
 * {@code doReplace} only matters on the UTF16 path.
 */
private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) {
    if (coder == UTF16) {
        return encodeUTF8_UTF16(val, doReplace);
    }
    if (!StringCoding.hasNegatives(val, 0, val.length)) {
        return Arrays.copyOf(val, val.length);
    }

    // worst case: every byte expands to two
    final byte[] out = new byte[val.length << 1];
    int written = 0;
    for (int i = 0; i < val.length; i++) {
        final byte b = val[i];
        if (b >= 0) {
            out[written++] = b;
            continue;
        }
        out[written++] = (byte) (0xc0 | ((b & 0xff) >> 6));
        out[written++] = (byte) (0x80 | (b & 0x3f));
    }
    return (written == out.length) ? out : Arrays.copyOf(out, written);
}
/**
 * Encodes UTF16-coded internal bytes as UTF-8.
 *
 * <p>A surrogate that is not part of a valid high/low pair is written as
 * {@code '?'} when {@code doReplace} is {@code true} and reported through
 * {@code throwUnmappable} otherwise.
 */
private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) {
    final int charCount = val.length >> 1;
    // sized for three bytes per char (a surrogate pair is two chars, four bytes)
    final byte[] out = new byte[charCount * 3];
    int written = 0;
    int read = 0;

    // ascii fast loop;
    while (read < charCount) {
        char ascii = StringUTF16.getChar(val, read);
        if (ascii >= '\u0080') {
            break;
        }
        out[written++] = (byte) ascii;
        read++;
    }

    while (read < charCount) {
        final char c = StringUTF16.getChar(val, read++);
        if (c < 0x80) {
            out[written++] = (byte) c;
            continue;
        }
        if (c < 0x800) {
            out[written++] = (byte) (0xc0 | (c >> 6));
            out[written++] = (byte) (0x80 | (c & 0x3f));
            continue;
        }
        if (!Character.isSurrogate(c)) {
            // 3 bytes, 16 bits
            out[written++] = (byte) (0xe0 | ((c >> 12)));
            out[written++] = (byte) (0x80 | ((c >> 6) & 0x3f));
            out[written++] = (byte) (0x80 | (c & 0x3f));
            continue;
        }

        // surrogate: only a high surrogate directly followed by a low one is valid
        int codePoint = -1;
        if (Character.isHighSurrogate(c) && read < charCount) {
            char next = StringUTF16.getChar(val, read);
            if (Character.isLowSurrogate(next)) {
                codePoint = Character.toCodePoint(c, next);
            }
        }
        if (codePoint >= 0) {
            out[written++] = (byte) (0xf0 | ((codePoint >> 18)));
            out[written++] = (byte) (0x80 | ((codePoint >> 12) & 0x3f));
            out[written++] = (byte) (0x80 | ((codePoint >> 6) & 0x3f));
            out[written++] = (byte) (0x80 | (codePoint & 0x3f));
            read++;  // 2 chars
        } else if (doReplace) {
            out[written++] = '?';
        } else {
            throwUnmappable(read - 1);
        }
    }

    return (written == out.length) ? out : Arrays.copyOf(out, written);
}
/**
@ -604,11 +1383,8 @@ public final class String
*
* @since 1.1
*/
public String(byte bytes[], int offset, int length) {
checkBoundsOffCount(offset, length, bytes.length);
StringCoding.Result ret = StringCoding.decode(bytes, offset, length);
this.value = ret.value;
this.coder = ret.coder;
public String(byte[] bytes, int offset, int length) {
this(bytes, offset, length, Charset.defaultCharset());
}
/**
@ -956,7 +1732,7 @@ public final class String
public byte[] getBytes(String charsetName)
throws UnsupportedEncodingException {
if (charsetName == null) throw new NullPointerException();
return StringCoding.encode(charsetName, coder(), value);
return encode(lookupCharset(charsetName), coder(), value);
}
/**
@ -979,7 +1755,7 @@ public final class String
*/
public byte[] getBytes(Charset charset) {
if (charset == null) throw new NullPointerException();
return StringCoding.encode(charset, coder(), value);
return encode(charset, coder(), value);
}
/**
@ -996,7 +1772,7 @@ public final class String
* @since 1.1
*/
public byte[] getBytes() {
return StringCoding.encode(coder(), value);
return encode(Charset.defaultCharset(), coder(), value);
}
/**

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1994, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1994, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -2262,19 +2262,19 @@ public final class System {
}
public String newStringNoRepl(byte[] bytes, Charset cs) throws CharacterCodingException {
return StringCoding.newStringNoRepl(bytes, cs);
return String.newStringNoRepl(bytes, cs);
}
public byte[] getBytesNoRepl(String s, Charset cs) throws CharacterCodingException {
return StringCoding.getBytesNoRepl(s, cs);
return String.getBytesNoRepl(s, cs);
}
public String newStringUTF8NoRepl(byte[] bytes, int off, int len) {
return StringCoding.newStringUTF8NoRepl(bytes, off, len);
return String.newStringUTF8NoRepl(bytes, off, len);
}
public byte[] getBytesUTF8NoRepl(String s) {
return StringCoding.getBytesUTF8NoRepl(s);
return String.getBytesUTF8NoRepl(s);
}
public void setCause(Throwable t, Throwable cause) {

View File

@ -0,0 +1,98 @@
/*
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.bench.java.lang;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;
import java.nio.charset.Charset;
import java.util.concurrent.TimeUnit;
/**
 * JMH microbenchmarks for turning byte arrays into Strings with the
 * {@code new String(byte[], ...)} constructors. Results are reported as
 * average time per invocation in nanoseconds.
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Fork(value = 3, jvmArgs = "-Xmx1g")
@Warmup(iterations = 5, time = 2)
@Measurement(iterations = 5, time = 3)
@State(Scope.Thread)
public class StringDecode {
/**
 * Decoding with an explicitly chosen charset, run once per {@code charsetName}
 * parameter value. The charset is passed either by name or as a
 * {@code Charset} instance.
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Fork(value = 3, jvmArgs = "-Xmx1g")
@Warmup(iterations = 5, time = 2)
@Measurement(iterations = 5, time = 2)
@State(Scope.Thread)
public static class WithCharset {
@Param({"US-ASCII", "ISO-8859-1", "UTF-8", "MS932", "ISO-8859-6"})
private String charsetName;
// resolved from charsetName in setup()
private Charset charset;
// "ascii string" encoded with the benchmarked charset
private byte[] asciiString;
// a string containing U+FF11 and U+FF16, encoded with the benchmarked charset
private byte[] utf16String;
/** Resolves the charset and pre-encodes both inputs so only decoding is timed. */
@Setup
public void setup() {
charset = Charset.forName(charsetName);
asciiString = "ascii string".getBytes(charset);
utf16String = "UTF-\uFF11\uFF16 string".getBytes(charset);
}
/** Decodes both inputs, naming the charset with a String. */
@Benchmark
public void decodeCharsetName(Blackhole bh) throws Exception {
bh.consume(new String(asciiString, charsetName));
bh.consume(new String(utf16String, charsetName));
}
/** Decodes both inputs, passing the Charset instance. */
@Benchmark
public void decodeCharset(Blackhole bh) throws Exception {
bh.consume(new String(asciiString, charset));
bh.consume(new String(utf16String, charset));
}
}
// inputs encoded with whatever charset the no-argument getBytes() uses
private byte[] asciiDefaultString;
private byte[] utf16DefaultString;
/** Pre-encodes both inputs with the no-argument {@code getBytes()}. */
@Setup
public void setup() {
asciiDefaultString = "ascii string".getBytes();
utf16DefaultString = "UTF-\uFF11\uFF16 string".getBytes();
}
/** Decodes both inputs with the charset-less {@code new String(byte[])} constructor. */
@Benchmark
public void decodeDefault(Blackhole bh) throws Exception {
bh.consume(new String(asciiDefaultString));
bh.consume(new String(utf16DefaultString));
}
}

View File

@ -0,0 +1,88 @@
/*
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.bench.java.lang;
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.Blackhole;
import java.nio.charset.Charset;
import java.util.concurrent.TimeUnit;
/**
 * JMH microbenchmarks for turning Strings into byte arrays with the
 * {@code String.getBytes(...)} methods. Results are reported as average
 * time per invocation in nanoseconds.
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Fork(value = 3, jvmArgs = "-Xmx1g")
@Warmup(iterations = 5, time = 2)
@Measurement(iterations = 5, time = 3)
@State(Scope.Thread)
public class StringEncode {
/**
 * Encoding with an explicitly chosen charset, run once per {@code charsetName}
 * parameter value. The charset is passed either by name or as a
 * {@code Charset} instance.
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Fork(value = 3, jvmArgs = "-Xmx1g")
@Warmup(iterations = 5, time = 2)
@Measurement(iterations = 5, time = 2)
@State(Scope.Thread)
public static class WithCharset {
@Param({"US-ASCII", "ISO-8859-1", "UTF-8", "MS932", "ISO-8859-6"})
private String charsetName;
// resolved from charsetName in setup()
private Charset charset;
// input made up of ASCII characters only
private String asciiString;
// input that also contains U+FF11 and U+FF16
private String utf16String;
/** Resolves the charset and creates both input strings. */
@Setup
public void setup() {
charset = Charset.forName(charsetName);
asciiString = "ascii string";
utf16String = "UTF-\uFF11\uFF16 string";
}
/** Encodes both inputs, naming the charset with a String. */
@Benchmark
public void encodeCharsetName(Blackhole bh) throws Exception {
bh.consume(asciiString.getBytes(charsetName));
bh.consume(utf16String.getBytes(charsetName));
}
/** Encodes both inputs, passing the Charset instance. */
@Benchmark
public void encodeCharset(Blackhole bh) throws Exception {
bh.consume(asciiString.getBytes(charset));
bh.consume(utf16String.getBytes(charset));
}
}
// inputs for the charset-less getBytes() benchmark
private String asciiDefaultString;
private String utf16DefaultString;
/** Creates both input strings. */
@Setup
public void setup() {
asciiDefaultString = "ascii string";
utf16DefaultString = "UTF-\uFF11\uFF16 string";
}
/** Encodes both inputs with the no-argument {@code getBytes()}. */
@Benchmark
public void encodeDefault(Blackhole bh) throws Exception {
bh.consume(asciiDefaultString.getBytes());
bh.consume(utf16DefaultString.getBytes());
}
}