8274242: Implement fast-path for ASCII-compatible CharsetEncoders on x86

Reviewed-by: naoto, thartmann
This commit is contained in:
Claes Redestad 2021-09-29 12:58:14 +00:00
parent c4d115701d
commit aaa36cc006
28 changed files with 428 additions and 391 deletions

View File

@ -16864,6 +16864,7 @@ instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
iRegI_R0 result, rFlagsReg cr)
%{
predicate(!((EncodeISOArrayNode*)n)->is_ascii());
match(Set result (EncodeISOArray src (Binary dst len)));
effect(USE_KILL src, USE_KILL dst, USE_KILL len,
KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

View File

@ -163,4 +163,7 @@
return true;
}
// Implements a variant of EncodeISOArrayNode that encodes ASCII only
static const bool supports_encode_ascii_array = false;
#endif // CPU_AARCH64_MATCHER_AARCH64_HPP

View File

@ -155,4 +155,7 @@
return false;
}
// Implements a variant of EncodeISOArrayNode that encodes ASCII only
static const bool supports_encode_ascii_array = false;
#endif // CPU_ARM_MATCHER_ARM_HPP

View File

@ -164,5 +164,7 @@
return VM_Version::has_fcfids();
}
// Implements a variant of EncodeISOArrayNode that encodes ASCII only
static const bool supports_encode_ascii_array = false;
#endif // CPU_PPC_MATCHER_PPC_HPP

View File

@ -12789,6 +12789,7 @@ instruct has_negatives(rarg1RegP ary1, iRegIsrc len, iRegIdst result, iRegLdst t
// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(rarg1RegP src, rarg2RegP dst, iRegIsrc len, iRegIdst result, iRegLdst tmp1,
iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4, iRegLdst tmp5, regCTR ctr, flagsRegCR0 cr0) %{
predicate(!((EncodeISOArrayNode*)n)->is_ascii());
match(Set result (EncodeISOArray src (Binary dst len)));
effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5,
USE_KILL src, USE_KILL dst, KILL ctr, KILL cr0);

View File

@ -152,4 +152,7 @@
return true;
}
// Implements a variant of EncodeISOArrayNode that encodes ASCII only
static const bool supports_encode_ascii_array = false;
#endif // CPU_S390_MATCHER_S390_HPP

View File

@ -10282,6 +10282,7 @@ instruct has_negatives(rarg5RegP ary1, iRegI len, iRegI result, roddRegI oddReg,
// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(iRegP src, iRegP dst, iRegI result, iRegI len, iRegI tmp, flagsReg cr) %{
predicate(!((EncodeISOArrayNode*)n)->is_ascii());
match(Set result (EncodeISOArray src (Binary dst len)));
effect(TEMP_DEF result, TEMP tmp, KILL cr); // R0, R1 are killed, too.
ins_cost(300);

View File

@ -5423,7 +5423,7 @@ void MacroAssembler::generate_fill(BasicType t, bool aligned,
BIND(L_exit);
}
// encode char[] to byte[] in ISO_8859_1
// encode char[] to byte[] in ISO_8859_1 or ASCII
//@IntrinsicCandidate
//private static int implEncodeISOArray(byte[] sa, int sp,
//byte[] da, int dp, int len) {
@ -5436,10 +5436,23 @@ void MacroAssembler::generate_fill(BasicType t, bool aligned,
// }
// return i;
//}
//
//@IntrinsicCandidate
//private static int implEncodeAsciiArray(char[] sa, int sp,
// byte[] da, int dp, int len) {
// int i = 0;
// for (; i < len; i++) {
// char c = sa[sp++];
// if (c >= '\u0080')
// break;
// da[dp++] = (byte)c;
// }
// return i;
//}
void MacroAssembler::encode_iso_array(Register src, Register dst, Register len,
XMMRegister tmp1Reg, XMMRegister tmp2Reg,
XMMRegister tmp3Reg, XMMRegister tmp4Reg,
Register tmp5, Register result) {
Register tmp5, Register result, bool ascii) {
// rsi: src
// rdi: dst
@ -5450,6 +5463,9 @@ void MacroAssembler::encode_iso_array(Register src, Register dst, Register len,
assert_different_registers(src, dst, len, tmp5, result);
Label L_done, L_copy_1_char, L_copy_1_char_exit;
int mask = ascii ? 0xff80ff80 : 0xff00ff00;
int short_mask = ascii ? 0xff80 : 0xff00;
// set result
xorl(result, result);
// check for zero length
@ -5469,7 +5485,7 @@ void MacroAssembler::encode_iso_array(Register src, Register dst, Register len,
if (UseAVX >= 2) {
Label L_chars_32_check, L_copy_32_chars, L_copy_32_chars_exit;
movl(tmp5, 0xff00ff00); // create mask to test for Unicode chars in vector
movl(tmp5, mask); // create mask to test for Unicode or non-ASCII chars in vector
movdl(tmp1Reg, tmp5);
vpbroadcastd(tmp1Reg, tmp1Reg, Assembler::AVX_256bit);
jmp(L_chars_32_check);
@ -5478,7 +5494,7 @@ void MacroAssembler::encode_iso_array(Register src, Register dst, Register len,
vmovdqu(tmp3Reg, Address(src, len, Address::times_2, -64));
vmovdqu(tmp4Reg, Address(src, len, Address::times_2, -32));
vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector_len */ 1);
vptest(tmp2Reg, tmp1Reg); // check for Unicode chars in vector
vptest(tmp2Reg, tmp1Reg); // check for Unicode or non-ASCII chars in vector
jccb(Assembler::notZero, L_copy_32_chars_exit);
vpackuswb(tmp3Reg, tmp3Reg, tmp4Reg, /* vector_len */ 1);
vpermq(tmp4Reg, tmp3Reg, 0xD8, /* vector_len */ 1);
@ -5493,7 +5509,7 @@ void MacroAssembler::encode_iso_array(Register src, Register dst, Register len,
jccb(Assembler::greater, L_copy_16_chars_exit);
} else if (UseSSE42Intrinsics) {
movl(tmp5, 0xff00ff00); // create mask to test for Unicode chars in vector
movl(tmp5, mask); // create mask to test for Unicode or non-ASCII chars in vector
movdl(tmp1Reg, tmp5);
pshufd(tmp1Reg, tmp1Reg, 0);
jmpb(L_chars_16_check);
@ -5517,7 +5533,7 @@ void MacroAssembler::encode_iso_array(Register src, Register dst, Register len,
movdqu(tmp4Reg, Address(src, len, Address::times_2, -16));
por(tmp2Reg, tmp4Reg);
}
ptest(tmp2Reg, tmp1Reg); // check for Unicode chars in vector
ptest(tmp2Reg, tmp1Reg); // check for Unicode or non-ASCII chars in vector
jccb(Assembler::notZero, L_copy_16_chars_exit);
packuswb(tmp3Reg, tmp4Reg);
}
@ -5555,7 +5571,7 @@ void MacroAssembler::encode_iso_array(Register src, Register dst, Register len,
bind(L_copy_1_char);
load_unsigned_short(tmp5, Address(src, len, Address::times_2, 0));
testl(tmp5, 0xff00); // check if Unicode char
testl(tmp5, short_mask); // check if Unicode or non-ASCII char
jccb(Assembler::notZero, L_copy_1_char_exit);
movb(Address(dst, len, Address::times_1, 0), tmp5);
addptr(len, 1);

View File

@ -1725,7 +1725,7 @@ public:
void encode_iso_array(Register src, Register dst, Register len,
XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3,
XMMRegister tmp4, Register tmp5, Register result);
XMMRegister tmp4, Register tmp5, Register result, bool ascii);
#ifdef _LP64
void add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2);

View File

@ -195,4 +195,7 @@
return true;
}
// Implements a variant of EncodeISOArrayNode that encodes ASCII only
static const bool supports_encode_ascii_array = true;
#endif // CPU_X86_MATCHER_X86_HPP

View File

@ -12199,18 +12199,35 @@ instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI l
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
regD tmp1, regD tmp2, regD tmp3, regD tmp4,
eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
predicate(!((EncodeISOArrayNode*)n)->is_ascii());
match(Set result (EncodeISOArray src (Binary dst len)));
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
format %{ "Encode iso array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
ins_encode %{
__ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
$tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
$tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
$tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
%}
ins_pipe( pipe_slow );
%}
// encode char[] to byte[] in ASCII
// Matcher rule for the ASCII flavour of EncodeISOArray. It is selected only
// when the node's is_ascii() predicate holds, and it emits the same
// MacroAssembler::encode_iso_array routine with its final argument set to true.
// Per the effect list, src, dst and len are used and killed, tmp5 and the
// flags register are killed, and tmp1..tmp4 are temporaries.
instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
regD tmp1, regD tmp2, regD tmp3, regD tmp4,
eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
predicate(((EncodeISOArrayNode*)n)->is_ascii());
match(Set result (EncodeISOArray src (Binary dst len)));
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
format %{ "Encode ascii array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
ins_encode %{
// The trailing 'true' is the 'ascii' flag of the shared encoder routine.
__ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
$tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
$tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
%}
ins_pipe( pipe_slow );
%}
//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions

View File

@ -11770,14 +11770,32 @@ instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_Reg
instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
predicate(!((EncodeISOArrayNode*)n)->is_ascii());
match(Set result (EncodeISOArray src (Binary dst len)));
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
format %{ "Encode array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
format %{ "Encode iso array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
ins_encode %{
__ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
$tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
$tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
$tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
%}
ins_pipe( pipe_slow );
%}
// encode char[] to byte[] in ASCII
// Matcher rule for the ASCII flavour of EncodeISOArray. It is selected only
// when the node's is_ascii() predicate holds, and it emits the same
// MacroAssembler::encode_iso_array routine with its final argument set to true.
// Per the effect list, src, dst and len are used and killed, tmp5 and the
// flags register are killed, and tmp1..tmp4 are temporaries.
instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
predicate(((EncodeISOArrayNode*)n)->is_ascii());
match(Set result (EncodeISOArray src (Binary dst len)));
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
format %{ "Encode ascii array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
ins_encode %{
// The trailing 'true' is the 'ascii' flag of the shared encoder routine.
__ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
$tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
$tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
%}
ins_pipe( pipe_slow );
%}

View File

@ -505,6 +505,7 @@ bool vmIntrinsics::disabled_by_jvm_flags(vmIntrinsics::ID id) {
if (!SpecialArraysEquals) return true;
break;
case vmIntrinsics::_encodeISOArray:
case vmIntrinsics::_encodeAsciiArray:
case vmIntrinsics::_encodeByteISOArray:
if (!SpecialEncodeISOArray) return true;
break;

View File

@ -353,6 +353,9 @@ class methodHandle;
\
do_intrinsic(_encodeByteISOArray, java_lang_StringCoding, encodeISOArray_name, indexOfI_signature, F_S) \
\
do_intrinsic(_encodeAsciiArray, java_lang_StringCoding, encodeAsciiArray_name, encodeISOArray_signature, F_S) \
do_name( encodeAsciiArray_name, "implEncodeAsciiArray") \
\
do_class(java_math_BigInteger, "java/math/BigInteger") \
do_intrinsic(_multiplyToLen, java_math_BigInteger, multiplyToLen_name, multiplyToLen_signature, F_S) \
do_name( multiplyToLen_name, "implMultiplyToLen") \

View File

@ -216,6 +216,9 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
case vmIntrinsics::_copyMemory:
if (StubRoutines::unsafe_arraycopy() == NULL) return false;
break;
case vmIntrinsics::_encodeAsciiArray:
if (!Matcher::match_rule_supported(Op_EncodeISOArray) || !Matcher::supports_encode_ascii_array) return false;
break;
case vmIntrinsics::_encodeISOArray:
case vmIntrinsics::_encodeByteISOArray:
if (!Matcher::match_rule_supported(Op_EncodeISOArray)) return false;

View File

@ -168,10 +168,14 @@ class HasNegativesNode: public StrIntrinsicNode {
//------------------------------EncodeISOArray--------------------------------
// encode char[] to byte[] in ISO_8859_1
// encode char[] to byte[] in ISO_8859_1 or ASCII
class EncodeISOArrayNode: public Node {
bool ascii;
public:
EncodeISOArrayNode(Node* control, Node* arymem, Node* s1, Node* s2, Node* c): Node(control, arymem, s1, s2, c) {};
EncodeISOArrayNode(Node* control, Node* arymem, Node* s1, Node* s2, Node* c, bool ascii)
: Node(control, arymem, s1, s2, c), ascii(ascii) {}
bool is_ascii() { return ascii; }
virtual int Opcode() const;
virtual bool depends_only_on_test() const { return false; }
virtual const Type* bottom_type() const { return TypeInt::INT; }

View File

@ -591,7 +591,9 @@ bool LibraryCallKit::try_to_inline(int predicate) {
case vmIntrinsics::_encodeISOArray:
case vmIntrinsics::_encodeByteISOArray:
return inline_encodeISOArray();
return inline_encodeISOArray(false);
case vmIntrinsics::_encodeAsciiArray:
return inline_encodeISOArray(true);
case vmIntrinsics::_updateCRC32:
return inline_updateCRC32();
@ -4882,8 +4884,8 @@ LibraryCallKit::tightly_coupled_allocation(Node* ptr) {
}
//-------------inline_encodeISOArray-----------------------------------
// encode char[] to byte[] in ISO_8859_1
bool LibraryCallKit::inline_encodeISOArray() {
// encode char[] to byte[] in ISO_8859_1 or ASCII
bool LibraryCallKit::inline_encodeISOArray(bool ascii) {
assert(callee()->signature()->size() == 5, "encodeISOArray has 5 parameters");
// no receiver since it is static method
Node *src = argument(0);
@ -4918,7 +4920,7 @@ bool LibraryCallKit::inline_encodeISOArray() {
// 'dst_start' points to dst array + scaled offset
const TypeAryPtr* mtype = TypeAryPtr::BYTES;
Node* enc = new EncodeISOArrayNode(control(), memory(mtype), src_start, dst_start, length);
Node* enc = new EncodeISOArrayNode(control(), memory(mtype), src_start, dst_start, length, ascii);
enc = _gvn.transform(enc);
Node* res_mem = _gvn.transform(new SCMemProjNode(enc));
set_memory(res_mem, mtype);

View File

@ -285,7 +285,7 @@ class LibraryCallKit : public GraphKit {
Node* get_state_from_digest_object(Node *digestBase_object, const char* state_type);
Node* get_digest_length_from_digest_object(Node *digestBase_object);
Node* inline_digestBase_implCompressMB_predicate(int predicate);
bool inline_encodeISOArray();
bool inline_encodeISOArray(bool ascii);
bool inline_updateCRC32();
bool inline_updateBytesCRC32();
bool inline_updateByteBufferCRC32();

View File

@ -46,7 +46,7 @@ class StringCoding {
@IntrinsicCandidate
public static int implEncodeISOArray(byte[] sa, int sp,
byte[] da, int dp, int len) {
byte[] da, int dp, int len) {
int i = 0;
for (; i < len; i++) {
char c = StringUTF16.getChar(sa, sp++);
@ -57,4 +57,18 @@ class StringCoding {
return i;
}
/**
 * Copies chars from {@code sa}, starting at index {@code sp}, into
 * {@code da}, starting at index {@code dp}, narrowing each char to a byte.
 * Copying stops after {@code len} chars, or at the first char whose value
 * is 0x80 or greater, whichever comes first; that char is not copied.
 *
 * @param sa  source char array
 * @param sp  index in {@code sa} of the first char to read
 * @param da  destination byte array
 * @param dp  index in {@code da} of the first byte to write
 * @param len maximum number of chars to copy
 * @return the number of chars copied before stopping
 */
@IntrinsicCandidate
public static int implEncodeAsciiArray(char[] sa, int sp,
byte[] da, int dp, int len)
{
int i = 0;
for (; i < len; i++) {
char c = sa[sp++];
// Anything at or above 0x80 ends the run; it is left for the caller.
if (c >= '\u0080')
break;
da[dp++] = (byte)c;
}
return i;
}
}

View File

@ -2419,6 +2419,10 @@ public final class System {
return String.decodeASCII(src, srcOff, dst, dstOff, len);
}
// Forwards all arguments to StringCoding.implEncodeAsciiArray and returns
// its result unchanged.
public int encodeASCII(char[] src, int srcOff, byte[] dst, int dstOff, int len) {
return StringCoding.implEncodeAsciiArray(src, srcOff, dst, dstOff, len);
}
public void setCause(Throwable t, Throwable cause) {
t.setCause(cause);
}

View File

@ -356,6 +356,15 @@ public interface JavaLangAccess {
*/
int decodeASCII(byte[] src, int srcOff, char[] dst, int dstOff, int len);
/**
* Encodes as many ASCII codepoints as possible from the source array into
* the destination byte array, assuming that the encoding is ASCII
* compatible.
*
* @return the number of bytes successfully encoded, or 0 if none
*/
int encodeASCII(char[] src, int srcOff, byte[] dst, int dstOff, int len);
/**
* Set the cause of Throwable
* @param cause set t's cause to new value

View File

@ -76,11 +76,11 @@ class CESU_8 extends Unicode
dst.position(dp - dst.arrayOffset());
}
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
private static class Decoder extends CharsetDecoder
implements ArrayDecoder {
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
private Decoder(Charset cs) {
super(cs, 1.0f, 1.0f);
}
@ -434,7 +434,6 @@ class CESU_8 extends Unicode
}
private Surrogate.Parser sgp;
private char[] c2;
private CoderResult encodeArrayLoop(CharBuffer src,
ByteBuffer dst)
{
@ -445,11 +444,12 @@ class CESU_8 extends Unicode
byte[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
int dlASCII = dp + Math.min(sl - sp, dl - dp);
// ASCII only loop
while (dp < dlASCII && sa[sp] < '\u0080')
da[dp++] = (byte) sa[sp++];
// Handle ASCII-only prefix
int n = JLA.encodeASCII(sa, sp, da, dp, Math.min(sl - sp, dl - dp));
sp += n;
dp += n;
while (sp < sl) {
char c = sa[sp];
if (c < 0x80) {
@ -549,11 +549,11 @@ class CESU_8 extends Unicode
public int encode(char[] sa, int sp, int len, byte[] da) {
int sl = sp + len;
int dp = 0;
int dlASCII = dp + Math.min(len, da.length);
// ASCII only optimized loop
while (dp < dlASCII && sa[sp] < '\u0080')
da[dp++] = (byte) sa[sp++];
// Handle ASCII-only prefix
int n = JLA.encodeASCII(sa, sp, da, dp, Math.min(len, da.length));
sp += n;
dp += n;
while (sp < sl) {
char c = sa[sp++];

View File

@ -49,11 +49,11 @@ public class SingleByte
return cr;
}
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
public static final class Decoder extends CharsetDecoder
implements ArrayDecoder {
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
private final char[] b2c;
private final boolean isASCIICompatible;
private final boolean isLatin1Decodable;
@ -214,8 +214,14 @@ public class SingleByte
byte[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
int len = Math.min(dl - dp, sl - sp);
int len = Math.min(dl - dp, sl - sp);
if (isASCIICompatible) {
int n = JLA.encodeASCII(sa, sp, da, dp, len);
sp += n;
dp += n;
len -= n;
}
while (len-- > 0) {
char c = sa[sp];
int b = encode(c);

View File

@ -61,9 +61,9 @@ public class US_ASCII
return new Encoder(this);
}
private static class Decoder extends CharsetDecoder {
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
private static class Decoder extends CharsetDecoder {
private Decoder(Charset cs) {
super(cs, 1.0f, 1.0f);
@ -159,6 +159,10 @@ public class US_ASCII
assert (dp <= dl);
dp = (dp <= dl ? dp : dl);
int n = JLA.encodeASCII(sa, sp, da, dp, Math.min(sl - sp, dl - dp));
sp += n;
dp += n;
try {
while (sp < sl) {
char c = sa[sp];

View File

@ -83,9 +83,9 @@ public final class UTF_8 extends Unicode {
dst.position(dp - dst.arrayOffset());
}
private static class Decoder extends CharsetDecoder {
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
private static class Decoder extends CharsetDecoder {
private Decoder(Charset cs) {
super(cs, 1.0f, 1.0f);
@ -443,8 +443,7 @@ public final class UTF_8 extends Unicode {
private Surrogate.Parser sgp;
private CoderResult encodeArrayLoop(CharBuffer src,
ByteBuffer dst)
{
ByteBuffer dst) {
char[] sa = src.array();
int sp = src.arrayOffset() + src.position();
int sl = src.arrayOffset() + src.limit();
@ -452,11 +451,22 @@ public final class UTF_8 extends Unicode {
byte[] da = dst.array();
int dp = dst.arrayOffset() + dst.position();
int dl = dst.arrayOffset() + dst.limit();
int dlASCII = dp + Math.min(sl - sp, dl - dp);
// ASCII only loop
while (dp < dlASCII && sa[sp] < '\u0080')
da[dp++] = (byte) sa[sp++];
// Handle ASCII-only prefix
int n = JLA.encodeASCII(sa, sp, da, dp, Math.min(sl - sp, dl - dp));
sp += n;
dp += n;
if (sp < sl) {
return encodeArrayLoopSlow(src, sa, sp, sl, dst, da, dp, dl);
} else {
updatePositions(src, sp, dst, dp);
return CoderResult.UNDERFLOW;
}
}
private CoderResult encodeArrayLoopSlow(CharBuffer src, char[] sa, int sp, int sl,
ByteBuffer dst, byte[] da, int dp, int dl) {
while (sp < sl) {
char c = sa[sp];
if (c < 0x80) {

View File

@ -1,346 +0,0 @@
/*
* Copyright (c) 2013, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @key randomness
* @bug 6896617
* @summary Optimize sun.nio.cs.ISO_8859_1$Encode.encodeArrayLoop() with SSE instructions on x86
* @library /test/lib
* @modules java.base/jdk.internal.misc
* java.base/sun.nio.cs
* java.management
*
* @ignore 8193479
* @run main/othervm/timeout=1200 -Xbatch -Xmx256m compiler.codegen.Test6896617
*/
package compiler.codegen;
import jdk.test.lib.Utils;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CodingErrorAction;
import java.util.Arrays;
import java.util.Random;
public class Test6896617 {
final static int SIZE = 256;
public static void main(String[] args) {
String csn = "ISO-8859-1";
Charset cs = Charset.forName(csn);
CharsetEncoder enc = cs.newEncoder();
enc.onMalformedInput(CodingErrorAction.REPLACE)
.onUnmappableCharacter(CodingErrorAction.REPLACE);
CharsetDecoder dec = cs.newDecoder();
dec.onMalformedInput(CodingErrorAction.REPLACE)
.onUnmappableCharacter(CodingErrorAction.REPLACE);
byte repl = (byte)'?';
enc.replaceWith(new byte[] { repl });
// Use internal API for tests.
sun.nio.cs.ArrayEncoder arrenc = (sun.nio.cs.ArrayEncoder)enc;
sun.nio.cs.ArrayDecoder arrdec = (sun.nio.cs.ArrayDecoder)dec;
// Populate char[] with chars which can be encoded by ISO_8859_1 (<= 0xFF)
Random rnd = Utils.getRandomInstance();
int maxchar = 0xFF;
char[] a = new char[SIZE];
byte[] b = new byte[SIZE];
char[] at = new char[SIZE];
byte[] bt = new byte[SIZE];
for (int i = 0; i < SIZE; i++) {
char c = (char) rnd.nextInt(maxchar);
if (!enc.canEncode(c)) {
System.out.printf("Something wrong: can't encode c=%03x\n", (int)c);
System.exit(97);
}
a[i] = c;
b[i] = (byte)c;
at[i] = (char)-1;
bt[i] = (byte)-1;
}
if (arrenc.encode(a, 0, SIZE, bt) != SIZE || !Arrays.equals(b, bt)) {
System.out.println("Something wrong: ArrayEncoder.encode failed");
System.exit(97);
}
if (arrdec.decode(b, 0, SIZE, at) != SIZE || !Arrays.equals(a, at)) {
System.out.println("Something wrong: ArrayDecoder.decode failed");
System.exit(97);
}
for (int i = 0; i < SIZE; i++) {
at[i] = (char)-1;
bt[i] = (byte)-1;
}
ByteBuffer bb = ByteBuffer.wrap(b);
CharBuffer ba = CharBuffer.wrap(a);
ByteBuffer bbt = ByteBuffer.wrap(bt);
CharBuffer bat = CharBuffer.wrap(at);
if (!enc.encode(ba, bbt, true).isUnderflow() || !Arrays.equals(b, bt)) {
System.out.println("Something wrong: Encoder.encode failed");
System.exit(97);
}
if (!dec.decode(bb, bat, true).isUnderflow() || !Arrays.equals(a, at)) {
System.out.println("Something wrong: Decoder.decode failed");
System.exit(97);
}
for (int i = 0; i < SIZE; i++) {
at[i] = (char)-1;
bt[i] = (byte)-1;
}
// Warm up
boolean failed = false;
int result = 0;
for (int i = 0; i < 10000; i++) {
result += arrenc.encode(a, 0, SIZE, bt);
result -= arrdec.decode(b, 0, SIZE, at);
}
for (int i = 0; i < 10000; i++) {
result += arrenc.encode(a, 0, SIZE, bt);
result -= arrdec.decode(b, 0, SIZE, at);
}
for (int i = 0; i < 10000; i++) {
result += arrenc.encode(a, 0, SIZE, bt);
result -= arrdec.decode(b, 0, SIZE, at);
}
if (result != 0 || !Arrays.equals(b, bt) || !Arrays.equals(a, at)) {
failed = true;
System.out.println("Failed: ArrayEncoder.encode char[" + SIZE + "] and ArrayDecoder.decode byte[" + SIZE + "]");
}
for (int i = 0; i < SIZE; i++) {
at[i] = (char)-1;
bt[i] = (byte)-1;
}
boolean is_underflow = true;
for (int i = 0; i < 10000; i++) {
ba.clear(); bb.clear(); bat.clear(); bbt.clear();
boolean enc_res = enc.encode(ba, bbt, true).isUnderflow();
boolean dec_res = dec.decode(bb, bat, true).isUnderflow();
is_underflow = is_underflow && enc_res && dec_res;
}
for (int i = 0; i < SIZE; i++) {
at[i] = (char)-1;
bt[i] = (byte)-1;
}
for (int i = 0; i < 10000; i++) {
ba.clear(); bb.clear(); bat.clear(); bbt.clear();
boolean enc_res = enc.encode(ba, bbt, true).isUnderflow();
boolean dec_res = dec.decode(bb, bat, true).isUnderflow();
is_underflow = is_underflow && enc_res && dec_res;
}
for (int i = 0; i < SIZE; i++) {
at[i] = (char)-1;
bt[i] = (byte)-1;
}
for (int i = 0; i < 10000; i++) {
ba.clear(); bb.clear(); bat.clear(); bbt.clear();
boolean enc_res = enc.encode(ba, bbt, true).isUnderflow();
boolean dec_res = dec.decode(bb, bat, true).isUnderflow();
is_underflow = is_underflow && enc_res && dec_res;
}
if (!is_underflow || !Arrays.equals(b, bt) || !Arrays.equals(a, at)) {
failed = true;
System.out.println("Failed: Encoder.encode char[" + SIZE + "] and Decoder.decode byte[" + SIZE + "]");
}
// Test encoder with different source and destination sizes
System.out.println("Testing different source and destination sizes");
for (int i = 1; i <= SIZE; i++) {
for (int j = 1; j <= SIZE; j++) {
bt = new byte[j];
// vary source's SIZE
result = arrenc.encode(a, 0, i, bt);
int l = Math.min(i, j);
if (result != l) {
failed = true;
System.out.println("Failed: encode char[" + i + "] to byte[" + j + "]: result = " + result + ", expected " + l);
}
for (int k = 0; k < l; k++) {
if (bt[k] != b[k]) {
failed = true;
System.out.println("Failed: encoded byte[" + k + "] (" + bt[k] + ") != " + b[k]);
}
}
// vary source's offset
int sz = SIZE - i + 1;
result = arrenc.encode(a, i-1, sz, bt);
l = Math.min(sz, j);
if (result != l) {
failed = true;
System.out.println("Failed: encode char[" + sz + "] to byte[" + j + "]: result = " + result + ", expected " + l);
}
for (int k = 0; k < l; k++) {
if (bt[k] != b[i+k-1]) {
failed = true;
System.out.println("Failed: encoded byte[" + k + "] (" + bt[k] + ") != " + b[i+k-1]);
}
}
}
}
// Test encoder with char > 0xFF
System.out.println("Testing big char");
byte orig = (byte)'A';
bt = new byte[SIZE];
for (int i = 1; i <= SIZE; i++) {
for (int j = 0; j < i; j++) {
a[j] += 0x100;
// make sure to replace a different byte
bt[j] = orig;
result = arrenc.encode(a, 0, i, bt);
if (result != i) {
failed = true;
System.out.println("Failed: encode char[" + i + "] to byte[" + i + "]: result = " + result + ", expected " + i);
}
if (bt[j] != repl) {
failed = true;
System.out.println("Failed: encoded replace byte[" + j + "] (" + bt[j] + ") != " + repl);
}
bt[j] = b[j]; // Restore to compare whole array
for (int k = 0; k < i; k++) {
if (bt[k] != b[k]) {
failed = true;
System.out.println("Failed: encoded byte[" + k + "] (" + bt[k] + ") != " + b[k]);
}
}
a[j] -= 0x100; // Restore
}
}
// Test sun.nio.cs.ISO_8859_1$Encode.encodeArrayLoop() performance.
int itrs = Integer.getInteger("iterations", 1000000);
int size = Integer.getInteger("size", 256);
a = new char[size];
b = new byte[size];
bt = new byte[size];
for (int i = 0; i < size; i++) {
char c = (char) rnd.nextInt(maxchar);
if (!enc.canEncode(c)) {
System.out.printf("Something wrong: can't encode c=%03x\n", (int)c);
System.exit(97);
}
a[i] = c;
b[i] = (byte)-1;
bt[i] = (byte)c;
}
ba = CharBuffer.wrap(a);
bb = ByteBuffer.wrap(b);
boolean enc_res = enc.encode(ba, bb, true).isUnderflow();
if (!enc_res || !Arrays.equals(b, bt)) {
failed = true;
System.out.println("Failed 1: Encoder.encode char[" + size + "]");
}
for (int i = 0; i < size; i++) {
b[i] = (byte)-1;
}
// Make sure to recompile method if needed before performance run.
for (int i = 0; i < 10000; i++) {
ba.clear(); bb.clear();
enc_res = enc_res && enc.encode(ba, bb, true).isUnderflow();
}
for (int i = 0; i < size; i++) {
b[i] = (byte)-1;
}
for (int i = 0; i < 10000; i++) {
ba.clear(); bb.clear();
enc_res = enc_res && enc.encode(ba, bb, true).isUnderflow();
}
if (!enc_res || !Arrays.equals(b, bt)) {
failed = true;
System.out.println("Failed 2: Encoder.encode char[" + size + "]");
}
for (int i = 0; i < size; i++) {
b[i] = (byte)-1;
}
System.out.println("Testing ISO_8859_1$Encode.encodeArrayLoop() performance");
long start = System.currentTimeMillis();
for (int i = 0; i < itrs; i++) {
ba.clear(); bb.clear();
enc_res = enc_res && enc.encode(ba, bb, true).isUnderflow();
}
long end = System.currentTimeMillis();
if (!enc_res || !Arrays.equals(b, bt)) {
failed = true;
System.out.println("Failed 3: Encoder.encode char[" + size + "]");
} else {
System.out.println("size: " + size + " time: " + (end - start));
}
// Test sun.nio.cs.ISO_8859_1$Encode.encode() performance.
// Make sure to recompile method if needed before performance run.
result = 0;
for (int i = 0; i < size; i++) {
b[i] = (byte)-1;
}
for (int i = 0; i < 10000; i++) {
result += arrenc.encode(a, 0, size, b);
}
for (int i = 0; i < size; i++) {
b[i] = (byte)-1;
}
for (int i = 0; i < 10000; i++) {
result += arrenc.encode(a, 0, size, b);
}
if (result != size*20000 || !Arrays.equals(b, bt)) {
failed = true;
System.out.println("Failed 1: ArrayEncoder.encode char[" + SIZE + "]");
}
for (int i = 0; i < size; i++) {
b[i] = (byte)-1;
}
System.out.println("Testing ISO_8859_1$Encode.encode() performance");
result = 0;
start = System.currentTimeMillis();
for (int i = 0; i < itrs; i++) {
result += arrenc.encode(a, 0, size, b);
}
end = System.currentTimeMillis();
if (!Arrays.equals(b, bt)) {
failed = true;
System.out.println("Failed 2: ArrayEncoder.encode char[" + size + "]");
} else {
System.out.println("size: " + size + " time: " + (end - start));
}
if (failed) {
System.out.println("FAILED");
System.exit(97);
}
System.out.println("PASSED");
}
}

View File

@ -0,0 +1,249 @@
/*
* Copyright (c) 2013, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @key randomness
* @bug 6896617 8274242
* @summary Verify potentially intrinsified encoders behave well before and after compilation
* @library /test/lib
*
* @run main/othervm/timeout=1200 --add-opens=java.base/sun.nio.cs=ALL-UNNAMED -Xbatch -Xmx256m compiler.intrinsics.string.TestEncodeIntrinsics
*/
package compiler.intrinsics.string;
import jdk.test.lib.Utils;
import java.lang.reflect.Method;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CodingErrorAction;
import java.util.Arrays;
import java.util.Random;
/**
 * Checks that charset encoders (and the matching decoders) produce the same
 * results before and after their potentially intrinsified encoding loops have
 * been warmed up.
 *
 * Each charset is tested in three phases:
 *   1. a single encode/decode round trip of random, encodable chars;
 *   2. several warm-up rounds of the same round trip, verifying the output
 *      afterwards;
 *   3. encoding of inputs that contain one char outside the range covered by
 *      phase 1, at every position of every prefix length.
 *
 * Any failure prints a diagnostic and terminates the VM with exit code 97.
 */
public class TestEncodeIntrinsics {
    /** Number of chars/bytes in the test arrays. */
    static final int SIZE = 256;

    /** Number of warm-up rounds; target arrays are cleared before each encode/decode round. */
    private static final int WARMUP_ROUNDS = 3;

    /** Number of encode/decode invocations per warm-up round. */
    private static final int WARMUP_ITERATIONS = 10000;

    public static void main(String[] args) {
        test("ISO-8859-1", false);
        test("UTF-8", true);
        test("US-ASCII", true);
        test("CESU-8", true);
    }

    /**
     * Marks every element of both target arrays with -1 so that a later
     * comparison against the expected data only succeeds if the coder under
     * test actually wrote the targets.
     *
     * @param chars decode target to clear
     * @param bytes encode target to clear
     */
    private static void resetTargets(char[] chars, byte[] bytes) {
        Arrays.fill(chars, (char) -1);
        Arrays.fill(bytes, (byte) -1);
    }

    /**
     * Runs all test phases for one charset.
     *
     * @param csn       name of the charset to test
     * @param asciiOnly if true, the generated input is limited to chars
     *                  {@code <= 0x7F}; otherwise to chars {@code <= 0xFF}
     */
    private static void test(String csn, boolean asciiOnly) {
        try {
            System.out.println("Testing " + csn);
            Charset cs = Charset.forName(csn);
            CharsetEncoder enc = cs.newEncoder();
            enc.onMalformedInput(CodingErrorAction.REPLACE)
               .onUnmappableCharacter(CodingErrorAction.REPLACE);
            CharsetDecoder dec = cs.newDecoder();
            dec.onMalformedInput(CodingErrorAction.REPLACE)
               .onUnmappableCharacter(CodingErrorAction.REPLACE);

            byte repl = (byte) '?';
            enc.replaceWith(new byte[]{repl});

            // Populate char[] with chars which can be encoded by ISO_8859_1 (<= 0xFF)
            // - or ASCII (<= 0x7F) if requested
            Random rnd = Utils.getRandomInstance();
            int maxchar = asciiOnly ? 0x7F : 0xFF;
            char[] a = new char[SIZE];   // source chars
            byte[] b = new byte[SIZE];   // expected encoding of a
            char[] at = new char[SIZE];  // decode target
            byte[] bt = new byte[SIZE];  // encode target
            for (int i = 0; i < SIZE; i++) {
                // The bound of nextInt is exclusive, so add one to make
                // maxchar itself a possible value.
                char c = (char) rnd.nextInt(maxchar + 1);
                if (!enc.canEncode(c)) {
                    System.out.printf("Something wrong: can't encode c=%03x\n", (int) c);
                    System.exit(97);
                }
                a[i] = c;
                b[i] = (byte) c;
            }
            resetTargets(at, bt);

            boolean isLatin1 = csn.equals("ISO-8859-1");
            Method encodeArray = null;
            if (isLatin1) {
                // Use internal API for tests
                encodeArray = enc.getClass().getDeclaredMethod("encodeISOArray",
                        char[].class, int.class, byte[].class, int.class, int.class);
                encodeArray.setAccessible(true);
                if ((int) encodeArray.invoke(enc, a, 0, bt, 0, SIZE) != SIZE || !Arrays.equals(b, bt)) {
                    System.out.println("Something wrong: ArrayEncoder.encode failed");
                    System.exit(97);
                }
                // The call above filled bt; clear it again so that the
                // Encoder.encode check below cannot pass on stale data.
                Arrays.fill(bt, (byte) -1);
            }

            ByteBuffer bb = ByteBuffer.wrap(b);
            CharBuffer ba = CharBuffer.wrap(a);
            ByteBuffer bbt = ByteBuffer.wrap(bt);
            CharBuffer bat = CharBuffer.wrap(at);
            if (!enc.encode(ba, bbt, true).isUnderflow() || !Arrays.equals(b, bt)) {
                System.out.println("Something wrong: Encoder.encode failed");
                System.exit(97);
            }
            if (!dec.decode(bb, bat, true).isUnderflow() || !Arrays.equals(a, at)) {
                System.out.println("Something wrong: Decoder.decode failed (a == at: " + Arrays.equals(a, at) + ")");
                System.exit(97);
            }
            resetTargets(at, bt);

            // Warm up
            boolean failed = false;
            if (isLatin1) {
                for (int round = 0; round < WARMUP_ROUNDS; round++) {
                    for (int i = 0; i < WARMUP_ITERATIONS; i++) {
                        failed |= (int) encodeArray.invoke(enc, a, 0, bt, 0, SIZE) != SIZE;
                    }
                }
                if (failed || !Arrays.equals(b, bt)) {
                    failed = true;
                    System.out.println("Failed: ISO_8859_1$Encoder.encode char[" + SIZE + "]");
                }
            }

            boolean isUnderflow = true;
            for (int round = 0; round < WARMUP_ROUNDS; round++) {
                // Clear the targets first so the final comparison reflects
                // what the last round wrote.
                resetTargets(at, bt);
                for (int i = 0; i < WARMUP_ITERATIONS; i++) {
                    ba.clear();
                    bb.clear();
                    bat.clear();
                    bbt.clear();
                    boolean encRes = enc.encode(ba, bbt, true).isUnderflow();
                    boolean decRes = dec.decode(bb, bat, true).isUnderflow();
                    isUnderflow = isUnderflow && encRes && decRes;
                }
            }
            if (!isUnderflow) {
                failed = true;
                System.out.println("Failed: got a non-underflow");
            }
            if (!Arrays.equals(b, bt)) {
                failed = true;
                System.out.println("Failed: b != bt");
            }
            if (!Arrays.equals(a, at)) {
                failed = true;
                System.out.println("Failed: a != at");
            }

            // Test encoder with chars outside of the range the intrinsic deals with
            System.out.println("Testing big char");
            bt = new byte[SIZE + 10]; // add some spare room to deal with encoding multi-byte
            ba = CharBuffer.wrap(a);
            bbt = ByteBuffer.wrap(bt);
            for (int i = 1; i <= SIZE; i++) {
                for (int j = 0; j < i; j++) {
                    // A char just above the range used to populate a[]
                    char bigChar = (char) (maxchar + 1 + rnd.nextInt(0x100));
                    char aOrig = a[j];
                    a[j] = bigChar;
                    // make sure to replace with a different byte
                    bt[j] = (byte) (bt[j] + 1);
                    ba.clear();
                    ba.limit(i);
                    bbt.clear();
                    if (!enc.encode(ba, bbt, true).isUnderflow()) {
                        failed = true;
                        System.out.println("Failed: encode char[" + i + "] to byte[" + i + "]: expected underflow");
                    }
                    if (bt[j] == b[j] && b[j] != repl) { // b[j] can be equal to repl; ignore
                        failed = true;
                        System.out.println("Failed: different byte expected at pos bt[" + j + "]");
                    }
                    if (!enc.canEncode(bigChar) && bt[j] != repl) {
                        failed = true;
                        System.out.println("Failed: encoded replace byte[" + j + "] (" + bt[j] + ") != " + repl);
                    }
                    // Check that all bytes prior to the replaced one were encoded properly
                    for (int k = 0; k < j; k++) {
                        if (bt[k] != b[k]) {
                            failed = true;
                            System.out.println("Failed: encoded byte[" + k + "] (" + bt[k] + ") != " + b[k]);
                        }
                    }
                    a[j] = aOrig; // Restore
                }
            }

            if (failed) {
                System.out.println("FAILED");
                System.exit(97);
            }
            System.out.println("PASSED");
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("FAILED");
            System.exit(97);
        }
    }
}

View File

@ -24,12 +24,15 @@ package org.openjdk.bench.java.nio;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
@ -45,8 +48,11 @@ import java.util.concurrent.TimeUnit;
* char and byte arrays.
*/
@BenchmarkMode(Mode.AverageTime)
@Warmup(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@State(Scope.Thread)
@Fork(3)
public class CharsetEncodeDecode {
private byte[] BYTES;
@ -55,7 +61,7 @@ public class CharsetEncodeDecode {
private CharsetEncoder encoder;
private CharsetDecoder decoder;
@Param({"BIG5", "ISO-8859-15", "ASCII", "UTF-16"})
@Param({"UTF-8", "BIG5", "ISO-8859-15", "ASCII", "UTF-16"})
private String type;
@Param("16384")