8290034: Auto vectorize reverse bit operations.
Reviewed-by: xgong, kvn
This commit is contained in:
parent
348a0521e1
commit
5d82d67a9e
src
hotspot
cpu/x86
share
java.base/share/classes/java/lang
test
hotspot/jtreg/compiler
micro/org/openjdk/bench/java/lang
@ -10115,6 +10115,14 @@ void Assembler::evpternlogq(XMMRegister dst, int imm8, KRegister mask, XMMRegist
|
|||||||
emit_int8(imm8);
|
emit_int8(imm8);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits the two-operand form of GF2P8AFFINEQB: dst is passed to the prefix
// encoder as both the destination and the first source, src is the second
// source, and imm8 is emitted as the trailing immediate byte.
// Asserts that the CPU reports both GFNI and SSE support.
void Assembler::gf2p8affineqb(XMMRegister dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_gfni(), "");
  assert(VM_Version::supports_sse(), "");
  InstructionAttr attrs(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  const int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attrs);
  // Opcode byte 0xCE, then a register-direct ModRM byte, then the immediate.
  emit_int24((unsigned char)0xCE,
             (unsigned char)(0xC0 | encode),
             imm8);
}
|
||||||
|
|
||||||
void Assembler::vgf2p8affineqb(XMMRegister dst, XMMRegister src2, XMMRegister src3, int imm8, int vector_len) {
|
void Assembler::vgf2p8affineqb(XMMRegister dst, XMMRegister src2, XMMRegister src3, int imm8, int vector_len) {
|
||||||
assert(VM_Version::supports_gfni(), "requires GFNI support");
|
assert(VM_Version::supports_gfni(), "requires GFNI support");
|
||||||
assert(VM_Version::supports_sse(), "");
|
assert(VM_Version::supports_sse(), "");
|
||||||
|
@ -2801,6 +2801,7 @@ private:
|
|||||||
void evpblendmq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
|
void evpblendmq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
|
||||||
|
|
||||||
// Galois field affine transformation instructions.
|
// Galois field affine transformation instructions.
|
||||||
|
void gf2p8affineqb(XMMRegister dst, XMMRegister src, int imm8);
|
||||||
void vgf2p8affineqb(XMMRegister dst, XMMRegister src2, XMMRegister src3, int imm8, int vector_len);
|
void vgf2p8affineqb(XMMRegister dst, XMMRegister src2, XMMRegister src3, int imm8, int vector_len);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
@ -5484,6 +5484,90 @@ void C2_MacroAssembler::udivmodI(Register rax, Register divisor, Register rdx, R
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _LP64
|
#ifdef _LP64
|
||||||
|
// Emits code that leaves the bit-reversed 32-bit value of src in dst.
//
// Both strategies first reverse the bits inside every byte and then share the
// final bswapl, which reverses the byte order.
//   - Without GFNI: three mask/shift/or rounds on general-purpose registers,
//     using rtmp as scratch; xtmp1 and xtmp2 are not touched.
//   - With GFNI: a single gf2p8affineqb, using xtmp1, xtmp2 and rtmp as scratch.
void C2_MacroAssembler::reverseI(Register dst, Register src, XMMRegister xtmp1,
                                 XMMRegister xtmp2, Register rtmp) {
  if (!VM_Version::supports_gfni()) {
    // Round 1: exchange each even-numbered bit with its odd neighbour.
    movl(rtmp, src);
    andl(rtmp, 0x55555555);
    shll(rtmp, 1);
    movl(dst, src);
    andl(dst, 0xAAAAAAAA);
    shrl(dst, 1);
    orl(dst, rtmp);

    // Round 2: exchange the low and high bit pair of every nibble.
    movl(rtmp, dst);
    andl(rtmp, 0x33333333);
    shll(rtmp, 2);
    andl(dst, 0xCCCCCCCC);
    shrl(dst, 2);
    orl(dst, rtmp);

    // Round 3: exchange the low and high nibble of every byte.
    movl(rtmp, dst);
    andl(rtmp, 0x0F0F0F0F);
    shll(rtmp, 4);
    andl(dst, 0xF0F0F0F0);
    shrl(dst, 4);
    orl(dst, rtmp);
  } else {
    // Per-byte bit reversal with one Galois field affine transformation.
    // The constant loaded into xtmp2 is the matrix operand taken from:
    // http://0x80.pl/articles/avx512-galois-field-for-bit-shuffling.html
    mov64(rtmp, 0x8040201008040201L);
    movq(xtmp1, src);
    movq(xtmp2, rtmp);
    gf2p8affineqb(xtmp1, xtmp2, 0);
    movq(dst, xtmp1);
  }
  // Common tail: reverse the order of the four bytes.
  bswapl(dst);
}
|
||||||
|
|
||||||
|
// Emits code that leaves the bit-reversed 64-bit value of src in dst.
//
// Both strategies first reverse the bits inside every byte and then share the
// final bswapq, which reverses the byte order.
//   - Without GFNI: three mask/shift/or rounds on general-purpose registers.
//     rtmp1 holds the shifted half, rtmp2 holds the 64-bit mask and is
//     inverted with notq to obtain the complementary mask. xtmp1 and xtmp2
//     are not touched.
//   - With GFNI: a single gf2p8affineqb, using xtmp1, xtmp2 and rtmp1 as
//     scratch; rtmp2 is not touched.
void C2_MacroAssembler::reverseL(Register dst, Register src, XMMRegister xtmp1,
                                 XMMRegister xtmp2, Register rtmp1, Register rtmp2) {
  if (!VM_Version::supports_gfni()) {
    // Round 1: exchange each even-numbered bit with its odd neighbour.
    movq(rtmp1, src);
    mov64(rtmp2, 0x5555555555555555L);
    andq(rtmp1, rtmp2);
    shlq(rtmp1, 1);
    movq(dst, src);
    notq(rtmp2);
    andq(dst, rtmp2);
    shrq(dst, 1);
    orq(dst, rtmp1);

    // Round 2: exchange the low and high bit pair of every nibble.
    movq(rtmp1, dst);
    mov64(rtmp2, 0x3333333333333333L);
    andq(rtmp1, rtmp2);
    shlq(rtmp1, 2);
    notq(rtmp2);
    andq(dst, rtmp2);
    shrq(dst, 2);
    orq(dst, rtmp1);

    // Round 3: exchange the low and high nibble of every byte.
    movq(rtmp1, dst);
    mov64(rtmp2, 0x0F0F0F0F0F0F0F0FL);
    andq(rtmp1, rtmp2);
    shlq(rtmp1, 4);
    notq(rtmp2);
    andq(dst, rtmp2);
    shrq(dst, 4);
    orq(dst, rtmp1);
  } else {
    // Per-byte bit reversal with one Galois field affine transformation.
    // The constant loaded into xtmp2 is the matrix operand taken from:
    // http://0x80.pl/articles/avx512-galois-field-for-bit-shuffling.html
    mov64(rtmp1, 0x8040201008040201L);
    movq(xtmp1, src);
    movq(xtmp2, rtmp1);
    gf2p8affineqb(xtmp1, xtmp2, 0);
    movq(dst, xtmp1);
  }
  // Common tail: reverse the order of the eight bytes.
  bswapq(dst);
}
|
||||||
|
|
||||||
void C2_MacroAssembler::udivL(Register rax, Register divisor, Register rdx) {
|
void C2_MacroAssembler::udivL(Register rax, Register divisor, Register rdx) {
|
||||||
Label done;
|
Label done;
|
||||||
Label neg_divisor_fastpath;
|
Label neg_divisor_fastpath;
|
||||||
|
@ -368,6 +368,10 @@ public:
|
|||||||
void udivmodI(Register rax, Register divisor, Register rdx, Register tmp);
|
void udivmodI(Register rax, Register divisor, Register rdx, Register tmp);
|
||||||
|
|
||||||
#ifdef _LP64
|
#ifdef _LP64
|
||||||
|
void reverseI(Register dst, Register src, XMMRegister xtmp1,
|
||||||
|
XMMRegister xtmp2, Register rtmp);
|
||||||
|
void reverseL(Register dst, Register src, XMMRegister xtmp1,
|
||||||
|
XMMRegister xtmp2, Register rtmp1, Register rtmp2);
|
||||||
void udivL(Register rax, Register divisor, Register rdx);
|
void udivL(Register rax, Register divisor, Register rdx);
|
||||||
void umodL(Register rax, Register divisor, Register rdx);
|
void umodL(Register rax, Register divisor, Register rdx);
|
||||||
void udivmodL(Register rax, Register divisor, Register rdx, Register tmp);
|
void udivmodL(Register rax, Register divisor, Register rdx, Register tmp);
|
||||||
|
@ -6721,6 +6721,50 @@ instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
|
|||||||
ins_pipe(ialu_reg);
|
ins_pipe(ialu_reg);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
//--------------- Reverse Operation Instructions ----------------
|
||||||
|
// ReverseI on CPUs without GFNI: general-purpose registers only.
// dst and rtmp are temporaries and the flags register is killed.
instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseI src));
  effect(TEMP dst, TEMP rtmp, KILL cr);
  format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
  ins_encode %{
    // No XMM temporaries are supplied for this variant.
    __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
  %}
  ins_pipe( ialu_reg );
%}
|
||||||
|
|
||||||
|
// ReverseI on CPUs with GFNI: two XMM temporaries plus one 64-bit
// general-purpose temporary are handed to the macro assembler.
// NOTE(review): the XMM temporaries are allocated from regF; confirm that
// every register in that class is encodable by gf2p8affineqb as emitted here.
instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, regF xtmp1, regF xtmp2, rRegL rtmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseI src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
  format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
  %}
  ins_pipe( ialu_reg );
%}
|
||||||
|
|
||||||
|
// ReverseL on CPUs without GFNI: general-purpose registers only.
// dst, rtmp1 and rtmp2 are temporaries and the flags register is killed.
instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseL src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
  format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
  ins_encode %{
    // No XMM temporaries are supplied for this variant.
    __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
  %}
  ins_pipe( ialu_reg );
%}
|
||||||
|
|
||||||
|
// ReverseL on CPUs with GFNI: two XMM temporaries plus one general-purpose
// temporary are handed to the macro assembler; no second general-purpose
// temporary is supplied (noreg).
// NOTE(review): the XMM temporaries are allocated from regD; confirm that
// every register in that class is encodable by gf2p8affineqb as emitted here.
instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, regD xtmp1, regD xtmp2, rRegL rtmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseL src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
  format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
  %}
  ins_pipe( ialu_reg );
%}
|
||||||
|
|
||||||
//---------- Population Count Instructions -------------------------------------
|
//---------- Population Count Instructions -------------------------------------
|
||||||
|
|
||||||
|
@ -246,6 +246,9 @@ class methodHandle;
|
|||||||
do_intrinsic(_expand_i, java_lang_Integer, expand_name, int2_int_signature, F_S) \
|
do_intrinsic(_expand_i, java_lang_Integer, expand_name, int2_int_signature, F_S) \
|
||||||
do_intrinsic(_expand_l, java_lang_Long, expand_name, long2_long_signature, F_S) \
|
do_intrinsic(_expand_l, java_lang_Long, expand_name, long2_long_signature, F_S) \
|
||||||
\
|
\
|
||||||
|
do_intrinsic(_reverse_i, java_lang_Integer, reverse_name, int_int_signature, F_S) \
|
||||||
|
do_name( reverse_name, "reverse") \
|
||||||
|
do_intrinsic(_reverse_l, java_lang_Long, reverse_name, long_long_signature, F_S) \
|
||||||
do_intrinsic(_reverseBytes_i, java_lang_Integer, reverseBytes_name, int_int_signature, F_S) \
|
do_intrinsic(_reverseBytes_i, java_lang_Integer, reverseBytes_name, int_int_signature, F_S) \
|
||||||
do_name( reverseBytes_name, "reverseBytes") \
|
do_name( reverseBytes_name, "reverseBytes") \
|
||||||
do_intrinsic(_reverseBytes_l, java_lang_Long, reverseBytes_name, long_long_signature, F_S) \
|
do_intrinsic(_reverseBytes_l, java_lang_Long, reverseBytes_name, long_long_signature, F_S) \
|
||||||
|
@ -263,6 +263,12 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
|
|||||||
case vmIntrinsics::_numberOfTrailingZeros_l:
|
case vmIntrinsics::_numberOfTrailingZeros_l:
|
||||||
if (!Matcher::match_rule_supported(Op_CountTrailingZerosL)) return false;
|
if (!Matcher::match_rule_supported(Op_CountTrailingZerosL)) return false;
|
||||||
break;
|
break;
|
||||||
|
case vmIntrinsics::_reverse_i:
|
||||||
|
if (!Matcher::match_rule_supported(Op_ReverseI)) return false;
|
||||||
|
break;
|
||||||
|
case vmIntrinsics::_reverse_l:
|
||||||
|
if (!Matcher::match_rule_supported(Op_ReverseL)) return false;
|
||||||
|
break;
|
||||||
case vmIntrinsics::_reverseBytes_c:
|
case vmIntrinsics::_reverseBytes_c:
|
||||||
if (!Matcher::match_rule_supported(Op_ReverseBytesUS)) return false;
|
if (!Matcher::match_rule_supported(Op_ReverseBytesUS)) return false;
|
||||||
break;
|
break;
|
||||||
|
@ -525,6 +525,8 @@ bool LibraryCallKit::try_to_inline(int predicate) {
|
|||||||
case vmIntrinsics::_numberOfTrailingZeros_l:
|
case vmIntrinsics::_numberOfTrailingZeros_l:
|
||||||
case vmIntrinsics::_bitCount_i:
|
case vmIntrinsics::_bitCount_i:
|
||||||
case vmIntrinsics::_bitCount_l:
|
case vmIntrinsics::_bitCount_l:
|
||||||
|
case vmIntrinsics::_reverse_i:
|
||||||
|
case vmIntrinsics::_reverse_l:
|
||||||
case vmIntrinsics::_reverseBytes_i:
|
case vmIntrinsics::_reverseBytes_i:
|
||||||
case vmIntrinsics::_reverseBytes_l:
|
case vmIntrinsics::_reverseBytes_l:
|
||||||
case vmIntrinsics::_reverseBytes_s:
|
case vmIntrinsics::_reverseBytes_s:
|
||||||
@ -2060,6 +2062,8 @@ bool LibraryCallKit::inline_number_methods(vmIntrinsics::ID id) {
|
|||||||
case vmIntrinsics::_reverseBytes_s: n = new ReverseBytesSNode( 0, arg); break;
|
case vmIntrinsics::_reverseBytes_s: n = new ReverseBytesSNode( 0, arg); break;
|
||||||
case vmIntrinsics::_reverseBytes_i: n = new ReverseBytesINode( 0, arg); break;
|
case vmIntrinsics::_reverseBytes_i: n = new ReverseBytesINode( 0, arg); break;
|
||||||
case vmIntrinsics::_reverseBytes_l: n = new ReverseBytesLNode( 0, arg); break;
|
case vmIntrinsics::_reverseBytes_l: n = new ReverseBytesLNode( 0, arg); break;
|
||||||
|
case vmIntrinsics::_reverse_i: n = new ReverseINode(0, arg); break;
|
||||||
|
case vmIntrinsics::_reverse_l: n = new ReverseLNode(0, arg); break;
|
||||||
default: fatal_unexpected_iid(id); break;
|
default: fatal_unexpected_iid(id); break;
|
||||||
}
|
}
|
||||||
set_result(_gvn.transform(n));
|
set_result(_gvn.transform(n));
|
||||||
|
@ -1899,3 +1899,50 @@ const Type* SqrtFNode::Value(PhaseGVN* phase) const {
|
|||||||
if( f < 0.0f ) return Type::FLOAT;
|
if( f < 0.0f ) return Type::FLOAT;
|
||||||
return TypeF::make( (float)sqrt( (double)f ) );
|
return TypeF::make( (float)sqrt( (double)f ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static jlong reverse_bits(jlong val) {
|
||||||
|
jlong res = ((val & 0xF0F0F0F0F0F0F0F0L) >> 4) | ((val & 0x0F0F0F0F0F0F0F0F) << 4);
|
||||||
|
res = ((res & 0xCCCCCCCCCCCCCCCCL) >> 2) | ((res & 0x3333333333333333L) << 2);
|
||||||
|
res = ((res & 0xAAAAAAAAAAAAAAAAL) >> 1) | ((res & 0x5555555555555555L) << 1);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
const Type* ReverseINode::Value(PhaseGVN* phase) const {
|
||||||
|
const Type *t1 = phase->type( in(1) );
|
||||||
|
if (t1 == Type::TOP) {
|
||||||
|
return Type::TOP;
|
||||||
|
}
|
||||||
|
const TypeInt* t1int = t1->isa_int();
|
||||||
|
if (t1int && t1int->is_con()) {
|
||||||
|
jint res = reverse_bits(t1int->get_con());
|
||||||
|
return TypeInt::make(res);
|
||||||
|
}
|
||||||
|
return t1int;
|
||||||
|
}
|
||||||
|
|
||||||
|
const Type* ReverseLNode::Value(PhaseGVN* phase) const {
|
||||||
|
const Type *t1 = phase->type( in(1) );
|
||||||
|
if (t1 == Type::TOP) {
|
||||||
|
return Type::TOP;
|
||||||
|
}
|
||||||
|
const TypeLong* t1long = t1->isa_long();
|
||||||
|
if (t1long && t1long->is_con()) {
|
||||||
|
jint res = reverse_bits(t1long->get_con());
|
||||||
|
return TypeLong::make(res);
|
||||||
|
}
|
||||||
|
return t1long;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reversing twice is a no-op: when the operand is itself a ReverseI node,
// return that node's operand; otherwise return this node unchanged.
Node* ReverseINode::Identity(PhaseGVN* phase) {
  Node* operand = in(1);
  if (operand->Opcode() != Op_ReverseI) {
    return this;
  }
  return operand->in(1);
}
|
||||||
|
|
||||||
|
// Reversing twice is a no-op: when the operand is itself a ReverseL node,
// return that node's operand; otherwise return this node unchanged.
Node* ReverseLNode::Identity(PhaseGVN* phase) {
  Node* operand = in(1);
  if (operand->Opcode() != Op_ReverseL) {
    return this;
  }
  return operand->in(1);
}
|
||||||
|
@ -580,6 +580,8 @@ public:
|
|||||||
virtual int Opcode() const;
|
virtual int Opcode() const;
|
||||||
const Type *bottom_type() const { return TypeInt::INT; }
|
const Type *bottom_type() const { return TypeInt::INT; }
|
||||||
virtual uint ideal_reg() const { return Op_RegI; }
|
virtual uint ideal_reg() const { return Op_RegI; }
|
||||||
|
virtual Node* Identity(PhaseGVN* phase);
|
||||||
|
virtual const Type* Value(PhaseGVN* phase) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
//-------------------------------ReverseLNode--------------------------------
|
//-------------------------------ReverseLNode--------------------------------
|
||||||
@ -590,6 +592,8 @@ public:
|
|||||||
virtual int Opcode() const;
|
virtual int Opcode() const;
|
||||||
const Type *bottom_type() const { return TypeLong::LONG; }
|
const Type *bottom_type() const { return TypeLong::LONG; }
|
||||||
virtual uint ideal_reg() const { return Op_RegL; }
|
virtual uint ideal_reg() const { return Op_RegL; }
|
||||||
|
virtual Node* Identity(PhaseGVN* phase);
|
||||||
|
virtual const Type* Value(PhaseGVN* phase) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // SHARE_OPTO_SUBNODE_HPP
|
#endif // SHARE_OPTO_SUBNODE_HPP
|
||||||
|
@ -2645,6 +2645,7 @@ bool SuperWord::output() {
|
|||||||
opc == Op_PopCountI || opc == Op_PopCountL ||
|
opc == Op_PopCountI || opc == Op_PopCountL ||
|
||||||
opc == Op_ReverseBytesI || opc == Op_ReverseBytesL ||
|
opc == Op_ReverseBytesI || opc == Op_ReverseBytesL ||
|
||||||
opc == Op_ReverseBytesUS || opc == Op_ReverseBytesS ||
|
opc == Op_ReverseBytesUS || opc == Op_ReverseBytesS ||
|
||||||
|
opc == Op_ReverseI || opc == Op_ReverseL ||
|
||||||
opc == Op_CountLeadingZerosI || opc == Op_CountLeadingZerosL ||
|
opc == Op_CountLeadingZerosI || opc == Op_CountLeadingZerosL ||
|
||||||
opc == Op_CountTrailingZerosI || opc == Op_CountTrailingZerosL) {
|
opc == Op_CountTrailingZerosI || opc == Op_CountTrailingZerosL) {
|
||||||
assert(n->req() == 2, "only one input expected");
|
assert(n->req() == 2, "only one input expected");
|
||||||
|
@ -1762,6 +1762,7 @@ public final class Integer extends Number
|
|||||||
* specified {@code int} value.
|
* specified {@code int} value.
|
||||||
* @since 1.5
|
* @since 1.5
|
||||||
*/
|
*/
|
||||||
|
@IntrinsicCandidate
|
||||||
public static int reverse(int i) {
|
public static int reverse(int i) {
|
||||||
// HD, Figure 7-1
|
// HD, Figure 7-1
|
||||||
i = (i & 0x55555555) << 1 | (i >>> 1) & 0x55555555;
|
i = (i & 0x55555555) << 1 | (i >>> 1) & 0x55555555;
|
||||||
|
@ -1901,6 +1901,7 @@ public final class Long extends Number
|
|||||||
* specified {@code long} value.
|
* specified {@code long} value.
|
||||||
* @since 1.5
|
* @since 1.5
|
||||||
*/
|
*/
|
||||||
|
@IntrinsicCandidate
|
||||||
public static long reverse(long i) {
|
public static long reverse(long i) {
|
||||||
// HD, Figure 7-1
|
// HD, Figure 7-1
|
||||||
i = (i & 0x5555555555555555L) << 1 | (i >>> 1) & 0x5555555555555555L;
|
i = (i & 0x5555555555555555L) << 1 | (i >>> 1) & 0x5555555555555555L;
|
||||||
|
@ -461,6 +461,10 @@ public class TestIntVect {
|
|||||||
for (int i=0; i<ARRLEN; i++) {
|
for (int i=0; i<ARRLEN; i++) {
|
||||||
errn += verify("test_reverse_bytes: ", i, a0[i], Integer.reverseBytes(a1[i]));
|
errn += verify("test_reverse_bytes: ", i, a0[i], Integer.reverseBytes(a1[i]));
|
||||||
}
|
}
|
||||||
|
test_reverse(a0, a1);
|
||||||
|
for (int i=0; i<ARRLEN; i++) {
|
||||||
|
errn += verify("test_reverse: ", i, a0[i], Integer.reverse(a1[i]));
|
||||||
|
}
|
||||||
|
|
||||||
test_pack2(p2, a1);
|
test_pack2(p2, a1);
|
||||||
for (int i=0; i<ARRLEN/2; i++) {
|
for (int i=0; i<ARRLEN/2; i++) {
|
||||||
@ -934,6 +938,13 @@ public class TestIntVect {
|
|||||||
end = System.currentTimeMillis();
|
end = System.currentTimeMillis();
|
||||||
System.out.println("test_reverse_bytes: " + (end - start));
|
System.out.println("test_reverse_bytes: " + (end - start));
|
||||||
|
|
||||||
|
start = System.currentTimeMillis();
|
||||||
|
for (int i=0; i<ITERS; i++) {
|
||||||
|
test_reverse(a0, a1);
|
||||||
|
}
|
||||||
|
end = System.currentTimeMillis();
|
||||||
|
System.out.println("test_reverse: " + (end - start));
|
||||||
|
|
||||||
start = System.currentTimeMillis();
|
start = System.currentTimeMillis();
|
||||||
for (int i=0; i<ITERS; i++) {
|
for (int i=0; i<ITERS; i++) {
|
||||||
test_pack2(p2, a1);
|
test_pack2(p2, a1);
|
||||||
@ -1287,6 +1298,12 @@ public class TestIntVect {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void test_reverse(int [] a0, int [] a1) {
|
||||||
|
for(int i = 0; i < a0.length; i++) {
|
||||||
|
a0[i] = Integer.reverse(a1[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static int verify(String text, int i, int elem, int val) {
|
static int verify(String text, int i, int elem, int val) {
|
||||||
if (elem != val) {
|
if (elem != val) {
|
||||||
System.err.println(text + "[" + i + "] = " + elem + " != " + val);
|
System.err.println(text + "[" + i + "] = " + elem + " != " + val);
|
||||||
|
@ -436,6 +436,10 @@ public class TestLongVect {
|
|||||||
for (int i=0; i<ARRLEN; i++) {
|
for (int i=0; i<ARRLEN; i++) {
|
||||||
errn += verify("test_reverse_bytes: ", i, a0[i], Long.reverseBytes(a1[i]));
|
errn += verify("test_reverse_bytes: ", i, a0[i], Long.reverseBytes(a1[i]));
|
||||||
}
|
}
|
||||||
|
test_reverse(a0, a1);
|
||||||
|
for (int i=0; i<ARRLEN; i++) {
|
||||||
|
errn += verify("test_reverse: ", i, a0[i], Long.reverse(a1[i]));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (errn > 0)
|
if (errn > 0)
|
||||||
@ -863,6 +867,12 @@ public class TestLongVect {
|
|||||||
end = System.currentTimeMillis();
|
end = System.currentTimeMillis();
|
||||||
System.out.println("test_reverse_bytes: " + (end - start));
|
System.out.println("test_reverse_bytes: " + (end - start));
|
||||||
|
|
||||||
|
start = System.currentTimeMillis();
|
||||||
|
for (int i=0; i<ITERS; i++) {
|
||||||
|
test_reverse(a0, a1);
|
||||||
|
}
|
||||||
|
end = System.currentTimeMillis();
|
||||||
|
System.out.println("test_reverse: " + (end - start));
|
||||||
return errn;
|
return errn;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1133,12 +1143,19 @@ public class TestLongVect {
|
|||||||
a0[i] = (long)((a1[i] & b)>>VALUE);
|
a0[i] = (long)((a1[i] & b)>>VALUE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void test_reverse_bytes(long[] a0, long[] a1) {
|
static void test_reverse_bytes(long[] a0, long[] a1) {
|
||||||
for(int i = 0; i < a0.length; i++) {
|
for(int i = 0; i < a0.length; i++) {
|
||||||
a0[i] = Long.reverseBytes(a1[i]);
|
a0[i] = Long.reverseBytes(a1[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void test_reverse(long[] a0, long[] a1) {
|
||||||
|
for(int i = 0; i < a0.length; i++) {
|
||||||
|
a0[i] = Long.reverse(a1[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static int verify(String text, int i, long elem, long val) {
|
static int verify(String text, int i, long elem, long val) {
|
||||||
if (elem != val) {
|
if (elem != val) {
|
||||||
System.err.println(text + "[" + i + "] = " + elem + " != " + val);
|
System.err.println(text + "[" + i + "] = " + elem + " != " + val);
|
||||||
|
@ -0,0 +1,169 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*/
|
||||||
|
/**
|
||||||
|
* @test
|
||||||
|
* @bug 8290034
|
||||||
|
* @summary Auto-vectorization of Reverse bit operation.
|
||||||
|
* @requires vm.compiler2.enabled
|
||||||
|
* @library /test/lib /
|
||||||
|
* @run driver compiler.vectorization.TestReverseBitsVector
|
||||||
|
*/
|
||||||
|
|
||||||
|
package compiler.vectorization;
|
||||||
|
|
||||||
|
import compiler.lib.ir_framework.*;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
|
public class TestReverseBitsVector {
|
||||||
|
private static final int ARRLEN = 1024;
|
||||||
|
private static final int ITERS = 11000;
|
||||||
|
|
||||||
|
private static long [] linp;
|
||||||
|
private static long [] lout;
|
||||||
|
private static int [] iinp;
|
||||||
|
private static int [] iout;
|
||||||
|
private static short [] sinp;
|
||||||
|
private static short [] sout;
|
||||||
|
private static char [] cinp;
|
||||||
|
private static char [] cout;
|
||||||
|
|
||||||
|
public static void setup() {
|
||||||
|
Random r = new Random(1024);
|
||||||
|
linp = new long[ARRLEN];
|
||||||
|
lout = new long[ARRLEN];
|
||||||
|
iinp = new int[ARRLEN];
|
||||||
|
iout = new int[ARRLEN];
|
||||||
|
sinp = new short[ARRLEN];
|
||||||
|
sout = new short[ARRLEN];
|
||||||
|
cinp = new char[ARRLEN];
|
||||||
|
cout = new char[ARRLEN];
|
||||||
|
for(int i = 0; i < ARRLEN; i++) {
|
||||||
|
linp[i] = r.nextLong();
|
||||||
|
iinp[i] = r.nextInt();
|
||||||
|
sinp[i] = (short)r.nextInt();
|
||||||
|
cinp[i] = (char)r.nextInt();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void main(String args[]) {
|
||||||
|
setup();
|
||||||
|
TestFramework.runWithFlags("-XX:-TieredCompilation");
|
||||||
|
System.out.println("PASSED");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(applyIfCPUFeature={"avx2", "true"}, counts = {"ReverseV" , " > 0 "})
|
||||||
|
public void test_reverse_long1(long[] lout, long[] linp) {
|
||||||
|
for (int i = 0; i < lout.length; i+=1) {
|
||||||
|
lout[i] = Long.reverse(linp[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = {"test_reverse_long1"}, mode = RunMode.STANDALONE)
|
||||||
|
public void kernel_test_reverse_long1() {
|
||||||
|
setup();
|
||||||
|
for (int i = 0; i < ITERS; i++) {
|
||||||
|
test_reverse_long1(lout , linp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(applyIfCPUFeature={"avx2", "true"}, failOn = {"ReverseV" , "ReverseL"})
|
||||||
|
public void test_reverse_long2(long[] lout, long[] linp) {
|
||||||
|
for (int i = 0; i < lout.length; i+=1) {
|
||||||
|
lout[i] = Long.reverse(Long.reverse(linp[i]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = {"test_reverse_long2"}, mode = RunMode.STANDALONE)
|
||||||
|
public void kernel_test_reverse_long2() {
|
||||||
|
setup();
|
||||||
|
for (int i = 0; i < ITERS; i++) {
|
||||||
|
test_reverse_long2(lout , linp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(applyIfCPUFeature={"avx2", "true"}, failOn = {"ReverseV" , "ReverseL"})
|
||||||
|
public void test_reverse_long3(long[] lout, long[] linp) {
|
||||||
|
for (int i = 0; i < lout.length; i+=1) {
|
||||||
|
lout[i] = Long.reverse(linp[i] ^ linp[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = {"test_reverse_long3"}, mode = RunMode.STANDALONE)
|
||||||
|
public void kernel_test_reverse_long3() {
|
||||||
|
setup();
|
||||||
|
for (int i = 0; i < ITERS; i++) {
|
||||||
|
test_reverse_long3(lout , linp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(applyIfCPUFeature={"avx2", "true"}, counts = {"ReverseV" , " > 0 "})
|
||||||
|
public void test_reverse_int1(int[] iout, int[] iinp) {
|
||||||
|
for (int i = 0; i < iout.length; i+=1) {
|
||||||
|
iout[i] = Integer.reverse(iinp[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = {"test_reverse_int1"}, mode = RunMode.STANDALONE)
|
||||||
|
public void kernel_test_reverse_int1() {
|
||||||
|
setup();
|
||||||
|
for (int i = 0; i < ITERS; i++) {
|
||||||
|
test_reverse_int1(iout , iinp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(applyIfCPUFeature={"avx2", "true"}, failOn = {"ReverseV" , "ReverseI"})
|
||||||
|
public void test_reverse_int2(int[] iout, int[] iinp) {
|
||||||
|
for (int i = 0; i < iout.length; i+=1) {
|
||||||
|
iout[i] = Integer.reverse(Integer.reverse(iinp[i]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = {"test_reverse_int2"}, mode = RunMode.STANDALONE)
|
||||||
|
public void kernel_test_reverse_int2() {
|
||||||
|
setup();
|
||||||
|
for (int i = 0; i < ITERS; i++) {
|
||||||
|
test_reverse_int2(iout , iinp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(applyIfCPUFeature={"avx2", "true"}, failOn = {"ReverseV" , "ReverseI"})
|
||||||
|
public void test_reverse_int3(int[] iout, int[] iinp) {
|
||||||
|
for (int i = 0; i < iout.length; i+=1) {
|
||||||
|
iout[i] = Integer.reverse(iinp[i] ^ iinp[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = {"test_reverse_int3"}, mode = RunMode.STANDALONE)
|
||||||
|
public void kernel_test_reverse_int3() {
|
||||||
|
setup();
|
||||||
|
for (int i = 0; i < ITERS; i++) {
|
||||||
|
test_reverse_int3(iout , iinp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -173,4 +173,11 @@ public class Integers {
|
|||||||
res[i] = Integer.reverseBytes(intsSmall[i]);
|
res[i] = Integer.reverseBytes(intsSmall[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void reverse() {
|
||||||
|
for (int i = 0; i < size; i++) {
|
||||||
|
res[i] = Integer.reverse(intsSmall[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -168,4 +168,11 @@ public class Longs {
|
|||||||
res[i] = Long.reverseBytes(longArraySmall[i]);
|
res[i] = Long.reverseBytes(longArraySmall[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void reverse() {
|
||||||
|
for (int i = 0; i < size; i++) {
|
||||||
|
res[i] = Long.reverse(longArraySmall[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user