8297172: Fix some issues of auto-vectorization of Long.bitCount/numberOfTrailingZeros/numberOfLeadingZeros()
Reviewed-by: kvn, thartmann
This commit is contained in:
parent
a61399854a
commit
4458de95f8
@ -132,6 +132,8 @@ source %{
|
||||
// Vector API intrinsics.
|
||||
if ((opcode == Op_VectorCastD2X && bt == T_INT) ||
|
||||
(opcode == Op_VectorCastL2X && bt == T_FLOAT) ||
|
||||
(opcode == Op_CountLeadingZerosV && bt == T_LONG) ||
|
||||
(opcode == Op_CountTrailingZerosV && bt == T_LONG) ||
|
||||
opcode == Op_AddReductionVD || opcode == Op_AddReductionVF ||
|
||||
opcode == Op_MulReductionVD || opcode == Op_MulReductionVF ||
|
||||
opcode == Op_MulVL) {
|
||||
@ -5672,7 +5674,6 @@ instruct vpopcountI(vReg dst, vReg src) %{
|
||||
// vector popcount - LONG
|
||||
|
||||
instruct vpopcountL(vReg dst, vReg src) %{
|
||||
predicate(Matcher::vector_element_basic_type(n) == T_LONG);
|
||||
match(Set dst (PopCountVL src));
|
||||
format %{ "vpopcountL $dst, $src" %}
|
||||
ins_encode %{
|
||||
@ -5688,32 +5689,6 @@ instruct vpopcountL(vReg dst, vReg src) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// If the PopCountVL is generated by auto-vectorization, the dst basic
|
||||
// type is T_INT. And once we have unified the type definition for
|
||||
// Vector API and auto-vectorization, this rule can be merged with
|
||||
// "vpopcountL" rule.
|
||||
|
||||
// Matcher rule for PopCountVL when the result node reports T_INT as its
// vector element basic type (see the predicate below). The rule counts bits
// of the long lanes in src and then narrows the result into dst.
// dst is declared TEMP_DEF and tmp is declared TEMP by the effect clause.
// NOTE(review): PopCountVLNode in vectornode.hpp now asserts that its vector
// type has T_LONG elements; if that holds, this T_INT predicate presumably can
// never match and the rule is dead - confirm before keeping it.
instruct vpopcountL_I(vReg dst, vReg src, vReg tmp) %{
|
||||
predicate(Matcher::vector_element_basic_type(n) == T_INT);
|
||||
match(Set dst (PopCountVL src));
|
||||
effect(TEMP_DEF dst, TEMP tmp);
|
||||
format %{ "vpopcountL_I $dst, $src\t# KILL $tmp" %}
|
||||
ins_encode %{
|
||||
// UseSVE == 0 selects the cnt/uaddlp/xtn sequence; any other value selects
// the sve_cnt/sve_vector_narrow sequence.
if (UseSVE == 0) {
|
||||
// cnt is issued with the T16B arrangement, followed by three uaddlp steps
// (T16B, T8H, T4S). Presumably this produces per-byte counts and then sums
// them pairwise up to one count per 64-bit lane - confirm against the
// assembler documentation.
__ cnt($dst$$FloatRegister, __ T16B, $src$$FloatRegister);
|
||||
__ uaddlp($dst$$FloatRegister, __ T16B, $dst$$FloatRegister);
|
||||
__ uaddlp($dst$$FloatRegister, __ T8H, $dst$$FloatRegister);
|
||||
__ uaddlp($dst$$FloatRegister, __ T4S, $dst$$FloatRegister);
|
||||
// xtn goes from the T2D arrangement to T2S, in place in dst.
__ xtn($dst$$FloatRegister, __ T2S, $dst$$FloatRegister, __ T2D);
|
||||
} else {
|
||||
// sve_cnt with D-sized elements under the ptrue predicate, then
// sve_vector_narrow from D to S using tmp as the extra register.
__ sve_cnt($dst$$FloatRegister, __ D, ptrue, $src$$FloatRegister);
|
||||
__ sve_vector_narrow($dst$$FloatRegister, __ S,
|
||||
$dst$$FloatRegister, __ D, $tmp$$FloatRegister);
|
||||
}
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// vector popcount - predicated
|
||||
|
||||
instruct vpopcountI_masked(vReg dst_src, pRegGov pg) %{
|
||||
@ -5729,7 +5704,7 @@ instruct vpopcountI_masked(vReg dst_src, pRegGov pg) %{
|
||||
%}
|
||||
|
||||
instruct vpopcountL_masked(vReg dst_src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_LONG);
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src (PopCountVL dst_src pg));
|
||||
format %{ "vpopcountL_masked $dst_src, $pg, $dst_src" %}
|
||||
ins_encode %{
|
||||
|
@ -122,6 +122,8 @@ source %{
|
||||
// Vector API intrinsics.
|
||||
if ((opcode == Op_VectorCastD2X && bt == T_INT) ||
|
||||
(opcode == Op_VectorCastL2X && bt == T_FLOAT) ||
|
||||
(opcode == Op_CountLeadingZerosV && bt == T_LONG) ||
|
||||
(opcode == Op_CountTrailingZerosV && bt == T_LONG) ||
|
||||
opcode == Op_AddReductionVD || opcode == Op_AddReductionVF ||
|
||||
opcode == Op_MulReductionVD || opcode == Op_MulReductionVF ||
|
||||
opcode == Op_MulVL) {
|
||||
@ -4055,7 +4057,6 @@ instruct vpopcountI(vReg dst, vReg src) %{
|
||||
// vector popcount - LONG
|
||||
|
||||
instruct vpopcountL(vReg dst, vReg src) %{
|
||||
predicate(Matcher::vector_element_basic_type(n) == T_LONG);
|
||||
match(Set dst (PopCountVL src));
|
||||
format %{ "vpopcountL $dst, $src" %}
|
||||
ins_encode %{
|
||||
@ -4071,37 +4072,11 @@ instruct vpopcountL(vReg dst, vReg src) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// If the PopCountVL is generated by auto-vectorization, the dst basic
|
||||
// type is T_INT. And once we have unified the type definition for
|
||||
// Vector API and auto-vectorization, this rule can be merged with
|
||||
// "vpopcountL" rule.
|
||||
|
||||
// Matcher rule for PopCountVL when the result node reports T_INT as its
// vector element basic type (see the predicate below). The rule counts bits
// of the long lanes in src and then narrows the result into dst.
// dst is declared TEMP_DEF and tmp is declared TEMP by the effect clause.
// NOTE(review): PopCountVLNode in vectornode.hpp now asserts that its vector
// type has T_LONG elements; if that holds, this T_INT predicate presumably can
// never match and the rule is dead - confirm before keeping it.
instruct vpopcountL_I(vReg dst, vReg src, vReg tmp) %{
|
||||
predicate(Matcher::vector_element_basic_type(n) == T_INT);
|
||||
match(Set dst (PopCountVL src));
|
||||
effect(TEMP_DEF dst, TEMP tmp);
|
||||
format %{ "vpopcountL_I $dst, $src\t# KILL $tmp" %}
|
||||
ins_encode %{
|
||||
// UseSVE == 0 selects the cnt/uaddlp/xtn sequence; any other value selects
// the sve_cnt/sve_vector_narrow sequence.
if (UseSVE == 0) {
|
||||
// cnt is issued with the T16B arrangement, followed by three uaddlp steps
// (T16B, T8H, T4S). Presumably this produces per-byte counts and then sums
// them pairwise up to one count per 64-bit lane - confirm against the
// assembler documentation.
__ cnt($dst$$FloatRegister, __ T16B, $src$$FloatRegister);
|
||||
__ uaddlp($dst$$FloatRegister, __ T16B, $dst$$FloatRegister);
|
||||
__ uaddlp($dst$$FloatRegister, __ T8H, $dst$$FloatRegister);
|
||||
__ uaddlp($dst$$FloatRegister, __ T4S, $dst$$FloatRegister);
|
||||
// xtn goes from the T2D arrangement to T2S, in place in dst.
__ xtn($dst$$FloatRegister, __ T2S, $dst$$FloatRegister, __ T2D);
|
||||
} else {
|
||||
// sve_cnt with D-sized elements under the ptrue predicate, then
// sve_vector_narrow from D to S using tmp as the extra register.
__ sve_cnt($dst$$FloatRegister, __ D, ptrue, $src$$FloatRegister);
|
||||
__ sve_vector_narrow($dst$$FloatRegister, __ S,
|
||||
$dst$$FloatRegister, __ D, $tmp$$FloatRegister);
|
||||
}
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// vector popcount - predicated
|
||||
UNARY_OP_PREDICATE(vpopcountI, PopCountVI, sve_cnt)
|
||||
|
||||
instruct vpopcountL_masked(vReg dst_src, pRegGov pg) %{
|
||||
predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_LONG);
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src (PopCountVL dst_src pg));
|
||||
format %{ "vpopcountL_masked $dst_src, $pg, $dst_src" %}
|
||||
ins_encode %{
|
||||
|
@ -8875,12 +8875,6 @@ instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
|
||||
int vlen_enc = vector_length_encoding(this, $src);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this, $src);
|
||||
__ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
|
||||
// TODO: Once auto-vectorizer supports ConvL2I operation, PopCountVL
|
||||
// should be succeeded by its corresponding vector IR and following
|
||||
// special handling should be removed.
|
||||
if (opcode == Op_PopCountVL && Matcher::vector_element_basic_type(this) == T_INT) {
|
||||
__ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
|
||||
}
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -8911,18 +8905,6 @@ instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %
|
||||
BasicType bt = Matcher::vector_element_basic_type(this, $src);
|
||||
__ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
|
||||
$xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
|
||||
// TODO: Once auto-vectorizer supports ConvL2I operation, PopCountVL
|
||||
// should be succeeded by its corresponding vector IR and following
|
||||
// special handling should be removed.
|
||||
if (opcode == Op_PopCountVL && Matcher::vector_element_basic_type(this) == T_INT) {
|
||||
if (VM_Version::supports_avx512vl()) {
|
||||
__ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
|
||||
} else {
|
||||
assert(VM_Version::supports_avx2(), "");
|
||||
__ vpshufd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
|
||||
__ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
|
||||
}
|
||||
}
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -8939,15 +8921,8 @@ instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp)
|
||||
ins_encode %{
|
||||
int vlen_enc = vector_length_encoding(this, $src);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this, $src);
|
||||
BasicType rbt = Matcher::vector_element_basic_type(this);
|
||||
__ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
|
||||
xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
|
||||
// TODO: Once auto-vectorizer supports ConvL2I operation, CountTrailingZerosV
|
||||
// should be succeeded by its corresponding vector IR and following
|
||||
// special handling should be removed.
|
||||
if (bt == T_LONG && rbt == T_INT) {
|
||||
__ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
|
||||
}
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -8993,17 +8968,8 @@ instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, v
|
||||
ins_encode %{
|
||||
int vlen_enc = vector_length_encoding(this, $src);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this, $src);
|
||||
BasicType rbt = Matcher::vector_element_basic_type(this);
|
||||
__ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
|
||||
$xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
|
||||
// TODO: Once auto-vectorizer supports ConvL2I operation, CountTrailingZerosV
|
||||
// should be succeeded by its corresponding vector IR and following
|
||||
// special handling should be removed.
|
||||
if (bt == T_LONG && rbt == T_INT) {
|
||||
assert(VM_Version::supports_avx2(), "");
|
||||
__ vpshufd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
|
||||
__ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
|
||||
}
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -9408,15 +9374,8 @@ instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
|
||||
ins_encode %{
|
||||
int vlen_enc = vector_length_encoding(this, $src);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this, $src);
|
||||
BasicType rbt = Matcher::vector_element_basic_type(this);
|
||||
__ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
|
||||
xnoreg, xnoreg, k0, noreg, true, vlen_enc);
|
||||
// TODO: Once auto-vectorizer supports ConvL2I operation, CountLeadingZerosV
|
||||
// should be succeeded by its corresponding vector IR and following
|
||||
// special handling should be removed.
|
||||
if (rbt == T_INT && bt == T_LONG) {
|
||||
__ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
|
||||
}
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -9491,15 +9450,8 @@ instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, ve
|
||||
ins_encode %{
|
||||
int vlen_enc = vector_length_encoding(this, $src);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this, $src);
|
||||
BasicType rbt = Matcher::vector_element_basic_type(this);
|
||||
__ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
|
||||
$xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
|
||||
// TODO: Once auto-vectorizer supports ConvL2I operation, CountLeadingZerosV
|
||||
// should be succeeded by its corresponding vector IR and following
|
||||
// special handling should be removed.
|
||||
if (rbt == T_INT && bt == T_LONG) {
|
||||
__ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
|
||||
}
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
@ -2079,6 +2079,14 @@ bool SuperWord::implemented(Node_List* p) {
|
||||
} else if (is_cmove_fp_opcode(opc)) {
|
||||
retValue = is_cmov_pack(p) && VectorNode::implemented(opc, size, velt_basic_type(p0));
|
||||
NOT_PRODUCT(if(retValue && is_trace_cmov()) {tty->print_cr("SWPointer::implemented: found cmove pack"); print_pack(p);})
|
||||
} else if (requires_long_to_int_conversion(opc)) {
|
||||
// Java API for Long.bitCount/numberOfLeadingZeros/numberOfTrailingZeros
|
||||
// returns int type, but Vector API for them returns long type. To unify
|
||||
// the implementation in backend, superword splits the vector implementation
|
||||
// for Java API into an execution node with long type plus another node
|
||||
// converting long to int.
|
||||
retValue = VectorNode::implemented(opc, size, T_LONG) &&
|
||||
VectorCastNode::implemented(Op_ConvL2I, size, T_LONG, T_INT);
|
||||
} else {
|
||||
// Vector unsigned right shift for signed subword types behaves differently
|
||||
// from Java Spec. But when the shift amount is a constant not greater than
|
||||
@ -2096,6 +2104,18 @@ bool SuperWord::implemented(Node_List* p) {
|
||||
// Returns true when the CMoveV kit has a pack recorded for the first node of
// pack p, i.e. when _cmovev_kit.pack() yields a non-NULL result for p->at(0).
bool SuperWord::is_cmov_pack(Node_List* p) {
  Node* first = p->at(0);
  const bool has_cmov_pack = (_cmovev_kit.pack(first) != NULL);
  return has_cmov_pack;
}
|
||||
|
||||
bool SuperWord::requires_long_to_int_conversion(int opc) {
|
||||
switch(opc) {
|
||||
case Op_PopCountL:
|
||||
case Op_CountLeadingZerosL:
|
||||
case Op_CountTrailingZerosL:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------same_inputs--------------------------
|
||||
// For pack p, are all idx operands the same?
|
||||
bool SuperWord::same_inputs(Node_List* p, int idx) {
|
||||
@ -2666,16 +2686,28 @@ bool SuperWord::output() {
|
||||
opc == Op_AbsI || opc == Op_AbsL ||
|
||||
opc == Op_NegF || opc == Op_NegD ||
|
||||
opc == Op_RoundF || opc == Op_RoundD ||
|
||||
opc == Op_PopCountI || opc == Op_PopCountL ||
|
||||
opc == Op_ReverseBytesI || opc == Op_ReverseBytesL ||
|
||||
opc == Op_ReverseBytesUS || opc == Op_ReverseBytesS ||
|
||||
opc == Op_ReverseI || opc == Op_ReverseL ||
|
||||
opc == Op_CountLeadingZerosI || opc == Op_CountLeadingZerosL ||
|
||||
opc == Op_CountTrailingZerosI || opc == Op_CountTrailingZerosL) {
|
||||
opc == Op_PopCountI || opc == Op_CountLeadingZerosI ||
|
||||
opc == Op_CountTrailingZerosI) {
|
||||
assert(n->req() == 2, "only one input expected");
|
||||
Node* in = vector_opd(p, 1);
|
||||
vn = VectorNode::make(opc, in, NULL, vlen, velt_basic_type(n));
|
||||
vlen_in_bytes = vn->as_Vector()->length_in_bytes();
|
||||
} else if (requires_long_to_int_conversion(opc)) {
|
||||
// Java API for Long.bitCount/numberOfLeadingZeros/numberOfTrailingZeros
|
||||
// returns int type, but Vector API for them returns long type. To unify
|
||||
// the implementation in backend, superword splits the vector implementation
|
||||
// for Java API into an execution node with long type plus another node
|
||||
// converting long to int.
|
||||
assert(n->req() == 2, "only one input expected");
|
||||
Node* in = vector_opd(p, 1);
|
||||
Node* longval = VectorNode::make(opc, in, NULL, vlen, T_LONG);
|
||||
_igvn.register_new_node_with_optimizer(longval);
|
||||
_phase->set_ctrl(longval, _phase->get_ctrl(p->at(0)));
|
||||
vn = VectorCastNode::make(Op_VectorCastL2X, longval, T_INT, vlen);
|
||||
vlen_in_bytes = vn->as_Vector()->length_in_bytes();
|
||||
} else if (VectorNode::is_convert_opcode(opc)) {
|
||||
assert(n->req() == 2, "only one input expected");
|
||||
BasicType bt = velt_basic_type(n);
|
||||
@ -3198,27 +3230,11 @@ bool SuperWord::is_vector_use(Node* use, int u_idx) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (VectorNode::is_type_transition_long_to_int(use)) {
|
||||
// PopCountL/CountLeadingZerosL/CountTrailingZerosL takes long and produces
|
||||
// int - hence the special checks on alignment and size.
|
||||
if (u_pk->size() != d_pk->size()) {
|
||||
return false;
|
||||
}
|
||||
for (uint i = 0; i < MIN2(d_pk->size(), u_pk->size()); i++) {
|
||||
Node* ui = u_pk->at(i);
|
||||
Node* di = d_pk->at(i);
|
||||
if (alignment(ui) * 2 != alignment(di)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
if (u_pk->size() != d_pk->size())
|
||||
return false;
|
||||
|
||||
if (longer_type_for_conversion(use) != T_ILLEGAL) {
|
||||
// type conversion takes a type of a kind of size and produces a type of
|
||||
// These opcodes take a type of a kind of size and produce a type of
|
||||
// another size - hence the special checks on alignment and size.
|
||||
for (uint i = 0; i < u_pk->size(); i++) {
|
||||
Node* ui = u_pk->at(i);
|
||||
@ -3467,7 +3483,8 @@ void SuperWord::compute_max_depth() {
|
||||
}
|
||||
|
||||
BasicType SuperWord::longer_type_for_conversion(Node* n) {
|
||||
if (!VectorNode::is_convert_opcode(n->Opcode()) ||
|
||||
if (!(VectorNode::is_convert_opcode(n->Opcode()) ||
|
||||
requires_long_to_int_conversion(n->Opcode())) ||
|
||||
!in_bb(n->in(1))) {
|
||||
return T_ILLEGAL;
|
||||
}
|
||||
|
@ -457,6 +457,7 @@ class SuperWord : public ResourceObj {
|
||||
bool is_cmov_pack(Node_List* p);
|
||||
bool is_cmov_pack_internal_node(Node_List* p, Node* nd) { return is_cmov_pack(p) && !nd->is_CMove(); }
|
||||
static bool is_cmove_fp_opcode(int opc) { return (opc == Op_CMoveF || opc == Op_CMoveD); }
|
||||
static bool requires_long_to_int_conversion(int opc);
|
||||
// For pack p, are all idx operands the same?
|
||||
bool same_inputs(Node_List* p, int idx);
|
||||
// CloneMap utilities
|
||||
|
@ -343,17 +343,6 @@ bool VectorNode::is_muladds2i(Node* n) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns true when n's opcode is Op_PopCountL, Op_CountLeadingZerosL or
// Op_CountTrailingZerosL; false for any other node.
bool VectorNode::is_type_transition_long_to_int(Node* n) {
  const int opc = n->Opcode();
  if (opc == Op_PopCountL) {
    return true;
  }
  if (opc == Op_CountLeadingZerosL) {
    return true;
  }
  return opc == Op_CountTrailingZerosL;
}
|
||||
|
||||
bool VectorNode::is_roundopD(Node* n) {
|
||||
if (n->Opcode() == Op_RoundDoubleMode) {
|
||||
return true;
|
||||
|
@ -99,7 +99,6 @@ class VectorNode : public TypeNode {
|
||||
static bool is_type_transition_short_to_int(Node* n);
|
||||
static bool is_type_transition_to_int(Node* n);
|
||||
static bool is_muladds2i(Node* n);
|
||||
static bool is_type_transition_long_to_int(Node* n);
|
||||
static bool is_roundopD(Node* n);
|
||||
static bool is_scalar_rotate(Node* n);
|
||||
static bool is_vector_rotate_supported(int opc, uint vlen, BasicType bt);
|
||||
@ -551,7 +550,9 @@ class PopCountVINode : public VectorNode {
|
||||
// Vector popcount long bits
|
||||
class PopCountVLNode : public VectorNode {
|
||||
public:
|
||||
PopCountVLNode(Node* in, const TypeVect* vt) : VectorNode(in,vt) {}
|
||||
PopCountVLNode(Node* in, const TypeVect* vt) : VectorNode(in,vt) {
|
||||
assert(vt->element_basic_type() == T_LONG, "must be long");
|
||||
}
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
@ -1732,7 +1733,10 @@ public:
|
||||
class CountLeadingZerosVNode : public VectorNode {
|
||||
public:
|
||||
CountLeadingZerosVNode(Node* in, const TypeVect* vt)
|
||||
: VectorNode(in, vt) {}
|
||||
: VectorNode(in, vt) {
|
||||
assert(in->bottom_type()->is_vect()->element_basic_type() == vt->element_basic_type(),
|
||||
"must be the same");
|
||||
}
|
||||
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
@ -1740,7 +1744,10 @@ class CountLeadingZerosVNode : public VectorNode {
|
||||
class CountTrailingZerosVNode : public VectorNode {
|
||||
public:
|
||||
CountTrailingZerosVNode(Node* in, const TypeVect* vt)
|
||||
: VectorNode(in, vt) {}
|
||||
: VectorNode(in, vt) {
|
||||
assert(in->bottom_type()->is_vect()->element_basic_type() == vt->element_basic_type(),
|
||||
"must be the same");
|
||||
}
|
||||
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
@ -116,4 +116,20 @@ public class TestDisableAutoVectOpcodes {
|
||||
dresult += result;
|
||||
}
|
||||
|
||||
// Stores Long.numberOfTrailingZeros of each longa element into inta, for the
// first SIZE indices. The IR rule lists IRNode.COUNTTRAILINGZEROS_VL under
// failOn - presumably the test fails if such a node shows up in the compiled
// IR; confirm against the IR framework documentation.
@Test
|
||||
@IR(failOn = {IRNode.COUNTTRAILINGZEROS_VL})
|
||||
public void testNumberOfTrailingZeros() {
|
||||
for (int i = 0; i < SIZE; ++i) {
|
||||
inta[i] = Long.numberOfTrailingZeros(longa[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Stores Long.numberOfLeadingZeros of each longa element into inta, for the
// first SIZE indices. The IR rule lists IRNode.COUNTLEADINGZEROS_VL under
// failOn - presumably the test fails if such a node shows up in the compiled
// IR; confirm against the IR framework documentation.
@Test
|
||||
@IR(failOn = {IRNode.COUNTLEADINGZEROS_VL})
|
||||
public void testNumberOfLeadingZeros() {
|
||||
for (int i = 0; i < SIZE; ++i) {
|
||||
inta[i] = Long.numberOfLeadingZeros(longa[i]);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -714,6 +714,16 @@ public class IRNode {
|
||||
superWordNodes(POPCOUNT_VL, "PopCountVL");
|
||||
}
|
||||
|
||||
// Placeholder string for the count-trailing-zeros vector node, built from
// PREFIX and POSTFIX, and registered through superWordNodes with the node
// name "CountTrailingZerosV".
// NOTE(review): the constant carries a _VL suffix but the registered node
// name has no element-type suffix (unlike "PopCountVL" above), so it
// presumably matches the node for any element type - confirm.
public static final String COUNTTRAILINGZEROS_VL = PREFIX + "COUNTTRAILINGZEROS_VL" + POSTFIX;
|
||||
static {
|
||||
superWordNodes(COUNTTRAILINGZEROS_VL, "CountTrailingZerosV");
|
||||
}
|
||||
|
||||
// Placeholder string for the count-leading-zeros vector node, built from
// PREFIX and POSTFIX, and registered through superWordNodes with the node
// name "CountLeadingZerosV".
// NOTE(review): the constant carries a _VL suffix but the registered node
// name has no element-type suffix (unlike "PopCountVL" above), so it
// presumably matches the node for any element type - confirm.
public static final String COUNTLEADINGZEROS_VL = PREFIX + "COUNTLEADINGZEROS_VL" + POSTFIX;
|
||||
static {
|
||||
superWordNodes(COUNTLEADINGZEROS_VL, "CountLeadingZerosV");
|
||||
}
|
||||
|
||||
public static final String POPULATE_INDEX = PREFIX + "POPULATE_INDEX" + POSTFIX;
|
||||
static {
|
||||
String regex = START + "PopulateIndex" + MID + END;
|
||||
|
@ -0,0 +1,88 @@
|
||||
/*
|
||||
* Copyright (c) 2022, Arm Limited. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @test
|
||||
* @key randomness
|
||||
* @summary Test vectorization of numberOfTrailingZeros/numberOfLeadingZeros for Long
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires (os.simpleArch == "x64" & vm.cpu.features ~= ".*avx2.*") |
|
||||
* (os.simpleArch == "aarch64" & vm.cpu.features ~= ".*sve.*" & (vm.opt.UseSVE == "null" | vm.opt.UseSVE > 0))
|
||||
* @library /test/lib /
|
||||
* @run driver compiler.vectorization.TestNumberOfContinuousZeros
|
||||
*/
|
||||
|
||||
package compiler.vectorization;
|
||||
|
||||
import compiler.lib.ir_framework.*;
|
||||
import java.util.Random;
|
||||
import jdk.test.lib.Asserts;
|
||||
|
||||
/**
 * IR test for loops that store Long.numberOfTrailingZeros and
 * Long.numberOfLeadingZeros of a long[] into an int[].
 *
 * Each test method carries an IR rule requiring a count "> 0" of the
 * corresponding IRNode constant, and checkResult() compares every output
 * element against the scalar Long method.
 */
public class TestNumberOfContinuousZeros {
|
||||
// Source values, filled once in the constructor.
private long[] input;
|
||||
// Destination array written by both test methods.
private int[] output;
|
||||
// Length of both arrays and trip count of every loop in this class.
private static final int LEN = 1024;
|
||||
// NOTE(review): seeded with the constant 42 in the constructor although the
// test header declares "@key randomness" - confirm whether a fixed seed is
// intended.
private Random rng;
|
||||
|
||||
// Entry point: hands control to the IR test framework.
public static void main(String args[]) {
|
||||
TestFramework.run();
|
||||
}
|
||||
|
||||
// Allocates both arrays and fills input with LEN values from rng.nextLong().
public TestNumberOfContinuousZeros() {
|
||||
input = new long[LEN];
|
||||
output = new int[LEN];
|
||||
rng = new Random(42);
|
||||
for (int i = 0; i < LEN; ++i) {
|
||||
input[i] = rng.nextLong();
|
||||
}
|
||||
}
|
||||
|
||||
// Writes Long.numberOfTrailingZeros(input[i]) to output[i] for every index.
// The IR rule asks for a count "> 0" of IRNode.COUNTTRAILINGZEROS_VL.
@Test
|
||||
@IR(counts = {IRNode.COUNTTRAILINGZEROS_VL, "> 0"})
|
||||
public void vectorizeNumberOfTrailingZeros() {
|
||||
for (int i = 0; i < LEN; ++i) {
|
||||
output[i] = Long.numberOfTrailingZeros(input[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Writes Long.numberOfLeadingZeros(input[i]) to output[i] for every index.
// The IR rule asks for a count "> 0" of IRNode.COUNTLEADINGZEROS_VL.
@Test
|
||||
@IR(counts = {IRNode.COUNTLEADINGZEROS_VL, "> 0"})
|
||||
public void vectorizeNumberOfLeadingZeros() {
|
||||
for (int i = 0; i < LEN; ++i) {
|
||||
output[i] = Long.numberOfLeadingZeros(input[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Runner for both tests: invokes each test method and then checks every
// output element against the scalar Long method. The trailing-zeros check
// must finish before the leading-zeros test runs because both tests write
// the same output array.
@Run(test = {"vectorizeNumberOfTrailingZeros", "vectorizeNumberOfLeadingZeros"})
|
||||
public void checkResult() {
|
||||
vectorizeNumberOfTrailingZeros();
|
||||
for (int i = 0; i < LEN; ++i) {
|
||||
Asserts.assertEquals(output[i], Long.numberOfTrailingZeros(input[i]));
|
||||
}
|
||||
vectorizeNumberOfLeadingZeros();
|
||||
for (int i = 0; i < LEN; ++i) {
|
||||
Asserts.assertEquals(output[i], Long.numberOfLeadingZeros(input[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -25,8 +25,8 @@
|
||||
* @test
|
||||
* @summary Test vectorization of popcount for Long
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires vm.cpu.features ~= ".*avx512bw.*" | (vm.cpu.features ~= ".*sve.*" & (vm.opt.UseSVE == "null" | vm.opt.UseSVE > 0))
|
||||
* @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
|
||||
* @requires ((os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") & vm.cpu.features ~= ".*avx512bw.*") |
|
||||
* os.simpleArch == "aarch64"
|
||||
* @library /test/lib /
|
||||
* @run driver compiler.vectorization.TestPopCountVectorLong
|
||||
*/
|
||||
|
Loading…
x
Reference in New Issue
Block a user