8297172: Fix some issues of auto-vectorization of Long.bitCount/numberOfTrailingZeros/numberOfLeadingZeros()
Reviewed-by: kvn, thartmann
This commit is contained in:
parent
a61399854a
commit
4458de95f8
@ -132,6 +132,8 @@ source %{
|
|||||||
// Vector API intrinsics.
|
// Vector API intrinsics.
|
||||||
if ((opcode == Op_VectorCastD2X && bt == T_INT) ||
|
if ((opcode == Op_VectorCastD2X && bt == T_INT) ||
|
||||||
(opcode == Op_VectorCastL2X && bt == T_FLOAT) ||
|
(opcode == Op_VectorCastL2X && bt == T_FLOAT) ||
|
||||||
|
(opcode == Op_CountLeadingZerosV && bt == T_LONG) ||
|
||||||
|
(opcode == Op_CountTrailingZerosV && bt == T_LONG) ||
|
||||||
opcode == Op_AddReductionVD || opcode == Op_AddReductionVF ||
|
opcode == Op_AddReductionVD || opcode == Op_AddReductionVF ||
|
||||||
opcode == Op_MulReductionVD || opcode == Op_MulReductionVF ||
|
opcode == Op_MulReductionVD || opcode == Op_MulReductionVF ||
|
||||||
opcode == Op_MulVL) {
|
opcode == Op_MulVL) {
|
||||||
@ -5672,7 +5674,6 @@ instruct vpopcountI(vReg dst, vReg src) %{
|
|||||||
// vector popcount - LONG
|
// vector popcount - LONG
|
||||||
|
|
||||||
instruct vpopcountL(vReg dst, vReg src) %{
|
instruct vpopcountL(vReg dst, vReg src) %{
|
||||||
predicate(Matcher::vector_element_basic_type(n) == T_LONG);
|
|
||||||
match(Set dst (PopCountVL src));
|
match(Set dst (PopCountVL src));
|
||||||
format %{ "vpopcountL $dst, $src" %}
|
format %{ "vpopcountL $dst, $src" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
@ -5688,32 +5689,6 @@ instruct vpopcountL(vReg dst, vReg src) %{
|
|||||||
ins_pipe(pipe_slow);
|
ins_pipe(pipe_slow);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
// If the PopCountVL is generated by auto-vectorization, the dst basic
|
|
||||||
// type is T_INT. And once we have unified the type definition for
|
|
||||||
// Vector API and auto-vectorization, this rule can be merged with
|
|
||||||
// "vpopcountL" rule.
|
|
||||||
|
|
||||||
instruct vpopcountL_I(vReg dst, vReg src, vReg tmp) %{
|
|
||||||
predicate(Matcher::vector_element_basic_type(n) == T_INT);
|
|
||||||
match(Set dst (PopCountVL src));
|
|
||||||
effect(TEMP_DEF dst, TEMP tmp);
|
|
||||||
format %{ "vpopcountL_I $dst, $src\t# KILL $tmp" %}
|
|
||||||
ins_encode %{
|
|
||||||
if (UseSVE == 0) {
|
|
||||||
__ cnt($dst$$FloatRegister, __ T16B, $src$$FloatRegister);
|
|
||||||
__ uaddlp($dst$$FloatRegister, __ T16B, $dst$$FloatRegister);
|
|
||||||
__ uaddlp($dst$$FloatRegister, __ T8H, $dst$$FloatRegister);
|
|
||||||
__ uaddlp($dst$$FloatRegister, __ T4S, $dst$$FloatRegister);
|
|
||||||
__ xtn($dst$$FloatRegister, __ T2S, $dst$$FloatRegister, __ T2D);
|
|
||||||
} else {
|
|
||||||
__ sve_cnt($dst$$FloatRegister, __ D, ptrue, $src$$FloatRegister);
|
|
||||||
__ sve_vector_narrow($dst$$FloatRegister, __ S,
|
|
||||||
$dst$$FloatRegister, __ D, $tmp$$FloatRegister);
|
|
||||||
}
|
|
||||||
%}
|
|
||||||
ins_pipe(pipe_slow);
|
|
||||||
%}
|
|
||||||
|
|
||||||
// vector popcount - predicated
|
// vector popcount - predicated
|
||||||
|
|
||||||
instruct vpopcountI_masked(vReg dst_src, pRegGov pg) %{
|
instruct vpopcountI_masked(vReg dst_src, pRegGov pg) %{
|
||||||
@ -5729,7 +5704,7 @@ instruct vpopcountI_masked(vReg dst_src, pRegGov pg) %{
|
|||||||
%}
|
%}
|
||||||
|
|
||||||
instruct vpopcountL_masked(vReg dst_src, pRegGov pg) %{
|
instruct vpopcountL_masked(vReg dst_src, pRegGov pg) %{
|
||||||
predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_LONG);
|
predicate(UseSVE > 0);
|
||||||
match(Set dst_src (PopCountVL dst_src pg));
|
match(Set dst_src (PopCountVL dst_src pg));
|
||||||
format %{ "vpopcountL_masked $dst_src, $pg, $dst_src" %}
|
format %{ "vpopcountL_masked $dst_src, $pg, $dst_src" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
|
@ -122,6 +122,8 @@ source %{
|
|||||||
// Vector API intrinsics.
|
// Vector API intrinsics.
|
||||||
if ((opcode == Op_VectorCastD2X && bt == T_INT) ||
|
if ((opcode == Op_VectorCastD2X && bt == T_INT) ||
|
||||||
(opcode == Op_VectorCastL2X && bt == T_FLOAT) ||
|
(opcode == Op_VectorCastL2X && bt == T_FLOAT) ||
|
||||||
|
(opcode == Op_CountLeadingZerosV && bt == T_LONG) ||
|
||||||
|
(opcode == Op_CountTrailingZerosV && bt == T_LONG) ||
|
||||||
opcode == Op_AddReductionVD || opcode == Op_AddReductionVF ||
|
opcode == Op_AddReductionVD || opcode == Op_AddReductionVF ||
|
||||||
opcode == Op_MulReductionVD || opcode == Op_MulReductionVF ||
|
opcode == Op_MulReductionVD || opcode == Op_MulReductionVF ||
|
||||||
opcode == Op_MulVL) {
|
opcode == Op_MulVL) {
|
||||||
@ -4055,7 +4057,6 @@ instruct vpopcountI(vReg dst, vReg src) %{
|
|||||||
// vector popcount - LONG
|
// vector popcount - LONG
|
||||||
|
|
||||||
instruct vpopcountL(vReg dst, vReg src) %{
|
instruct vpopcountL(vReg dst, vReg src) %{
|
||||||
predicate(Matcher::vector_element_basic_type(n) == T_LONG);
|
|
||||||
match(Set dst (PopCountVL src));
|
match(Set dst (PopCountVL src));
|
||||||
format %{ "vpopcountL $dst, $src" %}
|
format %{ "vpopcountL $dst, $src" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
@ -4071,37 +4072,11 @@ instruct vpopcountL(vReg dst, vReg src) %{
|
|||||||
ins_pipe(pipe_slow);
|
ins_pipe(pipe_slow);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
// If the PopCountVL is generated by auto-vectorization, the dst basic
|
|
||||||
// type is T_INT. And once we have unified the type definition for
|
|
||||||
// Vector API and auto-vectorization, this rule can be merged with
|
|
||||||
// "vpopcountL" rule.
|
|
||||||
|
|
||||||
instruct vpopcountL_I(vReg dst, vReg src, vReg tmp) %{
|
|
||||||
predicate(Matcher::vector_element_basic_type(n) == T_INT);
|
|
||||||
match(Set dst (PopCountVL src));
|
|
||||||
effect(TEMP_DEF dst, TEMP tmp);
|
|
||||||
format %{ "vpopcountL_I $dst, $src\t# KILL $tmp" %}
|
|
||||||
ins_encode %{
|
|
||||||
if (UseSVE == 0) {
|
|
||||||
__ cnt($dst$$FloatRegister, __ T16B, $src$$FloatRegister);
|
|
||||||
__ uaddlp($dst$$FloatRegister, __ T16B, $dst$$FloatRegister);
|
|
||||||
__ uaddlp($dst$$FloatRegister, __ T8H, $dst$$FloatRegister);
|
|
||||||
__ uaddlp($dst$$FloatRegister, __ T4S, $dst$$FloatRegister);
|
|
||||||
__ xtn($dst$$FloatRegister, __ T2S, $dst$$FloatRegister, __ T2D);
|
|
||||||
} else {
|
|
||||||
__ sve_cnt($dst$$FloatRegister, __ D, ptrue, $src$$FloatRegister);
|
|
||||||
__ sve_vector_narrow($dst$$FloatRegister, __ S,
|
|
||||||
$dst$$FloatRegister, __ D, $tmp$$FloatRegister);
|
|
||||||
}
|
|
||||||
%}
|
|
||||||
ins_pipe(pipe_slow);
|
|
||||||
%}
|
|
||||||
|
|
||||||
// vector popcount - predicated
|
// vector popcount - predicated
|
||||||
UNARY_OP_PREDICATE(vpopcountI, PopCountVI, sve_cnt)
|
UNARY_OP_PREDICATE(vpopcountI, PopCountVI, sve_cnt)
|
||||||
|
|
||||||
instruct vpopcountL_masked(vReg dst_src, pRegGov pg) %{
|
instruct vpopcountL_masked(vReg dst_src, pRegGov pg) %{
|
||||||
predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_LONG);
|
predicate(UseSVE > 0);
|
||||||
match(Set dst_src (PopCountVL dst_src pg));
|
match(Set dst_src (PopCountVL dst_src pg));
|
||||||
format %{ "vpopcountL_masked $dst_src, $pg, $dst_src" %}
|
format %{ "vpopcountL_masked $dst_src, $pg, $dst_src" %}
|
||||||
ins_encode %{
|
ins_encode %{
|
||||||
|
@ -8875,12 +8875,6 @@ instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
|
|||||||
int vlen_enc = vector_length_encoding(this, $src);
|
int vlen_enc = vector_length_encoding(this, $src);
|
||||||
BasicType bt = Matcher::vector_element_basic_type(this, $src);
|
BasicType bt = Matcher::vector_element_basic_type(this, $src);
|
||||||
__ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
|
__ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
|
||||||
// TODO: Once auto-vectorizer supports ConvL2I operation, PopCountVL
|
|
||||||
// should be succeeded by its corresponding vector IR and following
|
|
||||||
// special handling should be removed.
|
|
||||||
if (opcode == Op_PopCountVL && Matcher::vector_element_basic_type(this) == T_INT) {
|
|
||||||
__ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
|
|
||||||
}
|
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
@ -8911,18 +8905,6 @@ instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %
|
|||||||
BasicType bt = Matcher::vector_element_basic_type(this, $src);
|
BasicType bt = Matcher::vector_element_basic_type(this, $src);
|
||||||
__ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
|
__ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
|
||||||
$xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
|
$xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
|
||||||
// TODO: Once auto-vectorizer supports ConvL2I operation, PopCountVL
|
|
||||||
// should be succeeded by its corresponding vector IR and following
|
|
||||||
// special handling should be removed.
|
|
||||||
if (opcode == Op_PopCountVL && Matcher::vector_element_basic_type(this) == T_INT) {
|
|
||||||
if (VM_Version::supports_avx512vl()) {
|
|
||||||
__ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
|
|
||||||
} else {
|
|
||||||
assert(VM_Version::supports_avx2(), "");
|
|
||||||
__ vpshufd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
|
|
||||||
__ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
@ -8939,15 +8921,8 @@ instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp)
|
|||||||
ins_encode %{
|
ins_encode %{
|
||||||
int vlen_enc = vector_length_encoding(this, $src);
|
int vlen_enc = vector_length_encoding(this, $src);
|
||||||
BasicType bt = Matcher::vector_element_basic_type(this, $src);
|
BasicType bt = Matcher::vector_element_basic_type(this, $src);
|
||||||
BasicType rbt = Matcher::vector_element_basic_type(this);
|
|
||||||
__ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
|
__ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
|
||||||
xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
|
xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
|
||||||
// TODO: Once auto-vectorizer supports ConvL2I operation, CountTrailingZerosV
|
|
||||||
// should be succeeded by its corresponding vector IR and following
|
|
||||||
// special handling should be removed.
|
|
||||||
if (bt == T_LONG && rbt == T_INT) {
|
|
||||||
__ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
|
|
||||||
}
|
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
@ -8993,17 +8968,8 @@ instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, v
|
|||||||
ins_encode %{
|
ins_encode %{
|
||||||
int vlen_enc = vector_length_encoding(this, $src);
|
int vlen_enc = vector_length_encoding(this, $src);
|
||||||
BasicType bt = Matcher::vector_element_basic_type(this, $src);
|
BasicType bt = Matcher::vector_element_basic_type(this, $src);
|
||||||
BasicType rbt = Matcher::vector_element_basic_type(this);
|
|
||||||
__ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
|
__ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
|
||||||
$xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
|
$xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
|
||||||
// TODO: Once auto-vectorizer supports ConvL2I operation, PopCountVL
|
|
||||||
// should be succeeded by its corresponding vector IR and following
|
|
||||||
// special handling should be removed.
|
|
||||||
if (bt == T_LONG && rbt == T_INT) {
|
|
||||||
assert(VM_Version::supports_avx2(), "");
|
|
||||||
__ vpshufd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
|
|
||||||
__ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
|
|
||||||
}
|
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
@ -9408,15 +9374,8 @@ instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
|
|||||||
ins_encode %{
|
ins_encode %{
|
||||||
int vlen_enc = vector_length_encoding(this, $src);
|
int vlen_enc = vector_length_encoding(this, $src);
|
||||||
BasicType bt = Matcher::vector_element_basic_type(this, $src);
|
BasicType bt = Matcher::vector_element_basic_type(this, $src);
|
||||||
BasicType rbt = Matcher::vector_element_basic_type(this);
|
|
||||||
__ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
|
__ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
|
||||||
xnoreg, xnoreg, k0, noreg, true, vlen_enc);
|
xnoreg, xnoreg, k0, noreg, true, vlen_enc);
|
||||||
// TODO: Once auto-vectorizer supports ConvL2I operation, CountLeadingZerosV
|
|
||||||
// should be succeeded by its corresponding vector IR and following
|
|
||||||
// special handling should be removed.
|
|
||||||
if (rbt == T_INT && bt == T_LONG) {
|
|
||||||
__ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
|
|
||||||
}
|
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
@ -9491,15 +9450,8 @@ instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, ve
|
|||||||
ins_encode %{
|
ins_encode %{
|
||||||
int vlen_enc = vector_length_encoding(this, $src);
|
int vlen_enc = vector_length_encoding(this, $src);
|
||||||
BasicType bt = Matcher::vector_element_basic_type(this, $src);
|
BasicType bt = Matcher::vector_element_basic_type(this, $src);
|
||||||
BasicType rbt = Matcher::vector_element_basic_type(this);
|
|
||||||
__ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
|
__ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
|
||||||
$xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
|
$xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
|
||||||
// TODO: Once auto-vectorizer supports ConvL2I operation, CountLeadingZerosV
|
|
||||||
// should be succeeded by its corresponding vector IR and following
|
|
||||||
// special handling should be removed.
|
|
||||||
if (rbt == T_INT && bt == T_LONG) {
|
|
||||||
__ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
|
|
||||||
}
|
|
||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
|
@ -2079,6 +2079,14 @@ bool SuperWord::implemented(Node_List* p) {
|
|||||||
} else if (is_cmove_fp_opcode(opc)) {
|
} else if (is_cmove_fp_opcode(opc)) {
|
||||||
retValue = is_cmov_pack(p) && VectorNode::implemented(opc, size, velt_basic_type(p0));
|
retValue = is_cmov_pack(p) && VectorNode::implemented(opc, size, velt_basic_type(p0));
|
||||||
NOT_PRODUCT(if(retValue && is_trace_cmov()) {tty->print_cr("SWPointer::implemented: found cmove pack"); print_pack(p);})
|
NOT_PRODUCT(if(retValue && is_trace_cmov()) {tty->print_cr("SWPointer::implemented: found cmove pack"); print_pack(p);})
|
||||||
|
} else if (requires_long_to_int_conversion(opc)) {
|
||||||
|
// Java API for Long.bitCount/numberOfLeadingZeros/numberOfTrailingZeros
|
||||||
|
// returns int type, but Vector API for them returns long type. To unify
|
||||||
|
// the implementation in backend, superword splits the vector implementation
|
||||||
|
// for Java API into an execution node with long type plus another node
|
||||||
|
// converting long to int.
|
||||||
|
retValue = VectorNode::implemented(opc, size, T_LONG) &&
|
||||||
|
VectorCastNode::implemented(Op_ConvL2I, size, T_LONG, T_INT);
|
||||||
} else {
|
} else {
|
||||||
// Vector unsigned right shift for signed subword types behaves differently
|
// Vector unsigned right shift for signed subword types behaves differently
|
||||||
// from Java Spec. But when the shift amount is a constant not greater than
|
// from Java Spec. But when the shift amount is a constant not greater than
|
||||||
@ -2096,6 +2104,18 @@ bool SuperWord::implemented(Node_List* p) {
|
|||||||
bool SuperWord::is_cmov_pack(Node_List* p) {
|
bool SuperWord::is_cmov_pack(Node_List* p) {
|
||||||
return _cmovev_kit.pack(p->at(0)) != NULL;
|
return _cmovev_kit.pack(p->at(0)) != NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns true when 'opc' is one of the scalar long bit-counting opcodes
// (Op_PopCountL, Op_CountLeadingZerosL, Op_CountTrailingZerosL) and false
// for every other opcode. The callers visible in this change use the result
// to select the path that builds the vector node with T_LONG elements and
// then narrows it to T_INT with a VectorCastL2X node.
bool SuperWord::requires_long_to_int_conversion(int opc) {
|
||||||
|
switch(opc) {
|
||||||
|
case Op_PopCountL:
|
||||||
|
case Op_CountLeadingZerosL:
|
||||||
|
case Op_CountTrailingZerosL:
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//------------------------------same_inputs--------------------------
|
//------------------------------same_inputs--------------------------
|
||||||
// For pack p, are all idx operands the same?
|
// For pack p, are all idx operands the same?
|
||||||
bool SuperWord::same_inputs(Node_List* p, int idx) {
|
bool SuperWord::same_inputs(Node_List* p, int idx) {
|
||||||
@ -2666,16 +2686,28 @@ bool SuperWord::output() {
|
|||||||
opc == Op_AbsI || opc == Op_AbsL ||
|
opc == Op_AbsI || opc == Op_AbsL ||
|
||||||
opc == Op_NegF || opc == Op_NegD ||
|
opc == Op_NegF || opc == Op_NegD ||
|
||||||
opc == Op_RoundF || opc == Op_RoundD ||
|
opc == Op_RoundF || opc == Op_RoundD ||
|
||||||
opc == Op_PopCountI || opc == Op_PopCountL ||
|
|
||||||
opc == Op_ReverseBytesI || opc == Op_ReverseBytesL ||
|
opc == Op_ReverseBytesI || opc == Op_ReverseBytesL ||
|
||||||
opc == Op_ReverseBytesUS || opc == Op_ReverseBytesS ||
|
opc == Op_ReverseBytesUS || opc == Op_ReverseBytesS ||
|
||||||
opc == Op_ReverseI || opc == Op_ReverseL ||
|
opc == Op_ReverseI || opc == Op_ReverseL ||
|
||||||
opc == Op_CountLeadingZerosI || opc == Op_CountLeadingZerosL ||
|
opc == Op_PopCountI || opc == Op_CountLeadingZerosI ||
|
||||||
opc == Op_CountTrailingZerosI || opc == Op_CountTrailingZerosL) {
|
opc == Op_CountTrailingZerosI) {
|
||||||
assert(n->req() == 2, "only one input expected");
|
assert(n->req() == 2, "only one input expected");
|
||||||
Node* in = vector_opd(p, 1);
|
Node* in = vector_opd(p, 1);
|
||||||
vn = VectorNode::make(opc, in, NULL, vlen, velt_basic_type(n));
|
vn = VectorNode::make(opc, in, NULL, vlen, velt_basic_type(n));
|
||||||
vlen_in_bytes = vn->as_Vector()->length_in_bytes();
|
vlen_in_bytes = vn->as_Vector()->length_in_bytes();
|
||||||
|
} else if (requires_long_to_int_conversion(opc)) {
|
||||||
|
// Java API for Long.bitCount/numberOfLeadingZeros/numberOfTrailingZeros
|
||||||
|
// returns int type, but Vector API for them returns long type. To unify
|
||||||
|
// the implementation in backend, superword splits the vector implementation
|
||||||
|
// for Java API into an execution node with long type plus another node
|
||||||
|
// converting long to int.
|
||||||
|
assert(n->req() == 2, "only one input expected");
|
||||||
|
Node* in = vector_opd(p, 1);
|
||||||
|
Node* longval = VectorNode::make(opc, in, NULL, vlen, T_LONG);
|
||||||
|
_igvn.register_new_node_with_optimizer(longval);
|
||||||
|
_phase->set_ctrl(longval, _phase->get_ctrl(p->at(0)));
|
||||||
|
vn = VectorCastNode::make(Op_VectorCastL2X, longval, T_INT, vlen);
|
||||||
|
vlen_in_bytes = vn->as_Vector()->length_in_bytes();
|
||||||
} else if (VectorNode::is_convert_opcode(opc)) {
|
} else if (VectorNode::is_convert_opcode(opc)) {
|
||||||
assert(n->req() == 2, "only one input expected");
|
assert(n->req() == 2, "only one input expected");
|
||||||
BasicType bt = velt_basic_type(n);
|
BasicType bt = velt_basic_type(n);
|
||||||
@ -3198,27 +3230,11 @@ bool SuperWord::is_vector_use(Node* use, int u_idx) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (VectorNode::is_type_transition_long_to_int(use)) {
|
|
||||||
// PopCountL/CountLeadingZerosL/CountTrailingZerosL takes long and produces
|
|
||||||
// int - hence the special checks on alignment and size.
|
|
||||||
if (u_pk->size() != d_pk->size()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
for (uint i = 0; i < MIN2(d_pk->size(), u_pk->size()); i++) {
|
|
||||||
Node* ui = u_pk->at(i);
|
|
||||||
Node* di = d_pk->at(i);
|
|
||||||
if (alignment(ui) * 2 != alignment(di)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (u_pk->size() != d_pk->size())
|
if (u_pk->size() != d_pk->size())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (longer_type_for_conversion(use) != T_ILLEGAL) {
|
if (longer_type_for_conversion(use) != T_ILLEGAL) {
|
||||||
// type conversion takes a type of a kind of size and produces a type of
|
// These opcodes take a type of a kind of size and produce a type of
|
||||||
// another size - hence the special checks on alignment and size.
|
// another size - hence the special checks on alignment and size.
|
||||||
for (uint i = 0; i < u_pk->size(); i++) {
|
for (uint i = 0; i < u_pk->size(); i++) {
|
||||||
Node* ui = u_pk->at(i);
|
Node* ui = u_pk->at(i);
|
||||||
@ -3467,7 +3483,8 @@ void SuperWord::compute_max_depth() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
BasicType SuperWord::longer_type_for_conversion(Node* n) {
|
BasicType SuperWord::longer_type_for_conversion(Node* n) {
|
||||||
if (!VectorNode::is_convert_opcode(n->Opcode()) ||
|
if (!(VectorNode::is_convert_opcode(n->Opcode()) ||
|
||||||
|
requires_long_to_int_conversion(n->Opcode())) ||
|
||||||
!in_bb(n->in(1))) {
|
!in_bb(n->in(1))) {
|
||||||
return T_ILLEGAL;
|
return T_ILLEGAL;
|
||||||
}
|
}
|
||||||
|
@ -457,6 +457,7 @@ class SuperWord : public ResourceObj {
|
|||||||
bool is_cmov_pack(Node_List* p);
|
bool is_cmov_pack(Node_List* p);
|
||||||
bool is_cmov_pack_internal_node(Node_List* p, Node* nd) { return is_cmov_pack(p) && !nd->is_CMove(); }
|
bool is_cmov_pack_internal_node(Node_List* p, Node* nd) { return is_cmov_pack(p) && !nd->is_CMove(); }
|
||||||
static bool is_cmove_fp_opcode(int opc) { return (opc == Op_CMoveF || opc == Op_CMoveD); }
|
static bool is_cmove_fp_opcode(int opc) { return (opc == Op_CMoveF || opc == Op_CMoveD); }
|
||||||
|
static bool requires_long_to_int_conversion(int opc);
|
||||||
// For pack p, are all idx operands the same?
|
// For pack p, are all idx operands the same?
|
||||||
bool same_inputs(Node_List* p, int idx);
|
bool same_inputs(Node_List* p, int idx);
|
||||||
// CloneMap utilities
|
// CloneMap utilities
|
||||||
|
@ -343,17 +343,6 @@ bool VectorNode::is_muladds2i(Node* n) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool VectorNode::is_type_transition_long_to_int(Node* n) {
|
|
||||||
switch(n->Opcode()) {
|
|
||||||
case Op_PopCountL:
|
|
||||||
case Op_CountLeadingZerosL:
|
|
||||||
case Op_CountTrailingZerosL:
|
|
||||||
return true;
|
|
||||||
default:
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool VectorNode::is_roundopD(Node* n) {
|
bool VectorNode::is_roundopD(Node* n) {
|
||||||
if (n->Opcode() == Op_RoundDoubleMode) {
|
if (n->Opcode() == Op_RoundDoubleMode) {
|
||||||
return true;
|
return true;
|
||||||
|
@ -99,7 +99,6 @@ class VectorNode : public TypeNode {
|
|||||||
static bool is_type_transition_short_to_int(Node* n);
|
static bool is_type_transition_short_to_int(Node* n);
|
||||||
static bool is_type_transition_to_int(Node* n);
|
static bool is_type_transition_to_int(Node* n);
|
||||||
static bool is_muladds2i(Node* n);
|
static bool is_muladds2i(Node* n);
|
||||||
static bool is_type_transition_long_to_int(Node* n);
|
|
||||||
static bool is_roundopD(Node* n);
|
static bool is_roundopD(Node* n);
|
||||||
static bool is_scalar_rotate(Node* n);
|
static bool is_scalar_rotate(Node* n);
|
||||||
static bool is_vector_rotate_supported(int opc, uint vlen, BasicType bt);
|
static bool is_vector_rotate_supported(int opc, uint vlen, BasicType bt);
|
||||||
@ -551,7 +550,9 @@ class PopCountVINode : public VectorNode {
|
|||||||
// Vector popcount long bits
|
// Vector popcount long bits
|
||||||
class PopCountVLNode : public VectorNode {
|
class PopCountVLNode : public VectorNode {
|
||||||
public:
|
public:
|
||||||
PopCountVLNode(Node* in, const TypeVect* vt) : VectorNode(in,vt) {}
|
PopCountVLNode(Node* in, const TypeVect* vt) : VectorNode(in,vt) {
|
||||||
|
assert(vt->element_basic_type() == T_LONG, "must be long");
|
||||||
|
}
|
||||||
virtual int Opcode() const;
|
virtual int Opcode() const;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -1732,7 +1733,10 @@ public:
|
|||||||
class CountLeadingZerosVNode : public VectorNode {
|
class CountLeadingZerosVNode : public VectorNode {
|
||||||
public:
|
public:
|
||||||
CountLeadingZerosVNode(Node* in, const TypeVect* vt)
|
CountLeadingZerosVNode(Node* in, const TypeVect* vt)
|
||||||
: VectorNode(in, vt) {}
|
: VectorNode(in, vt) {
|
||||||
|
assert(in->bottom_type()->is_vect()->element_basic_type() == vt->element_basic_type(),
|
||||||
|
"must be the same");
|
||||||
|
}
|
||||||
|
|
||||||
virtual int Opcode() const;
|
virtual int Opcode() const;
|
||||||
};
|
};
|
||||||
@ -1740,7 +1744,10 @@ class CountLeadingZerosVNode : public VectorNode {
|
|||||||
class CountTrailingZerosVNode : public VectorNode {
|
class CountTrailingZerosVNode : public VectorNode {
|
||||||
public:
|
public:
|
||||||
CountTrailingZerosVNode(Node* in, const TypeVect* vt)
|
CountTrailingZerosVNode(Node* in, const TypeVect* vt)
|
||||||
: VectorNode(in, vt) {}
|
: VectorNode(in, vt) {
|
||||||
|
assert(in->bottom_type()->is_vect()->element_basic_type() == vt->element_basic_type(),
|
||||||
|
"must be the same");
|
||||||
|
}
|
||||||
|
|
||||||
virtual int Opcode() const;
|
virtual int Opcode() const;
|
||||||
};
|
};
|
||||||
|
@ -116,4 +116,20 @@ public class TestDisableAutoVectOpcodes {
|
|||||||
dresult += result;
|
dresult += result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Stores Long.numberOfTrailingZeros(longa[i]) into inta[i] for every i in
// [0, SIZE). The @IR rule lists IRNode.COUNTTRAILINGZEROS_VL under failOn,
// so the IR check is that this loop does not produce that vector node.
// NOTE(review): SIZE, inta and longa are declared outside this hunk.
@Test
|
||||||
|
@IR(failOn = {IRNode.COUNTTRAILINGZEROS_VL})
|
||||||
|
public void testNumberOfTrailingZeros() {
|
||||||
|
for (int i = 0; i < SIZE; ++i) {
|
||||||
|
inta[i] = Long.numberOfTrailingZeros(longa[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stores Long.numberOfLeadingZeros(longa[i]) into inta[i] for every i in
// [0, SIZE). The @IR rule lists IRNode.COUNTLEADINGZEROS_VL under failOn,
// so the IR check is that this loop does not produce that vector node.
// NOTE(review): SIZE, inta and longa are declared outside this hunk.
@Test
|
||||||
|
@IR(failOn = {IRNode.COUNTLEADINGZEROS_VL})
|
||||||
|
public void testNumberOfLeadingZeros() {
|
||||||
|
for (int i = 0; i < SIZE; ++i) {
|
||||||
|
inta[i] = Long.numberOfLeadingZeros(longa[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -714,6 +714,16 @@ public class IRNode {
|
|||||||
superWordNodes(POPCOUNT_VL, "PopCountVL");
|
superWordNodes(POPCOUNT_VL, "PopCountVL");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IR-node placeholder used by @IR rules for vectorized trailing-zero counts.
// The static initializer registers it through superWordNodes(...) with the
// node name string "CountTrailingZerosV"; note that the registered string
// carries no element-type suffix even though the constant is named *_VL.
// NOTE(review): PREFIX, POSTFIX and superWordNodes are defined outside this hunk.
public static final String COUNTTRAILINGZEROS_VL = PREFIX + "COUNTTRAILINGZEROS_VL" + POSTFIX;
|
||||||
|
static {
|
||||||
|
superWordNodes(COUNTTRAILINGZEROS_VL, "CountTrailingZerosV");
|
||||||
|
}
|
||||||
|
|
||||||
|
// IR-node placeholder used by @IR rules for vectorized leading-zero counts.
// The static initializer registers it through superWordNodes(...) with the
// node name string "CountLeadingZerosV"; note that the registered string
// carries no element-type suffix even though the constant is named *_VL.
// NOTE(review): PREFIX, POSTFIX and superWordNodes are defined outside this hunk.
public static final String COUNTLEADINGZEROS_VL = PREFIX + "COUNTLEADINGZEROS_VL" + POSTFIX;
|
||||||
|
static {
|
||||||
|
superWordNodes(COUNTLEADINGZEROS_VL, "CountLeadingZerosV");
|
||||||
|
}
|
||||||
|
|
||||||
public static final String POPULATE_INDEX = PREFIX + "POPULATE_INDEX" + POSTFIX;
|
public static final String POPULATE_INDEX = PREFIX + "POPULATE_INDEX" + POSTFIX;
|
||||||
static {
|
static {
|
||||||
String regex = START + "PopulateIndex" + MID + END;
|
String regex = START + "PopulateIndex" + MID + END;
|
||||||
|
@ -0,0 +1,88 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2022, Arm Limited. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @test
|
||||||
|
* @key randomness
|
||||||
|
* @summary Test vectorization of numberOfTrailingZeros/numberOfLeadingZeros for Long
|
||||||
|
* @requires vm.compiler2.enabled
|
||||||
|
* @requires (os.simpleArch == "x64" & vm.cpu.features ~= ".*avx2.*") |
|
||||||
|
* (os.simpleArch == "aarch64" & vm.cpu.features ~= ".*sve.*" & (vm.opt.UseSVE == "null" | vm.opt.UseSVE > 0))
|
||||||
|
* @library /test/lib /
|
||||||
|
* @run driver compiler.vectorization.TestNumberOfContinuousZeros
|
||||||
|
*/
|
||||||
|
|
||||||
|
package compiler.vectorization;
|
||||||
|
|
||||||
|
import compiler.lib.ir_framework.*;
|
||||||
|
import java.util.Random;
|
||||||
|
import jdk.test.lib.Asserts;
|
||||||
|
|
||||||
|
/**
 * IR-framework test for loops that apply Long.numberOfTrailingZeros and
 * Long.numberOfLeadingZeros to a long[] and store the int results in an int[].
 * Each @Test method carries an @IR rule requiring a count "> 0" of the
 * matching IRNode constant, and checkResult() compares every stored element
 * with the value returned by the same Long.* call.
 */
public class TestNumberOfContinuousZeros {
|
||||||
|
// Source values; filled once by the constructor.
private long[] input;
|
||||||
|
// Destination array written by both test methods.
private int[] output;
|
||||||
|
// Length of both arrays and trip count of every loop in this class.
private static final int LEN = 1024;
|
||||||
|
// Generator used only by the constructor to fill 'input'.
private Random rng;
|
||||||
|

||||||
|
// Entry point: hands control to the IR test framework.
public static void main(String args[]) {
|
||||||
|
TestFramework.run();
|
||||||
|
}
|
||||||
|

||||||
|
// Allocates both arrays and fills 'input' with LEN values from rng.nextLong().
public TestNumberOfContinuousZeros() {
|
||||||
|
input = new long[LEN];
|
||||||
|
output = new int[LEN];
|
||||||
|
// Constant seed: the generated inputs are the same on every run.
rng = new Random(42);
|
||||||
|
for (int i = 0; i < LEN; ++i) {
|
||||||
|
input[i] = rng.nextLong();
|
||||||
|
}
|
||||||
|
}
|
||||||
|

||||||
|
// output[i] = Long.numberOfTrailingZeros(input[i]) for all i in [0, LEN).
// The @IR rule requires a count "> 0" of IRNode.COUNTTRAILINGZEROS_VL.
@Test
|
||||||
|
@IR(counts = {IRNode.COUNTTRAILINGZEROS_VL, "> 0"})
|
||||||
|
public void vectorizeNumberOfTrailingZeros() {
|
||||||
|
for (int i = 0; i < LEN; ++i) {
|
||||||
|
output[i] = Long.numberOfTrailingZeros(input[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|

||||||
|
// output[i] = Long.numberOfLeadingZeros(input[i]) for all i in [0, LEN).
// The @IR rule requires a count "> 0" of IRNode.COUNTLEADINGZEROS_VL.
@Test
|
||||||
|
@IR(counts = {IRNode.COUNTLEADINGZEROS_VL, "> 0"})
|
||||||
|
public void vectorizeNumberOfLeadingZeros() {
|
||||||
|
for (int i = 0; i < LEN; ++i) {
|
||||||
|
output[i] = Long.numberOfLeadingZeros(input[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|

||||||
|
// Runner for both tests above: calls each test method in turn and, after each
// call, asserts that every output[i] equals the corresponding Long.* result
// for input[i]. 'output' is shared, so each check must follow its own call.
@Run(test = {"vectorizeNumberOfTrailingZeros", "vectorizeNumberOfLeadingZeros"})
|
||||||
|
public void checkResult() {
|
||||||
|
vectorizeNumberOfTrailingZeros();
|
||||||
|
for (int i = 0; i < LEN; ++i) {
|
||||||
|
Asserts.assertEquals(output[i], Long.numberOfTrailingZeros(input[i]));
|
||||||
|
}
|
||||||
|
vectorizeNumberOfLeadingZeros();
|
||||||
|
for (int i = 0; i < LEN; ++i) {
|
||||||
|
Asserts.assertEquals(output[i], Long.numberOfLeadingZeros(input[i]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -25,8 +25,8 @@
|
|||||||
* @test
|
* @test
|
||||||
* @summary Test vectorization of popcount for Long
|
* @summary Test vectorization of popcount for Long
|
||||||
* @requires vm.compiler2.enabled
|
* @requires vm.compiler2.enabled
|
||||||
* @requires vm.cpu.features ~= ".*avx512bw.*" | (vm.cpu.features ~= ".*sve.*" & (vm.opt.UseSVE == "null" | vm.opt.UseSVE > 0))
|
* @requires ((os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") & vm.cpu.features ~= ".*avx512bw.*") |
|
||||||
* @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
|
* os.simpleArch == "aarch64"
|
||||||
* @library /test/lib /
|
* @library /test/lib /
|
||||||
* @run driver compiler.vectorization.TestPopCountVectorLong
|
* @run driver compiler.vectorization.TestPopCountVectorLong
|
||||||
*/
|
*/
|
||||||
|
Loading…
x
Reference in New Issue
Block a user