8309583: AArch64: Optimize firstTrue() when amount of elements < 8
Reviewed-by: aph, eliu
This commit is contained in:
parent
87e6fab2c4
commit
45b581b7d5
src/hotspot/cpu/aarch64
@ -5534,39 +5534,10 @@ instruct vstoremask_truecount_neon(iRegINoSp dst, vReg src, immI_gt_1 size, vReg
|
||||
|
||||
// first true
|
||||
|
||||
instruct vmask_firsttrue_lt8e(iRegINoSp dst, vReg src, rFlagsReg cr) %{
|
||||
predicate(UseSVE == 0 &&
|
||||
Matcher::vector_length(n->in(1)) < 8);
|
||||
instruct vmask_firsttrue_neon(iRegINoSp dst, vReg src) %{
|
||||
predicate(UseSVE == 0);
|
||||
match(Set dst (VectorMaskFirstTrue src));
|
||||
effect(KILL cr);
|
||||
format %{ "vmask_firsttrue_lt8e $dst, $src\t# vector < 8 elements (neon). KILL cr" %}
|
||||
ins_encode %{
|
||||
// Returns the index of the first active lane of the
|
||||
// vector mask, or VLENGTH if no lane is active.
|
||||
//
|
||||
// Input "src" is a vector of boolean represented as
|
||||
// bytes with 0x00/0x01 as element values.
|
||||
//
|
||||
// Computed by reversing the bits and counting the leading
|
||||
// zero bytes.
|
||||
BasicType bt = Matcher::vector_element_basic_type(this, $src);
|
||||
assert(bt == T_BOOLEAN, "unsupported type");
|
||||
__ fmovd($dst$$Register, $src$$FloatRegister);
|
||||
__ rbit($dst$$Register, $dst$$Register);
|
||||
__ clz($dst$$Register, $dst$$Register);
|
||||
__ lsrw($dst$$Register, $dst$$Register, 3);
|
||||
__ movw(rscratch1, Matcher::vector_length(this, $src));
|
||||
__ cmpw($dst$$Register, rscratch1);
|
||||
__ cselw($dst$$Register, rscratch1, $dst$$Register, Assembler::GE);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct vmask_firsttrue_8or16e(iRegINoSp dst, vReg src) %{
|
||||
predicate(UseSVE == 0 &&
|
||||
(Matcher::vector_length(n->in(1)) == 8 || Matcher::vector_length(n->in(1)) == 16));
|
||||
match(Set dst (VectorMaskFirstTrue src));
|
||||
format %{ "vmask_firsttrue_8or16e $dst, $src\t# vector 8B/16B (neon)" %}
|
||||
format %{ "vmask_firsttrue_neon $dst, $src" %}
|
||||
ins_encode %{
|
||||
// Returns the index of the first active lane of the
|
||||
// vector mask, or VLENGTH if no lane is active.
|
||||
@ -5579,14 +5550,21 @@ instruct vmask_firsttrue_8or16e(iRegINoSp dst, vReg src) %{
|
||||
|
||||
BasicType bt = Matcher::vector_element_basic_type(this, $src);
|
||||
assert(bt == T_BOOLEAN, "unsupported type");
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src);
|
||||
if (length_in_bytes == 8) {
|
||||
uint vlength = Matcher::vector_length(this, $src);
|
||||
if (vlength <= 8) {
|
||||
__ fmovd($dst$$Register, $src$$FloatRegister);
|
||||
if (vlength == 2 || vlength == 4) {
|
||||
// Special handling for 2B or 4B cases:
|
||||
// Vector mask is moved to a 64-bit general register, but only the low 16/32 bits are
|
||||
// significant for 2B/4B cases. We initialize the 16th/32nd bit as bit 1, so as to generate
|
||||
// the expected result (i.e. VLENGTH) for the case that all lanes are zero.
|
||||
__ orr($dst$$Register, $dst$$Register, vlength == 2 ? 0x10000 : 0x100000000);
|
||||
}
|
||||
__ rbit($dst$$Register, $dst$$Register);
|
||||
__ clz($dst$$Register, $dst$$Register);
|
||||
__ lsrw($dst$$Register, $dst$$Register, 3);
|
||||
} else {
|
||||
assert(length_in_bytes == 16, "must be");
|
||||
assert(vlength == 16, "must be");
|
||||
Label FIRST_TRUE_INDEX;
|
||||
|
||||
// Try to compute the result from lower 64 bits.
|
||||
|
@ -3844,39 +3844,10 @@ instruct vstoremask_truecount_neon(iRegINoSp dst, vReg src, immI_gt_1 size, vReg
|
||||
|
||||
// first true
|
||||
|
||||
instruct vmask_firsttrue_lt8e(iRegINoSp dst, vReg src, rFlagsReg cr) %{
|
||||
predicate(UseSVE == 0 &&
|
||||
Matcher::vector_length(n->in(1)) < 8);
|
||||
instruct vmask_firsttrue_neon(iRegINoSp dst, vReg src) %{
|
||||
predicate(UseSVE == 0);
|
||||
match(Set dst (VectorMaskFirstTrue src));
|
||||
effect(KILL cr);
|
||||
format %{ "vmask_firsttrue_lt8e $dst, $src\t# vector < 8 elements (neon). KILL cr" %}
|
||||
ins_encode %{
|
||||
// Returns the index of the first active lane of the
|
||||
// vector mask, or VLENGTH if no lane is active.
|
||||
//
|
||||
// Input "src" is a vector of boolean represented as
|
||||
// bytes with 0x00/0x01 as element values.
|
||||
//
|
||||
// Computed by reversing the bits and counting the leading
|
||||
// zero bytes.
|
||||
BasicType bt = Matcher::vector_element_basic_type(this, $src);
|
||||
assert(bt == T_BOOLEAN, "unsupported type");
|
||||
__ fmovd($dst$$Register, $src$$FloatRegister);
|
||||
__ rbit($dst$$Register, $dst$$Register);
|
||||
__ clz($dst$$Register, $dst$$Register);
|
||||
__ lsrw($dst$$Register, $dst$$Register, 3);
|
||||
__ movw(rscratch1, Matcher::vector_length(this, $src));
|
||||
__ cmpw($dst$$Register, rscratch1);
|
||||
__ cselw($dst$$Register, rscratch1, $dst$$Register, Assembler::GE);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct vmask_firsttrue_8or16e(iRegINoSp dst, vReg src) %{
|
||||
predicate(UseSVE == 0 &&
|
||||
(Matcher::vector_length(n->in(1)) == 8 || Matcher::vector_length(n->in(1)) == 16));
|
||||
match(Set dst (VectorMaskFirstTrue src));
|
||||
format %{ "vmask_firsttrue_8or16e $dst, $src\t# vector 8B/16B (neon)" %}
|
||||
format %{ "vmask_firsttrue_neon $dst, $src" %}
|
||||
ins_encode %{
|
||||
// Returns the index of the first active lane of the
|
||||
// vector mask, or VLENGTH if no lane is active.
|
||||
@ -3889,14 +3860,21 @@ instruct vmask_firsttrue_8or16e(iRegINoSp dst, vReg src) %{
|
||||
|
||||
BasicType bt = Matcher::vector_element_basic_type(this, $src);
|
||||
assert(bt == T_BOOLEAN, "unsupported type");
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src);
|
||||
if (length_in_bytes == 8) {
|
||||
uint vlength = Matcher::vector_length(this, $src);
|
||||
if (vlength <= 8) {
|
||||
__ fmovd($dst$$Register, $src$$FloatRegister);
|
||||
if (vlength == 2 || vlength == 4) {
|
||||
// Special handling for 2B or 4B cases:
|
||||
// Vector mask is moved to a 64-bit general register, but only the low 16/32 bits are
|
||||
// significant for 2B/4B cases. We initialize the 16th/32nd bit as bit 1, so as to generate
|
||||
// the expected result (i.e. VLENGTH) for the case that all lanes are zero.
|
||||
__ orr($dst$$Register, $dst$$Register, vlength == 2 ? 0x10000 : 0x100000000);
|
||||
}
|
||||
__ rbit($dst$$Register, $dst$$Register);
|
||||
__ clz($dst$$Register, $dst$$Register);
|
||||
__ lsrw($dst$$Register, $dst$$Register, 3);
|
||||
} else {
|
||||
assert(length_in_bytes == 16, "must be");
|
||||
assert(vlength == 16, "must be");
|
||||
Label FIRST_TRUE_INDEX;
|
||||
|
||||
// Try to compute the result from lower 64 bits.
|
||||
|
Loading…
x
Reference in New Issue
Block a user