8301739: AArch64: Add optimized rules for vector compare with immediate for SVE
Reviewed-by: aph, eliu
This commit is contained in:
parent
3d3eaed913
commit
0dca573ca5
@ -4318,7 +4318,18 @@ operand immI_positive()
|
|||||||
// BoolTest condition for signed compare
|
// BoolTest condition for signed compare
|
||||||
operand immI_cmp_cond()
|
operand immI_cmp_cond()
|
||||||
%{
|
%{
|
||||||
predicate(n->get_int() < (int)(BoolTest::unsigned_compare));
|
predicate(!Matcher::is_unsigned_booltest_pred(n->get_int()));
|
||||||
|
match(ConI);
|
||||||
|
|
||||||
|
op_cost(0);
|
||||||
|
format %{ %}
|
||||||
|
interface(CONST_INTER);
|
||||||
|
%}
|
||||||
|
|
||||||
|
// BoolTest condition for unsigned compare
|
||||||
|
operand immI_cmpU_cond()
|
||||||
|
%{
|
||||||
|
predicate(Matcher::is_unsigned_booltest_pred(n->get_int()));
|
||||||
match(ConI);
|
match(ConI);
|
||||||
|
|
||||||
op_cost(0);
|
op_cost(0);
|
||||||
@ -4425,6 +4436,28 @@ operand immI19()
|
|||||||
interface(CONST_INTER);
|
interface(CONST_INTER);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
// 5 bit signed integer
|
||||||
|
operand immI5()
|
||||||
|
%{
|
||||||
|
predicate(Assembler::is_simm(n->get_int(), 5));
|
||||||
|
match(ConI);
|
||||||
|
|
||||||
|
op_cost(0);
|
||||||
|
format %{ %}
|
||||||
|
interface(CONST_INTER);
|
||||||
|
%}
|
||||||
|
|
||||||
|
// 7 bit unsigned integer
|
||||||
|
operand immIU7()
|
||||||
|
%{
|
||||||
|
predicate(Assembler::is_uimm(n->get_int(), 7));
|
||||||
|
match(ConI);
|
||||||
|
|
||||||
|
op_cost(0);
|
||||||
|
format %{ %}
|
||||||
|
interface(CONST_INTER);
|
||||||
|
%}
|
||||||
|
|
||||||
// 12 bit unsigned offset -- for base plus immediate loads
|
// 12 bit unsigned offset -- for base plus immediate loads
|
||||||
operand immIU12()
|
operand immIU12()
|
||||||
%{
|
%{
|
||||||
@ -4567,6 +4600,28 @@ operand immLoffset16()
|
|||||||
interface(CONST_INTER);
|
interface(CONST_INTER);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
// 5 bit signed long integer
|
||||||
|
operand immL5()
|
||||||
|
%{
|
||||||
|
predicate(Assembler::is_simm(n->get_long(), 5));
|
||||||
|
match(ConL);
|
||||||
|
|
||||||
|
op_cost(0);
|
||||||
|
format %{ %}
|
||||||
|
interface(CONST_INTER);
|
||||||
|
%}
|
||||||
|
|
||||||
|
// 7 bit unsigned long integer
|
||||||
|
operand immLU7()
|
||||||
|
%{
|
||||||
|
predicate(Assembler::is_uimm(n->get_long(), 7));
|
||||||
|
match(ConL);
|
||||||
|
|
||||||
|
op_cost(0);
|
||||||
|
format %{ %}
|
||||||
|
interface(CONST_INTER);
|
||||||
|
%}
|
||||||
|
|
||||||
// 8 bit signed value.
|
// 8 bit signed value.
|
||||||
operand immI8()
|
operand immI8()
|
||||||
%{
|
%{
|
||||||
|
@ -5220,6 +5220,118 @@ instruct vmaskcmp_sve(pReg dst, vReg src1, vReg src2, immI cond, rFlagsReg cr) %
|
|||||||
ins_pipe(pipe_slow);
|
ins_pipe(pipe_slow);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
instruct vmaskcmp_immB_sve(pReg dst, vReg src, immI5 imm, immI_cmp_cond cond, rFlagsReg cr) %{
|
||||||
|
predicate(UseSVE > 0);
|
||||||
|
match(Set dst (VectorMaskCmp (Binary src (ReplicateB imm)) cond));
|
||||||
|
effect(KILL cr);
|
||||||
|
format %{ "vmaskcmp_immB_sve $dst, $src, $imm, $cond\t# KILL cr" %}
|
||||||
|
ins_encode %{
|
||||||
|
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||||
|
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||||
|
assert(length_in_bytes == MaxVectorSize, "invalid vector length");
|
||||||
|
__ sve_cmp(condition, $dst$$PRegister, __ B, ptrue, $src$$FloatRegister, (int)$imm$$constant);
|
||||||
|
%}
|
||||||
|
ins_pipe(pipe_slow);
|
||||||
|
%}
|
||||||
|
|
||||||
|
instruct vmaskcmpU_immB_sve(pReg dst, vReg src, immIU7 imm, immI_cmpU_cond cond, rFlagsReg cr) %{
|
||||||
|
predicate(UseSVE > 0);
|
||||||
|
match(Set dst (VectorMaskCmp (Binary src (ReplicateB imm)) cond));
|
||||||
|
effect(KILL cr);
|
||||||
|
format %{ "vmaskcmpU_immB_sve $dst, $src, $imm, $cond\t# KILL cr" %}
|
||||||
|
ins_encode %{
|
||||||
|
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||||
|
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||||
|
assert(length_in_bytes == MaxVectorSize, "invalid vector length");
|
||||||
|
__ sve_cmp(condition, $dst$$PRegister, __ B, ptrue, $src$$FloatRegister, (int)$imm$$constant);
|
||||||
|
%}
|
||||||
|
ins_pipe(pipe_slow);
|
||||||
|
%}
|
||||||
|
|
||||||
|
instruct vmaskcmp_immS_sve(pReg dst, vReg src, immI5 imm, immI_cmp_cond cond, rFlagsReg cr) %{
|
||||||
|
predicate(UseSVE > 0);
|
||||||
|
match(Set dst (VectorMaskCmp (Binary src (ReplicateS imm)) cond));
|
||||||
|
effect(KILL cr);
|
||||||
|
format %{ "vmaskcmp_immS_sve $dst, $src, $imm, $cond\t# KILL cr" %}
|
||||||
|
ins_encode %{
|
||||||
|
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||||
|
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||||
|
assert(length_in_bytes == MaxVectorSize, "invalid vector length");
|
||||||
|
__ sve_cmp(condition, $dst$$PRegister, __ H, ptrue, $src$$FloatRegister, (int)$imm$$constant);
|
||||||
|
%}
|
||||||
|
ins_pipe(pipe_slow);
|
||||||
|
%}
|
||||||
|
|
||||||
|
instruct vmaskcmpU_immS_sve(pReg dst, vReg src, immIU7 imm, immI_cmpU_cond cond, rFlagsReg cr) %{
|
||||||
|
predicate(UseSVE > 0);
|
||||||
|
match(Set dst (VectorMaskCmp (Binary src (ReplicateS imm)) cond));
|
||||||
|
effect(KILL cr);
|
||||||
|
format %{ "vmaskcmpU_immS_sve $dst, $src, $imm, $cond\t# KILL cr" %}
|
||||||
|
ins_encode %{
|
||||||
|
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||||
|
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||||
|
assert(length_in_bytes == MaxVectorSize, "invalid vector length");
|
||||||
|
__ sve_cmp(condition, $dst$$PRegister, __ H, ptrue, $src$$FloatRegister, (int)$imm$$constant);
|
||||||
|
%}
|
||||||
|
ins_pipe(pipe_slow);
|
||||||
|
%}
|
||||||
|
|
||||||
|
instruct vmaskcmp_immI_sve(pReg dst, vReg src, immI5 imm, immI_cmp_cond cond, rFlagsReg cr) %{
|
||||||
|
predicate(UseSVE > 0);
|
||||||
|
match(Set dst (VectorMaskCmp (Binary src (ReplicateI imm)) cond));
|
||||||
|
effect(KILL cr);
|
||||||
|
format %{ "vmaskcmp_immI_sve $dst, $src, $imm, $cond\t# KILL cr" %}
|
||||||
|
ins_encode %{
|
||||||
|
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||||
|
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||||
|
assert(length_in_bytes == MaxVectorSize, "invalid vector length");
|
||||||
|
__ sve_cmp(condition, $dst$$PRegister, __ S, ptrue, $src$$FloatRegister, (int)$imm$$constant);
|
||||||
|
%}
|
||||||
|
ins_pipe(pipe_slow);
|
||||||
|
%}
|
||||||
|
|
||||||
|
instruct vmaskcmpU_immI_sve(pReg dst, vReg src, immIU7 imm, immI_cmpU_cond cond, rFlagsReg cr) %{
|
||||||
|
predicate(UseSVE > 0);
|
||||||
|
match(Set dst (VectorMaskCmp (Binary src (ReplicateI imm)) cond));
|
||||||
|
effect(KILL cr);
|
||||||
|
format %{ "vmaskcmpU_immI_sve $dst, $src, $imm, $cond\t# KILL cr" %}
|
||||||
|
ins_encode %{
|
||||||
|
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||||
|
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||||
|
assert(length_in_bytes == MaxVectorSize, "invalid vector length");
|
||||||
|
__ sve_cmp(condition, $dst$$PRegister, __ S, ptrue, $src$$FloatRegister, (int)$imm$$constant);
|
||||||
|
%}
|
||||||
|
ins_pipe(pipe_slow);
|
||||||
|
%}
|
||||||
|
|
||||||
|
instruct vmaskcmp_immL_sve(pReg dst, vReg src, immL5 imm, immI_cmp_cond cond, rFlagsReg cr) %{
|
||||||
|
predicate(UseSVE > 0);
|
||||||
|
match(Set dst (VectorMaskCmp (Binary src (ReplicateL imm)) cond));
|
||||||
|
effect(KILL cr);
|
||||||
|
format %{ "vmaskcmp_immL_sve $dst, $src, $imm, $cond\t# KILL cr" %}
|
||||||
|
ins_encode %{
|
||||||
|
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||||
|
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||||
|
assert(length_in_bytes == MaxVectorSize, "invalid vector length");
|
||||||
|
__ sve_cmp(condition, $dst$$PRegister, __ D, ptrue, $src$$FloatRegister, (int)$imm$$constant);
|
||||||
|
%}
|
||||||
|
ins_pipe(pipe_slow);
|
||||||
|
%}
|
||||||
|
|
||||||
|
instruct vmaskcmpU_immL_sve(pReg dst, vReg src, immLU7 imm, immI_cmpU_cond cond, rFlagsReg cr) %{
|
||||||
|
predicate(UseSVE > 0);
|
||||||
|
match(Set dst (VectorMaskCmp (Binary src (ReplicateL imm)) cond));
|
||||||
|
effect(KILL cr);
|
||||||
|
format %{ "vmaskcmpU_immL_sve $dst, $src, $imm, $cond\t# KILL cr" %}
|
||||||
|
ins_encode %{
|
||||||
|
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||||
|
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||||
|
assert(length_in_bytes == MaxVectorSize, "invalid vector length");
|
||||||
|
__ sve_cmp(condition, $dst$$PRegister, __ D, ptrue, $src$$FloatRegister, (int)$imm$$constant);
|
||||||
|
%}
|
||||||
|
ins_pipe(pipe_slow);
|
||||||
|
%}
|
||||||
|
|
||||||
instruct vmaskcmp_masked(pReg dst, vReg src1, vReg src2, immI cond,
|
instruct vmaskcmp_masked(pReg dst, vReg src1, vReg src2, immI cond,
|
||||||
pRegGov pg, rFlagsReg cr) %{
|
pRegGov pg, rFlagsReg cr) %{
|
||||||
predicate(UseSVE > 0);
|
predicate(UseSVE > 0);
|
||||||
|
@ -3616,6 +3616,31 @@ instruct vmaskcmp_sve(pReg dst, vReg src1, vReg src2, immI cond, rFlagsReg cr) %
|
|||||||
%}
|
%}
|
||||||
ins_pipe(pipe_slow);
|
ins_pipe(pipe_slow);
|
||||||
%}
|
%}
|
||||||
|
dnl
|
||||||
|
dnl VMASKCMP_SVE_IMM($1 , $2 , $3 , $4 )
|
||||||
|
dnl VMASKCMP_SVE_IMM(element_size, element_type, type_imm, type_condition)
|
||||||
|
define(`VMASKCMP_SVE_IMM', `
|
||||||
|
instruct vmask$4_imm$2_sve(pReg dst, vReg src, $3 imm, immI_$4_cond cond, rFlagsReg cr) %{
|
||||||
|
predicate(UseSVE > 0);
|
||||||
|
match(Set dst (VectorMaskCmp (Binary src (Replicate$2 imm)) cond));
|
||||||
|
effect(KILL cr);
|
||||||
|
format %{ "vmask$4_imm$2_sve $dst, $src, $imm, $cond\t# KILL cr" %}
|
||||||
|
ins_encode %{
|
||||||
|
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||||
|
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||||
|
assert(length_in_bytes == MaxVectorSize, "invalid vector length");
|
||||||
|
__ sve_cmp(condition, $dst$$PRegister, __ $1, ptrue, $src$$FloatRegister, (int)$imm$$constant);
|
||||||
|
%}
|
||||||
|
ins_pipe(pipe_slow);
|
||||||
|
%}')dnl
|
||||||
|
VMASKCMP_SVE_IMM(B, B, immI5, cmp)
|
||||||
|
VMASKCMP_SVE_IMM(B, B, immIU7, cmpU)
|
||||||
|
VMASKCMP_SVE_IMM(H, S, immI5, cmp)
|
||||||
|
VMASKCMP_SVE_IMM(H, S, immIU7, cmpU)
|
||||||
|
VMASKCMP_SVE_IMM(S, I, immI5, cmp)
|
||||||
|
VMASKCMP_SVE_IMM(S, I, immIU7, cmpU)
|
||||||
|
VMASKCMP_SVE_IMM(D, L, immL5, cmp)
|
||||||
|
VMASKCMP_SVE_IMM(D, L, immLU7, cmpU)
|
||||||
|
|
||||||
instruct vmaskcmp_masked(pReg dst, vReg src1, vReg src2, immI cond,
|
instruct vmaskcmp_masked(pReg dst, vReg src1, vReg src2, immI cond,
|
||||||
pRegGov pg, rFlagsReg cr) %{
|
pRegGov pg, rFlagsReg cr) %{
|
||||||
|
@ -3786,50 +3786,78 @@ public:
|
|||||||
INSN(sve_fac, 0b01100101, 0b11, 1); // Floating-point absolute compare vectors
|
INSN(sve_fac, 0b01100101, 0b11, 1); // Floating-point absolute compare vectors
|
||||||
#undef INSN
|
#undef INSN
|
||||||
|
|
||||||
// SVE Integer Compare - Signed Immediate
|
private:
|
||||||
void sve_cmp(Condition cond, PRegister Pd, SIMD_RegVariant T,
|
// Convert Assembler::Condition to op encoding - used by sve integer compare encoding
|
||||||
PRegister Pg, FloatRegister Zn, int imm5) {
|
static int assembler_cond_to_sve_op(Condition cond, bool &is_unsigned) {
|
||||||
starti;
|
if (cond == HI || cond == HS || cond == LO || cond == LS) {
|
||||||
assert(T != Q, "invalid size");
|
is_unsigned = true;
|
||||||
guarantee(-16 <= imm5 && imm5 <= 15, "invalid immediate");
|
} else {
|
||||||
int cond_op;
|
is_unsigned = false;
|
||||||
switch(cond) {
|
}
|
||||||
case EQ: cond_op = 0b1000; break;
|
|
||||||
case NE: cond_op = 0b1001; break;
|
|
||||||
case GE: cond_op = 0b0000; break;
|
|
||||||
case GT: cond_op = 0b0001; break;
|
|
||||||
case LE: cond_op = 0b0011; break;
|
|
||||||
case LT: cond_op = 0b0010; break;
|
|
||||||
default:
|
|
||||||
ShouldNotReachHere();
|
|
||||||
}
|
|
||||||
f(0b00100101, 31, 24), f(T, 23, 22), f(0b0, 21), sf(imm5, 20, 16),
|
|
||||||
f((cond_op >> 1) & 0x7, 15, 13), pgrf(Pg, 10), rf(Zn, 5);
|
|
||||||
f(cond_op & 0x1, 4), prf(Pd, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
// SVE Floating-point compare vector with zero
|
switch (cond) {
|
||||||
void sve_fcm(Condition cond, PRegister Pd, SIMD_RegVariant T,
|
case HI:
|
||||||
PRegister Pg, FloatRegister Zn, double d) {
|
case GT:
|
||||||
starti;
|
return 0b0001;
|
||||||
assert(T != Q, "invalid size");
|
case HS:
|
||||||
guarantee(d == 0.0, "invalid immediate");
|
case GE:
|
||||||
int cond_op;
|
return 0b0000;
|
||||||
switch(cond) {
|
case LO:
|
||||||
case EQ: cond_op = 0b100; break;
|
case LT:
|
||||||
case GT: cond_op = 0b001; break;
|
return 0b0010;
|
||||||
case GE: cond_op = 0b000; break;
|
case LS:
|
||||||
case LT: cond_op = 0b010; break;
|
case LE:
|
||||||
case LE: cond_op = 0b011; break;
|
return 0b0011;
|
||||||
case NE: cond_op = 0b110; break;
|
case EQ:
|
||||||
default:
|
return 0b1000;
|
||||||
ShouldNotReachHere();
|
case NE:
|
||||||
|
return 0b1001;
|
||||||
|
default:
|
||||||
|
ShouldNotReachHere();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
// SVE Integer Compare - 5 bits signed imm and 7 bits unsigned imm
|
||||||
|
void sve_cmp(Condition cond, PRegister Pd, SIMD_RegVariant T,
|
||||||
|
PRegister Pg, FloatRegister Zn, int imm) {
|
||||||
|
starti;
|
||||||
|
assert(T != Q, "invalid size");
|
||||||
|
bool is_unsigned = false;
|
||||||
|
int cond_op = assembler_cond_to_sve_op(cond, is_unsigned);
|
||||||
|
f(is_unsigned ? 0b00100100 : 0b00100101, 31, 24), f(T, 23, 22);
|
||||||
|
f(is_unsigned ? 0b1 : 0b0, 21);
|
||||||
|
if (is_unsigned) {
|
||||||
|
f(imm, 20, 14), f((cond_op >> 1) & 0x1, 13);
|
||||||
|
} else {
|
||||||
|
sf(imm, 20, 16), f((cond_op >> 1) & 0x7, 15, 13);
|
||||||
|
}
|
||||||
|
pgrf(Pg, 10), rf(Zn, 5), f(cond_op & 0x1, 4), prf(Pd, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// SVE Floating-point compare vector with zero
|
||||||
|
void sve_fcm(Condition cond, PRegister Pd, SIMD_RegVariant T,
|
||||||
|
PRegister Pg, FloatRegister Zn, double d) {
|
||||||
|
starti;
|
||||||
|
assert(T != Q, "invalid size");
|
||||||
|
guarantee(d == 0.0, "invalid immediate");
|
||||||
|
int cond_op;
|
||||||
|
switch(cond) {
|
||||||
|
case EQ: cond_op = 0b100; break;
|
||||||
|
case GT: cond_op = 0b001; break;
|
||||||
|
case GE: cond_op = 0b000; break;
|
||||||
|
case LT: cond_op = 0b010; break;
|
||||||
|
case LE: cond_op = 0b011; break;
|
||||||
|
case NE: cond_op = 0b110; break;
|
||||||
|
default:
|
||||||
|
ShouldNotReachHere();
|
||||||
|
}
|
||||||
|
f(0b01100101, 31, 24), f(T, 23, 22), f(0b0100, 21, 18),
|
||||||
|
f((cond_op >> 1) & 0x3, 17, 16), f(0b001, 15, 13),
|
||||||
|
pgrf(Pg, 10), rf(Zn, 5);
|
||||||
|
f(cond_op & 0x1, 4), prf(Pd, 0);
|
||||||
}
|
}
|
||||||
f(0b01100101, 31, 24), f(T, 23, 22), f(0b0100, 21, 18),
|
|
||||||
f((cond_op >> 1) & 0x3, 17, 16), f(0b001, 15, 13),
|
|
||||||
pgrf(Pg, 10), rf(Zn, 5);
|
|
||||||
f(cond_op & 0x1, 4), prf(Pd, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
// SVE unpack vector elements
|
// SVE unpack vector elements
|
||||||
#define INSN(NAME, op) \
|
#define INSN(NAME, op) \
|
||||||
|
@ -1246,10 +1246,6 @@ static inline bool is_vector_popcount_predicate(BasicType bt) {
|
|||||||
(is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
|
(is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool is_unsigned_booltest_pred(int bt) {
|
|
||||||
return ((bt & BoolTest::unsigned_compare) == BoolTest::unsigned_compare);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
|
static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
|
||||||
return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
|
return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
|
||||||
(VM_Version::supports_avx512vl() || vlen_bytes == 64);
|
(VM_Version::supports_avx512vl() || vlen_bytes == 64);
|
||||||
@ -7609,7 +7605,7 @@ instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
|
|||||||
|
|
||||||
instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
|
instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
|
||||||
predicate(n->bottom_type()->isa_vectmask() == NULL &&
|
predicate(n->bottom_type()->isa_vectmask() == NULL &&
|
||||||
!is_unsigned_booltest_pred(n->in(2)->get_int()) &&
|
!Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
|
||||||
Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
|
Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
|
||||||
Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
|
Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
|
||||||
is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
|
is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
|
||||||
@ -7629,7 +7625,7 @@ instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
|
|||||||
|
|
||||||
instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
|
instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
|
||||||
predicate(n->bottom_type()->isa_vectmask() == NULL &&
|
predicate(n->bottom_type()->isa_vectmask() == NULL &&
|
||||||
!is_unsigned_booltest_pred(n->in(2)->get_int()) &&
|
!Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
|
||||||
Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
|
Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
|
||||||
Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
|
Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
|
||||||
is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
|
is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
|
||||||
@ -7650,7 +7646,7 @@ instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xt
|
|||||||
|
|
||||||
instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
|
instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
|
||||||
predicate(n->bottom_type()->isa_vectmask() == NULL &&
|
predicate(n->bottom_type()->isa_vectmask() == NULL &&
|
||||||
is_unsigned_booltest_pred(n->in(2)->get_int()) &&
|
Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
|
||||||
Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
|
Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
|
||||||
Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
|
Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
|
||||||
is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
|
is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
|
||||||
@ -7687,7 +7683,7 @@ instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
|
|||||||
|
|
||||||
int vlen_enc = vector_length_encoding(this, $src1);
|
int vlen_enc = vector_length_encoding(this, $src1);
|
||||||
Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
|
Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
|
||||||
bool is_unsigned = is_unsigned_booltest_pred($cond$$constant);
|
bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
|
||||||
KRegister mask = k0; // The comparison itself is not being masked.
|
KRegister mask = k0; // The comparison itself is not being masked.
|
||||||
bool merge = false;
|
bool merge = false;
|
||||||
BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
|
BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
|
||||||
@ -7721,7 +7717,7 @@ instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
|
|||||||
|
|
||||||
int vlen_enc = vector_length_encoding(this, $src1);
|
int vlen_enc = vector_length_encoding(this, $src1);
|
||||||
Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
|
Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
|
||||||
bool is_unsigned = is_unsigned_booltest_pred($cond$$constant);
|
bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
|
||||||
BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
|
BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
|
||||||
|
|
||||||
// Comparison i
|
// Comparison i
|
||||||
@ -9936,25 +9932,25 @@ instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
|
|||||||
// Comparison i
|
// Comparison i
|
||||||
switch (src1_elem_bt) {
|
switch (src1_elem_bt) {
|
||||||
case T_BYTE: {
|
case T_BYTE: {
|
||||||
bool is_unsigned = is_unsigned_booltest_pred($cond$$constant);
|
bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
|
||||||
Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
|
Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
|
||||||
__ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
|
__ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case T_SHORT: {
|
case T_SHORT: {
|
||||||
bool is_unsigned = is_unsigned_booltest_pred($cond$$constant);
|
bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
|
||||||
Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
|
Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
|
||||||
__ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
|
__ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case T_INT: {
|
case T_INT: {
|
||||||
bool is_unsigned = is_unsigned_booltest_pred($cond$$constant);
|
bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
|
||||||
Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
|
Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
|
||||||
__ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
|
__ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case T_LONG: {
|
case T_LONG: {
|
||||||
bool is_unsigned = is_unsigned_booltest_pred($cond$$constant);
|
bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
|
||||||
Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
|
Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
|
||||||
__ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
|
__ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
|
||||||
break;
|
break;
|
||||||
|
@ -376,6 +376,11 @@ public:
|
|||||||
static BasicType vector_element_basic_type(const Node* n);
|
static BasicType vector_element_basic_type(const Node* n);
|
||||||
static BasicType vector_element_basic_type(const MachNode* use, const MachOper* opnd);
|
static BasicType vector_element_basic_type(const MachNode* use, const MachOper* opnd);
|
||||||
|
|
||||||
|
// Check if given booltest condition is unsigned or not
|
||||||
|
static inline bool is_unsigned_booltest_pred(int bt) {
|
||||||
|
return ((bt & BoolTest::unsigned_compare) == BoolTest::unsigned_compare);
|
||||||
|
}
|
||||||
|
|
||||||
// These calls are all generated by the ADLC
|
// These calls are all generated by the ADLC
|
||||||
|
|
||||||
// Java-Java calling convention
|
// Java-Java calling convention
|
||||||
|
@ -546,6 +546,33 @@ class SVEComparisonWithZero(Instruction):
|
|||||||
str(self.preg), str(self.reg), self._width.astr()))
|
str(self.preg), str(self.reg), self._width.astr()))
|
||||||
return val
|
return val
|
||||||
|
|
||||||
|
class SVEComparisonWithImm(Instruction):
|
||||||
|
def __init__(self, arg):
|
||||||
|
Instruction.__init__(self, "cmp")
|
||||||
|
self.condition = arg
|
||||||
|
self.dest = OperandFactory.create('p').generate()
|
||||||
|
self.reg = SVEVectorRegister().generate()
|
||||||
|
self._width = RegVariant(0, 3)
|
||||||
|
self.preg = OperandFactory.create('P').generate()
|
||||||
|
|
||||||
|
def generate(self):
|
||||||
|
if self.condition in ['HI', 'HS', 'LO', 'LS']:
|
||||||
|
self.immed = random.randint(0, 127)
|
||||||
|
else:
|
||||||
|
self.immed = random.randint(-16, 15)
|
||||||
|
return Instruction.generate(self)
|
||||||
|
|
||||||
|
def cstr(self):
|
||||||
|
return ("%s(%s, %s, %s, %s, %s, %d);"
|
||||||
|
% ("__ sve_" + self._name, "Assembler::" + self.condition,
|
||||||
|
str(self.dest), self._width.cstr(), str(self.preg), str(self.reg), self.immed))
|
||||||
|
|
||||||
|
def astr(self):
|
||||||
|
val = ("%s%s\t%s%s, %s/z, %s%s, #%d"
|
||||||
|
% (self._name, self.condition.lower(), str(self.dest), self._width.astr(),
|
||||||
|
str(self.preg), str(self.reg), self._width.astr(), self.immed))
|
||||||
|
return val
|
||||||
|
|
||||||
class MultiOp():
|
class MultiOp():
|
||||||
|
|
||||||
def multipleForms(self):
|
def multipleForms(self):
|
||||||
@ -1739,6 +1766,8 @@ generate(NEONVectorCompare, neonVectorCompareArgs)
|
|||||||
|
|
||||||
generate(SVEComparisonWithZero, ["EQ", "GT", "GE", "LT", "LE", "NE"])
|
generate(SVEComparisonWithZero, ["EQ", "GT", "GE", "LT", "LE", "NE"])
|
||||||
|
|
||||||
|
generate(SVEComparisonWithImm, ["EQ", "GT", "GE", "LT", "LE", "NE", "HS", "HI", "LS", "LO"])
|
||||||
|
|
||||||
generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);", "ccmn\txzr, xzr, #3, LE"],
|
generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);", "ccmn\txzr, xzr, #3, LE"],
|
||||||
["ccmnw", "__ ccmnw(zr, zr, 5u, Assembler::EQ);", "ccmn\twzr, wzr, #5, EQ"],
|
["ccmnw", "__ ccmnw(zr, zr, 5u, Assembler::EQ);", "ccmn\twzr, wzr, #5, EQ"],
|
||||||
["ccmp", "__ ccmp(zr, 1, 4u, Assembler::NE);", "ccmp\txzr, 1, #4, NE"],
|
["ccmp", "__ ccmp(zr, 1, 4u, Assembler::NE);", "ccmp\txzr, 1, #4, NE"],
|
||||||
|
@ -806,6 +806,18 @@
|
|||||||
__ sve_fcm(Assembler::LE, p0, __ S, p5, z20, 0.0); // fcmle p0.s, p5/z, z20.s, #0.0
|
__ sve_fcm(Assembler::LE, p0, __ S, p5, z20, 0.0); // fcmle p0.s, p5/z, z20.s, #0.0
|
||||||
__ sve_fcm(Assembler::NE, p11, __ D, p6, z27, 0.0); // fcmne p11.d, p6/z, z27.d, #0.0
|
__ sve_fcm(Assembler::NE, p11, __ D, p6, z27, 0.0); // fcmne p11.d, p6/z, z27.d, #0.0
|
||||||
|
|
||||||
|
// SVEComparisonWithImm
|
||||||
|
__ sve_cmp(Assembler::EQ, p12, __ B, p5, z4, 0); // cmpeq p12.b, p5/z, z4.b, #0
|
||||||
|
__ sve_cmp(Assembler::GT, p15, __ H, p2, z5, 12); // cmpgt p15.h, p2/z, z5.h, #12
|
||||||
|
__ sve_cmp(Assembler::GE, p7, __ S, p7, z28, 3); // cmpge p7.s, p7/z, z28.s, #3
|
||||||
|
__ sve_cmp(Assembler::LT, p15, __ H, p4, z5, 15); // cmplt p15.h, p4/z, z5.h, #15
|
||||||
|
__ sve_cmp(Assembler::LE, p9, __ S, p4, z26, -4); // cmple p9.s, p4/z, z26.s, #-4
|
||||||
|
__ sve_cmp(Assembler::NE, p5, __ B, p7, z9, 1); // cmpne p5.b, p7/z, z9.b, #1
|
||||||
|
__ sve_cmp(Assembler::HS, p13, __ D, p1, z27, 43); // cmphs p13.d, p1/z, z27.d, #43
|
||||||
|
__ sve_cmp(Assembler::HI, p10, __ B, p6, z9, 70); // cmphi p10.b, p6/z, z9.b, #70
|
||||||
|
__ sve_cmp(Assembler::LS, p8, __ B, p7, z22, 61); // cmpls p8.b, p7/z, z22.b, #61
|
||||||
|
__ sve_cmp(Assembler::LO, p11, __ S, p5, z17, 11); // cmplo p11.s, p5/z, z17.s, #11
|
||||||
|
|
||||||
// SpecialCases
|
// SpecialCases
|
||||||
__ ccmn(zr, zr, 3u, Assembler::LE); // ccmn xzr, xzr, #3, LE
|
__ ccmn(zr, zr, 3u, Assembler::LE); // ccmn xzr, xzr, #3, LE
|
||||||
__ ccmnw(zr, zr, 5u, Assembler::EQ); // ccmn wzr, wzr, #5, EQ
|
__ ccmnw(zr, zr, 5u, Assembler::EQ); // ccmn wzr, wzr, #5, EQ
|
||||||
@ -1059,215 +1071,215 @@
|
|||||||
__ fmovd(v0, -1.0625); // fmov d0, #-1.0625
|
__ fmovd(v0, -1.0625); // fmov d0, #-1.0625
|
||||||
|
|
||||||
// LSEOp
|
// LSEOp
|
||||||
__ swp(Assembler::xword, r25, r5, r1); // swp x25, x5, [x1]
|
__ swp(Assembler::xword, r15, r6, r12); // swp x15, x6, [x12]
|
||||||
__ ldadd(Assembler::xword, r23, r16, sp); // ldadd x23, x16, [sp]
|
__ ldadd(Assembler::xword, r16, r11, r13); // ldadd x16, x11, [x13]
|
||||||
__ ldbic(Assembler::xword, r5, r12, r9); // ldclr x5, x12, [x9]
|
__ ldbic(Assembler::xword, r23, r1, r30); // ldclr x23, x1, [x30]
|
||||||
__ ldeor(Assembler::xword, r28, r15, r29); // ldeor x28, x15, [x29]
|
__ ldeor(Assembler::xword, r19, r5, r17); // ldeor x19, x5, [x17]
|
||||||
__ ldorr(Assembler::xword, r22, zr, r19); // ldset x22, xzr, [x19]
|
__ ldorr(Assembler::xword, r2, r16, r22); // ldset x2, x16, [x22]
|
||||||
__ ldsmin(Assembler::xword, zr, r5, r14); // ldsmin xzr, x5, [x14]
|
__ ldsmin(Assembler::xword, r13, r10, r21); // ldsmin x13, x10, [x21]
|
||||||
__ ldsmax(Assembler::xword, r16, zr, r15); // ldsmax x16, xzr, [x15]
|
__ ldsmax(Assembler::xword, r29, r27, r12); // ldsmax x29, x27, [x12]
|
||||||
__ ldumin(Assembler::xword, r27, r20, r16); // ldumin x27, x20, [x16]
|
__ ldumin(Assembler::xword, r27, r3, r1); // ldumin x27, x3, [x1]
|
||||||
__ ldumax(Assembler::xword, r12, r11, r9); // ldumax x12, x11, [x9]
|
__ ldumax(Assembler::xword, zr, r24, r19); // ldumax xzr, x24, [x19]
|
||||||
|
|
||||||
// LSEOp
|
// LSEOp
|
||||||
__ swpa(Assembler::xword, r6, r30, r17); // swpa x6, x30, [x17]
|
__ swpa(Assembler::xword, r17, r9, r28); // swpa x17, x9, [x28]
|
||||||
__ ldadda(Assembler::xword, r27, r28, r30); // ldadda x27, x28, [x30]
|
__ ldadda(Assembler::xword, r27, r15, r7); // ldadda x27, x15, [x7]
|
||||||
__ ldbica(Assembler::xword, r7, r10, r20); // ldclra x7, x10, [x20]
|
__ ldbica(Assembler::xword, r21, r23, sp); // ldclra x21, x23, [sp]
|
||||||
__ ldeora(Assembler::xword, r10, r4, r24); // ldeora x10, x4, [x24]
|
__ ldeora(Assembler::xword, r25, r2, sp); // ldeora x25, x2, [sp]
|
||||||
__ ldorra(Assembler::xword, r17, r17, r22); // ldseta x17, x17, [x22]
|
__ ldorra(Assembler::xword, r27, r16, r10); // ldseta x27, x16, [x10]
|
||||||
__ ldsmina(Assembler::xword, r3, r29, r15); // ldsmina x3, x29, [x15]
|
__ ldsmina(Assembler::xword, r23, r19, r3); // ldsmina x23, x19, [x3]
|
||||||
__ ldsmaxa(Assembler::xword, r22, r19, r19); // ldsmaxa x22, x19, [x19]
|
__ ldsmaxa(Assembler::xword, r16, r0, r25); // ldsmaxa x16, x0, [x25]
|
||||||
__ ldumina(Assembler::xword, r22, r2, r15); // ldumina x22, x2, [x15]
|
__ ldumina(Assembler::xword, r26, r23, r2); // ldumina x26, x23, [x2]
|
||||||
__ ldumaxa(Assembler::xword, r6, r12, r16); // ldumaxa x6, x12, [x16]
|
__ ldumaxa(Assembler::xword, r16, r12, r4); // ldumaxa x16, x12, [x4]
|
||||||
|
|
||||||
// LSEOp
|
// LSEOp
|
||||||
__ swpal(Assembler::xword, r11, r13, r23); // swpal x11, x13, [x23]
|
__ swpal(Assembler::xword, r28, r30, r29); // swpal x28, x30, [x29]
|
||||||
__ ldaddal(Assembler::xword, r1, r30, r19); // ldaddal x1, x30, [x19]
|
__ ldaddal(Assembler::xword, r16, r27, r6); // ldaddal x16, x27, [x6]
|
||||||
__ ldbical(Assembler::xword, r5, r17, r2); // ldclral x5, x17, [x2]
|
__ ldbical(Assembler::xword, r9, r29, r15); // ldclral x9, x29, [x15]
|
||||||
__ ldeoral(Assembler::xword, r16, r22, r13); // ldeoral x16, x22, [x13]
|
__ ldeoral(Assembler::xword, r7, r4, r7); // ldeoral x7, x4, [x7]
|
||||||
__ ldorral(Assembler::xword, r10, r21, r29); // ldsetal x10, x21, [x29]
|
__ ldorral(Assembler::xword, r15, r9, r23); // ldsetal x15, x9, [x23]
|
||||||
__ ldsminal(Assembler::xword, r27, r12, r27); // ldsminal x27, x12, [x27]
|
__ ldsminal(Assembler::xword, r8, r2, r28); // ldsminal x8, x2, [x28]
|
||||||
__ ldsmaxal(Assembler::xword, r3, r1, sp); // ldsmaxal x3, x1, [sp]
|
__ ldsmaxal(Assembler::xword, r21, zr, r5); // ldsmaxal x21, xzr, [x5]
|
||||||
__ lduminal(Assembler::xword, r24, r19, r17); // lduminal x24, x19, [x17]
|
__ lduminal(Assembler::xword, r27, r0, r17); // lduminal x27, x0, [x17]
|
||||||
__ ldumaxal(Assembler::xword, r9, r28, r27); // ldumaxal x9, x28, [x27]
|
__ ldumaxal(Assembler::xword, r15, r4, r26); // ldumaxal x15, x4, [x26]
|
||||||
|
|
||||||
// LSEOp
|
// LSEOp
|
||||||
__ swpl(Assembler::xword, r15, r7, r21); // swpl x15, x7, [x21]
|
__ swpl(Assembler::xword, r8, r28, r22); // swpl x8, x28, [x22]
|
||||||
__ ldaddl(Assembler::xword, r23, zr, r25); // ldaddl x23, xzr, [x25]
|
__ ldaddl(Assembler::xword, r27, r27, r25); // ldaddl x27, x27, [x25]
|
||||||
__ ldbicl(Assembler::xword, r2, zr, r27); // ldclrl x2, xzr, [x27]
|
__ ldbicl(Assembler::xword, r23, r0, r4); // ldclrl x23, x0, [x4]
|
||||||
__ ldeorl(Assembler::xword, r16, r10, r23); // ldeorl x16, x10, [x23]
|
__ ldeorl(Assembler::xword, r6, r16, r0); // ldeorl x6, x16, [x0]
|
||||||
__ ldorrl(Assembler::xword, r19, r3, r15); // ldsetl x19, x3, [x15]
|
__ ldorrl(Assembler::xword, r4, r15, r1); // ldsetl x4, x15, [x1]
|
||||||
__ ldsminl(Assembler::xword, r0, r25, r26); // ldsminl x0, x25, [x26]
|
__ ldsminl(Assembler::xword, r10, r7, r5); // ldsminl x10, x7, [x5]
|
||||||
__ ldsmaxl(Assembler::xword, r23, r2, r15); // ldsmaxl x23, x2, [x15]
|
__ ldsmaxl(Assembler::xword, r10, r28, r7); // ldsmaxl x10, x28, [x7]
|
||||||
__ lduminl(Assembler::xword, r12, r4, r28); // lduminl x12, x4, [x28]
|
__ lduminl(Assembler::xword, r20, r23, r21); // lduminl x20, x23, [x21]
|
||||||
__ ldumaxl(Assembler::xword, r30, r29, r16); // ldumaxl x30, x29, [x16]
|
__ ldumaxl(Assembler::xword, r6, r11, r8); // ldumaxl x6, x11, [x8]
|
||||||
|
|
||||||
// LSEOp
|
// LSEOp
|
||||||
__ swp(Assembler::word, r27, r6, r9); // swp w27, w6, [x9]
|
__ swp(Assembler::word, r17, zr, r6); // swp w17, wzr, [x6]
|
||||||
__ ldadd(Assembler::word, r29, r16, r7); // ldadd w29, w16, [x7]
|
__ ldadd(Assembler::word, r17, r2, r12); // ldadd w17, w2, [x12]
|
||||||
__ ldbic(Assembler::word, r4, r7, r15); // ldclr w4, w7, [x15]
|
__ ldbic(Assembler::word, r30, r29, r3); // ldclr w30, w29, [x3]
|
||||||
__ ldeor(Assembler::word, r9, r23, r8); // ldeor w9, w23, [x8]
|
__ ldeor(Assembler::word, r27, r22, r29); // ldeor w27, w22, [x29]
|
||||||
__ ldorr(Assembler::word, r2, r28, r21); // ldset w2, w28, [x21]
|
__ ldorr(Assembler::word, r14, r13, r28); // ldset w14, w13, [x28]
|
||||||
__ ldsmin(Assembler::word, zr, r5, r27); // ldsmin wzr, w5, [x27]
|
__ ldsmin(Assembler::word, r17, r24, r5); // ldsmin w17, w24, [x5]
|
||||||
__ ldsmax(Assembler::word, r0, r17, r15); // ldsmax w0, w17, [x15]
|
__ ldsmax(Assembler::word, r2, r14, r10); // ldsmax w2, w14, [x10]
|
||||||
__ ldumin(Assembler::word, r4, r26, r8); // ldumin w4, w26, [x8]
|
__ ldumin(Assembler::word, r16, r11, r27); // ldumin w16, w11, [x27]
|
||||||
__ ldumax(Assembler::word, r28, r22, r27); // ldumax w28, w22, [x27]
|
__ ldumax(Assembler::word, r23, r12, r4); // ldumax w23, w12, [x4]
|
||||||
|
|
||||||
// LSEOp
|
// LSEOp
|
||||||
__ swpa(Assembler::word, r27, r25, r23); // swpa w27, w25, [x23]
|
__ swpa(Assembler::word, r22, r17, r4); // swpa w22, w17, [x4]
|
||||||
__ ldadda(Assembler::word, r0, r4, r6); // ldadda w0, w4, [x6]
|
__ ldadda(Assembler::word, r1, r19, r16); // ldadda w1, w19, [x16]
|
||||||
__ ldbica(Assembler::word, r16, r0, r4); // ldclra w16, w0, [x4]
|
__ ldbica(Assembler::word, r16, r13, r14); // ldclra w16, w13, [x14]
|
||||||
__ ldeora(Assembler::word, r15, r1, r10); // ldeora w15, w1, [x10]
|
__ ldeora(Assembler::word, r12, r2, r17); // ldeora w12, w2, [x17]
|
||||||
__ ldorra(Assembler::word, r7, r5, r10); // ldseta w7, w5, [x10]
|
__ ldorra(Assembler::word, r3, r21, r23); // ldseta w3, w21, [x23]
|
||||||
__ ldsmina(Assembler::word, r28, r7, r20); // ldsmina w28, w7, [x20]
|
__ ldsmina(Assembler::word, r5, r6, r7); // ldsmina w5, w6, [x7]
|
||||||
__ ldsmaxa(Assembler::word, r23, r21, r6); // ldsmaxa w23, w21, [x6]
|
__ ldsmaxa(Assembler::word, r19, r13, r28); // ldsmaxa w19, w13, [x28]
|
||||||
__ ldumina(Assembler::word, r11, r8, r17); // ldumina w11, w8, [x17]
|
__ ldumina(Assembler::word, r17, r16, r6); // ldumina w17, w16, [x6]
|
||||||
__ ldumaxa(Assembler::word, zr, r6, r17); // ldumaxa wzr, w6, [x17]
|
__ ldumaxa(Assembler::word, r2, r29, r3); // ldumaxa w2, w29, [x3]
|
||||||
|
|
||||||
// LSEOp
|
// LSEOp
|
||||||
__ swpal(Assembler::word, r2, r12, r30); // swpal w2, w12, [x30]
|
__ swpal(Assembler::word, r4, r6, r15); // swpal w4, w6, [x15]
|
||||||
__ ldaddal(Assembler::word, r29, r3, r27); // ldaddal w29, w3, [x27]
|
__ ldaddal(Assembler::word, r20, r13, r12); // ldaddal w20, w13, [x12]
|
||||||
__ ldbical(Assembler::word, r22, r29, r14); // ldclral w22, w29, [x14]
|
__ ldbical(Assembler::word, r20, r8, r25); // ldclral w20, w8, [x25]
|
||||||
__ ldeoral(Assembler::word, r13, r28, r17); // ldeoral w13, w28, [x17]
|
__ ldeoral(Assembler::word, r20, r19, r0); // ldeoral w20, w19, [x0]
|
||||||
__ ldorral(Assembler::word, r24, r5, r2); // ldsetal w24, w5, [x2]
|
__ ldorral(Assembler::word, r11, r24, r6); // ldsetal w11, w24, [x6]
|
||||||
__ ldsminal(Assembler::word, r14, r10, r16); // ldsminal w14, w10, [x16]
|
__ ldsminal(Assembler::word, r20, zr, r14); // ldsminal w20, wzr, [x14]
|
||||||
__ ldsmaxal(Assembler::word, r11, r27, r23); // ldsmaxal w11, w27, [x23]
|
__ ldsmaxal(Assembler::word, r16, r6, r0); // ldsmaxal w16, w6, [x0]
|
||||||
__ lduminal(Assembler::word, r12, r4, r22); // lduminal w12, w4, [x22]
|
__ lduminal(Assembler::word, r7, r15, r19); // lduminal w7, w15, [x19]
|
||||||
__ ldumaxal(Assembler::word, r17, r4, r1); // ldumaxal w17, w4, [x1]
|
__ ldumaxal(Assembler::word, r26, r9, r10); // ldumaxal w26, w9, [x10]
|
||||||
|
|
||||||
// LSEOp
|
// LSEOp
|
||||||
__ swpl(Assembler::word, r19, r16, r15); // swpl w19, w16, [x15]
|
__ swpl(Assembler::word, r23, r21, r22); // swpl w23, w21, [x22]
|
||||||
__ ldaddl(Assembler::word, r13, r14, r12); // ldaddl w13, w14, [x12]
|
__ ldaddl(Assembler::word, r28, r2, r3); // ldaddl w28, w2, [x3]
|
||||||
__ ldbicl(Assembler::word, r2, r17, r3); // ldclrl w2, w17, [x3]
|
__ ldbicl(Assembler::word, r15, r19, r20); // ldclrl w15, w19, [x20]
|
||||||
__ ldeorl(Assembler::word, r21, r23, r5); // ldeorl w21, w23, [x5]
|
__ ldeorl(Assembler::word, r7, r4, r29); // ldeorl w7, w4, [x29]
|
||||||
__ ldorrl(Assembler::word, r6, r7, r19); // ldsetl w6, w7, [x19]
|
__ ldorrl(Assembler::word, r7, r0, r9); // ldsetl w7, w0, [x9]
|
||||||
__ ldsminl(Assembler::word, r13, r28, r17); // ldsminl w13, w28, [x17]
|
__ ldsminl(Assembler::word, r16, r20, r23); // ldsminl w16, w20, [x23]
|
||||||
__ ldsmaxl(Assembler::word, r16, r6, r2); // ldsmaxl w16, w6, [x2]
|
__ ldsmaxl(Assembler::word, r4, r16, r10); // ldsmaxl w4, w16, [x10]
|
||||||
__ lduminl(Assembler::word, r29, r3, r4); // lduminl w29, w3, [x4]
|
__ lduminl(Assembler::word, r23, r11, r25); // lduminl w23, w11, [x25]
|
||||||
__ ldumaxl(Assembler::word, r6, r16, r20); // ldumaxl w6, w16, [x20]
|
__ ldumaxl(Assembler::word, r6, zr, r16); // ldumaxl w6, wzr, [x16]
|
||||||
|
|
||||||
// SHA3SIMDOp
|
// SHA3SIMDOp
|
||||||
__ bcax(v13, __ T16B, v12, v19, v8); // bcax v13.16B, v12.16B, v19.16B, v8.16B
|
__ bcax(v13, __ T16B, v22, v11, v1); // bcax v13.16B, v22.16B, v11.16B, v1.16B
|
||||||
__ eor3(v24, __ T16B, v19, v17, v0); // eor3 v24.16B, v19.16B, v17.16B, v0.16B
|
__ eor3(v13, __ T16B, v8, v20, v16); // eor3 v13.16B, v8.16B, v20.16B, v16.16B
|
||||||
__ rax1(v10, __ T2D, v23, v6); // rax1 v10.2D, v23.2D, v6.2D
|
__ rax1(v25, __ T2D, v15, v4); // rax1 v25.2D, v15.2D, v4.2D
|
||||||
__ xar(v19, __ T2D, v30, v13, 33); // xar v19.2D, v30.2D, v13.2D, #33
|
__ xar(v4, __ T2D, v17, v8, 13); // xar v4.2D, v17.2D, v8.2D, #13
|
||||||
|
|
||||||
// SHA512SIMDOp
|
// SHA512SIMDOp
|
||||||
__ sha512h(v6, __ T2D, v0, v7); // sha512h q6, q0, v7.2D
|
__ sha512h(v29, __ T2D, v4, v28); // sha512h q29, q4, v28.2D
|
||||||
__ sha512h2(v14, __ T2D, v17, v25); // sha512h2 q14, q17, v25.2D
|
__ sha512h2(v16, __ T2D, v29, v26); // sha512h2 q16, q29, v26.2D
|
||||||
__ sha512su0(v8, __ T2D, v10); // sha512su0 v8.2D, v10.2D
|
__ sha512su0(v9, __ T2D, v14); // sha512su0 v9.2D, v14.2D
|
||||||
__ sha512su1(v22, __ T2D, v20, v22); // sha512su1 v22.2D, v20.2D, v22.2D
|
__ sha512su1(v2, __ T2D, v11, v28); // sha512su1 v2.2D, v11.2D, v28.2D
|
||||||
|
|
||||||
// SVEBinaryImmOp
|
// SVEBinaryImmOp
|
||||||
__ sve_add(z27, __ B, 31u); // add z27.b, z27.b, #0x1f
|
__ sve_add(z3, __ B, 10u); // add z3.b, z3.b, #0xa
|
||||||
__ sve_sub(z15, __ S, 167u); // sub z15.s, z15.s, #0xa7
|
__ sve_sub(z26, __ S, 150u); // sub z26.s, z26.s, #0x96
|
||||||
__ sve_and(z7, __ B, 62u); // and z7.b, z7.b, #0x3e
|
__ sve_and(z14, __ H, 57343u); // and z14.h, z14.h, #0xdfff
|
||||||
__ sve_eor(z0, __ H, 51199u); // eor z0.h, z0.h, #0xc7ff
|
__ sve_eor(z24, __ B, 191u); // eor z24.b, z24.b, #0xbf
|
||||||
__ sve_orr(z22, __ B, 96u); // orr z22.b, z22.b, #0x60
|
__ sve_orr(z17, __ S, 4294966791u); // orr z17.s, z17.s, #0xfffffe07
|
||||||
|
|
||||||
// SVEBinaryImmOp
|
// SVEBinaryImmOp
|
||||||
__ sve_add(z22, __ H, 207u); // add z22.h, z22.h, #0xcf
|
__ sve_add(z20, __ S, 3u); // add z20.s, z20.s, #0x3
|
||||||
__ sve_sub(z5, __ D, 133u); // sub z5.d, z5.d, #0x85
|
__ sve_sub(z4, __ S, 196u); // sub z4.s, z4.s, #0xc4
|
||||||
__ sve_and(z13, __ S, 496u); // and z13.s, z13.s, #0x1f0
|
__ sve_and(z4, __ S, 4286578691u); // and z4.s, z4.s, #0xff800003
|
||||||
__ sve_eor(z13, __ H, 33279u); // eor z13.h, z13.h, #0x81ff
|
__ sve_eor(z25, __ S, 33553408u); // eor z25.s, z25.s, #0x1fffc00
|
||||||
__ sve_orr(z25, __ H, 508u); // orr z25.h, z25.h, #0x1fc
|
__ sve_orr(z8, __ H, 49663u); // orr z8.h, z8.h, #0xc1ff
|
||||||
|
|
||||||
// SVEBinaryImmOp
|
// SVEBinaryImmOp
|
||||||
__ sve_add(z17, __ H, 54u); // add z17.h, z17.h, #0x36
|
__ sve_add(z30, __ S, 36u); // add z30.s, z30.s, #0x24
|
||||||
__ sve_sub(z29, __ B, 234u); // sub z29.b, z29.b, #0xea
|
__ sve_sub(z30, __ B, 85u); // sub z30.b, z30.b, #0x55
|
||||||
__ sve_and(z16, __ D, 4503599627354112u); // and z16.d, z16.d, #0xfffffffffc000
|
__ sve_and(z19, __ H, 4032u); // and z19.h, z19.h, #0xfc0
|
||||||
__ sve_eor(z14, __ B, 254u); // eor z14.b, z14.b, #0xfe
|
__ sve_eor(z7, __ D, 274877904896u); // eor z7.d, z7.d, #0x3ffffff800
|
||||||
__ sve_orr(z3, __ B, 243u); // orr z3.b, z3.b, #0xf3
|
__ sve_orr(z27, __ B, 243u); // orr z27.b, z27.b, #0xf3
|
||||||
|
|
||||||
// SVEBinaryImmOp
|
// SVEBinaryImmOp
|
||||||
__ sve_add(z21, __ S, 119u); // add z21.s, z21.s, #0x77
|
__ sve_add(z23, __ H, 132u); // add z23.h, z23.h, #0x84
|
||||||
__ sve_sub(z8, __ S, 179u); // sub z8.s, z8.s, #0xb3
|
__ sve_sub(z30, __ S, 183u); // sub z30.s, z30.s, #0xb7
|
||||||
__ sve_and(z24, __ B, 191u); // and z24.b, z24.b, #0xbf
|
__ sve_and(z20, __ D, 4503599627354112u); // and z20.d, z20.d, #0xfffffffffc000
|
||||||
__ sve_eor(z17, __ S, 4294966791u); // eor z17.s, z17.s, #0xfffffe07
|
__ sve_eor(z13, __ S, 4042322160u); // eor z13.s, z13.s, #0xf0f0f0f0
|
||||||
__ sve_orr(z20, __ S, 491520u); // orr z20.s, z20.s, #0x78000
|
__ sve_orr(z28, __ H, 32256u); // orr z28.h, z28.h, #0x7e00
|
||||||
|
|
||||||
// SVEBinaryImmOp
|
// SVEBinaryImmOp
|
||||||
__ sve_add(z17, __ D, 36u); // add z17.d, z17.d, #0x24
|
__ sve_add(z11, __ S, 13u); // add z11.s, z11.s, #0xd
|
||||||
__ sve_sub(z19, __ B, 195u); // sub z19.b, z19.b, #0xc3
|
__ sve_sub(z24, __ H, 159u); // sub z24.h, z24.h, #0x9f
|
||||||
__ sve_and(z25, __ S, 33553408u); // and z25.s, z25.s, #0x1fffc00
|
__ sve_and(z13, __ S, 2151677951u); // and z13.s, z13.s, #0x803fffff
|
||||||
__ sve_eor(z8, __ H, 49663u); // eor z8.h, z8.h, #0xc1ff
|
__ sve_eor(z4, __ B, 124u); // eor z4.b, z4.b, #0x7c
|
||||||
__ sve_orr(z30, __ S, 4294967231u); // orr z30.s, z30.s, #0xffffffbf
|
__ sve_orr(z7, __ H, 32768u); // orr z7.h, z7.h, #0x8000
|
||||||
|
|
||||||
// SVEBinaryImmOp
|
// SVEBinaryImmOp
|
||||||
__ sve_add(z1, __ H, 163u); // add z1.h, z1.h, #0xa3
|
__ sve_add(z4, __ H, 243u); // add z4.h, z4.h, #0xf3
|
||||||
__ sve_sub(z12, __ B, 75u); // sub z12.b, z12.b, #0x4b
|
__ sve_sub(z5, __ B, 86u); // sub z5.b, z5.b, #0x56
|
||||||
__ sve_and(z7, __ D, 274877904896u); // and z7.d, z7.d, #0x3ffffff800
|
__ sve_and(z21, __ D, 8064u); // and z21.d, z21.d, #0x1f80
|
||||||
__ sve_eor(z27, __ B, 243u); // eor z27.b, z27.b, #0xf3
|
__ sve_eor(z9, __ S, 130023424u); // eor z9.s, z9.s, #0x7c00000
|
||||||
__ sve_orr(z23, __ H, 65534u); // orr z23.h, z23.h, #0xfffe
|
__ sve_orr(z24, __ B, 62u); // orr z24.b, z24.b, #0x3e
|
||||||
|
|
||||||
// SVEVectorOp
|
// SVEVectorOp
|
||||||
__ sve_add(z22, __ D, z22, z20); // add z22.d, z22.d, z20.d
|
__ sve_add(z23, __ S, z28, z13); // add z23.s, z28.s, z13.s
|
||||||
__ sve_sub(z28, __ S, z9, z13); // sub z28.s, z9.s, z13.s
|
__ sve_sub(z10, __ S, z26, z12); // sub z10.s, z26.s, z12.s
|
||||||
__ sve_fadd(z7, __ S, z20, z28); // fadd z7.s, z20.s, z28.s
|
__ sve_fadd(z30, __ S, z17, z14); // fadd z30.s, z17.s, z14.s
|
||||||
__ sve_fmul(z11, __ D, z13, z11); // fmul z11.d, z13.d, z11.d
|
__ sve_fmul(z29, __ D, z16, z21); // fmul z29.d, z16.d, z21.d
|
||||||
__ sve_fsub(z1, __ D, z24, z8); // fsub z1.d, z24.d, z8.d
|
__ sve_fsub(z7, __ S, z19, z2); // fsub z7.s, z19.s, z2.s
|
||||||
__ sve_abs(z13, __ S, p4, z17); // abs z13.s, p4/m, z17.s
|
__ sve_abs(z26, __ S, p4, z9); // abs z26.s, p4/m, z9.s
|
||||||
__ sve_add(z4, __ H, p0, z3); // add z4.h, p0/m, z4.h, z3.h
|
__ sve_add(z17, __ B, p5, z0); // add z17.b, p5/m, z17.b, z0.b
|
||||||
__ sve_and(z7, __ S, p3, z14); // and z7.s, p3/m, z7.s, z14.s
|
__ sve_and(z2, __ B, p6, z14); // and z2.b, p6/m, z2.b, z14.b
|
||||||
__ sve_asr(z4, __ B, p3, z29); // asr z4.b, p3/m, z4.b, z29.b
|
__ sve_asr(z11, __ S, p5, z14); // asr z11.s, p5/m, z11.s, z14.s
|
||||||
__ sve_bic(z0, __ D, p2, z21); // bic z0.d, p2/m, z0.d, z21.d
|
__ sve_bic(z29, __ B, p3, z3); // bic z29.b, p3/m, z29.b, z3.b
|
||||||
__ sve_clz(z3, __ S, p0, z9); // clz z3.s, p0/m, z9.s
|
__ sve_clz(z22, __ D, p2, z3); // clz z22.d, p2/m, z3.d
|
||||||
__ sve_cnt(z28, __ B, p2, z24); // cnt z28.b, p2/m, z24.b
|
__ sve_cnt(z27, __ S, p0, z19); // cnt z27.s, p0/m, z19.s
|
||||||
__ sve_eor(z19, __ D, p1, z23); // eor z19.d, p1/m, z19.d, z23.d
|
__ sve_eor(z7, __ H, p6, z21); // eor z7.h, p6/m, z7.h, z21.h
|
||||||
__ sve_lsl(z13, __ D, p5, z10); // lsl z13.d, p5/m, z13.d, z10.d
|
__ sve_lsl(z5, __ B, p2, z25); // lsl z5.b, p2/m, z5.b, z25.b
|
||||||
__ sve_lsr(z12, __ S, p4, z30); // lsr z12.s, p4/m, z12.s, z30.s
|
__ sve_lsr(z21, __ B, p4, z17); // lsr z21.b, p4/m, z21.b, z17.b
|
||||||
__ sve_mul(z14, __ S, p0, z29); // mul z14.s, p0/m, z14.s, z29.s
|
__ sve_mul(z3, __ H, p2, z19); // mul z3.h, p2/m, z3.h, z19.h
|
||||||
__ sve_neg(z21, __ S, p5, z7); // neg z21.s, p5/m, z7.s
|
__ sve_neg(z7, __ S, p3, z14); // neg z7.s, p3/m, z14.s
|
||||||
__ sve_not(z2, __ S, p0, z26); // not z2.s, p0/m, z26.s
|
__ sve_not(z17, __ D, p2, z13); // not z17.d, p2/m, z13.d
|
||||||
__ sve_orr(z9, __ S, p4, z17); // orr z9.s, p4/m, z9.s, z17.s
|
__ sve_orr(z17, __ H, p7, z17); // orr z17.h, p7/m, z17.h, z17.h
|
||||||
__ sve_rbit(z0, __ D, p1, z2); // rbit z0.d, p1/m, z2.d
|
__ sve_rbit(z15, __ S, p3, z26); // rbit z15.s, p3/m, z26.s
|
||||||
__ sve_revb(z14, __ D, p1, z11); // revb z14.d, p1/m, z11.d
|
__ sve_revb(z27, __ H, p5, z7); // revb z27.h, p5/m, z7.h
|
||||||
__ sve_smax(z14, __ H, p4, z29); // smax z14.h, p4/m, z14.h, z29.h
|
__ sve_smax(z5, __ H, p7, z27); // smax z5.h, p7/m, z5.h, z27.h
|
||||||
__ sve_smin(z3, __ H, p0, z22); // smin z3.h, p0/m, z3.h, z22.h
|
__ sve_smin(z0, __ S, p3, z24); // smin z0.s, p3/m, z0.s, z24.s
|
||||||
__ sve_sub(z3, __ B, p6, z27); // sub z3.b, p6/m, z3.b, z27.b
|
__ sve_sub(z20, __ S, p0, z3); // sub z20.s, p0/m, z20.s, z3.s
|
||||||
__ sve_fabs(z19, __ D, p5, z7); // fabs z19.d, p5/m, z7.d
|
__ sve_fabs(z25, __ D, p1, z25); // fabs z25.d, p1/m, z25.d
|
||||||
__ sve_fadd(z21, __ S, p3, z5); // fadd z21.s, p3/m, z21.s, z5.s
|
__ sve_fadd(z17, __ S, p4, z1); // fadd z17.s, p4/m, z17.s, z1.s
|
||||||
__ sve_fdiv(z25, __ D, p1, z21); // fdiv z25.d, p1/m, z25.d, z21.d
|
__ sve_fdiv(z14, __ S, p7, z13); // fdiv z14.s, p7/m, z14.s, z13.s
|
||||||
__ sve_fmax(z17, __ S, p0, z3); // fmax z17.s, p0/m, z17.s, z3.s
|
__ sve_fmax(z17, __ D, p0, z30); // fmax z17.d, p0/m, z17.d, z30.d
|
||||||
__ sve_fmin(z19, __ S, p3, z7); // fmin z19.s, p3/m, z19.s, z7.s
|
__ sve_fmin(z22, __ S, p5, z29); // fmin z22.s, p5/m, z22.s, z29.s
|
||||||
__ sve_fmul(z14, __ S, p4, z17); // fmul z14.s, p4/m, z14.s, z17.s
|
__ sve_fmul(z8, __ S, p0, z0); // fmul z8.s, p0/m, z8.s, z0.s
|
||||||
__ sve_fneg(z13, __ D, p6, z17); // fneg z13.d, p6/m, z17.d
|
__ sve_fneg(z23, __ D, p5, z0); // fneg z23.d, p5/m, z0.d
|
||||||
__ sve_frintm(z17, __ S, p2, z15); // frintm z17.s, p2/m, z15.s
|
__ sve_frintm(z25, __ S, p6, z23); // frintm z25.s, p6/m, z23.s
|
||||||
__ sve_frintn(z26, __ D, p5, z27); // frintn z26.d, p5/m, z27.d
|
__ sve_frintn(z21, __ S, p5, z1); // frintn z21.s, p5/m, z1.s
|
||||||
__ sve_frintp(z7, __ D, p2, z5); // frintp z7.d, p2/m, z5.d
|
__ sve_frintp(z10, __ D, p5, z11); // frintp z10.d, p5/m, z11.d
|
||||||
__ sve_fsqrt(z27, __ S, p2, z0); // fsqrt z27.s, p2/m, z0.s
|
__ sve_fsqrt(z23, __ D, p6, z8); // fsqrt z23.d, p6/m, z8.d
|
||||||
__ sve_fsub(z24, __ S, p5, z20); // fsub z24.s, p5/m, z24.s, z20.s
|
__ sve_fsub(z17, __ D, p5, z19); // fsub z17.d, p5/m, z17.d, z19.d
|
||||||
__ sve_fmad(z3, __ D, p5, z25, z5); // fmad z3.d, p5/m, z25.d, z5.d
|
__ sve_fmad(z4, __ D, p5, z13, z30); // fmad z4.d, p5/m, z13.d, z30.d
|
||||||
__ sve_fmla(z29, __ S, p4, z17, z1); // fmla z29.s, p4/m, z17.s, z1.s
|
__ sve_fmla(z30, __ D, p7, z25, z17); // fmla z30.d, p7/m, z25.d, z17.d
|
||||||
__ sve_fmls(z14, __ D, p7, z13, z0); // fmls z14.d, p7/m, z13.d, z0.d
|
__ sve_fmls(z14, __ D, p2, z12, z28); // fmls z14.d, p2/m, z12.d, z28.d
|
||||||
__ sve_fmsb(z2, __ D, p7, z20, z22); // fmsb z2.d, p7/m, z20.d, z22.d
|
__ sve_fmsb(z5, __ S, p0, z13, z13); // fmsb z5.s, p0/m, z13.s, z13.s
|
||||||
__ sve_fnmad(z29, __ S, p3, z8, z2); // fnmad z29.s, p3/m, z8.s, z2.s
|
__ sve_fnmad(z7, __ S, p2, z11, z19); // fnmad z7.s, p2/m, z11.s, z19.s
|
||||||
__ sve_fnmsb(z14, __ D, p5, z22, z0); // fnmsb z14.d, p5/m, z22.d, z0.d
|
__ sve_fnmsb(z25, __ D, p3, z2, z3); // fnmsb z25.d, p3/m, z2.d, z3.d
|
||||||
__ sve_fnmla(z25, __ D, p6, z23, z12); // fnmla z25.d, p6/m, z23.d, z12.d
|
__ sve_fnmla(z0, __ D, p5, z5, z20); // fnmla z0.d, p5/m, z5.d, z20.d
|
||||||
__ sve_fnmls(z21, __ D, p0, z1, z10); // fnmls z21.d, p0/m, z1.d, z10.d
|
__ sve_fnmls(z28, __ S, p3, z13, z8); // fnmls z28.s, p3/m, z13.s, z8.s
|
||||||
__ sve_mla(z11, __ H, p5, z23, z23); // mla z11.h, p5/m, z23.h, z23.h
|
__ sve_mla(z29, __ B, p0, z14, z27); // mla z29.b, p0/m, z14.b, z27.b
|
||||||
__ sve_mls(z30, __ S, p4, z19, z19); // mls z30.s, p4/m, z19.s, z19.s
|
__ sve_mls(z3, __ H, p6, z8, z24); // mls z3.h, p6/m, z8.h, z24.h
|
||||||
__ sve_and(z4, z20, z13); // and z4.d, z20.d, z13.d
|
__ sve_and(z1, z25, z10); // and z1.d, z25.d, z10.d
|
||||||
__ sve_eor(z22, z30, z30); // eor z22.d, z30.d, z30.d
|
__ sve_eor(z1, z20, z25); // eor z1.d, z20.d, z25.d
|
||||||
__ sve_orr(z17, z17, z14); // orr z17.d, z17.d, z14.d
|
__ sve_orr(z28, z19, z16); // orr z28.d, z19.d, z16.d
|
||||||
__ sve_bic(z12, z28, z20); // bic z12.d, z28.d, z20.d
|
__ sve_bic(z27, z13, z1); // bic z27.d, z13.d, z1.d
|
||||||
__ sve_uzp1(z1, __ B, z13, z13); // uzp1 z1.b, z13.b, z13.b
|
__ sve_uzp1(z11, __ B, z9, z1); // uzp1 z11.b, z9.b, z1.b
|
||||||
__ sve_uzp2(z7, __ S, z10, z11); // uzp2 z7.s, z10.s, z11.s
|
__ sve_uzp2(z1, __ H, z27, z26); // uzp2 z1.h, z27.h, z26.h
|
||||||
__ sve_fabd(z4, __ S, p6, z15); // fabd z4.s, p6/m, z4.s, z15.s
|
__ sve_fabd(z2, __ D, p1, z29); // fabd z2.d, p1/m, z2.d, z29.d
|
||||||
__ sve_bext(z3, __ S, z29, z0); // bext z3.s, z29.s, z0.s
|
__ sve_bext(z24, __ D, z2, z2); // bext z24.d, z2.d, z2.d
|
||||||
__ sve_bdep(z5, __ D, z20, z30); // bdep z5.d, z20.d, z30.d
|
__ sve_bdep(z3, __ H, z25, z28); // bdep z3.h, z25.h, z28.h
|
||||||
__ sve_eor3(z13, z13, z8); // eor3 z13.d, z13.d, z13.d, z8.d
|
__ sve_eor3(z3, z22, z13); // eor3 z3.d, z3.d, z22.d, z13.d
|
||||||
|
|
||||||
// SVEReductionOp
|
// SVEReductionOp
|
||||||
__ sve_andv(v29, __ D, p0, z14); // andv d29, p0, z14.d
|
__ sve_andv(v27, __ H, p4, z4); // andv h27, p4, z4.h
|
||||||
__ sve_orv(v3, __ H, p0, z25); // orv h3, p0, z25.h
|
__ sve_orv(v26, __ S, p4, z2); // orv s26, p4, z2.s
|
||||||
__ sve_eorv(v24, __ D, p2, z1); // eorv d24, p2, z1.d
|
__ sve_eorv(v1, __ S, p7, z7); // eorv s1, p7, z7.s
|
||||||
__ sve_smaxv(v10, __ S, p3, z1); // smaxv s10, p3, z1.s
|
__ sve_smaxv(v30, __ H, p7, z16); // smaxv h30, p7, z16.h
|
||||||
__ sve_sminv(v25, __ S, p1, z28); // sminv s25, p1, z28.s
|
__ sve_sminv(v21, __ B, p4, z28); // sminv b21, p4, z28.b
|
||||||
__ sve_fminv(v16, __ S, p1, z27); // fminv s16, p1, z27.s
|
__ sve_fminv(v21, __ D, p1, z12); // fminv d21, p1, z12.d
|
||||||
__ sve_fmaxv(v1, __ S, p7, z11); // fmaxv s1, p7, z11.s
|
__ sve_fmaxv(v11, __ S, p2, z10); // fmaxv s11, p2, z10.s
|
||||||
__ sve_fadda(v1, __ D, p0, z1); // fadda d1, p0, d1, z1.d
|
__ sve_fadda(v0, __ D, p1, z22); // fadda d0, p1, d0, z22.d
|
||||||
__ sve_uaddv(v26, __ B, p3, z2); // uaddv d26, p3, z2.b
|
__ sve_uaddv(v20, __ H, p1, z3); // uaddv d20, p1, z3.h
|
||||||
|
|
||||||
__ bind(forth);
|
__ bind(forth);
|
||||||
|
|
||||||
@ -1286,30 +1298,30 @@
|
|||||||
0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061,
|
0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061,
|
||||||
0x120cb166, 0x321764bc, 0x52174681, 0x720c0227,
|
0x120cb166, 0x321764bc, 0x52174681, 0x720c0227,
|
||||||
0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01,
|
0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01,
|
||||||
0x14000000, 0x17ffffd7, 0x1400041e, 0x94000000,
|
0x14000000, 0x17ffffd7, 0x14000428, 0x94000000,
|
||||||
0x97ffffd4, 0x9400041b, 0x3400000a, 0x34fffa2a,
|
0x97ffffd4, 0x94000425, 0x3400000a, 0x34fffa2a,
|
||||||
0x3400830a, 0x35000008, 0x35fff9c8, 0x350082a8,
|
0x3400844a, 0x35000008, 0x35fff9c8, 0x350083e8,
|
||||||
0xb400000b, 0xb4fff96b, 0xb400824b, 0xb500001d,
|
0xb400000b, 0xb4fff96b, 0xb400838b, 0xb500001d,
|
||||||
0xb5fff91d, 0xb50081fd, 0x10000013, 0x10fff8b3,
|
0xb5fff91d, 0xb500833d, 0x10000013, 0x10fff8b3,
|
||||||
0x10008193, 0x90000013, 0x36300016, 0x3637f836,
|
0x100082d3, 0x90000013, 0x36300016, 0x3637f836,
|
||||||
0x36308116, 0x3758000c, 0x375ff7cc, 0x375880ac,
|
0x36308256, 0x3758000c, 0x375ff7cc, 0x375881ec,
|
||||||
0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc,
|
0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc,
|
||||||
0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f,
|
0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f,
|
||||||
0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016,
|
0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016,
|
||||||
0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0,
|
0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0,
|
||||||
0x54007e80, 0x54000001, 0x54fff541, 0x54007e21,
|
0x54007fc0, 0x54000001, 0x54fff541, 0x54007f61,
|
||||||
0x54000002, 0x54fff4e2, 0x54007dc2, 0x54000002,
|
0x54000002, 0x54fff4e2, 0x54007f02, 0x54000002,
|
||||||
0x54fff482, 0x54007d62, 0x54000003, 0x54fff423,
|
0x54fff482, 0x54007ea2, 0x54000003, 0x54fff423,
|
||||||
0x54007d03, 0x54000003, 0x54fff3c3, 0x54007ca3,
|
0x54007e43, 0x54000003, 0x54fff3c3, 0x54007de3,
|
||||||
0x54000004, 0x54fff364, 0x54007c44, 0x54000005,
|
0x54000004, 0x54fff364, 0x54007d84, 0x54000005,
|
||||||
0x54fff305, 0x54007be5, 0x54000006, 0x54fff2a6,
|
0x54fff305, 0x54007d25, 0x54000006, 0x54fff2a6,
|
||||||
0x54007b86, 0x54000007, 0x54fff247, 0x54007b27,
|
0x54007cc6, 0x54000007, 0x54fff247, 0x54007c67,
|
||||||
0x54000008, 0x54fff1e8, 0x54007ac8, 0x54000009,
|
0x54000008, 0x54fff1e8, 0x54007c08, 0x54000009,
|
||||||
0x54fff189, 0x54007a69, 0x5400000a, 0x54fff12a,
|
0x54fff189, 0x54007ba9, 0x5400000a, 0x54fff12a,
|
||||||
0x54007a0a, 0x5400000b, 0x54fff0cb, 0x540079ab,
|
0x54007b4a, 0x5400000b, 0x54fff0cb, 0x54007aeb,
|
||||||
0x5400000c, 0x54fff06c, 0x5400794c, 0x5400000d,
|
0x5400000c, 0x54fff06c, 0x54007a8c, 0x5400000d,
|
||||||
0x54fff00d, 0x540078ed, 0x5400000e, 0x54ffefae,
|
0x54fff00d, 0x54007a2d, 0x5400000e, 0x54ffefae,
|
||||||
0x5400788e, 0x5400000f, 0x54ffef4f, 0x5400782f,
|
0x540079ce, 0x5400000f, 0x54ffef4f, 0x5400796f,
|
||||||
0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60,
|
0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60,
|
||||||
0xd44cad80, 0xd503201f, 0xd503203f, 0xd503205f,
|
0xd44cad80, 0xd503201f, 0xd503203f, 0xd503205f,
|
||||||
0xd503209f, 0xd50320bf, 0xd503219f, 0xd50323bf,
|
0xd503209f, 0xd50320bf, 0xd503219f, 0xd50323bf,
|
||||||
@ -1444,112 +1456,114 @@
|
|||||||
0x4e6ce56a, 0x2ebae738, 0x6ea3e441, 0x6eede58b,
|
0x4e6ce56a, 0x2ebae738, 0x6ea3e441, 0x6eede58b,
|
||||||
0x2e20e7fe, 0x6e2ce56a, 0x6e71e60f, 0x65922c43,
|
0x2e20e7fe, 0x6e2ce56a, 0x6e71e60f, 0x65922c43,
|
||||||
0x65d02219, 0x65d02560, 0x65d13dc4, 0x65913690,
|
0x65d02219, 0x65d02560, 0x65d13dc4, 0x65913690,
|
||||||
0x65d33b6b, 0xba5fd3e3, 0x3a5f03e5, 0xfa411be4,
|
0x65d33b6b, 0x2500948c, 0x254c08bf, 0x25831f87,
|
||||||
0x7a42cbe2, 0x93df03ff, 0xc820ffff, 0x8822fc7f,
|
0x254f30af, 0x259c3359, 0x25019d35, 0x24eac76d,
|
||||||
0xc8247cbf, 0x88267fff, 0x4e010fe0, 0x5e040420,
|
0x2431993a, 0x242f7ed8, 0x24a2f62b, 0xba5fd3e3,
|
||||||
0x4e081fe1, 0x4e0c1fe1, 0x4e0a1fe1, 0x4e071fe1,
|
0x3a5f03e5, 0xfa411be4, 0x7a42cbe2, 0x93df03ff,
|
||||||
0x4e042c20, 0x4e062c20, 0x4e052c20, 0x4e083c20,
|
0xc820ffff, 0x8822fc7f, 0xc8247cbf, 0x88267fff,
|
||||||
0x0e0c3c20, 0x0e0a3c20, 0x0e073c20, 0x9eae0020,
|
0x4e010fe0, 0x5e040420, 0x4e081fe1, 0x4e0c1fe1,
|
||||||
0x0f03f409, 0x6f03f40e, 0x4cc0ac3f, 0x0ea1b820,
|
0x4e0a1fe1, 0x4e071fe1, 0x4e042c20, 0x4e062c20,
|
||||||
0x4e21c862, 0x4e61b8a4, 0x05a08020, 0x05104fe0,
|
0x4e052c20, 0x4e083c20, 0x0e0c3c20, 0x0e0a3c20,
|
||||||
0x05505001, 0x05906fe2, 0x05d03005, 0x05101fea,
|
0x0e073c20, 0x9eae0020, 0x0f03f409, 0x6f03f40e,
|
||||||
0x05901feb, 0x04b0e3e0, 0x0470e7e1, 0x042f9c20,
|
0x4cc0ac3f, 0x0ea1b820, 0x4e21c862, 0x4e61b8a4,
|
||||||
0x043f9c35, 0x047f9c20, 0x04ff9c20, 0x04299420,
|
0x05a08020, 0x05104fe0, 0x05505001, 0x05906fe2,
|
||||||
0x04319160, 0x0461943e, 0x04a19020, 0x04038100,
|
0x05d03005, 0x05101fea, 0x05901feb, 0x04b0e3e0,
|
||||||
0x040381a0, 0x040387e1, 0x04438be2, 0x04c38fe3,
|
0x0470e7e1, 0x042f9c20, 0x043f9c35, 0x047f9c20,
|
||||||
0x040181e0, 0x04018100, 0x04018621, 0x04418b22,
|
0x04ff9c20, 0x04299420, 0x04319160, 0x0461943e,
|
||||||
0x04418822, 0x04818c23, 0x040081e0, 0x04008120,
|
0x04a19020, 0x04038100, 0x040381a0, 0x040387e1,
|
||||||
0x04008761, 0x04008621, 0x04408822, 0x04808c23,
|
0x04438be2, 0x04c38fe3, 0x040181e0, 0x04018100,
|
||||||
0x042053ff, 0x047f5401, 0x25208028, 0x2538cfe0,
|
0x04018621, 0x04418b22, 0x04418822, 0x04818c23,
|
||||||
0x2578d001, 0x25b8efe2, 0x25f8f007, 0x2538dfea,
|
0x040081e0, 0x04008120, 0x04008761, 0x04008621,
|
||||||
0x25b8dfeb, 0xa400a3e0, 0xa420a7e0, 0xa4484be0,
|
0x04408822, 0x04808c23, 0x042053ff, 0x047f5401,
|
||||||
0xa467afe0, 0xa4a8a7ea, 0xa547a814, 0xa4084ffe,
|
0x25208028, 0x2538cfe0, 0x2578d001, 0x25b8efe2,
|
||||||
0xa55c53e0, 0xa5e1540b, 0xe400fbf6, 0xe408ffff,
|
0x25f8f007, 0x2538dfea, 0x25b8dfeb, 0xa400a3e0,
|
||||||
0xe420e7e0, 0xe4484be0, 0xe460efe0, 0xe547e400,
|
0xa420a7e0, 0xa4484be0, 0xa467afe0, 0xa4a8a7ea,
|
||||||
0xe4014be0, 0xe4a84fe0, 0xe5f15000, 0x858043e0,
|
0xa547a814, 0xa4084ffe, 0xa55c53e0, 0xa5e1540b,
|
||||||
0x85a043ff, 0xe59f5d08, 0x0420e3e9, 0x0460e3ea,
|
0xe400fbf6, 0xe408ffff, 0xe420e7e0, 0xe4484be0,
|
||||||
0x04a0e3eb, 0x04e0e3ec, 0x25104042, 0x25104871,
|
0xe460efe0, 0xe547e400, 0xe4014be0, 0xe4a84fe0,
|
||||||
0x25904861, 0x25904c92, 0x05344020, 0x05744041,
|
0xe5f15000, 0x858043e0, 0x85a043ff, 0xe59f5d08,
|
||||||
0x05b44062, 0x05f44083, 0x252c8840, 0x253c1420,
|
0x0420e3e9, 0x0460e3ea, 0x04a0e3eb, 0x04e0e3ec,
|
||||||
0x25681572, 0x25a21ce3, 0x25ea1e34, 0x253c0421,
|
0x25104042, 0x25104871, 0x25904861, 0x25904c92,
|
||||||
0x25680572, 0x25a20ce3, 0x25ea0e34, 0x0522c020,
|
0x05344020, 0x05744041, 0x05b44062, 0x05f44083,
|
||||||
0x05e6c0a4, 0x2401a001, 0x2443a051, 0x24858881,
|
0x252c8840, 0x253c1420, 0x25681572, 0x25a21ce3,
|
||||||
0x24c78cd1, 0x24850891, 0x24c70cc1, 0x250f9001,
|
0x25ea1e34, 0x253c0421, 0x25680572, 0x25a20ce3,
|
||||||
0x25508051, 0x25802491, 0x25df28c1, 0x25850c81,
|
0x25ea0e34, 0x0522c020, 0x05e6c0a4, 0x2401a001,
|
||||||
0x251e10d1, 0x65816001, 0x65c36051, 0x65854891,
|
0x2443a051, 0x24858881, 0x24c78cd1, 0x24850891,
|
||||||
0x65c74cc1, 0x05733820, 0x05b238a4, 0x05f138e6,
|
0x24c70cc1, 0x250f9001, 0x25508051, 0x25802491,
|
||||||
0x0570396a, 0x65d0a001, 0x65d6a443, 0x65d4a826,
|
0x25df28c1, 0x25850c81, 0x251e10d1, 0x65816001,
|
||||||
0x6594ac26, 0x6554ac26, 0x6556ac26, 0x6552ac26,
|
0x65c36051, 0x65854891, 0x65c74cc1, 0x05733820,
|
||||||
0x65cbac85, 0x65caac01, 0x6589ac85, 0x6588ac01,
|
0x05b238a4, 0x05f138e6, 0x0570396a, 0x65d0a001,
|
||||||
0x65c9ac85, 0x65c8ac01, 0x65dea833, 0x659ca509,
|
0x65d6a443, 0x65d4a826, 0x6594ac26, 0x6554ac26,
|
||||||
0x65d8a801, 0x65dcac01, 0x655cb241, 0x0520a1e0,
|
0x6556ac26, 0x6552ac26, 0x65cbac85, 0x65caac01,
|
||||||
0x0521a601, 0x052281e0, 0x05238601, 0x04a14026,
|
0x6589ac85, 0x6588ac01, 0x65c9ac85, 0x65c8ac01,
|
||||||
0x042244a6, 0x046344a6, 0x04a444a6, 0x04e544a7,
|
0x65dea833, 0x659ca509, 0x65d8a801, 0x65dcac01,
|
||||||
0x0568aca7, 0x05b23230, 0x853040af, 0xc5b040af,
|
0x655cb241, 0x0520a1e0, 0x0521a601, 0x052281e0,
|
||||||
0xe57080af, 0xe5b080af, 0x25034440, 0x254054c4,
|
0x05238601, 0x04a14026, 0x042244a6, 0x046344a6,
|
||||||
0x25034640, 0x25415a05, 0x25834440, 0x25c54489,
|
0x04a444a6, 0x04e544a7, 0x0568aca7, 0x05b23230,
|
||||||
0x250b5d3a, 0x2550dc20, 0x2518e3e1, 0x2518e021,
|
0x853040af, 0xc5b040af, 0xe57080af, 0xe5b080af,
|
||||||
0x2518e0a1, 0x2518e121, 0x2518e1a1, 0x2558e3e2,
|
0x25034440, 0x254054c4, 0x25034640, 0x25415a05,
|
||||||
0x2558e042, 0x2558e0c2, 0x2558e142, 0x2598e3e3,
|
0x25834440, 0x25c54489, 0x250b5d3a, 0x2550dc20,
|
||||||
0x2598e063, 0x2598e0e3, 0x2598e163, 0x25d8e3e4,
|
0x2518e3e1, 0x2518e021, 0x2518e0a1, 0x2518e121,
|
||||||
0x25d8e084, 0x25d8e104, 0x25d8e184, 0x2518e407,
|
0x2518e1a1, 0x2558e3e2, 0x2558e042, 0x2558e0c2,
|
||||||
0x05214800, 0x05614800, 0x05a14800, 0x05e14800,
|
0x2558e142, 0x2598e3e3, 0x2598e063, 0x2598e0e3,
|
||||||
0x05214c00, 0x05614c00, 0x05a14c00, 0x05e14c00,
|
0x2598e163, 0x25d8e3e4, 0x25d8e084, 0x25d8e104,
|
||||||
0x05304001, 0x05314001, 0x05a18610, 0x05e18610,
|
0x25d8e184, 0x2518e407, 0x05214800, 0x05614800,
|
||||||
0x05271e11, 0x6545e891, 0x6585e891, 0x65c5e891,
|
0x05a14800, 0x05e14800, 0x05214c00, 0x05614c00,
|
||||||
0x6545c891, 0x6585c891, 0x65c5c891, 0x45b0c210,
|
0x05a14c00, 0x05e14c00, 0x05304001, 0x05314001,
|
||||||
0x45f1c231, 0x1e601000, 0x1e603000, 0x1e621000,
|
0x05a18610, 0x05e18610, 0x05271e11, 0x6545e891,
|
||||||
0x1e623000, 0x1e641000, 0x1e643000, 0x1e661000,
|
0x6585e891, 0x65c5e891, 0x6545c891, 0x6585c891,
|
||||||
0x1e663000, 0x1e681000, 0x1e683000, 0x1e6a1000,
|
0x65c5c891, 0x45b0c210, 0x45f1c231, 0x1e601000,
|
||||||
0x1e6a3000, 0x1e6c1000, 0x1e6c3000, 0x1e6e1000,
|
0x1e603000, 0x1e621000, 0x1e623000, 0x1e641000,
|
||||||
0x1e6e3000, 0x1e701000, 0x1e703000, 0x1e721000,
|
0x1e643000, 0x1e661000, 0x1e663000, 0x1e681000,
|
||||||
0x1e723000, 0x1e741000, 0x1e743000, 0x1e761000,
|
0x1e683000, 0x1e6a1000, 0x1e6a3000, 0x1e6c1000,
|
||||||
0x1e763000, 0x1e781000, 0x1e783000, 0x1e7a1000,
|
0x1e6c3000, 0x1e6e1000, 0x1e6e3000, 0x1e701000,
|
||||||
0x1e7a3000, 0x1e7c1000, 0x1e7c3000, 0x1e7e1000,
|
0x1e703000, 0x1e721000, 0x1e723000, 0x1e741000,
|
||||||
0x1e7e3000, 0xf8398025, 0xf83703f0, 0xf825112c,
|
0x1e743000, 0x1e761000, 0x1e763000, 0x1e781000,
|
||||||
0xf83c23af, 0xf836327f, 0xf83f51c5, 0xf83041ff,
|
0x1e783000, 0x1e7a1000, 0x1e7a3000, 0x1e7c1000,
|
||||||
0xf83b7214, 0xf82c612b, 0xf8a6823e, 0xf8bb03dc,
|
0x1e7c3000, 0x1e7e1000, 0x1e7e3000, 0xf82f8186,
|
||||||
0xf8a7128a, 0xf8aa2304, 0xf8b132d1, 0xf8a351fd,
|
0xf83001ab, 0xf83713c1, 0xf8332225, 0xf82232d0,
|
||||||
0xf8b64273, 0xf8b671e2, 0xf8a6620c, 0xf8eb82ed,
|
0xf82d52aa, 0xf83d419b, 0xf83b7023, 0xf83f6278,
|
||||||
0xf8e1027e, 0xf8e51051, 0xf8f021b6, 0xf8ea33b5,
|
0xf8b18389, 0xf8bb00ef, 0xf8b513f7, 0xf8b923e2,
|
||||||
0xf8fb536c, 0xf8e343e1, 0xf8f87233, 0xf8e9637c,
|
0xf8bb3150, 0xf8b75073, 0xf8b04320, 0xf8ba7057,
|
||||||
0xf86f82a7, 0xf877033f, 0xf862137f, 0xf87022ea,
|
0xf8b0608c, 0xf8fc83be, 0xf8f000db, 0xf8e911fd,
|
||||||
0xf87331e3, 0xf8605359, 0xf87741e2, 0xf86c7384,
|
0xf8e720e4, 0xf8ef32e9, 0xf8e85382, 0xf8f540bf,
|
||||||
0xf87e621d, 0xb83b8126, 0xb83d00f0, 0xb82411e7,
|
0xf8fb7220, 0xf8ef6344, 0xf86882dc, 0xf87b033b,
|
||||||
0xb8292117, 0xb82232bc, 0xb83f5365, 0xb82041f1,
|
0xf8771080, 0xf8662010, 0xf864302f, 0xf86a50a7,
|
||||||
0xb824711a, 0xb83c6376, 0xb8bb82f9, 0xb8a000c4,
|
0xf86a40fc, 0xf87472b7, 0xf866610b, 0xb83180df,
|
||||||
0xb8b01080, 0xb8af2141, 0xb8a73145, 0xb8bc5287,
|
0xb8310182, 0xb83e107d, 0xb83b23b6, 0xb82e338d,
|
||||||
0xb8b740d5, 0xb8ab7228, 0xb8bf6226, 0xb8e283cc,
|
0xb83150b8, 0xb822414e, 0xb830736b, 0xb837608c,
|
||||||
0xb8fd0363, 0xb8f611dd, 0xb8ed223c, 0xb8f83045,
|
0xb8b68091, 0xb8a10213, 0xb8b011cd, 0xb8ac2222,
|
||||||
0xb8ee520a, 0xb8eb42fb, 0xb8ec72c4, 0xb8f16024,
|
0xb8a332f5, 0xb8a550e6, 0xb8b3438d, 0xb8b170d0,
|
||||||
0xb87381f0, 0xb86d018e, 0xb8621071, 0xb87520b7,
|
0xb8a2607d, 0xb8e481e6, 0xb8f4018d, 0xb8f41328,
|
||||||
0xb8663267, 0xb86d523c, 0xb8704046, 0xb87d7083,
|
0xb8f42013, 0xb8eb30d8, 0xb8f451df, 0xb8f04006,
|
||||||
0xb8666290, 0xce33218d, 0xce110278, 0xce668eea,
|
0xb8e7726f, 0xb8fa6149, 0xb87782d5, 0xb87c0062,
|
||||||
0xce8d87d3, 0xce678006, 0xce79862e, 0xcec08148,
|
0xb86f1293, 0xb86723a4, 0xb8673120, 0xb87052f4,
|
||||||
0xce768a96, 0x2520c3fb, 0x25a1d4ef, 0x05803e87,
|
0xb8644150, 0xb877732b, 0xb866621f, 0xce2b06cd,
|
||||||
0x05401580, 0x05001e36, 0x2560d9f6, 0x25e1d0a5,
|
0xce14410d, 0xce648df9, 0xce883624, 0xce7c809d,
|
||||||
0x0580e08d, 0x05400d2d, 0x050074d9, 0x2560c6d1,
|
0xce7a87b0, 0xcec081c9, 0xce7c8962, 0x2520c143,
|
||||||
0x2521dd5d, 0x058394b0, 0x05403ece, 0x050026a3,
|
0x25a1d2da, 0x058015ce, 0x05400ed8, 0x0500bb31,
|
||||||
0x25a0cef5, 0x25a1d668, 0x05800ed8, 0x0540bb31,
|
0x25a0c074, 0x25a1d884, 0x05804944, 0x0540b1d9,
|
||||||
0x05008874, 0x25e0c491, 0x2521d873, 0x0580b1d9,
|
0x05001548, 0x25a0c49e, 0x2521cabe, 0x058054b3,
|
||||||
0x05401548, 0x0500cbde, 0x2560d461, 0x2521c96c,
|
0x0543ab47, 0x050026bb, 0x2560d097, 0x25a1d6fe,
|
||||||
0x0583ab47, 0x054026bb, 0x05007dd7, 0x04f402d6,
|
0x058394b4, 0x0540266d, 0x05003cbc, 0x25a0c1ab,
|
||||||
0x04ad053c, 0x659c0287, 0x65cb09ab, 0x65c80701,
|
0x2561d3f8, 0x05800acd, 0x05403684, 0x05000c07,
|
||||||
0x0496b22d, 0x04400064, 0x049a0dc7, 0x04108fa4,
|
0x2560de64, 0x2521cac5, 0x0583c8b5, 0x05405089,
|
||||||
0x04db0aa0, 0x0499a123, 0x041aab1c, 0x04d906f3,
|
0x05003e98, 0x04ad0397, 0x04ac074a, 0x658e023e,
|
||||||
0x04d3954d, 0x049193cc, 0x049003ae, 0x0497b4f5,
|
0x65d50a1d, 0x65820667, 0x0496b13a, 0x04001411,
|
||||||
0x049ea342, 0x04981229, 0x05e78440, 0x05e4856e,
|
0x041a19c2, 0x049095cb, 0x041b0c7d, 0x04d9a876,
|
||||||
0x044813ae, 0x044a02c3, 0x04011b63, 0x04dcb4f3,
|
0x049aa27b, 0x04591aa7, 0x04138b25, 0x04119235,
|
||||||
0x65808cb5, 0x65cd86b9, 0x65868071, 0x65878cf3,
|
0x04500a63, 0x0497adc7, 0x04dea9b1, 0x04581e31,
|
||||||
0x6582922e, 0x04ddba2d, 0x6582a9f1, 0x65c0b77a,
|
0x05a78f4f, 0x056494fb, 0x04481f65, 0x048a0f00,
|
||||||
0x65c1a8a7, 0x658da81b, 0x65819698, 0x65e59723,
|
0x04810074, 0x04dca739, 0x65809031, 0x658d9dae,
|
||||||
0x65a1123d, 0x65e03dae, 0x65f6be82, 0x65a2cd1d,
|
0x65c683d1, 0x658797b6, 0x65828008, 0x04ddb417,
|
||||||
0x65e0f6ce, 0x65ec5af9, 0x65ea6035, 0x045756eb,
|
0x6582baf9, 0x6580b435, 0x65c1b56a, 0x65cdb917,
|
||||||
0x0493727e, 0x042d3284, 0x04be33d6, 0x046e3231,
|
0x65c19671, 0x65fe95a4, 0x65f11f3e, 0x65fc298e,
|
||||||
0x04f4338c, 0x052d69a1, 0x05ab6d47, 0x658899e4,
|
0x65ada1a5, 0x65b3c967, 0x65e3ec59, 0x65f454a0,
|
||||||
0x4580b3a3, 0x45deb685, 0x042d390d, 0x04da21dd,
|
0x65a86dbc, 0x041b41dd, 0x04587903, 0x042a3321,
|
||||||
0x04582323, 0x04d92838, 0x04882c2a, 0x048a2799,
|
0x04b93281, 0x0470327c, 0x04e131bb, 0x0521692b,
|
||||||
0x65872770, 0x65863d61, 0x65d82021, 0x04012c5a,
|
0x057a6f61, 0x65c887a2, 0x45c2b058, 0x455cb723,
|
||||||
|
0x043639a3, 0x045a309b, 0x0498305a, 0x04993ce1,
|
||||||
|
0x04483e1e, 0x040a3395, 0x65c72595, 0x6586294b,
|
||||||
|
0x65d826c0, 0x04412474,
|
||||||
};
|
};
|
||||||
// END Generated code -- do not edit
|
// END Generated code -- do not edit
|
||||||
|
@ -1418,6 +1418,46 @@ public class IRNode {
|
|||||||
machOnlyNameRegex(VMASK_CMP_ZERO_D_NEON, "vmaskcmp_zeroD_neon");
|
machOnlyNameRegex(VMASK_CMP_ZERO_D_NEON, "vmaskcmp_zeroD_neon");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IR placeholder for the byte compare-with-immediate node; registered under the name "vmaskcmp_immB_sve"
// (presumably the AArch64 SVE matcher rule name -- confirm against the .ad file).
public static final String VMASK_CMP_IMM_B_SVE = PREFIX + "VMASK_CMP_IMM_B_SVE" + POSTFIX;
|
||||||
|
static {
|
||||||
|
machOnlyNameRegex(VMASK_CMP_IMM_B_SVE, "vmaskcmp_immB_sve");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Byte counterpart registered as "vmaskcmpU_immB_sve"; VectorCompareWithImmTest checks it for UNSIGNED_* comparisons.
public static final String VMASK_CMPU_IMM_B_SVE = PREFIX + "VMASK_CMPU_IMM_B_SVE" + POSTFIX;
|
||||||
|
static {
|
||||||
|
machOnlyNameRegex(VMASK_CMPU_IMM_B_SVE, "vmaskcmpU_immB_sve");
|
||||||
|
}
|
||||||
|
|
||||||
|
// IR placeholder for the short compare-with-immediate node, registered as "vmaskcmp_immS_sve".
public static final String VMASK_CMP_IMM_S_SVE = PREFIX + "VMASK_CMP_IMM_S_SVE" + POSTFIX;
|
||||||
|
static {
|
||||||
|
machOnlyNameRegex(VMASK_CMP_IMM_S_SVE, "vmaskcmp_immS_sve");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Short counterpart registered as "vmaskcmpU_immS_sve"; checked by the test for UNSIGNED_* comparisons.
public static final String VMASK_CMPU_IMM_S_SVE = PREFIX + "VMASK_CMPU_IMM_S_SVE" + POSTFIX;
|
||||||
|
static {
|
||||||
|
machOnlyNameRegex(VMASK_CMPU_IMM_S_SVE, "vmaskcmpU_immS_sve");
|
||||||
|
}
|
||||||
|
|
||||||
|
// IR placeholder for the int compare-with-immediate node, registered as "vmaskcmp_immI_sve".
public static final String VMASK_CMP_IMM_I_SVE = PREFIX + "VMASK_CMP_IMM_I_SVE" + POSTFIX;
|
||||||
|
static {
|
||||||
|
machOnlyNameRegex(VMASK_CMP_IMM_I_SVE, "vmaskcmp_immI_sve");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Int counterpart registered as "vmaskcmpU_immI_sve"; checked by the test for UNSIGNED_* comparisons.
public static final String VMASK_CMPU_IMM_I_SVE = PREFIX + "VMASK_CMPU_IMM_I_SVE" + POSTFIX;
|
||||||
|
static {
|
||||||
|
machOnlyNameRegex(VMASK_CMPU_IMM_I_SVE, "vmaskcmpU_immI_sve");
|
||||||
|
}
|
||||||
|
|
||||||
|
// IR placeholder for the long compare-with-immediate node, registered as "vmaskcmp_immL_sve".
public static final String VMASK_CMP_IMM_L_SVE = PREFIX + "VMASK_CMP_IMM_L_SVE" + POSTFIX;
|
||||||
|
static {
|
||||||
|
machOnlyNameRegex(VMASK_CMP_IMM_L_SVE, "vmaskcmp_immL_sve");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Long counterpart registered as "vmaskcmpU_immL_sve"; checked by the test for UNSIGNED_* comparisons.
public static final String VMASK_CMPU_IMM_L_SVE = PREFIX + "VMASK_CMPU_IMM_L_SVE" + POSTFIX;
|
||||||
|
static {
|
||||||
|
machOnlyNameRegex(VMASK_CMPU_IMM_L_SVE, "vmaskcmpU_immL_sve");
|
||||||
|
}
|
||||||
|
|
||||||
public static final String VNOT_I_MASKED = PREFIX + "VNOT_I_MASKED" + POSTFIX;
|
public static final String VNOT_I_MASKED = PREFIX + "VNOT_I_MASKED" + POSTFIX;
|
||||||
static {
|
static {
|
||||||
machOnlyNameRegex(VNOT_I_MASKED, "vnotI_masked");
|
machOnlyNameRegex(VNOT_I_MASKED, "vnotI_masked");
|
||||||
|
@ -0,0 +1,336 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2023, Arm Limited. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package compiler.vectorapi;
|
||||||
|
|
||||||
|
import compiler.lib.ir_framework.*;
|
||||||
|
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
|
import jdk.incubator.vector.ByteVector;
|
||||||
|
import jdk.incubator.vector.IntVector;
|
||||||
|
import jdk.incubator.vector.LongVector;
|
||||||
|
import jdk.incubator.vector.ShortVector;
|
||||||
|
import jdk.incubator.vector.VectorOperators;
|
||||||
|
import jdk.incubator.vector.VectorSpecies;
|
||||||
|
|
||||||
|
import jdk.test.lib.Asserts;
|
||||||
|
import jdk.test.lib.Utils;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @test
|
||||||
|
* @bug 8301739
|
||||||
|
* @key randomness
|
||||||
|
* @library /test/lib /
|
||||||
|
* @requires vm.cpu.features ~= ".*sve.*"
|
||||||
|
* @summary AArch64: Add optimized rules for vector compare with immediate for SVE
|
||||||
|
* @modules jdk.incubator.vector
|
||||||
|
*
|
||||||
|
* @run driver compiler.vectorapi.VectorCompareWithImmTest
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class VectorCompareWithImmTest {
|
||||||
|
private static final VectorSpecies<Byte> B_SPECIES = ByteVector.SPECIES_PREFERRED;
|
||||||
|
private static final VectorSpecies<Short> S_SPECIES = ShortVector.SPECIES_PREFERRED;
|
||||||
|
private static final VectorSpecies<Integer> I_SPECIES = IntVector.SPECIES_PREFERRED;
|
||||||
|
private static final VectorSpecies<Long> L_SPECIES = LongVector.SPECIES_PREFERRED;
|
||||||
|
|
||||||
|
private static final int LENGTH = 3000;
|
||||||
|
private static final Random RD = Utils.getRandomInstance();
|
||||||
|
|
||||||
|
private static byte[] ba;
|
||||||
|
private static boolean[] br;
|
||||||
|
private static short[] sa;
|
||||||
|
private static boolean[] sr;
|
||||||
|
private static int[] ia;
|
||||||
|
private static boolean[] ir;
|
||||||
|
private static long[] la;
|
||||||
|
private static boolean[] lr;
|
||||||
|
|
||||||
|
static {
|
||||||
|
ba = new byte[LENGTH];
|
||||||
|
sa = new short[LENGTH];
|
||||||
|
ia = new int[LENGTH];
|
||||||
|
la = new long[LENGTH];
|
||||||
|
|
||||||
|
br = new boolean[LENGTH];
|
||||||
|
sr = new boolean[LENGTH];
|
||||||
|
ir = new boolean[LENGTH];
|
||||||
|
lr = new boolean[LENGTH];
|
||||||
|
|
||||||
|
for (int i = 0; i < LENGTH; i++) {
|
||||||
|
ba[i] = (byte) RD.nextInt();
|
||||||
|
sa[i] = (short) (RD.nextInt(1000) - 500); // range [-500, 500)
|
||||||
|
ia[i] = RD.nextInt(1000) - 500; // range [-500, 500)
|
||||||
|
la[i] = RD.nextLong(1000) - 500; // range [-500, 500)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
interface ByteOp {
|
||||||
|
boolean apply(byte a);
|
||||||
|
}
|
||||||
|
|
||||||
|
interface ShortOp {
|
||||||
|
boolean apply(Short a);
|
||||||
|
}
|
||||||
|
|
||||||
|
interface IntOp {
|
||||||
|
boolean apply(int a);
|
||||||
|
}
|
||||||
|
|
||||||
|
interface LongOp {
|
||||||
|
boolean apply(long a);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void assertArrayEquals(byte[] a, boolean[] r, ByteOp f) {
|
||||||
|
for (int i = 0; i < B_SPECIES.length(); i++) {
|
||||||
|
Asserts.assertEquals(f.apply(a[i]), r[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void assertArrayEquals(short[] a, boolean[] r, ShortOp f) {
|
||||||
|
for (int i = 0; i < S_SPECIES.length(); i++) {
|
||||||
|
Asserts.assertEquals(f.apply(a[i]), r[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void assertArrayEquals(int[] a, boolean[] r, IntOp f) {
|
||||||
|
for (int i = 0; i < I_SPECIES.length(); i++) {
|
||||||
|
Asserts.assertEquals(f.apply(a[i]), r[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void assertArrayEquals(long[] a, boolean[] r, LongOp f) {
|
||||||
|
for (int i = 0; i < L_SPECIES.length(); i++) {
|
||||||
|
Asserts.assertEquals(f.apply(a[i]), r[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(counts = { IRNode.VMASK_CMP_IMM_B_SVE, ">= 1" })
|
||||||
|
public static void testByteGTInRange() {
|
||||||
|
ByteVector av = ByteVector.fromArray(B_SPECIES, ba, 0);
|
||||||
|
av.compare(VectorOperators.GT, 12).intoArray(br, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = "testByteGTInRange")
|
||||||
|
public static void testByteGTInRange_runner() {
|
||||||
|
testByteGTInRange();
|
||||||
|
assertArrayEquals(ba, br, (a) -> (a > 12 ? true : false));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(counts = { IRNode.VMASK_CMPU_IMM_B_SVE, ">= 1" })
|
||||||
|
public static void testByteUnsignedGTInRange() {
|
||||||
|
ByteVector av = ByteVector.fromArray(B_SPECIES, ba, 0);
|
||||||
|
av.compare(VectorOperators.UNSIGNED_GT, 64).intoArray(br, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = "testByteUnsignedGTInRange")
|
||||||
|
public static void testByteUnsignedGTInRange_runner() {
|
||||||
|
testByteUnsignedGTInRange();
|
||||||
|
assertArrayEquals(ba, br, (a) -> (Byte.toUnsignedInt(a) > 64 ? true : false));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(failOn = { IRNode.VMASK_CMP_IMM_B_SVE })
|
||||||
|
public static void testByteGTOutOfRange() {
|
||||||
|
ByteVector av = ByteVector.fromArray(B_SPECIES, ba, 0);
|
||||||
|
av.compare(VectorOperators.GT, -91).intoArray(br, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(failOn = { IRNode.VMASK_CMPU_IMM_B_SVE })
|
||||||
|
public static void testByteUnsignedGTOutOfRange() {
|
||||||
|
ByteVector av = ByteVector.fromArray(B_SPECIES, ba, 0);
|
||||||
|
av.compare(VectorOperators.UNSIGNED_GT, -91).intoArray(br, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(counts = { IRNode.VMASK_CMP_IMM_S_SVE, ">= 1" })
|
||||||
|
public static void testShortGEInRange() {
|
||||||
|
ShortVector av = ShortVector.fromArray(S_SPECIES, sa, 0);
|
||||||
|
av.compare(VectorOperators.GE, 5).intoArray(sr, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = "testShortGEInRange")
|
||||||
|
public static void testShortGEInRange_runner() {
|
||||||
|
testShortGEInRange();
|
||||||
|
assertArrayEquals(sa, sr, (a) -> (a >= 5 ? true : false));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(counts = { IRNode.VMASK_CMPU_IMM_S_SVE, ">= 1" })
|
||||||
|
public static void testShortUnsignedGEInRange() {
|
||||||
|
ShortVector av = ShortVector.fromArray(S_SPECIES, sa, 0);
|
||||||
|
av.compare(VectorOperators.UNSIGNED_GE, 56).intoArray(sr, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = "testShortUnsignedGEInRange")
|
||||||
|
public static void testShortUnsignedGEInRange_runner() {
|
||||||
|
testShortUnsignedGEInRange();
|
||||||
|
assertArrayEquals(sa, sr, (a) -> (Short.toUnsignedInt(a) >= 56 ? true : false));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(failOn = { IRNode.VMASK_CMP_IMM_S_SVE })
|
||||||
|
public static void testShortGEOutOfRange() {
|
||||||
|
ShortVector av = ShortVector.fromArray(S_SPECIES, sa, 0);
|
||||||
|
av.compare(VectorOperators.GE, -85).intoArray(sr, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(failOn = { IRNode.VMASK_CMPU_IMM_S_SVE })
|
||||||
|
public static void testShortUnsignedGEOutOfRange() {
|
||||||
|
ShortVector av = ShortVector.fromArray(S_SPECIES, sa, 0);
|
||||||
|
av.compare(VectorOperators.UNSIGNED_GE, -85).intoArray(sr, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(counts = { IRNode.VMASK_CMP_IMM_I_SVE, ">= 1" })
|
||||||
|
public static void testIntLTInRange() {
|
||||||
|
IntVector av = IntVector.fromArray(I_SPECIES, ia, 0);
|
||||||
|
av.compare(VectorOperators.LT, 10).intoArray(ir, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = "testIntLTInRange")
|
||||||
|
public static void testIntLTInRange_runner() {
|
||||||
|
testIntLTInRange();
|
||||||
|
assertArrayEquals(ia, ir, (a) -> (a < 10 ? true : false));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(counts = { IRNode.VMASK_CMPU_IMM_I_SVE, ">= 1" })
|
||||||
|
public static void testIntUnsignedLTInRange() {
|
||||||
|
IntVector av = IntVector.fromArray(I_SPECIES, ia, 0);
|
||||||
|
av.compare(VectorOperators.UNSIGNED_LT, 101).intoArray(ir, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = "testIntUnsignedLTInRange")
|
||||||
|
public static void testIntUnsignedLTInRange_runner() {
|
||||||
|
testIntUnsignedLTInRange();
|
||||||
|
assertArrayEquals(ia, ir, (a) -> (Integer.compareUnsigned(a, 101) < 0 ? true : false));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(failOn = { IRNode.VMASK_CMP_IMM_I_SVE })
|
||||||
|
public static void testIntLTOutOfRange() {
|
||||||
|
IntVector av = IntVector.fromArray(I_SPECIES, ia, 0);
|
||||||
|
av.compare(VectorOperators.LT, -110).intoArray(ir, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(failOn = { IRNode.VMASK_CMPU_IMM_I_SVE })
|
||||||
|
public static void testIntUnsignedLTOutOfRange() {
|
||||||
|
IntVector av = IntVector.fromArray(I_SPECIES, ia, 0);
|
||||||
|
av.compare(VectorOperators.UNSIGNED_LT, -110).intoArray(ir, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(counts = { IRNode.VMASK_CMP_IMM_L_SVE, ">= 1" })
|
||||||
|
public static void testLongLEInRange() {
|
||||||
|
LongVector av = LongVector.fromArray(L_SPECIES, la, 0);
|
||||||
|
av.compare(VectorOperators.LE, 9).intoArray(lr, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = "testLongLEInRange")
|
||||||
|
public static void testLongLEInRange_runner() {
|
||||||
|
testLongLEInRange();
|
||||||
|
assertArrayEquals(la, lr, (a) -> (a <= 9 ? true : false));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(counts = { IRNode.VMASK_CMPU_IMM_L_SVE, ">= 1" })
|
||||||
|
public static void testLongUnsignedLEInRange() {
|
||||||
|
LongVector av = LongVector.fromArray(L_SPECIES, la, 0);
|
||||||
|
av.compare(VectorOperators.UNSIGNED_LE, 95).intoArray(lr, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = "testLongUnsignedLEInRange")
|
||||||
|
public static void testLongUnsignedLEInRange_runner() {
|
||||||
|
testLongUnsignedLEInRange();
|
||||||
|
assertArrayEquals(la, lr, (a) -> (Long.compareUnsigned(a, 95) <= 0 ? true : false));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(failOn = { IRNode.VMASK_CMP_IMM_L_SVE })
|
||||||
|
public static void testLongLEOutOfRange() {
|
||||||
|
LongVector av = LongVector.fromArray(L_SPECIES, la, 0);
|
||||||
|
av.compare(VectorOperators.LE, -99).intoArray(lr, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(failOn = { IRNode.VMASK_CMPU_IMM_L_SVE })
|
||||||
|
public static void testLongUnsignedLEOutOfRange() {
|
||||||
|
LongVector av = LongVector.fromArray(L_SPECIES, la, 0);
|
||||||
|
av.compare(VectorOperators.UNSIGNED_LE, -99).intoArray(lr, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(counts = { IRNode.VMASK_CMP_IMM_I_SVE, ">= 1" })
|
||||||
|
public static void testIntEQInRange() {
|
||||||
|
IntVector av = IntVector.fromArray(I_SPECIES, ia, 0);
|
||||||
|
av.compare(VectorOperators.EQ, 8).intoArray(ir, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = "testIntEQInRange")
|
||||||
|
public static void testIntEQInRange_runner() {
|
||||||
|
testIntEQInRange();
|
||||||
|
assertArrayEquals(ia, ir, (a) -> (a == 8 ? true : false));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(failOn = {IRNode.VMASK_CMP_IMM_I_SVE})
|
||||||
|
public static void testIntEQOutOfRange() {
|
||||||
|
IntVector av = IntVector.fromArray(I_SPECIES, ia, 0);
|
||||||
|
av.compare(VectorOperators.EQ, 19).intoArray(ir, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(counts = { IRNode.VMASK_CMP_IMM_L_SVE, ">= 1" })
|
||||||
|
public static void testLongNEInRange() {
|
||||||
|
LongVector av = LongVector.fromArray(L_SPECIES, la, 0);
|
||||||
|
av.compare(VectorOperators.NE, 7).intoArray(lr, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = "testLongNEInRange")
|
||||||
|
public static void testLongNEInRange_runner() {
|
||||||
|
testLongNEInRange();
|
||||||
|
assertArrayEquals(la, lr, (a) -> (a != 7 ? true : false));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(failOn = { IRNode.VMASK_CMP_IMM_L_SVE })
|
||||||
|
public static void testLongNEOutOfRange() {
|
||||||
|
LongVector av = LongVector.fromArray(L_SPECIES, la, 0);
|
||||||
|
av.compare(VectorOperators.NE, 17).intoArray(lr, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void main(String[] args) {
|
||||||
|
TestFramework testFramework = new TestFramework();
|
||||||
|
testFramework.setDefaultWarmup(10000)
|
||||||
|
.addFlags("--add-modules=jdk.incubator.vector")
|
||||||
|
.addFlags("-XX:UseSVE=1")
|
||||||
|
.start();
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user