8297753: AArch64: Add optimized rules for vector compare with zero on NEON
Reviewed-by: aph
This commit is contained in:
parent
339ca88783
commit
d23a8bfb14
@ -1191,6 +1191,10 @@ public:
|
|||||||
|
|
||||||
// predicate controlling addressing modes
|
// predicate controlling addressing modes
|
||||||
bool size_fits_all_mem_uses(AddPNode* addp, int shift);
|
bool size_fits_all_mem_uses(AddPNode* addp, int shift);
|
||||||
|
|
||||||
|
// Convert BoolTest condition to Assembler condition.
|
||||||
|
// Replicate the logic of cmpOpOper::ccode() and cmpOpUOper::ccode().
|
||||||
|
Assembler::Condition to_assembler_cond(BoolTest::mask cond);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
source %{
|
source %{
|
||||||
@ -2527,6 +2531,50 @@ bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Convert BootTest condition to Assembler condition.
|
||||||
|
// Replicate the logic of cmpOpOper::ccode() and cmpOpUOper::ccode().
|
||||||
|
Assembler::Condition to_assembler_cond(BoolTest::mask cond) {
|
||||||
|
Assembler::Condition result;
|
||||||
|
switch(cond) {
|
||||||
|
case BoolTest::eq:
|
||||||
|
result = Assembler::EQ; break;
|
||||||
|
case BoolTest::ne:
|
||||||
|
result = Assembler::NE; break;
|
||||||
|
case BoolTest::le:
|
||||||
|
result = Assembler::LE; break;
|
||||||
|
case BoolTest::ge:
|
||||||
|
result = Assembler::GE; break;
|
||||||
|
case BoolTest::lt:
|
||||||
|
result = Assembler::LT; break;
|
||||||
|
case BoolTest::gt:
|
||||||
|
result = Assembler::GT; break;
|
||||||
|
case BoolTest::ule:
|
||||||
|
result = Assembler::LS; break;
|
||||||
|
case BoolTest::uge:
|
||||||
|
result = Assembler::HS; break;
|
||||||
|
case BoolTest::ult:
|
||||||
|
result = Assembler::LO; break;
|
||||||
|
case BoolTest::ugt:
|
||||||
|
result = Assembler::HI; break;
|
||||||
|
case BoolTest::overflow:
|
||||||
|
result = Assembler::VS; break;
|
||||||
|
case BoolTest::no_overflow:
|
||||||
|
result = Assembler::VC; break;
|
||||||
|
default:
|
||||||
|
ShouldNotReachHere();
|
||||||
|
return Assembler::Condition(-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check conversion
|
||||||
|
if (cond & BoolTest::unsigned_compare) {
|
||||||
|
assert(cmpOpUOper((BoolTest::mask)((int)cond & ~(BoolTest::unsigned_compare))).ccode() == result, "Invalid conversion");
|
||||||
|
} else {
|
||||||
|
assert(cmpOpOper(cond).ccode() == result, "Invalid conversion");
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
// Binary src (Replicate con)
|
// Binary src (Replicate con)
|
||||||
bool is_valid_sve_arith_imm_pattern(Node* n, Node* m) {
|
bool is_valid_sve_arith_imm_pattern(Node* n, Node* m) {
|
||||||
if (n == NULL || m == NULL) {
|
if (n == NULL || m == NULL) {
|
||||||
@ -4263,6 +4311,17 @@ operand immI_positive()
|
|||||||
interface(CONST_INTER);
|
interface(CONST_INTER);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
// BoolTest condition for signed compare.
// Matches an int constant (ConI) whose value is strictly below
// BoolTest::unsigned_compare.
// NOTE(review): presumably this means the unsigned-compare flag bit is clear,
// so only signed/equality conditions match -- confirm against BoolTest::mask.
|
||||||
|
operand immI_cmp_cond()
|
||||||
|
%{
|
||||||
|
predicate(n->get_int() < (int)(BoolTest::unsigned_compare));
|
||||||
|
match(ConI);
|
||||||
|
|
||||||
|
op_cost(0);
|
||||||
|
format %{ %}
|
||||||
|
interface(CONST_INTER);
|
||||||
|
%}
|
||||||
|
|
||||||
operand immL_255()
|
operand immL_255()
|
||||||
%{
|
%{
|
||||||
predicate(n->get_long() == 255L);
|
predicate(n->get_long() == 255L);
|
||||||
|
@ -5137,6 +5137,61 @@ instruct vmaskcmp_neon(vReg dst, vReg src1, vReg src2, immI cond) %{
|
|||||||
ins_pipe(pipe_slow);
|
ins_pipe(pipe_slow);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
instruct vmaskcmp_zeroI_neon(vReg dst, vReg src, immI0 zero, immI_cmp_cond cond) %{
|
||||||
|
predicate(UseSVE == 0);
|
||||||
|
match(Set dst (VectorMaskCmp (Binary src (ReplicateB zero)) cond));
|
||||||
|
match(Set dst (VectorMaskCmp (Binary src (ReplicateS zero)) cond));
|
||||||
|
match(Set dst (VectorMaskCmp (Binary src (ReplicateI zero)) cond));
|
||||||
|
format %{ "vmaskcmp_zeroI_neon $dst, $src, #0, $cond" %}
|
||||||
|
ins_encode %{
|
||||||
|
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||||
|
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||||
|
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||||
|
__ neon_compare_zero($dst$$FloatRegister, bt, $src$$FloatRegister,
|
||||||
|
condition, /* isQ */ length_in_bytes == 16);
|
||||||
|
%}
|
||||||
|
ins_pipe(pipe_slow);
|
||||||
|
%}
|
||||||
|
|
||||||
|
instruct vmaskcmp_zeroL_neon(vReg dst, vReg src, immL0 zero, immI_cmp_cond cond) %{
|
||||||
|
predicate(UseSVE == 0);
|
||||||
|
match(Set dst (VectorMaskCmp (Binary src (ReplicateL zero)) cond));
|
||||||
|
format %{ "vmaskcmp_zeroL_neon $dst, $src, #0, $cond" %}
|
||||||
|
ins_encode %{
|
||||||
|
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||||
|
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||||
|
__ neon_compare_zero($dst$$FloatRegister, T_LONG, $src$$FloatRegister,
|
||||||
|
condition, /* isQ */ length_in_bytes == 16);
|
||||||
|
%}
|
||||||
|
ins_pipe(pipe_slow);
|
||||||
|
%}
|
||||||
|
|
||||||
|
instruct vmaskcmp_zeroF_neon(vReg dst, vReg src, immF0 zero, immI_cmp_cond cond) %{
|
||||||
|
predicate(UseSVE == 0);
|
||||||
|
match(Set dst (VectorMaskCmp (Binary src (ReplicateF zero)) cond));
|
||||||
|
format %{ "vmaskcmp_zeroF_neon $dst, $src, #0, $cond" %}
|
||||||
|
ins_encode %{
|
||||||
|
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||||
|
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||||
|
__ neon_compare_zero($dst$$FloatRegister, T_FLOAT, $src$$FloatRegister,
|
||||||
|
condition, /* isQ */ length_in_bytes == 16);
|
||||||
|
%}
|
||||||
|
ins_pipe(pipe_slow);
|
||||||
|
%}
|
||||||
|
|
||||||
|
instruct vmaskcmp_zeroD_neon(vReg dst, vReg src, immD0 zero, immI_cmp_cond cond) %{
|
||||||
|
predicate(UseSVE == 0);
|
||||||
|
match(Set dst (VectorMaskCmp (Binary src (ReplicateD zero)) cond));
|
||||||
|
format %{ "vmaskcmp_zeroD_neon $dst, $src, #0, $cond" %}
|
||||||
|
ins_encode %{
|
||||||
|
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||||
|
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||||
|
__ neon_compare_zero($dst$$FloatRegister, T_DOUBLE, $src$$FloatRegister,
|
||||||
|
condition, /* isQ */ length_in_bytes == 16);
|
||||||
|
%}
|
||||||
|
ins_pipe(pipe_slow);
|
||||||
|
%}
|
||||||
|
|
||||||
instruct vmaskcmp_sve(pReg dst, vReg src1, vReg src2, immI cond, rFlagsReg cr) %{
|
instruct vmaskcmp_sve(pReg dst, vReg src1, vReg src2, immI cond, rFlagsReg cr) %{
|
||||||
predicate(UseSVE > 0);
|
predicate(UseSVE > 0);
|
||||||
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
|
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
|
||||||
|
@ -3553,6 +3553,42 @@ instruct vmaskcmp_neon(vReg dst, vReg src1, vReg src2, immI cond) %{
|
|||||||
ins_pipe(pipe_slow);
|
ins_pipe(pipe_slow);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
instruct vmaskcmp_zeroI_neon(vReg dst, vReg src, immI0 zero, immI_cmp_cond cond) %{
|
||||||
|
predicate(UseSVE == 0);
|
||||||
|
match(Set dst (VectorMaskCmp (Binary src (ReplicateB zero)) cond));
|
||||||
|
match(Set dst (VectorMaskCmp (Binary src (ReplicateS zero)) cond));
|
||||||
|
match(Set dst (VectorMaskCmp (Binary src (ReplicateI zero)) cond));
|
||||||
|
format %{ "vmaskcmp_zeroI_neon $dst, $src, #0, $cond" %}
|
||||||
|
ins_encode %{
|
||||||
|
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||||
|
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||||
|
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||||
|
__ neon_compare_zero($dst$$FloatRegister, bt, $src$$FloatRegister,
|
||||||
|
condition, /* isQ */ length_in_bytes == 16);
|
||||||
|
%}
|
||||||
|
ins_pipe(pipe_slow);
|
||||||
|
%}
|
||||||
|
dnl
|
||||||
|
dnl VMASKCMP_ZERO_NEON($1, $2 )
|
||||||
|
dnl VMASKCMP_ZERO_NEON(type, basic_type)
|
||||||
|
define(`VMASKCMP_ZERO_NEON', `
|
||||||
|
instruct vmaskcmp_zero$1_neon(vReg dst, vReg src, imm`$1'0 zero, immI_cmp_cond cond) %{
|
||||||
|
predicate(UseSVE == 0);
|
||||||
|
match(Set dst (VectorMaskCmp (Binary src (Replicate$1 zero)) cond));
|
||||||
|
format %{ "vmaskcmp_zero$1_neon $dst, $src, #0, $cond" %}
|
||||||
|
ins_encode %{
|
||||||
|
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||||
|
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||||
|
__ neon_compare_zero($dst$$FloatRegister, $2, $src$$FloatRegister,
|
||||||
|
condition, /* isQ */ length_in_bytes == 16);
|
||||||
|
%}
|
||||||
|
ins_pipe(pipe_slow);
|
||||||
|
%}')dnl
|
||||||
|
dnl
|
||||||
|
VMASKCMP_ZERO_NEON(L, T_LONG)
|
||||||
|
VMASKCMP_ZERO_NEON(F, T_FLOAT)
|
||||||
|
VMASKCMP_ZERO_NEON(D, T_DOUBLE)
|
||||||
|
|
||||||
instruct vmaskcmp_sve(pReg dst, vReg src1, vReg src2, immI cond, rFlagsReg cr) %{
|
instruct vmaskcmp_sve(pReg dst, vReg src1, vReg src2, immI cond, rFlagsReg cr) %{
|
||||||
predicate(UseSVE > 0);
|
predicate(UseSVE > 0);
|
||||||
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
|
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
|
||||||
|
@ -2653,12 +2653,6 @@ template<typename R, typename... Rx>
|
|||||||
INSN(cnt, 0, 0b100000010110, 0); // accepted arrangements: T8B, T16B
|
INSN(cnt, 0, 0b100000010110, 0); // accepted arrangements: T8B, T16B
|
||||||
INSN(uaddlp, 1, 0b100000001010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
|
INSN(uaddlp, 1, 0b100000001010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
|
||||||
INSN(uaddlv, 1, 0b110000001110, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S
|
INSN(uaddlv, 1, 0b110000001110, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S
|
||||||
// Zero compare.
|
|
||||||
INSN(cmeq, 0, 0b100000100110, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
|
|
||||||
INSN(cmge, 1, 0b100000100010, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
|
|
||||||
INSN(cmgt, 0, 0b100000100010, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
|
|
||||||
INSN(cmle, 1, 0b100000100110, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
|
|
||||||
INSN(cmlt, 0, 0b100000101010, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
|
|
||||||
|
|
||||||
#undef INSN
|
#undef INSN
|
||||||
|
|
||||||
@ -3190,6 +3184,48 @@ public:
|
|||||||
|
|
||||||
#undef INSN
|
#undef INSN
|
||||||
|
|
||||||
|
// AdvSIMD compare with zero (vector)
|
||||||
|
void cm(Condition cond, FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {
|
||||||
|
starti;
|
||||||
|
assert(T != T1Q && T != T1D, "invalid arrangement");
|
||||||
|
int cond_op;
|
||||||
|
switch (cond) {
|
||||||
|
case EQ: cond_op = 0b001; break;
|
||||||
|
case GE: cond_op = 0b100; break;
|
||||||
|
case GT: cond_op = 0b000; break;
|
||||||
|
case LE: cond_op = 0b101; break;
|
||||||
|
case LT: cond_op = 0b010; break;
|
||||||
|
default:
|
||||||
|
ShouldNotReachHere();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
f(0, 31), f((int)T & 1, 30), f((cond_op >> 2) & 1, 29);
|
||||||
|
f(0b01110, 28, 24), f((int)T >> 1, 23, 22), f(0b10000010, 21, 14);
|
||||||
|
f(cond_op & 0b11, 13, 12), f(0b10, 11, 10), rf(Vn, 5), rf(Vd, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// AdvSIMD Floating-point compare with zero (vector)
|
||||||
|
void fcm(Condition cond, FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {
|
||||||
|
starti;
|
||||||
|
assert(T == T2S || T == T4S || T == T2D, "invalid arrangement");
|
||||||
|
int cond_op;
|
||||||
|
switch (cond) {
|
||||||
|
case EQ: cond_op = 0b010; break;
|
||||||
|
case GT: cond_op = 0b000; break;
|
||||||
|
case GE: cond_op = 0b001; break;
|
||||||
|
case LE: cond_op = 0b011; break;
|
||||||
|
case LT: cond_op = 0b100; break;
|
||||||
|
default:
|
||||||
|
ShouldNotReachHere();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
f(0, 31), f((int)T & 1, 30), f(cond_op & 1, 29), f(0b011101, 28, 23);
|
||||||
|
f(((int)(T >> 1) & 1), 22), f(0b10000011, 21, 14);
|
||||||
|
f((cond_op >> 1) & 0b11, 13, 12), f(0b10, 11, 10), rf(Vn, 5), rf(Vd, 0);
|
||||||
|
}
|
||||||
|
|
||||||
void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index)
|
void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index)
|
||||||
{
|
{
|
||||||
starti;
|
starti;
|
||||||
|
@ -925,7 +925,7 @@ void C2_MacroAssembler::neon_compare(FloatRegister dst, BasicType bt, FloatRegis
|
|||||||
case BoolTest::eq: fcmeq(dst, size, src1, src2); break;
|
case BoolTest::eq: fcmeq(dst, size, src1, src2); break;
|
||||||
case BoolTest::ne: {
|
case BoolTest::ne: {
|
||||||
fcmeq(dst, size, src1, src2);
|
fcmeq(dst, size, src1, src2);
|
||||||
notr(dst, T16B, dst);
|
notr(dst, isQ ? T16B : T8B, dst);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case BoolTest::ge: fcmge(dst, size, src1, src2); break;
|
case BoolTest::ge: fcmge(dst, size, src1, src2); break;
|
||||||
@ -941,7 +941,7 @@ void C2_MacroAssembler::neon_compare(FloatRegister dst, BasicType bt, FloatRegis
|
|||||||
case BoolTest::eq: cmeq(dst, size, src1, src2); break;
|
case BoolTest::eq: cmeq(dst, size, src1, src2); break;
|
||||||
case BoolTest::ne: {
|
case BoolTest::ne: {
|
||||||
cmeq(dst, size, src1, src2);
|
cmeq(dst, size, src1, src2);
|
||||||
notr(dst, T16B, dst);
|
notr(dst, isQ ? T16B : T8B, dst);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case BoolTest::ge: cmge(dst, size, src1, src2); break;
|
case BoolTest::ge: cmge(dst, size, src1, src2); break;
|
||||||
@ -959,6 +959,26 @@ void C2_MacroAssembler::neon_compare(FloatRegister dst, BasicType bt, FloatRegis
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emit a NEON compare of src against zero into dst.
//   bt   - element type; T_FLOAT/T_DOUBLE use fcm(), everything else cm()
//   cond - condition to test; NE is emitted as an EQ compare followed by a
//          bitwise NOT of dst
//   isQ  - selects the arrangement via esize2arrangement() and the width
//          (T16B vs T8B) of the NOT used for NE
void C2_MacroAssembler::neon_compare_zero(FloatRegister dst, BasicType bt, FloatRegister src,
                                          Condition cond, bool isQ) {
  SIMD_Arrangement size = esize2arrangement((unsigned)type2aelembytes(bt), isQ);
  const bool is_fp = (bt == T_FLOAT || bt == T_DOUBLE);
  const bool invert = (cond == Assembler::NE);
  const Condition test = invert ? Assembler::EQ : cond;

  if (is_fp) {
    fcm(test, dst, size, src);
  } else {
    cm(test, dst, size, src);
  }

  if (invert) {
    // Flip every bit of the EQ result to obtain NE.
    notr(dst, isQ ? T16B : T8B, dst);
  }
}
|
||||||
|
|
||||||
// Compress the least significant bit of each byte to the rightmost and clear
|
// Compress the least significant bit of each byte to the rightmost and clear
|
||||||
// the higher garbage bits.
|
// the higher garbage bits.
|
||||||
void C2_MacroAssembler::bytemask_compress(Register dst) {
|
void C2_MacroAssembler::bytemask_compress(Register dst) {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -79,6 +79,9 @@
|
|||||||
void neon_compare(FloatRegister dst, BasicType bt, FloatRegister src1,
|
void neon_compare(FloatRegister dst, BasicType bt, FloatRegister src1,
|
||||||
FloatRegister src2, int cond, bool isQ);
|
FloatRegister src2, int cond, bool isQ);
|
||||||
|
|
||||||
|
void neon_compare_zero(FloatRegister dst, BasicType bt, FloatRegister src,
|
||||||
|
Condition cond, bool isQ);
|
||||||
|
|
||||||
void sve_compare(PRegister pd, BasicType bt, PRegister pg,
|
void sve_compare(PRegister pd, BasicType bt, PRegister pg,
|
||||||
FloatRegister zn, FloatRegister zm, int cond);
|
FloatRegister zn, FloatRegister zm, int cond);
|
||||||
|
|
||||||
|
@ -5598,7 +5598,7 @@ void MacroAssembler::encode_iso_array(Register src, Register dst,
|
|||||||
// ASCII-check on lo-parts (no sign).
|
// ASCII-check on lo-parts (no sign).
|
||||||
FloatRegister vlox = vtmp1; // Merge lower bytes.
|
FloatRegister vlox = vtmp1; // Merge lower bytes.
|
||||||
ASCII(orr(vlox, T16B, vlo0, vlo1));
|
ASCII(orr(vlox, T16B, vlo0, vlo1));
|
||||||
umov(chk, vhix, D, 1); ASCII(cmlt(vlox, T16B, vlox));
|
umov(chk, vhix, D, 1); ASCII(cm(LT, vlox, T16B, vlox));
|
||||||
fmovd(max, vhix); ASCII(umaxv(vlox, T16B, vlox));
|
fmovd(max, vhix); ASCII(umaxv(vlox, T16B, vlox));
|
||||||
orr(chk, chk, max); ASCII(umov(max, vlox, B, 0));
|
orr(chk, chk, max); ASCII(umov(max, vlox, B, 0));
|
||||||
ASCII(orr(chk, chk, max));
|
ASCII(orr(chk, chk, max));
|
||||||
@ -5624,7 +5624,7 @@ void MacroAssembler::encode_iso_array(Register src, Register dst,
|
|||||||
uzp2(vhi, T16B, vtmp3, vtmp3);
|
uzp2(vhi, T16B, vtmp3, vtmp3);
|
||||||
// ISO-check on hi-parts (all zero).
|
// ISO-check on hi-parts (all zero).
|
||||||
// ASCII-check on lo-parts (no sign).
|
// ASCII-check on lo-parts (no sign).
|
||||||
ASCII(cmlt(vtmp2, T16B, vlo));
|
ASCII(cm(LT, vtmp2, T16B, vlo));
|
||||||
fmovd(chk, vhi); ASCII(umaxv(vtmp2, T16B, vtmp2));
|
fmovd(chk, vhi); ASCII(umaxv(vtmp2, T16B, vtmp2));
|
||||||
ASCII(umov(max, vtmp2, B, 0));
|
ASCII(umov(max, vtmp2, B, 0));
|
||||||
ASCII(orr(chk, chk, max));
|
ASCII(orr(chk, chk, max));
|
||||||
|
@ -1363,6 +1363,28 @@ class TwoRegNEONOp(CommonNEONInstruction):
|
|||||||
class ThreeRegNEONOp(TwoRegNEONOp):
|
class ThreeRegNEONOp(TwoRegNEONOp):
|
||||||
numRegs = 3
|
numRegs = 3
|
||||||
|
|
||||||
|
class NEONFloatCompareWithZero(TwoRegNEONOp):
    """Test-case generator for the fcm<cond> compare-with-#0.0 instructions.

    args is an [arrangement, condition] pair, e.g. ['2S', 'GT'].
    """

    def __init__(self, args):
        self._name = 'fcm'
        arrangement, condition = args
        self.arrangement = arrangement
        self.condition = condition
        # Assembly mnemonic: 'fcm' plus the lower-cased condition.
        self.insname = self._name + condition.lower()

    def cstr(self):
        """Return the C++ assembler call for this instruction."""
        dst = self._firstSIMDreg
        src = dst.nextReg()
        call_args = ", ".join([
            "Assembler::" + self.condition,
            str(dst),
            "__ T" + self.arrangement,
            str(src),
        ])
        return "__ " + self._name + "(" + call_args + ");"

    def astr(self):
        """Return the expected assembly text for this instruction."""
        dst = self._firstSIMDreg
        src = dst.nextReg()
        operands = "%s.%s, %s.%s, #0.0" % (dst, self.arrangement,
                                           src, self.arrangement)
        return self.insname + "\t" + operands
|
||||||
|
|
||||||
class SpecialCases(Instruction):
|
class SpecialCases(Instruction):
|
||||||
def __init__(self, data):
|
def __init__(self, data):
|
||||||
self._name = data[0]
|
self._name = data[0]
|
||||||
@ -1596,6 +1618,16 @@ generate(NEONReduceInstruction,
|
|||||||
["fminp", "fminp", "2S"], ["fminp", "fminp", "2D"],
|
["fminp", "fminp", "2S"], ["fminp", "fminp", "2D"],
|
||||||
])
|
])
|
||||||
|
|
||||||
|
# Build one [arrangement, condition] entry per pairing; conditions vary
# slowest, so all arrangements of 'GT' come first, then 'GE', and so on.
neonFloatCompareWithZeroConditions = ['GT', 'GE', 'EQ', 'LT', 'LE']
neonFloatArrangement = ['2S', '4S', '2D']
neonFloatCompareWithZeroArgs = [
    [arrangement, condition]
    for condition in neonFloatCompareWithZeroConditions
    for arrangement in neonFloatArrangement
]

generate(NEONFloatCompareWithZero, neonFloatCompareWithZeroArgs)
|
||||||
|
|
||||||
generate(TwoRegNEONOp,
|
generate(TwoRegNEONOp,
|
||||||
[["absr", "abs", "8B"], ["absr", "abs", "16B"],
|
[["absr", "abs", "8B"], ["absr", "abs", "16B"],
|
||||||
["absr", "abs", "4H"], ["absr", "abs", "8H"],
|
["absr", "abs", "4H"], ["absr", "abs", "8H"],
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1298,6 +1298,26 @@ public class IRNode {
|
|||||||
machOnlyNameRegex(VMLS_MASKED, "vmls_masked");
|
machOnlyNameRegex(VMLS_MASKED, "vmls_masked");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static final String VMASK_CMP_ZERO_I_NEON = PREFIX + "VMASK_CMP_ZERO_I_NEON" + POSTFIX;
|
||||||
|
static {
|
||||||
|
machOnlyNameRegex(VMASK_CMP_ZERO_I_NEON, "vmaskcmp_zeroI_neon");
|
||||||
|
}
|
||||||
|
|
||||||
|
public static final String VMASK_CMP_ZERO_L_NEON = PREFIX + "VMASK_CMP_ZERO_L_NEON" + POSTFIX;
|
||||||
|
static {
|
||||||
|
machOnlyNameRegex(VMASK_CMP_ZERO_L_NEON, "vmaskcmp_zeroL_neon");
|
||||||
|
}
|
||||||
|
|
||||||
|
public static final String VMASK_CMP_ZERO_F_NEON = PREFIX + "VMASK_CMP_ZERO_F_NEON" + POSTFIX;
|
||||||
|
static {
|
||||||
|
machOnlyNameRegex(VMASK_CMP_ZERO_F_NEON, "vmaskcmp_zeroF_neon");
|
||||||
|
}
|
||||||
|
|
||||||
|
public static final String VMASK_CMP_ZERO_D_NEON = PREFIX + "VMASK_CMP_ZERO_D_NEON" + POSTFIX;
|
||||||
|
static {
|
||||||
|
machOnlyNameRegex(VMASK_CMP_ZERO_D_NEON, "vmaskcmp_zeroD_neon");
|
||||||
|
}
|
||||||
|
|
||||||
public static final String VNOT_I_MASKED = PREFIX + "VNOT_I_MASKED" + POSTFIX;
|
public static final String VNOT_I_MASKED = PREFIX + "VNOT_I_MASKED" + POSTFIX;
|
||||||
static {
|
static {
|
||||||
machOnlyNameRegex(VNOT_I_MASKED, "vnotI_masked");
|
machOnlyNameRegex(VNOT_I_MASKED, "vnotI_masked");
|
||||||
|
@ -0,0 +1,260 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2023, Arm Limited. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package compiler.vectorapi;
|
||||||
|
|
||||||
|
import compiler.lib.ir_framework.*;
|
||||||
|
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
|
import jdk.incubator.vector.ByteVector;
|
||||||
|
import jdk.incubator.vector.DoubleVector;
|
||||||
|
import jdk.incubator.vector.FloatVector;
|
||||||
|
import jdk.incubator.vector.IntVector;
|
||||||
|
import jdk.incubator.vector.LongVector;
|
||||||
|
import jdk.incubator.vector.ShortVector;
|
||||||
|
import jdk.incubator.vector.VectorSpecies;
|
||||||
|
import jdk.incubator.vector.VectorOperators;
|
||||||
|
|
||||||
|
import jdk.test.lib.Asserts;
|
||||||
|
import jdk.test.lib.Utils;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @test
|
||||||
|
* @bug 8297753
|
||||||
|
* @key randomness
|
||||||
|
* @library /test/lib /
|
||||||
|
* @requires os.arch=="aarch64"
|
||||||
|
* @summary Add optimized rules for vector compare with zero on NEON
|
||||||
|
* @modules jdk.incubator.vector
|
||||||
|
*
|
||||||
|
* @run driver compiler.vectorapi.VectorCompareWithZeroTest
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class VectorCompareWithZeroTest {
|
||||||
|
private static final VectorSpecies<Byte> B_SPECIES = ByteVector.SPECIES_PREFERRED;
|
||||||
|
private static final VectorSpecies<Short> S_SPECIES = ShortVector.SPECIES_PREFERRED;
|
||||||
|
private static final VectorSpecies<Integer> I_SPECIES = IntVector.SPECIES_PREFERRED;
|
||||||
|
private static final VectorSpecies<Long> L_SPECIES = LongVector.SPECIES_PREFERRED;
|
||||||
|
private static final VectorSpecies<Float> F_SPECIES = FloatVector.SPECIES_PREFERRED;
|
||||||
|
private static final VectorSpecies<Double> D_SPECIES = DoubleVector.SPECIES_PREFERRED;
|
||||||
|
private static final int LENGTH = 1024;
|
||||||
|
private static final Random RD = Utils.getRandomInstance();
|
||||||
|
private static byte[] ba;
|
||||||
|
private static boolean[] br;
|
||||||
|
private static short[] sa;
|
||||||
|
private static boolean[] sr;
|
||||||
|
private static int[] ia;
|
||||||
|
private static boolean[] ir;
|
||||||
|
private static long[] la;
|
||||||
|
private static boolean[] lr;
|
||||||
|
private static float[] fa;
|
||||||
|
private static boolean[] fr;
|
||||||
|
private static double[] da;
|
||||||
|
private static boolean[] dr;
|
||||||
|
|
||||||
|
static {
|
||||||
|
ba = new byte[LENGTH];
|
||||||
|
sa = new short[LENGTH];
|
||||||
|
ia = new int[LENGTH];
|
||||||
|
la = new long[LENGTH];
|
||||||
|
fa = new float[LENGTH];
|
||||||
|
da = new double[LENGTH];
|
||||||
|
|
||||||
|
br = new boolean[LENGTH];
|
||||||
|
sr = new boolean[LENGTH];
|
||||||
|
ir = new boolean[LENGTH];
|
||||||
|
lr = new boolean[LENGTH];
|
||||||
|
fr = new boolean[LENGTH];
|
||||||
|
dr = new boolean[LENGTH];
|
||||||
|
|
||||||
|
for (int i = 0; i < LENGTH; i++) {
|
||||||
|
ba[i] = (byte) RD.nextInt(25);
|
||||||
|
sa[i] = (short) RD.nextInt(25);
|
||||||
|
ia[i] = RD.nextInt(25);
|
||||||
|
la[i] = RD.nextLong(25);
|
||||||
|
fa[i] = RD.nextFloat(25.0F);
|
||||||
|
da[i] = RD.nextDouble(25.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
interface ByteOp {
|
||||||
|
boolean apply(byte a);
|
||||||
|
}
|
||||||
|
|
||||||
|
interface ShortOp {
|
||||||
|
boolean apply(short a);
|
||||||
|
}
|
||||||
|
|
||||||
|
interface IntOp {
|
||||||
|
boolean apply(int a);
|
||||||
|
}
|
||||||
|
|
||||||
|
interface LongOp {
|
||||||
|
boolean apply(long a);
|
||||||
|
}
|
||||||
|
|
||||||
|
interface FloatOp {
|
||||||
|
boolean apply(float a);
|
||||||
|
}
|
||||||
|
|
||||||
|
interface DoubleOp {
|
||||||
|
boolean apply(double a);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void assertArrayEquals(byte[] a, boolean[] r, ByteOp f) {
|
||||||
|
for (int i = 0; i < B_SPECIES.length(); i++) {
|
||||||
|
Asserts.assertEquals(f.apply(a[i]), r[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void assertArrayEquals(short[] a, boolean[] r, ShortOp f) {
|
||||||
|
for (int i = 0; i < S_SPECIES.length(); i++) {
|
||||||
|
Asserts.assertEquals(f.apply(a[i]), r[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void assertArrayEquals(int[] a, boolean[] r, IntOp f) {
|
||||||
|
for (int i = 0; i < I_SPECIES.length(); i++) {
|
||||||
|
Asserts.assertEquals(f.apply(a[i]), r[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void assertArrayEquals(long[] a, boolean[] r, LongOp f) {
|
||||||
|
for (int i = 0; i < L_SPECIES.length(); i++) {
|
||||||
|
Asserts.assertEquals(f.apply(a[i]), r[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void assertArrayEquals(float[] a, boolean[] r, FloatOp f) {
|
||||||
|
for (int i = 0; i < F_SPECIES.length(); i++) {
|
||||||
|
Asserts.assertEquals(f.apply(a[i]), r[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void assertArrayEquals(double[] a, boolean[] r, DoubleOp f) {
|
||||||
|
for (int i = 0; i < D_SPECIES.length(); i++) {
|
||||||
|
Asserts.assertEquals(f.apply(a[i]), r[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(counts = { IRNode.VMASK_CMP_ZERO_I_NEON, ">= 1" })
|
||||||
|
public static void testByteVectorEqualToZero() {
|
||||||
|
ByteVector av = ByteVector.fromArray(B_SPECIES, ba, 0);
|
||||||
|
av.compare(VectorOperators.EQ, 0).intoArray(br, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = "testByteVectorEqualToZero")
|
||||||
|
public static void testByteVectorEqualToZero_runner() {
|
||||||
|
testByteVectorEqualToZero();
|
||||||
|
assertArrayEquals(ba, br, (a) -> (a == (byte) 0 ? true : false));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(counts = { IRNode.VMASK_CMP_ZERO_I_NEON, ">= 1" })
|
||||||
|
public static void testShortVectorNotEqualToZero() {
|
||||||
|
ShortVector av = ShortVector.fromArray(S_SPECIES, sa, 0);
|
||||||
|
av.compare(VectorOperators.NE, 0).intoArray(sr, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = "testShortVectorNotEqualToZero")
|
||||||
|
public static void testShortVectorNotEqualToZero_runner() {
|
||||||
|
testShortVectorNotEqualToZero();
|
||||||
|
assertArrayEquals(sa, sr, (a) -> (a != (short) 0 ? true : false));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(counts = { IRNode.VMASK_CMP_ZERO_I_NEON, ">= 1" })
|
||||||
|
public static void testIntVectorGreaterEqualToZero() {
|
||||||
|
IntVector av = IntVector.fromArray(I_SPECIES, ia, 0);
|
||||||
|
av.compare(VectorOperators.GE, 0).intoArray(ir, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = "testIntVectorGreaterEqualToZero")
|
||||||
|
public static void testIntVectorGreaterEqualToZero_runner() {
|
||||||
|
testIntVectorGreaterEqualToZero();
|
||||||
|
assertArrayEquals(ia, ir, (a) -> (a >= 0 ? true : false));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(counts = { IRNode.VMASK_CMP_ZERO_L_NEON, ">= 1" })
|
||||||
|
public static void testLongVectorGreaterThanZero() {
|
||||||
|
LongVector av = LongVector.fromArray(L_SPECIES, la, 0);
|
||||||
|
av.compare(VectorOperators.GT, 0).intoArray(lr, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = "testLongVectorGreaterThanZero")
|
||||||
|
public static void testLongVectorGreaterThanZero_runner() {
|
||||||
|
testLongVectorGreaterThanZero();
|
||||||
|
assertArrayEquals(la, lr, (a) -> (a > 0 ? true : false));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(counts = { IRNode.VMASK_CMP_ZERO_F_NEON, ">= 1" })
|
||||||
|
public static void testFloatVectorLessEqualToZero() {
|
||||||
|
FloatVector av = FloatVector.fromArray(F_SPECIES, fa, 0);
|
||||||
|
av.compare(VectorOperators.LE, 0).intoArray(fr, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = "testFloatVectorLessEqualToZero")
|
||||||
|
public static void testFloatVectorLessEqualToZero_runner() {
|
||||||
|
testFloatVectorLessEqualToZero();
|
||||||
|
assertArrayEquals(fa, fr, (a) -> (a <= 0.0F ? true : false));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(counts = { IRNode.VMASK_CMP_ZERO_D_NEON, ">= 1" })
|
||||||
|
public static void testDoubleVectorLessThanZero() {
|
||||||
|
DoubleVector av = DoubleVector.fromArray(D_SPECIES, da, 0);
|
||||||
|
av.compare(VectorOperators.LT, 0).intoArray(dr, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = "testDoubleVectorLessThanZero")
|
||||||
|
public static void testDoubleVectorLessThanZero_runner() {
|
||||||
|
testDoubleVectorLessThanZero();
|
||||||
|
assertArrayEquals(da, dr, (a) -> (a < 0.0 ? true : false));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(failOn = { IRNode.VMASK_CMP_ZERO_I_NEON })
|
||||||
|
public static void testIntVectorUnsignedCondition() {
|
||||||
|
IntVector av = IntVector.fromArray(I_SPECIES, ia, 0);
|
||||||
|
av.compare(VectorOperators.UNSIGNED_GT, 0).intoArray(ir, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(failOn = { IRNode.VMASK_CMP_ZERO_L_NEON })
|
||||||
|
public static void testLongVectorUnsignedCondition() {
|
||||||
|
LongVector av = LongVector.fromArray(L_SPECIES, la, 0);
|
||||||
|
av.compare(VectorOperators.UNSIGNED_GE, 0).intoArray(lr, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void main(String[] args) {
|
||||||
|
TestFramework testFramework = new TestFramework();
|
||||||
|
testFramework.setDefaultWarmup(10000)
|
||||||
|
.addFlags("--add-modules=jdk.incubator.vector")
|
||||||
|
.addFlags("-XX:UseSVE=0")
|
||||||
|
.start();
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user