8297753: AArch64: Add optimized rules for vector compare with zero on NEON
Reviewed-by: aph
This commit is contained in:
parent
339ca88783
commit
d23a8bfb14
@ -1191,6 +1191,10 @@ public:
|
||||
|
||||
// predicate controlling addressing modes
|
||||
bool size_fits_all_mem_uses(AddPNode* addp, int shift);
|
||||
|
||||
// Convert BoolTest condition to Assembler condition.
|
||||
// Replicate the logic of cmpOpOper::ccode() and cmpOpUOper::ccode().
|
||||
Assembler::Condition to_assembler_cond(BoolTest::mask cond);
|
||||
%}
|
||||
|
||||
source %{
|
||||
@ -2527,6 +2531,50 @@ bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Convert BoolTest condition to Assembler condition.
|
||||
// Replicate the logic of cmpOpOper::ccode() and cmpOpUOper::ccode().
|
||||
Assembler::Condition to_assembler_cond(BoolTest::mask cond) {
  // Translate a BoolTest::mask into the matching Assembler::Condition.
  // Masks with no mapping below are a programming error: they hit
  // ShouldNotReachHere() and yield an invalid Condition(-1).
  Assembler::Condition acond;
  switch (cond) {
    // Equality and signed orderings.
    case BoolTest::eq:          acond = Assembler::EQ; break;
    case BoolTest::ne:          acond = Assembler::NE; break;
    case BoolTest::lt:          acond = Assembler::LT; break;
    case BoolTest::le:          acond = Assembler::LE; break;
    case BoolTest::gt:          acond = Assembler::GT; break;
    case BoolTest::ge:          acond = Assembler::GE; break;
    // Unsigned orderings.
    case BoolTest::ult:         acond = Assembler::LO; break;
    case BoolTest::ule:         acond = Assembler::LS; break;
    case BoolTest::ugt:         acond = Assembler::HI; break;
    case BoolTest::uge:         acond = Assembler::HS; break;
    // Overflow flag tests.
    case BoolTest::overflow:    acond = Assembler::VS; break;
    case BoolTest::no_overflow: acond = Assembler::VC; break;
    default:
      ShouldNotReachHere();
      return Assembler::Condition(-1);
  }

  // Debug-only cross-check against the operand classes whose ccode() logic
  // this function replicates. When the unsigned_compare bit is set, the bit
  // is stripped and the unsigned operand class is consulted; otherwise the
  // signed operand class is used with the mask unchanged.
  if (cond & BoolTest::unsigned_compare) {
    assert(cmpOpUOper((BoolTest::mask)((int)cond & ~(BoolTest::unsigned_compare))).ccode() == acond, "Invalid conversion");
  } else {
    assert(cmpOpOper(cond).ccode() == acond, "Invalid conversion");
  }

  return acond;
}
|
||||
|
||||
// Binary src (Replicate con)
|
||||
bool is_valid_sve_arith_imm_pattern(Node* n, Node* m) {
|
||||
if (n == NULL || m == NULL) {
|
||||
@ -4263,6 +4311,17 @@ operand immI_positive()
|
||||
interface(CONST_INTER);
|
||||
%}
|
||||
|
||||
// BoolTest condition for signed compare
|
||||
operand immI_cmp_cond()
|
||||
%{
|
||||
predicate(n->get_int() < (int)(BoolTest::unsigned_compare));
|
||||
match(ConI);
|
||||
|
||||
op_cost(0);
|
||||
format %{ %}
|
||||
interface(CONST_INTER);
|
||||
%}
|
||||
|
||||
operand immL_255()
|
||||
%{
|
||||
predicate(n->get_long() == 255L);
|
||||
|
@ -5137,6 +5137,61 @@ instruct vmaskcmp_neon(vReg dst, vReg src1, vReg src2, immI cond) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct vmaskcmp_zeroI_neon(vReg dst, vReg src, immI0 zero, immI_cmp_cond cond) %{
|
||||
predicate(UseSVE == 0);
|
||||
match(Set dst (VectorMaskCmp (Binary src (ReplicateB zero)) cond));
|
||||
match(Set dst (VectorMaskCmp (Binary src (ReplicateS zero)) cond));
|
||||
match(Set dst (VectorMaskCmp (Binary src (ReplicateI zero)) cond));
|
||||
format %{ "vmaskcmp_zeroI_neon $dst, $src, #0, $cond" %}
|
||||
ins_encode %{
|
||||
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
__ neon_compare_zero($dst$$FloatRegister, bt, $src$$FloatRegister,
|
||||
condition, /* isQ */ length_in_bytes == 16);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct vmaskcmp_zeroL_neon(vReg dst, vReg src, immL0 zero, immI_cmp_cond cond) %{
|
||||
predicate(UseSVE == 0);
|
||||
match(Set dst (VectorMaskCmp (Binary src (ReplicateL zero)) cond));
|
||||
format %{ "vmaskcmp_zeroL_neon $dst, $src, #0, $cond" %}
|
||||
ins_encode %{
|
||||
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
__ neon_compare_zero($dst$$FloatRegister, T_LONG, $src$$FloatRegister,
|
||||
condition, /* isQ */ length_in_bytes == 16);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct vmaskcmp_zeroF_neon(vReg dst, vReg src, immF0 zero, immI_cmp_cond cond) %{
|
||||
predicate(UseSVE == 0);
|
||||
match(Set dst (VectorMaskCmp (Binary src (ReplicateF zero)) cond));
|
||||
format %{ "vmaskcmp_zeroF_neon $dst, $src, #0, $cond" %}
|
||||
ins_encode %{
|
||||
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
__ neon_compare_zero($dst$$FloatRegister, T_FLOAT, $src$$FloatRegister,
|
||||
condition, /* isQ */ length_in_bytes == 16);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct vmaskcmp_zeroD_neon(vReg dst, vReg src, immD0 zero, immI_cmp_cond cond) %{
|
||||
predicate(UseSVE == 0);
|
||||
match(Set dst (VectorMaskCmp (Binary src (ReplicateD zero)) cond));
|
||||
format %{ "vmaskcmp_zeroD_neon $dst, $src, #0, $cond" %}
|
||||
ins_encode %{
|
||||
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
__ neon_compare_zero($dst$$FloatRegister, T_DOUBLE, $src$$FloatRegister,
|
||||
condition, /* isQ */ length_in_bytes == 16);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct vmaskcmp_sve(pReg dst, vReg src1, vReg src2, immI cond, rFlagsReg cr) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
|
||||
|
@ -3553,6 +3553,42 @@ instruct vmaskcmp_neon(vReg dst, vReg src1, vReg src2, immI cond) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct vmaskcmp_zeroI_neon(vReg dst, vReg src, immI0 zero, immI_cmp_cond cond) %{
|
||||
predicate(UseSVE == 0);
|
||||
match(Set dst (VectorMaskCmp (Binary src (ReplicateB zero)) cond));
|
||||
match(Set dst (VectorMaskCmp (Binary src (ReplicateS zero)) cond));
|
||||
match(Set dst (VectorMaskCmp (Binary src (ReplicateI zero)) cond));
|
||||
format %{ "vmaskcmp_zeroI_neon $dst, $src, #0, $cond" %}
|
||||
ins_encode %{
|
||||
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
__ neon_compare_zero($dst$$FloatRegister, bt, $src$$FloatRegister,
|
||||
condition, /* isQ */ length_in_bytes == 16);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
dnl
|
||||
dnl VMASKCMP_ZERO_NEON($1, $2 )
|
||||
dnl VMASKCMP_ZERO_NEON(type, basic_type)
|
||||
define(`VMASKCMP_ZERO_NEON', `
|
||||
instruct vmaskcmp_zero$1_neon(vReg dst, vReg src, imm`$1'0 zero, immI_cmp_cond cond) %{
|
||||
predicate(UseSVE == 0);
|
||||
match(Set dst (VectorMaskCmp (Binary src (Replicate$1 zero)) cond));
|
||||
format %{ "vmaskcmp_zero$1_neon $dst, $src, #0, $cond" %}
|
||||
ins_encode %{
|
||||
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
__ neon_compare_zero($dst$$FloatRegister, $2, $src$$FloatRegister,
|
||||
condition, /* isQ */ length_in_bytes == 16);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
dnl
|
||||
VMASKCMP_ZERO_NEON(L, T_LONG)
|
||||
VMASKCMP_ZERO_NEON(F, T_FLOAT)
|
||||
VMASKCMP_ZERO_NEON(D, T_DOUBLE)
|
||||
|
||||
instruct vmaskcmp_sve(pReg dst, vReg src1, vReg src2, immI cond, rFlagsReg cr) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
|
||||
|
@ -2653,12 +2653,6 @@ template<typename R, typename... Rx>
|
||||
INSN(cnt, 0, 0b100000010110, 0); // accepted arrangements: T8B, T16B
|
||||
INSN(uaddlp, 1, 0b100000001010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
|
||||
INSN(uaddlv, 1, 0b110000001110, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S
|
||||
// Zero compare.
|
||||
INSN(cmeq, 0, 0b100000100110, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
|
||||
INSN(cmge, 1, 0b100000100010, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
|
||||
INSN(cmgt, 0, 0b100000100010, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
|
||||
INSN(cmle, 1, 0b100000100110, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
|
||||
INSN(cmlt, 0, 0b100000101010, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
|
||||
|
||||
#undef INSN
|
||||
|
||||
@ -3190,6 +3184,48 @@ public:
|
||||
|
||||
#undef INSN
|
||||
|
||||
// AdvSIMD compare with zero (vector)
|
||||
void cm(Condition cond, FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {
  // Emits an integer vector compare of Vn against zero into Vd.
  // Only EQ, GE, GT, LE and LT are accepted; any other condition is a
  // programming error and hits ShouldNotReachHere().
  starti;
  assert(T != T1Q && T != T1D, "invalid arrangement");

  // Three-bit selector per condition: bit 2 is placed in instruction bit 29,
  // bits 1:0 are placed in instruction bits 13:12 (see the field writes below).
  int cond_op;
  switch (cond) {
    case GT: cond_op = 0b000; break;
    case EQ: cond_op = 0b001; break;
    case LT: cond_op = 0b010; break;
    case GE: cond_op = 0b100; break;
    case LE: cond_op = 0b101; break;
    default:
      ShouldNotReachHere();
      break;
  }

  const int arrangement = (int)T;
  f(0, 31);
  f(arrangement & 1, 30);          // low bit of the arrangement
  f((cond_op >> 2) & 1, 29);       // selector bit 2
  f(0b01110, 28, 24);
  f(arrangement >> 1, 23, 22);     // remaining arrangement bits
  f(0b10000010, 21, 14);
  f(cond_op & 0b11, 13, 12);       // selector bits 1:0
  f(0b10, 11, 10);
  rf(Vn, 5);
  rf(Vd, 0);
}
|
||||
|
||||
// AdvSIMD Floating-point compare with zero (vector)
|
||||
void fcm(Condition cond, FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {
  // Emits a floating-point vector compare of Vn against zero into Vd.
  // Only the T2S, T4S and T2D arrangements and the conditions EQ, GT, GE,
  // LE and LT are accepted; any other condition hits ShouldNotReachHere().
  starti;
  assert(T == T2S || T == T4S || T == T2D, "invalid arrangement");

  // Three-bit selector per condition: bit 0 is placed in instruction bit 29,
  // bits 2:1 are placed in instruction bits 13:12 (see the field writes below).
  int cond_op;
  switch (cond) {
    case GT: cond_op = 0b000; break;
    case GE: cond_op = 0b001; break;
    case EQ: cond_op = 0b010; break;
    case LE: cond_op = 0b011; break;
    case LT: cond_op = 0b100; break;
    default:
      ShouldNotReachHere();
      break;
  }

  const int arrangement = (int)T;
  f(0, 31);
  f(arrangement & 1, 30);              // low bit of the arrangement
  f(cond_op & 1, 29);                  // selector bit 0
  f(0b011101, 28, 23);
  f((arrangement >> 1) & 1, 22);       // second arrangement bit
  f(0b10000011, 21, 14);
  f((cond_op >> 1) & 0b11, 13, 12);    // selector bits 2:1
  f(0b10, 11, 10);
  rf(Vn, 5);
  rf(Vd, 0);
}
|
||||
|
||||
void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index)
|
||||
{
|
||||
starti;
|
||||
|
@ -925,7 +925,7 @@ void C2_MacroAssembler::neon_compare(FloatRegister dst, BasicType bt, FloatRegis
|
||||
case BoolTest::eq: fcmeq(dst, size, src1, src2); break;
|
||||
case BoolTest::ne: {
|
||||
fcmeq(dst, size, src1, src2);
|
||||
notr(dst, T16B, dst);
|
||||
notr(dst, isQ ? T16B : T8B, dst);
|
||||
break;
|
||||
}
|
||||
case BoolTest::ge: fcmge(dst, size, src1, src2); break;
|
||||
@ -941,7 +941,7 @@ void C2_MacroAssembler::neon_compare(FloatRegister dst, BasicType bt, FloatRegis
|
||||
case BoolTest::eq: cmeq(dst, size, src1, src2); break;
|
||||
case BoolTest::ne: {
|
||||
cmeq(dst, size, src1, src2);
|
||||
notr(dst, T16B, dst);
|
||||
notr(dst, isQ ? T16B : T8B, dst);
|
||||
break;
|
||||
}
|
||||
case BoolTest::ge: cmge(dst, size, src1, src2); break;
|
||||
@ -959,6 +959,26 @@ void C2_MacroAssembler::neon_compare(FloatRegister dst, BasicType bt, FloatRegis
|
||||
}
|
||||
}
|
||||
|
||||
void C2_MacroAssembler::neon_compare_zero(FloatRegister dst, BasicType bt, FloatRegister src,
                                          Condition cond, bool isQ) {
  // Compares every lane of src against zero and writes the result to dst.
  //   bt   - element type; T_FLOAT/T_DOUBLE use fcm(), everything else cm()
  //   cond - condition to test; NE is synthesized as EQ followed by a
  //          bitwise NOT of the result
  //   isQ  - selects the arrangement passed to esize2arrangement() and the
  //          T16B (true) or T8B (false) width of the inverting notr
  const SIMD_Arrangement size = esize2arrangement((unsigned)type2aelembytes(bt), isQ);

  const bool invert = (cond == Assembler::NE);
  const Condition emitted = invert ? Assembler::EQ : cond;

  if (bt == T_FLOAT || bt == T_DOUBLE) {
    fcm(emitted, dst, size, src);
  } else {
    cm(emitted, dst, size, src);
  }

  if (invert) {
    notr(dst, isQ ? T16B : T8B, dst);
  }
}
|
||||
|
||||
// Compress the least significant bit of each byte to the rightmost and clear
|
||||
// the higher garbage bits.
|
||||
void C2_MacroAssembler::bytemask_compress(Register dst) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -79,6 +79,9 @@
|
||||
void neon_compare(FloatRegister dst, BasicType bt, FloatRegister src1,
|
||||
FloatRegister src2, int cond, bool isQ);
|
||||
|
||||
void neon_compare_zero(FloatRegister dst, BasicType bt, FloatRegister src,
|
||||
Condition cond, bool isQ);
|
||||
|
||||
void sve_compare(PRegister pd, BasicType bt, PRegister pg,
|
||||
FloatRegister zn, FloatRegister zm, int cond);
|
||||
|
||||
|
@ -5598,7 +5598,7 @@ void MacroAssembler::encode_iso_array(Register src, Register dst,
|
||||
// ASCII-check on lo-parts (no sign).
|
||||
FloatRegister vlox = vtmp1; // Merge lower bytes.
|
||||
ASCII(orr(vlox, T16B, vlo0, vlo1));
|
||||
umov(chk, vhix, D, 1); ASCII(cmlt(vlox, T16B, vlox));
|
||||
umov(chk, vhix, D, 1); ASCII(cm(LT, vlox, T16B, vlox));
|
||||
fmovd(max, vhix); ASCII(umaxv(vlox, T16B, vlox));
|
||||
orr(chk, chk, max); ASCII(umov(max, vlox, B, 0));
|
||||
ASCII(orr(chk, chk, max));
|
||||
@ -5624,7 +5624,7 @@ void MacroAssembler::encode_iso_array(Register src, Register dst,
|
||||
uzp2(vhi, T16B, vtmp3, vtmp3);
|
||||
// ISO-check on hi-parts (all zero).
|
||||
// ASCII-check on lo-parts (no sign).
|
||||
ASCII(cmlt(vtmp2, T16B, vlo));
|
||||
ASCII(cm(LT, vtmp2, T16B, vlo));
|
||||
fmovd(chk, vhi); ASCII(umaxv(vtmp2, T16B, vtmp2));
|
||||
ASCII(umov(max, vtmp2, B, 0));
|
||||
ASCII(orr(chk, chk, max));
|
||||
|
@ -1363,6 +1363,28 @@ class TwoRegNEONOp(CommonNEONInstruction):
|
||||
class ThreeRegNEONOp(TwoRegNEONOp):
|
||||
numRegs = 3
|
||||
|
||||
class NEONFloatCompareWithZero(TwoRegNEONOp):
    """Test-case generator for the NEON floating-point compare-with-zero form.

    ``args`` is an ``[arrangement, condition]`` pair, e.g. ``['2S', 'GT']``.
    The C++ side is emitted as a call to ``fcm`` with the condition passed as
    an ``Assembler::`` constant, while the expected disassembly uses the
    per-condition mnemonic (``fcm`` + lower-cased condition).
    """

    def __init__(self, args):
        arrangement, condition = args
        self._name = 'fcm'
        self.arrangement = arrangement
        self.condition = condition
        # Mnemonic expected in the assembler listing, e.g. "fcmgt".
        self.insname = self._name + condition.lower()

    def cstr(self):
        """Return the C++ statement that emits this instruction."""
        # NOTE(review): _firstSIMDreg is not set in this class; presumably it
        # is assigned by the inherited generate() machinery - confirm.
        dst = self._firstSIMDreg
        src = dst.nextReg()
        operands = ("__ " + self._name,
                    "Assembler::" + self.condition,
                    dst,
                    "__ T" + self.arrangement,
                    src)
        return "%s(%s, %s, %s, %s);" % operands

    def astr(self):
        """Return the expected assembly text for this instruction."""
        dst = self._firstSIMDreg
        src = dst.nextReg()
        operands = (self.insname,
                    dst, self.arrangement,
                    src, self.arrangement)
        return "%s\t%s.%s, %s.%s, #0.0" % operands
|
||||
|
||||
class SpecialCases(Instruction):
|
||||
def __init__(self, data):
|
||||
self._name = data[0]
|
||||
@ -1596,6 +1618,16 @@ generate(NEONReduceInstruction,
|
||||
["fminp", "fminp", "2S"], ["fminp", "fminp", "2D"],
|
||||
])
|
||||
|
||||
# Conditions and arrangements to cover for the float compare-with-zero tests.
neonFloatCompareWithZeroConditions = ['GT', 'GE', 'EQ', 'LT', 'LE']
neonFloatArrangement = ['2S', '4S', '2D']

# One [arrangement, condition] pair per combination, grouped by condition
# (all arrangements for 'GT' first, then 'GE', and so on).
neonFloatCompareWithZeroArgs = [
    [arrangement, condition]
    for condition in neonFloatCompareWithZeroConditions
    for arrangement in neonFloatArrangement
]
|
||||
|
||||
generate(NEONFloatCompareWithZero, neonFloatCompareWithZeroArgs)
|
||||
|
||||
generate(TwoRegNEONOp,
|
||||
[["absr", "abs", "8B"], ["absr", "abs", "16B"],
|
||||
["absr", "abs", "4H"], ["absr", "abs", "8H"],
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1298,6 +1298,26 @@ public class IRNode {
|
||||
machOnlyNameRegex(VMLS_MASKED, "vmls_masked");
|
||||
}
|
||||
|
||||
public static final String VMASK_CMP_ZERO_I_NEON = PREFIX + "VMASK_CMP_ZERO_I_NEON" + POSTFIX;
|
||||
static {
|
||||
machOnlyNameRegex(VMASK_CMP_ZERO_I_NEON, "vmaskcmp_zeroI_neon");
|
||||
}
|
||||
|
||||
public static final String VMASK_CMP_ZERO_L_NEON = PREFIX + "VMASK_CMP_ZERO_L_NEON" + POSTFIX;
|
||||
static {
|
||||
machOnlyNameRegex(VMASK_CMP_ZERO_L_NEON, "vmaskcmp_zeroL_neon");
|
||||
}
|
||||
|
||||
public static final String VMASK_CMP_ZERO_F_NEON = PREFIX + "VMASK_CMP_ZERO_F_NEON" + POSTFIX;
|
||||
static {
|
||||
machOnlyNameRegex(VMASK_CMP_ZERO_F_NEON, "vmaskcmp_zeroF_neon");
|
||||
}
|
||||
|
||||
public static final String VMASK_CMP_ZERO_D_NEON = PREFIX + "VMASK_CMP_ZERO_D_NEON" + POSTFIX;
|
||||
static {
|
||||
machOnlyNameRegex(VMASK_CMP_ZERO_D_NEON, "vmaskcmp_zeroD_neon");
|
||||
}
|
||||
|
||||
public static final String VNOT_I_MASKED = PREFIX + "VNOT_I_MASKED" + POSTFIX;
|
||||
static {
|
||||
machOnlyNameRegex(VNOT_I_MASKED, "vnotI_masked");
|
||||
|
@ -0,0 +1,260 @@
|
||||
/*
|
||||
* Copyright (c) 2023, Arm Limited. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package compiler.vectorapi;
|
||||
|
||||
import compiler.lib.ir_framework.*;
|
||||
|
||||
import java.util.Random;
|
||||
|
||||
import jdk.incubator.vector.ByteVector;
|
||||
import jdk.incubator.vector.DoubleVector;
|
||||
import jdk.incubator.vector.FloatVector;
|
||||
import jdk.incubator.vector.IntVector;
|
||||
import jdk.incubator.vector.LongVector;
|
||||
import jdk.incubator.vector.ShortVector;
|
||||
import jdk.incubator.vector.VectorSpecies;
|
||||
import jdk.incubator.vector.VectorOperators;
|
||||
|
||||
import jdk.test.lib.Asserts;
|
||||
import jdk.test.lib.Utils;
|
||||
|
||||
/**
|
||||
* @test
|
||||
* @bug 8297753
|
||||
* @key randomness
|
||||
* @library /test/lib /
|
||||
* @requires os.arch=="aarch64"
|
||||
* @summary Add optimized rules for vector compare with zero on NEON
|
||||
* @modules jdk.incubator.vector
|
||||
*
|
||||
* @run driver compiler.vectorapi.VectorCompareWithZeroTest
|
||||
*/
|
||||
|
||||
public class VectorCompareWithZeroTest {
|
||||
private static final VectorSpecies<Byte> B_SPECIES = ByteVector.SPECIES_PREFERRED;
|
||||
private static final VectorSpecies<Short> S_SPECIES = ShortVector.SPECIES_PREFERRED;
|
||||
private static final VectorSpecies<Integer> I_SPECIES = IntVector.SPECIES_PREFERRED;
|
||||
private static final VectorSpecies<Long> L_SPECIES = LongVector.SPECIES_PREFERRED;
|
||||
private static final VectorSpecies<Float> F_SPECIES = FloatVector.SPECIES_PREFERRED;
|
||||
private static final VectorSpecies<Double> D_SPECIES = DoubleVector.SPECIES_PREFERRED;
|
||||
private static final int LENGTH = 1024;
|
||||
private static final Random RD = Utils.getRandomInstance();
|
||||
private static byte[] ba;
|
||||
private static boolean[] br;
|
||||
private static short[] sa;
|
||||
private static boolean[] sr;
|
||||
private static int[] ia;
|
||||
private static boolean[] ir;
|
||||
private static long[] la;
|
||||
private static boolean[] lr;
|
||||
private static float[] fa;
|
||||
private static boolean[] fr;
|
||||
private static double[] da;
|
||||
private static boolean[] dr;
|
||||
|
||||
static {
|
||||
ba = new byte[LENGTH];
|
||||
sa = new short[LENGTH];
|
||||
ia = new int[LENGTH];
|
||||
la = new long[LENGTH];
|
||||
fa = new float[LENGTH];
|
||||
da = new double[LENGTH];
|
||||
|
||||
br = new boolean[LENGTH];
|
||||
sr = new boolean[LENGTH];
|
||||
ir = new boolean[LENGTH];
|
||||
lr = new boolean[LENGTH];
|
||||
fr = new boolean[LENGTH];
|
||||
dr = new boolean[LENGTH];
|
||||
|
||||
for (int i = 0; i < LENGTH; i++) {
|
||||
ba[i] = (byte) RD.nextInt(25);
|
||||
sa[i] = (short) RD.nextInt(25);
|
||||
ia[i] = RD.nextInt(25);
|
||||
la[i] = RD.nextLong(25);
|
||||
fa[i] = RD.nextFloat(25.0F);
|
||||
da[i] = RD.nextDouble(25.0);
|
||||
}
|
||||
}
|
||||
|
||||
interface ByteOp {
|
||||
boolean apply(byte a);
|
||||
}
|
||||
|
||||
interface ShortOp {
|
||||
boolean apply(short a);
|
||||
}
|
||||
|
||||
interface IntOp {
|
||||
boolean apply(int a);
|
||||
}
|
||||
|
||||
interface LongOp {
|
||||
boolean apply(long a);
|
||||
}
|
||||
|
||||
interface FloatOp {
|
||||
boolean apply(float a);
|
||||
}
|
||||
|
||||
interface DoubleOp {
|
||||
boolean apply(double a);
|
||||
}
|
||||
|
||||
private static void assertArrayEquals(byte[] a, boolean[] r, ByteOp f) {
|
||||
for (int i = 0; i < B_SPECIES.length(); i++) {
|
||||
Asserts.assertEquals(f.apply(a[i]), r[i]);
|
||||
}
|
||||
}
|
||||
|
||||
private static void assertArrayEquals(short[] a, boolean[] r, ShortOp f) {
|
||||
for (int i = 0; i < S_SPECIES.length(); i++) {
|
||||
Asserts.assertEquals(f.apply(a[i]), r[i]);
|
||||
}
|
||||
}
|
||||
|
||||
private static void assertArrayEquals(int[] a, boolean[] r, IntOp f) {
|
||||
for (int i = 0; i < I_SPECIES.length(); i++) {
|
||||
Asserts.assertEquals(f.apply(a[i]), r[i]);
|
||||
}
|
||||
}
|
||||
|
||||
private static void assertArrayEquals(long[] a, boolean[] r, LongOp f) {
|
||||
for (int i = 0; i < L_SPECIES.length(); i++) {
|
||||
Asserts.assertEquals(f.apply(a[i]), r[i]);
|
||||
}
|
||||
}
|
||||
|
||||
private static void assertArrayEquals(float[] a, boolean[] r, FloatOp f) {
|
||||
for (int i = 0; i < F_SPECIES.length(); i++) {
|
||||
Asserts.assertEquals(f.apply(a[i]), r[i]);
|
||||
}
|
||||
}
|
||||
|
||||
private static void assertArrayEquals(double[] a, boolean[] r, DoubleOp f) {
|
||||
for (int i = 0; i < D_SPECIES.length(); i++) {
|
||||
Asserts.assertEquals(f.apply(a[i]), r[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = { IRNode.VMASK_CMP_ZERO_I_NEON, ">= 1" })
|
||||
public static void testByteVectorEqualToZero() {
|
||||
ByteVector av = ByteVector.fromArray(B_SPECIES, ba, 0);
|
||||
av.compare(VectorOperators.EQ, 0).intoArray(br, 0);
|
||||
}
|
||||
|
||||
@Run(test = "testByteVectorEqualToZero")
|
||||
public static void testByteVectorEqualToZero_runner() {
|
||||
testByteVectorEqualToZero();
|
||||
assertArrayEquals(ba, br, (a) -> (a == (byte) 0 ? true : false));
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = { IRNode.VMASK_CMP_ZERO_I_NEON, ">= 1" })
|
||||
public static void testShortVectorNotEqualToZero() {
|
||||
ShortVector av = ShortVector.fromArray(S_SPECIES, sa, 0);
|
||||
av.compare(VectorOperators.NE, 0).intoArray(sr, 0);
|
||||
}
|
||||
|
||||
@Run(test = "testShortVectorNotEqualToZero")
|
||||
public static void testShortVectorNotEqualToZero_runner() {
|
||||
testShortVectorNotEqualToZero();
|
||||
assertArrayEquals(sa, sr, (a) -> (a != (short) 0 ? true : false));
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = { IRNode.VMASK_CMP_ZERO_I_NEON, ">= 1" })
|
||||
public static void testIntVectorGreaterEqualToZero() {
|
||||
IntVector av = IntVector.fromArray(I_SPECIES, ia, 0);
|
||||
av.compare(VectorOperators.GE, 0).intoArray(ir, 0);
|
||||
}
|
||||
|
||||
@Run(test = "testIntVectorGreaterEqualToZero")
|
||||
public static void testIntVectorGreaterEqualToZero_runner() {
|
||||
testIntVectorGreaterEqualToZero();
|
||||
assertArrayEquals(ia, ir, (a) -> (a >= 0 ? true : false));
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = { IRNode.VMASK_CMP_ZERO_L_NEON, ">= 1" })
|
||||
public static void testLongVectorGreaterThanZero() {
|
||||
LongVector av = LongVector.fromArray(L_SPECIES, la, 0);
|
||||
av.compare(VectorOperators.GT, 0).intoArray(lr, 0);
|
||||
}
|
||||
|
||||
@Run(test = "testLongVectorGreaterThanZero")
|
||||
public static void testLongVectorGreaterThanZero_runner() {
|
||||
testLongVectorGreaterThanZero();
|
||||
assertArrayEquals(la, lr, (a) -> (a > 0 ? true : false));
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = { IRNode.VMASK_CMP_ZERO_F_NEON, ">= 1" })
|
||||
public static void testFloatVectorLessEqualToZero() {
|
||||
FloatVector av = FloatVector.fromArray(F_SPECIES, fa, 0);
|
||||
av.compare(VectorOperators.LE, 0).intoArray(fr, 0);
|
||||
}
|
||||
|
||||
@Run(test = "testFloatVectorLessEqualToZero")
|
||||
public static void testFloatVectorLessEqualToZero_runner() {
|
||||
testFloatVectorLessEqualToZero();
|
||||
assertArrayEquals(fa, fr, (a) -> (a <= 0.0F ? true : false));
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = { IRNode.VMASK_CMP_ZERO_D_NEON, ">= 1" })
|
||||
public static void testDoubleVectorLessThanZero() {
|
||||
DoubleVector av = DoubleVector.fromArray(D_SPECIES, da, 0);
|
||||
av.compare(VectorOperators.LT, 0).intoArray(dr, 0);
|
||||
}
|
||||
|
||||
@Run(test = "testDoubleVectorLessThanZero")
|
||||
public static void testDoubleVectorLessThanZero_runner() {
|
||||
testDoubleVectorLessThanZero();
|
||||
assertArrayEquals(da, dr, (a) -> (a < 0.0 ? true : false));
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = { IRNode.VMASK_CMP_ZERO_I_NEON })
|
||||
public static void testIntVectorUnsignedCondition() {
|
||||
IntVector av = IntVector.fromArray(I_SPECIES, ia, 0);
|
||||
av.compare(VectorOperators.UNSIGNED_GT, 0).intoArray(ir, 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = { IRNode.VMASK_CMP_ZERO_L_NEON })
|
||||
public static void testLongVectorUnsignedCondition() {
|
||||
LongVector av = LongVector.fromArray(L_SPECIES, la, 0);
|
||||
av.compare(VectorOperators.UNSIGNED_GE, 0).intoArray(lr, 0);
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
TestFramework testFramework = new TestFramework();
|
||||
testFramework.setDefaultWarmup(10000)
|
||||
.addFlags("--add-modules=jdk.incubator.vector")
|
||||
.addFlags("-XX:UseSVE=0")
|
||||
.start();
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user