8297753: AArch64: Add optimized rules for vector compare with zero on NEON

Reviewed-by: aph
This commit is contained in:
changpeng1997 2023-03-03 12:11:10 +00:00 committed by Andrew Dinn
parent 339ca88783
commit d23a8bfb14
11 changed files with 1048 additions and 506 deletions

View File

@ -1191,6 +1191,10 @@ public:
// predicate controlling addressing modes // predicate controlling addressing modes
bool size_fits_all_mem_uses(AddPNode* addp, int shift); bool size_fits_all_mem_uses(AddPNode* addp, int shift);
// Convert a BoolTest condition to the corresponding Assembler condition.
// Replicates the logic of cmpOpOper::ccode() and cmpOpUOper::ccode().
Assembler::Condition to_assembler_cond(BoolTest::mask cond);
%} %}
source %{ source %{
@ -2527,6 +2531,50 @@ bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
return true; return true;
} }
// Translate a BoolTest condition into the matching Assembler condition.
// The mapping mirrors cmpOpOper::ccode() for signed conditions and
// cmpOpUOper::ccode() for unsigned ones; the asserts at the end cross-check
// the result against those operand classes. Any condition not listed below
// ends in ShouldNotReachHere().
Assembler::Condition to_assembler_cond(BoolTest::mask cond) {
  Assembler::Condition result;
  switch (cond) {
    case BoolTest::eq:          result = Assembler::EQ; break;
    case BoolTest::ne:          result = Assembler::NE; break;
    case BoolTest::le:          result = Assembler::LE; break;
    case BoolTest::ge:          result = Assembler::GE; break;
    case BoolTest::lt:          result = Assembler::LT; break;
    case BoolTest::gt:          result = Assembler::GT; break;
    case BoolTest::ule:         result = Assembler::LS; break;
    case BoolTest::uge:         result = Assembler::HS; break;
    case BoolTest::ult:         result = Assembler::LO; break;
    case BoolTest::ugt:         result = Assembler::HI; break;
    case BoolTest::overflow:    result = Assembler::VS; break;
    case BoolTest::no_overflow: result = Assembler::VC; break;
    default:
      ShouldNotReachHere();
      return Assembler::Condition(-1);
  }

  // Cross-check the table above against the ADL operand classes. For an
  // unsigned condition the unsigned_compare flag is stripped before the
  // value is handed to cmpOpUOper.
  const bool has_unsigned_flag = (cond & BoolTest::unsigned_compare) != 0;
  if (has_unsigned_flag) {
    assert(cmpOpUOper((BoolTest::mask)((int)cond & ~(BoolTest::unsigned_compare))).ccode() == result, "Invalid conversion");
  } else {
    assert(cmpOpOper(cond).ccode() == result, "Invalid conversion");
  }
  return result;
}
// Binary src (Replicate con) // Binary src (Replicate con)
bool is_valid_sve_arith_imm_pattern(Node* n, Node* m) { bool is_valid_sve_arith_imm_pattern(Node* n, Node* m) {
if (n == NULL || m == NULL) { if (n == NULL || m == NULL) {
@ -4263,6 +4311,17 @@ operand immI_positive()
interface(CONST_INTER); interface(CONST_INTER);
%} %}
// BoolTest condition for signed compare
// Accepts an int constant (ConI) whose value is strictly below
// BoolTest::unsigned_compare. The operand has zero cost and an empty
// format, so it prints nothing of its own.
operand immI_cmp_cond()
%{
predicate(n->get_int() < (int)(BoolTest::unsigned_compare));
match(ConI);
op_cost(0);
format %{ %}
interface(CONST_INTER);
%}
operand immL_255() operand immL_255()
%{ %{
predicate(n->get_long() == 255L); predicate(n->get_long() == 255L);

View File

@ -5137,6 +5137,61 @@ instruct vmaskcmp_neon(vReg dst, vReg src1, vReg src2, immI cond) %{
ins_pipe(pipe_slow); ins_pipe(pipe_slow);
%} %}
// Vector mask compare of byte, short or int lanes against an all-zero vector.
// NEON-only rule: the zero vector is folded into the compare instruction.
instruct vmaskcmp_zeroI_neon(vReg dst, vReg src, immI0 zero, immI_cmp_cond cond) %{
predicate(UseSVE == 0);
match(Set dst (VectorMaskCmp (Binary src (ReplicateB zero)) cond));
match(Set dst (VectorMaskCmp (Binary src (ReplicateS zero)) cond));
match(Set dst (VectorMaskCmp (Binary src (ReplicateI zero)) cond));
format %{ "vmaskcmp_zeroI_neon $dst, $src, #0, $cond" %}
ins_encode %{
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
BasicType bt = Matcher::vector_element_basic_type(this);
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
__ neon_compare_zero($dst$$FloatRegister, bt, $src$$FloatRegister,
condition, /* isQ */ length_in_bytes == 16);
%}
ins_pipe(pipe_slow);
%}
// Vector mask compare against an all-zero ReplicateL vector.
// NEON-only rule: the zero vector is folded into the compare instruction.
instruct vmaskcmp_zeroL_neon(vReg dst, vReg src, immL0 zero, immI_cmp_cond cond) %{
predicate(UseSVE == 0);
match(Set dst (VectorMaskCmp (Binary src (ReplicateL zero)) cond));
format %{ "vmaskcmp_zeroL_neon $dst, $src, #0, $cond" %}
ins_encode %{
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
__ neon_compare_zero($dst$$FloatRegister, T_LONG, $src$$FloatRegister,
condition, /* isQ */ length_in_bytes == 16);
%}
ins_pipe(pipe_slow);
%}
// Vector mask compare against an all-zero ReplicateF vector.
// NEON-only rule: the zero vector is folded into the compare instruction.
instruct vmaskcmp_zeroF_neon(vReg dst, vReg src, immF0 zero, immI_cmp_cond cond) %{
predicate(UseSVE == 0);
match(Set dst (VectorMaskCmp (Binary src (ReplicateF zero)) cond));
format %{ "vmaskcmp_zeroF_neon $dst, $src, #0, $cond" %}
ins_encode %{
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
__ neon_compare_zero($dst$$FloatRegister, T_FLOAT, $src$$FloatRegister,
condition, /* isQ */ length_in_bytes == 16);
%}
ins_pipe(pipe_slow);
%}
// Vector mask compare against an all-zero ReplicateD vector.
// NEON-only rule: the zero vector is folded into the compare instruction.
instruct vmaskcmp_zeroD_neon(vReg dst, vReg src, immD0 zero, immI_cmp_cond cond) %{
predicate(UseSVE == 0);
match(Set dst (VectorMaskCmp (Binary src (ReplicateD zero)) cond));
format %{ "vmaskcmp_zeroD_neon $dst, $src, #0, $cond" %}
ins_encode %{
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
__ neon_compare_zero($dst$$FloatRegister, T_DOUBLE, $src$$FloatRegister,
condition, /* isQ */ length_in_bytes == 16);
%}
ins_pipe(pipe_slow);
%}
instruct vmaskcmp_sve(pReg dst, vReg src1, vReg src2, immI cond, rFlagsReg cr) %{ instruct vmaskcmp_sve(pReg dst, vReg src1, vReg src2, immI cond, rFlagsReg cr) %{
predicate(UseSVE > 0); predicate(UseSVE > 0);
match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); match(Set dst (VectorMaskCmp (Binary src1 src2) cond));

View File

@ -3553,6 +3553,42 @@ instruct vmaskcmp_neon(vReg dst, vReg src1, vReg src2, immI cond) %{
ins_pipe(pipe_slow); ins_pipe(pipe_slow);
%} %}
// Vector mask compare of byte, short or int lanes against an all-zero vector.
// NEON-only rule: the zero vector is folded into the compare instruction.
instruct vmaskcmp_zeroI_neon(vReg dst, vReg src, immI0 zero, immI_cmp_cond cond) %{
predicate(UseSVE == 0);
match(Set dst (VectorMaskCmp (Binary src (ReplicateB zero)) cond));
match(Set dst (VectorMaskCmp (Binary src (ReplicateS zero)) cond));
match(Set dst (VectorMaskCmp (Binary src (ReplicateI zero)) cond));
format %{ "vmaskcmp_zeroI_neon $dst, $src, #0, $cond" %}
ins_encode %{
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
BasicType bt = Matcher::vector_element_basic_type(this);
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
__ neon_compare_zero($dst$$FloatRegister, bt, $src$$FloatRegister,
condition, /* isQ */ length_in_bytes == 16);
%}
ins_pipe(pipe_slow);
%}
dnl
dnl Expands to one NEON compare-with-zero instruct per invocation.
dnl The first argument is the type letter used in the instruct name, in the
dnl zero operand imm<type>0 and in the matched Replicate<type> node. The second
dnl argument is the BasicType constant handed to neon_compare_zero.
dnl VMASKCMP_ZERO_NEON($1, $2 )
dnl VMASKCMP_ZERO_NEON(type, basic_type)
define(`VMASKCMP_ZERO_NEON', `
// Vector mask compare against an all-zero Replicate$1 vector.
// NEON-only rule: the zero vector is folded into the compare instruction.
instruct vmaskcmp_zero$1_neon(vReg dst, vReg src, imm`$1'0 zero, immI_cmp_cond cond) %{
predicate(UseSVE == 0);
match(Set dst (VectorMaskCmp (Binary src (Replicate$1 zero)) cond));
format %{ "vmaskcmp_zero$1_neon $dst, $src, #0, $cond" %}
ins_encode %{
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
__ neon_compare_zero($dst$$FloatRegister, $2, $src$$FloatRegister,
condition, /* isQ */ length_in_bytes == 16);
%}
ins_pipe(pipe_slow);
%}')dnl
dnl
VMASKCMP_ZERO_NEON(L, T_LONG)
VMASKCMP_ZERO_NEON(F, T_FLOAT)
VMASKCMP_ZERO_NEON(D, T_DOUBLE)
instruct vmaskcmp_sve(pReg dst, vReg src1, vReg src2, immI cond, rFlagsReg cr) %{ instruct vmaskcmp_sve(pReg dst, vReg src1, vReg src2, immI cond, rFlagsReg cr) %{
predicate(UseSVE > 0); predicate(UseSVE > 0);
match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); match(Set dst (VectorMaskCmp (Binary src1 src2) cond));

View File

@ -2653,12 +2653,6 @@ template<typename R, typename... Rx>
INSN(cnt, 0, 0b100000010110, 0); // accepted arrangements: T8B, T16B INSN(cnt, 0, 0b100000010110, 0); // accepted arrangements: T8B, T16B
INSN(uaddlp, 1, 0b100000001010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S INSN(uaddlp, 1, 0b100000001010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(uaddlv, 1, 0b110000001110, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S INSN(uaddlv, 1, 0b110000001110, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S
// Zero compare.
INSN(cmeq, 0, 0b100000100110, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
INSN(cmge, 1, 0b100000100010, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
INSN(cmgt, 0, 0b100000100010, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
INSN(cmle, 1, 0b100000100110, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
INSN(cmlt, 0, 0b100000101010, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
#undef INSN #undef INSN
@ -3190,6 +3184,48 @@ public:
#undef INSN #undef INSN
// AdvSIMD compare with zero (vector).
// Encodes the integer compare-against-zero instruction selected by cond.
// Only EQ, GE, GT, LE and LT are handled; any other condition ends in
// ShouldNotReachHere(). Arrangements T1Q and T1D are rejected by the assert.
void cm(Condition cond, FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {
  starti;
  assert(T != T1Q && T != T1D, "invalid arrangement");

  // 3-bit selector: bit 2 is written to instruction bit 29,
  // bits 1:0 are written to instruction bits 13:12.
  int selector;
  switch (cond) {
    case GT: selector = 0b000; break;
    case EQ: selector = 0b001; break;
    case LT: selector = 0b010; break;
    case GE: selector = 0b100; break;
    case LE: selector = 0b101; break;
    default:
      ShouldNotReachHere();
      break;
  }

  f(0, 31);
  f((int)T & 1, 30);             // low bit of the arrangement
  f((selector >> 2) & 1, 29);
  f(0b01110, 28, 24);
  f((int)T >> 1, 23, 22);        // remaining arrangement bits
  f(0b10000010, 21, 14);
  f(selector & 0b11, 13, 12);
  f(0b10, 11, 10);
  rf(Vn, 5);
  rf(Vd, 0);
}
// AdvSIMD Floating-point compare with zero (vector).
// Encodes the floating-point compare-against-zero instruction selected by
// cond. Only EQ, GT, GE, LE and LT are handled; any other condition ends in
// ShouldNotReachHere(). Only the T2S, T4S and T2D arrangements are accepted.
void fcm(Condition cond, FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {
  starti;
  assert(T == T2S || T == T4S || T == T2D, "invalid arrangement");

  // 3-bit selector: bit 0 is written to instruction bit 29,
  // bits 2:1 are written to instruction bits 13:12.
  int selector;
  switch (cond) {
    case GT: selector = 0b000; break;
    case GE: selector = 0b001; break;
    case EQ: selector = 0b010; break;
    case LE: selector = 0b011; break;
    case LT: selector = 0b100; break;
    default:
      ShouldNotReachHere();
      break;
  }

  f(0, 31);
  f((int)T & 1, 30);                 // low bit of the arrangement
  f(selector & 1, 29);
  f(0b011101, 28, 23);
  f(((int)(T >> 1) & 1), 22);        // 0 for T2S/T4S, 1 for T2D
  f(0b10000011, 21, 14);
  f((selector >> 1) & 0b11, 13, 12);
  f(0b10, 11, 10);
  rf(Vn, 5);
  rf(Vd, 0);
}
void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index) void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index)
{ {
starti; starti;

View File

@ -925,7 +925,7 @@ void C2_MacroAssembler::neon_compare(FloatRegister dst, BasicType bt, FloatRegis
case BoolTest::eq: fcmeq(dst, size, src1, src2); break; case BoolTest::eq: fcmeq(dst, size, src1, src2); break;
case BoolTest::ne: { case BoolTest::ne: {
fcmeq(dst, size, src1, src2); fcmeq(dst, size, src1, src2);
notr(dst, T16B, dst); notr(dst, isQ ? T16B : T8B, dst);
break; break;
} }
case BoolTest::ge: fcmge(dst, size, src1, src2); break; case BoolTest::ge: fcmge(dst, size, src1, src2); break;
@ -941,7 +941,7 @@ void C2_MacroAssembler::neon_compare(FloatRegister dst, BasicType bt, FloatRegis
case BoolTest::eq: cmeq(dst, size, src1, src2); break; case BoolTest::eq: cmeq(dst, size, src1, src2); break;
case BoolTest::ne: { case BoolTest::ne: {
cmeq(dst, size, src1, src2); cmeq(dst, size, src1, src2);
notr(dst, T16B, dst); notr(dst, isQ ? T16B : T8B, dst);
break; break;
} }
case BoolTest::ge: cmge(dst, size, src1, src2); break; case BoolTest::ge: cmge(dst, size, src1, src2); break;
@ -959,6 +959,26 @@ void C2_MacroAssembler::neon_compare(FloatRegister dst, BasicType bt, FloatRegis
} }
} }
// Compare every lane of src against zero under cond and write the result
// to dst. bt gives the lane type; isQ selects the 16-byte (true) or 8-byte
// (false) vector form. T_FLOAT/T_DOUBLE lanes go through fcm(), all other
// types through cm(). NE is not passed to cm()/fcm() directly: it is
// emitted as an EQ compare followed by notr() on the whole register.
void C2_MacroAssembler::neon_compare_zero(FloatRegister dst, BasicType bt, FloatRegister src,
                                          Condition cond, bool isQ) {
  const SIMD_Arrangement size = esize2arrangement((unsigned)type2aelembytes(bt), isQ);
  const bool is_floating_point = (bt == T_FLOAT || bt == T_DOUBLE);
  const bool invert_result = (cond == Assembler::NE);
  const Condition emitted_cond = invert_result ? Assembler::EQ : cond;

  if (is_floating_point) {
    fcm(emitted_cond, dst, size, src);
  } else {
    cm(emitted_cond, dst, size, src);
  }

  if (invert_result) {
    notr(dst, isQ ? T16B : T8B, dst);
  }
}
// Compress the least significant bit of each byte to the rightmost and clear // Compress the least significant bit of each byte to the rightmost and clear
// the higher garbage bits. // the higher garbage bits.
void C2_MacroAssembler::bytemask_compress(Register dst) { void C2_MacroAssembler::bytemask_compress(Register dst) {

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -79,6 +79,9 @@
void neon_compare(FloatRegister dst, BasicType bt, FloatRegister src1, void neon_compare(FloatRegister dst, BasicType bt, FloatRegister src1,
FloatRegister src2, int cond, bool isQ); FloatRegister src2, int cond, bool isQ);
// Compare every lane of src against zero under cond and write the result to
// dst. bt is the lane type; isQ selects the 16-byte (true) or 8-byte (false)
// vector form.
void neon_compare_zero(FloatRegister dst, BasicType bt, FloatRegister src,
Condition cond, bool isQ);
void sve_compare(PRegister pd, BasicType bt, PRegister pg, void sve_compare(PRegister pd, BasicType bt, PRegister pg,
FloatRegister zn, FloatRegister zm, int cond); FloatRegister zn, FloatRegister zm, int cond);

View File

@ -5598,7 +5598,7 @@ void MacroAssembler::encode_iso_array(Register src, Register dst,
// ASCII-check on lo-parts (no sign). // ASCII-check on lo-parts (no sign).
FloatRegister vlox = vtmp1; // Merge lower bytes. FloatRegister vlox = vtmp1; // Merge lower bytes.
ASCII(orr(vlox, T16B, vlo0, vlo1)); ASCII(orr(vlox, T16B, vlo0, vlo1));
umov(chk, vhix, D, 1); ASCII(cmlt(vlox, T16B, vlox)); umov(chk, vhix, D, 1); ASCII(cm(LT, vlox, T16B, vlox));
fmovd(max, vhix); ASCII(umaxv(vlox, T16B, vlox)); fmovd(max, vhix); ASCII(umaxv(vlox, T16B, vlox));
orr(chk, chk, max); ASCII(umov(max, vlox, B, 0)); orr(chk, chk, max); ASCII(umov(max, vlox, B, 0));
ASCII(orr(chk, chk, max)); ASCII(orr(chk, chk, max));
@ -5624,7 +5624,7 @@ void MacroAssembler::encode_iso_array(Register src, Register dst,
uzp2(vhi, T16B, vtmp3, vtmp3); uzp2(vhi, T16B, vtmp3, vtmp3);
// ISO-check on hi-parts (all zero). // ISO-check on hi-parts (all zero).
// ASCII-check on lo-parts (no sign). // ASCII-check on lo-parts (no sign).
ASCII(cmlt(vtmp2, T16B, vlo)); ASCII(cm(LT, vtmp2, T16B, vlo));
fmovd(chk, vhi); ASCII(umaxv(vtmp2, T16B, vtmp2)); fmovd(chk, vhi); ASCII(umaxv(vtmp2, T16B, vtmp2));
ASCII(umov(max, vtmp2, B, 0)); ASCII(umov(max, vtmp2, B, 0));
ASCII(orr(chk, chk, max)); ASCII(orr(chk, chk, max));

View File

@ -1363,6 +1363,28 @@ class TwoRegNEONOp(CommonNEONInstruction):
class ThreeRegNEONOp(TwoRegNEONOp): class ThreeRegNEONOp(TwoRegNEONOp):
numRegs = 3 numRegs = 3
class NEONFloatCompareWithZero(TwoRegNEONOp):
    """Test-case generator for the NEON floating-point compare-with-zero forms.

    ``args`` is an ``[arrangement, condition]`` pair, e.g. ``['2S', 'GT']``.
    ``cstr`` renders the C++ ``fcm`` call and ``astr`` renders the assembly
    text the call is expected to match.
    """

    def __init__(self, args):
        self._name = 'fcm'
        self.arrangement, self.condition = args
        # Assembly mnemonic: 'fcm' plus the lower-cased condition, e.g. 'fcmgt'.
        self.insname = self._name + self.condition.lower()

    def cstr(self):
        dst = self._firstSIMDreg
        src = dst.nextReg()
        call_args = ", ".join(["Assembler::" + self.condition,
                               str(dst),
                               "__ T" + self.arrangement,
                               str(src)])
        return "__ " + self._name + "(" + call_args + ");"

    def astr(self):
        dst = self._firstSIMDreg
        src = dst.nextReg()
        operands = ["%s.%s" % (reg, self.arrangement) for reg in (dst, src)]
        return "%s\t%s, #0.0" % (self.insname, ", ".join(operands))
class SpecialCases(Instruction): class SpecialCases(Instruction):
def __init__(self, data): def __init__(self, data):
self._name = data[0] self._name = data[0]
@ -1596,6 +1618,16 @@ generate(NEONReduceInstruction,
["fminp", "fminp", "2S"], ["fminp", "fminp", "2D"], ["fminp", "fminp", "2S"], ["fminp", "fminp", "2D"],
]) ])
# Generate one fcm compare-with-zero test case for every combination of
# condition and float arrangement, with the condition varying slowest.
neonFloatCompareWithZeroConditions = ['GT', 'GE', 'EQ', 'LT', 'LE']
neonFloatArrangement = ['2S', '4S', '2D']
neonFloatCompareWithZeroArgs = [[arrangement, condition]
                                for condition in neonFloatCompareWithZeroConditions
                                for arrangement in neonFloatArrangement]
generate(NEONFloatCompareWithZero, neonFloatCompareWithZeroArgs)
generate(TwoRegNEONOp, generate(TwoRegNEONOp,
[["absr", "abs", "8B"], ["absr", "abs", "16B"], [["absr", "abs", "8B"], ["absr", "abs", "16B"],
["absr", "abs", "4H"], ["absr", "abs", "8H"], ["absr", "abs", "4H"], ["absr", "abs", "8H"],

File diff suppressed because it is too large Load Diff

View File

@ -1298,6 +1298,26 @@ public class IRNode {
machOnlyNameRegex(VMLS_MASKED, "vmls_masked"); machOnlyNameRegex(VMLS_MASKED, "vmls_masked");
} }
// IR node placeholder registered via machOnlyNameRegex under the mach node name "vmaskcmp_zeroI_neon".
public static final String VMASK_CMP_ZERO_I_NEON = PREFIX + "VMASK_CMP_ZERO_I_NEON" + POSTFIX;
static {
machOnlyNameRegex(VMASK_CMP_ZERO_I_NEON, "vmaskcmp_zeroI_neon");
}
// IR node placeholder registered via machOnlyNameRegex under the mach node name "vmaskcmp_zeroL_neon".
public static final String VMASK_CMP_ZERO_L_NEON = PREFIX + "VMASK_CMP_ZERO_L_NEON" + POSTFIX;
static {
machOnlyNameRegex(VMASK_CMP_ZERO_L_NEON, "vmaskcmp_zeroL_neon");
}
// IR node placeholder registered via machOnlyNameRegex under the mach node name "vmaskcmp_zeroF_neon".
public static final String VMASK_CMP_ZERO_F_NEON = PREFIX + "VMASK_CMP_ZERO_F_NEON" + POSTFIX;
static {
machOnlyNameRegex(VMASK_CMP_ZERO_F_NEON, "vmaskcmp_zeroF_neon");
}
// IR node placeholder registered via machOnlyNameRegex under the mach node name "vmaskcmp_zeroD_neon".
public static final String VMASK_CMP_ZERO_D_NEON = PREFIX + "VMASK_CMP_ZERO_D_NEON" + POSTFIX;
static {
machOnlyNameRegex(VMASK_CMP_ZERO_D_NEON, "vmaskcmp_zeroD_neon");
}
public static final String VNOT_I_MASKED = PREFIX + "VNOT_I_MASKED" + POSTFIX; public static final String VNOT_I_MASKED = PREFIX + "VNOT_I_MASKED" + POSTFIX;
static { static {
machOnlyNameRegex(VNOT_I_MASKED, "vnotI_masked"); machOnlyNameRegex(VNOT_I_MASKED, "vnotI_masked");

View File

@ -0,0 +1,260 @@
/*
* Copyright (c) 2023, Arm Limited. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package compiler.vectorapi;
import compiler.lib.ir_framework.*;
import java.util.Random;
import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.DoubleVector;
import jdk.incubator.vector.FloatVector;
import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.LongVector;
import jdk.incubator.vector.ShortVector;
import jdk.incubator.vector.VectorSpecies;
import jdk.incubator.vector.VectorOperators;
import jdk.test.lib.Asserts;
import jdk.test.lib.Utils;
/**
* @test
* @bug 8297753
* @key randomness
* @library /test/lib /
* @requires os.arch=="aarch64"
* @summary Add optimized rules for vector compare with zero on NEON
* @modules jdk.incubator.vector
*
* @run driver compiler.vectorapi.VectorCompareWithZeroTest
*/
public class VectorCompareWithZeroTest {
    // Each test loads one preferred-species vector from the start of an input
    // array, compares it with zero and stores the resulting mask into the
    // matching boolean array. The runner then checks the stored lanes against
    // the scalar comparison, and the @IR rule checks which mach node was used.
    private static final VectorSpecies<Byte> B_SPECIES = ByteVector.SPECIES_PREFERRED;
    private static final VectorSpecies<Short> S_SPECIES = ShortVector.SPECIES_PREFERRED;
    private static final VectorSpecies<Integer> I_SPECIES = IntVector.SPECIES_PREFERRED;
    private static final VectorSpecies<Long> L_SPECIES = LongVector.SPECIES_PREFERRED;
    private static final VectorSpecies<Float> F_SPECIES = FloatVector.SPECIES_PREFERRED;
    private static final VectorSpecies<Double> D_SPECIES = DoubleVector.SPECIES_PREFERRED;

    private static final int LENGTH = 1024;
    private static final Random RD = Utils.getRandomInstance();

    private static byte[] ba;
    private static boolean[] br;
    private static short[] sa;
    private static boolean[] sr;
    private static int[] ia;
    private static boolean[] ir;
    private static long[] la;
    private static boolean[] lr;
    private static float[] fa;
    private static boolean[] fr;
    private static double[] da;
    private static boolean[] dr;

    /**
     * Sign of the input element at index {@code i}: positive, negative, zero,
     * repeating. Only the first vector's worth of lanes is ever checked, so
     * the pattern guarantees that even a two-lane vector holds one positive
     * and one negative value, and any vector with three or more lanes also
     * holds an exact zero.
     */
    private static int signFor(int i) {
        switch (i % 3) {
            case 0:
                return 1;
            case 1:
                return -1;
            default:
                return 0;
        }
    }

    static {
        ba = new byte[LENGTH];
        sa = new short[LENGTH];
        ia = new int[LENGTH];
        la = new long[LENGTH];
        fa = new float[LENGTH];
        da = new double[LENGTH];

        br = new boolean[LENGTH];
        sr = new boolean[LENGTH];
        ir = new boolean[LENGTH];
        lr = new boolean[LENGTH];
        fr = new boolean[LENGTH];
        dr = new boolean[LENGTH];

        // Random non-zero magnitudes combined with a fixed sign pattern, so
        // that every comparison against zero yields both true and false lanes.
        for (int i = 0; i < LENGTH; i++) {
            int sign = signFor(i);
            ba[i] = (byte) (sign * (1 + RD.nextInt(24)));
            sa[i] = (short) (sign * (1 + RD.nextInt(24)));
            ia[i] = sign * (1 + RD.nextInt(24));
            la[i] = sign * (1L + RD.nextLong(24));
            fa[i] = sign * (1.0F + RD.nextFloat(24.0F));
            da[i] = sign * (1.0 + RD.nextDouble(24.0));
        }
    }

    interface ByteOp {
        boolean apply(byte a);
    }

    interface ShortOp {
        boolean apply(short a);
    }

    interface IntOp {
        boolean apply(int a);
    }

    interface LongOp {
        boolean apply(long a);
    }

    interface FloatOp {
        boolean apply(float a);
    }

    interface DoubleOp {
        boolean apply(double a);
    }

    // The assertArrayEquals overloads verify only the lanes written by a
    // single vector store, i.e. the first SPECIES.length() elements.

    private static void assertArrayEquals(byte[] a, boolean[] r, ByteOp f) {
        for (int i = 0; i < B_SPECIES.length(); i++) {
            Asserts.assertEquals(f.apply(a[i]), r[i]);
        }
    }

    private static void assertArrayEquals(short[] a, boolean[] r, ShortOp f) {
        for (int i = 0; i < S_SPECIES.length(); i++) {
            Asserts.assertEquals(f.apply(a[i]), r[i]);
        }
    }

    private static void assertArrayEquals(int[] a, boolean[] r, IntOp f) {
        for (int i = 0; i < I_SPECIES.length(); i++) {
            Asserts.assertEquals(f.apply(a[i]), r[i]);
        }
    }

    private static void assertArrayEquals(long[] a, boolean[] r, LongOp f) {
        for (int i = 0; i < L_SPECIES.length(); i++) {
            Asserts.assertEquals(f.apply(a[i]), r[i]);
        }
    }

    private static void assertArrayEquals(float[] a, boolean[] r, FloatOp f) {
        for (int i = 0; i < F_SPECIES.length(); i++) {
            Asserts.assertEquals(f.apply(a[i]), r[i]);
        }
    }

    private static void assertArrayEquals(double[] a, boolean[] r, DoubleOp f) {
        for (int i = 0; i < D_SPECIES.length(); i++) {
            Asserts.assertEquals(f.apply(a[i]), r[i]);
        }
    }

    @Test
    @IR(counts = { IRNode.VMASK_CMP_ZERO_I_NEON, ">= 1" })
    public static void testByteVectorEqualToZero() {
        ByteVector av = ByteVector.fromArray(B_SPECIES, ba, 0);
        av.compare(VectorOperators.EQ, 0).intoArray(br, 0);
    }

    @Run(test = "testByteVectorEqualToZero")
    public static void testByteVectorEqualToZero_runner() {
        testByteVectorEqualToZero();
        assertArrayEquals(ba, br, (a) -> a == (byte) 0);
    }

    @Test
    @IR(counts = { IRNode.VMASK_CMP_ZERO_I_NEON, ">= 1" })
    public static void testShortVectorNotEqualToZero() {
        ShortVector av = ShortVector.fromArray(S_SPECIES, sa, 0);
        av.compare(VectorOperators.NE, 0).intoArray(sr, 0);
    }

    @Run(test = "testShortVectorNotEqualToZero")
    public static void testShortVectorNotEqualToZero_runner() {
        testShortVectorNotEqualToZero();
        assertArrayEquals(sa, sr, (a) -> a != (short) 0);
    }

    @Test
    @IR(counts = { IRNode.VMASK_CMP_ZERO_I_NEON, ">= 1" })
    public static void testIntVectorGreaterEqualToZero() {
        IntVector av = IntVector.fromArray(I_SPECIES, ia, 0);
        av.compare(VectorOperators.GE, 0).intoArray(ir, 0);
    }

    @Run(test = "testIntVectorGreaterEqualToZero")
    public static void testIntVectorGreaterEqualToZero_runner() {
        testIntVectorGreaterEqualToZero();
        assertArrayEquals(ia, ir, (a) -> a >= 0);
    }

    @Test
    @IR(counts = { IRNode.VMASK_CMP_ZERO_L_NEON, ">= 1" })
    public static void testLongVectorGreaterThanZero() {
        LongVector av = LongVector.fromArray(L_SPECIES, la, 0);
        av.compare(VectorOperators.GT, 0).intoArray(lr, 0);
    }

    @Run(test = "testLongVectorGreaterThanZero")
    public static void testLongVectorGreaterThanZero_runner() {
        testLongVectorGreaterThanZero();
        assertArrayEquals(la, lr, (a) -> a > 0);
    }

    @Test
    @IR(counts = { IRNode.VMASK_CMP_ZERO_F_NEON, ">= 1" })
    public static void testFloatVectorLessEqualToZero() {
        FloatVector av = FloatVector.fromArray(F_SPECIES, fa, 0);
        av.compare(VectorOperators.LE, 0).intoArray(fr, 0);
    }

    @Run(test = "testFloatVectorLessEqualToZero")
    public static void testFloatVectorLessEqualToZero_runner() {
        testFloatVectorLessEqualToZero();
        assertArrayEquals(fa, fr, (a) -> a <= 0.0F);
    }

    @Test
    @IR(counts = { IRNode.VMASK_CMP_ZERO_D_NEON, ">= 1" })
    public static void testDoubleVectorLessThanZero() {
        DoubleVector av = DoubleVector.fromArray(D_SPECIES, da, 0);
        av.compare(VectorOperators.LT, 0).intoArray(dr, 0);
    }

    @Run(test = "testDoubleVectorLessThanZero")
    public static void testDoubleVectorLessThanZero_runner() {
        testDoubleVectorLessThanZero();
        assertArrayEquals(da, dr, (a) -> a < 0.0);
    }

    // Unsigned conditions must not be matched by the compare-with-zero rules.

    @Test
    @IR(failOn = { IRNode.VMASK_CMP_ZERO_I_NEON })
    public static void testIntVectorUnsignedCondition() {
        IntVector av = IntVector.fromArray(I_SPECIES, ia, 0);
        av.compare(VectorOperators.UNSIGNED_GT, 0).intoArray(ir, 0);
    }

    @Test
    @IR(failOn = { IRNode.VMASK_CMP_ZERO_L_NEON })
    public static void testLongVectorUnsignedCondition() {
        LongVector av = LongVector.fromArray(L_SPECIES, la, 0);
        av.compare(VectorOperators.UNSIGNED_GE, 0).intoArray(lr, 0);
    }

    public static void main(String[] args) {
        TestFramework testFramework = new TestFramework();
        testFramework.setDefaultWarmup(10000)
                     .addFlags("--add-modules=jdk.incubator.vector")
                     .addFlags("-XX:UseSVE=0")
                     .start();
    }
}