8302906: AArch64: Add SVE backend support for vector unsigned comparison

Reviewed-by: aph, eliu
This commit is contained in:
changpeng1997 2023-03-16 04:16:04 +00:00 committed by Eric Liu
parent 2b81faeb35
commit 42dd9077a0
9 changed files with 202 additions and 170 deletions

View File

@ -1906,7 +1906,7 @@ instruct vminL_neon(vReg dst, vReg src1, vReg src2) %{
effect(TEMP_DEF dst);
format %{ "vminL_neon $dst, $src1, $src2\t# 2L" %}
ins_encode %{
__ cmgt($dst$$FloatRegister, __ T2D, $src1$$FloatRegister, $src2$$FloatRegister);
__ cm(Assembler::GT, $dst$$FloatRegister, __ T2D, $src1$$FloatRegister, $src2$$FloatRegister);
__ bsl($dst$$FloatRegister, __ T16B, $src2$$FloatRegister, $src1$$FloatRegister);
%}
ins_pipe(pipe_slow);
@ -1993,7 +1993,7 @@ instruct vmaxL_neon(vReg dst, vReg src1, vReg src2) %{
effect(TEMP_DEF dst);
format %{ "vmaxL_neon $dst, $src1, $src2\t# 2L" %}
ins_encode %{
__ cmgt($dst$$FloatRegister, __ T2D, $src1$$FloatRegister, $src2$$FloatRegister);
__ cm(Assembler::GT, $dst$$FloatRegister, __ T2D, $src1$$FloatRegister, $src2$$FloatRegister);
__ bsl($dst$$FloatRegister, __ T16B, $src1$$FloatRegister, $src2$$FloatRegister);
%}
ins_pipe(pipe_slow);
@ -5128,11 +5128,11 @@ instruct vmaskcmp_neon(vReg dst, vReg src1, vReg src2, immI cond) %{
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
format %{ "vmaskcmp_neon $dst, $src1, $src2, $cond" %}
ins_encode %{
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
BasicType bt = Matcher::vector_element_basic_type(this);
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
__ neon_compare($dst$$FloatRegister, bt, $src1$$FloatRegister,
$src2$$FloatRegister, (int)($cond$$constant),
/* isQ */ length_in_bytes == 16);
$src2$$FloatRegister, condition, /* isQ */ length_in_bytes == 16);
%}
ins_pipe(pipe_slow);
%}
@ -5198,11 +5198,12 @@ instruct vmaskcmp_sve(pReg dst, vReg src1, vReg src2, immI cond, rFlagsReg cr) %
effect(KILL cr);
format %{ "vmaskcmp_sve $dst, $src1, $src2, $cond\t# KILL cr" %}
ins_encode %{
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
BasicType bt = Matcher::vector_element_basic_type(this);
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
assert(length_in_bytes == MaxVectorSize, "invalid vector length");
__ sve_compare($dst$$PRegister, bt, ptrue, $src1$$FloatRegister,
$src2$$FloatRegister, (int)($cond$$constant));
$src2$$FloatRegister, condition);
%}
ins_pipe(pipe_slow);
%}
@ -5214,9 +5215,10 @@ instruct vmaskcmp_masked(pReg dst, vReg src1, vReg src2, immI cond,
effect(KILL cr);
format %{ "vmaskcmp_masked $dst, $pg, $src1, $src2, $cond\t# KILL cr" %}
ins_encode %{
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
BasicType bt = Matcher::vector_element_basic_type(this);
__ sve_compare($dst$$PRegister, bt, $pg$$PRegister, $src1$$FloatRegister,
$src2$$FloatRegister, (int)($cond$$constant));
$src2$$FloatRegister, condition);
%}
ins_pipe(pipe_slow);
%}
@ -5876,12 +5878,12 @@ instruct vcmove_neon(vReg dst, vReg src1, vReg src2, immI cond, cmpOp copnd) %{
effect(TEMP_DEF dst);
format %{ "vcmove_neon.$copnd $dst, $src1, $src2\t# vector conditional move fp" %}
ins_encode %{
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
BasicType bt = Matcher::vector_element_basic_type(this);
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
assert(length_in_bytes == 8 || length_in_bytes == 16, "must be");
__ neon_compare($dst$$FloatRegister, bt, $src1$$FloatRegister,
$src2$$FloatRegister, (int)($cond$$constant),
/* isQ */ length_in_bytes == 16);
$src2$$FloatRegister, condition, /* isQ */ length_in_bytes == 16);
__ bsl($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
$src2$$FloatRegister, $src1$$FloatRegister);
%}
@ -5897,10 +5899,11 @@ instruct vcmove_sve(vReg dst, vReg src1, vReg src2, immI cond, cmpOp copnd, pReg
format %{ "vcmove_sve.$copnd $dst, $src1, $src2\t# vector conditional move fp. KILL $pgtmp" %}
ins_encode %{
assert(UseSVE > 0, "must be sve");
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
BasicType bt = Matcher::vector_element_basic_type(this);
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
__ sve_compare($pgtmp$$PRegister, bt, ptrue, $src1$$FloatRegister,
$src2$$FloatRegister, (int)($cond$$constant));
$src2$$FloatRegister, condition);
__ sve_sel($dst$$FloatRegister, __ elemType_to_regVariant(bt),
$pgtmp$$PRegister, $src2$$FloatRegister, $src1$$FloatRegister);
%}

View File

@ -1001,7 +1001,7 @@ instruct v$1L_neon(vReg dst, vReg src1, vReg src2) %{
effect(TEMP_DEF dst);
format %{ "v$1L_neon $dst, $src1, $src2\t# 2L" %}
ins_encode %{
__ cmgt($dst$$FloatRegister, __ T2D, $src1$$FloatRegister, $src2$$FloatRegister);
__ cm(Assembler::GT, $dst$$FloatRegister, __ T2D, $src1$$FloatRegister, $src2$$FloatRegister);
__ bsl($dst$$FloatRegister, __ T16B, ifelse(min, $1, $src2, $src1)$$FloatRegister, ifelse(min, $1, $src1, $src2)$$FloatRegister);
%}
ins_pipe(pipe_slow);
@ -3544,11 +3544,11 @@ instruct vmaskcmp_neon(vReg dst, vReg src1, vReg src2, immI cond) %{
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
format %{ "vmaskcmp_neon $dst, $src1, $src2, $cond" %}
ins_encode %{
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
BasicType bt = Matcher::vector_element_basic_type(this);
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
__ neon_compare($dst$$FloatRegister, bt, $src1$$FloatRegister,
$src2$$FloatRegister, (int)($cond$$constant),
/* isQ */ length_in_bytes == 16);
$src2$$FloatRegister, condition, /* isQ */ length_in_bytes == 16);
%}
ins_pipe(pipe_slow);
%}
@ -3595,11 +3595,12 @@ instruct vmaskcmp_sve(pReg dst, vReg src1, vReg src2, immI cond, rFlagsReg cr) %
effect(KILL cr);
format %{ "vmaskcmp_sve $dst, $src1, $src2, $cond\t# KILL cr" %}
ins_encode %{
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
BasicType bt = Matcher::vector_element_basic_type(this);
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
assert(length_in_bytes == MaxVectorSize, "invalid vector length");
__ sve_compare($dst$$PRegister, bt, ptrue, $src1$$FloatRegister,
$src2$$FloatRegister, (int)($cond$$constant));
$src2$$FloatRegister, condition);
%}
ins_pipe(pipe_slow);
%}
@ -3611,9 +3612,10 @@ instruct vmaskcmp_masked(pReg dst, vReg src1, vReg src2, immI cond,
effect(KILL cr);
format %{ "vmaskcmp_masked $dst, $pg, $src1, $src2, $cond\t# KILL cr" %}
ins_encode %{
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
BasicType bt = Matcher::vector_element_basic_type(this);
__ sve_compare($dst$$PRegister, bt, $pg$$PRegister, $src1$$FloatRegister,
$src2$$FloatRegister, (int)($cond$$constant));
$src2$$FloatRegister, condition);
%}
ins_pipe(pipe_slow);
%}
@ -4229,12 +4231,12 @@ instruct vcmove_neon(vReg dst, vReg src1, vReg src2, immI cond, cmpOp copnd) %{
effect(TEMP_DEF dst);
format %{ "vcmove_neon.$copnd $dst, $src1, $src2\t# vector conditional move fp" %}
ins_encode %{
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
BasicType bt = Matcher::vector_element_basic_type(this);
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
assert(length_in_bytes == 8 || length_in_bytes == 16, "must be");
__ neon_compare($dst$$FloatRegister, bt, $src1$$FloatRegister,
$src2$$FloatRegister, (int)($cond$$constant),
/* isQ */ length_in_bytes == 16);
$src2$$FloatRegister, condition, /* isQ */ length_in_bytes == 16);
__ bsl($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
$src2$$FloatRegister, $src1$$FloatRegister);
%}
@ -4250,10 +4252,11 @@ instruct vcmove_sve(vReg dst, vReg src1, vReg src2, immI cond, cmpOp copnd, pReg
format %{ "vcmove_sve.$copnd $dst, $src1, $src2\t# vector conditional move fp. KILL $pgtmp" %}
ins_encode %{
assert(UseSVE > 0, "must be sve");
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
BasicType bt = Matcher::vector_element_basic_type(this);
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
__ sve_compare($pgtmp$$PRegister, bt, ptrue, $src1$$FloatRegister,
$src2$$FloatRegister, (int)($cond$$constant));
$src2$$FloatRegister, condition);
__ sve_sel($dst$$FloatRegister, __ elemType_to_regVariant(bt),
$pgtmp$$PRegister, $src2$$FloatRegister, $src1$$FloatRegister);
%}

View File

@ -2620,11 +2620,6 @@ template<typename R, typename... Rx>
INSN(minv, 0, 0b011011, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(smaxp, 0, 0b101001, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(sminp, 0, 0b101011, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(cmeq, 1, 0b100011, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
INSN(cmgt, 0, 0b001101, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
INSN(cmge, 0, 0b001111, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
INSN(cmhi, 1, 0b001101, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
INSN(cmhs, 1, 0b001111, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
#undef INSN
@ -2730,13 +2725,50 @@ template<typename R, typename... Rx>
INSN(fmls, 0, 1, 0b110011);
INSN(fmax, 0, 0, 0b111101);
INSN(fmin, 0, 1, 0b111101);
INSN(fcmeq, 0, 0, 0b111001);
INSN(fcmgt, 1, 1, 0b111001);
INSN(fcmge, 1, 0, 0b111001);
INSN(facgt, 1, 1, 0b111011);
#undef INSN
// AdvSIMD integer vector compare, three-register form.
// cond selects the emitted instruction: EQ -> cmeq, GT -> cmgt, GE -> cmge,
// HI -> cmhi, HS -> cmhs. Any other condition is rejected.
void cm(Condition cond, FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) {
  starti;
  assert(T != T1Q && T != T1D, "incorrect arrangement");
  // The condition only influences two fields of the encoding:
  // the single bit 29 and the five-bit field at bits 15..11.
  int bit29;
  int op_15_11;
  switch (cond) {
    case EQ: bit29 = 1; op_15_11 = 0b10001; break;
    case GT: bit29 = 0; op_15_11 = 0b00110; break;
    case GE: bit29 = 0; op_15_11 = 0b00111; break;
    case HI: bit29 = 1; op_15_11 = 0b00110; break;
    case HS: bit29 = 1; op_15_11 = 0b00111; break;
    default:
      ShouldNotReachHere();
      break;
  }
  // The arrangement supplies bit 30 (its lowest bit) and bits 23..22 (the rest).
  const int arrangement_low = (int)T & 1;
  const int arrangement_high = (int)T >> 1;
  f(0, 31), f(arrangement_low, 30), f(bit29, 29);
  f(0b01110, 28, 24), f(arrangement_high, 23, 22), f(1, 21), rf(Vm, 16);
  f(op_15_11, 15, 11), f(1, 10), rf(Vn, 5), rf(Vd, 0);
}
// AdvSIMD floating-point vector compare, three-register form.
// cond selects the emitted instruction: EQ -> fcmeq, GT -> fcmgt, GE -> fcmge.
// Only the T2S, T4S and T2D arrangements are accepted.
void fcm(Condition cond, FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) {
  starti;
  assert(T == T2S || T == T4S || T == T2D, "invalid arrangement");
  // The condition is carried by two single-bit fields: bit 29 and bit 23.
  int bit29;
  int bit23;
  switch (cond) {
    case EQ: bit29 = 0; bit23 = 0; break;
    case GT: bit29 = 1; bit23 = 1; break;
    case GE: bit29 = 1; bit23 = 0; break;
    default:
      ShouldNotReachHere();
      break;
  }
  // Bit 22 is set only for the T2D arrangement.
  const int bit22 = (T == T2D) ? 1 : 0;
  f(0, 31), f((int)T & 1, 30), f(bit29, 29);
  f(0b01110, 28, 24), f(bit23, 23), f(bit22, 22);
  f(1, 21), rf(Vm, 16), f(0b111001, 15, 10), rf(Vn, 5), rf(Vd, 0);
}
#define INSN(NAME, opc) \
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
starti; \

View File

@ -918,44 +918,28 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
}
// Emits a NEON vector compare of src1 and src2 under `cond` into dst.
// bt selects the element size (through type2aelembytes) and whether the
// floating-point or the integer compare is used; isQ is passed on to
// esize2arrangement and also picks T16B vs T8B for the final notr.
// NOTE(review): this listing carries both the BoolTest-based switches and the
// Assembler::Condition-based code (zn/zm/needs_negation) side by side.
void C2_MacroAssembler::neon_compare(FloatRegister dst, BasicType bt, FloatRegister src1,
FloatRegister src2, int cond, bool isQ) {
FloatRegister src2, Condition cond, bool isQ) {
SIMD_Arrangement size = esize2arrangement((unsigned)type2aelembytes(bt), isQ);
if (bt == T_FLOAT || bt == T_DOUBLE) {
switch (cond) {
case BoolTest::eq: fcmeq(dst, size, src1, src2); break;
case BoolTest::ne: {
fcmeq(dst, size, src1, src2);
notr(dst, isQ ? T16B : T8B, dst);
break;
}
case BoolTest::ge: fcmge(dst, size, src1, src2); break;
case BoolTest::gt: fcmgt(dst, size, src1, src2); break;
case BoolTest::le: fcmge(dst, size, src2, src1); break;
case BoolTest::lt: fcmgt(dst, size, src2, src1); break;
default:
assert(false, "unsupported");
ShouldNotReachHere();
}
// Operands as they will be handed to fcm/cm; they start in the given order.
FloatRegister zn = src1, zm = src2;
bool needs_negation = false;
switch (cond) {
// LT/LE/LO/LS are rewritten as GT/GE/HI/HS with the two operands swapped.
case LT: cond = GT; zn = src2; zm = src1; break;
case LE: cond = GE; zn = src2; zm = src1; break;
case LO: cond = HI; zn = src2; zm = src1; break;
case LS: cond = HS; zn = src2; zm = src1; break;
// NE is emitted as EQ and the result is inverted afterwards.
case NE: cond = EQ; needs_negation = true; break;
default:
break;
}
// Floating-point element types go through fcm, everything else through cm.
if (is_floating_point_type(bt)) {
fcm(cond, dst, size, zn, zm);
} else {
switch (cond) {
case BoolTest::eq: cmeq(dst, size, src1, src2); break;
case BoolTest::ne: {
cmeq(dst, size, src1, src2);
notr(dst, isQ ? T16B : T8B, dst);
break;
}
case BoolTest::ge: cmge(dst, size, src1, src2); break;
case BoolTest::gt: cmgt(dst, size, src1, src2); break;
case BoolTest::le: cmge(dst, size, src2, src1); break;
case BoolTest::lt: cmgt(dst, size, src2, src1); break;
case BoolTest::uge: cmhs(dst, size, src1, src2); break;
case BoolTest::ugt: cmhi(dst, size, src1, src2); break;
case BoolTest::ult: cmhi(dst, size, src2, src1); break;
case BoolTest::ule: cmhs(dst, size, src2, src1); break;
default:
assert(false, "unsupported");
ShouldNotReachHere();
}
cm(cond, dst, size, zn, zm);
}
// Turn the EQ result into NE by inverting dst in place.
if (needs_negation) {
notr(dst, isQ ? T16B : T8B, dst);
}
}
@ -1125,29 +1109,24 @@ void C2_MacroAssembler::sve_vmask_fromlong(PRegister dst, Register src, BasicTyp
// Clobbers: rflags
void C2_MacroAssembler::sve_compare(PRegister pd, BasicType bt, PRegister pg,
FloatRegister zn, FloatRegister zm, int cond) {
FloatRegister zn, FloatRegister zm, Condition cond) {
// Emits an SVE compare of zn and zm under `cond`, governed by pg, into pd.
// bt selects the register variant and whether sve_fcm or sve_cmp is used.
assert(pg->is_governing(), "This register has to be a governing predicate register");
// Operands as they will be handed to sve_fcm/sve_cmp; they start in the given order.
FloatRegister z1 = zn, z2 = zm;
// Convert the original BoolTest condition to Assembler::condition.
Condition condition;
switch (cond) {
case BoolTest::eq: condition = Assembler::EQ; break;
case BoolTest::ne: condition = Assembler::NE; break;
case BoolTest::le: z1 = zm; z2 = zn; condition = Assembler::GE; break;
case BoolTest::ge: condition = Assembler::GE; break;
case BoolTest::lt: z1 = zm; z2 = zn; condition = Assembler::GT; break;
case BoolTest::gt: condition = Assembler::GT; break;
// LE/LT/LO/LS are rewritten as GE/GT/HI/HS with the two operands swapped.
case LE: z1 = zm; z2 = zn; cond = GE; break;
case LT: z1 = zm; z2 = zn; cond = GT; break;
case LO: z1 = zm; z2 = zn; cond = HI; break;
case LS: z1 = zm; z2 = zn; cond = HS; break;
default:
assert(false, "unsupported compare condition");
ShouldNotReachHere();
break;
}
SIMD_RegVariant size = elemType_to_regVariant(bt);
if (bt == T_FLOAT || bt == T_DOUBLE) {
sve_fcm(condition, pd, size, pg, z1, z2);
// Floating-point element types use sve_fcm; all others must be integral and use sve_cmp.
if (is_floating_point_type(bt)) {
sve_fcm(cond, pd, size, pg, z1, z2);
} else {
assert(is_integral_type(bt), "unsupported element type");
sve_cmp(condition, pd, size, pg, z1, z2);
sve_cmp(cond, pd, size, pg, z1, z2);
}
}
@ -1979,7 +1958,7 @@ void C2_MacroAssembler::vector_round_neon(FloatRegister dst, FloatRegister src,
fneg(tmp3, T, src);
dup(tmp2, T, rscratch1);
cmhs(tmp3, T, tmp3, tmp2);
cm(HS, tmp3, T, tmp3, tmp2);
// tmp3 is now a set of flags
bif(dst, T16B, tmp1, tmp3);

View File

@ -77,13 +77,13 @@
// SIMD&FP comparison
void neon_compare(FloatRegister dst, BasicType bt, FloatRegister src1,
FloatRegister src2, int cond, bool isQ);
FloatRegister src2, Condition cond, bool isQ);
void neon_compare_zero(FloatRegister dst, BasicType bt, FloatRegister src,
Condition cond, bool isQ);
void sve_compare(PRegister pd, BasicType bt, PRegister pg,
FloatRegister zn, FloatRegister zm, int cond);
FloatRegister zn, FloatRegister zm, Condition cond);
void sve_vmask_lasttrue(Register dst, BasicType bt, PRegister src, PRegister ptmp);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2021, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -145,8 +145,7 @@
// Does the CPU supports vector unsigned comparison instructions?
// Neither return statement below consults vlen or bt, so the answer is the
// same for every vector length and element type.
static const bool supports_vector_comparison_unsigned(int vlen, BasicType bt) {
// Not supported on SVE yet.
return !UseSVE;
return true;
}
// Some microarchitectures have mask registers used on vectors

View File

@ -6412,10 +6412,10 @@ class StubGenerator: public StubCodeGenerator {
__ orr(decL3, arrangement, decL3, decH3);
// check illegal inputs, value larger than 63 (maximum of 6 bits)
__ cmhi(decH0, arrangement, decL0, v27);
__ cmhi(decH1, arrangement, decL1, v27);
__ cmhi(decH2, arrangement, decL2, v27);
__ cmhi(decH3, arrangement, decL3, v27);
__ cm(Assembler::HI, decH0, arrangement, decL0, v27);
__ cm(Assembler::HI, decH1, arrangement, decL1, v27);
__ cm(Assembler::HI, decH2, arrangement, decL2, v27);
__ cm(Assembler::HI, decH3, arrangement, decL3, v27);
__ orr(in0, arrangement, decH0, decH1);
__ orr(in1, arrangement, decH2, decH3);
__ orr(in2, arrangement, in0, in1);

View File

@ -1385,6 +1385,30 @@ class NEONFloatCompareWithZero(TwoRegNEONOp):
self._firstSIMDreg.nextReg(),
self.arrangement))
class NEONVectorCompare(ThreeRegNEONOp):
    """Three-register NEON vector compare whose condition is passed as an argument.

    args is a (name, arrangement, condition) triple, e.g. ("cm", "8B", "GT").
    The assembly mnemonic is the name followed by the lower-cased condition.
    """

    def __init__(self, args):
        name, arrangement, condition = args
        self._name = name
        self.arrangement = arrangement
        self.condition = condition
        self.insname = name + condition.lower()

    def cstr(self):
        """Return the C++ call, e.g. '__ cm(Assembler::GT, v0, __ T8B, v1, v2);'."""
        dst = self._firstSIMDreg
        call_args = ["Assembler::" + self.condition,
                     dst,
                     "__ T" + self.arrangement,
                     dst.nextReg(),
                     dst.nextReg().nextReg()]
        return "__ " + self._name + "(" + ", ".join(str(a) for a in call_args) + ");"

    def astr(self):
        """Return the expected assembly text: mnemonic, tab, three 'reg.arrangement' operands."""
        dst = self._firstSIMDreg
        regs = [dst, dst.nextReg(), dst.nextReg().nextReg()]
        operands = ", ".join("%s.%s" % (reg, self.arrangement) for reg in regs)
        return "%s\t%s" % (self.insname, operands)
class SpecialCases(Instruction):
def __init__(self, data):
self._name = data[0]
@ -1693,36 +1717,26 @@ generate(ThreeRegNEONOp,
["sminp", "sminp", "2S"], ["sminp", "sminp", "4S"],
["fmin", "fmin", "2S"], ["fmin", "fmin", "4S"],
["fmin", "fmin", "2D"],
["cmeq", "cmeq", "8B"], ["cmeq", "cmeq", "16B"],
["cmeq", "cmeq", "4H"], ["cmeq", "cmeq", "8H"],
["cmeq", "cmeq", "2S"], ["cmeq", "cmeq", "4S"],
["cmeq", "cmeq", "2D"],
["fcmeq", "fcmeq", "2S"], ["fcmeq", "fcmeq", "4S"],
["fcmeq", "fcmeq", "2D"],
["cmgt", "cmgt", "8B"], ["cmgt", "cmgt", "16B"],
["cmgt", "cmgt", "4H"], ["cmgt", "cmgt", "8H"],
["cmgt", "cmgt", "2S"], ["cmgt", "cmgt", "4S"],
["cmgt", "cmgt", "2D"],
["cmhi", "cmhi", "8B"], ["cmhi", "cmhi", "16B"],
["cmhi", "cmhi", "4H"], ["cmhi", "cmhi", "8H"],
["cmhi", "cmhi", "2S"], ["cmhi", "cmhi", "4S"],
["cmhi", "cmhi", "2D"],
["cmhs", "cmhs", "8B"], ["cmhs", "cmhs", "16B"],
["cmhs", "cmhs", "4H"], ["cmhs", "cmhs", "8H"],
["cmhs", "cmhs", "2S"], ["cmhs", "cmhs", "4S"],
["cmhs", "cmhs", "2D"],
["fcmgt", "fcmgt", "2S"], ["fcmgt", "fcmgt", "4S"],
["fcmgt", "fcmgt", "2D"],
["cmge", "cmge", "8B"], ["cmge", "cmge", "16B"],
["cmge", "cmge", "4H"], ["cmge", "cmge", "8H"],
["cmge", "cmge", "2S"], ["cmge", "cmge", "4S"],
["cmge", "cmge", "2D"],
["fcmge", "fcmge", "2S"], ["fcmge", "fcmge", "4S"],
["fcmge", "fcmge", "2D"],
["facgt", "facgt", "2S"], ["facgt", "facgt", "4S"],
["facgt", "facgt", "2D"],
])
# Build the argument lists for NEONVectorCompare: one
# [prefix, arrangement, condition] entry per combination.
# 'cm' entries come first, then 'fcm', following the prefix list order.
neonVectorCompareInstructionPrefix = ['cm', 'fcm']
neonIntegerVectorCompareConditions = ['GT', 'GE', 'EQ', 'HI', 'HS']
neonFloatVectorCompareConditions = ['EQ', 'GT', 'GE']
neonIntegerArrangement = ['8B', '16B', '4H', '8H', '2S', '4S', '2D']
neonFloatArrangement = ['2S', '4S', '2D']
neonVectorCompareArgs = []
for pre in neonVectorCompareInstructionPrefix:
# 'fcm' uses the float condition/arrangement lists; any other prefix uses the integer ones.
conditions = neonFloatVectorCompareConditions if pre == 'fcm' else neonIntegerVectorCompareConditions
arrangements = neonFloatArrangement if pre == 'fcm' else neonIntegerArrangement
# Conditions vary in the outer loop and arrangements in the inner loop.
for condition in conditions:
for currentArrangement in arrangements:
currentArgs = [pre, currentArrangement, condition]
neonVectorCompareArgs.append(currentArgs)
generate(NEONVectorCompare, neonVectorCompareArgs)
generate(SVEComparisonWithZero, ["EQ", "GT", "GE", "LT", "LE", "NE"])
generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);", "ccmn\txzr, xzr, #3, LE"],

View File

@ -748,53 +748,55 @@
__ fmin(v23, __ T2S, v24, v25); // fmin v23.2S, v24.2S, v25.2S
__ fmin(v25, __ T4S, v26, v27); // fmin v25.4S, v26.4S, v27.4S
__ fmin(v15, __ T2D, v16, v17); // fmin v15.2D, v16.2D, v17.2D
__ cmeq(v29, __ T8B, v30, v31); // cmeq v29.8B, v30.8B, v31.8B
__ cmeq(v3, __ T16B, v4, v5); // cmeq v3.16B, v4.16B, v5.16B
__ cmeq(v10, __ T4H, v11, v12); // cmeq v10.4H, v11.4H, v12.4H
__ cmeq(v22, __ T8H, v23, v24); // cmeq v22.8H, v23.8H, v24.8H
__ cmeq(v10, __ T2S, v11, v12); // cmeq v10.2S, v11.2S, v12.2S
__ cmeq(v4, __ T4S, v5, v6); // cmeq v4.4S, v5.4S, v6.4S
__ cmeq(v17, __ T2D, v18, v19); // cmeq v17.2D, v18.2D, v19.2D
__ fcmeq(v1, __ T2S, v2, v3); // fcmeq v1.2S, v2.2S, v3.2S
__ fcmeq(v11, __ T4S, v12, v13); // fcmeq v11.4S, v12.4S, v13.4S
__ fcmeq(v7, __ T2D, v8, v9); // fcmeq v7.2D, v8.2D, v9.2D
__ cmgt(v10, __ T8B, v11, v12); // cmgt v10.8B, v11.8B, v12.8B
__ cmgt(v15, __ T16B, v16, v17); // cmgt v15.16B, v16.16B, v17.16B
__ cmgt(v16, __ T4H, v17, v18); // cmgt v16.4H, v17.4H, v18.4H
__ cmgt(v2, __ T8H, v3, v4); // cmgt v2.8H, v3.8H, v4.8H
__ cmgt(v9, __ T2S, v10, v11); // cmgt v9.2S, v10.2S, v11.2S
__ cmgt(v11, __ T4S, v12, v13); // cmgt v11.4S, v12.4S, v13.4S
__ cmgt(v12, __ T2D, v13, v14); // cmgt v12.2D, v13.2D, v14.2D
__ cmhi(v14, __ T8B, v15, v16); // cmhi v14.8B, v15.8B, v16.8B
__ cmhi(v13, __ T16B, v14, v15); // cmhi v13.16B, v14.16B, v15.16B
__ cmhi(v2, __ T4H, v3, v4); // cmhi v2.4H, v3.4H, v4.4H
__ cmhi(v6, __ T8H, v7, v8); // cmhi v6.8H, v7.8H, v8.8H
__ cmhi(v19, __ T2S, v20, v21); // cmhi v19.2S, v20.2S, v21.2S
__ cmhi(v25, __ T4S, v26, v27); // cmhi v25.4S, v26.4S, v27.4S
__ cmhi(v15, __ T2D, v16, v17); // cmhi v15.2D, v16.2D, v17.2D
__ cmhs(v4, __ T8B, v5, v6); // cmhs v4.8B, v5.8B, v6.8B
__ cmhs(v2, __ T16B, v3, v4); // cmhs v2.16B, v3.16B, v4.16B
__ cmhs(v4, __ T4H, v5, v6); // cmhs v4.4H, v5.4H, v6.4H
__ cmhs(v11, __ T8H, v12, v13); // cmhs v11.8H, v12.8H, v13.8H
__ cmhs(v17, __ T2S, v18, v19); // cmhs v17.2S, v18.2S, v19.2S
__ cmhs(v20, __ T4S, v21, v22); // cmhs v20.4S, v21.4S, v22.4S
__ cmhs(v16, __ T2D, v17, v18); // cmhs v16.2D, v17.2D, v18.2D
__ fcmgt(v17, __ T2S, v18, v19); // fcmgt v17.2S, v18.2S, v19.2S
__ fcmgt(v10, __ T4S, v11, v12); // fcmgt v10.4S, v11.4S, v12.4S
__ fcmgt(v20, __ T2D, v21, v22); // fcmgt v20.2D, v21.2D, v22.2D
__ cmge(v22, __ T8B, v23, v24); // cmge v22.8B, v23.8B, v24.8B
__ cmge(v12, __ T16B, v13, v14); // cmge v12.16B, v13.16B, v14.16B
__ cmge(v25, __ T4H, v26, v27); // cmge v25.4H, v26.4H, v27.4H
__ cmge(v23, __ T8H, v24, v25); // cmge v23.8H, v24.8H, v25.8H
__ cmge(v28, __ T2S, v29, v30); // cmge v28.2S, v29.2S, v30.2S
__ cmge(v14, __ T4S, v15, v16); // cmge v14.4S, v15.4S, v16.4S
__ cmge(v10, __ T2D, v11, v12); // cmge v10.2D, v11.2D, v12.2D
__ fcmge(v24, __ T2S, v25, v26); // fcmge v24.2S, v25.2S, v26.2S
__ fcmge(v1, __ T4S, v2, v3); // fcmge v1.4S, v2.4S, v3.4S
__ fcmge(v11, __ T2D, v12, v13); // fcmge v11.2D, v12.2D, v13.2D
__ facgt(v30, __ T2S, v31, v0); // facgt v30.2S, v31.2S, v0.2S
__ facgt(v10, __ T4S, v11, v12); // facgt v10.4S, v11.4S, v12.4S
__ facgt(v15, __ T2D, v16, v17); // facgt v15.2D, v16.2D, v17.2D
__ facgt(v29, __ T2S, v30, v31); // facgt v29.2S, v30.2S, v31.2S
__ facgt(v3, __ T4S, v4, v5); // facgt v3.4S, v4.4S, v5.4S
__ facgt(v10, __ T2D, v11, v12); // facgt v10.2D, v11.2D, v12.2D
// NEONVectorCompare
__ cm(Assembler::GT, v22, __ T8B, v23, v24); // cmgt v22.8B, v23.8B, v24.8B
__ cm(Assembler::GT, v10, __ T16B, v11, v12); // cmgt v10.16B, v11.16B, v12.16B
__ cm(Assembler::GT, v4, __ T4H, v5, v6); // cmgt v4.4H, v5.4H, v6.4H
__ cm(Assembler::GT, v17, __ T8H, v18, v19); // cmgt v17.8H, v18.8H, v19.8H
__ cm(Assembler::GT, v1, __ T2S, v2, v3); // cmgt v1.2S, v2.2S, v3.2S
__ cm(Assembler::GT, v11, __ T4S, v12, v13); // cmgt v11.4S, v12.4S, v13.4S
__ cm(Assembler::GT, v7, __ T2D, v8, v9); // cmgt v7.2D, v8.2D, v9.2D
__ cm(Assembler::GE, v10, __ T8B, v11, v12); // cmge v10.8B, v11.8B, v12.8B
__ cm(Assembler::GE, v15, __ T16B, v16, v17); // cmge v15.16B, v16.16B, v17.16B
__ cm(Assembler::GE, v16, __ T4H, v17, v18); // cmge v16.4H, v17.4H, v18.4H
__ cm(Assembler::GE, v2, __ T8H, v3, v4); // cmge v2.8H, v3.8H, v4.8H
__ cm(Assembler::GE, v9, __ T2S, v10, v11); // cmge v9.2S, v10.2S, v11.2S
__ cm(Assembler::GE, v11, __ T4S, v12, v13); // cmge v11.4S, v12.4S, v13.4S
__ cm(Assembler::GE, v12, __ T2D, v13, v14); // cmge v12.2D, v13.2D, v14.2D
__ cm(Assembler::EQ, v14, __ T8B, v15, v16); // cmeq v14.8B, v15.8B, v16.8B
__ cm(Assembler::EQ, v13, __ T16B, v14, v15); // cmeq v13.16B, v14.16B, v15.16B
__ cm(Assembler::EQ, v2, __ T4H, v3, v4); // cmeq v2.4H, v3.4H, v4.4H
__ cm(Assembler::EQ, v6, __ T8H, v7, v8); // cmeq v6.8H, v7.8H, v8.8H
__ cm(Assembler::EQ, v19, __ T2S, v20, v21); // cmeq v19.2S, v20.2S, v21.2S
__ cm(Assembler::EQ, v25, __ T4S, v26, v27); // cmeq v25.4S, v26.4S, v27.4S
__ cm(Assembler::EQ, v15, __ T2D, v16, v17); // cmeq v15.2D, v16.2D, v17.2D
__ cm(Assembler::HI, v4, __ T8B, v5, v6); // cmhi v4.8B, v5.8B, v6.8B
__ cm(Assembler::HI, v2, __ T16B, v3, v4); // cmhi v2.16B, v3.16B, v4.16B
__ cm(Assembler::HI, v4, __ T4H, v5, v6); // cmhi v4.4H, v5.4H, v6.4H
__ cm(Assembler::HI, v11, __ T8H, v12, v13); // cmhi v11.8H, v12.8H, v13.8H
__ cm(Assembler::HI, v17, __ T2S, v18, v19); // cmhi v17.2S, v18.2S, v19.2S
__ cm(Assembler::HI, v20, __ T4S, v21, v22); // cmhi v20.4S, v21.4S, v22.4S
__ cm(Assembler::HI, v16, __ T2D, v17, v18); // cmhi v16.2D, v17.2D, v18.2D
__ cm(Assembler::HS, v17, __ T8B, v18, v19); // cmhs v17.8B, v18.8B, v19.8B
__ cm(Assembler::HS, v10, __ T16B, v11, v12); // cmhs v10.16B, v11.16B, v12.16B
__ cm(Assembler::HS, v20, __ T4H, v21, v22); // cmhs v20.4H, v21.4H, v22.4H
__ cm(Assembler::HS, v22, __ T8H, v23, v24); // cmhs v22.8H, v23.8H, v24.8H
__ cm(Assembler::HS, v12, __ T2S, v13, v14); // cmhs v12.2S, v13.2S, v14.2S
__ cm(Assembler::HS, v25, __ T4S, v26, v27); // cmhs v25.4S, v26.4S, v27.4S
__ cm(Assembler::HS, v23, __ T2D, v24, v25); // cmhs v23.2D, v24.2D, v25.2D
__ fcm(Assembler::EQ, v28, __ T2S, v29, v30); // fcmeq v28.2S, v29.2S, v30.2S
__ fcm(Assembler::EQ, v14, __ T4S, v15, v16); // fcmeq v14.4S, v15.4S, v16.4S
__ fcm(Assembler::EQ, v10, __ T2D, v11, v12); // fcmeq v10.2D, v11.2D, v12.2D
__ fcm(Assembler::GT, v24, __ T2S, v25, v26); // fcmgt v24.2S, v25.2S, v26.2S
__ fcm(Assembler::GT, v1, __ T4S, v2, v3); // fcmgt v1.4S, v2.4S, v3.4S
__ fcm(Assembler::GT, v11, __ T2D, v12, v13); // fcmgt v11.2D, v12.2D, v13.2D
__ fcm(Assembler::GE, v30, __ T2S, v31, v0); // fcmge v30.2S, v31.2S, v0.2S
__ fcm(Assembler::GE, v10, __ T4S, v11, v12); // fcmge v10.4S, v11.4S, v12.4S
__ fcm(Assembler::GE, v15, __ T2D, v16, v17); // fcmge v15.2D, v16.2D, v17.2D
// SVEComparisonWithZero
__ sve_fcm(Assembler::EQ, p3, __ S, p3, z2, 0.0); // fcmeq p3.s, p3/z, z2.s, #0.0
@ -1429,18 +1431,18 @@
0x4e696d07, 0x0eae6dac, 0x4ea26c20, 0x0e35ae93,
0x4e23ac41, 0x0e79af17, 0x4e64ac62, 0x0ea2ac20,
0x4eaaad28, 0x0eb9f717, 0x4ebbf759, 0x4ef1f60f,
0x2e3f8fdd, 0x6e258c83, 0x2e6c8d6a, 0x6e788ef6,
0x2eac8d6a, 0x6ea68ca4, 0x6ef38e51, 0x0e23e441,
0x4e2de58b, 0x4e69e507, 0x0e2c356a, 0x4e31360f,
0x0e723630, 0x4e643462, 0x0eab3549, 0x4ead358b,
0x4eee35ac, 0x2e3035ee, 0x6e2f35cd, 0x2e643462,
0x6e6834e6, 0x2eb53693, 0x6ebb3759, 0x6ef1360f,
0x2e263ca4, 0x6e243c62, 0x2e663ca4, 0x6e6d3d8b,
0x2eb33e51, 0x6eb63eb4, 0x6ef23e30, 0x2eb3e651,
0x6eace56a, 0x6ef6e6b4, 0x0e383ef6, 0x4e2e3dac,
0x0e7b3f59, 0x4e793f17, 0x0ebe3fbc, 0x4eb03dee,
0x4eec3d6a, 0x2e3ae738, 0x6e23e441, 0x6e6de58b,
0x2ea0effe, 0x6eaced6a, 0x6ef1ee0f, 0x65922c43,
0x2ebfefdd, 0x6ea5ec83, 0x6eeced6a, 0x0e3836f6,
0x4e2c356a, 0x0e6634a4, 0x4e733651, 0x0ea33441,
0x4ead358b, 0x4ee93507, 0x0e2c3d6a, 0x4e313e0f,
0x0e723e30, 0x4e643c62, 0x0eab3d49, 0x4ead3d8b,
0x4eee3dac, 0x2e308dee, 0x6e2f8dcd, 0x2e648c62,
0x6e688ce6, 0x2eb58e93, 0x6ebb8f59, 0x6ef18e0f,
0x2e2634a4, 0x6e243462, 0x2e6634a4, 0x6e6d358b,
0x2eb33651, 0x6eb636b4, 0x6ef23630, 0x2e333e51,
0x6e2c3d6a, 0x2e763eb4, 0x6e783ef6, 0x2eae3dac,
0x6ebb3f59, 0x6ef93f17, 0x0e3ee7bc, 0x4e30e5ee,
0x4e6ce56a, 0x2ebae738, 0x6ea3e441, 0x6eede58b,
0x2e20e7fe, 0x6e2ce56a, 0x6e71e60f, 0x65922c43,
0x65d02219, 0x65d02560, 0x65d13dc4, 0x65913690,
0x65d33b6b, 0xba5fd3e3, 0x3a5f03e5, 0xfa411be4,
0x7a42cbe2, 0x93df03ff, 0xc820ffff, 0x8822fc7f,