8302906: AArch64: Add SVE backend support for vector unsigned comparison
Reviewed-by: aph, eliu
This commit is contained in:
parent
2b81faeb35
commit
42dd9077a0
@ -1906,7 +1906,7 @@ instruct vminL_neon(vReg dst, vReg src1, vReg src2) %{
|
||||
effect(TEMP_DEF dst);
|
||||
format %{ "vminL_neon $dst, $src1, $src2\t# 2L" %}
|
||||
ins_encode %{
|
||||
__ cmgt($dst$$FloatRegister, __ T2D, $src1$$FloatRegister, $src2$$FloatRegister);
|
||||
__ cm(Assembler::GT, $dst$$FloatRegister, __ T2D, $src1$$FloatRegister, $src2$$FloatRegister);
|
||||
__ bsl($dst$$FloatRegister, __ T16B, $src2$$FloatRegister, $src1$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
@ -1993,7 +1993,7 @@ instruct vmaxL_neon(vReg dst, vReg src1, vReg src2) %{
|
||||
effect(TEMP_DEF dst);
|
||||
format %{ "vmaxL_neon $dst, $src1, $src2\t# 2L" %}
|
||||
ins_encode %{
|
||||
__ cmgt($dst$$FloatRegister, __ T2D, $src1$$FloatRegister, $src2$$FloatRegister);
|
||||
__ cm(Assembler::GT, $dst$$FloatRegister, __ T2D, $src1$$FloatRegister, $src2$$FloatRegister);
|
||||
__ bsl($dst$$FloatRegister, __ T16B, $src1$$FloatRegister, $src2$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
@ -5128,11 +5128,11 @@ instruct vmaskcmp_neon(vReg dst, vReg src1, vReg src2, immI cond) %{
|
||||
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
|
||||
format %{ "vmaskcmp_neon $dst, $src1, $src2, $cond" %}
|
||||
ins_encode %{
|
||||
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
__ neon_compare($dst$$FloatRegister, bt, $src1$$FloatRegister,
|
||||
$src2$$FloatRegister, (int)($cond$$constant),
|
||||
/* isQ */ length_in_bytes == 16);
|
||||
$src2$$FloatRegister, condition, /* isQ */ length_in_bytes == 16);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
@ -5198,11 +5198,12 @@ instruct vmaskcmp_sve(pReg dst, vReg src1, vReg src2, immI cond, rFlagsReg cr) %
|
||||
effect(KILL cr);
|
||||
format %{ "vmaskcmp_sve $dst, $src1, $src2, $cond\t# KILL cr" %}
|
||||
ins_encode %{
|
||||
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
assert(length_in_bytes == MaxVectorSize, "invalid vector length");
|
||||
__ sve_compare($dst$$PRegister, bt, ptrue, $src1$$FloatRegister,
|
||||
$src2$$FloatRegister, (int)($cond$$constant));
|
||||
$src2$$FloatRegister, condition);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
@ -5214,9 +5215,10 @@ instruct vmaskcmp_masked(pReg dst, vReg src1, vReg src2, immI cond,
|
||||
effect(KILL cr);
|
||||
format %{ "vmaskcmp_masked $dst, $pg, $src1, $src2, $cond\t# KILL cr" %}
|
||||
ins_encode %{
|
||||
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
__ sve_compare($dst$$PRegister, bt, $pg$$PRegister, $src1$$FloatRegister,
|
||||
$src2$$FloatRegister, (int)($cond$$constant));
|
||||
$src2$$FloatRegister, condition);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
@ -5876,12 +5878,12 @@ instruct vcmove_neon(vReg dst, vReg src1, vReg src2, immI cond, cmpOp copnd) %{
|
||||
effect(TEMP_DEF dst);
|
||||
format %{ "vcmove_neon.$copnd $dst, $src1, $src2\t# vector conditional move fp" %}
|
||||
ins_encode %{
|
||||
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
assert(length_in_bytes == 8 || length_in_bytes == 16, "must be");
|
||||
__ neon_compare($dst$$FloatRegister, bt, $src1$$FloatRegister,
|
||||
$src2$$FloatRegister, (int)($cond$$constant),
|
||||
/* isQ */ length_in_bytes == 16);
|
||||
$src2$$FloatRegister, condition, /* isQ */ length_in_bytes == 16);
|
||||
__ bsl($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
|
||||
$src2$$FloatRegister, $src1$$FloatRegister);
|
||||
%}
|
||||
@ -5897,10 +5899,11 @@ instruct vcmove_sve(vReg dst, vReg src1, vReg src2, immI cond, cmpOp copnd, pReg
|
||||
format %{ "vcmove_sve.$copnd $dst, $src1, $src2\t# vector conditional move fp. KILL $pgtmp" %}
|
||||
ins_encode %{
|
||||
assert(UseSVE > 0, "must be sve");
|
||||
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
__ sve_compare($pgtmp$$PRegister, bt, ptrue, $src1$$FloatRegister,
|
||||
$src2$$FloatRegister, (int)($cond$$constant));
|
||||
$src2$$FloatRegister, condition);
|
||||
__ sve_sel($dst$$FloatRegister, __ elemType_to_regVariant(bt),
|
||||
$pgtmp$$PRegister, $src2$$FloatRegister, $src1$$FloatRegister);
|
||||
%}
|
||||
|
@ -1001,7 +1001,7 @@ instruct v$1L_neon(vReg dst, vReg src1, vReg src2) %{
|
||||
effect(TEMP_DEF dst);
|
||||
format %{ "v$1L_neon $dst, $src1, $src2\t# 2L" %}
|
||||
ins_encode %{
|
||||
__ cmgt($dst$$FloatRegister, __ T2D, $src1$$FloatRegister, $src2$$FloatRegister);
|
||||
__ cm(Assembler::GT, $dst$$FloatRegister, __ T2D, $src1$$FloatRegister, $src2$$FloatRegister);
|
||||
__ bsl($dst$$FloatRegister, __ T16B, ifelse(min, $1, $src2, $src1)$$FloatRegister, ifelse(min, $1, $src1, $src2)$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
@ -3544,11 +3544,11 @@ instruct vmaskcmp_neon(vReg dst, vReg src1, vReg src2, immI cond) %{
|
||||
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
|
||||
format %{ "vmaskcmp_neon $dst, $src1, $src2, $cond" %}
|
||||
ins_encode %{
|
||||
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
__ neon_compare($dst$$FloatRegister, bt, $src1$$FloatRegister,
|
||||
$src2$$FloatRegister, (int)($cond$$constant),
|
||||
/* isQ */ length_in_bytes == 16);
|
||||
$src2$$FloatRegister, condition, /* isQ */ length_in_bytes == 16);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
@ -3595,11 +3595,12 @@ instruct vmaskcmp_sve(pReg dst, vReg src1, vReg src2, immI cond, rFlagsReg cr) %
|
||||
effect(KILL cr);
|
||||
format %{ "vmaskcmp_sve $dst, $src1, $src2, $cond\t# KILL cr" %}
|
||||
ins_encode %{
|
||||
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
assert(length_in_bytes == MaxVectorSize, "invalid vector length");
|
||||
__ sve_compare($dst$$PRegister, bt, ptrue, $src1$$FloatRegister,
|
||||
$src2$$FloatRegister, (int)($cond$$constant));
|
||||
$src2$$FloatRegister, condition);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
@ -3611,9 +3612,10 @@ instruct vmaskcmp_masked(pReg dst, vReg src1, vReg src2, immI cond,
|
||||
effect(KILL cr);
|
||||
format %{ "vmaskcmp_masked $dst, $pg, $src1, $src2, $cond\t# KILL cr" %}
|
||||
ins_encode %{
|
||||
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
__ sve_compare($dst$$PRegister, bt, $pg$$PRegister, $src1$$FloatRegister,
|
||||
$src2$$FloatRegister, (int)($cond$$constant));
|
||||
$src2$$FloatRegister, condition);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
@ -4229,12 +4231,12 @@ instruct vcmove_neon(vReg dst, vReg src1, vReg src2, immI cond, cmpOp copnd) %{
|
||||
effect(TEMP_DEF dst);
|
||||
format %{ "vcmove_neon.$copnd $dst, $src1, $src2\t# vector conditional move fp" %}
|
||||
ins_encode %{
|
||||
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
assert(length_in_bytes == 8 || length_in_bytes == 16, "must be");
|
||||
__ neon_compare($dst$$FloatRegister, bt, $src1$$FloatRegister,
|
||||
$src2$$FloatRegister, (int)($cond$$constant),
|
||||
/* isQ */ length_in_bytes == 16);
|
||||
$src2$$FloatRegister, condition, /* isQ */ length_in_bytes == 16);
|
||||
__ bsl($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
|
||||
$src2$$FloatRegister, $src1$$FloatRegister);
|
||||
%}
|
||||
@ -4250,10 +4252,11 @@ instruct vcmove_sve(vReg dst, vReg src1, vReg src2, immI cond, cmpOp copnd, pReg
|
||||
format %{ "vcmove_sve.$copnd $dst, $src1, $src2\t# vector conditional move fp. KILL $pgtmp" %}
|
||||
ins_encode %{
|
||||
assert(UseSVE > 0, "must be sve");
|
||||
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
__ sve_compare($pgtmp$$PRegister, bt, ptrue, $src1$$FloatRegister,
|
||||
$src2$$FloatRegister, (int)($cond$$constant));
|
||||
$src2$$FloatRegister, condition);
|
||||
__ sve_sel($dst$$FloatRegister, __ elemType_to_regVariant(bt),
|
||||
$pgtmp$$PRegister, $src2$$FloatRegister, $src1$$FloatRegister);
|
||||
%}
|
||||
|
@ -2620,11 +2620,6 @@ template<typename R, typename... Rx>
|
||||
INSN(minv, 0, 0b011011, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
|
||||
INSN(smaxp, 0, 0b101001, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
|
||||
INSN(sminp, 0, 0b101011, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
|
||||
INSN(cmeq, 1, 0b100011, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
|
||||
INSN(cmgt, 0, 0b001101, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
|
||||
INSN(cmge, 0, 0b001111, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
|
||||
INSN(cmhi, 1, 0b001101, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
|
||||
INSN(cmhs, 1, 0b001111, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
|
||||
|
||||
#undef INSN
|
||||
|
||||
@ -2730,13 +2725,50 @@ template<typename R, typename... Rx>
|
||||
INSN(fmls, 0, 1, 0b110011);
|
||||
INSN(fmax, 0, 0, 0b111101);
|
||||
INSN(fmin, 0, 1, 0b111101);
|
||||
INSN(fcmeq, 0, 0, 0b111001);
|
||||
INSN(fcmgt, 1, 1, 0b111001);
|
||||
INSN(fcmge, 1, 0, 0b111001);
|
||||
INSN(facgt, 1, 1, 0b111011);
|
||||
|
||||
#undef INSN
|
||||
|
||||
// AdvSIMD vector compare
|
||||
void cm(Condition cond, FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) {
|
||||
starti;
|
||||
assert(T != T1Q && T != T1D, "incorrect arrangement");
|
||||
int cond_op;
|
||||
switch (cond) {
|
||||
case EQ: cond_op = 0b110001; break;
|
||||
case GT: cond_op = 0b000110; break;
|
||||
case GE: cond_op = 0b000111; break;
|
||||
case HI: cond_op = 0b100110; break;
|
||||
case HS: cond_op = 0b100111; break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
break;
|
||||
}
|
||||
|
||||
f(0, 31), f((int)T & 1, 30), f((cond_op >> 5) & 1, 29);
|
||||
f(0b01110, 28, 24), f((int)T >> 1, 23, 22), f(1, 21), rf(Vm, 16);
|
||||
f(cond_op & 0b11111, 15, 11), f(1, 10), rf(Vn, 5), rf(Vd, 0);
|
||||
}
|
||||
|
||||
// AdvSIMD Floating-point vector compare
|
||||
void fcm(Condition cond, FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) {
|
||||
starti;
|
||||
assert(T == T2S || T == T4S || T == T2D, "invalid arrangement");
|
||||
int cond_op;
|
||||
switch (cond) {
|
||||
case EQ: cond_op = 0b00; break;
|
||||
case GT: cond_op = 0b11; break;
|
||||
case GE: cond_op = 0b10; break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
break;
|
||||
}
|
||||
|
||||
f(0, 31), f((int)T & 1, 30), f((cond_op >> 1) & 1, 29);
|
||||
f(0b01110, 28, 24), f(cond_op & 1, 23), f(T == T2D ? 1 : 0, 22);
|
||||
f(1, 21), rf(Vm, 16), f(0b111001, 15, 10), rf(Vn, 5), rf(Vd, 0);
|
||||
}
|
||||
|
||||
#define INSN(NAME, opc) \
|
||||
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
|
||||
starti; \
|
||||
|
@ -918,44 +918,28 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
|
||||
}
|
||||
|
||||
void C2_MacroAssembler::neon_compare(FloatRegister dst, BasicType bt, FloatRegister src1,
|
||||
FloatRegister src2, int cond, bool isQ) {
|
||||
FloatRegister src2, Condition cond, bool isQ) {
|
||||
SIMD_Arrangement size = esize2arrangement((unsigned)type2aelembytes(bt), isQ);
|
||||
if (bt == T_FLOAT || bt == T_DOUBLE) {
|
||||
switch (cond) {
|
||||
case BoolTest::eq: fcmeq(dst, size, src1, src2); break;
|
||||
case BoolTest::ne: {
|
||||
fcmeq(dst, size, src1, src2);
|
||||
notr(dst, isQ ? T16B : T8B, dst);
|
||||
break;
|
||||
}
|
||||
case BoolTest::ge: fcmge(dst, size, src1, src2); break;
|
||||
case BoolTest::gt: fcmgt(dst, size, src1, src2); break;
|
||||
case BoolTest::le: fcmge(dst, size, src2, src1); break;
|
||||
case BoolTest::lt: fcmgt(dst, size, src2, src1); break;
|
||||
default:
|
||||
assert(false, "unsupported");
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
FloatRegister zn = src1, zm = src2;
|
||||
bool needs_negation = false;
|
||||
switch (cond) {
|
||||
case LT: cond = GT; zn = src2; zm = src1; break;
|
||||
case LE: cond = GE; zn = src2; zm = src1; break;
|
||||
case LO: cond = HI; zn = src2; zm = src1; break;
|
||||
case LS: cond = HS; zn = src2; zm = src1; break;
|
||||
case NE: cond = EQ; needs_negation = true; break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (is_floating_point_type(bt)) {
|
||||
fcm(cond, dst, size, zn, zm);
|
||||
} else {
|
||||
switch (cond) {
|
||||
case BoolTest::eq: cmeq(dst, size, src1, src2); break;
|
||||
case BoolTest::ne: {
|
||||
cmeq(dst, size, src1, src2);
|
||||
notr(dst, isQ ? T16B : T8B, dst);
|
||||
break;
|
||||
}
|
||||
case BoolTest::ge: cmge(dst, size, src1, src2); break;
|
||||
case BoolTest::gt: cmgt(dst, size, src1, src2); break;
|
||||
case BoolTest::le: cmge(dst, size, src2, src1); break;
|
||||
case BoolTest::lt: cmgt(dst, size, src2, src1); break;
|
||||
case BoolTest::uge: cmhs(dst, size, src1, src2); break;
|
||||
case BoolTest::ugt: cmhi(dst, size, src1, src2); break;
|
||||
case BoolTest::ult: cmhi(dst, size, src2, src1); break;
|
||||
case BoolTest::ule: cmhs(dst, size, src2, src1); break;
|
||||
default:
|
||||
assert(false, "unsupported");
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
cm(cond, dst, size, zn, zm);
|
||||
}
|
||||
|
||||
if (needs_negation) {
|
||||
notr(dst, isQ ? T16B : T8B, dst);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1125,29 +1109,24 @@ void C2_MacroAssembler::sve_vmask_fromlong(PRegister dst, Register src, BasicTyp
|
||||
|
||||
// Clobbers: rflags
|
||||
void C2_MacroAssembler::sve_compare(PRegister pd, BasicType bt, PRegister pg,
|
||||
FloatRegister zn, FloatRegister zm, int cond) {
|
||||
FloatRegister zn, FloatRegister zm, Condition cond) {
|
||||
assert(pg->is_governing(), "This register has to be a governing predicate register");
|
||||
FloatRegister z1 = zn, z2 = zm;
|
||||
// Convert the original BoolTest condition to Assembler::condition.
|
||||
Condition condition;
|
||||
switch (cond) {
|
||||
case BoolTest::eq: condition = Assembler::EQ; break;
|
||||
case BoolTest::ne: condition = Assembler::NE; break;
|
||||
case BoolTest::le: z1 = zm; z2 = zn; condition = Assembler::GE; break;
|
||||
case BoolTest::ge: condition = Assembler::GE; break;
|
||||
case BoolTest::lt: z1 = zm; z2 = zn; condition = Assembler::GT; break;
|
||||
case BoolTest::gt: condition = Assembler::GT; break;
|
||||
case LE: z1 = zm; z2 = zn; cond = GE; break;
|
||||
case LT: z1 = zm; z2 = zn; cond = GT; break;
|
||||
case LO: z1 = zm; z2 = zn; cond = HI; break;
|
||||
case LS: z1 = zm; z2 = zn; cond = HS; break;
|
||||
default:
|
||||
assert(false, "unsupported compare condition");
|
||||
ShouldNotReachHere();
|
||||
break;
|
||||
}
|
||||
|
||||
SIMD_RegVariant size = elemType_to_regVariant(bt);
|
||||
if (bt == T_FLOAT || bt == T_DOUBLE) {
|
||||
sve_fcm(condition, pd, size, pg, z1, z2);
|
||||
if (is_floating_point_type(bt)) {
|
||||
sve_fcm(cond, pd, size, pg, z1, z2);
|
||||
} else {
|
||||
assert(is_integral_type(bt), "unsupported element type");
|
||||
sve_cmp(condition, pd, size, pg, z1, z2);
|
||||
sve_cmp(cond, pd, size, pg, z1, z2);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1979,7 +1958,7 @@ void C2_MacroAssembler::vector_round_neon(FloatRegister dst, FloatRegister src,
|
||||
|
||||
fneg(tmp3, T, src);
|
||||
dup(tmp2, T, rscratch1);
|
||||
cmhs(tmp3, T, tmp3, tmp2);
|
||||
cm(HS, tmp3, T, tmp3, tmp2);
|
||||
// tmp3 is now a set of flags
|
||||
|
||||
bif(dst, T16B, tmp1, tmp3);
|
||||
|
@ -77,13 +77,13 @@
|
||||
|
||||
// SIMD&FP comparison
|
||||
void neon_compare(FloatRegister dst, BasicType bt, FloatRegister src1,
|
||||
FloatRegister src2, int cond, bool isQ);
|
||||
FloatRegister src2, Condition cond, bool isQ);
|
||||
|
||||
void neon_compare_zero(FloatRegister dst, BasicType bt, FloatRegister src,
|
||||
Condition cond, bool isQ);
|
||||
|
||||
void sve_compare(PRegister pd, BasicType bt, PRegister pg,
|
||||
FloatRegister zn, FloatRegister zm, int cond);
|
||||
FloatRegister zn, FloatRegister zm, Condition cond);
|
||||
|
||||
void sve_vmask_lasttrue(Register dst, BasicType bt, PRegister src, PRegister ptmp);
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2021, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -145,8 +145,7 @@
|
||||
|
||||
// Does the CPU support vector unsigned comparison instructions?
|
||||
static const bool supports_vector_comparison_unsigned(int vlen, BasicType bt) {
|
||||
// Not supported on SVE yet.
|
||||
return !UseSVE;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Some microarchitectures have mask registers used on vectors
|
||||
|
@ -6412,10 +6412,10 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ orr(decL3, arrangement, decL3, decH3);
|
||||
|
||||
// check illegal inputs, value larger than 63 (maximum of 6 bits)
|
||||
__ cmhi(decH0, arrangement, decL0, v27);
|
||||
__ cmhi(decH1, arrangement, decL1, v27);
|
||||
__ cmhi(decH2, arrangement, decL2, v27);
|
||||
__ cmhi(decH3, arrangement, decL3, v27);
|
||||
__ cm(Assembler::HI, decH0, arrangement, decL0, v27);
|
||||
__ cm(Assembler::HI, decH1, arrangement, decL1, v27);
|
||||
__ cm(Assembler::HI, decH2, arrangement, decL2, v27);
|
||||
__ cm(Assembler::HI, decH3, arrangement, decL3, v27);
|
||||
__ orr(in0, arrangement, decH0, decH1);
|
||||
__ orr(in1, arrangement, decH2, decH3);
|
||||
__ orr(in2, arrangement, in0, in1);
|
||||
|
@ -1385,6 +1385,30 @@ class NEONFloatCompareWithZero(TwoRegNEONOp):
|
||||
self._firstSIMDreg.nextReg(),
|
||||
self.arrangement))
|
||||
|
||||
class NEONVectorCompare(ThreeRegNEONOp):
    """Three-register NEON compare whose condition is passed as an argument.

    ``args`` is ``[name, arrangement, condition]``, e.g. ``['cm', '8B', 'GT']``.
    The C++ side is rendered as ``__ cm(Assembler::GT, vd, __ T8B, vn, vm);``
    and the assembly side uses the mnemonic ``name + condition.lower()``.
    """

    def __init__(self, args):
        name, arrangement, condition = args
        self._name = name
        self.arrangement = arrangement
        self.condition = condition
        # Assembly mnemonic: the prefix followed by the lower-cased condition.
        self.insname = name + condition.lower()

    def cstr(self):
        # Destination register, then the two registers that follow it.
        vd = self._firstSIMDreg
        vn = self._firstSIMDreg.nextReg()
        vm = self._firstSIMDreg.nextReg().nextReg()
        call = "__ " + self._name
        cond = "Assembler::" + self.condition
        layout = "__ T" + self.arrangement
        return "%s(%s, %s, %s, %s, %s);" % (call, cond, vd, layout, vn, vm)

    def astr(self):
        # Same three registers as cstr(), each suffixed with the arrangement.
        vd = self._firstSIMDreg
        vn = self._firstSIMDreg.nextReg()
        vm = self._firstSIMDreg.nextReg().nextReg()
        operands = ["%s.%s" % (reg, self.arrangement) for reg in (vd, vn, vm)]
        return "%s\t%s" % (self.insname, ", ".join(operands))
|
||||
|
||||
class SpecialCases(Instruction):
|
||||
def __init__(self, data):
|
||||
self._name = data[0]
|
||||
@ -1693,36 +1717,26 @@ generate(ThreeRegNEONOp,
|
||||
["sminp", "sminp", "2S"], ["sminp", "sminp", "4S"],
|
||||
["fmin", "fmin", "2S"], ["fmin", "fmin", "4S"],
|
||||
["fmin", "fmin", "2D"],
|
||||
["cmeq", "cmeq", "8B"], ["cmeq", "cmeq", "16B"],
|
||||
["cmeq", "cmeq", "4H"], ["cmeq", "cmeq", "8H"],
|
||||
["cmeq", "cmeq", "2S"], ["cmeq", "cmeq", "4S"],
|
||||
["cmeq", "cmeq", "2D"],
|
||||
["fcmeq", "fcmeq", "2S"], ["fcmeq", "fcmeq", "4S"],
|
||||
["fcmeq", "fcmeq", "2D"],
|
||||
["cmgt", "cmgt", "8B"], ["cmgt", "cmgt", "16B"],
|
||||
["cmgt", "cmgt", "4H"], ["cmgt", "cmgt", "8H"],
|
||||
["cmgt", "cmgt", "2S"], ["cmgt", "cmgt", "4S"],
|
||||
["cmgt", "cmgt", "2D"],
|
||||
["cmhi", "cmhi", "8B"], ["cmhi", "cmhi", "16B"],
|
||||
["cmhi", "cmhi", "4H"], ["cmhi", "cmhi", "8H"],
|
||||
["cmhi", "cmhi", "2S"], ["cmhi", "cmhi", "4S"],
|
||||
["cmhi", "cmhi", "2D"],
|
||||
["cmhs", "cmhs", "8B"], ["cmhs", "cmhs", "16B"],
|
||||
["cmhs", "cmhs", "4H"], ["cmhs", "cmhs", "8H"],
|
||||
["cmhs", "cmhs", "2S"], ["cmhs", "cmhs", "4S"],
|
||||
["cmhs", "cmhs", "2D"],
|
||||
["fcmgt", "fcmgt", "2S"], ["fcmgt", "fcmgt", "4S"],
|
||||
["fcmgt", "fcmgt", "2D"],
|
||||
["cmge", "cmge", "8B"], ["cmge", "cmge", "16B"],
|
||||
["cmge", "cmge", "4H"], ["cmge", "cmge", "8H"],
|
||||
["cmge", "cmge", "2S"], ["cmge", "cmge", "4S"],
|
||||
["cmge", "cmge", "2D"],
|
||||
["fcmge", "fcmge", "2S"], ["fcmge", "fcmge", "4S"],
|
||||
["fcmge", "fcmge", "2D"],
|
||||
["facgt", "facgt", "2S"], ["facgt", "facgt", "4S"],
|
||||
["facgt", "facgt", "2D"],
|
||||
])
|
||||
|
||||
# Instruction prefixes for the condition-parameterized NEON compares, and the
# conditions / arrangements each of them is tested with.
neonVectorCompareInstructionPrefix = ['cm', 'fcm']
neonIntegerVectorCompareConditions = ['GT', 'GE', 'EQ', 'HI', 'HS']
neonFloatVectorCompareConditions = ['EQ', 'GT', 'GE']
neonIntegerArrangement = ['8B', '16B', '4H', '8H', '2S', '4S', '2D']
neonFloatArrangement = ['2S', '4S', '2D']

# One [prefix, arrangement, condition] entry per combination, ordered by
# prefix, then condition, then arrangement.
neonVectorCompareArgs = []
for pre in neonVectorCompareInstructionPrefix:
    if pre == 'fcm':
        conditions = neonFloatVectorCompareConditions
        arrangements = neonFloatArrangement
    else:
        conditions = neonIntegerVectorCompareConditions
        arrangements = neonIntegerArrangement
    for condition in conditions:
        for currentArrangement in arrangements:
            neonVectorCompareArgs.append([pre, currentArrangement, condition])

generate(NEONVectorCompare, neonVectorCompareArgs)
|
||||
|
||||
generate(SVEComparisonWithZero, ["EQ", "GT", "GE", "LT", "LE", "NE"])
|
||||
|
||||
generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);", "ccmn\txzr, xzr, #3, LE"],
|
||||
|
@ -748,53 +748,55 @@
|
||||
__ fmin(v23, __ T2S, v24, v25); // fmin v23.2S, v24.2S, v25.2S
|
||||
__ fmin(v25, __ T4S, v26, v27); // fmin v25.4S, v26.4S, v27.4S
|
||||
__ fmin(v15, __ T2D, v16, v17); // fmin v15.2D, v16.2D, v17.2D
|
||||
__ cmeq(v29, __ T8B, v30, v31); // cmeq v29.8B, v30.8B, v31.8B
|
||||
__ cmeq(v3, __ T16B, v4, v5); // cmeq v3.16B, v4.16B, v5.16B
|
||||
__ cmeq(v10, __ T4H, v11, v12); // cmeq v10.4H, v11.4H, v12.4H
|
||||
__ cmeq(v22, __ T8H, v23, v24); // cmeq v22.8H, v23.8H, v24.8H
|
||||
__ cmeq(v10, __ T2S, v11, v12); // cmeq v10.2S, v11.2S, v12.2S
|
||||
__ cmeq(v4, __ T4S, v5, v6); // cmeq v4.4S, v5.4S, v6.4S
|
||||
__ cmeq(v17, __ T2D, v18, v19); // cmeq v17.2D, v18.2D, v19.2D
|
||||
__ fcmeq(v1, __ T2S, v2, v3); // fcmeq v1.2S, v2.2S, v3.2S
|
||||
__ fcmeq(v11, __ T4S, v12, v13); // fcmeq v11.4S, v12.4S, v13.4S
|
||||
__ fcmeq(v7, __ T2D, v8, v9); // fcmeq v7.2D, v8.2D, v9.2D
|
||||
__ cmgt(v10, __ T8B, v11, v12); // cmgt v10.8B, v11.8B, v12.8B
|
||||
__ cmgt(v15, __ T16B, v16, v17); // cmgt v15.16B, v16.16B, v17.16B
|
||||
__ cmgt(v16, __ T4H, v17, v18); // cmgt v16.4H, v17.4H, v18.4H
|
||||
__ cmgt(v2, __ T8H, v3, v4); // cmgt v2.8H, v3.8H, v4.8H
|
||||
__ cmgt(v9, __ T2S, v10, v11); // cmgt v9.2S, v10.2S, v11.2S
|
||||
__ cmgt(v11, __ T4S, v12, v13); // cmgt v11.4S, v12.4S, v13.4S
|
||||
__ cmgt(v12, __ T2D, v13, v14); // cmgt v12.2D, v13.2D, v14.2D
|
||||
__ cmhi(v14, __ T8B, v15, v16); // cmhi v14.8B, v15.8B, v16.8B
|
||||
__ cmhi(v13, __ T16B, v14, v15); // cmhi v13.16B, v14.16B, v15.16B
|
||||
__ cmhi(v2, __ T4H, v3, v4); // cmhi v2.4H, v3.4H, v4.4H
|
||||
__ cmhi(v6, __ T8H, v7, v8); // cmhi v6.8H, v7.8H, v8.8H
|
||||
__ cmhi(v19, __ T2S, v20, v21); // cmhi v19.2S, v20.2S, v21.2S
|
||||
__ cmhi(v25, __ T4S, v26, v27); // cmhi v25.4S, v26.4S, v27.4S
|
||||
__ cmhi(v15, __ T2D, v16, v17); // cmhi v15.2D, v16.2D, v17.2D
|
||||
__ cmhs(v4, __ T8B, v5, v6); // cmhs v4.8B, v5.8B, v6.8B
|
||||
__ cmhs(v2, __ T16B, v3, v4); // cmhs v2.16B, v3.16B, v4.16B
|
||||
__ cmhs(v4, __ T4H, v5, v6); // cmhs v4.4H, v5.4H, v6.4H
|
||||
__ cmhs(v11, __ T8H, v12, v13); // cmhs v11.8H, v12.8H, v13.8H
|
||||
__ cmhs(v17, __ T2S, v18, v19); // cmhs v17.2S, v18.2S, v19.2S
|
||||
__ cmhs(v20, __ T4S, v21, v22); // cmhs v20.4S, v21.4S, v22.4S
|
||||
__ cmhs(v16, __ T2D, v17, v18); // cmhs v16.2D, v17.2D, v18.2D
|
||||
__ fcmgt(v17, __ T2S, v18, v19); // fcmgt v17.2S, v18.2S, v19.2S
|
||||
__ fcmgt(v10, __ T4S, v11, v12); // fcmgt v10.4S, v11.4S, v12.4S
|
||||
__ fcmgt(v20, __ T2D, v21, v22); // fcmgt v20.2D, v21.2D, v22.2D
|
||||
__ cmge(v22, __ T8B, v23, v24); // cmge v22.8B, v23.8B, v24.8B
|
||||
__ cmge(v12, __ T16B, v13, v14); // cmge v12.16B, v13.16B, v14.16B
|
||||
__ cmge(v25, __ T4H, v26, v27); // cmge v25.4H, v26.4H, v27.4H
|
||||
__ cmge(v23, __ T8H, v24, v25); // cmge v23.8H, v24.8H, v25.8H
|
||||
__ cmge(v28, __ T2S, v29, v30); // cmge v28.2S, v29.2S, v30.2S
|
||||
__ cmge(v14, __ T4S, v15, v16); // cmge v14.4S, v15.4S, v16.4S
|
||||
__ cmge(v10, __ T2D, v11, v12); // cmge v10.2D, v11.2D, v12.2D
|
||||
__ fcmge(v24, __ T2S, v25, v26); // fcmge v24.2S, v25.2S, v26.2S
|
||||
__ fcmge(v1, __ T4S, v2, v3); // fcmge v1.4S, v2.4S, v3.4S
|
||||
__ fcmge(v11, __ T2D, v12, v13); // fcmge v11.2D, v12.2D, v13.2D
|
||||
__ facgt(v30, __ T2S, v31, v0); // facgt v30.2S, v31.2S, v0.2S
|
||||
__ facgt(v10, __ T4S, v11, v12); // facgt v10.4S, v11.4S, v12.4S
|
||||
__ facgt(v15, __ T2D, v16, v17); // facgt v15.2D, v16.2D, v17.2D
|
||||
__ facgt(v29, __ T2S, v30, v31); // facgt v29.2S, v30.2S, v31.2S
|
||||
__ facgt(v3, __ T4S, v4, v5); // facgt v3.4S, v4.4S, v5.4S
|
||||
__ facgt(v10, __ T2D, v11, v12); // facgt v10.2D, v11.2D, v12.2D
|
||||
|
||||
// NEONVectorCompare
|
||||
__ cm(Assembler::GT, v22, __ T8B, v23, v24); // cmgt v22.8B, v23.8B, v24.8B
|
||||
__ cm(Assembler::GT, v10, __ T16B, v11, v12); // cmgt v10.16B, v11.16B, v12.16B
|
||||
__ cm(Assembler::GT, v4, __ T4H, v5, v6); // cmgt v4.4H, v5.4H, v6.4H
|
||||
__ cm(Assembler::GT, v17, __ T8H, v18, v19); // cmgt v17.8H, v18.8H, v19.8H
|
||||
__ cm(Assembler::GT, v1, __ T2S, v2, v3); // cmgt v1.2S, v2.2S, v3.2S
|
||||
__ cm(Assembler::GT, v11, __ T4S, v12, v13); // cmgt v11.4S, v12.4S, v13.4S
|
||||
__ cm(Assembler::GT, v7, __ T2D, v8, v9); // cmgt v7.2D, v8.2D, v9.2D
|
||||
__ cm(Assembler::GE, v10, __ T8B, v11, v12); // cmge v10.8B, v11.8B, v12.8B
|
||||
__ cm(Assembler::GE, v15, __ T16B, v16, v17); // cmge v15.16B, v16.16B, v17.16B
|
||||
__ cm(Assembler::GE, v16, __ T4H, v17, v18); // cmge v16.4H, v17.4H, v18.4H
|
||||
__ cm(Assembler::GE, v2, __ T8H, v3, v4); // cmge v2.8H, v3.8H, v4.8H
|
||||
__ cm(Assembler::GE, v9, __ T2S, v10, v11); // cmge v9.2S, v10.2S, v11.2S
|
||||
__ cm(Assembler::GE, v11, __ T4S, v12, v13); // cmge v11.4S, v12.4S, v13.4S
|
||||
__ cm(Assembler::GE, v12, __ T2D, v13, v14); // cmge v12.2D, v13.2D, v14.2D
|
||||
__ cm(Assembler::EQ, v14, __ T8B, v15, v16); // cmeq v14.8B, v15.8B, v16.8B
|
||||
__ cm(Assembler::EQ, v13, __ T16B, v14, v15); // cmeq v13.16B, v14.16B, v15.16B
|
||||
__ cm(Assembler::EQ, v2, __ T4H, v3, v4); // cmeq v2.4H, v3.4H, v4.4H
|
||||
__ cm(Assembler::EQ, v6, __ T8H, v7, v8); // cmeq v6.8H, v7.8H, v8.8H
|
||||
__ cm(Assembler::EQ, v19, __ T2S, v20, v21); // cmeq v19.2S, v20.2S, v21.2S
|
||||
__ cm(Assembler::EQ, v25, __ T4S, v26, v27); // cmeq v25.4S, v26.4S, v27.4S
|
||||
__ cm(Assembler::EQ, v15, __ T2D, v16, v17); // cmeq v15.2D, v16.2D, v17.2D
|
||||
__ cm(Assembler::HI, v4, __ T8B, v5, v6); // cmhi v4.8B, v5.8B, v6.8B
|
||||
__ cm(Assembler::HI, v2, __ T16B, v3, v4); // cmhi v2.16B, v3.16B, v4.16B
|
||||
__ cm(Assembler::HI, v4, __ T4H, v5, v6); // cmhi v4.4H, v5.4H, v6.4H
|
||||
__ cm(Assembler::HI, v11, __ T8H, v12, v13); // cmhi v11.8H, v12.8H, v13.8H
|
||||
__ cm(Assembler::HI, v17, __ T2S, v18, v19); // cmhi v17.2S, v18.2S, v19.2S
|
||||
__ cm(Assembler::HI, v20, __ T4S, v21, v22); // cmhi v20.4S, v21.4S, v22.4S
|
||||
__ cm(Assembler::HI, v16, __ T2D, v17, v18); // cmhi v16.2D, v17.2D, v18.2D
|
||||
__ cm(Assembler::HS, v17, __ T8B, v18, v19); // cmhs v17.8B, v18.8B, v19.8B
|
||||
__ cm(Assembler::HS, v10, __ T16B, v11, v12); // cmhs v10.16B, v11.16B, v12.16B
|
||||
__ cm(Assembler::HS, v20, __ T4H, v21, v22); // cmhs v20.4H, v21.4H, v22.4H
|
||||
__ cm(Assembler::HS, v22, __ T8H, v23, v24); // cmhs v22.8H, v23.8H, v24.8H
|
||||
__ cm(Assembler::HS, v12, __ T2S, v13, v14); // cmhs v12.2S, v13.2S, v14.2S
|
||||
__ cm(Assembler::HS, v25, __ T4S, v26, v27); // cmhs v25.4S, v26.4S, v27.4S
|
||||
__ cm(Assembler::HS, v23, __ T2D, v24, v25); // cmhs v23.2D, v24.2D, v25.2D
|
||||
__ fcm(Assembler::EQ, v28, __ T2S, v29, v30); // fcmeq v28.2S, v29.2S, v30.2S
|
||||
__ fcm(Assembler::EQ, v14, __ T4S, v15, v16); // fcmeq v14.4S, v15.4S, v16.4S
|
||||
__ fcm(Assembler::EQ, v10, __ T2D, v11, v12); // fcmeq v10.2D, v11.2D, v12.2D
|
||||
__ fcm(Assembler::GT, v24, __ T2S, v25, v26); // fcmgt v24.2S, v25.2S, v26.2S
|
||||
__ fcm(Assembler::GT, v1, __ T4S, v2, v3); // fcmgt v1.4S, v2.4S, v3.4S
|
||||
__ fcm(Assembler::GT, v11, __ T2D, v12, v13); // fcmgt v11.2D, v12.2D, v13.2D
|
||||
__ fcm(Assembler::GE, v30, __ T2S, v31, v0); // fcmge v30.2S, v31.2S, v0.2S
|
||||
__ fcm(Assembler::GE, v10, __ T4S, v11, v12); // fcmge v10.4S, v11.4S, v12.4S
|
||||
__ fcm(Assembler::GE, v15, __ T2D, v16, v17); // fcmge v15.2D, v16.2D, v17.2D
|
||||
|
||||
// SVEComparisonWithZero
|
||||
__ sve_fcm(Assembler::EQ, p3, __ S, p3, z2, 0.0); // fcmeq p3.s, p3/z, z2.s, #0.0
|
||||
@ -1429,18 +1431,18 @@
|
||||
0x4e696d07, 0x0eae6dac, 0x4ea26c20, 0x0e35ae93,
|
||||
0x4e23ac41, 0x0e79af17, 0x4e64ac62, 0x0ea2ac20,
|
||||
0x4eaaad28, 0x0eb9f717, 0x4ebbf759, 0x4ef1f60f,
|
||||
0x2e3f8fdd, 0x6e258c83, 0x2e6c8d6a, 0x6e788ef6,
|
||||
0x2eac8d6a, 0x6ea68ca4, 0x6ef38e51, 0x0e23e441,
|
||||
0x4e2de58b, 0x4e69e507, 0x0e2c356a, 0x4e31360f,
|
||||
0x0e723630, 0x4e643462, 0x0eab3549, 0x4ead358b,
|
||||
0x4eee35ac, 0x2e3035ee, 0x6e2f35cd, 0x2e643462,
|
||||
0x6e6834e6, 0x2eb53693, 0x6ebb3759, 0x6ef1360f,
|
||||
0x2e263ca4, 0x6e243c62, 0x2e663ca4, 0x6e6d3d8b,
|
||||
0x2eb33e51, 0x6eb63eb4, 0x6ef23e30, 0x2eb3e651,
|
||||
0x6eace56a, 0x6ef6e6b4, 0x0e383ef6, 0x4e2e3dac,
|
||||
0x0e7b3f59, 0x4e793f17, 0x0ebe3fbc, 0x4eb03dee,
|
||||
0x4eec3d6a, 0x2e3ae738, 0x6e23e441, 0x6e6de58b,
|
||||
0x2ea0effe, 0x6eaced6a, 0x6ef1ee0f, 0x65922c43,
|
||||
0x2ebfefdd, 0x6ea5ec83, 0x6eeced6a, 0x0e3836f6,
|
||||
0x4e2c356a, 0x0e6634a4, 0x4e733651, 0x0ea33441,
|
||||
0x4ead358b, 0x4ee93507, 0x0e2c3d6a, 0x4e313e0f,
|
||||
0x0e723e30, 0x4e643c62, 0x0eab3d49, 0x4ead3d8b,
|
||||
0x4eee3dac, 0x2e308dee, 0x6e2f8dcd, 0x2e648c62,
|
||||
0x6e688ce6, 0x2eb58e93, 0x6ebb8f59, 0x6ef18e0f,
|
||||
0x2e2634a4, 0x6e243462, 0x2e6634a4, 0x6e6d358b,
|
||||
0x2eb33651, 0x6eb636b4, 0x6ef23630, 0x2e333e51,
|
||||
0x6e2c3d6a, 0x2e763eb4, 0x6e783ef6, 0x2eae3dac,
|
||||
0x6ebb3f59, 0x6ef93f17, 0x0e3ee7bc, 0x4e30e5ee,
|
||||
0x4e6ce56a, 0x2ebae738, 0x6ea3e441, 0x6eede58b,
|
||||
0x2e20e7fe, 0x6e2ce56a, 0x6e71e60f, 0x65922c43,
|
||||
0x65d02219, 0x65d02560, 0x65d13dc4, 0x65913690,
|
||||
0x65d33b6b, 0xba5fd3e3, 0x3a5f03e5, 0xfa411be4,
|
||||
0x7a42cbe2, 0x93df03ff, 0xc820ffff, 0x8822fc7f,
|
||||
|
Loading…
Reference in New Issue
Block a user