8256318: AArch64: Add support for floating-point absolute difference
Reviewed-by: aph
This commit is contained in:
parent
655bb619a3
commit
b0b9dd27b8
@ -1334,10 +1334,9 @@ generate(FourRegMulOp,
|
||||
["maddw", "msubw", "madd", "msub", "smaddl", "smsubl", "umaddl", "umsubl"])
|
||||
|
||||
generate(ThreeRegFloatOp,
|
||||
[["fmuls", "sss"], ["fdivs", "sss"], ["fadds", "sss"], ["fsubs", "sss"],
|
||||
["fmuls", "sss"],
|
||||
["fmuld", "ddd"], ["fdivd", "ddd"], ["faddd", "ddd"], ["fsubd", "ddd"],
|
||||
["fmuld", "ddd"]])
|
||||
[["fabds", "sss"], ["fmuls", "sss"], ["fdivs", "sss"], ["fadds", "sss"], ["fsubs", "sss"],
|
||||
["fabdd", "ddd"], ["fmuld", "ddd"], ["fdivd", "ddd"], ["faddd", "ddd"], ["fsubd", "ddd"],
|
||||
])
|
||||
|
||||
generate(FourRegFloatOp,
|
||||
[["fmadds", "ssss"], ["fmsubs", "ssss"], ["fnmadds", "ssss"], ["fnmadds", "ssss"],
|
||||
@ -1437,6 +1436,8 @@ generate(ThreeRegNEONOp,
|
||||
["mulv", "mul", "8B"], ["mulv", "mul", "16B"],
|
||||
["mulv", "mul", "4H"], ["mulv", "mul", "8H"],
|
||||
["mulv", "mul", "2S"], ["mulv", "mul", "4S"],
|
||||
["fabd", "fabd", "2S"], ["fabd", "fabd", "4S"],
|
||||
["fabd", "fabd", "2D"],
|
||||
["fmul", "fmul", "2S"], ["fmul", "fmul", "4S"],
|
||||
["fmul", "fmul", "2D"],
|
||||
["mlav", "mla", "4H"], ["mlav", "mla", "8H"],
|
||||
|
@ -13894,6 +13894,34 @@ instruct absD_reg(vRegD dst, vRegD src) %{
|
||||
ins_pipe(fp_uop_d);
|
||||
%}
|
||||
|
||||
// Scalar single-precision absolute difference: matches AbsF applied to
// SubF(src1, src2) and emits a single fabds on dst, src1, src2.
// NOTE(review): this is a two-source instruction but it is assigned the
// fp_uop_s pipeline class, whose name suggests a unary op -- confirm that the
// class's operands line up with dst/src1/src2.
instruct absdF_reg(vRegF dst, vRegF src1, vRegF src2) %{
|
||||
match(Set dst (AbsF (SubF src1 src2)));
|
||||
|
||||
ins_cost(INSN_COST * 3);
|
||||
format %{ "fabds $dst, $src1, $src2" %}
|
||||
ins_encode %{
|
||||
__ fabds(as_FloatRegister($dst$$reg),
|
||||
as_FloatRegister($src1$$reg),
|
||||
as_FloatRegister($src2$$reg));
|
||||
%}
|
||||
|
||||
ins_pipe(fp_uop_s);
|
||||
%}
|
||||
|
||||
// Scalar double-precision absolute difference: matches AbsD applied to
// SubD(src1, src2) and emits a single fabdd on dst, src1, src2.
// NOTE(review): this is a two-source instruction but it is assigned the
// fp_uop_d pipeline class, whose name suggests a unary op -- confirm that the
// class's operands line up with dst/src1/src2.
instruct absdD_reg(vRegD dst, vRegD src1, vRegD src2) %{
|
||||
match(Set dst (AbsD (SubD src1 src2)));
|
||||
|
||||
ins_cost(INSN_COST * 3);
|
||||
format %{ "fabdd $dst, $src1, $src2" %}
|
||||
ins_encode %{
|
||||
__ fabdd(as_FloatRegister($dst$$reg),
|
||||
as_FloatRegister($src1$$reg),
|
||||
as_FloatRegister($src2$$reg));
|
||||
%}
|
||||
|
||||
ins_pipe(fp_uop_d);
|
||||
%}
|
||||
|
||||
instruct sqrtD_reg(vRegD dst, vRegD src) %{
|
||||
match(Set dst (SqrtD src));
|
||||
|
||||
@ -17872,129 +17900,6 @@ instruct vsqrt2D(vecX dst, vecX src)
|
||||
ins_pipe(vsqrt_fp128);
|
||||
%}
|
||||
|
||||
// --------------------------------- ABS --------------------------------------
|
||||
|
||||
// Vector integer abs for AbsVB on vecD operands.
// The predicate admits both 4-lane and 8-lane vectors; either way a single
// absr with the T8B arrangement is emitted.
instruct vabs8B(vecD dst, vecD src)
|
||||
%{
|
||||
predicate(n->as_Vector()->length() == 4 ||
|
||||
n->as_Vector()->length() == 8);
|
||||
match(Set dst (AbsVB src));
|
||||
ins_cost(INSN_COST);
|
||||
format %{ "abs $dst, $src\t# vector (8B)" %}
|
||||
ins_encode %{
|
||||
__ absr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
ins_pipe(vlogical64);
|
||||
%}
|
||||
|
||||
// Vector integer abs for AbsVB on vecX operands with exactly 16 lanes;
// emits absr with the T16B arrangement.
instruct vabs16B(vecX dst, vecX src)
|
||||
%{
|
||||
predicate(n->as_Vector()->length() == 16);
|
||||
match(Set dst (AbsVB src));
|
||||
ins_cost(INSN_COST);
|
||||
format %{ "abs $dst, $src\t# vector (16B)" %}
|
||||
ins_encode %{
|
||||
__ absr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
ins_pipe(vlogical128);
|
||||
%}
|
||||
|
||||
// Vector integer abs for AbsVS on vecD operands with exactly 4 lanes.
// The "S" in the rule name follows the AbsVS node; the arrangement handed to
// absr (and printed by the format string) is 4H.
instruct vabs4S(vecD dst, vecD src)
|
||||
%{
|
||||
predicate(n->as_Vector()->length() == 4);
|
||||
match(Set dst (AbsVS src));
|
||||
ins_cost(INSN_COST);
|
||||
format %{ "abs $dst, $src\t# vector (4H)" %}
|
||||
ins_encode %{
|
||||
__ absr(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
ins_pipe(vlogical64);
|
||||
%}
|
||||
|
||||
// Vector integer abs for AbsVS on vecX operands with exactly 8 lanes.
// The "S" in the rule name follows the AbsVS node; the arrangement handed to
// absr (and printed by the format string) is 8H.
instruct vabs8S(vecX dst, vecX src)
|
||||
%{
|
||||
predicate(n->as_Vector()->length() == 8);
|
||||
match(Set dst (AbsVS src));
|
||||
ins_cost(INSN_COST);
|
||||
format %{ "abs $dst, $src\t# vector (8H)" %}
|
||||
ins_encode %{
|
||||
__ absr(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
ins_pipe(vlogical128);
|
||||
%}
|
||||
|
||||
// Vector integer abs for AbsVI on vecD operands with exactly 2 lanes;
// emits absr with the T2S arrangement.
instruct vabs2I(vecD dst, vecD src)
|
||||
%{
|
||||
predicate(n->as_Vector()->length() == 2);
|
||||
match(Set dst (AbsVI src));
|
||||
ins_cost(INSN_COST);
|
||||
format %{ "abs $dst, $src\t# vector (2S)" %}
|
||||
ins_encode %{
|
||||
__ absr(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
ins_pipe(vlogical64);
|
||||
%}
|
||||
|
||||
// Vector integer abs for AbsVI on vecX operands with exactly 4 lanes;
// emits absr with the T4S arrangement.
instruct vabs4I(vecX dst, vecX src)
|
||||
%{
|
||||
predicate(n->as_Vector()->length() == 4);
|
||||
match(Set dst (AbsVI src));
|
||||
ins_cost(INSN_COST);
|
||||
format %{ "abs $dst, $src\t# vector (4S)" %}
|
||||
ins_encode %{
|
||||
__ absr(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
ins_pipe(vlogical128);
|
||||
%}
|
||||
|
||||
// Vector integer abs for AbsVL on vecX operands with exactly 2 lanes;
// emits absr with the T2D arrangement.
instruct vabs2L(vecX dst, vecX src)
|
||||
%{
|
||||
predicate(n->as_Vector()->length() == 2);
|
||||
match(Set dst (AbsVL src));
|
||||
ins_cost(INSN_COST);
|
||||
format %{ "abs $dst, $src\t# vector (2D)" %}
|
||||
ins_encode %{
|
||||
__ absr(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
ins_pipe(vlogical128);
|
||||
%}
|
||||
|
||||
// Vector floating-point abs for AbsVF on vecD operands with exactly 2 lanes;
// emits fabs with the T2S arrangement. Costed at INSN_COST * 3.
instruct vabs2F(vecD dst, vecD src)
|
||||
%{
|
||||
predicate(n->as_Vector()->length() == 2);
|
||||
match(Set dst (AbsVF src));
|
||||
ins_cost(INSN_COST * 3);
|
||||
format %{ "fabs $dst,$src\t# vector (2S)" %}
|
||||
ins_encode %{
|
||||
__ fabs(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
ins_pipe(vunop_fp64);
|
||||
%}
|
||||
|
||||
// Vector floating-point abs for AbsVF on vecX operands with exactly 4 lanes;
// emits fabs with the T4S arrangement. Costed at INSN_COST * 3.
instruct vabs4F(vecX dst, vecX src)
|
||||
%{
|
||||
predicate(n->as_Vector()->length() == 4);
|
||||
match(Set dst (AbsVF src));
|
||||
ins_cost(INSN_COST * 3);
|
||||
format %{ "fabs $dst,$src\t# vector (4S)" %}
|
||||
ins_encode %{
|
||||
__ fabs(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
ins_pipe(vunop_fp128);
|
||||
%}
|
||||
|
||||
// Vector floating-point abs for AbsVD on vecX operands with exactly 2 lanes;
// emits fabs with the T2D arrangement. Costed at INSN_COST * 3.
instruct vabs2D(vecX dst, vecX src)
|
||||
%{
|
||||
predicate(n->as_Vector()->length() == 2);
|
||||
match(Set dst (AbsVD src));
|
||||
ins_cost(INSN_COST * 3);
|
||||
format %{ "fabs $dst,$src\t# vector (2D)" %}
|
||||
ins_encode %{
|
||||
__ fabs(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
ins_pipe(vunop_fp128);
|
||||
%}
|
||||
|
||||
// --------------------------------- NEG --------------------------------------
|
||||
|
||||
instruct vneg2F(vecD dst, vecD src)
|
||||
|
@ -3454,3 +3454,166 @@ instruct alltrue_in_mask16B(iRegINoSp dst, vecX src1, vecX src2, vecX tmp, rFlag
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// --------------------------------- ABS --------------------------------------
|
||||
|
||||
// Vector integer abs for AbsVB on vecD operands.
// The predicate admits both 4-lane and 8-lane vectors; either way a single
// absr with the T8B arrangement is emitted.
instruct vabs8B(vecD dst, vecD src)
|
||||
%{
|
||||
predicate(n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8);
|
||||
match(Set dst (AbsVB src));
|
||||
ins_cost(INSN_COST);
|
||||
format %{ "abs $dst, $src\t# vector (8B)" %}
|
||||
ins_encode %{
|
||||
__ absr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
ins_pipe(vlogical64);
|
||||
%}
|
||||
|
||||
// Vector integer abs for AbsVB on vecX operands with exactly 16 lanes;
// emits absr with the T16B arrangement.
instruct vabs16B(vecX dst, vecX src)
|
||||
%{
|
||||
predicate(n->as_Vector()->length() == 16);
|
||||
match(Set dst (AbsVB src));
|
||||
ins_cost(INSN_COST);
|
||||
format %{ "abs $dst, $src\t# vector (16B)" %}
|
||||
ins_encode %{
|
||||
__ absr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
ins_pipe(vlogical128);
|
||||
%}
|
||||
|
||||
// Vector integer abs for AbsVS on vecD operands with exactly 4 lanes.
// The "S" in the rule name follows the AbsVS node; the arrangement handed to
// absr (and printed by the format string) is 4H.
instruct vabs4S(vecD dst, vecD src)
|
||||
%{
|
||||
predicate(n->as_Vector()->length() == 4);
|
||||
match(Set dst (AbsVS src));
|
||||
ins_cost(INSN_COST);
|
||||
format %{ "abs $dst, $src\t# vector (4H)" %}
|
||||
ins_encode %{
|
||||
__ absr(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
ins_pipe(vlogical64);
|
||||
%}
|
||||
|
||||
// Vector integer abs for AbsVS on vecX operands with exactly 8 lanes.
// The "S" in the rule name follows the AbsVS node; the arrangement handed to
// absr (and printed by the format string) is 8H.
instruct vabs8S(vecX dst, vecX src)
|
||||
%{
|
||||
predicate(n->as_Vector()->length() == 8);
|
||||
match(Set dst (AbsVS src));
|
||||
ins_cost(INSN_COST);
|
||||
format %{ "abs $dst, $src\t# vector (8H)" %}
|
||||
ins_encode %{
|
||||
__ absr(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
ins_pipe(vlogical128);
|
||||
%}
|
||||
|
||||
// Vector integer abs for AbsVI on vecD operands with exactly 2 lanes;
// emits absr with the T2S arrangement.
instruct vabs2I(vecD dst, vecD src)
|
||||
%{
|
||||
predicate(n->as_Vector()->length() == 2);
|
||||
match(Set dst (AbsVI src));
|
||||
ins_cost(INSN_COST);
|
||||
format %{ "abs $dst, $src\t# vector (2S)" %}
|
||||
ins_encode %{
|
||||
__ absr(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
ins_pipe(vlogical64);
|
||||
%}
|
||||
|
||||
// Vector integer abs for AbsVI on vecX operands with exactly 4 lanes;
// emits absr with the T4S arrangement.
instruct vabs4I(vecX dst, vecX src)
|
||||
%{
|
||||
predicate(n->as_Vector()->length() == 4);
|
||||
match(Set dst (AbsVI src));
|
||||
ins_cost(INSN_COST);
|
||||
format %{ "abs $dst, $src\t# vector (4S)" %}
|
||||
ins_encode %{
|
||||
__ absr(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
ins_pipe(vlogical128);
|
||||
%}
|
||||
|
||||
// Vector integer abs for AbsVL on vecX operands with exactly 2 lanes;
// emits absr with the T2D arrangement.
instruct vabs2L(vecX dst, vecX src)
|
||||
%{
|
||||
predicate(n->as_Vector()->length() == 2);
|
||||
match(Set dst (AbsVL src));
|
||||
ins_cost(INSN_COST);
|
||||
format %{ "abs $dst, $src\t# vector (2D)" %}
|
||||
ins_encode %{
|
||||
__ absr(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
ins_pipe(vlogical128);
|
||||
%}
|
||||
|
||||
// Vector floating-point abs for AbsVF on vecD operands with exactly 2 lanes;
// emits fabs with the T2S arrangement. Costed at INSN_COST * 3.
instruct vabs2F(vecD dst, vecD src)
|
||||
%{
|
||||
predicate(n->as_Vector()->length() == 2);
|
||||
match(Set dst (AbsVF src));
|
||||
ins_cost(INSN_COST * 3);
|
||||
format %{ "fabs $dst, $src\t# vector (2S)" %}
|
||||
ins_encode %{
|
||||
__ fabs(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
ins_pipe(vunop_fp64);
|
||||
%}
|
||||
|
||||
// Vector floating-point abs for AbsVF on vecX operands with exactly 4 lanes;
// emits fabs with the T4S arrangement. Costed at INSN_COST * 3.
instruct vabs4F(vecX dst, vecX src)
|
||||
%{
|
||||
predicate(n->as_Vector()->length() == 4);
|
||||
match(Set dst (AbsVF src));
|
||||
ins_cost(INSN_COST * 3);
|
||||
format %{ "fabs $dst, $src\t# vector (4S)" %}
|
||||
ins_encode %{
|
||||
__ fabs(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
ins_pipe(vunop_fp128);
|
||||
%}
|
||||
|
||||
// Vector floating-point abs for AbsVD on vecX operands with exactly 2 lanes;
// emits fabs with the T2D arrangement. Costed at INSN_COST * 3.
instruct vabs2D(vecX dst, vecX src)
|
||||
%{
|
||||
predicate(n->as_Vector()->length() == 2);
|
||||
match(Set dst (AbsVD src));
|
||||
ins_cost(INSN_COST * 3);
|
||||
format %{ "fabs $dst, $src\t# vector (2D)" %}
|
||||
ins_encode %{
|
||||
__ fabs(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
ins_pipe(vunop_fp128);
|
||||
%}
|
||||
|
||||
// --------------------------------- FABS DIFF --------------------------------
|
||||
|
||||
// Vector floating-point absolute difference, 2 lanes on vecD operands:
// matches AbsVF applied to SubVF(src1, src2) and emits a single fabd with the
// T2S arrangement.
// NOTE(review): two source operands, yet the rule uses the vunop_fp64
// pipeline class whose name suggests a unary op -- confirm this is intended.
instruct vabd2F(vecD dst, vecD src1, vecD src2)
|
||||
%{
|
||||
predicate(n->as_Vector()->length() == 2);
|
||||
match(Set dst (AbsVF (SubVF src1 src2)));
|
||||
ins_cost(INSN_COST * 3);
|
||||
format %{ "fabd $dst, $src1, $src2\t# vector (2S)" %}
|
||||
ins_encode %{
|
||||
__ fabd(as_FloatRegister($dst$$reg), __ T2S,
|
||||
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
|
||||
%}
|
||||
ins_pipe(vunop_fp64);
|
||||
%}
|
||||
|
||||
// Vector floating-point absolute difference, 4 lanes on vecX operands:
// matches AbsVF applied to SubVF(src1, src2) and emits a single fabd with the
// T4S arrangement.
// NOTE(review): two source operands, yet the rule uses the vunop_fp128
// pipeline class whose name suggests a unary op -- confirm this is intended.
instruct vabd4F(vecX dst, vecX src1, vecX src2)
|
||||
%{
|
||||
predicate(n->as_Vector()->length() == 4);
|
||||
match(Set dst (AbsVF (SubVF src1 src2)));
|
||||
ins_cost(INSN_COST * 3);
|
||||
format %{ "fabd $dst, $src1, $src2\t# vector (4S)" %}
|
||||
ins_encode %{
|
||||
__ fabd(as_FloatRegister($dst$$reg), __ T4S,
|
||||
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
|
||||
%}
|
||||
ins_pipe(vunop_fp128);
|
||||
%}
|
||||
|
||||
// Vector floating-point absolute difference, 2 lanes on vecX operands:
// matches AbsVD applied to SubVD(src1, src2) and emits a single fabd with the
// T2D arrangement.
// NOTE(review): two source operands, yet the rule uses the vunop_fp128
// pipeline class whose name suggests a unary op -- confirm this is intended.
instruct vabd2D(vecX dst, vecX src1, vecX src2)
|
||||
%{
|
||||
predicate(n->as_Vector()->length() == 2);
|
||||
match(Set dst (AbsVD (SubVD src1 src2)));
|
||||
ins_cost(INSN_COST * 3);
|
||||
format %{ "fabd $dst, $src1, $src2\t# vector (2D)" %}
|
||||
ins_encode %{
|
||||
__ fabd(as_FloatRegister($dst$$reg), __ T2D,
|
||||
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
|
||||
%}
|
||||
ins_pipe(vunop_fp128);
|
||||
%}
|
||||
|
@ -1421,4 +1421,50 @@ instruct alltrue_in_mask$1B`'(iRegINoSp dst, vec$2 src1, vec$2 src2, vec$2 tmp,
|
||||
dnl $1 $2
|
||||
ALLTRUE_IN_MASK(8, D)
|
||||
ALLTRUE_IN_MASK(16, X)
|
||||
|
||||
// --------------------------------- ABS --------------------------------------
|
||||
dnl
|
||||
dnl VABS expands to one vabs instruct rule per invocation. Arguments:
dnl   $1 mnemonic printed by the format string
dnl   $2 assembler method called inside ins_encode
dnl   $3 lane count tested by the predicate
dnl   $4 type letter used in the rule name and in the AbsV node name
dnl   $5 register class suffix, giving the vec operand type
dnl   $6 arrangement letter, combined with $3 into the T arrangement
dnl   $7 width suffix appended to the pipeline class name
dnl The 8B case additionally accepts 4-lane vectors. Type letters F and D get
dnl cost INSN_COST * 3 and a vunop_fp pipeline class; every other type letter
dnl gets cost INSN_COST and a vlogical pipeline class.
define(`VABS', `
|
||||
instruct vabs$3$4`'(vec$5 dst, vec$5 src)
|
||||
%{
|
||||
predicate(ifelse($3$4, 8B, n->as_Vector()->length() == 4 || )n->as_Vector()->length() == $3);
|
||||
match(Set dst (AbsV$4 src));
|
||||
ins_cost(ifelse($4, F, INSN_COST * 3, $4, D, INSN_COST * 3, INSN_COST));
|
||||
format %{ "$1 $dst, $src\t# vector ($3$6)" %}
|
||||
ins_encode %{
|
||||
__ $2(as_FloatRegister($dst$$reg), __ T$3$6, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
ins_pipe(ifelse($4, F, vunop_fp$7, $4, D, vunop_fp$7, vlogical$7));
|
||||
%}')dnl
|
||||
dnl $1 $2 $3 $4 $5 $6 $7
|
||||
VABS(abs, absr, 8, B, D, B, 64)
|
||||
VABS(abs, absr, 16, B, X, B, 128)
|
||||
VABS(abs, absr, 4, S, D, H, 64)
|
||||
VABS(abs, absr, 8, S, X, H, 128)
|
||||
VABS(abs, absr, 2, I, D, S, 64)
|
||||
VABS(abs, absr, 4, I, X, S, 128)
|
||||
VABS(abs, absr, 2, L, X, D, 128)
|
||||
VABS(fabs, fabs, 2, F, D, S, 64)
|
||||
VABS(fabs, fabs, 4, F, X, S, 128)
|
||||
VABS(fabs, fabs, 2, D, X, D, 128)
|
||||
|
||||
// --------------------------------- FABS DIFF --------------------------------
|
||||
dnl
|
||||
dnl VFABD expands to one vabd instruct rule per invocation; each rule matches
dnl an AbsV node applied to a SubV node of the same type letter. Arguments:
dnl   $1 mnemonic printed by the format string
dnl   $2 assembler method called inside ins_encode
dnl   $3 lane count tested by the predicate
dnl   $4 type letter used in the rule name and in the AbsV and SubV node names
dnl   $5 register class suffix, giving the vec operand type
dnl   $6 arrangement letter, combined with $3 into the T arrangement
dnl   $7 width suffix appended to the vunop_fp pipeline class name
dnl NOTE(review): the rule has two sources but uses a vunop_fp pipeline class,
dnl whose name suggests a unary op; confirm this is intended.
define(`VFABD', `
|
||||
instruct vabd$3$4`'(vec$5 dst, vec$5 src1, vec$5 src2)
|
||||
%{
|
||||
predicate(n->as_Vector()->length() == $3);
|
||||
match(Set dst (AbsV$4 (SubV$4 src1 src2)));
|
||||
ins_cost(INSN_COST * 3);
|
||||
format %{ "$1 $dst, $src1, $src2\t# vector ($3$6)" %}
|
||||
ins_encode %{
|
||||
__ $2(as_FloatRegister($dst$$reg), __ T$3$6,
|
||||
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
|
||||
%}
|
||||
ins_pipe(vunop_fp$7);
|
||||
%}')dnl
|
||||
dnl $1 $2 $3 $4 $5 $6 $7
|
||||
VFABD(fabd, fabd, 2, F, D, S, 64)
|
||||
VFABD(fabd, fabd, 4, F, X, S, 128)
|
||||
VFABD(fabd, fabd, 2, D, X, D, 128)
|
||||
dnl
|
||||
|
@ -510,16 +510,16 @@ void entry(CodeBuffer *cb) {
|
||||
__ umsubl(r15, r21, r28, r17); // umsubl x15, w21, w28, x17
|
||||
|
||||
// ThreeRegFloatOp
|
||||
__ fmuls(v27, v10, v3); // fmul s27, s10, s3
|
||||
__ fdivs(v0, v7, v25); // fdiv s0, s7, s25
|
||||
__ fadds(v9, v6, v15); // fadd s9, s6, s15
|
||||
__ fsubs(v29, v15, v10); // fsub s29, s15, s10
|
||||
__ fmuls(v2, v17, v7); // fmul s2, s17, s7
|
||||
__ fmuld(v11, v11, v23); // fmul d11, d11, d23
|
||||
__ fdivd(v7, v29, v23); // fdiv d7, d29, d23
|
||||
__ faddd(v14, v27, v11); // fadd d14, d27, d11
|
||||
__ fsubd(v11, v4, v24); // fsub d11, d4, d24
|
||||
__ fmuld(v12, v15, v14); // fmul d12, d15, d14
|
||||
__ fabds(v27, v10, v3); // fabd s27, s10, s3
|
||||
__ fmuls(v0, v7, v25); // fmul s0, s7, s25
|
||||
__ fdivs(v9, v6, v15); // fdiv s9, s6, s15
|
||||
__ fadds(v29, v15, v10); // fadd s29, s15, s10
|
||||
__ fsubs(v2, v17, v7); // fsub s2, s17, s7
|
||||
__ fabdd(v11, v11, v23); // fabd d11, d11, d23
|
||||
__ fmuld(v7, v29, v23); // fmul d7, d29, d23
|
||||
__ fdivd(v14, v27, v11); // fdiv d14, d27, d11
|
||||
__ faddd(v11, v4, v24); // fadd d11, d4, d24
|
||||
__ fsubd(v12, v15, v14); // fsub d12, d15, d14
|
||||
|
||||
// FourRegFloatOp
|
||||
__ fmadds(v20, v11, v28, v13); // fmadd s20, s11, s28, s13
|
||||
@ -686,74 +686,77 @@ void entry(CodeBuffer *cb) {
|
||||
__ mulv(v6, __ T8H, v7, v8); // mul v6.8H, v7.8H, v8.8H
|
||||
__ mulv(v3, __ T2S, v4, v5); // mul v3.2S, v4.2S, v5.2S
|
||||
__ mulv(v7, __ T4S, v8, v9); // mul v7.4S, v8.4S, v9.4S
|
||||
__ fmul(v24, __ T2S, v25, v26); // fmul v24.2S, v25.2S, v26.2S
|
||||
__ fmul(v0, __ T4S, v1, v2); // fmul v0.4S, v1.4S, v2.4S
|
||||
__ fmul(v27, __ T2D, v28, v29); // fmul v27.2D, v28.2D, v29.2D
|
||||
__ fabd(v24, __ T2S, v25, v26); // fabd v24.2S, v25.2S, v26.2S
|
||||
__ fabd(v0, __ T4S, v1, v2); // fabd v0.4S, v1.4S, v2.4S
|
||||
__ fabd(v27, __ T2D, v28, v29); // fabd v27.2D, v28.2D, v29.2D
|
||||
__ fmul(v29, __ T2S, v30, v31); // fmul v29.2S, v30.2S, v31.2S
|
||||
__ fmul(v5, __ T4S, v6, v7); // fmul v5.4S, v6.4S, v7.4S
|
||||
__ fmul(v5, __ T2D, v6, v7); // fmul v5.2D, v6.2D, v7.2D
|
||||
__ mlav(v29, __ T4H, v30, v31); // mla v29.4H, v30.4H, v31.4H
|
||||
__ mlav(v5, __ T8H, v6, v7); // mla v5.8H, v6.8H, v7.8H
|
||||
__ mlav(v5, __ T2S, v6, v7); // mla v5.2S, v6.2S, v7.2S
|
||||
__ mlav(v29, __ T4S, v30, v31); // mla v29.4S, v30.4S, v31.4S
|
||||
__ fmla(v11, __ T2S, v12, v13); // fmla v11.2S, v12.2S, v13.2S
|
||||
__ fmla(v25, __ T4S, v26, v27); // fmla v25.4S, v26.4S, v27.4S
|
||||
__ fmla(v0, __ T2D, v1, v2); // fmla v0.2D, v1.2D, v2.2D
|
||||
__ mlsv(v30, __ T4H, v31, v0); // mls v30.4H, v31.4H, v0.4H
|
||||
__ mlsv(v0, __ T8H, v1, v2); // mls v0.8H, v1.8H, v2.8H
|
||||
__ mlsv(v17, __ T2S, v18, v19); // mls v17.2S, v18.2S, v19.2S
|
||||
__ mlsv(v28, __ T4S, v29, v30); // mls v28.4S, v29.4S, v30.4S
|
||||
__ fmls(v25, __ T2S, v26, v27); // fmls v25.2S, v26.2S, v27.2S
|
||||
__ fmls(v9, __ T4S, v10, v11); // fmls v9.4S, v10.4S, v11.4S
|
||||
__ fmls(v25, __ T2D, v26, v27); // fmls v25.2D, v26.2D, v27.2D
|
||||
__ fdiv(v12, __ T2S, v13, v14); // fdiv v12.2S, v13.2S, v14.2S
|
||||
__ fdiv(v15, __ T4S, v16, v17); // fdiv v15.4S, v16.4S, v17.4S
|
||||
__ fdiv(v11, __ T2D, v12, v13); // fdiv v11.2D, v12.2D, v13.2D
|
||||
__ maxv(v10, __ T8B, v11, v12); // smax v10.8B, v11.8B, v12.8B
|
||||
__ maxv(v17, __ T16B, v18, v19); // smax v17.16B, v18.16B, v19.16B
|
||||
__ maxv(v24, __ T4H, v25, v26); // smax v24.4H, v25.4H, v26.4H
|
||||
__ maxv(v21, __ T8H, v22, v23); // smax v21.8H, v22.8H, v23.8H
|
||||
__ maxv(v23, __ T2S, v24, v25); // smax v23.2S, v24.2S, v25.2S
|
||||
__ maxv(v0, __ T4S, v1, v2); // smax v0.4S, v1.4S, v2.4S
|
||||
__ fmax(v16, __ T2S, v17, v18); // fmax v16.2S, v17.2S, v18.2S
|
||||
__ fmax(v10, __ T4S, v11, v12); // fmax v10.4S, v11.4S, v12.4S
|
||||
__ fmax(v6, __ T2D, v7, v8); // fmax v6.2D, v7.2D, v8.2D
|
||||
__ minv(v28, __ T8B, v29, v30); // smin v28.8B, v29.8B, v30.8B
|
||||
__ minv(v6, __ T16B, v7, v8); // smin v6.16B, v7.16B, v8.16B
|
||||
__ minv(v5, __ T4H, v6, v7); // smin v5.4H, v6.4H, v7.4H
|
||||
__ minv(v5, __ T8H, v6, v7); // smin v5.8H, v6.8H, v7.8H
|
||||
__ minv(v20, __ T2S, v21, v22); // smin v20.2S, v21.2S, v22.2S
|
||||
__ minv(v17, __ T4S, v18, v19); // smin v17.4S, v18.4S, v19.4S
|
||||
__ fmin(v15, __ T2S, v16, v17); // fmin v15.2S, v16.2S, v17.2S
|
||||
__ fmin(v17, __ T4S, v18, v19); // fmin v17.4S, v18.4S, v19.4S
|
||||
__ fmin(v29, __ T2D, v30, v31); // fmin v29.2D, v30.2D, v31.2D
|
||||
__ cmeq(v26, __ T8B, v27, v28); // cmeq v26.8B, v27.8B, v28.8B
|
||||
__ cmeq(v28, __ T16B, v29, v30); // cmeq v28.16B, v29.16B, v30.16B
|
||||
__ cmeq(v1, __ T4H, v2, v3); // cmeq v1.4H, v2.4H, v3.4H
|
||||
__ cmeq(v27, __ T8H, v28, v29); // cmeq v27.8H, v28.8H, v29.8H
|
||||
__ cmeq(v0, __ T2S, v1, v2); // cmeq v0.2S, v1.2S, v2.2S
|
||||
__ cmeq(v20, __ T4S, v21, v22); // cmeq v20.4S, v21.4S, v22.4S
|
||||
__ cmeq(v28, __ T2D, v29, v30); // cmeq v28.2D, v29.2D, v30.2D
|
||||
__ fcmeq(v15, __ T2S, v16, v17); // fcmeq v15.2S, v16.2S, v17.2S
|
||||
__ fcmeq(v12, __ T4S, v13, v14); // fcmeq v12.4S, v13.4S, v14.4S
|
||||
__ fcmeq(v10, __ T2D, v11, v12); // fcmeq v10.2D, v11.2D, v12.2D
|
||||
__ cmgt(v28, __ T8B, v29, v30); // cmgt v28.8B, v29.8B, v30.8B
|
||||
__ cmgt(v28, __ T16B, v29, v30); // cmgt v28.16B, v29.16B, v30.16B
|
||||
__ cmgt(v19, __ T4H, v20, v21); // cmgt v19.4H, v20.4H, v21.4H
|
||||
__ cmgt(v22, __ T8H, v23, v24); // cmgt v22.8H, v23.8H, v24.8H
|
||||
__ cmgt(v10, __ T2S, v11, v12); // cmgt v10.2S, v11.2S, v12.2S
|
||||
__ cmgt(v4, __ T4S, v5, v6); // cmgt v4.4S, v5.4S, v6.4S
|
||||
__ mlav(v11, __ T8H, v12, v13); // mla v11.8H, v12.8H, v13.8H
|
||||
__ mlav(v25, __ T2S, v26, v27); // mla v25.2S, v26.2S, v27.2S
|
||||
__ mlav(v0, __ T4S, v1, v2); // mla v0.4S, v1.4S, v2.4S
|
||||
__ fmla(v30, __ T2S, v31, v0); // fmla v30.2S, v31.2S, v0.2S
|
||||
__ fmla(v0, __ T4S, v1, v2); // fmla v0.4S, v1.4S, v2.4S
|
||||
__ fmla(v17, __ T2D, v18, v19); // fmla v17.2D, v18.2D, v19.2D
|
||||
__ mlsv(v28, __ T4H, v29, v30); // mls v28.4H, v29.4H, v30.4H
|
||||
__ mlsv(v25, __ T8H, v26, v27); // mls v25.8H, v26.8H, v27.8H
|
||||
__ mlsv(v9, __ T2S, v10, v11); // mls v9.2S, v10.2S, v11.2S
|
||||
__ mlsv(v25, __ T4S, v26, v27); // mls v25.4S, v26.4S, v27.4S
|
||||
__ fmls(v12, __ T2S, v13, v14); // fmls v12.2S, v13.2S, v14.2S
|
||||
__ fmls(v15, __ T4S, v16, v17); // fmls v15.4S, v16.4S, v17.4S
|
||||
__ fmls(v11, __ T2D, v12, v13); // fmls v11.2D, v12.2D, v13.2D
|
||||
__ fdiv(v10, __ T2S, v11, v12); // fdiv v10.2S, v11.2S, v12.2S
|
||||
__ fdiv(v17, __ T4S, v18, v19); // fdiv v17.4S, v18.4S, v19.4S
|
||||
__ fdiv(v24, __ T2D, v25, v26); // fdiv v24.2D, v25.2D, v26.2D
|
||||
__ maxv(v21, __ T8B, v22, v23); // smax v21.8B, v22.8B, v23.8B
|
||||
__ maxv(v23, __ T16B, v24, v25); // smax v23.16B, v24.16B, v25.16B
|
||||
__ maxv(v0, __ T4H, v1, v2); // smax v0.4H, v1.4H, v2.4H
|
||||
__ maxv(v16, __ T8H, v17, v18); // smax v16.8H, v17.8H, v18.8H
|
||||
__ maxv(v10, __ T2S, v11, v12); // smax v10.2S, v11.2S, v12.2S
|
||||
__ maxv(v6, __ T4S, v7, v8); // smax v6.4S, v7.4S, v8.4S
|
||||
__ fmax(v28, __ T2S, v29, v30); // fmax v28.2S, v29.2S, v30.2S
|
||||
__ fmax(v6, __ T4S, v7, v8); // fmax v6.4S, v7.4S, v8.4S
|
||||
__ fmax(v5, __ T2D, v6, v7); // fmax v5.2D, v6.2D, v7.2D
|
||||
__ minv(v5, __ T8B, v6, v7); // smin v5.8B, v6.8B, v7.8B
|
||||
__ minv(v20, __ T16B, v21, v22); // smin v20.16B, v21.16B, v22.16B
|
||||
__ minv(v17, __ T4H, v18, v19); // smin v17.4H, v18.4H, v19.4H
|
||||
__ minv(v15, __ T8H, v16, v17); // smin v15.8H, v16.8H, v17.8H
|
||||
__ minv(v17, __ T2S, v18, v19); // smin v17.2S, v18.2S, v19.2S
|
||||
__ minv(v29, __ T4S, v30, v31); // smin v29.4S, v30.4S, v31.4S
|
||||
__ fmin(v26, __ T2S, v27, v28); // fmin v26.2S, v27.2S, v28.2S
|
||||
__ fmin(v28, __ T4S, v29, v30); // fmin v28.4S, v29.4S, v30.4S
|
||||
__ fmin(v1, __ T2D, v2, v3); // fmin v1.2D, v2.2D, v3.2D
|
||||
__ cmeq(v27, __ T8B, v28, v29); // cmeq v27.8B, v28.8B, v29.8B
|
||||
__ cmeq(v0, __ T16B, v1, v2); // cmeq v0.16B, v1.16B, v2.16B
|
||||
__ cmeq(v20, __ T4H, v21, v22); // cmeq v20.4H, v21.4H, v22.4H
|
||||
__ cmeq(v28, __ T8H, v29, v30); // cmeq v28.8H, v29.8H, v30.8H
|
||||
__ cmeq(v15, __ T2S, v16, v17); // cmeq v15.2S, v16.2S, v17.2S
|
||||
__ cmeq(v12, __ T4S, v13, v14); // cmeq v12.4S, v13.4S, v14.4S
|
||||
__ cmeq(v10, __ T2D, v11, v12); // cmeq v10.2D, v11.2D, v12.2D
|
||||
__ fcmeq(v28, __ T2S, v29, v30); // fcmeq v28.2S, v29.2S, v30.2S
|
||||
__ fcmeq(v28, __ T4S, v29, v30); // fcmeq v28.4S, v29.4S, v30.4S
|
||||
__ fcmeq(v19, __ T2D, v20, v21); // fcmeq v19.2D, v20.2D, v21.2D
|
||||
__ cmgt(v22, __ T8B, v23, v24); // cmgt v22.8B, v23.8B, v24.8B
|
||||
__ cmgt(v10, __ T16B, v11, v12); // cmgt v10.16B, v11.16B, v12.16B
|
||||
__ cmgt(v4, __ T4H, v5, v6); // cmgt v4.4H, v5.4H, v6.4H
|
||||
__ cmgt(v30, __ T8H, v31, v0); // cmgt v30.8H, v31.8H, v0.8H
|
||||
__ cmgt(v20, __ T2S, v21, v22); // cmgt v20.2S, v21.2S, v22.2S
|
||||
__ cmgt(v8, __ T4S, v9, v10); // cmgt v8.4S, v9.4S, v10.4S
|
||||
__ cmgt(v30, __ T2D, v31, v0); // cmgt v30.2D, v31.2D, v0.2D
|
||||
__ fcmgt(v20, __ T2S, v21, v22); // fcmgt v20.2S, v21.2S, v22.2S
|
||||
__ fcmgt(v8, __ T4S, v9, v10); // fcmgt v8.4S, v9.4S, v10.4S
|
||||
__ fcmgt(v30, __ T2D, v31, v0); // fcmgt v30.2D, v31.2D, v0.2D
|
||||
__ cmge(v17, __ T8B, v18, v19); // cmge v17.8B, v18.8B, v19.8B
|
||||
__ cmge(v10, __ T16B, v11, v12); // cmge v10.16B, v11.16B, v12.16B
|
||||
__ cmge(v27, __ T4H, v28, v29); // cmge v27.4H, v28.4H, v29.4H
|
||||
__ cmge(v2, __ T8H, v3, v4); // cmge v2.8H, v3.8H, v4.8H
|
||||
__ cmge(v24, __ T2S, v25, v26); // cmge v24.2S, v25.2S, v26.2S
|
||||
__ cmge(v4, __ T4S, v5, v6); // cmge v4.4S, v5.4S, v6.4S
|
||||
__ cmge(v3, __ T2D, v4, v5); // cmge v3.2D, v4.2D, v5.2D
|
||||
__ fcmge(v8, __ T2S, v9, v10); // fcmge v8.2S, v9.2S, v10.2S
|
||||
__ fcmge(v22, __ T4S, v23, v24); // fcmge v22.4S, v23.4S, v24.4S
|
||||
__ fcmge(v17, __ T2D, v18, v19); // fcmge v17.2D, v18.2D, v19.2D
|
||||
__ fcmgt(v17, __ T2S, v18, v19); // fcmgt v17.2S, v18.2S, v19.2S
|
||||
__ fcmgt(v10, __ T4S, v11, v12); // fcmgt v10.4S, v11.4S, v12.4S
|
||||
__ fcmgt(v27, __ T2D, v28, v29); // fcmgt v27.2D, v28.2D, v29.2D
|
||||
__ cmge(v2, __ T8B, v3, v4); // cmge v2.8B, v3.8B, v4.8B
|
||||
__ cmge(v24, __ T16B, v25, v26); // cmge v24.16B, v25.16B, v26.16B
|
||||
__ cmge(v4, __ T4H, v5, v6); // cmge v4.4H, v5.4H, v6.4H
|
||||
__ cmge(v3, __ T8H, v4, v5); // cmge v3.8H, v4.8H, v5.8H
|
||||
__ cmge(v8, __ T2S, v9, v10); // cmge v8.2S, v9.2S, v10.2S
|
||||
__ cmge(v22, __ T4S, v23, v24); // cmge v22.4S, v23.4S, v24.4S
|
||||
__ cmge(v17, __ T2D, v18, v19); // cmge v17.2D, v18.2D, v19.2D
|
||||
__ fcmge(v13, __ T2S, v14, v15); // fcmge v13.2S, v14.2S, v15.2S
|
||||
__ fcmge(v4, __ T4S, v5, v6); // fcmge v4.4S, v5.4S, v6.4S
|
||||
__ fcmge(v28, __ T2D, v29, v30); // fcmge v28.2D, v29.2D, v30.2D
|
||||
|
||||
// SpecialCases
|
||||
__ ccmn(zr, zr, 3u, Assembler::LE); // ccmn xzr, xzr, #3, LE
|
||||
@ -840,155 +843,155 @@ void entry(CodeBuffer *cb) {
|
||||
__ fmovd(v0, -1.0625); // fmov d0, #-1.0625
|
||||
|
||||
// LSEOp
|
||||
__ swp(Assembler::xword, r13, r5, r29); // swp x13, x5, [x29]
|
||||
__ ldadd(Assembler::xword, r24, r21, r26); // ldadd x24, x21, [x26]
|
||||
__ ldbic(Assembler::xword, r24, r3, r24); // ldclr x24, x3, [x24]
|
||||
__ ldeor(Assembler::xword, r26, r23, r15); // ldeor x26, x23, [x15]
|
||||
__ ldorr(Assembler::xword, r21, r3, r24); // ldset x21, x3, [x24]
|
||||
__ ldsmin(Assembler::xword, r8, r25, r20); // ldsmin x8, x25, [x20]
|
||||
__ ldsmax(Assembler::xword, r16, r17, r2); // ldsmax x16, x17, [x2]
|
||||
__ ldumin(Assembler::xword, r1, r0, r24); // ldumin x1, x0, [x24]
|
||||
__ ldumax(Assembler::xword, r4, r3, r12); // ldumax x4, x3, [x12]
|
||||
__ swp(Assembler::xword, r24, r21, r26); // swp x24, x21, [x26]
|
||||
__ ldadd(Assembler::xword, r24, r3, r24); // ldadd x24, x3, [x24]
|
||||
__ ldbic(Assembler::xword, r26, r23, r15); // ldclr x26, x23, [x15]
|
||||
__ ldeor(Assembler::xword, r21, r3, r24); // ldeor x21, x3, [x24]
|
||||
__ ldorr(Assembler::xword, r8, r25, r20); // ldset x8, x25, [x20]
|
||||
__ ldsmin(Assembler::xword, r16, r17, r2); // ldsmin x16, x17, [x2]
|
||||
__ ldsmax(Assembler::xword, r1, r0, r24); // ldsmax x1, x0, [x24]
|
||||
__ ldumin(Assembler::xword, r4, r3, r12); // ldumin x4, x3, [x12]
|
||||
__ ldumax(Assembler::xword, zr, r28, r10); // ldumax xzr, x28, [x10]
|
||||
|
||||
// LSEOp
|
||||
__ swpa(Assembler::xword, zr, r28, r10); // swpa xzr, x28, [x10]
|
||||
__ ldadda(Assembler::xword, r26, r2, r12); // ldadda x26, x2, [x12]
|
||||
__ ldbica(Assembler::xword, r16, zr, r1); // ldclra x16, xzr, [x1]
|
||||
__ ldeora(Assembler::xword, r13, r29, r0); // ldeora x13, x29, [x0]
|
||||
__ ldorra(Assembler::xword, r19, r12, r17); // ldseta x19, x12, [x17]
|
||||
__ ldsmina(Assembler::xword, r22, r13, r28); // ldsmina x22, x13, [x28]
|
||||
__ ldsmaxa(Assembler::xword, r30, zr, r1); // ldsmaxa x30, xzr, [x1]
|
||||
__ ldumina(Assembler::xword, r26, r28, r4); // ldumina x26, x28, [x4]
|
||||
__ ldumaxa(Assembler::xword, r30, r4, r6); // ldumaxa x30, x4, [x6]
|
||||
__ swpa(Assembler::xword, r26, r2, r12); // swpa x26, x2, [x12]
|
||||
__ ldadda(Assembler::xword, r16, zr, r1); // ldadda x16, xzr, [x1]
|
||||
__ ldbica(Assembler::xword, r13, r29, r0); // ldclra x13, x29, [x0]
|
||||
__ ldeora(Assembler::xword, r19, r12, r17); // ldeora x19, x12, [x17]
|
||||
__ ldorra(Assembler::xword, r22, r13, r28); // ldseta x22, x13, [x28]
|
||||
__ ldsmina(Assembler::xword, r30, zr, r1); // ldsmina x30, xzr, [x1]
|
||||
__ ldsmaxa(Assembler::xword, r26, r28, r4); // ldsmaxa x26, x28, [x4]
|
||||
__ ldumina(Assembler::xword, r30, r4, r6); // ldumina x30, x4, [x6]
|
||||
__ ldumaxa(Assembler::xword, r30, r26, r15); // ldumaxa x30, x26, [x15]
|
||||
|
||||
// LSEOp
|
||||
__ swpal(Assembler::xword, r30, r26, r15); // swpal x30, x26, [x15]
|
||||
__ ldaddal(Assembler::xword, r9, r8, r12); // ldaddal x9, x8, [x12]
|
||||
__ ldbical(Assembler::xword, r0, r20, r1); // ldclral x0, x20, [x1]
|
||||
__ ldeoral(Assembler::xword, r24, r2, r0); // ldeoral x24, x2, [x0]
|
||||
__ ldorral(Assembler::xword, r9, r24, r26); // ldsetal x9, x24, [x26]
|
||||
__ ldsminal(Assembler::xword, r16, r30, r3); // ldsminal x16, x30, [x3]
|
||||
__ ldsmaxal(Assembler::xword, r10, r23, r10); // ldsmaxal x10, x23, [x10]
|
||||
__ lduminal(Assembler::xword, r4, r16, r2); // lduminal x4, x16, [x2]
|
||||
__ ldumaxal(Assembler::xword, r11, r8, r10); // ldumaxal x11, x8, [x10]
|
||||
__ swpal(Assembler::xword, r9, r8, r12); // swpal x9, x8, [x12]
|
||||
__ ldaddal(Assembler::xword, r0, r20, r1); // ldaddal x0, x20, [x1]
|
||||
__ ldbical(Assembler::xword, r24, r2, r0); // ldclral x24, x2, [x0]
|
||||
__ ldeoral(Assembler::xword, r9, r24, r26); // ldeoral x9, x24, [x26]
|
||||
__ ldorral(Assembler::xword, r16, r30, r3); // ldsetal x16, x30, [x3]
|
||||
__ ldsminal(Assembler::xword, r10, r23, r10); // ldsminal x10, x23, [x10]
|
||||
__ ldsmaxal(Assembler::xword, r4, r16, r2); // ldsmaxal x4, x16, [x2]
|
||||
__ lduminal(Assembler::xword, r11, r8, r10); // lduminal x11, x8, [x10]
|
||||
__ ldumaxal(Assembler::xword, r15, r17, r2); // ldumaxal x15, x17, [x2]
|
||||
|
||||
// LSEOp
|
||||
__ swpl(Assembler::xword, r15, r17, r2); // swpl x15, x17, [x2]
|
||||
__ ldaddl(Assembler::xword, r10, r12, r12); // ldaddl x10, x12, [x12]
|
||||
__ ldbicl(Assembler::xword, r15, r13, r2); // ldclrl x15, x13, [x2]
|
||||
__ ldeorl(Assembler::xword, r7, r20, r26); // ldeorl x7, x20, [x26]
|
||||
__ ldorrl(Assembler::xword, r16, r4, r2); // ldsetl x16, x4, [x2]
|
||||
__ ldsminl(Assembler::xword, r4, r12, r15); // ldsminl x4, x12, [x15]
|
||||
__ ldsmaxl(Assembler::xword, r21, r16, r15); // ldsmaxl x21, x16, [x15]
|
||||
__ lduminl(Assembler::xword, r11, r21, r23); // lduminl x11, x21, [x23]
|
||||
__ ldumaxl(Assembler::xword, r12, r26, r23); // ldumaxl x12, x26, [x23]
|
||||
__ swpl(Assembler::xword, r10, r12, r12); // swpl x10, x12, [x12]
|
||||
__ ldaddl(Assembler::xword, r15, r13, r2); // ldaddl x15, x13, [x2]
|
||||
__ ldbicl(Assembler::xword, r7, r20, r26); // ldclrl x7, x20, [x26]
|
||||
__ ldeorl(Assembler::xword, r16, r4, r2); // ldeorl x16, x4, [x2]
|
||||
__ ldorrl(Assembler::xword, r4, r12, r15); // ldsetl x4, x12, [x15]
|
||||
__ ldsminl(Assembler::xword, r21, r16, r15); // ldsminl x21, x16, [x15]
|
||||
__ ldsmaxl(Assembler::xword, r11, r21, r23); // ldsmaxl x11, x21, [x23]
|
||||
__ lduminl(Assembler::xword, r12, r26, r23); // lduminl x12, x26, [x23]
|
||||
__ ldumaxl(Assembler::xword, r28, r14, r11); // ldumaxl x28, x14, [x11]
|
||||
|
||||
// LSEOp
|
||||
__ swp(Assembler::word, r28, r14, r11); // swp w28, w14, [x11]
|
||||
__ ldadd(Assembler::word, r24, r1, r12); // ldadd w24, w1, [x12]
|
||||
__ ldbic(Assembler::word, zr, r10, r16); // ldclr wzr, w10, [x16]
|
||||
__ ldeor(Assembler::word, r7, r2, r3); // ldeor w7, w2, [x3]
|
||||
__ ldorr(Assembler::word, r13, r19, r17); // ldset w13, w19, [x17]
|
||||
__ ldsmin(Assembler::word, r16, r3, r1); // ldsmin w16, w3, [x1]
|
||||
__ ldsmax(Assembler::word, r11, r30, r5); // ldsmax w11, w30, [x5]
|
||||
__ ldumin(Assembler::word, r8, r15, r29); // ldumin w8, w15, [x29]
|
||||
__ ldumax(Assembler::word, r30, r0, r20); // ldumax w30, w0, [x20]
|
||||
__ swp(Assembler::word, r24, r1, r12); // swp w24, w1, [x12]
|
||||
__ ldadd(Assembler::word, zr, r10, r16); // ldadd wzr, w10, [x16]
|
||||
__ ldbic(Assembler::word, r7, r2, r3); // ldclr w7, w2, [x3]
|
||||
__ ldeor(Assembler::word, r13, r19, r17); // ldeor w13, w19, [x17]
|
||||
__ ldorr(Assembler::word, r16, r3, r1); // ldset w16, w3, [x1]
|
||||
__ ldsmin(Assembler::word, r11, r30, r5); // ldsmin w11, w30, [x5]
|
||||
__ ldsmax(Assembler::word, r8, r15, r29); // ldsmax w8, w15, [x29]
|
||||
__ ldumin(Assembler::word, r30, r0, r20); // ldumin w30, w0, [x20]
|
||||
__ ldumax(Assembler::word, r7, r20, r23); // ldumax w7, w20, [x23]
|
||||
|
||||
// LSEOp
|
||||
__ swpa(Assembler::word, r7, r20, r23); // swpa w7, w20, [x23]
|
||||
__ ldadda(Assembler::word, r28, r21, r27); // ldadda w28, w21, [x27]
|
||||
__ ldbica(Assembler::word, r25, r5, r1); // ldclra w25, w5, [x1]
|
||||
__ ldeora(Assembler::word, r23, r16, sp); // ldeora w23, w16, [sp]
|
||||
__ ldorra(Assembler::word, r5, r12, r9); // ldseta w5, w12, [x9]
|
||||
__ ldsmina(Assembler::word, r28, r15, r29); // ldsmina w28, w15, [x29]
|
||||
__ ldsmaxa(Assembler::word, r22, zr, r19); // ldsmaxa w22, wzr, [x19]
|
||||
__ ldumina(Assembler::word, zr, r5, r14); // ldumina wzr, w5, [x14]
|
||||
__ ldumaxa(Assembler::word, r16, zr, r15); // ldumaxa w16, wzr, [x15]
|
||||
__ swpa(Assembler::word, r28, r21, r27); // swpa w28, w21, [x27]
|
||||
__ ldadda(Assembler::word, r25, r5, r1); // ldadda w25, w5, [x1]
|
||||
__ ldbica(Assembler::word, r23, r16, sp); // ldclra w23, w16, [sp]
|
||||
__ ldeora(Assembler::word, r5, r12, r9); // ldeora w5, w12, [x9]
|
||||
__ ldorra(Assembler::word, r28, r15, r29); // ldseta w28, w15, [x29]
|
||||
__ ldsmina(Assembler::word, r22, zr, r19); // ldsmina w22, wzr, [x19]
|
||||
__ ldsmaxa(Assembler::word, zr, r5, r14); // ldsmaxa wzr, w5, [x14]
|
||||
__ ldumina(Assembler::word, r16, zr, r15); // ldumina w16, wzr, [x15]
|
||||
__ ldumaxa(Assembler::word, r27, r20, r16); // ldumaxa w27, w20, [x16]
|
||||
|
||||
// LSEOp
|
||||
__ swpal(Assembler::word, r27, r20, r16); // swpal w27, w20, [x16]
|
||||
__ ldaddal(Assembler::word, r12, r11, r9); // ldaddal w12, w11, [x9]
|
||||
__ ldbical(Assembler::word, r6, r30, r17); // ldclral w6, w30, [x17]
|
||||
__ ldeoral(Assembler::word, r27, r28, r30); // ldeoral w27, w28, [x30]
|
||||
__ ldorral(Assembler::word, r7, r10, r20); // ldsetal w7, w10, [x20]
|
||||
__ ldsminal(Assembler::word, r10, r4, r24); // ldsminal w10, w4, [x24]
|
||||
__ ldsmaxal(Assembler::word, r17, r17, r22); // ldsmaxal w17, w17, [x22]
|
||||
__ lduminal(Assembler::word, r3, r29, r15); // lduminal w3, w29, [x15]
|
||||
__ ldumaxal(Assembler::word, r22, r19, r19); // ldumaxal w22, w19, [x19]
|
||||
__ swpal(Assembler::word, r12, r11, r9); // swpal w12, w11, [x9]
|
||||
__ ldaddal(Assembler::word, r6, r30, r17); // ldaddal w6, w30, [x17]
|
||||
__ ldbical(Assembler::word, r27, r28, r30); // ldclral w27, w28, [x30]
|
||||
__ ldeoral(Assembler::word, r7, r10, r20); // ldeoral w7, w10, [x20]
|
||||
__ ldorral(Assembler::word, r10, r4, r24); // ldsetal w10, w4, [x24]
|
||||
__ ldsminal(Assembler::word, r17, r17, r22); // ldsminal w17, w17, [x22]
|
||||
__ ldsmaxal(Assembler::word, r3, r29, r15); // ldsmaxal w3, w29, [x15]
|
||||
__ lduminal(Assembler::word, r22, r19, r19); // lduminal w22, w19, [x19]
|
||||
__ ldumaxal(Assembler::word, r22, r2, r15); // ldumaxal w22, w2, [x15]
|
||||
|
||||
// LSEOp
|
||||
__ swpl(Assembler::word, r22, r2, r15); // swpl w22, w2, [x15]
|
||||
__ ldaddl(Assembler::word, r6, r12, r16); // ldaddl w6, w12, [x16]
|
||||
__ ldbicl(Assembler::word, r11, r13, r23); // ldclrl w11, w13, [x23]
|
||||
__ ldeorl(Assembler::word, r1, r30, r19); // ldeorl w1, w30, [x19]
|
||||
__ ldorrl(Assembler::word, r5, r17, r2); // ldsetl w5, w17, [x2]
|
||||
__ ldsminl(Assembler::word, r16, r22, r13); // ldsminl w16, w22, [x13]
|
||||
__ ldsmaxl(Assembler::word, r10, r21, r29); // ldsmaxl w10, w21, [x29]
|
||||
__ lduminl(Assembler::word, r27, r12, r27); // lduminl w27, w12, [x27]
|
||||
__ ldumaxl(Assembler::word, r3, r1, sp); // ldumaxl w3, w1, [sp]
|
||||
__ swpl(Assembler::word, r6, r12, r16); // swpl w6, w12, [x16]
|
||||
__ ldaddl(Assembler::word, r11, r13, r23); // ldaddl w11, w13, [x23]
|
||||
__ ldbicl(Assembler::word, r1, r30, r19); // ldclrl w1, w30, [x19]
|
||||
__ ldeorl(Assembler::word, r5, r17, r2); // ldeorl w5, w17, [x2]
|
||||
__ ldorrl(Assembler::word, r16, r22, r13); // ldsetl w16, w22, [x13]
|
||||
__ ldsminl(Assembler::word, r10, r21, r29); // ldsminl w10, w21, [x29]
|
||||
__ ldsmaxl(Assembler::word, r27, r12, r27); // ldsmaxl w27, w12, [x27]
|
||||
__ lduminl(Assembler::word, r3, r1, sp); // lduminl w3, w1, [sp]
|
||||
__ ldumaxl(Assembler::word, r24, r19, r17); // ldumaxl w24, w19, [x17]
|
||||
|
||||
// SHA3SIMDOp
|
||||
__ bcax(v23, __ T16B, v19, v17, v9); // bcax v23.16B, v19.16B, v17.16B, v9.16B
|
||||
__ eor3(v27, __ T16B, v26, v14, v6); // eor3 v27.16B, v26.16B, v14.16B, v6.16B
|
||||
__ rax1(v20, __ T2D, v22, v30); // rax1 v20.2D, v22.2D, v30.2D
|
||||
__ xar(v24, __ T2D, v2, v30, 54); // xar v24.2D, v2.2D, v30.2D, #54
|
||||
__ bcax(v9, __ T16B, v27, v26, v14); // bcax v9.16B, v27.16B, v26.16B, v14.16B
|
||||
__ eor3(v6, __ T16B, v20, v22, v30); // eor3 v6.16B, v20.16B, v22.16B, v30.16B
|
||||
__ rax1(v24, __ T2D, v2, v30); // rax1 v24.2D, v2.2D, v30.2D
|
||||
__ xar(v26, __ T2D, v17, v10, 46); // xar v26.2D, v17.2D, v10.2D, #46
|
||||
|
||||
// SHA512SIMDOp
|
||||
__ sha512h(v17, __ T2D, v10, v22); // sha512h q17, q10, v22.2D
|
||||
__ sha512h2(v17, __ T2D, v2, v17); // sha512h2 q17, q2, v17.2D
|
||||
__ sha512su0(v0, __ T2D, v24); // sha512su0 v0.2D, v24.2D
|
||||
__ sha512su1(v25, __ T2D, v22, v2); // sha512su1 v25.2D, v22.2D, v2.2D
|
||||
__ sha512h(v17, __ T2D, v2, v17); // sha512h q17, q2, v17.2D
|
||||
__ sha512h2(v0, __ T2D, v24, v25); // sha512h2 q0, q24, v25.2D
|
||||
__ sha512su0(v22, __ T2D, v2); // sha512su0 v22.2D, v2.2D
|
||||
__ sha512su1(v17, __ T2D, v12, v3); // sha512su1 v17.2D, v12.2D, v3.2D
|
||||
|
||||
// SVEVectorOp
|
||||
__ sve_add(z17, __ D, z12, z3); // add z17.d, z12.d, z3.d
|
||||
__ sve_sub(z29, __ D, z28, z16); // sub z29.d, z28.d, z16.d
|
||||
__ sve_fadd(z6, __ D, z9, z28); // fadd z6.d, z9.d, z28.d
|
||||
__ sve_fmul(z7, __ S, z4, z7); // fmul z7.s, z4.s, z7.s
|
||||
__ sve_fsub(z9, __ S, z22, z8); // fsub z9.s, z22.s, z8.s
|
||||
__ sve_abs(z27, __ B, p5, z30); // abs z27.b, p5/m, z30.b
|
||||
__ sve_add(z26, __ H, p0, z16); // add z26.h, p0/m, z26.h, z16.h
|
||||
__ sve_asr(z3, __ D, p6, z8); // asr z3.d, p6/m, z3.d, z8.d
|
||||
__ sve_cnt(z21, __ D, p6, z26); // cnt z21.d, p6/m, z26.d
|
||||
__ sve_lsl(z22, __ B, p0, z4); // lsl z22.b, p0/m, z22.b, z4.b
|
||||
__ sve_lsr(z17, __ H, p0, z3); // lsr z17.h, p0/m, z17.h, z3.h
|
||||
__ sve_mul(z1, __ B, p2, z6); // mul z1.b, p2/m, z1.b, z6.b
|
||||
__ sve_neg(z9, __ S, p7, z7); // neg z9.s, p7/m, z7.s
|
||||
__ sve_not(z22, __ H, p5, z5); // not z22.h, p5/m, z5.h
|
||||
__ sve_smax(z8, __ B, p4, z30); // smax z8.b, p4/m, z8.b, z30.b
|
||||
__ sve_smin(z17, __ D, p0, z11); // smin z17.d, p0/m, z17.d, z11.d
|
||||
__ sve_sub(z28, __ S, p0, z26); // sub z28.s, p0/m, z28.s, z26.s
|
||||
__ sve_fabs(z28, __ D, p3, z13); // fabs z28.d, p3/m, z13.d
|
||||
__ sve_fadd(z16, __ S, p6, z5); // fadd z16.s, p6/m, z16.s, z5.s
|
||||
__ sve_fdiv(z13, __ S, p2, z15); // fdiv z13.s, p2/m, z13.s, z15.s
|
||||
__ sve_fmax(z26, __ S, p5, z11); // fmax z26.s, p5/m, z26.s, z11.s
|
||||
__ sve_fmin(z22, __ S, p4, z4); // fmin z22.s, p4/m, z22.s, z4.s
|
||||
__ sve_fmul(z19, __ S, p4, z17); // fmul z19.s, p4/m, z19.s, z17.s
|
||||
__ sve_fneg(z14, __ D, p3, z2); // fneg z14.d, p3/m, z2.d
|
||||
__ sve_frintm(z3, __ S, p5, z23); // frintm z3.s, p5/m, z23.s
|
||||
__ sve_frintn(z6, __ S, p1, z17); // frintn z6.s, p1/m, z17.s
|
||||
__ sve_frintp(z27, __ S, p4, z16); // frintp z27.s, p4/m, z16.s
|
||||
__ sve_fsqrt(z2, __ S, p7, z3); // fsqrt z2.s, p7/m, z3.s
|
||||
__ sve_fsub(z6, __ S, p4, z19); // fsub z6.s, p4/m, z6.s, z19.s
|
||||
__ sve_fmla(z12, __ D, p5, z8, z24); // fmla z12.d, p5/m, z8.d, z24.d
|
||||
__ sve_fmls(z17, __ S, p0, z10, z23); // fmls z17.s, p0/m, z10.s, z23.s
|
||||
__ sve_fnmla(z19, __ S, p7, z13, z16); // fnmla z19.s, p7/m, z13.s, z16.s
|
||||
__ sve_fnmls(z0, __ D, p1, z14, z17); // fnmls z0.d, p1/m, z14.d, z17.d
|
||||
__ sve_mla(z8, __ S, p2, z22, z20); // mla z8.s, p2/m, z22.s, z20.s
|
||||
__ sve_mls(z27, __ S, p0, z3, z15); // mls z27.s, p0/m, z3.s, z15.s
|
||||
__ sve_and(z20, z7, z4); // and z20.d, z7.d, z4.d
|
||||
__ sve_eor(z7, z0, z8); // eor z7.d, z0.d, z8.d
|
||||
__ sve_orr(z19, z22, z4); // orr z19.d, z22.d, z4.d
|
||||
__ sve_add(z27, __ S, z29, z28); // add z27.s, z29.s, z28.s
|
||||
__ sve_sub(z26, __ D, z6, z9); // sub z26.d, z6.d, z9.d
|
||||
__ sve_fadd(z17, __ S, z7, z4); // fadd z17.s, z7.s, z4.s
|
||||
__ sve_fmul(z15, __ S, z9, z22); // fmul z15.s, z9.s, z22.s
|
||||
__ sve_fsub(z2, __ D, z27, z20); // fsub z2.d, z27.d, z20.d
|
||||
__ sve_abs(z5, __ S, p6, z0); // abs z5.s, p6/m, z0.s
|
||||
__ sve_add(z14, __ H, p1, z25); // add z14.h, p1/m, z14.h, z25.h
|
||||
__ sve_asr(z27, __ D, p5, z26); // asr z27.d, p5/m, z27.d, z26.d
|
||||
__ sve_cnt(z24, __ B, p5, z0); // cnt z24.b, p5/m, z0.b
|
||||
__ sve_lsl(z6, __ B, p4, z0); // lsl z6.b, p4/m, z6.b, z0.b
|
||||
__ sve_lsr(z15, __ B, p0, z9); // lsr z15.b, p0/m, z15.b, z9.b
|
||||
__ sve_mul(z5, __ B, p2, z27); // mul z5.b, p2/m, z5.b, z27.b
|
||||
__ sve_neg(z20, __ B, p5, z20); // neg z20.b, p5/m, z20.b
|
||||
__ sve_not(z10, __ D, p2, z16); // not z10.d, p2/m, z16.d
|
||||
__ sve_smax(z6, __ H, p4, z2); // smax z6.h, p4/m, z6.h, z2.h
|
||||
__ sve_smin(z29, __ D, p7, z2); // smin z29.d, p7/m, z29.d, z2.d
|
||||
__ sve_sub(z22, __ H, p7, z14); // sub z22.h, p7/m, z22.h, z14.h
|
||||
__ sve_fabs(z27, __ S, p4, z23); // fabs z27.s, p4/m, z23.s
|
||||
__ sve_fadd(z2, __ D, p3, z10); // fadd z2.d, p3/m, z2.d, z10.d
|
||||
__ sve_fdiv(z10, __ S, p6, z22); // fdiv z10.s, p6/m, z10.s, z22.s
|
||||
__ sve_fmax(z3, __ S, p5, z16); // fmax z3.s, p5/m, z3.s, z16.s
|
||||
__ sve_fmin(z1, __ D, p4, z16); // fmin z1.d, p4/m, z1.d, z16.d
|
||||
__ sve_fmul(z12, __ S, p3, z12); // fmul z12.s, p3/m, z12.s, z12.s
|
||||
__ sve_fneg(z16, __ D, p0, z20); // fneg z16.d, p0/m, z20.d
|
||||
__ sve_frintm(z5, __ D, p1, z7); // frintm z5.d, p1/m, z7.d
|
||||
__ sve_frintn(z12, __ D, p7, z16); // frintn z12.d, p7/m, z16.d
|
||||
__ sve_frintp(z6, __ S, p0, z28); // frintp z6.s, p0/m, z28.s
|
||||
__ sve_fsqrt(z4, __ D, p1, z17); // fsqrt z4.d, p1/m, z17.d
|
||||
__ sve_fsub(z13, __ S, p3, z19); // fsub z13.s, p3/m, z13.s, z19.s
|
||||
__ sve_fmla(z24, __ S, p5, z17, z0); // fmla z24.s, p5/m, z17.s, z0.s
|
||||
__ sve_fmls(z23, __ S, p1, z19, z30); // fmls z23.s, p1/m, z19.s, z30.s
|
||||
__ sve_fnmla(z16, __ S, p1, z0, z7); // fnmla z16.s, p1/m, z0.s, z7.s
|
||||
__ sve_fnmls(z17, __ D, p6, z8, z10); // fnmls z17.d, p6/m, z8.d, z10.d
|
||||
__ sve_mla(z20, __ B, p5, z27, z2); // mla z20.b, p5/m, z27.b, z2.b
|
||||
__ sve_mls(z15, __ B, p4, z20, z7); // mls z15.b, p4/m, z20.b, z7.b
|
||||
__ sve_and(z28, z7, z0); // and z28.d, z7.d, z0.d
|
||||
__ sve_eor(z16, z19, z22); // eor z16.d, z19.d, z22.d
|
||||
__ sve_orr(z15, z9, z22); // orr z15.d, z9.d, z22.d
|
||||
|
||||
// SVEReductionOp
|
||||
__ sve_andv(v9, __ D, p5, z11); // andv d9, p5, z11.d
|
||||
__ sve_orv(v5, __ H, p7, z16); // orv h5, p7, z16.h
|
||||
__ sve_eorv(v22, __ H, p3, z1); // eorv h22, p3, z1.h
|
||||
__ sve_smaxv(v8, __ D, p5, z16); // smaxv d8, p5, z16.d
|
||||
__ sve_sminv(v15, __ S, p1, z4); // sminv s15, p1, z4.s
|
||||
__ sve_fminv(v8, __ S, p1, z29); // fminv s8, p1, z29.s
|
||||
__ sve_fmaxv(v28, __ D, p4, z29); // fmaxv d28, p4, z29.d
|
||||
__ sve_fadda(v9, __ S, p3, z2); // fadda s9, p3, s9, z2.s
|
||||
__ sve_uaddv(v28, __ B, p0, z7); // uaddv d28, p0, z7.b
|
||||
__ sve_andv(v25, __ S, p1, z30); // andv s25, p1, z30.s
|
||||
__ sve_orv(v13, __ B, p5, z11); // orv b13, p5, z11.b
|
||||
__ sve_eorv(v13, __ S, p2, z20); // eorv s13, p2, z20.s
|
||||
__ sve_smaxv(v25, __ B, p3, z4); // smaxv b25, p3, z4.b
|
||||
__ sve_sminv(v17, __ D, p2, z6); // sminv d17, p2, z6.d
|
||||
__ sve_fminv(v4, __ D, p7, z16); // fminv d4, p7, z16.d
|
||||
__ sve_fmaxv(v26, __ S, p2, z14); // fmaxv s26, p2, z14.s
|
||||
__ sve_fadda(v11, __ S, p7, z3); // fadda s11, p7, s11, z3.s
|
||||
__ sve_uaddv(v1, __ S, p6, z21); // uaddv d1, p6, z21.s
|
||||
|
||||
__ bind(forth);
|
||||
|
||||
@ -1007,30 +1010,30 @@ void entry(CodeBuffer *cb) {
|
||||
0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061,
|
||||
0x120cb166, 0x321764bc, 0x52174681, 0x720c0227,
|
||||
0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01,
|
||||
0x14000000, 0x17ffffd7, 0x140002cd, 0x94000000,
|
||||
0x97ffffd4, 0x940002ca, 0x3400000a, 0x34fffa2a,
|
||||
0x340058ea, 0x35000008, 0x35fff9c8, 0x35005888,
|
||||
0xb400000b, 0xb4fff96b, 0xb400582b, 0xb500001d,
|
||||
0xb5fff91d, 0xb50057dd, 0x10000013, 0x10fff8b3,
|
||||
0x10005773, 0x90000013, 0x36300016, 0x3637f836,
|
||||
0x363056f6, 0x3758000c, 0x375ff7cc, 0x3758568c,
|
||||
0x14000000, 0x17ffffd7, 0x140002d0, 0x94000000,
|
||||
0x97ffffd4, 0x940002cd, 0x3400000a, 0x34fffa2a,
|
||||
0x3400594a, 0x35000008, 0x35fff9c8, 0x350058e8,
|
||||
0xb400000b, 0xb4fff96b, 0xb400588b, 0xb500001d,
|
||||
0xb5fff91d, 0xb500583d, 0x10000013, 0x10fff8b3,
|
||||
0x100057d3, 0x90000013, 0x36300016, 0x3637f836,
|
||||
0x36305756, 0x3758000c, 0x375ff7cc, 0x375856ec,
|
||||
0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc,
|
||||
0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f,
|
||||
0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016,
|
||||
0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0,
|
||||
0x54005460, 0x54000001, 0x54fff541, 0x54005401,
|
||||
0x54000002, 0x54fff4e2, 0x540053a2, 0x54000002,
|
||||
0x54fff482, 0x54005342, 0x54000003, 0x54fff423,
|
||||
0x540052e3, 0x54000003, 0x54fff3c3, 0x54005283,
|
||||
0x54000004, 0x54fff364, 0x54005224, 0x54000005,
|
||||
0x54fff305, 0x540051c5, 0x54000006, 0x54fff2a6,
|
||||
0x54005166, 0x54000007, 0x54fff247, 0x54005107,
|
||||
0x54000008, 0x54fff1e8, 0x540050a8, 0x54000009,
|
||||
0x54fff189, 0x54005049, 0x5400000a, 0x54fff12a,
|
||||
0x54004fea, 0x5400000b, 0x54fff0cb, 0x54004f8b,
|
||||
0x5400000c, 0x54fff06c, 0x54004f2c, 0x5400000d,
|
||||
0x54fff00d, 0x54004ecd, 0x5400000e, 0x54ffefae,
|
||||
0x54004e6e, 0x5400000f, 0x54ffef4f, 0x54004e0f,
|
||||
0x540054c0, 0x54000001, 0x54fff541, 0x54005461,
|
||||
0x54000002, 0x54fff4e2, 0x54005402, 0x54000002,
|
||||
0x54fff482, 0x540053a2, 0x54000003, 0x54fff423,
|
||||
0x54005343, 0x54000003, 0x54fff3c3, 0x540052e3,
|
||||
0x54000004, 0x54fff364, 0x54005284, 0x54000005,
|
||||
0x54fff305, 0x54005225, 0x54000006, 0x54fff2a6,
|
||||
0x540051c6, 0x54000007, 0x54fff247, 0x54005167,
|
||||
0x54000008, 0x54fff1e8, 0x54005108, 0x54000009,
|
||||
0x54fff189, 0x540050a9, 0x5400000a, 0x54fff12a,
|
||||
0x5400504a, 0x5400000b, 0x54fff0cb, 0x54004feb,
|
||||
0x5400000c, 0x54fff06c, 0x54004f8c, 0x5400000d,
|
||||
0x54fff00d, 0x54004f2d, 0x5400000e, 0x54ffefae,
|
||||
0x54004ece, 0x5400000f, 0x54ffef4f, 0x54004e6f,
|
||||
0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60,
|
||||
0xd44cad80, 0xd503201f, 0xd69f03e0, 0xd6bf03e0,
|
||||
0xd5033fdf, 0xd5033e9f, 0xd50332bf, 0xd61f0200,
|
||||
@ -1062,7 +1065,7 @@ void entry(CodeBuffer *cb) {
|
||||
0x791f226d, 0xf95aa2f3, 0xb9587bb7, 0x395f7176,
|
||||
0x795d9143, 0x399e7e08, 0x799a2697, 0x79df3422,
|
||||
0xb99c2624, 0xfd5c2374, 0xbd5fa1d9, 0xfd1d595a,
|
||||
0xbd1b1869, 0x58003e5b, 0x1800000b, 0xf8945060,
|
||||
0xbd1b1869, 0x58003ebb, 0x1800000b, 0xf8945060,
|
||||
0xd8000000, 0xf8ae6ba0, 0xf99a0080, 0x1a070035,
|
||||
0x3a0700a8, 0x5a0e0367, 0x7a11009b, 0x9a000380,
|
||||
0xba1e030c, 0xda0f0320, 0xfa030301, 0x0b340b11,
|
||||
@ -1080,9 +1083,9 @@ void entry(CodeBuffer *cb) {
|
||||
0x9adc2a3b, 0x9ad12c5c, 0x9bce7dea, 0x9b597c6e,
|
||||
0x1b0e166f, 0x1b1ae490, 0x9b023044, 0x9b089e3d,
|
||||
0x9b391083, 0x9b24c73a, 0x9bb15f40, 0x9bbcc6af,
|
||||
0x1e23095b, 0x1e3918e0, 0x1e2f28c9, 0x1e2a39fd,
|
||||
0x1e270a22, 0x1e77096b, 0x1e771ba7, 0x1e6b2b6e,
|
||||
0x1e78388b, 0x1e6e09ec, 0x1f1c3574, 0x1f17f98b,
|
||||
0x7ea3d55b, 0x1e3908e0, 0x1e2f18c9, 0x1e2a29fd,
|
||||
0x1e273a22, 0x7ef7d56b, 0x1e770ba7, 0x1e6b1b6e,
|
||||
0x1e78288b, 0x1e6e39ec, 0x1f1c3574, 0x1f17f98b,
|
||||
0x1f2935da, 0x1f2574ea, 0x1f4b306f, 0x1f5ec7cf,
|
||||
0x1f6f3e93, 0x1f6226a9, 0x1e2040fb, 0x1e20c3dd,
|
||||
0x1e214031, 0x1e21c0c2, 0x1e22c06a, 0x1e604178,
|
||||
@ -1118,75 +1121,76 @@ void entry(CodeBuffer *cb) {
|
||||
0x2eac856a, 0x6eaf85cd, 0x6ef085ee, 0x0eb6d6b4,
|
||||
0x4ea3d441, 0x4ef8d6f6, 0x0e209ffe, 0x4e309dee,
|
||||
0x0e649c62, 0x4e689ce6, 0x0ea59c83, 0x4ea99d07,
|
||||
0x2e3adf38, 0x6e22dc20, 0x6e7ddf9b, 0x0e7f97dd,
|
||||
0x4e6794c5, 0x0ea794c5, 0x4ebf97dd, 0x0e2dcd8b,
|
||||
0x4e3bcf59, 0x4e62cc20, 0x2e6097fe, 0x6e629420,
|
||||
0x2eb39651, 0x6ebe97bc, 0x0ebbcf59, 0x4eabcd49,
|
||||
0x4efbcf59, 0x2e2efdac, 0x6e31fe0f, 0x6e6dfd8b,
|
||||
0x0e2c656a, 0x4e336651, 0x0e7a6738, 0x4e7766d5,
|
||||
0x0eb96717, 0x4ea26420, 0x0e32f630, 0x4e2cf56a,
|
||||
0x4e68f4e6, 0x0e3e6fbc, 0x4e286ce6, 0x0e676cc5,
|
||||
0x4e676cc5, 0x0eb66eb4, 0x4eb36e51, 0x0eb1f60f,
|
||||
0x4eb3f651, 0x4efff7dd, 0x2e3c8f7a, 0x6e3e8fbc,
|
||||
0x2e638c41, 0x6e7d8f9b, 0x2ea28c20, 0x6eb68eb4,
|
||||
0x6efe8fbc, 0x0e31e60f, 0x4e2ee5ac, 0x4e6ce56a,
|
||||
0x0e3e37bc, 0x4e3e37bc, 0x0e753693, 0x4e7836f6,
|
||||
0x0eac356a, 0x4ea634a4, 0x4ee037fe, 0x2eb6e6b4,
|
||||
0x6eaae528, 0x6ee0e7fe, 0x0e333e51, 0x4e2c3d6a,
|
||||
0x0e7d3f9b, 0x4e643c62, 0x0eba3f38, 0x4ea63ca4,
|
||||
0x4ee53c83, 0x2e2ae528, 0x6e38e6f6, 0x6e73e651,
|
||||
0xba5fd3e3, 0x3a5f03e5, 0xfa411be4, 0x7a42cbe2,
|
||||
0x93df03ff, 0xc820ffff, 0x8822fc7f, 0xc8247cbf,
|
||||
0x88267fff, 0x4e010fe0, 0x4e081fe1, 0x4e0c1fe1,
|
||||
0x4e0a1fe1, 0x4e071fe1, 0x4cc0ac3f, 0x05a08020,
|
||||
0x04b0e3e0, 0x0470e7e1, 0x042f9c20, 0x043f9c35,
|
||||
0x047f9c20, 0x04ff9c20, 0x04299420, 0x04319160,
|
||||
0x0461943e, 0x04a19020, 0x042053ff, 0x047f5401,
|
||||
0x25208028, 0x2538cfe0, 0x2578d001, 0x25b8efe2,
|
||||
0x25f8f007, 0xa400a3e0, 0xa4a8a7ea, 0xa547a814,
|
||||
0xa4084ffe, 0xa55c53e0, 0xa5e1540b, 0xe400fbf6,
|
||||
0xe408ffff, 0xe547e400, 0xe4014be0, 0xe4a84fe0,
|
||||
0xe5f15000, 0x858043e0, 0x85a043ff, 0xe59f5d08,
|
||||
0x1e601000, 0x1e603000, 0x1e621000, 0x1e623000,
|
||||
0x1e641000, 0x1e643000, 0x1e661000, 0x1e663000,
|
||||
0x1e681000, 0x1e683000, 0x1e6a1000, 0x1e6a3000,
|
||||
0x1e6c1000, 0x1e6c3000, 0x1e6e1000, 0x1e6e3000,
|
||||
0x1e701000, 0x1e703000, 0x1e721000, 0x1e723000,
|
||||
0x1e741000, 0x1e743000, 0x1e761000, 0x1e763000,
|
||||
0x1e781000, 0x1e783000, 0x1e7a1000, 0x1e7a3000,
|
||||
0x1e7c1000, 0x1e7c3000, 0x1e7e1000, 0x1e7e3000,
|
||||
0xf82d83a5, 0xf8380355, 0xf8381303, 0xf83a21f7,
|
||||
0xf8353303, 0xf8285299, 0xf8304051, 0xf8217300,
|
||||
0xf8246183, 0xf8bf815c, 0xf8ba0182, 0xf8b0103f,
|
||||
0xf8ad201d, 0xf8b3322c, 0xf8b6538d, 0xf8be403f,
|
||||
0xf8ba709c, 0xf8be60c4, 0xf8fe81fa, 0xf8e90188,
|
||||
0xf8e01034, 0xf8f82002, 0xf8e93358, 0xf8f0507e,
|
||||
0xf8ea4157, 0xf8e47050, 0xf8eb6148, 0xf86f8051,
|
||||
0xf86a018c, 0xf86f104d, 0xf8672354, 0xf8703044,
|
||||
0xf86451ec, 0xf87541f0, 0xf86b72f5, 0xf86c62fa,
|
||||
0xb83c816e, 0xb8380181, 0xb83f120a, 0xb8272062,
|
||||
0xb82d3233, 0xb8305023, 0xb82b40be, 0xb82873af,
|
||||
0xb83e6280, 0xb8a782f4, 0xb8bc0375, 0xb8b91025,
|
||||
0xb8b723f0, 0xb8a5312c, 0xb8bc53af, 0xb8b6427f,
|
||||
0xb8bf71c5, 0xb8b061ff, 0xb8fb8214, 0xb8ec012b,
|
||||
0xb8e6123e, 0xb8fb23dc, 0xb8e7328a, 0xb8ea5304,
|
||||
0xb8f142d1, 0xb8e371fd, 0xb8f66273, 0xb87681e2,
|
||||
0xb866020c, 0xb86b12ed, 0xb861227e, 0xb8653051,
|
||||
0xb87051b6, 0xb86a43b5, 0xb87b736c, 0xb86363e1,
|
||||
0xce312677, 0xce0e1b5b, 0xce7e8ed4, 0xce9ed858,
|
||||
0xce768151, 0xce718451, 0xcec08300, 0xce628ad9,
|
||||
0x04e30191, 0x04f0079d, 0x65dc0126, 0x65870887,
|
||||
0x658806c9, 0x0416b7db, 0x0440021a, 0x04d09903,
|
||||
0x04dabb55, 0x04138096, 0x04518071, 0x041008c1,
|
||||
0x0497bce9, 0x045eb4b6, 0x040813c8, 0x04ca0171,
|
||||
0x0481035c, 0x04dcadbc, 0x658098b0, 0x658d89ed,
|
||||
0x6586957a, 0x65879096, 0x65829233, 0x04ddac4e,
|
||||
0x6582b6e3, 0x6580a626, 0x6581b21b, 0x658dbc62,
|
||||
0x65819266, 0x65f8150c, 0x65b72151, 0x65b05db3,
|
||||
0x65f165c0, 0x04944ac8, 0x048f607b, 0x042430f4,
|
||||
0x04a83007, 0x046432d3, 0x04da3569, 0x04583e05,
|
||||
0x04592c36, 0x04c83608, 0x048a248f, 0x658727a8,
|
||||
0x65c633bc, 0x65982c49, 0x040120fc,
|
||||
0x2ebad738, 0x6ea2d420, 0x6efdd79b, 0x2e3fdfdd,
|
||||
0x6e27dcc5, 0x6e67dcc5, 0x0e7f97dd, 0x4e6d958b,
|
||||
0x0ebb9759, 0x4ea29420, 0x0e20cffe, 0x4e22cc20,
|
||||
0x4e73ce51, 0x2e7e97bc, 0x6e7b9759, 0x2eab9549,
|
||||
0x6ebb9759, 0x0eaecdac, 0x4eb1ce0f, 0x4eedcd8b,
|
||||
0x2e2cfd6a, 0x6e33fe51, 0x6e7aff38, 0x0e3766d5,
|
||||
0x4e396717, 0x0e626420, 0x4e726630, 0x0eac656a,
|
||||
0x4ea864e6, 0x0e3ef7bc, 0x4e28f4e6, 0x4e67f4c5,
|
||||
0x0e276cc5, 0x4e366eb4, 0x0e736e51, 0x4e716e0f,
|
||||
0x0eb36e51, 0x4ebf6fdd, 0x0ebcf77a, 0x4ebef7bc,
|
||||
0x4ee3f441, 0x2e3d8f9b, 0x6e228c20, 0x2e768eb4,
|
||||
0x6e7e8fbc, 0x2eb18e0f, 0x6eae8dac, 0x6eec8d6a,
|
||||
0x0e3ee7bc, 0x4e3ee7bc, 0x4e75e693, 0x0e3836f6,
|
||||
0x4e2c356a, 0x0e6634a4, 0x4e6037fe, 0x0eb636b4,
|
||||
0x4eaa3528, 0x4ee037fe, 0x2eb3e651, 0x6eace56a,
|
||||
0x6efde79b, 0x0e243c62, 0x4e3a3f38, 0x0e663ca4,
|
||||
0x4e653c83, 0x0eaa3d28, 0x4eb83ef6, 0x4ef33e51,
|
||||
0x2e2fe5cd, 0x6e26e4a4, 0x6e7ee7bc, 0xba5fd3e3,
|
||||
0x3a5f03e5, 0xfa411be4, 0x7a42cbe2, 0x93df03ff,
|
||||
0xc820ffff, 0x8822fc7f, 0xc8247cbf, 0x88267fff,
|
||||
0x4e010fe0, 0x4e081fe1, 0x4e0c1fe1, 0x4e0a1fe1,
|
||||
0x4e071fe1, 0x4cc0ac3f, 0x05a08020, 0x04b0e3e0,
|
||||
0x0470e7e1, 0x042f9c20, 0x043f9c35, 0x047f9c20,
|
||||
0x04ff9c20, 0x04299420, 0x04319160, 0x0461943e,
|
||||
0x04a19020, 0x042053ff, 0x047f5401, 0x25208028,
|
||||
0x2538cfe0, 0x2578d001, 0x25b8efe2, 0x25f8f007,
|
||||
0xa400a3e0, 0xa4a8a7ea, 0xa547a814, 0xa4084ffe,
|
||||
0xa55c53e0, 0xa5e1540b, 0xe400fbf6, 0xe408ffff,
|
||||
0xe547e400, 0xe4014be0, 0xe4a84fe0, 0xe5f15000,
|
||||
0x858043e0, 0x85a043ff, 0xe59f5d08, 0x1e601000,
|
||||
0x1e603000, 0x1e621000, 0x1e623000, 0x1e641000,
|
||||
0x1e643000, 0x1e661000, 0x1e663000, 0x1e681000,
|
||||
0x1e683000, 0x1e6a1000, 0x1e6a3000, 0x1e6c1000,
|
||||
0x1e6c3000, 0x1e6e1000, 0x1e6e3000, 0x1e701000,
|
||||
0x1e703000, 0x1e721000, 0x1e723000, 0x1e741000,
|
||||
0x1e743000, 0x1e761000, 0x1e763000, 0x1e781000,
|
||||
0x1e783000, 0x1e7a1000, 0x1e7a3000, 0x1e7c1000,
|
||||
0x1e7c3000, 0x1e7e1000, 0x1e7e3000, 0xf8388355,
|
||||
0xf8380303, 0xf83a11f7, 0xf8352303, 0xf8283299,
|
||||
0xf8305051, 0xf8214300, 0xf8247183, 0xf83f615c,
|
||||
0xf8ba8182, 0xf8b0003f, 0xf8ad101d, 0xf8b3222c,
|
||||
0xf8b6338d, 0xf8be503f, 0xf8ba409c, 0xf8be70c4,
|
||||
0xf8be61fa, 0xf8e98188, 0xf8e00034, 0xf8f81002,
|
||||
0xf8e92358, 0xf8f0307e, 0xf8ea5157, 0xf8e44050,
|
||||
0xf8eb7148, 0xf8ef6051, 0xf86a818c, 0xf86f004d,
|
||||
0xf8671354, 0xf8702044, 0xf86431ec, 0xf87551f0,
|
||||
0xf86b42f5, 0xf86c72fa, 0xf87c616e, 0xb8388181,
|
||||
0xb83f020a, 0xb8271062, 0xb82d2233, 0xb8303023,
|
||||
0xb82b50be, 0xb82843af, 0xb83e7280, 0xb82762f4,
|
||||
0xb8bc8375, 0xb8b90025, 0xb8b713f0, 0xb8a5212c,
|
||||
0xb8bc33af, 0xb8b6527f, 0xb8bf41c5, 0xb8b071ff,
|
||||
0xb8bb6214, 0xb8ec812b, 0xb8e6023e, 0xb8fb13dc,
|
||||
0xb8e7228a, 0xb8ea3304, 0xb8f152d1, 0xb8e341fd,
|
||||
0xb8f67273, 0xb8f661e2, 0xb866820c, 0xb86b02ed,
|
||||
0xb861127e, 0xb8652051, 0xb87031b6, 0xb86a53b5,
|
||||
0xb87b436c, 0xb86373e1, 0xb8786233, 0xce3a3b69,
|
||||
0xce167a86, 0xce7e8c58, 0xce8aba3a, 0xce718051,
|
||||
0xce798700, 0xcec08056, 0xce638991, 0x04bc03bb,
|
||||
0x04e904da, 0x658400f1, 0x6596092f, 0x65d40762,
|
||||
0x0496b805, 0x0440072e, 0x04d0975b, 0x041ab418,
|
||||
0x04139006, 0x0411812f, 0x04100b65, 0x0417b694,
|
||||
0x04deaa0a, 0x04481046, 0x04ca1c5d, 0x04411dd6,
|
||||
0x049cb2fb, 0x65c08d42, 0x658d9aca, 0x65869603,
|
||||
0x65c79201, 0x65828d8c, 0x04dda290, 0x65c2a4e5,
|
||||
0x65c0be0c, 0x6581a386, 0x65cda624, 0x65818e6d,
|
||||
0x65a01638, 0x65be2677, 0x65a74410, 0x65ea7911,
|
||||
0x04025774, 0x0407728f, 0x042030fc, 0x04b63270,
|
||||
0x0476312f, 0x049a27d9, 0x0418356d, 0x04992a8d,
|
||||
0x04082c99, 0x04ca28d1, 0x65c73e04, 0x658629da,
|
||||
0x65983c6b, 0x04813aa1,
|
||||
};
|
||||
// END Generated code -- do not edit
|
||||
|
||||
|
@ -1949,7 +1949,7 @@ public:
|
||||
starti;
|
||||
f(op31, 31, 29);
|
||||
f(0b11110, 28, 24);
|
||||
f(type, 23, 22), f(1, 21), f(opcode, 15, 12), f(0b10, 11, 10);
|
||||
f(type, 23, 22), f(1, 21), f(opcode, 15, 10);
|
||||
rf(Vm, 16), rf(Vn, 5), rf(Vd, 0);
|
||||
}
|
||||
|
||||
@ -1958,21 +1958,23 @@ public:
|
||||
data_processing(op31, type, opcode, Vd, Vn, Vm); \
|
||||
}
|
||||
|
||||
INSN(fmuls, 0b000, 0b00, 0b0000);
|
||||
INSN(fdivs, 0b000, 0b00, 0b0001);
|
||||
INSN(fadds, 0b000, 0b00, 0b0010);
|
||||
INSN(fsubs, 0b000, 0b00, 0b0011);
|
||||
INSN(fmaxs, 0b000, 0b00, 0b0100);
|
||||
INSN(fmins, 0b000, 0b00, 0b0101);
|
||||
INSN(fnmuls, 0b000, 0b00, 0b1000);
|
||||
// Single-precision scalar floating-point absolute difference. Unlike the
// arithmetic entries that follow, it passes op31 = 0b011 and type = 0b10,
// together with the opcode value 0b110101.
INSN(fabds, 0b011, 0b10, 0b110101);
|
||||
INSN(fmuls, 0b000, 0b00, 0b000010);
|
||||
INSN(fdivs, 0b000, 0b00, 0b000110);
|
||||
INSN(fadds, 0b000, 0b00, 0b001010);
|
||||
INSN(fsubs, 0b000, 0b00, 0b001110);
|
||||
INSN(fmaxs, 0b000, 0b00, 0b010010);
|
||||
INSN(fmins, 0b000, 0b00, 0b010110);
|
||||
INSN(fnmuls, 0b000, 0b00, 0b100010);
|
||||
|
||||
INSN(fmuld, 0b000, 0b01, 0b0000);
|
||||
INSN(fdivd, 0b000, 0b01, 0b0001);
|
||||
INSN(faddd, 0b000, 0b01, 0b0010);
|
||||
INSN(fsubd, 0b000, 0b01, 0b0011);
|
||||
INSN(fmaxd, 0b000, 0b01, 0b0100);
|
||||
INSN(fmind, 0b000, 0b01, 0b0101);
|
||||
INSN(fnmuld, 0b000, 0b01, 0b1000);
|
||||
// Double-precision scalar floating-point absolute difference. Same op31
// (0b011) and opcode (0b110101) as fabds; only the type argument differs
// (0b11 here).
INSN(fabdd, 0b011, 0b11, 0b110101);
|
||||
INSN(fmuld, 0b000, 0b01, 0b000010);
|
||||
INSN(fdivd, 0b000, 0b01, 0b000110);
|
||||
INSN(faddd, 0b000, 0b01, 0b001010);
|
||||
INSN(fsubd, 0b000, 0b01, 0b001110);
|
||||
INSN(fmaxd, 0b000, 0b01, 0b010010);
|
||||
INSN(fmind, 0b000, 0b01, 0b010110);
|
||||
INSN(fnmuld, 0b000, 0b01, 0b100010);
|
||||
|
||||
#undef INSN
|
||||
|
||||
@ -2482,6 +2484,7 @@ public:
|
||||
f(T==T2D ? 1:0, 22); f(1, 21), rf(Vm, 16), f(op3, 15, 10), rf(Vn, 5), rf(Vd, 0); \
|
||||
}
|
||||
|
||||
// Vector floating-point absolute difference. It is given the same final
// argument (0b110101) as fadd below and differs from it only in the first two
// selector arguments (1, 1 instead of 0, 0).
INSN(fabd, 1, 1, 0b110101);
|
||||
INSN(fadd, 0, 0, 0b110101);
|
||||
INSN(fdiv, 1, 0, 0b111111);
|
||||
INSN(fmul, 1, 0, 0b110111);
|
||||
@ -2689,7 +2692,7 @@ public:
|
||||
INSN(sshr, 0, 0b000001, /* isSHR = */ true);
|
||||
INSN(ushr, 1, 0b000001, /* isSHR = */ true);
|
||||
INSN(usra, 1, 0b000101, /* isSHR = */ true);
|
||||
INSN(ssra, 0, 0b000101, /* isSHAR =*/ true);
|
||||
INSN(ssra, 0, 0b000101, /* isSHR = */ true);
|
||||
|
||||
#undef INSN
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2019, Huawei Technologies Co. Ltd. All rights reserved.
|
||||
* Copyright (c) 2019, Huawei Technologies Co., Ltd. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved.
|
||||
* Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved.
|
||||
* Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved.
|
||||
* Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved.
|
||||
* Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved.
|
||||
* Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved.
|
||||
* Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved.
|
||||
* Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved.
|
||||
* Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved.
|
||||
* Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2019, Huawei Technologies Co. Ltd. All rights reserved.
|
||||
* Copyright (c) 2019, Huawei Technologies Co., Ltd. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved.
|
||||
* Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved.
|
||||
* Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved.
|
||||
* Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved.
|
||||
* Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved.
|
||||
* Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -0,0 +1,104 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
package org.openjdk.bench.vm.compiler;
|
||||
|
||||
import org.openjdk.jmh.annotations.*;
|
||||
import org.openjdk.jmh.infra.*;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.Random;
|
||||
|
||||
@BenchmarkMode(Mode.AverageTime)
|
||||
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||
@State(Scope.Thread)
|
||||
public class FloatingScalarVectorAbsDiff {
|
||||
@Param({"1024"})
|
||||
public int count;
|
||||
|
||||
private float[] floatsA, floatsB, floatsD;
|
||||
private double[] doublesA, doublesB, doublesD;
|
||||
|
||||
@Param("316731")
|
||||
private int seed;
|
||||
private Random r = new Random(seed);
|
||||
|
||||
@Setup
|
||||
public void init() {
|
||||
floatsA = new float[count];
|
||||
doublesA = new double[count];
|
||||
|
||||
floatsB = new float[count];
|
||||
doublesB = new double[count];
|
||||
|
||||
floatsD = new float[count];
|
||||
doublesD = new double[count];
|
||||
|
||||
for (int i = 0; i < count; i++) {
|
||||
floatsA[i] = r.nextFloat();
|
||||
doublesB[i] = r.nextDouble();
|
||||
|
||||
floatsB[i] = r.nextFloat();
|
||||
doublesB[i] = r.nextDouble();
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void testVectorAbsDiffFloat() {
|
||||
for (int i = 0; i < count; i++) {
|
||||
floatsD[i] = Math.abs(floatsA[i] - floatsB[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void testVectorAbsDiffDouble() {
|
||||
for (int i = 0; i < count; i++) {
|
||||
doublesD[i] = Math.abs(doublesA[i] - doublesB[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void testScalarAbsDiffFloat(Blackhole bh) {
|
||||
float a = r.nextFloat();
|
||||
float b = r.nextFloat();
|
||||
|
||||
for (int i = 0; i < count; i++) {
|
||||
a = Math.abs(a - b);
|
||||
b = Math.abs(b - a);
|
||||
}
|
||||
|
||||
bh.consume(a + b);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void testScalarAbsDiffDouble(Blackhole bh) {
|
||||
double a = r.nextDouble();
|
||||
double b = r.nextDouble();
|
||||
|
||||
for (int i = 0; i < count; i++) {
|
||||
a = Math.abs(a - b);
|
||||
b = Math.abs(b - a);
|
||||
}
|
||||
|
||||
bh.consume(a + b);
|
||||
}
|
||||
}
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved.
|
||||
* Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
Loading…
Reference in New Issue
Block a user