8256318: AArch64: Add support for floating-point absolute difference

Reviewed-by: aph
This commit is contained in:
Dong Bo 2020-11-18 10:14:20 +00:00 committed by Fei Yang
parent 655bb619a3
commit b0b9dd27b8
24 changed files with 682 additions and 456 deletions

View File

@ -1334,10 +1334,9 @@ generate(FourRegMulOp,
["maddw", "msubw", "madd", "msub", "smaddl", "smsubl", "umaddl", "umsubl"]) ["maddw", "msubw", "madd", "msub", "smaddl", "smsubl", "umaddl", "umsubl"])
generate(ThreeRegFloatOp, generate(ThreeRegFloatOp,
[["fmuls", "sss"], ["fdivs", "sss"], ["fadds", "sss"], ["fsubs", "sss"], [["fabds", "sss"], ["fmuls", "sss"], ["fdivs", "sss"], ["fadds", "sss"], ["fsubs", "sss"],
["fmuls", "sss"], ["fabdd", "ddd"], ["fmuld", "ddd"], ["fdivd", "ddd"], ["faddd", "ddd"], ["fsubd", "ddd"],
["fmuld", "ddd"], ["fdivd", "ddd"], ["faddd", "ddd"], ["fsubd", "ddd"], ])
["fmuld", "ddd"]])
generate(FourRegFloatOp, generate(FourRegFloatOp,
[["fmadds", "ssss"], ["fmsubs", "ssss"], ["fnmadds", "ssss"], ["fnmadds", "ssss"], [["fmadds", "ssss"], ["fmsubs", "ssss"], ["fnmadds", "ssss"], ["fnmadds", "ssss"],
@ -1437,6 +1436,8 @@ generate(ThreeRegNEONOp,
["mulv", "mul", "8B"], ["mulv", "mul", "16B"], ["mulv", "mul", "8B"], ["mulv", "mul", "16B"],
["mulv", "mul", "4H"], ["mulv", "mul", "8H"], ["mulv", "mul", "4H"], ["mulv", "mul", "8H"],
["mulv", "mul", "2S"], ["mulv", "mul", "4S"], ["mulv", "mul", "2S"], ["mulv", "mul", "4S"],
["fabd", "fabd", "2S"], ["fabd", "fabd", "4S"],
["fabd", "fabd", "2D"],
["fmul", "fmul", "2S"], ["fmul", "fmul", "4S"], ["fmul", "fmul", "2S"], ["fmul", "fmul", "4S"],
["fmul", "fmul", "2D"], ["fmul", "fmul", "2D"],
["mlav", "mla", "4H"], ["mlav", "mla", "8H"], ["mlav", "mla", "4H"], ["mlav", "mla", "8H"],

View File

@ -13894,6 +13894,34 @@ instruct absD_reg(vRegD dst, vRegD src) %{
ins_pipe(fp_uop_d); ins_pipe(fp_uop_d);
%} %}
// Scalar float absolute difference: matches AbsF applied to (SubF src1 src2)
// and encodes the whole subtree with a single fabds call.
// NOTE(review): fp_uop_* is also the pipe class family used by the one-source
// absD_reg immediately above; confirm it is the intended class for an instruct
// with two source operands.
instruct absdF_reg(vRegF dst, vRegF src1, vRegF src2) %{
match(Set dst (AbsF (SubF src1 src2)));
ins_cost(INSN_COST * 3);
format %{ "fabds $dst, $src1, $src2" %}
ins_encode %{
__ fabds(as_FloatRegister($dst$$reg),
as_FloatRegister($src1$$reg),
as_FloatRegister($src2$$reg));
%}
ins_pipe(fp_uop_s);
%}
// Scalar double absolute difference: matches AbsD applied to (SubD src1 src2)
// and encodes the whole subtree with a single fabdd call.
// NOTE(review): fp_uop_d is the pipe class the one-source absD_reg above uses;
// confirm it is the intended class for an instruct with two source operands.
instruct absdD_reg(vRegD dst, vRegD src1, vRegD src2) %{
match(Set dst (AbsD (SubD src1 src2)));
ins_cost(INSN_COST * 3);
format %{ "fabdd $dst, $src1, $src2" %}
ins_encode %{
__ fabdd(as_FloatRegister($dst$$reg),
as_FloatRegister($src1$$reg),
as_FloatRegister($src2$$reg));
%}
ins_pipe(fp_uop_d);
%}
instruct sqrtD_reg(vRegD dst, vRegD src) %{ instruct sqrtD_reg(vRegD dst, vRegD src) %{
match(Set dst (SqrtD src)); match(Set dst (SqrtD src));
@ -17872,129 +17900,6 @@ instruct vsqrt2D(vecX dst, vecX src)
ins_pipe(vsqrt_fp128); ins_pipe(vsqrt_fp128);
%} %}
// --------------------------------- ABS --------------------------------------
// AbsVB on vecD operands. The predicate accepts vectors of 4 or 8 lanes;
// both cases are encoded as one absr with the T8B arrangement.
instruct vabs8B(vecD dst, vecD src)
%{
predicate(n->as_Vector()->length() == 4 ||
n->as_Vector()->length() == 8);
match(Set dst (AbsVB src));
ins_cost(INSN_COST);
format %{ "abs $dst, $src\t# vector (8B)" %}
ins_encode %{
__ absr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($src$$reg));
%}
ins_pipe(vlogical64);
%}
// AbsVB on vecX operands, selected only for 16-lane vectors; encoded as one
// absr with the T16B arrangement.
instruct vabs16B(vecX dst, vecX src)
%{
predicate(n->as_Vector()->length() == 16);
match(Set dst (AbsVB src));
ins_cost(INSN_COST);
format %{ "abs $dst, $src\t# vector (16B)" %}
ins_encode %{
__ absr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($src$$reg));
%}
ins_pipe(vlogical128);
%}
// AbsVS on vecD operands, selected only for 4-lane vectors. The instruct name
// says "4S" but the emitted arrangement is T4H (see format and encoding).
instruct vabs4S(vecD dst, vecD src)
%{
predicate(n->as_Vector()->length() == 4);
match(Set dst (AbsVS src));
ins_cost(INSN_COST);
format %{ "abs $dst, $src\t# vector (4H)" %}
ins_encode %{
__ absr(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($src$$reg));
%}
ins_pipe(vlogical64);
%}
// AbsVS on vecX operands, selected only for 8-lane vectors. The instruct name
// says "8S" but the emitted arrangement is T8H (see format and encoding).
instruct vabs8S(vecX dst, vecX src)
%{
predicate(n->as_Vector()->length() == 8);
match(Set dst (AbsVS src));
ins_cost(INSN_COST);
format %{ "abs $dst, $src\t# vector (8H)" %}
ins_encode %{
__ absr(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg));
%}
ins_pipe(vlogical128);
%}
// AbsVI on vecD operands, selected only for 2-lane vectors; encoded as one
// absr with the T2S arrangement.
instruct vabs2I(vecD dst, vecD src)
%{
predicate(n->as_Vector()->length() == 2);
match(Set dst (AbsVI src));
ins_cost(INSN_COST);
format %{ "abs $dst, $src\t# vector (2S)" %}
ins_encode %{
__ absr(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg));
%}
ins_pipe(vlogical64);
%}
// AbsVI on vecX operands, selected only for 4-lane vectors; encoded as one
// absr with the T4S arrangement.
instruct vabs4I(vecX dst, vecX src)
%{
predicate(n->as_Vector()->length() == 4);
match(Set dst (AbsVI src));
ins_cost(INSN_COST);
format %{ "abs $dst, $src\t# vector (4S)" %}
ins_encode %{
__ absr(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg));
%}
ins_pipe(vlogical128);
%}
// AbsVL on vecX operands, selected only for 2-lane vectors; encoded as one
// absr with the T2D arrangement.
instruct vabs2L(vecX dst, vecX src)
%{
predicate(n->as_Vector()->length() == 2);
match(Set dst (AbsVL src));
ins_cost(INSN_COST);
format %{ "abs $dst, $src\t# vector (2D)" %}
ins_encode %{
__ absr(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg));
%}
ins_pipe(vlogical128);
%}
// AbsVF on vecD operands, selected only for 2-lane vectors; encoded as one
// fabs with the T2S arrangement. Costed at INSN_COST * 3, unlike the integer
// abs variants which use INSN_COST.
instruct vabs2F(vecD dst, vecD src)
%{
predicate(n->as_Vector()->length() == 2);
match(Set dst (AbsVF src));
ins_cost(INSN_COST * 3);
format %{ "fabs $dst,$src\t# vector (2S)" %}
ins_encode %{
__ fabs(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg));
%}
ins_pipe(vunop_fp64);
%}
// AbsVF on vecX operands, selected only for 4-lane vectors; encoded as one
// fabs with the T4S arrangement.
instruct vabs4F(vecX dst, vecX src)
%{
predicate(n->as_Vector()->length() == 4);
match(Set dst (AbsVF src));
ins_cost(INSN_COST * 3);
format %{ "fabs $dst,$src\t# vector (4S)" %}
ins_encode %{
__ fabs(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg));
%}
ins_pipe(vunop_fp128);
%}
// AbsVD on vecX operands, selected only for 2-lane vectors; encoded as one
// fabs with the T2D arrangement.
instruct vabs2D(vecX dst, vecX src)
%{
predicate(n->as_Vector()->length() == 2);
match(Set dst (AbsVD src));
ins_cost(INSN_COST * 3);
format %{ "fabs $dst,$src\t# vector (2D)" %}
ins_encode %{
__ fabs(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg));
%}
ins_pipe(vunop_fp128);
%}
// --------------------------------- NEG -------------------------------------- // --------------------------------- NEG --------------------------------------
instruct vneg2F(vecD dst, vecD src) instruct vneg2F(vecD dst, vecD src)

View File

@ -3454,3 +3454,166 @@ instruct alltrue_in_mask16B(iRegINoSp dst, vecX src1, vecX src2, vecX tmp, rFlag
%} %}
ins_pipe(pipe_slow); ins_pipe(pipe_slow);
%} %}
// --------------------------------- ABS --------------------------------------
// AbsVB on vecD operands. The predicate accepts vectors of 4 or 8 lanes;
// both cases are encoded as one absr with the T8B arrangement.
instruct vabs8B(vecD dst, vecD src)
%{
predicate(n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8);
match(Set dst (AbsVB src));
ins_cost(INSN_COST);
format %{ "abs $dst, $src\t# vector (8B)" %}
ins_encode %{
__ absr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($src$$reg));
%}
ins_pipe(vlogical64);
%}
// AbsVB on vecX operands, selected only for 16-lane vectors; encoded as one
// absr with the T16B arrangement.
instruct vabs16B(vecX dst, vecX src)
%{
predicate(n->as_Vector()->length() == 16);
match(Set dst (AbsVB src));
ins_cost(INSN_COST);
format %{ "abs $dst, $src\t# vector (16B)" %}
ins_encode %{
__ absr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($src$$reg));
%}
ins_pipe(vlogical128);
%}
// AbsVS on vecD operands, selected only for 4-lane vectors. The instruct name
// says "4S" but the emitted arrangement is T4H (see format and encoding).
instruct vabs4S(vecD dst, vecD src)
%{
predicate(n->as_Vector()->length() == 4);
match(Set dst (AbsVS src));
ins_cost(INSN_COST);
format %{ "abs $dst, $src\t# vector (4H)" %}
ins_encode %{
__ absr(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($src$$reg));
%}
ins_pipe(vlogical64);
%}
// AbsVS on vecX operands, selected only for 8-lane vectors. The instruct name
// says "8S" but the emitted arrangement is T8H (see format and encoding).
instruct vabs8S(vecX dst, vecX src)
%{
predicate(n->as_Vector()->length() == 8);
match(Set dst (AbsVS src));
ins_cost(INSN_COST);
format %{ "abs $dst, $src\t# vector (8H)" %}
ins_encode %{
__ absr(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg));
%}
ins_pipe(vlogical128);
%}
// AbsVI on vecD operands, selected only for 2-lane vectors; encoded as one
// absr with the T2S arrangement.
instruct vabs2I(vecD dst, vecD src)
%{
predicate(n->as_Vector()->length() == 2);
match(Set dst (AbsVI src));
ins_cost(INSN_COST);
format %{ "abs $dst, $src\t# vector (2S)" %}
ins_encode %{
__ absr(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg));
%}
ins_pipe(vlogical64);
%}
// AbsVI on vecX operands, selected only for 4-lane vectors; encoded as one
// absr with the T4S arrangement.
instruct vabs4I(vecX dst, vecX src)
%{
predicate(n->as_Vector()->length() == 4);
match(Set dst (AbsVI src));
ins_cost(INSN_COST);
format %{ "abs $dst, $src\t# vector (4S)" %}
ins_encode %{
__ absr(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg));
%}
ins_pipe(vlogical128);
%}
// AbsVL on vecX operands, selected only for 2-lane vectors; encoded as one
// absr with the T2D arrangement.
instruct vabs2L(vecX dst, vecX src)
%{
predicate(n->as_Vector()->length() == 2);
match(Set dst (AbsVL src));
ins_cost(INSN_COST);
format %{ "abs $dst, $src\t# vector (2D)" %}
ins_encode %{
__ absr(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg));
%}
ins_pipe(vlogical128);
%}
// AbsVF on vecD operands, selected only for 2-lane vectors; encoded as one
// fabs with the T2S arrangement. Costed at INSN_COST * 3, unlike the integer
// abs variants which use INSN_COST.
instruct vabs2F(vecD dst, vecD src)
%{
predicate(n->as_Vector()->length() == 2);
match(Set dst (AbsVF src));
ins_cost(INSN_COST * 3);
format %{ "fabs $dst, $src\t# vector (2S)" %}
ins_encode %{
__ fabs(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg));
%}
ins_pipe(vunop_fp64);
%}
// AbsVF on vecX operands, selected only for 4-lane vectors; encoded as one
// fabs with the T4S arrangement.
instruct vabs4F(vecX dst, vecX src)
%{
predicate(n->as_Vector()->length() == 4);
match(Set dst (AbsVF src));
ins_cost(INSN_COST * 3);
format %{ "fabs $dst, $src\t# vector (4S)" %}
ins_encode %{
__ fabs(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg));
%}
ins_pipe(vunop_fp128);
%}
// AbsVD on vecX operands, selected only for 2-lane vectors; encoded as one
// fabs with the T2D arrangement.
instruct vabs2D(vecX dst, vecX src)
%{
predicate(n->as_Vector()->length() == 2);
match(Set dst (AbsVD src));
ins_cost(INSN_COST * 3);
format %{ "fabs $dst, $src\t# vector (2D)" %}
ins_encode %{
__ fabs(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg));
%}
ins_pipe(vunop_fp128);
%}
// --------------------------------- FABS DIFF --------------------------------
// Vector float absolute difference on vecD operands, selected only for 2-lane
// vectors: matches AbsVF applied to (SubVF src1 src2) and encodes the whole
// subtree as one fabd with the T2S arrangement.
// NOTE(review): vunop_fp64 is the pipe class the one-source vabs2F uses;
// confirm it is intended for this two-source instruct.
instruct vabd2F(vecD dst, vecD src1, vecD src2)
%{
predicate(n->as_Vector()->length() == 2);
match(Set dst (AbsVF (SubVF src1 src2)));
ins_cost(INSN_COST * 3);
format %{ "fabd $dst, $src1, $src2\t# vector (2S)" %}
ins_encode %{
__ fabd(as_FloatRegister($dst$$reg), __ T2S,
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
%}
ins_pipe(vunop_fp64);
%}
// Vector float absolute difference on vecX operands, selected only for 4-lane
// vectors: matches AbsVF applied to (SubVF src1 src2) and encodes the whole
// subtree as one fabd with the T4S arrangement.
// NOTE(review): vunop_fp128 is the pipe class the one-source vabs4F uses;
// confirm it is intended for this two-source instruct.
instruct vabd4F(vecX dst, vecX src1, vecX src2)
%{
predicate(n->as_Vector()->length() == 4);
match(Set dst (AbsVF (SubVF src1 src2)));
ins_cost(INSN_COST * 3);
format %{ "fabd $dst, $src1, $src2\t# vector (4S)" %}
ins_encode %{
__ fabd(as_FloatRegister($dst$$reg), __ T4S,
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
%}
ins_pipe(vunop_fp128);
%}
// Vector double absolute difference on vecX operands, selected only for 2-lane
// vectors: matches AbsVD applied to (SubVD src1 src2) and encodes the whole
// subtree as one fabd with the T2D arrangement.
// NOTE(review): vunop_fp128 is the pipe class the one-source vabs2D uses;
// confirm it is intended for this two-source instruct.
instruct vabd2D(vecX dst, vecX src1, vecX src2)
%{
predicate(n->as_Vector()->length() == 2);
match(Set dst (AbsVD (SubVD src1 src2)));
ins_cost(INSN_COST * 3);
format %{ "fabd $dst, $src1, $src2\t# vector (2D)" %}
ins_encode %{
__ fabd(as_FloatRegister($dst$$reg), __ T2D,
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
%}
ins_pipe(vunop_fp128);
%}

View File

@ -1421,4 +1421,50 @@ instruct alltrue_in_mask$1B`'(iRegINoSp dst, vec$2 src1, vec$2 src2, vec$2 tmp,
dnl $1 $2 dnl $1 $2
ALLTRUE_IN_MASK(8, D) ALLTRUE_IN_MASK(8, D)
ALLTRUE_IN_MASK(16, X) ALLTRUE_IN_MASK(16, X)
// --------------------------------- ABS --------------------------------------
dnl
dnl VABS expands to one vector absolute-value instruct per invocation.
dnl Arguments, as used in the body below:
dnl   1: mnemonic printed in the format string
dnl   2: assembler method called in ins_encode
dnl   3: lane count; compared against length() in the predicate and used in the T arrangement
dnl   4: element letter; selects the AbsV node and forms the instruct name with argument 3
dnl   5: suffix of the vec operand class used for dst and src
dnl   6: arrangement letter; combined with argument 3 in the format string and T arrangement
dnl   7: width suffix appended to the pipe class name
dnl When arguments 3 and 4 together spell 8B the predicate also accepts length() == 4.
dnl When argument 4 is F or D the cost is INSN_COST * 3 and the pipe class is vunop_fp;
dnl otherwise the cost is INSN_COST and the pipe class is vlogical.
define(`VABS', `
instruct vabs$3$4`'(vec$5 dst, vec$5 src)
%{
predicate(ifelse($3$4, 8B, n->as_Vector()->length() == 4 || )n->as_Vector()->length() == $3);
match(Set dst (AbsV$4 src));
ins_cost(ifelse($4, F, INSN_COST * 3, $4, D, INSN_COST * 3, INSN_COST));
format %{ "$1 $dst, $src\t# vector ($3$6)" %}
ins_encode %{
__ $2(as_FloatRegister($dst$$reg), __ T$3$6, as_FloatRegister($src$$reg));
%}
ins_pipe(ifelse($4, F, vunop_fp$7, $4, D, vunop_fp$7, vlogical$7));
%}')dnl
dnl $1 $2 $3 $4 $5 $6 $7
VABS(abs, absr, 8, B, D, B, 64)
VABS(abs, absr, 16, B, X, B, 128)
VABS(abs, absr, 4, S, D, H, 64)
VABS(abs, absr, 8, S, X, H, 128)
VABS(abs, absr, 2, I, D, S, 64)
VABS(abs, absr, 4, I, X, S, 128)
VABS(abs, absr, 2, L, X, D, 128)
VABS(fabs, fabs, 2, F, D, S, 64)
VABS(fabs, fabs, 4, F, X, S, 128)
VABS(fabs, fabs, 2, D, X, D, 128)
// --------------------------------- FABS DIFF --------------------------------
dnl
dnl VFABD expands to one vector absolute-difference instruct per invocation.
dnl The generated instruct matches the AbsV node applied to a SubV node of the same
dnl element letter and encodes it with a single call of the method in argument 2.
dnl Arguments, as used in the body below:
dnl   1: mnemonic printed in the format string
dnl   2: assembler method called in ins_encode
dnl   3: lane count; compared against length() in the predicate and used in the T arrangement
dnl   4: element letter; selects the AbsV and SubV nodes and forms the instruct name
dnl   5: suffix of the vec operand class used for dst, src1 and src2
dnl   6: arrangement letter; combined with argument 3 in the format string and T arrangement
dnl   7: width suffix appended to the vunop_fp pipe class name
dnl NOTE(review): vunop_fp is the pipe class family VABS uses for one-source fabs;
dnl confirm it is intended for these two-source instructs.
define(`VFABD', `
instruct vabd$3$4`'(vec$5 dst, vec$5 src1, vec$5 src2)
%{
predicate(n->as_Vector()->length() == $3);
match(Set dst (AbsV$4 (SubV$4 src1 src2)));
ins_cost(INSN_COST * 3);
format %{ "$1 $dst, $src1, $src2\t# vector ($3$6)" %}
ins_encode %{
__ $2(as_FloatRegister($dst$$reg), __ T$3$6,
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
%}
ins_pipe(vunop_fp$7);
%}')dnl
dnl $1 $2 $3 $4 $5 $6 $7
VFABD(fabd, fabd, 2, F, D, S, 64)
VFABD(fabd, fabd, 4, F, X, S, 128)
VFABD(fabd, fabd, 2, D, X, D, 128)
dnl dnl

View File

@ -510,16 +510,16 @@ void entry(CodeBuffer *cb) {
__ umsubl(r15, r21, r28, r17); // umsubl x15, w21, w28, x17 __ umsubl(r15, r21, r28, r17); // umsubl x15, w21, w28, x17
// ThreeRegFloatOp // ThreeRegFloatOp
__ fmuls(v27, v10, v3); // fmul s27, s10, s3 __ fabds(v27, v10, v3); // fabd s27, s10, s3
__ fdivs(v0, v7, v25); // fdiv s0, s7, s25 __ fmuls(v0, v7, v25); // fmul s0, s7, s25
__ fadds(v9, v6, v15); // fadd s9, s6, s15 __ fdivs(v9, v6, v15); // fdiv s9, s6, s15
__ fsubs(v29, v15, v10); // fsub s29, s15, s10 __ fadds(v29, v15, v10); // fadd s29, s15, s10
__ fmuls(v2, v17, v7); // fmul s2, s17, s7 __ fsubs(v2, v17, v7); // fsub s2, s17, s7
__ fmuld(v11, v11, v23); // fmul d11, d11, d23 __ fabdd(v11, v11, v23); // fabd d11, d11, d23
__ fdivd(v7, v29, v23); // fdiv d7, d29, d23 __ fmuld(v7, v29, v23); // fmul d7, d29, d23
__ faddd(v14, v27, v11); // fadd d14, d27, d11 __ fdivd(v14, v27, v11); // fdiv d14, d27, d11
__ fsubd(v11, v4, v24); // fsub d11, d4, d24 __ faddd(v11, v4, v24); // fadd d11, d4, d24
__ fmuld(v12, v15, v14); // fmul d12, d15, d14 __ fsubd(v12, v15, v14); // fsub d12, d15, d14
// FourRegFloatOp // FourRegFloatOp
__ fmadds(v20, v11, v28, v13); // fmadd s20, s11, s28, s13 __ fmadds(v20, v11, v28, v13); // fmadd s20, s11, s28, s13
@ -686,74 +686,77 @@ void entry(CodeBuffer *cb) {
__ mulv(v6, __ T8H, v7, v8); // mul v6.8H, v7.8H, v8.8H __ mulv(v6, __ T8H, v7, v8); // mul v6.8H, v7.8H, v8.8H
__ mulv(v3, __ T2S, v4, v5); // mul v3.2S, v4.2S, v5.2S __ mulv(v3, __ T2S, v4, v5); // mul v3.2S, v4.2S, v5.2S
__ mulv(v7, __ T4S, v8, v9); // mul v7.4S, v8.4S, v9.4S __ mulv(v7, __ T4S, v8, v9); // mul v7.4S, v8.4S, v9.4S
__ fmul(v24, __ T2S, v25, v26); // fmul v24.2S, v25.2S, v26.2S __ fabd(v24, __ T2S, v25, v26); // fabd v24.2S, v25.2S, v26.2S
__ fmul(v0, __ T4S, v1, v2); // fmul v0.4S, v1.4S, v2.4S __ fabd(v0, __ T4S, v1, v2); // fabd v0.4S, v1.4S, v2.4S
__ fmul(v27, __ T2D, v28, v29); // fmul v27.2D, v28.2D, v29.2D __ fabd(v27, __ T2D, v28, v29); // fabd v27.2D, v28.2D, v29.2D
__ fmul(v29, __ T2S, v30, v31); // fmul v29.2S, v30.2S, v31.2S
__ fmul(v5, __ T4S, v6, v7); // fmul v5.4S, v6.4S, v7.4S
__ fmul(v5, __ T2D, v6, v7); // fmul v5.2D, v6.2D, v7.2D
__ mlav(v29, __ T4H, v30, v31); // mla v29.4H, v30.4H, v31.4H __ mlav(v29, __ T4H, v30, v31); // mla v29.4H, v30.4H, v31.4H
__ mlav(v5, __ T8H, v6, v7); // mla v5.8H, v6.8H, v7.8H __ mlav(v11, __ T8H, v12, v13); // mla v11.8H, v12.8H, v13.8H
__ mlav(v5, __ T2S, v6, v7); // mla v5.2S, v6.2S, v7.2S __ mlav(v25, __ T2S, v26, v27); // mla v25.2S, v26.2S, v27.2S
__ mlav(v29, __ T4S, v30, v31); // mla v29.4S, v30.4S, v31.4S __ mlav(v0, __ T4S, v1, v2); // mla v0.4S, v1.4S, v2.4S
__ fmla(v11, __ T2S, v12, v13); // fmla v11.2S, v12.2S, v13.2S __ fmla(v30, __ T2S, v31, v0); // fmla v30.2S, v31.2S, v0.2S
__ fmla(v25, __ T4S, v26, v27); // fmla v25.4S, v26.4S, v27.4S __ fmla(v0, __ T4S, v1, v2); // fmla v0.4S, v1.4S, v2.4S
__ fmla(v0, __ T2D, v1, v2); // fmla v0.2D, v1.2D, v2.2D __ fmla(v17, __ T2D, v18, v19); // fmla v17.2D, v18.2D, v19.2D
__ mlsv(v30, __ T4H, v31, v0); // mls v30.4H, v31.4H, v0.4H __ mlsv(v28, __ T4H, v29, v30); // mls v28.4H, v29.4H, v30.4H
__ mlsv(v0, __ T8H, v1, v2); // mls v0.8H, v1.8H, v2.8H __ mlsv(v25, __ T8H, v26, v27); // mls v25.8H, v26.8H, v27.8H
__ mlsv(v17, __ T2S, v18, v19); // mls v17.2S, v18.2S, v19.2S __ mlsv(v9, __ T2S, v10, v11); // mls v9.2S, v10.2S, v11.2S
__ mlsv(v28, __ T4S, v29, v30); // mls v28.4S, v29.4S, v30.4S __ mlsv(v25, __ T4S, v26, v27); // mls v25.4S, v26.4S, v27.4S
__ fmls(v25, __ T2S, v26, v27); // fmls v25.2S, v26.2S, v27.2S __ fmls(v12, __ T2S, v13, v14); // fmls v12.2S, v13.2S, v14.2S
__ fmls(v9, __ T4S, v10, v11); // fmls v9.4S, v10.4S, v11.4S __ fmls(v15, __ T4S, v16, v17); // fmls v15.4S, v16.4S, v17.4S
__ fmls(v25, __ T2D, v26, v27); // fmls v25.2D, v26.2D, v27.2D __ fmls(v11, __ T2D, v12, v13); // fmls v11.2D, v12.2D, v13.2D
__ fdiv(v12, __ T2S, v13, v14); // fdiv v12.2S, v13.2S, v14.2S __ fdiv(v10, __ T2S, v11, v12); // fdiv v10.2S, v11.2S, v12.2S
__ fdiv(v15, __ T4S, v16, v17); // fdiv v15.4S, v16.4S, v17.4S __ fdiv(v17, __ T4S, v18, v19); // fdiv v17.4S, v18.4S, v19.4S
__ fdiv(v11, __ T2D, v12, v13); // fdiv v11.2D, v12.2D, v13.2D __ fdiv(v24, __ T2D, v25, v26); // fdiv v24.2D, v25.2D, v26.2D
__ maxv(v10, __ T8B, v11, v12); // smax v10.8B, v11.8B, v12.8B __ maxv(v21, __ T8B, v22, v23); // smax v21.8B, v22.8B, v23.8B
__ maxv(v17, __ T16B, v18, v19); // smax v17.16B, v18.16B, v19.16B __ maxv(v23, __ T16B, v24, v25); // smax v23.16B, v24.16B, v25.16B
__ maxv(v24, __ T4H, v25, v26); // smax v24.4H, v25.4H, v26.4H __ maxv(v0, __ T4H, v1, v2); // smax v0.4H, v1.4H, v2.4H
__ maxv(v21, __ T8H, v22, v23); // smax v21.8H, v22.8H, v23.8H __ maxv(v16, __ T8H, v17, v18); // smax v16.8H, v17.8H, v18.8H
__ maxv(v23, __ T2S, v24, v25); // smax v23.2S, v24.2S, v25.2S __ maxv(v10, __ T2S, v11, v12); // smax v10.2S, v11.2S, v12.2S
__ maxv(v0, __ T4S, v1, v2); // smax v0.4S, v1.4S, v2.4S __ maxv(v6, __ T4S, v7, v8); // smax v6.4S, v7.4S, v8.4S
__ fmax(v16, __ T2S, v17, v18); // fmax v16.2S, v17.2S, v18.2S __ fmax(v28, __ T2S, v29, v30); // fmax v28.2S, v29.2S, v30.2S
__ fmax(v10, __ T4S, v11, v12); // fmax v10.4S, v11.4S, v12.4S __ fmax(v6, __ T4S, v7, v8); // fmax v6.4S, v7.4S, v8.4S
__ fmax(v6, __ T2D, v7, v8); // fmax v6.2D, v7.2D, v8.2D __ fmax(v5, __ T2D, v6, v7); // fmax v5.2D, v6.2D, v7.2D
__ minv(v28, __ T8B, v29, v30); // smin v28.8B, v29.8B, v30.8B __ minv(v5, __ T8B, v6, v7); // smin v5.8B, v6.8B, v7.8B
__ minv(v6, __ T16B, v7, v8); // smin v6.16B, v7.16B, v8.16B __ minv(v20, __ T16B, v21, v22); // smin v20.16B, v21.16B, v22.16B
__ minv(v5, __ T4H, v6, v7); // smin v5.4H, v6.4H, v7.4H __ minv(v17, __ T4H, v18, v19); // smin v17.4H, v18.4H, v19.4H
__ minv(v5, __ T8H, v6, v7); // smin v5.8H, v6.8H, v7.8H __ minv(v15, __ T8H, v16, v17); // smin v15.8H, v16.8H, v17.8H
__ minv(v20, __ T2S, v21, v22); // smin v20.2S, v21.2S, v22.2S __ minv(v17, __ T2S, v18, v19); // smin v17.2S, v18.2S, v19.2S
__ minv(v17, __ T4S, v18, v19); // smin v17.4S, v18.4S, v19.4S __ minv(v29, __ T4S, v30, v31); // smin v29.4S, v30.4S, v31.4S
__ fmin(v15, __ T2S, v16, v17); // fmin v15.2S, v16.2S, v17.2S __ fmin(v26, __ T2S, v27, v28); // fmin v26.2S, v27.2S, v28.2S
__ fmin(v17, __ T4S, v18, v19); // fmin v17.4S, v18.4S, v19.4S __ fmin(v28, __ T4S, v29, v30); // fmin v28.4S, v29.4S, v30.4S
__ fmin(v29, __ T2D, v30, v31); // fmin v29.2D, v30.2D, v31.2D __ fmin(v1, __ T2D, v2, v3); // fmin v1.2D, v2.2D, v3.2D
__ cmeq(v26, __ T8B, v27, v28); // cmeq v26.8B, v27.8B, v28.8B __ cmeq(v27, __ T8B, v28, v29); // cmeq v27.8B, v28.8B, v29.8B
__ cmeq(v28, __ T16B, v29, v30); // cmeq v28.16B, v29.16B, v30.16B __ cmeq(v0, __ T16B, v1, v2); // cmeq v0.16B, v1.16B, v2.16B
__ cmeq(v1, __ T4H, v2, v3); // cmeq v1.4H, v2.4H, v3.4H __ cmeq(v20, __ T4H, v21, v22); // cmeq v20.4H, v21.4H, v22.4H
__ cmeq(v27, __ T8H, v28, v29); // cmeq v27.8H, v28.8H, v29.8H __ cmeq(v28, __ T8H, v29, v30); // cmeq v28.8H, v29.8H, v30.8H
__ cmeq(v0, __ T2S, v1, v2); // cmeq v0.2S, v1.2S, v2.2S __ cmeq(v15, __ T2S, v16, v17); // cmeq v15.2S, v16.2S, v17.2S
__ cmeq(v20, __ T4S, v21, v22); // cmeq v20.4S, v21.4S, v22.4S __ cmeq(v12, __ T4S, v13, v14); // cmeq v12.4S, v13.4S, v14.4S
__ cmeq(v28, __ T2D, v29, v30); // cmeq v28.2D, v29.2D, v30.2D __ cmeq(v10, __ T2D, v11, v12); // cmeq v10.2D, v11.2D, v12.2D
__ fcmeq(v15, __ T2S, v16, v17); // fcmeq v15.2S, v16.2S, v17.2S __ fcmeq(v28, __ T2S, v29, v30); // fcmeq v28.2S, v29.2S, v30.2S
__ fcmeq(v12, __ T4S, v13, v14); // fcmeq v12.4S, v13.4S, v14.4S __ fcmeq(v28, __ T4S, v29, v30); // fcmeq v28.4S, v29.4S, v30.4S
__ fcmeq(v10, __ T2D, v11, v12); // fcmeq v10.2D, v11.2D, v12.2D __ fcmeq(v19, __ T2D, v20, v21); // fcmeq v19.2D, v20.2D, v21.2D
__ cmgt(v28, __ T8B, v29, v30); // cmgt v28.8B, v29.8B, v30.8B __ cmgt(v22, __ T8B, v23, v24); // cmgt v22.8B, v23.8B, v24.8B
__ cmgt(v28, __ T16B, v29, v30); // cmgt v28.16B, v29.16B, v30.16B __ cmgt(v10, __ T16B, v11, v12); // cmgt v10.16B, v11.16B, v12.16B
__ cmgt(v19, __ T4H, v20, v21); // cmgt v19.4H, v20.4H, v21.4H __ cmgt(v4, __ T4H, v5, v6); // cmgt v4.4H, v5.4H, v6.4H
__ cmgt(v22, __ T8H, v23, v24); // cmgt v22.8H, v23.8H, v24.8H __ cmgt(v30, __ T8H, v31, v0); // cmgt v30.8H, v31.8H, v0.8H
__ cmgt(v10, __ T2S, v11, v12); // cmgt v10.2S, v11.2S, v12.2S __ cmgt(v20, __ T2S, v21, v22); // cmgt v20.2S, v21.2S, v22.2S
__ cmgt(v4, __ T4S, v5, v6); // cmgt v4.4S, v5.4S, v6.4S __ cmgt(v8, __ T4S, v9, v10); // cmgt v8.4S, v9.4S, v10.4S
__ cmgt(v30, __ T2D, v31, v0); // cmgt v30.2D, v31.2D, v0.2D __ cmgt(v30, __ T2D, v31, v0); // cmgt v30.2D, v31.2D, v0.2D
__ fcmgt(v20, __ T2S, v21, v22); // fcmgt v20.2S, v21.2S, v22.2S __ fcmgt(v17, __ T2S, v18, v19); // fcmgt v17.2S, v18.2S, v19.2S
__ fcmgt(v8, __ T4S, v9, v10); // fcmgt v8.4S, v9.4S, v10.4S __ fcmgt(v10, __ T4S, v11, v12); // fcmgt v10.4S, v11.4S, v12.4S
__ fcmgt(v30, __ T2D, v31, v0); // fcmgt v30.2D, v31.2D, v0.2D __ fcmgt(v27, __ T2D, v28, v29); // fcmgt v27.2D, v28.2D, v29.2D
__ cmge(v17, __ T8B, v18, v19); // cmge v17.8B, v18.8B, v19.8B __ cmge(v2, __ T8B, v3, v4); // cmge v2.8B, v3.8B, v4.8B
__ cmge(v10, __ T16B, v11, v12); // cmge v10.16B, v11.16B, v12.16B __ cmge(v24, __ T16B, v25, v26); // cmge v24.16B, v25.16B, v26.16B
__ cmge(v27, __ T4H, v28, v29); // cmge v27.4H, v28.4H, v29.4H __ cmge(v4, __ T4H, v5, v6); // cmge v4.4H, v5.4H, v6.4H
__ cmge(v2, __ T8H, v3, v4); // cmge v2.8H, v3.8H, v4.8H __ cmge(v3, __ T8H, v4, v5); // cmge v3.8H, v4.8H, v5.8H
__ cmge(v24, __ T2S, v25, v26); // cmge v24.2S, v25.2S, v26.2S __ cmge(v8, __ T2S, v9, v10); // cmge v8.2S, v9.2S, v10.2S
__ cmge(v4, __ T4S, v5, v6); // cmge v4.4S, v5.4S, v6.4S __ cmge(v22, __ T4S, v23, v24); // cmge v22.4S, v23.4S, v24.4S
__ cmge(v3, __ T2D, v4, v5); // cmge v3.2D, v4.2D, v5.2D __ cmge(v17, __ T2D, v18, v19); // cmge v17.2D, v18.2D, v19.2D
__ fcmge(v8, __ T2S, v9, v10); // fcmge v8.2S, v9.2S, v10.2S __ fcmge(v13, __ T2S, v14, v15); // fcmge v13.2S, v14.2S, v15.2S
__ fcmge(v22, __ T4S, v23, v24); // fcmge v22.4S, v23.4S, v24.4S __ fcmge(v4, __ T4S, v5, v6); // fcmge v4.4S, v5.4S, v6.4S
__ fcmge(v17, __ T2D, v18, v19); // fcmge v17.2D, v18.2D, v19.2D __ fcmge(v28, __ T2D, v29, v30); // fcmge v28.2D, v29.2D, v30.2D
// SpecialCases // SpecialCases
__ ccmn(zr, zr, 3u, Assembler::LE); // ccmn xzr, xzr, #3, LE __ ccmn(zr, zr, 3u, Assembler::LE); // ccmn xzr, xzr, #3, LE
@ -840,155 +843,155 @@ void entry(CodeBuffer *cb) {
__ fmovd(v0, -1.0625); // fmov d0, #-1.0625 __ fmovd(v0, -1.0625); // fmov d0, #-1.0625
// LSEOp // LSEOp
__ swp(Assembler::xword, r13, r5, r29); // swp x13, x5, [x29] __ swp(Assembler::xword, r24, r21, r26); // swp x24, x21, [x26]
__ ldadd(Assembler::xword, r24, r21, r26); // ldadd x24, x21, [x26] __ ldadd(Assembler::xword, r24, r3, r24); // ldadd x24, x3, [x24]
__ ldbic(Assembler::xword, r24, r3, r24); // ldclr x24, x3, [x24] __ ldbic(Assembler::xword, r26, r23, r15); // ldclr x26, x23, [x15]
__ ldeor(Assembler::xword, r26, r23, r15); // ldeor x26, x23, [x15] __ ldeor(Assembler::xword, r21, r3, r24); // ldeor x21, x3, [x24]
__ ldorr(Assembler::xword, r21, r3, r24); // ldset x21, x3, [x24] __ ldorr(Assembler::xword, r8, r25, r20); // ldset x8, x25, [x20]
__ ldsmin(Assembler::xword, r8, r25, r20); // ldsmin x8, x25, [x20] __ ldsmin(Assembler::xword, r16, r17, r2); // ldsmin x16, x17, [x2]
__ ldsmax(Assembler::xword, r16, r17, r2); // ldsmax x16, x17, [x2] __ ldsmax(Assembler::xword, r1, r0, r24); // ldsmax x1, x0, [x24]
__ ldumin(Assembler::xword, r1, r0, r24); // ldumin x1, x0, [x24] __ ldumin(Assembler::xword, r4, r3, r12); // ldumin x4, x3, [x12]
__ ldumax(Assembler::xword, r4, r3, r12); // ldumax x4, x3, [x12] __ ldumax(Assembler::xword, zr, r28, r10); // ldumax xzr, x28, [x10]
// LSEOp // LSEOp
__ swpa(Assembler::xword, zr, r28, r10); // swpa xzr, x28, [x10] __ swpa(Assembler::xword, r26, r2, r12); // swpa x26, x2, [x12]
__ ldadda(Assembler::xword, r26, r2, r12); // ldadda x26, x2, [x12] __ ldadda(Assembler::xword, r16, zr, r1); // ldadda x16, xzr, [x1]
__ ldbica(Assembler::xword, r16, zr, r1); // ldclra x16, xzr, [x1] __ ldbica(Assembler::xword, r13, r29, r0); // ldclra x13, x29, [x0]
__ ldeora(Assembler::xword, r13, r29, r0); // ldeora x13, x29, [x0] __ ldeora(Assembler::xword, r19, r12, r17); // ldeora x19, x12, [x17]
__ ldorra(Assembler::xword, r19, r12, r17); // ldseta x19, x12, [x17] __ ldorra(Assembler::xword, r22, r13, r28); // ldseta x22, x13, [x28]
__ ldsmina(Assembler::xword, r22, r13, r28); // ldsmina x22, x13, [x28] __ ldsmina(Assembler::xword, r30, zr, r1); // ldsmina x30, xzr, [x1]
__ ldsmaxa(Assembler::xword, r30, zr, r1); // ldsmaxa x30, xzr, [x1] __ ldsmaxa(Assembler::xword, r26, r28, r4); // ldsmaxa x26, x28, [x4]
__ ldumina(Assembler::xword, r26, r28, r4); // ldumina x26, x28, [x4] __ ldumina(Assembler::xword, r30, r4, r6); // ldumina x30, x4, [x6]
__ ldumaxa(Assembler::xword, r30, r4, r6); // ldumaxa x30, x4, [x6] __ ldumaxa(Assembler::xword, r30, r26, r15); // ldumaxa x30, x26, [x15]
// LSEOp // LSEOp
__ swpal(Assembler::xword, r30, r26, r15); // swpal x30, x26, [x15] __ swpal(Assembler::xword, r9, r8, r12); // swpal x9, x8, [x12]
__ ldaddal(Assembler::xword, r9, r8, r12); // ldaddal x9, x8, [x12] __ ldaddal(Assembler::xword, r0, r20, r1); // ldaddal x0, x20, [x1]
__ ldbical(Assembler::xword, r0, r20, r1); // ldclral x0, x20, [x1] __ ldbical(Assembler::xword, r24, r2, r0); // ldclral x24, x2, [x0]
__ ldeoral(Assembler::xword, r24, r2, r0); // ldeoral x24, x2, [x0] __ ldeoral(Assembler::xword, r9, r24, r26); // ldeoral x9, x24, [x26]
__ ldorral(Assembler::xword, r9, r24, r26); // ldsetal x9, x24, [x26] __ ldorral(Assembler::xword, r16, r30, r3); // ldsetal x16, x30, [x3]
__ ldsminal(Assembler::xword, r16, r30, r3); // ldsminal x16, x30, [x3] __ ldsminal(Assembler::xword, r10, r23, r10); // ldsminal x10, x23, [x10]
__ ldsmaxal(Assembler::xword, r10, r23, r10); // ldsmaxal x10, x23, [x10] __ ldsmaxal(Assembler::xword, r4, r16, r2); // ldsmaxal x4, x16, [x2]
__ lduminal(Assembler::xword, r4, r16, r2); // lduminal x4, x16, [x2] __ lduminal(Assembler::xword, r11, r8, r10); // lduminal x11, x8, [x10]
__ ldumaxal(Assembler::xword, r11, r8, r10); // ldumaxal x11, x8, [x10] __ ldumaxal(Assembler::xword, r15, r17, r2); // ldumaxal x15, x17, [x2]
// LSEOp // LSEOp
__ swpl(Assembler::xword, r15, r17, r2); // swpl x15, x17, [x2] __ swpl(Assembler::xword, r10, r12, r12); // swpl x10, x12, [x12]
__ ldaddl(Assembler::xword, r10, r12, r12); // ldaddl x10, x12, [x12] __ ldaddl(Assembler::xword, r15, r13, r2); // ldaddl x15, x13, [x2]
__ ldbicl(Assembler::xword, r15, r13, r2); // ldclrl x15, x13, [x2] __ ldbicl(Assembler::xword, r7, r20, r26); // ldclrl x7, x20, [x26]
__ ldeorl(Assembler::xword, r7, r20, r26); // ldeorl x7, x20, [x26] __ ldeorl(Assembler::xword, r16, r4, r2); // ldeorl x16, x4, [x2]
__ ldorrl(Assembler::xword, r16, r4, r2); // ldsetl x16, x4, [x2] __ ldorrl(Assembler::xword, r4, r12, r15); // ldsetl x4, x12, [x15]
__ ldsminl(Assembler::xword, r4, r12, r15); // ldsminl x4, x12, [x15] __ ldsminl(Assembler::xword, r21, r16, r15); // ldsminl x21, x16, [x15]
__ ldsmaxl(Assembler::xword, r21, r16, r15); // ldsmaxl x21, x16, [x15] __ ldsmaxl(Assembler::xword, r11, r21, r23); // ldsmaxl x11, x21, [x23]
__ lduminl(Assembler::xword, r11, r21, r23); // lduminl x11, x21, [x23] __ lduminl(Assembler::xword, r12, r26, r23); // lduminl x12, x26, [x23]
__ ldumaxl(Assembler::xword, r12, r26, r23); // ldumaxl x12, x26, [x23] __ ldumaxl(Assembler::xword, r28, r14, r11); // ldumaxl x28, x14, [x11]
// LSEOp // LSEOp
__ swp(Assembler::word, r28, r14, r11); // swp w28, w14, [x11] __ swp(Assembler::word, r24, r1, r12); // swp w24, w1, [x12]
__ ldadd(Assembler::word, r24, r1, r12); // ldadd w24, w1, [x12] __ ldadd(Assembler::word, zr, r10, r16); // ldadd wzr, w10, [x16]
__ ldbic(Assembler::word, zr, r10, r16); // ldclr wzr, w10, [x16] __ ldbic(Assembler::word, r7, r2, r3); // ldclr w7, w2, [x3]
__ ldeor(Assembler::word, r7, r2, r3); // ldeor w7, w2, [x3] __ ldeor(Assembler::word, r13, r19, r17); // ldeor w13, w19, [x17]
__ ldorr(Assembler::word, r13, r19, r17); // ldset w13, w19, [x17] __ ldorr(Assembler::word, r16, r3, r1); // ldset w16, w3, [x1]
__ ldsmin(Assembler::word, r16, r3, r1); // ldsmin w16, w3, [x1] __ ldsmin(Assembler::word, r11, r30, r5); // ldsmin w11, w30, [x5]
__ ldsmax(Assembler::word, r11, r30, r5); // ldsmax w11, w30, [x5] __ ldsmax(Assembler::word, r8, r15, r29); // ldsmax w8, w15, [x29]
__ ldumin(Assembler::word, r8, r15, r29); // ldumin w8, w15, [x29] __ ldumin(Assembler::word, r30, r0, r20); // ldumin w30, w0, [x20]
__ ldumax(Assembler::word, r30, r0, r20); // ldumax w30, w0, [x20] __ ldumax(Assembler::word, r7, r20, r23); // ldumax w7, w20, [x23]
// LSEOp // LSEOp
__ swpa(Assembler::word, r7, r20, r23); // swpa w7, w20, [x23] __ swpa(Assembler::word, r28, r21, r27); // swpa w28, w21, [x27]
__ ldadda(Assembler::word, r28, r21, r27); // ldadda w28, w21, [x27] __ ldadda(Assembler::word, r25, r5, r1); // ldadda w25, w5, [x1]
__ ldbica(Assembler::word, r25, r5, r1); // ldclra w25, w5, [x1] __ ldbica(Assembler::word, r23, r16, sp); // ldclra w23, w16, [sp]
__ ldeora(Assembler::word, r23, r16, sp); // ldeora w23, w16, [sp] __ ldeora(Assembler::word, r5, r12, r9); // ldeora w5, w12, [x9]
__ ldorra(Assembler::word, r5, r12, r9); // ldseta w5, w12, [x9] __ ldorra(Assembler::word, r28, r15, r29); // ldseta w28, w15, [x29]
__ ldsmina(Assembler::word, r28, r15, r29); // ldsmina w28, w15, [x29] __ ldsmina(Assembler::word, r22, zr, r19); // ldsmina w22, wzr, [x19]
__ ldsmaxa(Assembler::word, r22, zr, r19); // ldsmaxa w22, wzr, [x19] __ ldsmaxa(Assembler::word, zr, r5, r14); // ldsmaxa wzr, w5, [x14]
__ ldumina(Assembler::word, zr, r5, r14); // ldumina wzr, w5, [x14] __ ldumina(Assembler::word, r16, zr, r15); // ldumina w16, wzr, [x15]
__ ldumaxa(Assembler::word, r16, zr, r15); // ldumaxa w16, wzr, [x15] __ ldumaxa(Assembler::word, r27, r20, r16); // ldumaxa w27, w20, [x16]
// LSEOp // LSEOp
__ swpal(Assembler::word, r27, r20, r16); // swpal w27, w20, [x16] __ swpal(Assembler::word, r12, r11, r9); // swpal w12, w11, [x9]
__ ldaddal(Assembler::word, r12, r11, r9); // ldaddal w12, w11, [x9] __ ldaddal(Assembler::word, r6, r30, r17); // ldaddal w6, w30, [x17]
__ ldbical(Assembler::word, r6, r30, r17); // ldclral w6, w30, [x17] __ ldbical(Assembler::word, r27, r28, r30); // ldclral w27, w28, [x30]
__ ldeoral(Assembler::word, r27, r28, r30); // ldeoral w27, w28, [x30] __ ldeoral(Assembler::word, r7, r10, r20); // ldeoral w7, w10, [x20]
__ ldorral(Assembler::word, r7, r10, r20); // ldsetal w7, w10, [x20] __ ldorral(Assembler::word, r10, r4, r24); // ldsetal w10, w4, [x24]
__ ldsminal(Assembler::word, r10, r4, r24); // ldsminal w10, w4, [x24] __ ldsminal(Assembler::word, r17, r17, r22); // ldsminal w17, w17, [x22]
__ ldsmaxal(Assembler::word, r17, r17, r22); // ldsmaxal w17, w17, [x22] __ ldsmaxal(Assembler::word, r3, r29, r15); // ldsmaxal w3, w29, [x15]
__ lduminal(Assembler::word, r3, r29, r15); // lduminal w3, w29, [x15] __ lduminal(Assembler::word, r22, r19, r19); // lduminal w22, w19, [x19]
__ ldumaxal(Assembler::word, r22, r19, r19); // ldumaxal w22, w19, [x19] __ ldumaxal(Assembler::word, r22, r2, r15); // ldumaxal w22, w2, [x15]
// LSEOp // LSEOp
__ swpl(Assembler::word, r22, r2, r15); // swpl w22, w2, [x15] __ swpl(Assembler::word, r6, r12, r16); // swpl w6, w12, [x16]
__ ldaddl(Assembler::word, r6, r12, r16); // ldaddl w6, w12, [x16] __ ldaddl(Assembler::word, r11, r13, r23); // ldaddl w11, w13, [x23]
__ ldbicl(Assembler::word, r11, r13, r23); // ldclrl w11, w13, [x23] __ ldbicl(Assembler::word, r1, r30, r19); // ldclrl w1, w30, [x19]
__ ldeorl(Assembler::word, r1, r30, r19); // ldeorl w1, w30, [x19] __ ldeorl(Assembler::word, r5, r17, r2); // ldeorl w5, w17, [x2]
__ ldorrl(Assembler::word, r5, r17, r2); // ldsetl w5, w17, [x2] __ ldorrl(Assembler::word, r16, r22, r13); // ldsetl w16, w22, [x13]
__ ldsminl(Assembler::word, r16, r22, r13); // ldsminl w16, w22, [x13] __ ldsminl(Assembler::word, r10, r21, r29); // ldsminl w10, w21, [x29]
__ ldsmaxl(Assembler::word, r10, r21, r29); // ldsmaxl w10, w21, [x29] __ ldsmaxl(Assembler::word, r27, r12, r27); // ldsmaxl w27, w12, [x27]
__ lduminl(Assembler::word, r27, r12, r27); // lduminl w27, w12, [x27] __ lduminl(Assembler::word, r3, r1, sp); // lduminl w3, w1, [sp]
__ ldumaxl(Assembler::word, r3, r1, sp); // ldumaxl w3, w1, [sp] __ ldumaxl(Assembler::word, r24, r19, r17); // ldumaxl w24, w19, [x17]
// SHA3SIMDOp // SHA3SIMDOp
__ bcax(v23, __ T16B, v19, v17, v9); // bcax v23.16B, v19.16B, v17.16B, v9.16B __ bcax(v9, __ T16B, v27, v26, v14); // bcax v9.16B, v27.16B, v26.16B, v14.16B
__ eor3(v27, __ T16B, v26, v14, v6); // eor3 v27.16B, v26.16B, v14.16B, v6.16B __ eor3(v6, __ T16B, v20, v22, v30); // eor3 v6.16B, v20.16B, v22.16B, v30.16B
__ rax1(v20, __ T2D, v22, v30); // rax1 v20.2D, v22.2D, v30.2D __ rax1(v24, __ T2D, v2, v30); // rax1 v24.2D, v2.2D, v30.2D
__ xar(v24, __ T2D, v2, v30, 54); // xar v24.2D, v2.2D, v30.2D, #54 __ xar(v26, __ T2D, v17, v10, 46); // xar v26.2D, v17.2D, v10.2D, #46
// SHA512SIMDOp // SHA512SIMDOp
__ sha512h(v17, __ T2D, v10, v22); // sha512h q17, q10, v22.2D __ sha512h(v17, __ T2D, v2, v17); // sha512h q17, q2, v17.2D
__ sha512h2(v17, __ T2D, v2, v17); // sha512h2 q17, q2, v17.2D __ sha512h2(v0, __ T2D, v24, v25); // sha512h2 q0, q24, v25.2D
__ sha512su0(v0, __ T2D, v24); // sha512su0 v0.2D, v24.2D __ sha512su0(v22, __ T2D, v2); // sha512su0 v22.2D, v2.2D
__ sha512su1(v25, __ T2D, v22, v2); // sha512su1 v25.2D, v22.2D, v2.2D __ sha512su1(v17, __ T2D, v12, v3); // sha512su1 v17.2D, v12.2D, v3.2D
// SVEVectorOp // SVEVectorOp
__ sve_add(z17, __ D, z12, z3); // add z17.d, z12.d, z3.d __ sve_add(z27, __ S, z29, z28); // add z27.s, z29.s, z28.s
__ sve_sub(z29, __ D, z28, z16); // sub z29.d, z28.d, z16.d __ sve_sub(z26, __ D, z6, z9); // sub z26.d, z6.d, z9.d
__ sve_fadd(z6, __ D, z9, z28); // fadd z6.d, z9.d, z28.d __ sve_fadd(z17, __ S, z7, z4); // fadd z17.s, z7.s, z4.s
__ sve_fmul(z7, __ S, z4, z7); // fmul z7.s, z4.s, z7.s __ sve_fmul(z15, __ S, z9, z22); // fmul z15.s, z9.s, z22.s
__ sve_fsub(z9, __ S, z22, z8); // fsub z9.s, z22.s, z8.s __ sve_fsub(z2, __ D, z27, z20); // fsub z2.d, z27.d, z20.d
__ sve_abs(z27, __ B, p5, z30); // abs z27.b, p5/m, z30.b __ sve_abs(z5, __ S, p6, z0); // abs z5.s, p6/m, z0.s
__ sve_add(z26, __ H, p0, z16); // add z26.h, p0/m, z26.h, z16.h __ sve_add(z14, __ H, p1, z25); // add z14.h, p1/m, z14.h, z25.h
__ sve_asr(z3, __ D, p6, z8); // asr z3.d, p6/m, z3.d, z8.d __ sve_asr(z27, __ D, p5, z26); // asr z27.d, p5/m, z27.d, z26.d
__ sve_cnt(z21, __ D, p6, z26); // cnt z21.d, p6/m, z26.d __ sve_cnt(z24, __ B, p5, z0); // cnt z24.b, p5/m, z0.b
__ sve_lsl(z22, __ B, p0, z4); // lsl z22.b, p0/m, z22.b, z4.b __ sve_lsl(z6, __ B, p4, z0); // lsl z6.b, p4/m, z6.b, z0.b
__ sve_lsr(z17, __ H, p0, z3); // lsr z17.h, p0/m, z17.h, z3.h __ sve_lsr(z15, __ B, p0, z9); // lsr z15.b, p0/m, z15.b, z9.b
__ sve_mul(z1, __ B, p2, z6); // mul z1.b, p2/m, z1.b, z6.b __ sve_mul(z5, __ B, p2, z27); // mul z5.b, p2/m, z5.b, z27.b
__ sve_neg(z9, __ S, p7, z7); // neg z9.s, p7/m, z7.s __ sve_neg(z20, __ B, p5, z20); // neg z20.b, p5/m, z20.b
__ sve_not(z22, __ H, p5, z5); // not z22.h, p5/m, z5.h __ sve_not(z10, __ D, p2, z16); // not z10.d, p2/m, z16.d
__ sve_smax(z8, __ B, p4, z30); // smax z8.b, p4/m, z8.b, z30.b __ sve_smax(z6, __ H, p4, z2); // smax z6.h, p4/m, z6.h, z2.h
__ sve_smin(z17, __ D, p0, z11); // smin z17.d, p0/m, z17.d, z11.d __ sve_smin(z29, __ D, p7, z2); // smin z29.d, p7/m, z29.d, z2.d
__ sve_sub(z28, __ S, p0, z26); // sub z28.s, p0/m, z28.s, z26.s __ sve_sub(z22, __ H, p7, z14); // sub z22.h, p7/m, z22.h, z14.h
__ sve_fabs(z28, __ D, p3, z13); // fabs z28.d, p3/m, z13.d __ sve_fabs(z27, __ S, p4, z23); // fabs z27.s, p4/m, z23.s
__ sve_fadd(z16, __ S, p6, z5); // fadd z16.s, p6/m, z16.s, z5.s __ sve_fadd(z2, __ D, p3, z10); // fadd z2.d, p3/m, z2.d, z10.d
__ sve_fdiv(z13, __ S, p2, z15); // fdiv z13.s, p2/m, z13.s, z15.s __ sve_fdiv(z10, __ S, p6, z22); // fdiv z10.s, p6/m, z10.s, z22.s
__ sve_fmax(z26, __ S, p5, z11); // fmax z26.s, p5/m, z26.s, z11.s __ sve_fmax(z3, __ S, p5, z16); // fmax z3.s, p5/m, z3.s, z16.s
__ sve_fmin(z22, __ S, p4, z4); // fmin z22.s, p4/m, z22.s, z4.s __ sve_fmin(z1, __ D, p4, z16); // fmin z1.d, p4/m, z1.d, z16.d
__ sve_fmul(z19, __ S, p4, z17); // fmul z19.s, p4/m, z19.s, z17.s __ sve_fmul(z12, __ S, p3, z12); // fmul z12.s, p3/m, z12.s, z12.s
__ sve_fneg(z14, __ D, p3, z2); // fneg z14.d, p3/m, z2.d __ sve_fneg(z16, __ D, p0, z20); // fneg z16.d, p0/m, z20.d
__ sve_frintm(z3, __ S, p5, z23); // frintm z3.s, p5/m, z23.s __ sve_frintm(z5, __ D, p1, z7); // frintm z5.d, p1/m, z7.d
__ sve_frintn(z6, __ S, p1, z17); // frintn z6.s, p1/m, z17.s __ sve_frintn(z12, __ D, p7, z16); // frintn z12.d, p7/m, z16.d
__ sve_frintp(z27, __ S, p4, z16); // frintp z27.s, p4/m, z16.s __ sve_frintp(z6, __ S, p0, z28); // frintp z6.s, p0/m, z28.s
__ sve_fsqrt(z2, __ S, p7, z3); // fsqrt z2.s, p7/m, z3.s __ sve_fsqrt(z4, __ D, p1, z17); // fsqrt z4.d, p1/m, z17.d
__ sve_fsub(z6, __ S, p4, z19); // fsub z6.s, p4/m, z6.s, z19.s __ sve_fsub(z13, __ S, p3, z19); // fsub z13.s, p3/m, z13.s, z19.s
__ sve_fmla(z12, __ D, p5, z8, z24); // fmla z12.d, p5/m, z8.d, z24.d __ sve_fmla(z24, __ S, p5, z17, z0); // fmla z24.s, p5/m, z17.s, z0.s
__ sve_fmls(z17, __ S, p0, z10, z23); // fmls z17.s, p0/m, z10.s, z23.s __ sve_fmls(z23, __ S, p1, z19, z30); // fmls z23.s, p1/m, z19.s, z30.s
__ sve_fnmla(z19, __ S, p7, z13, z16); // fnmla z19.s, p7/m, z13.s, z16.s __ sve_fnmla(z16, __ S, p1, z0, z7); // fnmla z16.s, p1/m, z0.s, z7.s
__ sve_fnmls(z0, __ D, p1, z14, z17); // fnmls z0.d, p1/m, z14.d, z17.d __ sve_fnmls(z17, __ D, p6, z8, z10); // fnmls z17.d, p6/m, z8.d, z10.d
__ sve_mla(z8, __ S, p2, z22, z20); // mla z8.s, p2/m, z22.s, z20.s __ sve_mla(z20, __ B, p5, z27, z2); // mla z20.b, p5/m, z27.b, z2.b
__ sve_mls(z27, __ S, p0, z3, z15); // mls z27.s, p0/m, z3.s, z15.s __ sve_mls(z15, __ B, p4, z20, z7); // mls z15.b, p4/m, z20.b, z7.b
__ sve_and(z20, z7, z4); // and z20.d, z7.d, z4.d __ sve_and(z28, z7, z0); // and z28.d, z7.d, z0.d
__ sve_eor(z7, z0, z8); // eor z7.d, z0.d, z8.d __ sve_eor(z16, z19, z22); // eor z16.d, z19.d, z22.d
__ sve_orr(z19, z22, z4); // orr z19.d, z22.d, z4.d __ sve_orr(z15, z9, z22); // orr z15.d, z9.d, z22.d
// SVEReductionOp // SVEReductionOp
__ sve_andv(v9, __ D, p5, z11); // andv d9, p5, z11.d __ sve_andv(v25, __ S, p1, z30); // andv s25, p1, z30.s
__ sve_orv(v5, __ H, p7, z16); // orv h5, p7, z16.h __ sve_orv(v13, __ B, p5, z11); // orv b13, p5, z11.b
__ sve_eorv(v22, __ H, p3, z1); // eorv h22, p3, z1.h __ sve_eorv(v13, __ S, p2, z20); // eorv s13, p2, z20.s
__ sve_smaxv(v8, __ D, p5, z16); // smaxv d8, p5, z16.d __ sve_smaxv(v25, __ B, p3, z4); // smaxv b25, p3, z4.b
__ sve_sminv(v15, __ S, p1, z4); // sminv s15, p1, z4.s __ sve_sminv(v17, __ D, p2, z6); // sminv d17, p2, z6.d
__ sve_fminv(v8, __ S, p1, z29); // fminv s8, p1, z29.s __ sve_fminv(v4, __ D, p7, z16); // fminv d4, p7, z16.d
__ sve_fmaxv(v28, __ D, p4, z29); // fmaxv d28, p4, z29.d __ sve_fmaxv(v26, __ S, p2, z14); // fmaxv s26, p2, z14.s
__ sve_fadda(v9, __ S, p3, z2); // fadda s9, p3, s9, z2.s __ sve_fadda(v11, __ S, p7, z3); // fadda s11, p7, s11, z3.s
__ sve_uaddv(v28, __ B, p0, z7); // uaddv d28, p0, z7.b __ sve_uaddv(v1, __ S, p6, z21); // uaddv d1, p6, z21.s
__ bind(forth); __ bind(forth);
@ -1007,30 +1010,30 @@ void entry(CodeBuffer *cb) {
0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061, 0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061,
0x120cb166, 0x321764bc, 0x52174681, 0x720c0227, 0x120cb166, 0x321764bc, 0x52174681, 0x720c0227,
0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01, 0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01,
0x14000000, 0x17ffffd7, 0x140002cd, 0x94000000, 0x14000000, 0x17ffffd7, 0x140002d0, 0x94000000,
0x97ffffd4, 0x940002ca, 0x3400000a, 0x34fffa2a, 0x97ffffd4, 0x940002cd, 0x3400000a, 0x34fffa2a,
0x340058ea, 0x35000008, 0x35fff9c8, 0x35005888, 0x3400594a, 0x35000008, 0x35fff9c8, 0x350058e8,
0xb400000b, 0xb4fff96b, 0xb400582b, 0xb500001d, 0xb400000b, 0xb4fff96b, 0xb400588b, 0xb500001d,
0xb5fff91d, 0xb50057dd, 0x10000013, 0x10fff8b3, 0xb5fff91d, 0xb500583d, 0x10000013, 0x10fff8b3,
0x10005773, 0x90000013, 0x36300016, 0x3637f836, 0x100057d3, 0x90000013, 0x36300016, 0x3637f836,
0x363056f6, 0x3758000c, 0x375ff7cc, 0x3758568c, 0x36305756, 0x3758000c, 0x375ff7cc, 0x375856ec,
0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc, 0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc,
0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f, 0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f,
0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016, 0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016,
0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0, 0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0,
0x54005460, 0x54000001, 0x54fff541, 0x54005401, 0x540054c0, 0x54000001, 0x54fff541, 0x54005461,
0x54000002, 0x54fff4e2, 0x540053a2, 0x54000002, 0x54000002, 0x54fff4e2, 0x54005402, 0x54000002,
0x54fff482, 0x54005342, 0x54000003, 0x54fff423, 0x54fff482, 0x540053a2, 0x54000003, 0x54fff423,
0x540052e3, 0x54000003, 0x54fff3c3, 0x54005283, 0x54005343, 0x54000003, 0x54fff3c3, 0x540052e3,
0x54000004, 0x54fff364, 0x54005224, 0x54000005, 0x54000004, 0x54fff364, 0x54005284, 0x54000005,
0x54fff305, 0x540051c5, 0x54000006, 0x54fff2a6, 0x54fff305, 0x54005225, 0x54000006, 0x54fff2a6,
0x54005166, 0x54000007, 0x54fff247, 0x54005107, 0x540051c6, 0x54000007, 0x54fff247, 0x54005167,
0x54000008, 0x54fff1e8, 0x540050a8, 0x54000009, 0x54000008, 0x54fff1e8, 0x54005108, 0x54000009,
0x54fff189, 0x54005049, 0x5400000a, 0x54fff12a, 0x54fff189, 0x540050a9, 0x5400000a, 0x54fff12a,
0x54004fea, 0x5400000b, 0x54fff0cb, 0x54004f8b, 0x5400504a, 0x5400000b, 0x54fff0cb, 0x54004feb,
0x5400000c, 0x54fff06c, 0x54004f2c, 0x5400000d, 0x5400000c, 0x54fff06c, 0x54004f8c, 0x5400000d,
0x54fff00d, 0x54004ecd, 0x5400000e, 0x54ffefae, 0x54fff00d, 0x54004f2d, 0x5400000e, 0x54ffefae,
0x54004e6e, 0x5400000f, 0x54ffef4f, 0x54004e0f, 0x54004ece, 0x5400000f, 0x54ffef4f, 0x54004e6f,
0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60, 0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60,
0xd44cad80, 0xd503201f, 0xd69f03e0, 0xd6bf03e0, 0xd44cad80, 0xd503201f, 0xd69f03e0, 0xd6bf03e0,
0xd5033fdf, 0xd5033e9f, 0xd50332bf, 0xd61f0200, 0xd5033fdf, 0xd5033e9f, 0xd50332bf, 0xd61f0200,
@ -1062,7 +1065,7 @@ void entry(CodeBuffer *cb) {
0x791f226d, 0xf95aa2f3, 0xb9587bb7, 0x395f7176, 0x791f226d, 0xf95aa2f3, 0xb9587bb7, 0x395f7176,
0x795d9143, 0x399e7e08, 0x799a2697, 0x79df3422, 0x795d9143, 0x399e7e08, 0x799a2697, 0x79df3422,
0xb99c2624, 0xfd5c2374, 0xbd5fa1d9, 0xfd1d595a, 0xb99c2624, 0xfd5c2374, 0xbd5fa1d9, 0xfd1d595a,
0xbd1b1869, 0x58003e5b, 0x1800000b, 0xf8945060, 0xbd1b1869, 0x58003ebb, 0x1800000b, 0xf8945060,
0xd8000000, 0xf8ae6ba0, 0xf99a0080, 0x1a070035, 0xd8000000, 0xf8ae6ba0, 0xf99a0080, 0x1a070035,
0x3a0700a8, 0x5a0e0367, 0x7a11009b, 0x9a000380, 0x3a0700a8, 0x5a0e0367, 0x7a11009b, 0x9a000380,
0xba1e030c, 0xda0f0320, 0xfa030301, 0x0b340b11, 0xba1e030c, 0xda0f0320, 0xfa030301, 0x0b340b11,
@ -1080,9 +1083,9 @@ void entry(CodeBuffer *cb) {
0x9adc2a3b, 0x9ad12c5c, 0x9bce7dea, 0x9b597c6e, 0x9adc2a3b, 0x9ad12c5c, 0x9bce7dea, 0x9b597c6e,
0x1b0e166f, 0x1b1ae490, 0x9b023044, 0x9b089e3d, 0x1b0e166f, 0x1b1ae490, 0x9b023044, 0x9b089e3d,
0x9b391083, 0x9b24c73a, 0x9bb15f40, 0x9bbcc6af, 0x9b391083, 0x9b24c73a, 0x9bb15f40, 0x9bbcc6af,
0x1e23095b, 0x1e3918e0, 0x1e2f28c9, 0x1e2a39fd, 0x7ea3d55b, 0x1e3908e0, 0x1e2f18c9, 0x1e2a29fd,
0x1e270a22, 0x1e77096b, 0x1e771ba7, 0x1e6b2b6e, 0x1e273a22, 0x7ef7d56b, 0x1e770ba7, 0x1e6b1b6e,
0x1e78388b, 0x1e6e09ec, 0x1f1c3574, 0x1f17f98b, 0x1e78288b, 0x1e6e39ec, 0x1f1c3574, 0x1f17f98b,
0x1f2935da, 0x1f2574ea, 0x1f4b306f, 0x1f5ec7cf, 0x1f2935da, 0x1f2574ea, 0x1f4b306f, 0x1f5ec7cf,
0x1f6f3e93, 0x1f6226a9, 0x1e2040fb, 0x1e20c3dd, 0x1f6f3e93, 0x1f6226a9, 0x1e2040fb, 0x1e20c3dd,
0x1e214031, 0x1e21c0c2, 0x1e22c06a, 0x1e604178, 0x1e214031, 0x1e21c0c2, 0x1e22c06a, 0x1e604178,
@ -1118,75 +1121,76 @@ void entry(CodeBuffer *cb) {
0x2eac856a, 0x6eaf85cd, 0x6ef085ee, 0x0eb6d6b4, 0x2eac856a, 0x6eaf85cd, 0x6ef085ee, 0x0eb6d6b4,
0x4ea3d441, 0x4ef8d6f6, 0x0e209ffe, 0x4e309dee, 0x4ea3d441, 0x4ef8d6f6, 0x0e209ffe, 0x4e309dee,
0x0e649c62, 0x4e689ce6, 0x0ea59c83, 0x4ea99d07, 0x0e649c62, 0x4e689ce6, 0x0ea59c83, 0x4ea99d07,
0x2e3adf38, 0x6e22dc20, 0x6e7ddf9b, 0x0e7f97dd, 0x2ebad738, 0x6ea2d420, 0x6efdd79b, 0x2e3fdfdd,
0x4e6794c5, 0x0ea794c5, 0x4ebf97dd, 0x0e2dcd8b, 0x6e27dcc5, 0x6e67dcc5, 0x0e7f97dd, 0x4e6d958b,
0x4e3bcf59, 0x4e62cc20, 0x2e6097fe, 0x6e629420, 0x0ebb9759, 0x4ea29420, 0x0e20cffe, 0x4e22cc20,
0x2eb39651, 0x6ebe97bc, 0x0ebbcf59, 0x4eabcd49, 0x4e73ce51, 0x2e7e97bc, 0x6e7b9759, 0x2eab9549,
0x4efbcf59, 0x2e2efdac, 0x6e31fe0f, 0x6e6dfd8b, 0x6ebb9759, 0x0eaecdac, 0x4eb1ce0f, 0x4eedcd8b,
0x0e2c656a, 0x4e336651, 0x0e7a6738, 0x4e7766d5, 0x2e2cfd6a, 0x6e33fe51, 0x6e7aff38, 0x0e3766d5,
0x0eb96717, 0x4ea26420, 0x0e32f630, 0x4e2cf56a, 0x4e396717, 0x0e626420, 0x4e726630, 0x0eac656a,
0x4e68f4e6, 0x0e3e6fbc, 0x4e286ce6, 0x0e676cc5, 0x4ea864e6, 0x0e3ef7bc, 0x4e28f4e6, 0x4e67f4c5,
0x4e676cc5, 0x0eb66eb4, 0x4eb36e51, 0x0eb1f60f, 0x0e276cc5, 0x4e366eb4, 0x0e736e51, 0x4e716e0f,
0x4eb3f651, 0x4efff7dd, 0x2e3c8f7a, 0x6e3e8fbc, 0x0eb36e51, 0x4ebf6fdd, 0x0ebcf77a, 0x4ebef7bc,
0x2e638c41, 0x6e7d8f9b, 0x2ea28c20, 0x6eb68eb4, 0x4ee3f441, 0x2e3d8f9b, 0x6e228c20, 0x2e768eb4,
0x6efe8fbc, 0x0e31e60f, 0x4e2ee5ac, 0x4e6ce56a, 0x6e7e8fbc, 0x2eb18e0f, 0x6eae8dac, 0x6eec8d6a,
0x0e3e37bc, 0x4e3e37bc, 0x0e753693, 0x4e7836f6, 0x0e3ee7bc, 0x4e3ee7bc, 0x4e75e693, 0x0e3836f6,
0x0eac356a, 0x4ea634a4, 0x4ee037fe, 0x2eb6e6b4, 0x4e2c356a, 0x0e6634a4, 0x4e6037fe, 0x0eb636b4,
0x6eaae528, 0x6ee0e7fe, 0x0e333e51, 0x4e2c3d6a, 0x4eaa3528, 0x4ee037fe, 0x2eb3e651, 0x6eace56a,
0x0e7d3f9b, 0x4e643c62, 0x0eba3f38, 0x4ea63ca4, 0x6efde79b, 0x0e243c62, 0x4e3a3f38, 0x0e663ca4,
0x4ee53c83, 0x2e2ae528, 0x6e38e6f6, 0x6e73e651, 0x4e653c83, 0x0eaa3d28, 0x4eb83ef6, 0x4ef33e51,
0xba5fd3e3, 0x3a5f03e5, 0xfa411be4, 0x7a42cbe2, 0x2e2fe5cd, 0x6e26e4a4, 0x6e7ee7bc, 0xba5fd3e3,
0x93df03ff, 0xc820ffff, 0x8822fc7f, 0xc8247cbf, 0x3a5f03e5, 0xfa411be4, 0x7a42cbe2, 0x93df03ff,
0x88267fff, 0x4e010fe0, 0x4e081fe1, 0x4e0c1fe1, 0xc820ffff, 0x8822fc7f, 0xc8247cbf, 0x88267fff,
0x4e0a1fe1, 0x4e071fe1, 0x4cc0ac3f, 0x05a08020, 0x4e010fe0, 0x4e081fe1, 0x4e0c1fe1, 0x4e0a1fe1,
0x04b0e3e0, 0x0470e7e1, 0x042f9c20, 0x043f9c35, 0x4e071fe1, 0x4cc0ac3f, 0x05a08020, 0x04b0e3e0,
0x047f9c20, 0x04ff9c20, 0x04299420, 0x04319160, 0x0470e7e1, 0x042f9c20, 0x043f9c35, 0x047f9c20,
0x0461943e, 0x04a19020, 0x042053ff, 0x047f5401, 0x04ff9c20, 0x04299420, 0x04319160, 0x0461943e,
0x25208028, 0x2538cfe0, 0x2578d001, 0x25b8efe2, 0x04a19020, 0x042053ff, 0x047f5401, 0x25208028,
0x25f8f007, 0xa400a3e0, 0xa4a8a7ea, 0xa547a814, 0x2538cfe0, 0x2578d001, 0x25b8efe2, 0x25f8f007,
0xa4084ffe, 0xa55c53e0, 0xa5e1540b, 0xe400fbf6, 0xa400a3e0, 0xa4a8a7ea, 0xa547a814, 0xa4084ffe,
0xe408ffff, 0xe547e400, 0xe4014be0, 0xe4a84fe0, 0xa55c53e0, 0xa5e1540b, 0xe400fbf6, 0xe408ffff,
0xe5f15000, 0x858043e0, 0x85a043ff, 0xe59f5d08, 0xe547e400, 0xe4014be0, 0xe4a84fe0, 0xe5f15000,
0x1e601000, 0x1e603000, 0x1e621000, 0x1e623000, 0x858043e0, 0x85a043ff, 0xe59f5d08, 0x1e601000,
0x1e641000, 0x1e643000, 0x1e661000, 0x1e663000, 0x1e603000, 0x1e621000, 0x1e623000, 0x1e641000,
0x1e681000, 0x1e683000, 0x1e6a1000, 0x1e6a3000, 0x1e643000, 0x1e661000, 0x1e663000, 0x1e681000,
0x1e6c1000, 0x1e6c3000, 0x1e6e1000, 0x1e6e3000, 0x1e683000, 0x1e6a1000, 0x1e6a3000, 0x1e6c1000,
0x1e701000, 0x1e703000, 0x1e721000, 0x1e723000, 0x1e6c3000, 0x1e6e1000, 0x1e6e3000, 0x1e701000,
0x1e741000, 0x1e743000, 0x1e761000, 0x1e763000, 0x1e703000, 0x1e721000, 0x1e723000, 0x1e741000,
0x1e781000, 0x1e783000, 0x1e7a1000, 0x1e7a3000, 0x1e743000, 0x1e761000, 0x1e763000, 0x1e781000,
0x1e7c1000, 0x1e7c3000, 0x1e7e1000, 0x1e7e3000, 0x1e783000, 0x1e7a1000, 0x1e7a3000, 0x1e7c1000,
0xf82d83a5, 0xf8380355, 0xf8381303, 0xf83a21f7, 0x1e7c3000, 0x1e7e1000, 0x1e7e3000, 0xf8388355,
0xf8353303, 0xf8285299, 0xf8304051, 0xf8217300, 0xf8380303, 0xf83a11f7, 0xf8352303, 0xf8283299,
0xf8246183, 0xf8bf815c, 0xf8ba0182, 0xf8b0103f, 0xf8305051, 0xf8214300, 0xf8247183, 0xf83f615c,
0xf8ad201d, 0xf8b3322c, 0xf8b6538d, 0xf8be403f, 0xf8ba8182, 0xf8b0003f, 0xf8ad101d, 0xf8b3222c,
0xf8ba709c, 0xf8be60c4, 0xf8fe81fa, 0xf8e90188, 0xf8b6338d, 0xf8be503f, 0xf8ba409c, 0xf8be70c4,
0xf8e01034, 0xf8f82002, 0xf8e93358, 0xf8f0507e, 0xf8be61fa, 0xf8e98188, 0xf8e00034, 0xf8f81002,
0xf8ea4157, 0xf8e47050, 0xf8eb6148, 0xf86f8051, 0xf8e92358, 0xf8f0307e, 0xf8ea5157, 0xf8e44050,
0xf86a018c, 0xf86f104d, 0xf8672354, 0xf8703044, 0xf8eb7148, 0xf8ef6051, 0xf86a818c, 0xf86f004d,
0xf86451ec, 0xf87541f0, 0xf86b72f5, 0xf86c62fa, 0xf8671354, 0xf8702044, 0xf86431ec, 0xf87551f0,
0xb83c816e, 0xb8380181, 0xb83f120a, 0xb8272062, 0xf86b42f5, 0xf86c72fa, 0xf87c616e, 0xb8388181,
0xb82d3233, 0xb8305023, 0xb82b40be, 0xb82873af, 0xb83f020a, 0xb8271062, 0xb82d2233, 0xb8303023,
0xb83e6280, 0xb8a782f4, 0xb8bc0375, 0xb8b91025, 0xb82b50be, 0xb82843af, 0xb83e7280, 0xb82762f4,
0xb8b723f0, 0xb8a5312c, 0xb8bc53af, 0xb8b6427f, 0xb8bc8375, 0xb8b90025, 0xb8b713f0, 0xb8a5212c,
0xb8bf71c5, 0xb8b061ff, 0xb8fb8214, 0xb8ec012b, 0xb8bc33af, 0xb8b6527f, 0xb8bf41c5, 0xb8b071ff,
0xb8e6123e, 0xb8fb23dc, 0xb8e7328a, 0xb8ea5304, 0xb8bb6214, 0xb8ec812b, 0xb8e6023e, 0xb8fb13dc,
0xb8f142d1, 0xb8e371fd, 0xb8f66273, 0xb87681e2, 0xb8e7228a, 0xb8ea3304, 0xb8f152d1, 0xb8e341fd,
0xb866020c, 0xb86b12ed, 0xb861227e, 0xb8653051, 0xb8f67273, 0xb8f661e2, 0xb866820c, 0xb86b02ed,
0xb87051b6, 0xb86a43b5, 0xb87b736c, 0xb86363e1, 0xb861127e, 0xb8652051, 0xb87031b6, 0xb86a53b5,
0xce312677, 0xce0e1b5b, 0xce7e8ed4, 0xce9ed858, 0xb87b436c, 0xb86373e1, 0xb8786233, 0xce3a3b69,
0xce768151, 0xce718451, 0xcec08300, 0xce628ad9, 0xce167a86, 0xce7e8c58, 0xce8aba3a, 0xce718051,
0x04e30191, 0x04f0079d, 0x65dc0126, 0x65870887, 0xce798700, 0xcec08056, 0xce638991, 0x04bc03bb,
0x658806c9, 0x0416b7db, 0x0440021a, 0x04d09903, 0x04e904da, 0x658400f1, 0x6596092f, 0x65d40762,
0x04dabb55, 0x04138096, 0x04518071, 0x041008c1, 0x0496b805, 0x0440072e, 0x04d0975b, 0x041ab418,
0x0497bce9, 0x045eb4b6, 0x040813c8, 0x04ca0171, 0x04139006, 0x0411812f, 0x04100b65, 0x0417b694,
0x0481035c, 0x04dcadbc, 0x658098b0, 0x658d89ed, 0x04deaa0a, 0x04481046, 0x04ca1c5d, 0x04411dd6,
0x6586957a, 0x65879096, 0x65829233, 0x04ddac4e, 0x049cb2fb, 0x65c08d42, 0x658d9aca, 0x65869603,
0x6582b6e3, 0x6580a626, 0x6581b21b, 0x658dbc62, 0x65c79201, 0x65828d8c, 0x04dda290, 0x65c2a4e5,
0x65819266, 0x65f8150c, 0x65b72151, 0x65b05db3, 0x65c0be0c, 0x6581a386, 0x65cda624, 0x65818e6d,
0x65f165c0, 0x04944ac8, 0x048f607b, 0x042430f4, 0x65a01638, 0x65be2677, 0x65a74410, 0x65ea7911,
0x04a83007, 0x046432d3, 0x04da3569, 0x04583e05, 0x04025774, 0x0407728f, 0x042030fc, 0x04b63270,
0x04592c36, 0x04c83608, 0x048a248f, 0x658727a8, 0x0476312f, 0x049a27d9, 0x0418356d, 0x04992a8d,
0x65c633bc, 0x65982c49, 0x040120fc, 0x04082c99, 0x04ca28d1, 0x65c73e04, 0x658629da,
0x65983c6b, 0x04813aa1,
}; };
// END Generated code -- do not edit // END Generated code -- do not edit

View File

@ -1949,7 +1949,7 @@ public:
starti; starti;
f(op31, 31, 29); f(op31, 31, 29);
f(0b11110, 28, 24); f(0b11110, 28, 24);
f(type, 23, 22), f(1, 21), f(opcode, 15, 12), f(0b10, 11, 10); f(type, 23, 22), f(1, 21), f(opcode, 15, 10);
rf(Vm, 16), rf(Vn, 5), rf(Vd, 0); rf(Vm, 16), rf(Vn, 5), rf(Vd, 0);
} }
@ -1958,21 +1958,23 @@ public:
data_processing(op31, type, opcode, Vd, Vn, Vm); \ data_processing(op31, type, opcode, Vd, Vn, Vm); \
} }
INSN(fmuls, 0b000, 0b00, 0b0000); INSN(fabds, 0b011, 0b10, 0b110101);
INSN(fdivs, 0b000, 0b00, 0b0001); INSN(fmuls, 0b000, 0b00, 0b000010);
INSN(fadds, 0b000, 0b00, 0b0010); INSN(fdivs, 0b000, 0b00, 0b000110);
INSN(fsubs, 0b000, 0b00, 0b0011); INSN(fadds, 0b000, 0b00, 0b001010);
INSN(fmaxs, 0b000, 0b00, 0b0100); INSN(fsubs, 0b000, 0b00, 0b001110);
INSN(fmins, 0b000, 0b00, 0b0101); INSN(fmaxs, 0b000, 0b00, 0b010010);
INSN(fnmuls, 0b000, 0b00, 0b1000); INSN(fmins, 0b000, 0b00, 0b010110);
INSN(fnmuls, 0b000, 0b00, 0b100010);
INSN(fmuld, 0b000, 0b01, 0b0000); INSN(fabdd, 0b011, 0b11, 0b110101);
INSN(fdivd, 0b000, 0b01, 0b0001); INSN(fmuld, 0b000, 0b01, 0b000010);
INSN(faddd, 0b000, 0b01, 0b0010); INSN(fdivd, 0b000, 0b01, 0b000110);
INSN(fsubd, 0b000, 0b01, 0b0011); INSN(faddd, 0b000, 0b01, 0b001010);
INSN(fmaxd, 0b000, 0b01, 0b0100); INSN(fsubd, 0b000, 0b01, 0b001110);
INSN(fmind, 0b000, 0b01, 0b0101); INSN(fmaxd, 0b000, 0b01, 0b010010);
INSN(fnmuld, 0b000, 0b01, 0b1000); INSN(fmind, 0b000, 0b01, 0b010110);
INSN(fnmuld, 0b000, 0b01, 0b100010);
#undef INSN #undef INSN
@ -2482,6 +2484,7 @@ public:
f(T==T2D ? 1:0, 22); f(1, 21), rf(Vm, 16), f(op3, 15, 10), rf(Vn, 5), rf(Vd, 0); \ f(T==T2D ? 1:0, 22); f(1, 21), rf(Vm, 16), f(op3, 15, 10), rf(Vn, 5), rf(Vd, 0); \
} }
INSN(fabd, 1, 1, 0b110101);
INSN(fadd, 0, 0, 0b110101); INSN(fadd, 0, 0, 0b110101);
INSN(fdiv, 1, 0, 0b111111); INSN(fdiv, 1, 0, 0b111111);
INSN(fmul, 1, 0, 0b110111); INSN(fmul, 1, 0, 0b110111);
@ -2689,7 +2692,7 @@ public:
INSN(sshr, 0, 0b000001, /* isSHR = */ true); INSN(sshr, 0, 0b000001, /* isSHR = */ true);
INSN(ushr, 1, 0b000001, /* isSHR = */ true); INSN(ushr, 1, 0b000001, /* isSHR = */ true);
INSN(usra, 1, 0b000101, /* isSHR = */ true); INSN(usra, 1, 0b000101, /* isSHR = */ true);
INSN(ssra, 0, 0b000101, /* isSHAR =*/ true); INSN(ssra, 0, 0b000101, /* isSHR = */ true);
#undef INSN #undef INSN

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2019, Huawei Technologies Co. Ltd. All rights reserved. * Copyright (c) 2019, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved. * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved. * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved. * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved. * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved. * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved. * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved. * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved. * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved. * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2019, Huawei Technologies Co. Ltd. All rights reserved. * Copyright (c) 2019, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved. * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved. * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved. * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved. * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved. * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it

View File

@ -0,0 +1,104 @@
/*
* Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.bench.vm.compiler;
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.*;
import java.util.concurrent.TimeUnit;
import java.util.Random;
/**
 * JMH micro-benchmark for the {@code Math.abs(a - b)} pattern on {@code float}
 * and {@code double} values, both as element-wise array loops and as scalar
 * dependency chains.
 *
 * <p>Input data is produced by a {@link Random} seeded from the {@code seed}
 * parameter so that runs are reproducible.
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Thread)
public class FloatingScalarVectorAbsDiff {
    /** Number of array elements, and number of iterations of the scalar loops. */
    @Param({"1024"})
    public int count;

    // Inputs (A, B) and destination (D) arrays for the array benchmarks.
    private float[] floatsA, floatsB, floatsD;
    private double[] doublesA, doublesB, doublesD;

    /** Seed for the pseudo-random input data. */
    @Param("316731")
    private int seed;

    // Created in init() rather than in a field initializer: JMH injects @Param
    // values only after the state object has been constructed, so an
    // initializer here would always observe seed == 0.
    private Random r;

    /** Allocates the arrays and fills both operands of each type with random values. */
    @Setup
    public void init() {
        r = new Random(seed);

        floatsA = new float[count];
        doublesA = new double[count];

        floatsB = new float[count];
        doublesB = new double[count];

        floatsD = new float[count];
        doublesD = new double[count];

        for (int i = 0; i < count; i++) {
            floatsA[i] = r.nextFloat();
            // Fill doublesA (the original wrote doublesB twice and left doublesA zeroed).
            doublesA[i] = r.nextDouble();

            floatsB[i] = r.nextFloat();
            doublesB[i] = r.nextDouble();
        }
    }

    /** Element-wise {@code |a - b|} over the float arrays, stored into {@code floatsD}. */
    @Benchmark
    public void testVectorAbsDiffFloat() {
        for (int i = 0; i < count; i++) {
            floatsD[i] = Math.abs(floatsA[i] - floatsB[i]);
        }
    }

    /** Element-wise {@code |a - b|} over the double arrays, stored into {@code doublesD}. */
    @Benchmark
    public void testVectorAbsDiffDouble() {
        for (int i = 0; i < count; i++) {
            doublesD[i] = Math.abs(doublesA[i] - doublesB[i]);
        }
    }

    /**
     * Scalar float chain: each iteration feeds the previous results back into
     * two {@code |x - y|} computations.
     *
     * @param bh sink that consumes the final value so the loop result is used
     */
    @Benchmark
    public void testScalarAbsDiffFloat(Blackhole bh) {
        float a = r.nextFloat();
        float b = r.nextFloat();

        for (int i = 0; i < count; i++) {
            a = Math.abs(a - b);
            b = Math.abs(b - a);
        }

        bh.consume(a + b);
    }

    /**
     * Scalar double chain: each iteration feeds the previous results back into
     * two {@code |x - y|} computations.
     *
     * @param bh sink that consumes the final value so the loop result is used
     */
    @Benchmark
    public void testScalarAbsDiffDouble(Blackhole bh) {
        double a = r.nextDouble();
        double b = r.nextDouble();

        for (int i = 0; i < count; i++) {
            a = Math.abs(a - b);
            b = Math.abs(b - a);
        }

        bh.consume(a + b);
    }
}

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved. * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it