From b0b9dd27b8d3060f66a4400ee8ed1ac93177c2e6 Mon Sep 17 00:00:00 2001 From: Dong Bo Date: Wed, 18 Nov 2020 10:14:20 +0000 Subject: [PATCH] 8256318: AArch64: Add support for floating-point absolute difference Reviewed-by: aph --- src/hotspot/cpu/aarch64/aarch64-asmtest.py | 9 +- src/hotspot/cpu/aarch64/aarch64.ad | 151 +---- src/hotspot/cpu/aarch64/aarch64_neon.ad | 163 +++++ src/hotspot/cpu/aarch64/aarch64_neon_ad.m4 | 46 ++ src/hotspot/cpu/aarch64/assembler_aarch64.cpp | 596 +++++++++--------- src/hotspot/cpu/aarch64/assembler_aarch64.hpp | 35 +- .../jtreg/compiler/c2/Test8217359.java | 2 +- .../jtreg/compiler/c2/TestFoldCompares.java | 2 +- .../compiler/c2/TestReplaceEquivPhis.java | 2 +- .../c2/aarch64/TestVectorShiftShorts.java | 2 +- ...UseSHA3IntrinsicsOptionOnSupportedCPU.java | 2 +- ...eSHA3IntrinsicsOptionOnUnsupportedCPU.java | 2 +- .../sha/sanity/TestSHA3Intrinsics.java | 2 +- .../sanity/TestSHA3MultiBlockIntrinsics.java | 2 +- .../compiler/loopopts/TestBeautifyLoops.java | 2 +- .../loopopts/TestBeautifyLoops_2.java | 2 +- .../loopopts/TestRemoveEmptyLoop.java | 2 +- .../superword/TestSearchAlignment.java | 2 +- .../invokedynamic/DynamicConstantHelper.jasm | 2 +- .../invokedynamic/TestDynamicConstant.java | 2 +- .../security/provider/MessageDigest/SHA3.java | 2 +- .../openjdk/bench/java/util/Base64Encode.java | 2 +- .../compiler/FloatingScalarVectorAbsDiff.java | 104 +++ .../vm/compiler/VectorShiftAccumulate.java | 2 +- 24 files changed, 682 insertions(+), 456 deletions(-) create mode 100644 test/micro/org/openjdk/bench/vm/compiler/FloatingScalarVectorAbsDiff.java diff --git a/src/hotspot/cpu/aarch64/aarch64-asmtest.py b/src/hotspot/cpu/aarch64/aarch64-asmtest.py index 615fe5e045f..22b36e4ac7f 100644 --- a/src/hotspot/cpu/aarch64/aarch64-asmtest.py +++ b/src/hotspot/cpu/aarch64/aarch64-asmtest.py @@ -1334,10 +1334,9 @@ generate(FourRegMulOp, ["maddw", "msubw", "madd", "msub", "smaddl", "smsubl", "umaddl", "umsubl"]) generate(ThreeRegFloatOp, - 
[["fmuls", "sss"], ["fdivs", "sss"], ["fadds", "sss"], ["fsubs", "sss"], - ["fmuls", "sss"], - ["fmuld", "ddd"], ["fdivd", "ddd"], ["faddd", "ddd"], ["fsubd", "ddd"], - ["fmuld", "ddd"]]) + [["fabds", "sss"], ["fmuls", "sss"], ["fdivs", "sss"], ["fadds", "sss"], ["fsubs", "sss"], + ["fabdd", "ddd"], ["fmuld", "ddd"], ["fdivd", "ddd"], ["faddd", "ddd"], ["fsubd", "ddd"], + ]) generate(FourRegFloatOp, [["fmadds", "ssss"], ["fmsubs", "ssss"], ["fnmadds", "ssss"], ["fnmadds", "ssss"], @@ -1437,6 +1436,8 @@ generate(ThreeRegNEONOp, ["mulv", "mul", "8B"], ["mulv", "mul", "16B"], ["mulv", "mul", "4H"], ["mulv", "mul", "8H"], ["mulv", "mul", "2S"], ["mulv", "mul", "4S"], + ["fabd", "fabd", "2S"], ["fabd", "fabd", "4S"], + ["fabd", "fabd", "2D"], ["fmul", "fmul", "2S"], ["fmul", "fmul", "4S"], ["fmul", "fmul", "2D"], ["mlav", "mla", "4H"], ["mlav", "mla", "8H"], diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index 65bdeba0264..bedd4938d07 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -13894,6 +13894,34 @@ instruct absD_reg(vRegD dst, vRegD src) %{ ins_pipe(fp_uop_d); %} +instruct absdF_reg(vRegF dst, vRegF src1, vRegF src2) %{ + match(Set dst (AbsF (SubF src1 src2))); + + ins_cost(INSN_COST * 3); + format %{ "fabds $dst, $src1, $src2" %} + ins_encode %{ + __ fabds(as_FloatRegister($dst$$reg), + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + + ins_pipe(fp_uop_s); +%} + +instruct absdD_reg(vRegD dst, vRegD src1, vRegD src2) %{ + match(Set dst (AbsD (SubD src1 src2))); + + ins_cost(INSN_COST * 3); + format %{ "fabdd $dst, $src1, $src2" %} + ins_encode %{ + __ fabdd(as_FloatRegister($dst$$reg), + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + + ins_pipe(fp_uop_d); +%} + instruct sqrtD_reg(vRegD dst, vRegD src) %{ match(Set dst (SqrtD src)); @@ -17872,129 +17900,6 @@ instruct vsqrt2D(vecX dst, vecX src) ins_pipe(vsqrt_fp128); %} -// 
--------------------------------- ABS -------------------------------------- - -instruct vabs8B(vecD dst, vecD src) -%{ - predicate(n->as_Vector()->length() == 4 || - n->as_Vector()->length() == 8); - match(Set dst (AbsVB src)); - ins_cost(INSN_COST); - format %{ "abs $dst, $src\t# vector (8B)" %} - ins_encode %{ - __ absr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($src$$reg)); - %} - ins_pipe(vlogical64); -%} - -instruct vabs16B(vecX dst, vecX src) -%{ - predicate(n->as_Vector()->length() == 16); - match(Set dst (AbsVB src)); - ins_cost(INSN_COST); - format %{ "abs $dst, $src\t# vector (16B)" %} - ins_encode %{ - __ absr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($src$$reg)); - %} - ins_pipe(vlogical128); -%} - -instruct vabs4S(vecD dst, vecD src) -%{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (AbsVS src)); - ins_cost(INSN_COST); - format %{ "abs $dst, $src\t# vector (4H)" %} - ins_encode %{ - __ absr(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($src$$reg)); - %} - ins_pipe(vlogical64); -%} - -instruct vabs8S(vecX dst, vecX src) -%{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (AbsVS src)); - ins_cost(INSN_COST); - format %{ "abs $dst, $src\t# vector (8H)" %} - ins_encode %{ - __ absr(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg)); - %} - ins_pipe(vlogical128); -%} - -instruct vabs2I(vecD dst, vecD src) -%{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (AbsVI src)); - ins_cost(INSN_COST); - format %{ "abs $dst, $src\t# vector (2S)" %} - ins_encode %{ - __ absr(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg)); - %} - ins_pipe(vlogical64); -%} - -instruct vabs4I(vecX dst, vecX src) -%{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (AbsVI src)); - ins_cost(INSN_COST); - format %{ "abs $dst, $src\t# vector (4S)" %} - ins_encode %{ - __ absr(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg)); - %} - ins_pipe(vlogical128); -%} - 
-instruct vabs2L(vecX dst, vecX src) -%{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (AbsVL src)); - ins_cost(INSN_COST); - format %{ "abs $dst, $src\t# vector (2D)" %} - ins_encode %{ - __ absr(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg)); - %} - ins_pipe(vlogical128); -%} - -instruct vabs2F(vecD dst, vecD src) -%{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (AbsVF src)); - ins_cost(INSN_COST * 3); - format %{ "fabs $dst,$src\t# vector (2S)" %} - ins_encode %{ - __ fabs(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg)); - %} - ins_pipe(vunop_fp64); -%} - -instruct vabs4F(vecX dst, vecX src) -%{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (AbsVF src)); - ins_cost(INSN_COST * 3); - format %{ "fabs $dst,$src\t# vector (4S)" %} - ins_encode %{ - __ fabs(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg)); - %} - ins_pipe(vunop_fp128); -%} - -instruct vabs2D(vecX dst, vecX src) -%{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (AbsVD src)); - ins_cost(INSN_COST * 3); - format %{ "fabs $dst,$src\t# vector (2D)" %} - ins_encode %{ - __ fabs(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg)); - %} - ins_pipe(vunop_fp128); -%} - // --------------------------------- NEG -------------------------------------- instruct vneg2F(vecD dst, vecD src) diff --git a/src/hotspot/cpu/aarch64/aarch64_neon.ad b/src/hotspot/cpu/aarch64/aarch64_neon.ad index 33b1a869cc3..309ec486067 100644 --- a/src/hotspot/cpu/aarch64/aarch64_neon.ad +++ b/src/hotspot/cpu/aarch64/aarch64_neon.ad @@ -3454,3 +3454,166 @@ instruct alltrue_in_mask16B(iRegINoSp dst, vecX src1, vecX src2, vecX tmp, rFlag %} ins_pipe(pipe_slow); %} + +// --------------------------------- ABS -------------------------------------- + +instruct vabs8B(vecD dst, vecD src) +%{ + predicate(n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8); + match(Set dst (AbsVB src)); + ins_cost(INSN_COST); + 
format %{ "abs $dst, $src\t# vector (8B)" %} + ins_encode %{ + __ absr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($src$$reg)); + %} + ins_pipe(vlogical64); +%} + +instruct vabs16B(vecX dst, vecX src) +%{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (AbsVB src)); + ins_cost(INSN_COST); + format %{ "abs $dst, $src\t# vector (16B)" %} + ins_encode %{ + __ absr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($src$$reg)); + %} + ins_pipe(vlogical128); +%} + +instruct vabs4S(vecD dst, vecD src) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AbsVS src)); + ins_cost(INSN_COST); + format %{ "abs $dst, $src\t# vector (4H)" %} + ins_encode %{ + __ absr(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($src$$reg)); + %} + ins_pipe(vlogical64); +%} + +instruct vabs8S(vecX dst, vecX src) +%{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AbsVS src)); + ins_cost(INSN_COST); + format %{ "abs $dst, $src\t# vector (8H)" %} + ins_encode %{ + __ absr(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg)); + %} + ins_pipe(vlogical128); +%} + +instruct vabs2I(vecD dst, vecD src) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AbsVI src)); + ins_cost(INSN_COST); + format %{ "abs $dst, $src\t# vector (2S)" %} + ins_encode %{ + __ absr(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg)); + %} + ins_pipe(vlogical64); +%} + +instruct vabs4I(vecX dst, vecX src) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AbsVI src)); + ins_cost(INSN_COST); + format %{ "abs $dst, $src\t# vector (4S)" %} + ins_encode %{ + __ absr(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg)); + %} + ins_pipe(vlogical128); +%} + +instruct vabs2L(vecX dst, vecX src) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AbsVL src)); + ins_cost(INSN_COST); + format %{ "abs $dst, $src\t# vector (2D)" %} + ins_encode %{ + __ absr(as_FloatRegister($dst$$reg), __ T2D, 
as_FloatRegister($src$$reg)); + %} + ins_pipe(vlogical128); +%} + +instruct vabs2F(vecD dst, vecD src) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AbsVF src)); + ins_cost(INSN_COST * 3); + format %{ "fabs $dst, $src\t# vector (2S)" %} + ins_encode %{ + __ fabs(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg)); + %} + ins_pipe(vunop_fp64); +%} + +instruct vabs4F(vecX dst, vecX src) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AbsVF src)); + ins_cost(INSN_COST * 3); + format %{ "fabs $dst, $src\t# vector (4S)" %} + ins_encode %{ + __ fabs(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg)); + %} + ins_pipe(vunop_fp128); +%} + +instruct vabs2D(vecX dst, vecX src) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AbsVD src)); + ins_cost(INSN_COST * 3); + format %{ "fabs $dst, $src\t# vector (2D)" %} + ins_encode %{ + __ fabs(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg)); + %} + ins_pipe(vunop_fp128); +%} + +// --------------------------------- FABS DIFF -------------------------------- + +instruct vabd2F(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AbsVF (SubVF src1 src2))); + ins_cost(INSN_COST * 3); + format %{ "fabd $dst, $src1, $src2\t# vector (2S)" %} + ins_encode %{ + __ fabd(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vunop_fp64); +%} + +instruct vabd4F(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AbsVF (SubVF src1 src2))); + ins_cost(INSN_COST * 3); + format %{ "fabd $dst, $src1, $src2\t# vector (4S)" %} + ins_encode %{ + __ fabd(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vunop_fp128); +%} + +instruct vabd2D(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AbsVD 
(SubVD src1 src2))); + ins_cost(INSN_COST * 3); + format %{ "fabd $dst, $src1, $src2\t# vector (2D)" %} + ins_encode %{ + __ fabd(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vunop_fp128); +%} diff --git a/src/hotspot/cpu/aarch64/aarch64_neon_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_neon_ad.m4 index 0b1dc5cb7c6..9ac9bad3dc8 100644 --- a/src/hotspot/cpu/aarch64/aarch64_neon_ad.m4 +++ b/src/hotspot/cpu/aarch64/aarch64_neon_ad.m4 @@ -1421,4 +1421,50 @@ instruct alltrue_in_mask$1B`'(iRegINoSp dst, vec$2 src1, vec$2 src2, vec$2 tmp, dnl $1 $2 ALLTRUE_IN_MASK(8, D) ALLTRUE_IN_MASK(16, X) + +// --------------------------------- ABS -------------------------------------- +dnl +define(`VABS', ` +instruct vabs$3$4`'(vec$5 dst, vec$5 src) +%{ + predicate(ifelse($3$4, 8B, n->as_Vector()->length() == 4 || )n->as_Vector()->length() == $3); + match(Set dst (AbsV$4 src)); + ins_cost(ifelse($4, F, INSN_COST * 3, $4, D, INSN_COST * 3, INSN_COST)); + format %{ "$1 $dst, $src\t# vector ($3$6)" %} + ins_encode %{ + __ $2(as_FloatRegister($dst$$reg), __ T$3$6, as_FloatRegister($src$$reg)); + %} + ins_pipe(ifelse($4, F, vunop_fp$7, $4, D, vunop_fp$7, vlogical$7)); +%}')dnl +dnl $1 $2 $3 $4 $5 $6 $7 +VABS(abs, absr, 8, B, D, B, 64) +VABS(abs, absr, 16, B, X, B, 128) +VABS(abs, absr, 4, S, D, H, 64) +VABS(abs, absr, 8, S, X, H, 128) +VABS(abs, absr, 2, I, D, S, 64) +VABS(abs, absr, 4, I, X, S, 128) +VABS(abs, absr, 2, L, X, D, 128) +VABS(fabs, fabs, 2, F, D, S, 64) +VABS(fabs, fabs, 4, F, X, S, 128) +VABS(fabs, fabs, 2, D, X, D, 128) + +// --------------------------------- FABS DIFF -------------------------------- +dnl +define(`VFABD', ` +instruct vabd$3$4`'(vec$5 dst, vec$5 src1, vec$5 src2) +%{ + predicate(n->as_Vector()->length() == $3); + match(Set dst (AbsV$4 (SubV$4 src1 src2))); + ins_cost(INSN_COST * 3); + format %{ "$1 $dst, $src1, $src2\t# vector ($3$6)" %} + ins_encode %{ + __ $2(as_FloatRegister($dst$$reg), 
__ T$3$6, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vunop_fp$7); +%}')dnl +dnl $1 $2 $3 $4 $5 $6 $7 +VFABD(fabd, fabd, 2, F, D, S, 64) +VFABD(fabd, fabd, 4, F, X, S, 128) +VFABD(fabd, fabd, 2, D, X, D, 128) dnl diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.cpp b/src/hotspot/cpu/aarch64/assembler_aarch64.cpp index c7fac2836b7..5b91feebb96 100644 --- a/src/hotspot/cpu/aarch64/assembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/assembler_aarch64.cpp @@ -510,16 +510,16 @@ void entry(CodeBuffer *cb) { __ umsubl(r15, r21, r28, r17); // umsubl x15, w21, w28, x17 // ThreeRegFloatOp - __ fmuls(v27, v10, v3); // fmul s27, s10, s3 - __ fdivs(v0, v7, v25); // fdiv s0, s7, s25 - __ fadds(v9, v6, v15); // fadd s9, s6, s15 - __ fsubs(v29, v15, v10); // fsub s29, s15, s10 - __ fmuls(v2, v17, v7); // fmul s2, s17, s7 - __ fmuld(v11, v11, v23); // fmul d11, d11, d23 - __ fdivd(v7, v29, v23); // fdiv d7, d29, d23 - __ faddd(v14, v27, v11); // fadd d14, d27, d11 - __ fsubd(v11, v4, v24); // fsub d11, d4, d24 - __ fmuld(v12, v15, v14); // fmul d12, d15, d14 + __ fabds(v27, v10, v3); // fabd s27, s10, s3 + __ fmuls(v0, v7, v25); // fmul s0, s7, s25 + __ fdivs(v9, v6, v15); // fdiv s9, s6, s15 + __ fadds(v29, v15, v10); // fadd s29, s15, s10 + __ fsubs(v2, v17, v7); // fsub s2, s17, s7 + __ fabdd(v11, v11, v23); // fabd d11, d11, d23 + __ fmuld(v7, v29, v23); // fmul d7, d29, d23 + __ fdivd(v14, v27, v11); // fdiv d14, d27, d11 + __ faddd(v11, v4, v24); // fadd d11, d4, d24 + __ fsubd(v12, v15, v14); // fsub d12, d15, d14 // FourRegFloatOp __ fmadds(v20, v11, v28, v13); // fmadd s20, s11, s28, s13 @@ -686,74 +686,77 @@ void entry(CodeBuffer *cb) { __ mulv(v6, __ T8H, v7, v8); // mul v6.8H, v7.8H, v8.8H __ mulv(v3, __ T2S, v4, v5); // mul v3.2S, v4.2S, v5.2S __ mulv(v7, __ T4S, v8, v9); // mul v7.4S, v8.4S, v9.4S - __ fmul(v24, __ T2S, v25, v26); // fmul v24.2S, v25.2S, v26.2S - __ fmul(v0, __ T4S, v1, v2); // fmul v0.4S, v1.4S, v2.4S - __ 
fmul(v27, __ T2D, v28, v29); // fmul v27.2D, v28.2D, v29.2D + __ fabd(v24, __ T2S, v25, v26); // fabd v24.2S, v25.2S, v26.2S + __ fabd(v0, __ T4S, v1, v2); // fabd v0.4S, v1.4S, v2.4S + __ fabd(v27, __ T2D, v28, v29); // fabd v27.2D, v28.2D, v29.2D + __ fmul(v29, __ T2S, v30, v31); // fmul v29.2S, v30.2S, v31.2S + __ fmul(v5, __ T4S, v6, v7); // fmul v5.4S, v6.4S, v7.4S + __ fmul(v5, __ T2D, v6, v7); // fmul v5.2D, v6.2D, v7.2D __ mlav(v29, __ T4H, v30, v31); // mla v29.4H, v30.4H, v31.4H - __ mlav(v5, __ T8H, v6, v7); // mla v5.8H, v6.8H, v7.8H - __ mlav(v5, __ T2S, v6, v7); // mla v5.2S, v6.2S, v7.2S - __ mlav(v29, __ T4S, v30, v31); // mla v29.4S, v30.4S, v31.4S - __ fmla(v11, __ T2S, v12, v13); // fmla v11.2S, v12.2S, v13.2S - __ fmla(v25, __ T4S, v26, v27); // fmla v25.4S, v26.4S, v27.4S - __ fmla(v0, __ T2D, v1, v2); // fmla v0.2D, v1.2D, v2.2D - __ mlsv(v30, __ T4H, v31, v0); // mls v30.4H, v31.4H, v0.4H - __ mlsv(v0, __ T8H, v1, v2); // mls v0.8H, v1.8H, v2.8H - __ mlsv(v17, __ T2S, v18, v19); // mls v17.2S, v18.2S, v19.2S - __ mlsv(v28, __ T4S, v29, v30); // mls v28.4S, v29.4S, v30.4S - __ fmls(v25, __ T2S, v26, v27); // fmls v25.2S, v26.2S, v27.2S - __ fmls(v9, __ T4S, v10, v11); // fmls v9.4S, v10.4S, v11.4S - __ fmls(v25, __ T2D, v26, v27); // fmls v25.2D, v26.2D, v27.2D - __ fdiv(v12, __ T2S, v13, v14); // fdiv v12.2S, v13.2S, v14.2S - __ fdiv(v15, __ T4S, v16, v17); // fdiv v15.4S, v16.4S, v17.4S - __ fdiv(v11, __ T2D, v12, v13); // fdiv v11.2D, v12.2D, v13.2D - __ maxv(v10, __ T8B, v11, v12); // smax v10.8B, v11.8B, v12.8B - __ maxv(v17, __ T16B, v18, v19); // smax v17.16B, v18.16B, v19.16B - __ maxv(v24, __ T4H, v25, v26); // smax v24.4H, v25.4H, v26.4H - __ maxv(v21, __ T8H, v22, v23); // smax v21.8H, v22.8H, v23.8H - __ maxv(v23, __ T2S, v24, v25); // smax v23.2S, v24.2S, v25.2S - __ maxv(v0, __ T4S, v1, v2); // smax v0.4S, v1.4S, v2.4S - __ fmax(v16, __ T2S, v17, v18); // fmax v16.2S, v17.2S, v18.2S - __ fmax(v10, __ T4S, v11, v12); // fmax 
v10.4S, v11.4S, v12.4S - __ fmax(v6, __ T2D, v7, v8); // fmax v6.2D, v7.2D, v8.2D - __ minv(v28, __ T8B, v29, v30); // smin v28.8B, v29.8B, v30.8B - __ minv(v6, __ T16B, v7, v8); // smin v6.16B, v7.16B, v8.16B - __ minv(v5, __ T4H, v6, v7); // smin v5.4H, v6.4H, v7.4H - __ minv(v5, __ T8H, v6, v7); // smin v5.8H, v6.8H, v7.8H - __ minv(v20, __ T2S, v21, v22); // smin v20.2S, v21.2S, v22.2S - __ minv(v17, __ T4S, v18, v19); // smin v17.4S, v18.4S, v19.4S - __ fmin(v15, __ T2S, v16, v17); // fmin v15.2S, v16.2S, v17.2S - __ fmin(v17, __ T4S, v18, v19); // fmin v17.4S, v18.4S, v19.4S - __ fmin(v29, __ T2D, v30, v31); // fmin v29.2D, v30.2D, v31.2D - __ cmeq(v26, __ T8B, v27, v28); // cmeq v26.8B, v27.8B, v28.8B - __ cmeq(v28, __ T16B, v29, v30); // cmeq v28.16B, v29.16B, v30.16B - __ cmeq(v1, __ T4H, v2, v3); // cmeq v1.4H, v2.4H, v3.4H - __ cmeq(v27, __ T8H, v28, v29); // cmeq v27.8H, v28.8H, v29.8H - __ cmeq(v0, __ T2S, v1, v2); // cmeq v0.2S, v1.2S, v2.2S - __ cmeq(v20, __ T4S, v21, v22); // cmeq v20.4S, v21.4S, v22.4S - __ cmeq(v28, __ T2D, v29, v30); // cmeq v28.2D, v29.2D, v30.2D - __ fcmeq(v15, __ T2S, v16, v17); // fcmeq v15.2S, v16.2S, v17.2S - __ fcmeq(v12, __ T4S, v13, v14); // fcmeq v12.4S, v13.4S, v14.4S - __ fcmeq(v10, __ T2D, v11, v12); // fcmeq v10.2D, v11.2D, v12.2D - __ cmgt(v28, __ T8B, v29, v30); // cmgt v28.8B, v29.8B, v30.8B - __ cmgt(v28, __ T16B, v29, v30); // cmgt v28.16B, v29.16B, v30.16B - __ cmgt(v19, __ T4H, v20, v21); // cmgt v19.4H, v20.4H, v21.4H - __ cmgt(v22, __ T8H, v23, v24); // cmgt v22.8H, v23.8H, v24.8H - __ cmgt(v10, __ T2S, v11, v12); // cmgt v10.2S, v11.2S, v12.2S - __ cmgt(v4, __ T4S, v5, v6); // cmgt v4.4S, v5.4S, v6.4S + __ mlav(v11, __ T8H, v12, v13); // mla v11.8H, v12.8H, v13.8H + __ mlav(v25, __ T2S, v26, v27); // mla v25.2S, v26.2S, v27.2S + __ mlav(v0, __ T4S, v1, v2); // mla v0.4S, v1.4S, v2.4S + __ fmla(v30, __ T2S, v31, v0); // fmla v30.2S, v31.2S, v0.2S + __ fmla(v0, __ T4S, v1, v2); // fmla v0.4S, v1.4S, v2.4S + 
__ fmla(v17, __ T2D, v18, v19); // fmla v17.2D, v18.2D, v19.2D + __ mlsv(v28, __ T4H, v29, v30); // mls v28.4H, v29.4H, v30.4H + __ mlsv(v25, __ T8H, v26, v27); // mls v25.8H, v26.8H, v27.8H + __ mlsv(v9, __ T2S, v10, v11); // mls v9.2S, v10.2S, v11.2S + __ mlsv(v25, __ T4S, v26, v27); // mls v25.4S, v26.4S, v27.4S + __ fmls(v12, __ T2S, v13, v14); // fmls v12.2S, v13.2S, v14.2S + __ fmls(v15, __ T4S, v16, v17); // fmls v15.4S, v16.4S, v17.4S + __ fmls(v11, __ T2D, v12, v13); // fmls v11.2D, v12.2D, v13.2D + __ fdiv(v10, __ T2S, v11, v12); // fdiv v10.2S, v11.2S, v12.2S + __ fdiv(v17, __ T4S, v18, v19); // fdiv v17.4S, v18.4S, v19.4S + __ fdiv(v24, __ T2D, v25, v26); // fdiv v24.2D, v25.2D, v26.2D + __ maxv(v21, __ T8B, v22, v23); // smax v21.8B, v22.8B, v23.8B + __ maxv(v23, __ T16B, v24, v25); // smax v23.16B, v24.16B, v25.16B + __ maxv(v0, __ T4H, v1, v2); // smax v0.4H, v1.4H, v2.4H + __ maxv(v16, __ T8H, v17, v18); // smax v16.8H, v17.8H, v18.8H + __ maxv(v10, __ T2S, v11, v12); // smax v10.2S, v11.2S, v12.2S + __ maxv(v6, __ T4S, v7, v8); // smax v6.4S, v7.4S, v8.4S + __ fmax(v28, __ T2S, v29, v30); // fmax v28.2S, v29.2S, v30.2S + __ fmax(v6, __ T4S, v7, v8); // fmax v6.4S, v7.4S, v8.4S + __ fmax(v5, __ T2D, v6, v7); // fmax v5.2D, v6.2D, v7.2D + __ minv(v5, __ T8B, v6, v7); // smin v5.8B, v6.8B, v7.8B + __ minv(v20, __ T16B, v21, v22); // smin v20.16B, v21.16B, v22.16B + __ minv(v17, __ T4H, v18, v19); // smin v17.4H, v18.4H, v19.4H + __ minv(v15, __ T8H, v16, v17); // smin v15.8H, v16.8H, v17.8H + __ minv(v17, __ T2S, v18, v19); // smin v17.2S, v18.2S, v19.2S + __ minv(v29, __ T4S, v30, v31); // smin v29.4S, v30.4S, v31.4S + __ fmin(v26, __ T2S, v27, v28); // fmin v26.2S, v27.2S, v28.2S + __ fmin(v28, __ T4S, v29, v30); // fmin v28.4S, v29.4S, v30.4S + __ fmin(v1, __ T2D, v2, v3); // fmin v1.2D, v2.2D, v3.2D + __ cmeq(v27, __ T8B, v28, v29); // cmeq v27.8B, v28.8B, v29.8B + __ cmeq(v0, __ T16B, v1, v2); // cmeq v0.16B, v1.16B, v2.16B + __ cmeq(v20, __ T4H, 
v21, v22); // cmeq v20.4H, v21.4H, v22.4H + __ cmeq(v28, __ T8H, v29, v30); // cmeq v28.8H, v29.8H, v30.8H + __ cmeq(v15, __ T2S, v16, v17); // cmeq v15.2S, v16.2S, v17.2S + __ cmeq(v12, __ T4S, v13, v14); // cmeq v12.4S, v13.4S, v14.4S + __ cmeq(v10, __ T2D, v11, v12); // cmeq v10.2D, v11.2D, v12.2D + __ fcmeq(v28, __ T2S, v29, v30); // fcmeq v28.2S, v29.2S, v30.2S + __ fcmeq(v28, __ T4S, v29, v30); // fcmeq v28.4S, v29.4S, v30.4S + __ fcmeq(v19, __ T2D, v20, v21); // fcmeq v19.2D, v20.2D, v21.2D + __ cmgt(v22, __ T8B, v23, v24); // cmgt v22.8B, v23.8B, v24.8B + __ cmgt(v10, __ T16B, v11, v12); // cmgt v10.16B, v11.16B, v12.16B + __ cmgt(v4, __ T4H, v5, v6); // cmgt v4.4H, v5.4H, v6.4H + __ cmgt(v30, __ T8H, v31, v0); // cmgt v30.8H, v31.8H, v0.8H + __ cmgt(v20, __ T2S, v21, v22); // cmgt v20.2S, v21.2S, v22.2S + __ cmgt(v8, __ T4S, v9, v10); // cmgt v8.4S, v9.4S, v10.4S __ cmgt(v30, __ T2D, v31, v0); // cmgt v30.2D, v31.2D, v0.2D - __ fcmgt(v20, __ T2S, v21, v22); // fcmgt v20.2S, v21.2S, v22.2S - __ fcmgt(v8, __ T4S, v9, v10); // fcmgt v8.4S, v9.4S, v10.4S - __ fcmgt(v30, __ T2D, v31, v0); // fcmgt v30.2D, v31.2D, v0.2D - __ cmge(v17, __ T8B, v18, v19); // cmge v17.8B, v18.8B, v19.8B - __ cmge(v10, __ T16B, v11, v12); // cmge v10.16B, v11.16B, v12.16B - __ cmge(v27, __ T4H, v28, v29); // cmge v27.4H, v28.4H, v29.4H - __ cmge(v2, __ T8H, v3, v4); // cmge v2.8H, v3.8H, v4.8H - __ cmge(v24, __ T2S, v25, v26); // cmge v24.2S, v25.2S, v26.2S - __ cmge(v4, __ T4S, v5, v6); // cmge v4.4S, v5.4S, v6.4S - __ cmge(v3, __ T2D, v4, v5); // cmge v3.2D, v4.2D, v5.2D - __ fcmge(v8, __ T2S, v9, v10); // fcmge v8.2S, v9.2S, v10.2S - __ fcmge(v22, __ T4S, v23, v24); // fcmge v22.4S, v23.4S, v24.4S - __ fcmge(v17, __ T2D, v18, v19); // fcmge v17.2D, v18.2D, v19.2D + __ fcmgt(v17, __ T2S, v18, v19); // fcmgt v17.2S, v18.2S, v19.2S + __ fcmgt(v10, __ T4S, v11, v12); // fcmgt v10.4S, v11.4S, v12.4S + __ fcmgt(v27, __ T2D, v28, v29); // fcmgt v27.2D, v28.2D, v29.2D + __ cmge(v2, __ 
T8B, v3, v4); // cmge v2.8B, v3.8B, v4.8B + __ cmge(v24, __ T16B, v25, v26); // cmge v24.16B, v25.16B, v26.16B + __ cmge(v4, __ T4H, v5, v6); // cmge v4.4H, v5.4H, v6.4H + __ cmge(v3, __ T8H, v4, v5); // cmge v3.8H, v4.8H, v5.8H + __ cmge(v8, __ T2S, v9, v10); // cmge v8.2S, v9.2S, v10.2S + __ cmge(v22, __ T4S, v23, v24); // cmge v22.4S, v23.4S, v24.4S + __ cmge(v17, __ T2D, v18, v19); // cmge v17.2D, v18.2D, v19.2D + __ fcmge(v13, __ T2S, v14, v15); // fcmge v13.2S, v14.2S, v15.2S + __ fcmge(v4, __ T4S, v5, v6); // fcmge v4.4S, v5.4S, v6.4S + __ fcmge(v28, __ T2D, v29, v30); // fcmge v28.2D, v29.2D, v30.2D // SpecialCases __ ccmn(zr, zr, 3u, Assembler::LE); // ccmn xzr, xzr, #3, LE @@ -840,155 +843,155 @@ void entry(CodeBuffer *cb) { __ fmovd(v0, -1.0625); // fmov d0, #-1.0625 // LSEOp - __ swp(Assembler::xword, r13, r5, r29); // swp x13, x5, [x29] - __ ldadd(Assembler::xword, r24, r21, r26); // ldadd x24, x21, [x26] - __ ldbic(Assembler::xword, r24, r3, r24); // ldclr x24, x3, [x24] - __ ldeor(Assembler::xword, r26, r23, r15); // ldeor x26, x23, [x15] - __ ldorr(Assembler::xword, r21, r3, r24); // ldset x21, x3, [x24] - __ ldsmin(Assembler::xword, r8, r25, r20); // ldsmin x8, x25, [x20] - __ ldsmax(Assembler::xword, r16, r17, r2); // ldsmax x16, x17, [x2] - __ ldumin(Assembler::xword, r1, r0, r24); // ldumin x1, x0, [x24] - __ ldumax(Assembler::xword, r4, r3, r12); // ldumax x4, x3, [x12] + __ swp(Assembler::xword, r24, r21, r26); // swp x24, x21, [x26] + __ ldadd(Assembler::xword, r24, r3, r24); // ldadd x24, x3, [x24] + __ ldbic(Assembler::xword, r26, r23, r15); // ldclr x26, x23, [x15] + __ ldeor(Assembler::xword, r21, r3, r24); // ldeor x21, x3, [x24] + __ ldorr(Assembler::xword, r8, r25, r20); // ldset x8, x25, [x20] + __ ldsmin(Assembler::xword, r16, r17, r2); // ldsmin x16, x17, [x2] + __ ldsmax(Assembler::xword, r1, r0, r24); // ldsmax x1, x0, [x24] + __ ldumin(Assembler::xword, r4, r3, r12); // ldumin x4, x3, [x12] + __ ldumax(Assembler::xword, zr, r28, 
r10); // ldumax xzr, x28, [x10] // LSEOp - __ swpa(Assembler::xword, zr, r28, r10); // swpa xzr, x28, [x10] - __ ldadda(Assembler::xword, r26, r2, r12); // ldadda x26, x2, [x12] - __ ldbica(Assembler::xword, r16, zr, r1); // ldclra x16, xzr, [x1] - __ ldeora(Assembler::xword, r13, r29, r0); // ldeora x13, x29, [x0] - __ ldorra(Assembler::xword, r19, r12, r17); // ldseta x19, x12, [x17] - __ ldsmina(Assembler::xword, r22, r13, r28); // ldsmina x22, x13, [x28] - __ ldsmaxa(Assembler::xword, r30, zr, r1); // ldsmaxa x30, xzr, [x1] - __ ldumina(Assembler::xword, r26, r28, r4); // ldumina x26, x28, [x4] - __ ldumaxa(Assembler::xword, r30, r4, r6); // ldumaxa x30, x4, [x6] + __ swpa(Assembler::xword, r26, r2, r12); // swpa x26, x2, [x12] + __ ldadda(Assembler::xword, r16, zr, r1); // ldadda x16, xzr, [x1] + __ ldbica(Assembler::xword, r13, r29, r0); // ldclra x13, x29, [x0] + __ ldeora(Assembler::xword, r19, r12, r17); // ldeora x19, x12, [x17] + __ ldorra(Assembler::xword, r22, r13, r28); // ldseta x22, x13, [x28] + __ ldsmina(Assembler::xword, r30, zr, r1); // ldsmina x30, xzr, [x1] + __ ldsmaxa(Assembler::xword, r26, r28, r4); // ldsmaxa x26, x28, [x4] + __ ldumina(Assembler::xword, r30, r4, r6); // ldumina x30, x4, [x6] + __ ldumaxa(Assembler::xword, r30, r26, r15); // ldumaxa x30, x26, [x15] // LSEOp - __ swpal(Assembler::xword, r30, r26, r15); // swpal x30, x26, [x15] - __ ldaddal(Assembler::xword, r9, r8, r12); // ldaddal x9, x8, [x12] - __ ldbical(Assembler::xword, r0, r20, r1); // ldclral x0, x20, [x1] - __ ldeoral(Assembler::xword, r24, r2, r0); // ldeoral x24, x2, [x0] - __ ldorral(Assembler::xword, r9, r24, r26); // ldsetal x9, x24, [x26] - __ ldsminal(Assembler::xword, r16, r30, r3); // ldsminal x16, x30, [x3] - __ ldsmaxal(Assembler::xword, r10, r23, r10); // ldsmaxal x10, x23, [x10] - __ lduminal(Assembler::xword, r4, r16, r2); // lduminal x4, x16, [x2] - __ ldumaxal(Assembler::xword, r11, r8, r10); // ldumaxal x11, x8, [x10] + __ swpal(Assembler::xword, 
r9, r8, r12); // swpal x9, x8, [x12] + __ ldaddal(Assembler::xword, r0, r20, r1); // ldaddal x0, x20, [x1] + __ ldbical(Assembler::xword, r24, r2, r0); // ldclral x24, x2, [x0] + __ ldeoral(Assembler::xword, r9, r24, r26); // ldeoral x9, x24, [x26] + __ ldorral(Assembler::xword, r16, r30, r3); // ldsetal x16, x30, [x3] + __ ldsminal(Assembler::xword, r10, r23, r10); // ldsminal x10, x23, [x10] + __ ldsmaxal(Assembler::xword, r4, r16, r2); // ldsmaxal x4, x16, [x2] + __ lduminal(Assembler::xword, r11, r8, r10); // lduminal x11, x8, [x10] + __ ldumaxal(Assembler::xword, r15, r17, r2); // ldumaxal x15, x17, [x2] // LSEOp - __ swpl(Assembler::xword, r15, r17, r2); // swpl x15, x17, [x2] - __ ldaddl(Assembler::xword, r10, r12, r12); // ldaddl x10, x12, [x12] - __ ldbicl(Assembler::xword, r15, r13, r2); // ldclrl x15, x13, [x2] - __ ldeorl(Assembler::xword, r7, r20, r26); // ldeorl x7, x20, [x26] - __ ldorrl(Assembler::xword, r16, r4, r2); // ldsetl x16, x4, [x2] - __ ldsminl(Assembler::xword, r4, r12, r15); // ldsminl x4, x12, [x15] - __ ldsmaxl(Assembler::xword, r21, r16, r15); // ldsmaxl x21, x16, [x15] - __ lduminl(Assembler::xword, r11, r21, r23); // lduminl x11, x21, [x23] - __ ldumaxl(Assembler::xword, r12, r26, r23); // ldumaxl x12, x26, [x23] + __ swpl(Assembler::xword, r10, r12, r12); // swpl x10, x12, [x12] + __ ldaddl(Assembler::xword, r15, r13, r2); // ldaddl x15, x13, [x2] + __ ldbicl(Assembler::xword, r7, r20, r26); // ldclrl x7, x20, [x26] + __ ldeorl(Assembler::xword, r16, r4, r2); // ldeorl x16, x4, [x2] + __ ldorrl(Assembler::xword, r4, r12, r15); // ldsetl x4, x12, [x15] + __ ldsminl(Assembler::xword, r21, r16, r15); // ldsminl x21, x16, [x15] + __ ldsmaxl(Assembler::xword, r11, r21, r23); // ldsmaxl x11, x21, [x23] + __ lduminl(Assembler::xword, r12, r26, r23); // lduminl x12, x26, [x23] + __ ldumaxl(Assembler::xword, r28, r14, r11); // ldumaxl x28, x14, [x11] // LSEOp - __ swp(Assembler::word, r28, r14, r11); // swp w28, w14, [x11] - __ 
ldadd(Assembler::word, r24, r1, r12); // ldadd w24, w1, [x12] - __ ldbic(Assembler::word, zr, r10, r16); // ldclr wzr, w10, [x16] - __ ldeor(Assembler::word, r7, r2, r3); // ldeor w7, w2, [x3] - __ ldorr(Assembler::word, r13, r19, r17); // ldset w13, w19, [x17] - __ ldsmin(Assembler::word, r16, r3, r1); // ldsmin w16, w3, [x1] - __ ldsmax(Assembler::word, r11, r30, r5); // ldsmax w11, w30, [x5] - __ ldumin(Assembler::word, r8, r15, r29); // ldumin w8, w15, [x29] - __ ldumax(Assembler::word, r30, r0, r20); // ldumax w30, w0, [x20] + __ swp(Assembler::word, r24, r1, r12); // swp w24, w1, [x12] + __ ldadd(Assembler::word, zr, r10, r16); // ldadd wzr, w10, [x16] + __ ldbic(Assembler::word, r7, r2, r3); // ldclr w7, w2, [x3] + __ ldeor(Assembler::word, r13, r19, r17); // ldeor w13, w19, [x17] + __ ldorr(Assembler::word, r16, r3, r1); // ldset w16, w3, [x1] + __ ldsmin(Assembler::word, r11, r30, r5); // ldsmin w11, w30, [x5] + __ ldsmax(Assembler::word, r8, r15, r29); // ldsmax w8, w15, [x29] + __ ldumin(Assembler::word, r30, r0, r20); // ldumin w30, w0, [x20] + __ ldumax(Assembler::word, r7, r20, r23); // ldumax w7, w20, [x23] // LSEOp - __ swpa(Assembler::word, r7, r20, r23); // swpa w7, w20, [x23] - __ ldadda(Assembler::word, r28, r21, r27); // ldadda w28, w21, [x27] - __ ldbica(Assembler::word, r25, r5, r1); // ldclra w25, w5, [x1] - __ ldeora(Assembler::word, r23, r16, sp); // ldeora w23, w16, [sp] - __ ldorra(Assembler::word, r5, r12, r9); // ldseta w5, w12, [x9] - __ ldsmina(Assembler::word, r28, r15, r29); // ldsmina w28, w15, [x29] - __ ldsmaxa(Assembler::word, r22, zr, r19); // ldsmaxa w22, wzr, [x19] - __ ldumina(Assembler::word, zr, r5, r14); // ldumina wzr, w5, [x14] - __ ldumaxa(Assembler::word, r16, zr, r15); // ldumaxa w16, wzr, [x15] + __ swpa(Assembler::word, r28, r21, r27); // swpa w28, w21, [x27] + __ ldadda(Assembler::word, r25, r5, r1); // ldadda w25, w5, [x1] + __ ldbica(Assembler::word, r23, r16, sp); // ldclra w23, w16, [sp] + __ 
ldeora(Assembler::word, r5, r12, r9); // ldeora w5, w12, [x9] + __ ldorra(Assembler::word, r28, r15, r29); // ldseta w28, w15, [x29] + __ ldsmina(Assembler::word, r22, zr, r19); // ldsmina w22, wzr, [x19] + __ ldsmaxa(Assembler::word, zr, r5, r14); // ldsmaxa wzr, w5, [x14] + __ ldumina(Assembler::word, r16, zr, r15); // ldumina w16, wzr, [x15] + __ ldumaxa(Assembler::word, r27, r20, r16); // ldumaxa w27, w20, [x16] // LSEOp - __ swpal(Assembler::word, r27, r20, r16); // swpal w27, w20, [x16] - __ ldaddal(Assembler::word, r12, r11, r9); // ldaddal w12, w11, [x9] - __ ldbical(Assembler::word, r6, r30, r17); // ldclral w6, w30, [x17] - __ ldeoral(Assembler::word, r27, r28, r30); // ldeoral w27, w28, [x30] - __ ldorral(Assembler::word, r7, r10, r20); // ldsetal w7, w10, [x20] - __ ldsminal(Assembler::word, r10, r4, r24); // ldsminal w10, w4, [x24] - __ ldsmaxal(Assembler::word, r17, r17, r22); // ldsmaxal w17, w17, [x22] - __ lduminal(Assembler::word, r3, r29, r15); // lduminal w3, w29, [x15] - __ ldumaxal(Assembler::word, r22, r19, r19); // ldumaxal w22, w19, [x19] + __ swpal(Assembler::word, r12, r11, r9); // swpal w12, w11, [x9] + __ ldaddal(Assembler::word, r6, r30, r17); // ldaddal w6, w30, [x17] + __ ldbical(Assembler::word, r27, r28, r30); // ldclral w27, w28, [x30] + __ ldeoral(Assembler::word, r7, r10, r20); // ldeoral w7, w10, [x20] + __ ldorral(Assembler::word, r10, r4, r24); // ldsetal w10, w4, [x24] + __ ldsminal(Assembler::word, r17, r17, r22); // ldsminal w17, w17, [x22] + __ ldsmaxal(Assembler::word, r3, r29, r15); // ldsmaxal w3, w29, [x15] + __ lduminal(Assembler::word, r22, r19, r19); // lduminal w22, w19, [x19] + __ ldumaxal(Assembler::word, r22, r2, r15); // ldumaxal w22, w2, [x15] // LSEOp - __ swpl(Assembler::word, r22, r2, r15); // swpl w22, w2, [x15] - __ ldaddl(Assembler::word, r6, r12, r16); // ldaddl w6, w12, [x16] - __ ldbicl(Assembler::word, r11, r13, r23); // ldclrl w11, w13, [x23] - __ ldeorl(Assembler::word, r1, r30, r19); // ldeorl 
w1, w30, [x19] - __ ldorrl(Assembler::word, r5, r17, r2); // ldsetl w5, w17, [x2] - __ ldsminl(Assembler::word, r16, r22, r13); // ldsminl w16, w22, [x13] - __ ldsmaxl(Assembler::word, r10, r21, r29); // ldsmaxl w10, w21, [x29] - __ lduminl(Assembler::word, r27, r12, r27); // lduminl w27, w12, [x27] - __ ldumaxl(Assembler::word, r3, r1, sp); // ldumaxl w3, w1, [sp] + __ swpl(Assembler::word, r6, r12, r16); // swpl w6, w12, [x16] + __ ldaddl(Assembler::word, r11, r13, r23); // ldaddl w11, w13, [x23] + __ ldbicl(Assembler::word, r1, r30, r19); // ldclrl w1, w30, [x19] + __ ldeorl(Assembler::word, r5, r17, r2); // ldeorl w5, w17, [x2] + __ ldorrl(Assembler::word, r16, r22, r13); // ldsetl w16, w22, [x13] + __ ldsminl(Assembler::word, r10, r21, r29); // ldsminl w10, w21, [x29] + __ ldsmaxl(Assembler::word, r27, r12, r27); // ldsmaxl w27, w12, [x27] + __ lduminl(Assembler::word, r3, r1, sp); // lduminl w3, w1, [sp] + __ ldumaxl(Assembler::word, r24, r19, r17); // ldumaxl w24, w19, [x17] // SHA3SIMDOp - __ bcax(v23, __ T16B, v19, v17, v9); // bcax v23.16B, v19.16B, v17.16B, v9.16B - __ eor3(v27, __ T16B, v26, v14, v6); // eor3 v27.16B, v26.16B, v14.16B, v6.16B - __ rax1(v20, __ T2D, v22, v30); // rax1 v20.2D, v22.2D, v30.2D - __ xar(v24, __ T2D, v2, v30, 54); // xar v24.2D, v2.2D, v30.2D, #54 + __ bcax(v9, __ T16B, v27, v26, v14); // bcax v9.16B, v27.16B, v26.16B, v14.16B + __ eor3(v6, __ T16B, v20, v22, v30); // eor3 v6.16B, v20.16B, v22.16B, v30.16B + __ rax1(v24, __ T2D, v2, v30); // rax1 v24.2D, v2.2D, v30.2D + __ xar(v26, __ T2D, v17, v10, 46); // xar v26.2D, v17.2D, v10.2D, #46 // SHA512SIMDOp - __ sha512h(v17, __ T2D, v10, v22); // sha512h q17, q10, v22.2D - __ sha512h2(v17, __ T2D, v2, v17); // sha512h2 q17, q2, v17.2D - __ sha512su0(v0, __ T2D, v24); // sha512su0 v0.2D, v24.2D - __ sha512su1(v25, __ T2D, v22, v2); // sha512su1 v25.2D, v22.2D, v2.2D + __ sha512h(v17, __ T2D, v2, v17); // sha512h q17, q2, v17.2D + __ sha512h2(v0, __ T2D, v24, v25); // sha512h2 q0, 
q24, v25.2D + __ sha512su0(v22, __ T2D, v2); // sha512su0 v22.2D, v2.2D + __ sha512su1(v17, __ T2D, v12, v3); // sha512su1 v17.2D, v12.2D, v3.2D // SVEVectorOp - __ sve_add(z17, __ D, z12, z3); // add z17.d, z12.d, z3.d - __ sve_sub(z29, __ D, z28, z16); // sub z29.d, z28.d, z16.d - __ sve_fadd(z6, __ D, z9, z28); // fadd z6.d, z9.d, z28.d - __ sve_fmul(z7, __ S, z4, z7); // fmul z7.s, z4.s, z7.s - __ sve_fsub(z9, __ S, z22, z8); // fsub z9.s, z22.s, z8.s - __ sve_abs(z27, __ B, p5, z30); // abs z27.b, p5/m, z30.b - __ sve_add(z26, __ H, p0, z16); // add z26.h, p0/m, z26.h, z16.h - __ sve_asr(z3, __ D, p6, z8); // asr z3.d, p6/m, z3.d, z8.d - __ sve_cnt(z21, __ D, p6, z26); // cnt z21.d, p6/m, z26.d - __ sve_lsl(z22, __ B, p0, z4); // lsl z22.b, p0/m, z22.b, z4.b - __ sve_lsr(z17, __ H, p0, z3); // lsr z17.h, p0/m, z17.h, z3.h - __ sve_mul(z1, __ B, p2, z6); // mul z1.b, p2/m, z1.b, z6.b - __ sve_neg(z9, __ S, p7, z7); // neg z9.s, p7/m, z7.s - __ sve_not(z22, __ H, p5, z5); // not z22.h, p5/m, z5.h - __ sve_smax(z8, __ B, p4, z30); // smax z8.b, p4/m, z8.b, z30.b - __ sve_smin(z17, __ D, p0, z11); // smin z17.d, p0/m, z17.d, z11.d - __ sve_sub(z28, __ S, p0, z26); // sub z28.s, p0/m, z28.s, z26.s - __ sve_fabs(z28, __ D, p3, z13); // fabs z28.d, p3/m, z13.d - __ sve_fadd(z16, __ S, p6, z5); // fadd z16.s, p6/m, z16.s, z5.s - __ sve_fdiv(z13, __ S, p2, z15); // fdiv z13.s, p2/m, z13.s, z15.s - __ sve_fmax(z26, __ S, p5, z11); // fmax z26.s, p5/m, z26.s, z11.s - __ sve_fmin(z22, __ S, p4, z4); // fmin z22.s, p4/m, z22.s, z4.s - __ sve_fmul(z19, __ S, p4, z17); // fmul z19.s, p4/m, z19.s, z17.s - __ sve_fneg(z14, __ D, p3, z2); // fneg z14.d, p3/m, z2.d - __ sve_frintm(z3, __ S, p5, z23); // frintm z3.s, p5/m, z23.s - __ sve_frintn(z6, __ S, p1, z17); // frintn z6.s, p1/m, z17.s - __ sve_frintp(z27, __ S, p4, z16); // frintp z27.s, p4/m, z16.s - __ sve_fsqrt(z2, __ S, p7, z3); // fsqrt z2.s, p7/m, z3.s - __ sve_fsub(z6, __ S, p4, z19); // fsub z6.s, p4/m, z6.s, z19.s 
- __ sve_fmla(z12, __ D, p5, z8, z24); // fmla z12.d, p5/m, z8.d, z24.d - __ sve_fmls(z17, __ S, p0, z10, z23); // fmls z17.s, p0/m, z10.s, z23.s - __ sve_fnmla(z19, __ S, p7, z13, z16); // fnmla z19.s, p7/m, z13.s, z16.s - __ sve_fnmls(z0, __ D, p1, z14, z17); // fnmls z0.d, p1/m, z14.d, z17.d - __ sve_mla(z8, __ S, p2, z22, z20); // mla z8.s, p2/m, z22.s, z20.s - __ sve_mls(z27, __ S, p0, z3, z15); // mls z27.s, p0/m, z3.s, z15.s - __ sve_and(z20, z7, z4); // and z20.d, z7.d, z4.d - __ sve_eor(z7, z0, z8); // eor z7.d, z0.d, z8.d - __ sve_orr(z19, z22, z4); // orr z19.d, z22.d, z4.d + __ sve_add(z27, __ S, z29, z28); // add z27.s, z29.s, z28.s + __ sve_sub(z26, __ D, z6, z9); // sub z26.d, z6.d, z9.d + __ sve_fadd(z17, __ S, z7, z4); // fadd z17.s, z7.s, z4.s + __ sve_fmul(z15, __ S, z9, z22); // fmul z15.s, z9.s, z22.s + __ sve_fsub(z2, __ D, z27, z20); // fsub z2.d, z27.d, z20.d + __ sve_abs(z5, __ S, p6, z0); // abs z5.s, p6/m, z0.s + __ sve_add(z14, __ H, p1, z25); // add z14.h, p1/m, z14.h, z25.h + __ sve_asr(z27, __ D, p5, z26); // asr z27.d, p5/m, z27.d, z26.d + __ sve_cnt(z24, __ B, p5, z0); // cnt z24.b, p5/m, z0.b + __ sve_lsl(z6, __ B, p4, z0); // lsl z6.b, p4/m, z6.b, z0.b + __ sve_lsr(z15, __ B, p0, z9); // lsr z15.b, p0/m, z15.b, z9.b + __ sve_mul(z5, __ B, p2, z27); // mul z5.b, p2/m, z5.b, z27.b + __ sve_neg(z20, __ B, p5, z20); // neg z20.b, p5/m, z20.b + __ sve_not(z10, __ D, p2, z16); // not z10.d, p2/m, z16.d + __ sve_smax(z6, __ H, p4, z2); // smax z6.h, p4/m, z6.h, z2.h + __ sve_smin(z29, __ D, p7, z2); // smin z29.d, p7/m, z29.d, z2.d + __ sve_sub(z22, __ H, p7, z14); // sub z22.h, p7/m, z22.h, z14.h + __ sve_fabs(z27, __ S, p4, z23); // fabs z27.s, p4/m, z23.s + __ sve_fadd(z2, __ D, p3, z10); // fadd z2.d, p3/m, z2.d, z10.d + __ sve_fdiv(z10, __ S, p6, z22); // fdiv z10.s, p6/m, z10.s, z22.s + __ sve_fmax(z3, __ S, p5, z16); // fmax z3.s, p5/m, z3.s, z16.s + __ sve_fmin(z1, __ D, p4, z16); // fmin z1.d, p4/m, z1.d, z16.d + __ 
sve_fmul(z12, __ S, p3, z12); // fmul z12.s, p3/m, z12.s, z12.s + __ sve_fneg(z16, __ D, p0, z20); // fneg z16.d, p0/m, z20.d + __ sve_frintm(z5, __ D, p1, z7); // frintm z5.d, p1/m, z7.d + __ sve_frintn(z12, __ D, p7, z16); // frintn z12.d, p7/m, z16.d + __ sve_frintp(z6, __ S, p0, z28); // frintp z6.s, p0/m, z28.s + __ sve_fsqrt(z4, __ D, p1, z17); // fsqrt z4.d, p1/m, z17.d + __ sve_fsub(z13, __ S, p3, z19); // fsub z13.s, p3/m, z13.s, z19.s + __ sve_fmla(z24, __ S, p5, z17, z0); // fmla z24.s, p5/m, z17.s, z0.s + __ sve_fmls(z23, __ S, p1, z19, z30); // fmls z23.s, p1/m, z19.s, z30.s + __ sve_fnmla(z16, __ S, p1, z0, z7); // fnmla z16.s, p1/m, z0.s, z7.s + __ sve_fnmls(z17, __ D, p6, z8, z10); // fnmls z17.d, p6/m, z8.d, z10.d + __ sve_mla(z20, __ B, p5, z27, z2); // mla z20.b, p5/m, z27.b, z2.b + __ sve_mls(z15, __ B, p4, z20, z7); // mls z15.b, p4/m, z20.b, z7.b + __ sve_and(z28, z7, z0); // and z28.d, z7.d, z0.d + __ sve_eor(z16, z19, z22); // eor z16.d, z19.d, z22.d + __ sve_orr(z15, z9, z22); // orr z15.d, z9.d, z22.d // SVEReductionOp - __ sve_andv(v9, __ D, p5, z11); // andv d9, p5, z11.d - __ sve_orv(v5, __ H, p7, z16); // orv h5, p7, z16.h - __ sve_eorv(v22, __ H, p3, z1); // eorv h22, p3, z1.h - __ sve_smaxv(v8, __ D, p5, z16); // smaxv d8, p5, z16.d - __ sve_sminv(v15, __ S, p1, z4); // sminv s15, p1, z4.s - __ sve_fminv(v8, __ S, p1, z29); // fminv s8, p1, z29.s - __ sve_fmaxv(v28, __ D, p4, z29); // fmaxv d28, p4, z29.d - __ sve_fadda(v9, __ S, p3, z2); // fadda s9, p3, s9, z2.s - __ sve_uaddv(v28, __ B, p0, z7); // uaddv d28, p0, z7.b + __ sve_andv(v25, __ S, p1, z30); // andv s25, p1, z30.s + __ sve_orv(v13, __ B, p5, z11); // orv b13, p5, z11.b + __ sve_eorv(v13, __ S, p2, z20); // eorv s13, p2, z20.s + __ sve_smaxv(v25, __ B, p3, z4); // smaxv b25, p3, z4.b + __ sve_sminv(v17, __ D, p2, z6); // sminv d17, p2, z6.d + __ sve_fminv(v4, __ D, p7, z16); // fminv d4, p7, z16.d + __ sve_fmaxv(v26, __ S, p2, z14); // fmaxv s26, p2, z14.s + __ 
sve_fadda(v11, __ S, p7, z3); // fadda s11, p7, s11, z3.s + __ sve_uaddv(v1, __ S, p6, z21); // uaddv d1, p6, z21.s __ bind(forth); @@ -1007,30 +1010,30 @@ void entry(CodeBuffer *cb) { 0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061, 0x120cb166, 0x321764bc, 0x52174681, 0x720c0227, 0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01, - 0x14000000, 0x17ffffd7, 0x140002cd, 0x94000000, - 0x97ffffd4, 0x940002ca, 0x3400000a, 0x34fffa2a, - 0x340058ea, 0x35000008, 0x35fff9c8, 0x35005888, - 0xb400000b, 0xb4fff96b, 0xb400582b, 0xb500001d, - 0xb5fff91d, 0xb50057dd, 0x10000013, 0x10fff8b3, - 0x10005773, 0x90000013, 0x36300016, 0x3637f836, - 0x363056f6, 0x3758000c, 0x375ff7cc, 0x3758568c, + 0x14000000, 0x17ffffd7, 0x140002d0, 0x94000000, + 0x97ffffd4, 0x940002cd, 0x3400000a, 0x34fffa2a, + 0x3400594a, 0x35000008, 0x35fff9c8, 0x350058e8, + 0xb400000b, 0xb4fff96b, 0xb400588b, 0xb500001d, + 0xb5fff91d, 0xb500583d, 0x10000013, 0x10fff8b3, + 0x100057d3, 0x90000013, 0x36300016, 0x3637f836, + 0x36305756, 0x3758000c, 0x375ff7cc, 0x375856ec, 0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc, 0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f, 0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016, 0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0, - 0x54005460, 0x54000001, 0x54fff541, 0x54005401, - 0x54000002, 0x54fff4e2, 0x540053a2, 0x54000002, - 0x54fff482, 0x54005342, 0x54000003, 0x54fff423, - 0x540052e3, 0x54000003, 0x54fff3c3, 0x54005283, - 0x54000004, 0x54fff364, 0x54005224, 0x54000005, - 0x54fff305, 0x540051c5, 0x54000006, 0x54fff2a6, - 0x54005166, 0x54000007, 0x54fff247, 0x54005107, - 0x54000008, 0x54fff1e8, 0x540050a8, 0x54000009, - 0x54fff189, 0x54005049, 0x5400000a, 0x54fff12a, - 0x54004fea, 0x5400000b, 0x54fff0cb, 0x54004f8b, - 0x5400000c, 0x54fff06c, 0x54004f2c, 0x5400000d, - 0x54fff00d, 0x54004ecd, 0x5400000e, 0x54ffefae, - 0x54004e6e, 0x5400000f, 0x54ffef4f, 0x54004e0f, + 0x540054c0, 0x54000001, 0x54fff541, 0x54005461, + 0x54000002, 0x54fff4e2, 0x54005402, 0x54000002, + 0x54fff482, 0x540053a2, 
0x54000003, 0x54fff423, + 0x54005343, 0x54000003, 0x54fff3c3, 0x540052e3, + 0x54000004, 0x54fff364, 0x54005284, 0x54000005, + 0x54fff305, 0x54005225, 0x54000006, 0x54fff2a6, + 0x540051c6, 0x54000007, 0x54fff247, 0x54005167, + 0x54000008, 0x54fff1e8, 0x54005108, 0x54000009, + 0x54fff189, 0x540050a9, 0x5400000a, 0x54fff12a, + 0x5400504a, 0x5400000b, 0x54fff0cb, 0x54004feb, + 0x5400000c, 0x54fff06c, 0x54004f8c, 0x5400000d, + 0x54fff00d, 0x54004f2d, 0x5400000e, 0x54ffefae, + 0x54004ece, 0x5400000f, 0x54ffef4f, 0x54004e6f, 0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60, 0xd44cad80, 0xd503201f, 0xd69f03e0, 0xd6bf03e0, 0xd5033fdf, 0xd5033e9f, 0xd50332bf, 0xd61f0200, @@ -1062,7 +1065,7 @@ void entry(CodeBuffer *cb) { 0x791f226d, 0xf95aa2f3, 0xb9587bb7, 0x395f7176, 0x795d9143, 0x399e7e08, 0x799a2697, 0x79df3422, 0xb99c2624, 0xfd5c2374, 0xbd5fa1d9, 0xfd1d595a, - 0xbd1b1869, 0x58003e5b, 0x1800000b, 0xf8945060, + 0xbd1b1869, 0x58003ebb, 0x1800000b, 0xf8945060, 0xd8000000, 0xf8ae6ba0, 0xf99a0080, 0x1a070035, 0x3a0700a8, 0x5a0e0367, 0x7a11009b, 0x9a000380, 0xba1e030c, 0xda0f0320, 0xfa030301, 0x0b340b11, @@ -1080,9 +1083,9 @@ void entry(CodeBuffer *cb) { 0x9adc2a3b, 0x9ad12c5c, 0x9bce7dea, 0x9b597c6e, 0x1b0e166f, 0x1b1ae490, 0x9b023044, 0x9b089e3d, 0x9b391083, 0x9b24c73a, 0x9bb15f40, 0x9bbcc6af, - 0x1e23095b, 0x1e3918e0, 0x1e2f28c9, 0x1e2a39fd, - 0x1e270a22, 0x1e77096b, 0x1e771ba7, 0x1e6b2b6e, - 0x1e78388b, 0x1e6e09ec, 0x1f1c3574, 0x1f17f98b, + 0x7ea3d55b, 0x1e3908e0, 0x1e2f18c9, 0x1e2a29fd, + 0x1e273a22, 0x7ef7d56b, 0x1e770ba7, 0x1e6b1b6e, + 0x1e78288b, 0x1e6e39ec, 0x1f1c3574, 0x1f17f98b, 0x1f2935da, 0x1f2574ea, 0x1f4b306f, 0x1f5ec7cf, 0x1f6f3e93, 0x1f6226a9, 0x1e2040fb, 0x1e20c3dd, 0x1e214031, 0x1e21c0c2, 0x1e22c06a, 0x1e604178, @@ -1118,75 +1121,76 @@ void entry(CodeBuffer *cb) { 0x2eac856a, 0x6eaf85cd, 0x6ef085ee, 0x0eb6d6b4, 0x4ea3d441, 0x4ef8d6f6, 0x0e209ffe, 0x4e309dee, 0x0e649c62, 0x4e689ce6, 0x0ea59c83, 0x4ea99d07, - 0x2e3adf38, 0x6e22dc20, 0x6e7ddf9b, 0x0e7f97dd, - 
0x4e6794c5, 0x0ea794c5, 0x4ebf97dd, 0x0e2dcd8b, - 0x4e3bcf59, 0x4e62cc20, 0x2e6097fe, 0x6e629420, - 0x2eb39651, 0x6ebe97bc, 0x0ebbcf59, 0x4eabcd49, - 0x4efbcf59, 0x2e2efdac, 0x6e31fe0f, 0x6e6dfd8b, - 0x0e2c656a, 0x4e336651, 0x0e7a6738, 0x4e7766d5, - 0x0eb96717, 0x4ea26420, 0x0e32f630, 0x4e2cf56a, - 0x4e68f4e6, 0x0e3e6fbc, 0x4e286ce6, 0x0e676cc5, - 0x4e676cc5, 0x0eb66eb4, 0x4eb36e51, 0x0eb1f60f, - 0x4eb3f651, 0x4efff7dd, 0x2e3c8f7a, 0x6e3e8fbc, - 0x2e638c41, 0x6e7d8f9b, 0x2ea28c20, 0x6eb68eb4, - 0x6efe8fbc, 0x0e31e60f, 0x4e2ee5ac, 0x4e6ce56a, - 0x0e3e37bc, 0x4e3e37bc, 0x0e753693, 0x4e7836f6, - 0x0eac356a, 0x4ea634a4, 0x4ee037fe, 0x2eb6e6b4, - 0x6eaae528, 0x6ee0e7fe, 0x0e333e51, 0x4e2c3d6a, - 0x0e7d3f9b, 0x4e643c62, 0x0eba3f38, 0x4ea63ca4, - 0x4ee53c83, 0x2e2ae528, 0x6e38e6f6, 0x6e73e651, - 0xba5fd3e3, 0x3a5f03e5, 0xfa411be4, 0x7a42cbe2, - 0x93df03ff, 0xc820ffff, 0x8822fc7f, 0xc8247cbf, - 0x88267fff, 0x4e010fe0, 0x4e081fe1, 0x4e0c1fe1, - 0x4e0a1fe1, 0x4e071fe1, 0x4cc0ac3f, 0x05a08020, - 0x04b0e3e0, 0x0470e7e1, 0x042f9c20, 0x043f9c35, - 0x047f9c20, 0x04ff9c20, 0x04299420, 0x04319160, - 0x0461943e, 0x04a19020, 0x042053ff, 0x047f5401, - 0x25208028, 0x2538cfe0, 0x2578d001, 0x25b8efe2, - 0x25f8f007, 0xa400a3e0, 0xa4a8a7ea, 0xa547a814, - 0xa4084ffe, 0xa55c53e0, 0xa5e1540b, 0xe400fbf6, - 0xe408ffff, 0xe547e400, 0xe4014be0, 0xe4a84fe0, - 0xe5f15000, 0x858043e0, 0x85a043ff, 0xe59f5d08, - 0x1e601000, 0x1e603000, 0x1e621000, 0x1e623000, - 0x1e641000, 0x1e643000, 0x1e661000, 0x1e663000, - 0x1e681000, 0x1e683000, 0x1e6a1000, 0x1e6a3000, - 0x1e6c1000, 0x1e6c3000, 0x1e6e1000, 0x1e6e3000, - 0x1e701000, 0x1e703000, 0x1e721000, 0x1e723000, - 0x1e741000, 0x1e743000, 0x1e761000, 0x1e763000, - 0x1e781000, 0x1e783000, 0x1e7a1000, 0x1e7a3000, - 0x1e7c1000, 0x1e7c3000, 0x1e7e1000, 0x1e7e3000, - 0xf82d83a5, 0xf8380355, 0xf8381303, 0xf83a21f7, - 0xf8353303, 0xf8285299, 0xf8304051, 0xf8217300, - 0xf8246183, 0xf8bf815c, 0xf8ba0182, 0xf8b0103f, - 0xf8ad201d, 0xf8b3322c, 0xf8b6538d, 0xf8be403f, - 
0xf8ba709c, 0xf8be60c4, 0xf8fe81fa, 0xf8e90188, - 0xf8e01034, 0xf8f82002, 0xf8e93358, 0xf8f0507e, - 0xf8ea4157, 0xf8e47050, 0xf8eb6148, 0xf86f8051, - 0xf86a018c, 0xf86f104d, 0xf8672354, 0xf8703044, - 0xf86451ec, 0xf87541f0, 0xf86b72f5, 0xf86c62fa, - 0xb83c816e, 0xb8380181, 0xb83f120a, 0xb8272062, - 0xb82d3233, 0xb8305023, 0xb82b40be, 0xb82873af, - 0xb83e6280, 0xb8a782f4, 0xb8bc0375, 0xb8b91025, - 0xb8b723f0, 0xb8a5312c, 0xb8bc53af, 0xb8b6427f, - 0xb8bf71c5, 0xb8b061ff, 0xb8fb8214, 0xb8ec012b, - 0xb8e6123e, 0xb8fb23dc, 0xb8e7328a, 0xb8ea5304, - 0xb8f142d1, 0xb8e371fd, 0xb8f66273, 0xb87681e2, - 0xb866020c, 0xb86b12ed, 0xb861227e, 0xb8653051, - 0xb87051b6, 0xb86a43b5, 0xb87b736c, 0xb86363e1, - 0xce312677, 0xce0e1b5b, 0xce7e8ed4, 0xce9ed858, - 0xce768151, 0xce718451, 0xcec08300, 0xce628ad9, - 0x04e30191, 0x04f0079d, 0x65dc0126, 0x65870887, - 0x658806c9, 0x0416b7db, 0x0440021a, 0x04d09903, - 0x04dabb55, 0x04138096, 0x04518071, 0x041008c1, - 0x0497bce9, 0x045eb4b6, 0x040813c8, 0x04ca0171, - 0x0481035c, 0x04dcadbc, 0x658098b0, 0x658d89ed, - 0x6586957a, 0x65879096, 0x65829233, 0x04ddac4e, - 0x6582b6e3, 0x6580a626, 0x6581b21b, 0x658dbc62, - 0x65819266, 0x65f8150c, 0x65b72151, 0x65b05db3, - 0x65f165c0, 0x04944ac8, 0x048f607b, 0x042430f4, - 0x04a83007, 0x046432d3, 0x04da3569, 0x04583e05, - 0x04592c36, 0x04c83608, 0x048a248f, 0x658727a8, - 0x65c633bc, 0x65982c49, 0x040120fc, + 0x2ebad738, 0x6ea2d420, 0x6efdd79b, 0x2e3fdfdd, + 0x6e27dcc5, 0x6e67dcc5, 0x0e7f97dd, 0x4e6d958b, + 0x0ebb9759, 0x4ea29420, 0x0e20cffe, 0x4e22cc20, + 0x4e73ce51, 0x2e7e97bc, 0x6e7b9759, 0x2eab9549, + 0x6ebb9759, 0x0eaecdac, 0x4eb1ce0f, 0x4eedcd8b, + 0x2e2cfd6a, 0x6e33fe51, 0x6e7aff38, 0x0e3766d5, + 0x4e396717, 0x0e626420, 0x4e726630, 0x0eac656a, + 0x4ea864e6, 0x0e3ef7bc, 0x4e28f4e6, 0x4e67f4c5, + 0x0e276cc5, 0x4e366eb4, 0x0e736e51, 0x4e716e0f, + 0x0eb36e51, 0x4ebf6fdd, 0x0ebcf77a, 0x4ebef7bc, + 0x4ee3f441, 0x2e3d8f9b, 0x6e228c20, 0x2e768eb4, + 0x6e7e8fbc, 0x2eb18e0f, 0x6eae8dac, 0x6eec8d6a, + 0x0e3ee7bc, 
0x4e3ee7bc, 0x4e75e693, 0x0e3836f6, + 0x4e2c356a, 0x0e6634a4, 0x4e6037fe, 0x0eb636b4, + 0x4eaa3528, 0x4ee037fe, 0x2eb3e651, 0x6eace56a, + 0x6efde79b, 0x0e243c62, 0x4e3a3f38, 0x0e663ca4, + 0x4e653c83, 0x0eaa3d28, 0x4eb83ef6, 0x4ef33e51, + 0x2e2fe5cd, 0x6e26e4a4, 0x6e7ee7bc, 0xba5fd3e3, + 0x3a5f03e5, 0xfa411be4, 0x7a42cbe2, 0x93df03ff, + 0xc820ffff, 0x8822fc7f, 0xc8247cbf, 0x88267fff, + 0x4e010fe0, 0x4e081fe1, 0x4e0c1fe1, 0x4e0a1fe1, + 0x4e071fe1, 0x4cc0ac3f, 0x05a08020, 0x04b0e3e0, + 0x0470e7e1, 0x042f9c20, 0x043f9c35, 0x047f9c20, + 0x04ff9c20, 0x04299420, 0x04319160, 0x0461943e, + 0x04a19020, 0x042053ff, 0x047f5401, 0x25208028, + 0x2538cfe0, 0x2578d001, 0x25b8efe2, 0x25f8f007, + 0xa400a3e0, 0xa4a8a7ea, 0xa547a814, 0xa4084ffe, + 0xa55c53e0, 0xa5e1540b, 0xe400fbf6, 0xe408ffff, + 0xe547e400, 0xe4014be0, 0xe4a84fe0, 0xe5f15000, + 0x858043e0, 0x85a043ff, 0xe59f5d08, 0x1e601000, + 0x1e603000, 0x1e621000, 0x1e623000, 0x1e641000, + 0x1e643000, 0x1e661000, 0x1e663000, 0x1e681000, + 0x1e683000, 0x1e6a1000, 0x1e6a3000, 0x1e6c1000, + 0x1e6c3000, 0x1e6e1000, 0x1e6e3000, 0x1e701000, + 0x1e703000, 0x1e721000, 0x1e723000, 0x1e741000, + 0x1e743000, 0x1e761000, 0x1e763000, 0x1e781000, + 0x1e783000, 0x1e7a1000, 0x1e7a3000, 0x1e7c1000, + 0x1e7c3000, 0x1e7e1000, 0x1e7e3000, 0xf8388355, + 0xf8380303, 0xf83a11f7, 0xf8352303, 0xf8283299, + 0xf8305051, 0xf8214300, 0xf8247183, 0xf83f615c, + 0xf8ba8182, 0xf8b0003f, 0xf8ad101d, 0xf8b3222c, + 0xf8b6338d, 0xf8be503f, 0xf8ba409c, 0xf8be70c4, + 0xf8be61fa, 0xf8e98188, 0xf8e00034, 0xf8f81002, + 0xf8e92358, 0xf8f0307e, 0xf8ea5157, 0xf8e44050, + 0xf8eb7148, 0xf8ef6051, 0xf86a818c, 0xf86f004d, + 0xf8671354, 0xf8702044, 0xf86431ec, 0xf87551f0, + 0xf86b42f5, 0xf86c72fa, 0xf87c616e, 0xb8388181, + 0xb83f020a, 0xb8271062, 0xb82d2233, 0xb8303023, + 0xb82b50be, 0xb82843af, 0xb83e7280, 0xb82762f4, + 0xb8bc8375, 0xb8b90025, 0xb8b713f0, 0xb8a5212c, + 0xb8bc33af, 0xb8b6527f, 0xb8bf41c5, 0xb8b071ff, + 0xb8bb6214, 0xb8ec812b, 0xb8e6023e, 0xb8fb13dc, + 0xb8e7228a, 
0xb8ea3304, 0xb8f152d1, 0xb8e341fd, + 0xb8f67273, 0xb8f661e2, 0xb866820c, 0xb86b02ed, + 0xb861127e, 0xb8652051, 0xb87031b6, 0xb86a53b5, + 0xb87b436c, 0xb86373e1, 0xb8786233, 0xce3a3b69, + 0xce167a86, 0xce7e8c58, 0xce8aba3a, 0xce718051, + 0xce798700, 0xcec08056, 0xce638991, 0x04bc03bb, + 0x04e904da, 0x658400f1, 0x6596092f, 0x65d40762, + 0x0496b805, 0x0440072e, 0x04d0975b, 0x041ab418, + 0x04139006, 0x0411812f, 0x04100b65, 0x0417b694, + 0x04deaa0a, 0x04481046, 0x04ca1c5d, 0x04411dd6, + 0x049cb2fb, 0x65c08d42, 0x658d9aca, 0x65869603, + 0x65c79201, 0x65828d8c, 0x04dda290, 0x65c2a4e5, + 0x65c0be0c, 0x6581a386, 0x65cda624, 0x65818e6d, + 0x65a01638, 0x65be2677, 0x65a74410, 0x65ea7911, + 0x04025774, 0x0407728f, 0x042030fc, 0x04b63270, + 0x0476312f, 0x049a27d9, 0x0418356d, 0x04992a8d, + 0x04082c99, 0x04ca28d1, 0x65c73e04, 0x658629da, + 0x65983c6b, 0x04813aa1, }; // END Generated code -- do not edit diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp index 7ff9c018bef..d24e8d21c5a 100644 --- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp @@ -1949,7 +1949,7 @@ public: starti; f(op31, 31, 29); f(0b11110, 28, 24); - f(type, 23, 22), f(1, 21), f(opcode, 15, 12), f(0b10, 11, 10); + f(type, 23, 22), f(1, 21), f(opcode, 15, 10); rf(Vm, 16), rf(Vn, 5), rf(Vd, 0); } @@ -1958,21 +1958,23 @@ public: data_processing(op31, type, opcode, Vd, Vn, Vm); \ } - INSN(fmuls, 0b000, 0b00, 0b0000); - INSN(fdivs, 0b000, 0b00, 0b0001); - INSN(fadds, 0b000, 0b00, 0b0010); - INSN(fsubs, 0b000, 0b00, 0b0011); - INSN(fmaxs, 0b000, 0b00, 0b0100); - INSN(fmins, 0b000, 0b00, 0b0101); - INSN(fnmuls, 0b000, 0b00, 0b1000); + INSN(fabds, 0b011, 0b10, 0b110101); + INSN(fmuls, 0b000, 0b00, 0b000010); + INSN(fdivs, 0b000, 0b00, 0b000110); + INSN(fadds, 0b000, 0b00, 0b001010); + INSN(fsubs, 0b000, 0b00, 0b001110); + INSN(fmaxs, 0b000, 0b00, 0b010010); + INSN(fmins, 0b000, 0b00, 0b010110); + INSN(fnmuls, 
0b000, 0b00, 0b100010); - INSN(fmuld, 0b000, 0b01, 0b0000); - INSN(fdivd, 0b000, 0b01, 0b0001); - INSN(faddd, 0b000, 0b01, 0b0010); - INSN(fsubd, 0b000, 0b01, 0b0011); - INSN(fmaxd, 0b000, 0b01, 0b0100); - INSN(fmind, 0b000, 0b01, 0b0101); - INSN(fnmuld, 0b000, 0b01, 0b1000); + INSN(fabdd, 0b011, 0b11, 0b110101); + INSN(fmuld, 0b000, 0b01, 0b000010); + INSN(fdivd, 0b000, 0b01, 0b000110); + INSN(faddd, 0b000, 0b01, 0b001010); + INSN(fsubd, 0b000, 0b01, 0b001110); + INSN(fmaxd, 0b000, 0b01, 0b010010); + INSN(fmind, 0b000, 0b01, 0b010110); + INSN(fnmuld, 0b000, 0b01, 0b100010); #undef INSN @@ -2482,6 +2484,7 @@ public: f(T==T2D ? 1:0, 22); f(1, 21), rf(Vm, 16), f(op3, 15, 10), rf(Vn, 5), rf(Vd, 0); \ } + INSN(fabd, 1, 1, 0b110101); INSN(fadd, 0, 0, 0b110101); INSN(fdiv, 1, 0, 0b111111); INSN(fmul, 1, 0, 0b110111); @@ -2689,7 +2692,7 @@ public: INSN(sshr, 0, 0b000001, /* isSHR = */ true); INSN(ushr, 1, 0b000001, /* isSHR = */ true); INSN(usra, 1, 0b000101, /* isSHR = */ true); - INSN(ssra, 0, 0b000101, /* isSHAR =*/ true); + INSN(ssra, 0, 0b000101, /* isSHR = */ true); #undef INSN diff --git a/test/hotspot/jtreg/compiler/c2/Test8217359.java b/test/hotspot/jtreg/compiler/c2/Test8217359.java index ca0d2cc7544..533bdce4b91 100644 --- a/test/hotspot/jtreg/compiler/c2/Test8217359.java +++ b/test/hotspot/jtreg/compiler/c2/Test8217359.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, Huawei Technologies Co. Ltd. All rights reserved. + * Copyright (c) 2019, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/test/hotspot/jtreg/compiler/c2/TestFoldCompares.java b/test/hotspot/jtreg/compiler/c2/TestFoldCompares.java index 735ecf76b49..6b7aa50532c 100644 --- a/test/hotspot/jtreg/compiler/c2/TestFoldCompares.java +++ b/test/hotspot/jtreg/compiler/c2/TestFoldCompares.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Huawei Technologies Co. 
Ltd. All rights reserved. + * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/test/hotspot/jtreg/compiler/c2/TestReplaceEquivPhis.java b/test/hotspot/jtreg/compiler/c2/TestReplaceEquivPhis.java index d4c93b390dd..229df93e5c4 100644 --- a/test/hotspot/jtreg/compiler/c2/TestReplaceEquivPhis.java +++ b/test/hotspot/jtreg/compiler/c2/TestReplaceEquivPhis.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved. + * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/test/hotspot/jtreg/compiler/c2/aarch64/TestVectorShiftShorts.java b/test/hotspot/jtreg/compiler/c2/aarch64/TestVectorShiftShorts.java index 75e48ff44bb..3c572ca3ec8 100644 --- a/test/hotspot/jtreg/compiler/c2/aarch64/TestVectorShiftShorts.java +++ b/test/hotspot/jtreg/compiler/c2/aarch64/TestVectorShiftShorts.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved. + * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA3IntrinsicsOptionOnSupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA3IntrinsicsOptionOnSupportedCPU.java index bf280d0896f..8886190817e 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA3IntrinsicsOptionOnSupportedCPU.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA3IntrinsicsOptionOnSupportedCPU.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Huawei Technologies Co. Ltd. 
All rights reserved. + * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA3IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA3IntrinsicsOptionOnUnsupportedCPU.java index 6ee6bbf7af8..9f27e873226 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA3IntrinsicsOptionOnUnsupportedCPU.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA3IntrinsicsOptionOnUnsupportedCPU.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved. + * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/sanity/TestSHA3Intrinsics.java b/test/hotspot/jtreg/compiler/intrinsics/sha/sanity/TestSHA3Intrinsics.java index af8919df5e5..34a1bf4514a 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/sha/sanity/TestSHA3Intrinsics.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/sanity/TestSHA3Intrinsics.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved. + * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/sanity/TestSHA3MultiBlockIntrinsics.java b/test/hotspot/jtreg/compiler/intrinsics/sha/sanity/TestSHA3MultiBlockIntrinsics.java index 94dddc885a2..cb51ef94821 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/sha/sanity/TestSHA3MultiBlockIntrinsics.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/sanity/TestSHA3MultiBlockIntrinsics.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved. + * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/test/hotspot/jtreg/compiler/loopopts/TestBeautifyLoops.java b/test/hotspot/jtreg/compiler/loopopts/TestBeautifyLoops.java index c831430ede1..8fcafc59b9d 100644 --- a/test/hotspot/jtreg/compiler/loopopts/TestBeautifyLoops.java +++ b/test/hotspot/jtreg/compiler/loopopts/TestBeautifyLoops.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved. + * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/test/hotspot/jtreg/compiler/loopopts/TestBeautifyLoops_2.java b/test/hotspot/jtreg/compiler/loopopts/TestBeautifyLoops_2.java index c53f33ff2e2..90fb2242fdb 100644 --- a/test/hotspot/jtreg/compiler/loopopts/TestBeautifyLoops_2.java +++ b/test/hotspot/jtreg/compiler/loopopts/TestBeautifyLoops_2.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved. + * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it diff --git a/test/hotspot/jtreg/compiler/loopopts/TestRemoveEmptyLoop.java b/test/hotspot/jtreg/compiler/loopopts/TestRemoveEmptyLoop.java index 94c79c9c214..780c092115b 100644 --- a/test/hotspot/jtreg/compiler/loopopts/TestRemoveEmptyLoop.java +++ b/test/hotspot/jtreg/compiler/loopopts/TestRemoveEmptyLoop.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, Huawei Technologies Co. Ltd. All rights reserved. + * Copyright (c) 2019, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/TestSearchAlignment.java b/test/hotspot/jtreg/compiler/loopopts/superword/TestSearchAlignment.java index 37be0152481..5242adb43b5 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/TestSearchAlignment.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/TestSearchAlignment.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved. + * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/test/hotspot/jtreg/runtime/invokedynamic/DynamicConstantHelper.jasm b/test/hotspot/jtreg/runtime/invokedynamic/DynamicConstantHelper.jasm index 01ab0c9f187..4fa919e940d 100644 --- a/test/hotspot/jtreg/runtime/invokedynamic/DynamicConstantHelper.jasm +++ b/test/hotspot/jtreg/runtime/invokedynamic/DynamicConstantHelper.jasm @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved. + * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it diff --git a/test/hotspot/jtreg/runtime/invokedynamic/TestDynamicConstant.java b/test/hotspot/jtreg/runtime/invokedynamic/TestDynamicConstant.java index 1d8d3c669ea..24498ece481 100644 --- a/test/hotspot/jtreg/runtime/invokedynamic/TestDynamicConstant.java +++ b/test/hotspot/jtreg/runtime/invokedynamic/TestDynamicConstant.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved. + * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/test/jdk/sun/security/provider/MessageDigest/SHA3.java b/test/jdk/sun/security/provider/MessageDigest/SHA3.java index 949c5ebdf13..70ab509c7f6 100644 --- a/test/jdk/sun/security/provider/MessageDigest/SHA3.java +++ b/test/jdk/sun/security/provider/MessageDigest/SHA3.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved. + * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it diff --git a/test/micro/org/openjdk/bench/java/util/Base64Encode.java b/test/micro/org/openjdk/bench/java/util/Base64Encode.java index 297f216eb49..1cbd53f126b 100644 --- a/test/micro/org/openjdk/bench/java/util/Base64Encode.java +++ b/test/micro/org/openjdk/bench/java/util/Base64Encode.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved. + * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it
diff --git a/test/micro/org/openjdk/bench/vm/compiler/FloatingScalarVectorAbsDiff.java b/test/micro/org/openjdk/bench/vm/compiler/FloatingScalarVectorAbsDiff.java
new file mode 100644
index 00000000000..ffae1ad8025
--- /dev/null
+++ b/test/micro/org/openjdk/bench/vm/compiler/FloatingScalarVectorAbsDiff.java
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.openjdk.bench.vm.compiler;
+
+import org.openjdk.jmh.annotations.*;
+import org.openjdk.jmh.infra.*;
+
+import java.util.concurrent.TimeUnit;
+import java.util.Random;
+
+/**
+ * Benchmarks {@code Math.abs(a - b)} for float and double operands, both as
+ * element-wise array loops and as loop-carried scalar chains.
+ */
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@State(Scope.Thread)
+public class FloatingScalarVectorAbsDiff {
+    /** Length of the operand arrays and trip count of the scalar loops. */
+    @Param({"1024"})
+    public int count;
+
+    private float[] floatsA, floatsB, floatsD;
+    private double[] doublesA, doublesB, doublesD;
+
+    /** Seed for the random operand values. */
+    @Param("316731")
+    private int seed;
+
+    // Created in init(): JMH only guarantees that @Param values are injected
+    // before @Setup methods run, so a field initializer could still observe
+    // the default seed of 0 instead of the configured one.
+    private Random r;
+
+    /** Allocates the arrays and fills both operands of each type with random values. */
+    @Setup
+    public void init() {
+        r = new Random(seed);
+
+        floatsA = new float[count];
+        doublesA = new double[count];
+
+        floatsB = new float[count];
+        doublesB = new double[count];
+
+        floatsD = new float[count];
+        doublesD = new double[count];
+
+        for (int i = 0; i < count; i++) {
+            floatsA[i] = r.nextFloat();
+            doublesA[i] = r.nextDouble();
+
+            floatsB[i] = r.nextFloat();
+            doublesB[i] = r.nextDouble();
+        }
+    }
+
+    /** Element-wise absolute difference of two float arrays. */
+    @Benchmark
+    public void testVectorAbsDiffFloat() {
+        for (int i = 0; i < count; i++) {
+            floatsD[i] = Math.abs(floatsA[i] - floatsB[i]);
+        }
+    }
+
+    /** Element-wise absolute difference of two double arrays. */
+    @Benchmark
+    public void testVectorAbsDiffDouble() {
+        for (int i = 0; i < count; i++) {
+            doublesD[i] = Math.abs(doublesA[i] - doublesB[i]);
+        }
+    }
+
+    /** Chain of float absolute differences in which every step uses the previous results. */
+    @Benchmark
+    public void testScalarAbsDiffFloat(Blackhole bh) {
+        float a = r.nextFloat();
+        float b = r.nextFloat();
+
+        for (int i = 0; i < count; i++) {
+            a = Math.abs(a - b);
+            b = Math.abs(b - a);
+        }
+
+        bh.consume(a + b);
+    }
+
+    /** Chain of double absolute differences in which every step uses the previous results. */
+    @Benchmark
+    public void testScalarAbsDiffDouble(Blackhole bh) {
+        double a = r.nextDouble();
+        double b = r.nextDouble();
+
+        for (int i = 0; i < count; i++) {
+            a = Math.abs(a - b);
+            b = Math.abs(b - a);
+        }
+
+        bh.consume(a + b);
+    }
+}
diff --git a/test/micro/org/openjdk/bench/vm/compiler/VectorShiftAccumulate.java b/test/micro/org/openjdk/bench/vm/compiler/VectorShiftAccumulate.java index d9c729f56a0..e655209e714 100644 --- a/test/micro/org/openjdk/bench/vm/compiler/VectorShiftAccumulate.java +++ 
b/test/micro/org/openjdk/bench/vm/compiler/VectorShiftAccumulate.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved. + * Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it