From 37c6b23f5b82311c82f5fe981f104824f87e3e54 Mon Sep 17 00:00:00 2001 From: Fei Gao Date: Tue, 15 Aug 2023 01:04:22 +0000 Subject: [PATCH] 8308340: C2: Idealize Fma nodes Reviewed-by: kvn, epeter --- src/hotspot/cpu/aarch64/aarch64.ad | 49 +++-- src/hotspot/cpu/aarch64/aarch64_vector.ad | 81 +++----- src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 | 81 +++----- src/hotspot/cpu/ppc/ppc.ad | 49 +++-- src/hotspot/cpu/riscv/riscv.ad | 37 ++-- src/hotspot/cpu/riscv/riscv_v.ad | 56 +++--- src/hotspot/cpu/s390/s390.ad | 15 ++ src/hotspot/cpu/x86/x86.ad | 8 +- src/hotspot/share/opto/c2compiler.cpp | 4 +- src/hotspot/share/opto/mulnode.cpp | 14 ++ src/hotspot/share/opto/mulnode.hpp | 22 ++- src/hotspot/share/opto/node.hpp | 8 +- src/hotspot/share/opto/subnode.hpp | 4 +- src/hotspot/share/opto/vectornode.cpp | 15 ++ src/hotspot/share/opto/vectornode.hpp | 28 ++- .../jtreg/compiler/c2/irTests/TestIRFma.java | 185 ++++++++++++++++++ .../compiler/lib/ir_framework/IRNode.java | 35 ++++ .../VectorFusedMultiplyAddSubTest.java | 82 +++++++- .../runner/BasicDoubleOpTest.java | 10 +- .../runner/BasicFloatOpTest.java | 10 +- 20 files changed, 572 insertions(+), 221 deletions(-) create mode 100644 test/hotspot/jtreg/compiler/c2/irTests/TestIRFma.java diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index 6990e7c60b9..7ce4ed9584d 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -2289,7 +2289,6 @@ bool Matcher::match_rule_supported(int opcode) { if (!has_match_rule(opcode)) return false; - bool ret_value = true; switch (opcode) { case Op_OnSpinWait: return VM_Version::supports_on_spin_wait(); @@ -2297,18 +2296,26 @@ bool Matcher::match_rule_supported(int opcode) { case Op_CacheWBPreSync: case Op_CacheWBPostSync: if (!VM_Version::supports_data_cache_line_flush()) { - ret_value = false; + return false; } break; case Op_ExpandBits: case Op_CompressBits: if (!VM_Version::supports_svebitperm()) { - ret_value = false; + return false; + } + break; + case Op_FmaF: + case Op_FmaD: + case Op_FmaVF: + case Op_FmaVD: + if (!UseFMA) { + return false; } break; } - return ret_value; // Per default match rules are supported. + return true; // Per default match rules are supported. } const RegMask* Matcher::predicate_reg_mask(void) { @@ -14305,12 +14312,12 @@ instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{ // src1 * src2 + src3 instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{ - predicate(UseFMA); match(Set dst (FmaF src3 (Binary src1 src2))); format %{ "fmadds $dst, $src1, $src2, $src3" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ fmadds(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), @@ -14322,12 +14329,12 @@ instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{ // src1 * src2 + src3 instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{ - predicate(UseFMA); match(Set dst (FmaD src3 (Binary src1 src2))); format %{ "fmaddd $dst, $src1, $src2, $src3" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ fmaddd(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), @@ -14337,15 +14344,15 @@ instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{ ins_pipe(pipe_class_default); %} -// -src1 * src2 + src3 +// src1 * (-src2) + src3 +// "(-src1) * src2 + src3" has been idealized to "src2 * (-src1) + src3" instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{ - predicate(UseFMA); - match(Set dst (FmaF src3 (Binary (NegF src1) src2))); match(Set dst (FmaF src3 (Binary src1 (NegF src2)))); format %{ "fmsubs $dst, $src1, $src2, $src3" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ fmsubs(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), @@ -14355,15 +14362,15 @@ instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{ ins_pipe(pipe_class_default); %} -// -src1 * src2 + src3 +// src1 * (-src2) + src3 +// "(-src1) * src2 + src3" has been idealized to "src2 * (-src1) + src3" instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{ - predicate(UseFMA); - match(Set dst (FmaD src3 (Binary (NegD src1) src2))); match(Set dst (FmaD src3 (Binary src1 (NegD src2)))); format %{ "fmsubd $dst, $src1, $src2, $src3" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ fmsubd(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), @@ -14373,15 +14380,15 @@ instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{ ins_pipe(pipe_class_default); %} -// -src1 * src2 - src3 +// src1 * (-src2) - src3 +// "(-src1) * src2 - src3" has been idealized to "src2 * (-src1) - src3" instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{ - predicate(UseFMA); - match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2))); match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2)))); format %{ "fnmadds $dst, $src1, $src2, $src3" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ fnmadds(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), @@ -14391,15 +14398,15 @@ instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{ ins_pipe(pipe_class_default); %} -// -src1 * src2 - src3 +// src1 * (-src2) - src3 +// "(-src1) * src2 - src3" has been idealized to "src2 * (-src1) - src3" instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{ - predicate(UseFMA); - match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2))); match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2)))); format %{ "fnmaddd $dst, $src1, $src2, $src3" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ fnmaddd(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), @@ -14411,12 +14418,12 @@ instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{ // src1 * src2 - src3 instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{ - predicate(UseFMA); match(Set dst (FmaF (NegF src3) (Binary src1 src2))); format %{ "fnmsubs $dst, $src1, $src2, $src3" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ fnmsubs(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), @@ -14428,13 +14435,13 @@ instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zer // src1 * src2 - src3 instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{ - predicate(UseFMA); match(Set dst (FmaD (NegD src3) (Binary src1 src2))); format %{ "fnmsubd $dst, $src1, $src2, $src3" %} ins_encode %{ - // n.b. insn name should be fnmsubd + assert(UseFMA, "Needs FMA instructions support."); + // n.b. insn name should be fnmsubd __ fnmsub(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), diff --git a/src/hotspot/cpu/aarch64/aarch64_vector.ad b/src/hotspot/cpu/aarch64/aarch64_vector.ad index bd669e99435..a0df8fc3c97 100644 --- a/src/hotspot/cpu/aarch64/aarch64_vector.ad +++ b/src/hotspot/cpu/aarch64/aarch64_vector.ad @@ -2131,14 +2131,14 @@ instruct vmla_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{ %} // vector fmla -// dst_src1 = dst_src1 + src2 * src3 +// dst_src1 = src2 * src3 + dst_src1 instruct vfmla(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA); match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3))); match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3))); format %{ "vfmla $dst_src1, $src2, $src3" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); uint length_in_bytes = Matcher::vector_length_in_bytes(this); if (VM_Version::use_neon_for_vector(length_in_bytes)) { __ fmla($dst_src1$$FloatRegister, get_arrangement(this), @@ -2157,11 +2157,12 @@ instruct vfmla(vReg dst_src1, vReg src2, vReg src3) %{ // dst_src1 = dst_src1 * src2 + src3 instruct vfmad_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{ - predicate(UseFMA && UseSVE > 0); + predicate(UseSVE > 0); match(Set dst_src1 (FmaVF (Binary dst_src1 src2) (Binary src3 pg))); match(Set dst_src1 (FmaVD (Binary dst_src1 src2) (Binary src3 pg))); format %{ "vfmad_masked $dst_src1, $pg, $src2, $src3" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); BasicType bt = Matcher::vector_element_basic_type(this); __ sve_fmad($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt), $pg$$PRegister, $src2$$FloatRegister, $src3$$FloatRegister); @@ -2221,34 +2222,14 @@ instruct vmls_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{ // vector fmls -// dst_src1 = dst_src1 + -src2 * src3 -instruct vfmls1(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA); - match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3))); - match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3))); - format %{ "vfmls1 $dst_src1, $src2, $src3" %} - ins_encode %{ - uint length_in_bytes = Matcher::vector_length_in_bytes(this); - if (VM_Version::use_neon_for_vector(length_in_bytes)) { - __ fmls($dst_src1$$FloatRegister, get_arrangement(this), - $src2$$FloatRegister, $src3$$FloatRegister); - } else { - assert(UseSVE > 0, "must be sve"); - BasicType bt = Matcher::vector_element_basic_type(this); - __ sve_fmls($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt), - ptrue, $src2$$FloatRegister, $src3$$FloatRegister); - } - %} - ins_pipe(pipe_slow); -%} - -// dst_src1 = dst_src1 + src2 * -src3 -instruct vfmls2(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA); +// dst_src1 = src2 * (-src3) + dst_src1 +// "(-src2) * src3 + dst_src1" has been idealized to "src3 * (-src2) + dst_src1" +instruct vfmls(vReg dst_src1, vReg src2, vReg src3) %{ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3)))); match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3)))); - format %{ "vfmls2 $dst_src1, $src2, $src3" %} + format %{ "vfmls $dst_src1, $src2, $src3" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); uint length_in_bytes = Matcher::vector_length_in_bytes(this); if (VM_Version::use_neon_for_vector(length_in_bytes)) { __ fmls($dst_src1$$FloatRegister, get_arrangement(this), @@ -2265,13 +2246,14 @@ instruct vfmls2(vReg dst_src1, vReg src2, vReg src3) %{ // vector fmsb - predicated -// dst_src1 = dst_src1 * -src2 + src3 +// dst_src1 = dst_src1 * (-src2) + src3 instruct vfmsb_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{ - predicate(UseFMA && UseSVE > 0); + predicate(UseSVE > 0); match(Set dst_src1 (FmaVF (Binary dst_src1 (NegVF src2)) (Binary src3 pg))); match(Set dst_src1 (FmaVD (Binary dst_src1 (NegVD src2)) (Binary src3 pg))); format %{ "vfmsb_masked $dst_src1, $pg, $src2, $src3" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); BasicType bt = Matcher::vector_element_basic_type(this); __ sve_fmsb($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt), $pg$$PRegister, $src2$$FloatRegister, $src3$$FloatRegister); @@ -2281,27 +2263,15 @@ instruct vfmsb_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{ // vector fnmla (sve) -// dst_src1 = -dst_src1 + -src2 * src3 -instruct vfnmla1(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA && UseSVE > 0); - match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3))); - match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3))); - format %{ "vfnmla1 $dst_src1, $src2, $src3" %} - ins_encode %{ - BasicType bt = Matcher::vector_element_basic_type(this); - __ sve_fnmla($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt), - ptrue, $src2$$FloatRegister, $src3$$FloatRegister); - %} - ins_pipe(pipe_slow); -%} - -// dst_src1 = -dst_src1 + src2 * -src3 -instruct vfnmla2(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA && UseSVE > 0); +// dst_src1 = src2 * (-src3) - dst_src1 +// "(-src2) * src3 - dst_src1" has been idealized to "src3 * (-src2) - dst_src1" +instruct vfnmla(vReg dst_src1, vReg src2, vReg src3) %{ + predicate(UseSVE > 0); match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3)))); match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3)))); - format %{ "vfnmla2 $dst_src1, $src2, $src3" %} + format %{ "vfnmla $dst_src1, $src2, $src3" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); BasicType bt = Matcher::vector_element_basic_type(this); __ sve_fnmla($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt), ptrue, $src2$$FloatRegister, $src3$$FloatRegister); @@ -2311,13 +2281,14 @@ instruct vfnmla2(vReg dst_src1, vReg src2, vReg src3) %{ // vector fnmad - predicated -// dst_src1 = -src3 + dst_src1 * -src2 +// dst_src1 = dst_src1 * (-src2) - src3 instruct vfnmad_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{ - predicate(UseFMA && UseSVE > 0); + predicate(UseSVE > 0); match(Set dst_src1 (FmaVF (Binary dst_src1 (NegVF src2)) (Binary (NegVF src3) pg))); match(Set dst_src1 (FmaVD (Binary dst_src1 (NegVD src2)) (Binary (NegVD src3) pg))); format %{ "vfnmad_masked $dst_src1, $pg, $src2, $src3" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); BasicType bt = Matcher::vector_element_basic_type(this); __ sve_fnmad($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt), $pg$$PRegister, $src2$$FloatRegister, $src3$$FloatRegister); @@ -2327,13 +2298,14 @@ instruct vfnmad_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{ // vector fnmls (sve) -// dst_src1 = -dst_src1 + src2 * src3 +// dst_src1 = src2 * src3 - dst_src1 instruct vfnmls(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA && UseSVE > 0); + predicate(UseSVE > 0); match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3))); match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3))); format %{ "vfnmls $dst_src1, $src2, $src3" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); BasicType bt = Matcher::vector_element_basic_type(this); __ sve_fnmls($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt), ptrue, $src2$$FloatRegister, $src3$$FloatRegister); @@ -2343,13 +2315,14 @@ instruct vfnmls(vReg dst_src1, vReg src2, vReg src3) %{ // vector fnmsb - predicated -// dst_src1 = -src3 + dst_src1 * src2 +// dst_src1 = dst_src1 * src2 - src3 instruct vfnmsb_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{ - predicate(UseFMA && UseSVE > 0); + predicate(UseSVE > 0); match(Set dst_src1 (FmaVF (Binary dst_src1 src2) (Binary (NegVF src3) pg))); match(Set dst_src1 (FmaVD (Binary dst_src1 src2) (Binary (NegVD src3) pg))); format %{ "vfnmsb_masked $dst_src1, $pg, $src2, $src3" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); BasicType bt = Matcher::vector_element_basic_type(this); __ sve_fnmsb($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt), $pg$$PRegister, $src2$$FloatRegister, $src3$$FloatRegister); diff --git a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 index 4a2c5947d1a..cb22e96dc8f 100644 --- a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 +++ b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 @@ -1173,14 +1173,14 @@ instruct vmla_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{ %} // vector fmla -// dst_src1 = dst_src1 + src2 * src3 +// dst_src1 = src2 * src3 + dst_src1 instruct vfmla(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA); match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3))); match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3))); format %{ "vfmla $dst_src1, $src2, $src3" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); uint length_in_bytes = Matcher::vector_length_in_bytes(this); if (VM_Version::use_neon_for_vector(length_in_bytes)) { __ fmla($dst_src1$$FloatRegister, get_arrangement(this), @@ -1199,11 +1199,12 @@ instruct vfmla(vReg dst_src1, vReg src2, vReg src3) %{ // dst_src1 = dst_src1 * src2 + src3 instruct vfmad_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{ - predicate(UseFMA && UseSVE > 0); + predicate(UseSVE > 0); match(Set dst_src1 (FmaVF (Binary dst_src1 src2) (Binary src3 pg))); match(Set dst_src1 (FmaVD (Binary dst_src1 src2) (Binary src3 pg))); format %{ "vfmad_masked $dst_src1, $pg, $src2, $src3" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); BasicType bt = Matcher::vector_element_basic_type(this); __ sve_fmad($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt), $pg$$PRegister, $src2$$FloatRegister, $src3$$FloatRegister); @@ -1263,34 +1264,14 @@ instruct vmls_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{ // vector fmls -// dst_src1 = dst_src1 + -src2 * src3 -instruct vfmls1(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA); - match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3))); - match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3))); - format %{ "vfmls1 $dst_src1, $src2, $src3" %} - ins_encode %{ - uint length_in_bytes = Matcher::vector_length_in_bytes(this); - if (VM_Version::use_neon_for_vector(length_in_bytes)) { - __ fmls($dst_src1$$FloatRegister, get_arrangement(this), - $src2$$FloatRegister, $src3$$FloatRegister); - } else { - assert(UseSVE > 0, "must be sve"); - BasicType bt = Matcher::vector_element_basic_type(this); - __ sve_fmls($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt), - ptrue, $src2$$FloatRegister, $src3$$FloatRegister); - } - %} - ins_pipe(pipe_slow); -%} - -// dst_src1 = dst_src1 + src2 * -src3 -instruct vfmls2(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA); +// dst_src1 = src2 * (-src3) + dst_src1 +// "(-src2) * src3 + dst_src1" has been idealized to "src3 * (-src2) + dst_src1" +instruct vfmls(vReg dst_src1, vReg src2, vReg src3) %{ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3)))); match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3)))); - format %{ "vfmls2 $dst_src1, $src2, $src3" %} + format %{ "vfmls $dst_src1, $src2, $src3" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); uint length_in_bytes = Matcher::vector_length_in_bytes(this); if (VM_Version::use_neon_for_vector(length_in_bytes)) { __ fmls($dst_src1$$FloatRegister, get_arrangement(this), @@ -1307,13 +1288,14 @@ instruct vfmls2(vReg dst_src1, vReg src2, vReg src3) %{ // vector fmsb - predicated -// dst_src1 = dst_src1 * -src2 + src3 +// dst_src1 = dst_src1 * (-src2) + src3 instruct vfmsb_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{ - predicate(UseFMA && UseSVE > 0); + predicate(UseSVE > 0); match(Set dst_src1 (FmaVF (Binary dst_src1 (NegVF src2)) (Binary src3 pg))); match(Set dst_src1 (FmaVD (Binary dst_src1 (NegVD src2)) (Binary src3 pg))); format %{ "vfmsb_masked $dst_src1, $pg, $src2, $src3" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); BasicType bt = Matcher::vector_element_basic_type(this); __ sve_fmsb($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt), $pg$$PRegister, $src2$$FloatRegister, $src3$$FloatRegister); @@ -1323,27 +1305,15 @@ instruct vfmsb_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{ // vector fnmla (sve) -// dst_src1 = -dst_src1 + -src2 * src3 -instruct vfnmla1(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA && UseSVE > 0); - match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3))); - match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3))); - format %{ "vfnmla1 $dst_src1, $src2, $src3" %} - ins_encode %{ - BasicType bt = Matcher::vector_element_basic_type(this); - __ sve_fnmla($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt), - ptrue, $src2$$FloatRegister, $src3$$FloatRegister); - %} - ins_pipe(pipe_slow); -%} - -// dst_src1 = -dst_src1 + src2 * -src3 -instruct vfnmla2(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA && UseSVE > 0); +// dst_src1 = src2 * (-src3) - dst_src1 +// "(-src2) * src3 - dst_src1" has been idealized to "src3 * (-src2) - dst_src1" +instruct vfnmla(vReg dst_src1, vReg src2, vReg src3) %{ + predicate(UseSVE > 0); match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3)))); match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3)))); - format %{ "vfnmla2 $dst_src1, $src2, $src3" %} + format %{ "vfnmla $dst_src1, $src2, $src3" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); BasicType bt = Matcher::vector_element_basic_type(this); __ sve_fnmla($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt), ptrue, $src2$$FloatRegister, $src3$$FloatRegister); @@ -1353,13 +1323,14 @@ instruct vfnmla2(vReg dst_src1, vReg src2, vReg src3) %{ // vector fnmad - predicated -// dst_src1 = -src3 + dst_src1 * -src2 +// dst_src1 = dst_src1 * (-src2) - src3 instruct vfnmad_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{ - predicate(UseFMA && UseSVE > 0); + predicate(UseSVE > 0); match(Set dst_src1 (FmaVF (Binary dst_src1 (NegVF src2)) (Binary (NegVF src3) pg))); match(Set dst_src1 (FmaVD (Binary dst_src1 (NegVD src2)) (Binary (NegVD src3) pg))); format %{ "vfnmad_masked $dst_src1, $pg, $src2, $src3" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); BasicType bt = Matcher::vector_element_basic_type(this); __ sve_fnmad($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt), $pg$$PRegister, $src2$$FloatRegister, $src3$$FloatRegister); @@ -1369,13 +1340,14 @@ instruct vfnmad_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{ // vector fnmls (sve) -// dst_src1 = -dst_src1 + src2 * src3 +// dst_src1 = src2 * src3 - dst_src1 instruct vfnmls(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA && UseSVE > 0); + predicate(UseSVE > 0); match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3))); match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3))); format %{ "vfnmls $dst_src1, $src2, $src3" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); BasicType bt = Matcher::vector_element_basic_type(this); __ sve_fnmls($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt), ptrue, $src2$$FloatRegister, $src3$$FloatRegister); @@ -1385,13 +1357,14 @@ instruct vfnmls(vReg dst_src1, vReg src2, vReg src3) %{ // vector fnmsb - predicated -// dst_src1 = -src3 + dst_src1 * src2 +// dst_src1 = dst_src1 * src2 - src3 instruct vfnmsb_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{ - predicate(UseFMA && UseSVE > 0); + predicate(UseSVE > 0); match(Set dst_src1 (FmaVF (Binary dst_src1 src2) (Binary (NegVF src3) pg))); match(Set dst_src1 (FmaVD (Binary dst_src1 src2) (Binary (NegVD src3) pg))); format %{ "vfnmsb_masked $dst_src1, $pg, $src2, $src3" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); BasicType bt = Matcher::vector_element_basic_type(this); __ sve_fnmsb($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt), $pg$$PRegister, $src2$$FloatRegister, $src3$$FloatRegister); diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad index c8f8bf35f22..89ce51e997e 100644 --- a/src/hotspot/cpu/ppc/ppc.ad +++ b/src/hotspot/cpu/ppc/ppc.ad @@ -2148,6 +2148,9 @@ bool Matcher::match_rule_supported(int opcode) { return SuperwordUseVSX; case Op_PopCountVI: return (SuperwordUseVSX && UsePopCountInstruction); + case Op_FmaF: + case Op_FmaD: + return UseFMA; case Op_FmaVF: case Op_FmaVD: return (SuperwordUseVSX && UseFMA); @@ -9652,6 +9655,7 @@ instruct maddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ format %{ "FMADDS $dst, $src1, $src2, $src3" %} size(4); ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ fmadds($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); %} ins_pipe(pipe_class_default); @@ -9664,58 +9668,63 @@ instruct maddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ format %{ "FMADD $dst, $src1, $src2, $src3" %} size(4); ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ fmadd($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); %} ins_pipe(pipe_class_default); %} -// -src1 * src2 + src3 = -(src1*src2-src3) +// src1 * (-src2) + src3 = -(src1*src2-src3) +// "(-src1) * src2 + src3" has been idealized to "src2 * (-src1) + src3" instruct mnsubF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ - match(Set dst (FmaF src3 (Binary (NegF src1) src2))); match(Set dst (FmaF src3 (Binary src1 (NegF src2)))); format %{ "FNMSUBS $dst, $src1, $src2, $src3" %} size(4); ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ fnmsubs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); %} ins_pipe(pipe_class_default); %} -// -src1 * src2 + src3 = -(src1*src2-src3) +// src1 * (-src2) + src3 = -(src1*src2-src3) +// "(-src1) * src2 + src3" has been idealized to "src2 * (-src1) + src3" instruct mnsubD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ - match(Set dst (FmaD src3 (Binary (NegD src1) src2))); match(Set dst (FmaD src3 (Binary src1 (NegD src2)))); format %{ "FNMSUB $dst, $src1, $src2, $src3" %} size(4); ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ fnmsub($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); %} ins_pipe(pipe_class_default); %} -// -src1 * src2 - src3 = -(src1*src2+src3) +// src1 * (-src2) - src3 = -(src1*src2+src3) +// "(-src1) * src2 - src3" has been idealized to "src2 * (-src1) - src3" instruct mnaddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ - match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2))); match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2)))); format %{ "FNMADDS $dst, $src1, $src2, $src3" %} size(4); ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ fnmadds($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); %} ins_pipe(pipe_class_default); %} -// -src1 * src2 - src3 = -(src1*src2+src3) +// src1 * (-src2) - src3 = -(src1*src2+src3) +// "(-src1) * src2 - src3" has been idealized to "src2 * (-src1) - src3" instruct mnaddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ - match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2))); match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2)))); format %{ "FNMADD $dst, $src1, $src2, $src3" %} size(4); ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ fnmadd($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); %} ins_pipe(pipe_class_default); @@ -9728,6 +9737,7 @@ instruct msubF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ format %{ "FMSUBS $dst, $src1, $src2, $src3" %} size(4); ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ fmsubs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); %} ins_pipe(pipe_class_default); @@ -9740,6 +9750,7 @@ instruct msubD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ format %{ "FMSUB $dst, $src1, $src2, $src3" %} size(4); ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ fmsub($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); %} ins_pipe(pipe_class_default); @@ -14057,7 +14068,7 @@ instruct vpopcnt_reg(vecX dst, vecX src) %{ %} // --------------------------------- FMA -------------------------------------- -// dst + src1 * src2 +// src1 * src2 + dst instruct vfma4F(vecX dst, vecX src1, vecX src2) %{ match(Set dst (FmaVF dst (Binary src1 src2))); predicate(n->as_Vector()->length() == 4); @@ -14066,14 +14077,15 @@ instruct vfma4F(vecX dst, vecX src1, vecX src2) %{ size(4); ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ xvmaddasp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister); %} ins_pipe(pipe_class_default); %} -// dst - src1 * src2 +// src1 * (-src2) + dst +// "(-src1) * src2 + dst" has been idealized to "src2 * (-src1) + dst" instruct vfma4F_neg1(vecX dst, vecX src1, vecX src2) %{ - match(Set dst (FmaVF dst (Binary (NegVF src1) src2))); match(Set dst (FmaVF dst (Binary src1 (NegVF src2)))); predicate(n->as_Vector()->length() == 4); @@ -14081,12 +14093,13 @@ instruct vfma4F_neg1(vecX dst, vecX src1, vecX src2) %{ size(4); ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ xvnmsubasp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister); %} ins_pipe(pipe_class_default); %} -// - dst + src1 * src2 +// src1 * src2 - dst instruct vfma4F_neg2(vecX dst, vecX src1, vecX src2) %{ match(Set dst (FmaVF (NegVF dst) (Binary src1 src2))); predicate(n->as_Vector()->length() == 4); @@ -14095,12 +14108,13 @@ instruct vfma4F_neg2(vecX dst, vecX src1, vecX src2) %{ size(4); ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ xvmsubasp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister); %} ins_pipe(pipe_class_default); %} -// dst + src1 * src2 +// src1 * src2 + dst instruct vfma2D(vecX dst, vecX src1, vecX src2) %{ match(Set dst (FmaVD dst (Binary src1 src2))); predicate(n->as_Vector()->length() == 2); @@ -14109,14 +14123,15 @@ instruct vfma2D(vecX dst, vecX src1, vecX src2) %{ size(4); ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ xvmaddadp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister); %} ins_pipe(pipe_class_default); %} -// dst - src1 * src2 +// src1 * (-src2) + dst +// "(-src1) * src2 + dst" has been idealized to "src2 * (-src1) + dst" instruct vfma2D_neg1(vecX dst, vecX src1, vecX src2) %{ - match(Set dst (FmaVD dst (Binary (NegVD src1) src2))); match(Set dst (FmaVD dst (Binary src1 (NegVD src2)))); predicate(n->as_Vector()->length() == 2); @@ -14124,12 +14139,13 @@ instruct vfma2D_neg1(vecX dst, vecX src1, vecX src2) %{ size(4); ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ xvnmsubadp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister); %} ins_pipe(pipe_class_default); %} -// - dst + src1 * src2 +// src1 * src2 - dst instruct vfma2D_neg2(vecX dst, vecX src1, vecX src2) %{ match(Set dst (FmaVD (NegVD dst) (Binary src1 src2))); predicate(n->as_Vector()->length() == 2); @@ -14138,6 +14154,7 @@ instruct vfma2D_neg2(vecX dst, vecX src1, vecX src2) %{ size(4); ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ xvmsubadp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister); %} ins_pipe(pipe_class_default); diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad index 6016c5fe282..d03dc843c2b 100644 --- a/src/hotspot/cpu/riscv/riscv.ad +++ b/src/hotspot/cpu/riscv/riscv.ad @@ -1907,6 +1907,11 @@ bool Matcher::match_rule_supported(int opcode) { case Op_CountTrailingZerosI: case Op_CountTrailingZerosL: return UseZbb; + case Op_FmaF: + case Op_FmaD: + case Op_FmaVF: + case Op_FmaVD: + return UseFMA; } return true; // Per default match rules are supported. @@ -7271,13 +7276,13 @@ instruct mulD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ // src1 * src2 + src3 instruct maddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ - predicate(UseFMA); match(Set dst (FmaF src3 (Binary src1 src2))); ins_cost(FMUL_SINGLE_COST); format %{ "fmadd.s $dst, $src1, $src2, $src3\t#@maddF_reg_reg" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ fmadd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), @@ -7289,13 +7294,13 @@ instruct maddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ // src1 * src2 + src3 instruct maddD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ - predicate(UseFMA); match(Set dst (FmaD src3 (Binary src1 src2))); ins_cost(FMUL_DOUBLE_COST); format %{ "fmadd.d $dst, $src1, $src2, $src3\t#@maddD_reg_reg" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ fmadd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), @@ -7307,13 +7312,13 @@ instruct maddD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ // src1 * src2 - src3 instruct msubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ - predicate(UseFMA); match(Set dst (FmaF (NegF src3) (Binary src1 src2))); ins_cost(FMUL_SINGLE_COST); format %{ "fmsub.s $dst, $src1, $src2, $src3\t#@msubF_reg_reg" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ fmsub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), @@ -7325,13 +7330,13 @@ instruct msubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ // src1 * src2 - src3 instruct msubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ - predicate(UseFMA); match(Set dst (FmaD (NegD src3) (Binary src1 src2))); ins_cost(FMUL_DOUBLE_COST); format %{ "fmsub.d $dst, $src1, $src2, $src3\t#@msubD_reg_reg" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ fmsub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), @@ -7341,16 +7346,16 @@ instruct msubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ ins_pipe(pipe_class_default); %} -// -src1 * src2 + src3 +// src1 * (-src2) + src3 +// "(-src1) * src2 + src3" has been idealized to "src2 * (-src1) + src3" instruct nmsubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ - predicate(UseFMA); - match(Set dst (FmaF src3 (Binary (NegF src1) src2))); match(Set dst (FmaF src3 (Binary src1 (NegF src2)))); ins_cost(FMUL_SINGLE_COST); format %{ "fnmsub.s $dst, $src1, $src2, $src3\t#@nmsubF_reg_reg" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ fnmsub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), @@ -7360,16 +7365,16 @@ instruct nmsubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ ins_pipe(pipe_class_default); %} -// -src1 * src2 + src3 +// src1 * (-src2) + src3 +// "(-src1) * src2 + src3" has been idealized to "src2 * (-src1) + src3" instruct nmsubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ - predicate(UseFMA); - match(Set dst (FmaD src3 (Binary (NegD src1) src2))); match(Set dst (FmaD src3 (Binary src1 (NegD src2)))); ins_cost(FMUL_DOUBLE_COST); format %{ "fnmsub.d $dst, $src1, $src2, $src3\t#@nmsubD_reg_reg" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ fnmsub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), @@ -7379,16 +7384,16 @@ instruct nmsubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ ins_pipe(pipe_class_default); %} -// -src1 * src2 - src3 +// src1 * (-src2) - src3 +// "(-src1) * src2 - src3" has been idealized to "src2 * (-src1) - src3" instruct nmaddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ - predicate(UseFMA); - match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2))); match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2)))); ins_cost(FMUL_SINGLE_COST); format %{ "fnmadd.s $dst, $src1, $src2, $src3\t#@nmaddF_reg_reg" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ fnmadd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), @@ -7398,16 +7403,16 @@ instruct nmaddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ ins_pipe(pipe_class_default); %} -// -src1 * src2 - src3 +// src1 * (-src2) - src3 +// "(-src1) * src2 - src3" has been idealized to "src2 * (-src1) - src3" instruct nmaddD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ - predicate(UseFMA); - match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2))); match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2)))); ins_cost(FMUL_DOUBLE_COST); format %{ "fnmadd.d $dst, $src1, $src2, $src3\t#@nmaddD_reg_reg" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ fnmadd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad index 268a51c327f..a61e59ef96a 100644 --- a/src/hotspot/cpu/riscv/riscv_v.ad +++ b/src/hotspot/cpu/riscv/riscv_v.ad @@ -1,6 +1,6 @@ // -// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -// Copyright (c) 2020, Arm Limited. All rights reserved. +// Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2020, 2023, Arm Limited. All rights reserved. // Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. // @@ -693,14 +693,14 @@ instruct vmin_fp_masked(vReg dst_src1, vReg src2, vRegMask vmask, vReg tmp1, vRe // vector fmla -// dst_src1 = dst_src1 + src2 * src3 +// dst_src1 = src2 * src3 + dst_src1 instruct vfmla(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA); match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3))); match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3))); ins_cost(VEC_COST); format %{ "vfmla $dst_src1, $dst_src1, $src2, $src3" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), @@ -713,11 +713,11 @@ instruct vfmla(vReg dst_src1, vReg src2, vReg src3) %{ // dst_src1 = dst_src1 * src2 + src3 instruct vfmadd_masked(vReg dst_src1, vReg src2, vReg src3, vRegMask_V0 v0) %{ - predicate(UseFMA); match(Set dst_src1 (FmaVF (Binary dst_src1 src2) (Binary src3 v0))); match(Set dst_src1 (FmaVD (Binary dst_src1 src2) (Binary src3 v0))); format %{ "vfmadd_masked $dst_src1, $dst_src1, $src2, $src3, $v0" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); __ vfmadd_vv(as_VectorRegister($dst_src1$$reg), as_VectorRegister($src2$$reg), @@ -728,15 +728,14 @@ instruct vfmadd_masked(vReg dst_src1, vReg src2, vReg src3, vRegMask_V0 v0) %{ // vector fmls -// dst_src1 = dst_src1 + -src2 * src3 -// dst_src1 = dst_src1 + src2 * -src3 +// dst_src1 = src2 * (-src3) + dst_src1 +// "(-src2) * src3 + dst_src1" has been idealized to "src3 * (-src2) + dst_src1" instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA); - match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3))); match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3)))); ins_cost(VEC_COST); format %{ "vfmlsF $dst_src1, $dst_src1, $src2, $src3" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ vsetvli_helper(T_FLOAT, Matcher::vector_length(this)); __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); @@ -744,15 +743,14 @@ instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{ ins_pipe(pipe_slow); %} -// dst_src1 = dst_src1 + -src2 * src3 -// dst_src1 = dst_src1 + src2 * -src3 +// dst_src1 = src2 * (-src3) + dst_src1 +// "(-src2) * src3 + dst_src1" has been idealized to "src3 * (-src2) + dst_src1" instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA); - match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3))); match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3)))); ins_cost(VEC_COST); format %{ "vfmlsD $dst_src1, $dst_src1, $src2, $src3" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ vsetvli_helper(T_DOUBLE, Matcher::vector_length(this)); __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); @@ -762,13 +760,13 @@ instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{ // vector fnmsub - predicated -// dst_src1 = dst_src1 * -src2 + src3 +// dst_src1 = dst_src1 * (-src2) + src3 instruct vfnmsub_masked(vReg dst_src1, vReg src2, vReg src3, vRegMask_V0 v0) %{ - predicate(UseFMA); match(Set dst_src1 (FmaVF (Binary dst_src1 (NegVF src2)) (Binary src3 v0))); match(Set dst_src1 (FmaVD (Binary dst_src1 (NegVD src2)) (Binary src3 v0))); format %{ "vfnmsub_masked $dst_src1, $dst_src1, $src2, $src3, $v0" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); __ vfnmsub_vv(as_VectorRegister($dst_src1$$reg), as_VectorRegister($src2$$reg), @@ -779,15 +777,14 @@ instruct vfnmsub_masked(vReg dst_src1, vReg src2, vReg src3, vRegMask_V0 v0) %{ // vector fnmla -// dst_src1 = -dst_src1 + -src2 * src3 -// dst_src1 = -dst_src1 + src2 * -src3 +// dst_src1 = src2 * (-src3) - dst_src1 +// "(-src2) * src3 - dst_src1" has been idealized to "src3 * (-src2) - dst_src1" instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA); - match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3))); match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3)))); ins_cost(VEC_COST); format %{ "vfnmlaF $dst_src1, $dst_src1, $src2, $src3" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ vsetvli_helper(T_FLOAT, Matcher::vector_length(this)); __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); @@ -795,15 +792,14 @@ instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{ ins_pipe(pipe_slow); %} -// dst_src1 = -dst_src1 + -src2 * src3 -// dst_src1 = -dst_src1 + src2 * -src3 +// dst_src1 = src2 * (-src3) - dst_src1 +// "(-src2) * src3 - dst_src1" has been idealized to "src3 * (-src2) - dst_src1" instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA); - match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3))); match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3)))); ins_cost(VEC_COST); format %{ "vfnmlaD $dst_src1, $dst_src1, $src2, $src3" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ vsetvli_helper(T_DOUBLE, Matcher::vector_length(this)); __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); @@ -813,13 +809,13 @@ instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{ // vector fnmadd - predicated -// dst_src1 = -src3 + dst_src1 * -src2 +// dst_src1 = dst_src1 * (-src2) - src3 instruct vfnmadd_masked(vReg dst_src1, vReg src2, vReg src3, vRegMask_V0 v0) %{ - predicate(UseFMA); match(Set dst_src1 (FmaVF (Binary dst_src1 (NegVF src2)) (Binary (NegVF src3) v0))); match(Set dst_src1 (FmaVD (Binary dst_src1 (NegVD src2)) (Binary (NegVD src3) v0))); format %{ "vfnmadd_masked $dst_src1, $dst_src1, $src2, $src3, $v0" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); __ vfnmadd_vv(as_VectorRegister($dst_src1$$reg), as_VectorRegister($src2$$reg), @@ -830,13 +826,13 @@ instruct vfnmadd_masked(vReg dst_src1, vReg src2, vReg src3, vRegMask_V0 v0) %{ // vector fnmls -// dst_src1 = -dst_src1 + src2 * src3 +// dst_src1 = src2 * src3 - dst_src1 instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA); match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3))); ins_cost(VEC_COST); format %{ "vfnmlsF $dst_src1, $dst_src1, $src2, $src3" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ vsetvli_helper(T_FLOAT, Matcher::vector_length(this)); __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); @@ -846,11 +842,11 @@ instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{ // dst_src1 = -dst_src1 + src2 * src3 instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{ - predicate(UseFMA); match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3))); ins_cost(VEC_COST); format %{ "vfnmlsD $dst_src1, $dst_src1, $src2, $src3" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ vsetvli_helper(T_DOUBLE, Matcher::vector_length(this)); __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); @@ -860,13 +856,13 @@ instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{ // vector vfmsub - predicated -// dst_src1 = -src3 + dst_src1 * src2 +// dst_src1 = dst_src1 * src2 - src3 instruct vfmsub_masked(vReg dst_src1, vReg src2, vReg src3, vRegMask_V0 v0) %{ - predicate(UseFMA); match(Set dst_src1 (FmaVF (Binary dst_src1 src2) (Binary (NegVF src3) v0))); match(Set dst_src1 (FmaVD (Binary dst_src1 src2) (Binary (NegVD src3) v0))); format %{ "vfmsub_masked $dst_src1, $dst_src1, $src2, $src3, $v0" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); BasicType bt = Matcher::vector_element_basic_type(this); __ vsetvli_helper(bt, Matcher::vector_length(this)); __ vfmsub_vv(as_VectorRegister($dst_src1$$reg), as_VectorRegister($src2$$reg), diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad index 6982349b3b5..ffac6b70a58 100644 --- a/src/hotspot/cpu/s390/s390.ad +++ b/src/hotspot/cpu/s390/s390.ad @@ -1505,6 +1505,9 @@ bool Matcher::match_rule_supported(int opcode) { case Op_PopCountL: // PopCount supported by H/W from z/Architecture G5 (z196) on. return (UsePopCountInstruction && VM_Version::has_PopCount()); + case Op_FmaF: + case Op_FmaD: + return UseFMA; } return true; // Per default match rules are supported. @@ -7160,6 +7163,7 @@ instruct maddF_reg_reg(regF dst, regF src1, regF src2) %{ size(4); format %{ "MAEBR $dst, $src1, $src2" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ z_maebr($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); %} ins_pipe(pipe_class_dummy); @@ -7173,6 +7177,7 @@ instruct maddD_reg_reg(regD dst, regD src1, regD src2) %{ size(4); format %{ "MADBR $dst, $src1, $src2" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ z_madbr($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); %} ins_pipe(pipe_class_dummy); @@ -7186,6 +7191,7 @@ instruct msubF_reg_reg(regF dst, regF src1, regF src2) %{ size(4); format %{ "MSEBR $dst, $src1, $src2" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ z_msebr($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); %} ins_pipe(pipe_class_dummy); @@ -7199,6 +7205,7 @@ instruct msubD_reg_reg(regD dst, regD src1, regD src2) %{ size(4); format %{ "MSDBR $dst, $src1, $src2" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ z_msdbr($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); %} ins_pipe(pipe_class_dummy); @@ -7212,6 +7219,7 @@ instruct maddF_reg_mem(regF dst, regF src1, memoryRX src2) %{ size(6); format %{ "MAEB $dst, $src1, $src2" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ z_maeb($dst$$FloatRegister, $src1$$FloatRegister, Address(reg_to_register_object($src2$$base), $src2$$index$$Register, $src2$$disp)); %} @@ -7226,6 +7234,7 @@ instruct maddD_reg_mem(regD dst, regD src1, memoryRX src2) %{ size(6); format %{ "MADB $dst, $src1, $src2" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ z_madb($dst$$FloatRegister, $src1$$FloatRegister, Address(reg_to_register_object($src2$$base), $src2$$index$$Register, $src2$$disp)); %} @@ -7240,6 +7249,7 @@ instruct msubF_reg_mem(regF dst, regF src1, memoryRX src2) %{ size(6); format %{ "MSEB $dst, $src1, $src2" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ z_mseb($dst$$FloatRegister, $src1$$FloatRegister, Address(reg_to_register_object($src2$$base), $src2$$index$$Register, $src2$$disp)); %} @@ -7254,6 +7264,7 @@ instruct msubD_reg_mem(regD dst, regD src1, memoryRX src2) %{ size(6); format %{ "MSDB $dst, $src1, $src2" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ z_msdb($dst$$FloatRegister, $src1$$FloatRegister, Address(reg_to_register_object($src2$$base), $src2$$index$$Register, $src2$$disp)); %} @@ -7268,6 +7279,7 @@ instruct maddF_mem_reg(regF dst, memoryRX src1, regF src2) %{ size(6); format %{ "MAEB $dst, $src1, $src2" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ z_maeb($dst$$FloatRegister, $src2$$FloatRegister, Address(reg_to_register_object($src1$$base), $src1$$index$$Register, $src1$$disp)); %} @@ -7282,6 +7294,7 @@ instruct maddD_mem_reg(regD dst, memoryRX src1, regD src2) %{ size(6); format %{ "MADB $dst, $src1, $src2" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ z_madb($dst$$FloatRegister, $src2$$FloatRegister, Address(reg_to_register_object($src1$$base), $src1$$index$$Register, $src1$$disp)); %} @@ -7296,6 +7309,7 @@ instruct msubF_mem_reg(regF dst, memoryRX src1, regF src2) %{ size(6); format %{ "MSEB $dst, $src1, $src2" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ z_mseb($dst$$FloatRegister, $src2$$FloatRegister, Address(reg_to_register_object($src1$$base), $src1$$index$$Register, $src1$$disp)); %} @@ -7310,6 +7324,7 @@ instruct msubD_mem_reg(regD dst, memoryRX src1, regD src2) %{ size(6); format %{ "MSDB $dst, $src1, $src2" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ z_msdb($dst$$FloatRegister, $src2$$FloatRegister, Address(reg_to_register_object($src1$$base), $src1$$index$$Register, $src1$$disp)); %} diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad index 3147857d249..61ab92326b2 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -1566,6 +1566,8 @@ bool Matcher::match_rule_supported(int opcode) { return false; } break; + case Op_FmaF: + case Op_FmaD: case Op_FmaVD: case Op_FmaVF: if (!UseFMA) { @@ -3960,11 +3962,11 @@ instruct onspinwait() %{ // a * b + c instruct fmaD_reg(regD a, regD b, regD c) %{ - predicate(UseFMA); match(Set c (FmaD c (Binary a b))); format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} ins_cost(150); ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); %} ins_pipe( pipe_slow ); @@ -3972,11 +3974,11 @@ instruct fmaD_reg(regD a, regD b, regD c) %{ // a * b + c instruct fmaF_reg(regF a, regF b, regF c) %{ - predicate(UseFMA); match(Set c (FmaF c (Binary a b))); format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} ins_cost(150); ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); %} ins_pipe( pipe_slow ); @@ -9864,6 +9866,7 @@ instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{ match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask))); format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); int vlen_enc = vector_length_encoding(this); BasicType bt = Matcher::vector_element_basic_type(this); int opc = this->ideal_Opcode(); @@ -9878,6 +9881,7 @@ instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{ match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask))); format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %} ins_encode %{ + assert(UseFMA, "Needs FMA instructions support."); int vlen_enc = vector_length_encoding(this); BasicType bt = Matcher::vector_element_basic_type(this); int opc = this->ideal_Opcode(); diff --git a/src/hotspot/share/opto/c2compiler.cpp b/src/hotspot/share/opto/c2compiler.cpp index 398eb80b8cf..ea56e30ed87 100644 --- a/src/hotspot/share/opto/c2compiler.cpp +++ b/src/hotspot/share/opto/c2compiler.cpp @@ -502,10 +502,10 @@ bool C2Compiler::is_intrinsic_supported(vmIntrinsics::ID id) { if (!Matcher::match_rule_supported(Op_OnSpinWait)) return false; break; case vmIntrinsics::_fmaD: - if (!UseFMA || !Matcher::match_rule_supported(Op_FmaD)) return false; + if (!Matcher::match_rule_supported(Op_FmaD)) return false; break; case vmIntrinsics::_fmaF: - if (!UseFMA || !Matcher::match_rule_supported(Op_FmaF)) return false; + if (!Matcher::match_rule_supported(Op_FmaF)) return false; break; case vmIntrinsics::_isDigit: if (!Matcher::match_rule_supported(Op_Digit)) return false; diff --git a/src/hotspot/share/opto/mulnode.cpp b/src/hotspot/share/opto/mulnode.cpp index e79492a62e8..6709f831447 100644 --- a/src/hotspot/share/opto/mulnode.cpp +++ b/src/hotspot/share/opto/mulnode.cpp @@ -1711,6 +1711,20 @@ const Type* URShiftLNode::Value(PhaseGVN* phase) const { return TypeLong::LONG; // Give up } +//============================================================================= +//------------------------------Ideal------------------------------------------ +Node* FmaNode::Ideal(PhaseGVN* phase, bool can_reshape) { + // We canonicalize the node by converting "(-a)*b+c" into "b*(-a)+c" + // This reduces the number of rules in the matcher, as we only need to check + // for negations on the second argument, and not the symmetric case where + // the first argument is negated. + if (in(1)->is_Neg() && !in(2)->is_Neg()) { + swap_edges(1, 2); + return this; + } + return nullptr; +} + //============================================================================= //------------------------------Value------------------------------------------ const Type* FmaDNode::Value(PhaseGVN* phase) const { diff --git a/src/hotspot/share/opto/mulnode.hpp b/src/hotspot/share/opto/mulnode.hpp index 84307fb00fb..d04648ee61a 100644 --- a/src/hotspot/share/opto/mulnode.hpp +++ b/src/hotspot/share/opto/mulnode.hpp @@ -357,24 +357,34 @@ public: virtual uint ideal_reg() const { return Op_RegL; } }; +//------------------------------FmaNode-------------------------------------- +// fused-multiply-add +class FmaNode : public Node { +public: + FmaNode(Node* c, Node* in1, Node* in2, Node* in3) : Node(c, in1, in2, in3) { + assert(UseFMA, "Needs FMA instructions support."); + } + virtual Node* Ideal(PhaseGVN* phase, bool can_reshape); +}; + //------------------------------FmaDNode-------------------------------------- // fused-multiply-add double -class FmaDNode : public Node { +class FmaDNode : public FmaNode { public: - FmaDNode(Node *c, Node *in1, Node *in2, Node *in3) : Node(c, in1, in2, in3) {} + FmaDNode(Node* c, Node* in1, Node* in2, Node* in3) : FmaNode(c, in1, in2, in3) {} virtual int Opcode() const; - const Type *bottom_type() const { return Type::DOUBLE; } + const Type* bottom_type() const { return Type::DOUBLE; } virtual uint ideal_reg() const { return Op_RegD; } virtual const Type* Value(PhaseGVN* phase) const; }; //------------------------------FmaFNode-------------------------------------- // fused-multiply-add float -class FmaFNode : public Node { +class FmaFNode : public FmaNode { public: - FmaFNode(Node *c, Node *in1, Node *in2, Node *in3) : Node(c, in1, in2, in3) {} + FmaFNode(Node* c, Node* in1, Node* in2, Node* in3) : FmaNode(c, in1, in2, in3) {} virtual int Opcode() const; - const Type *bottom_type() const { return Type::FLOAT; } + const Type* bottom_type() const { return Type::FLOAT; } virtual uint ideal_reg() const { return Op_RegF; } virtual const Type* Value(PhaseGVN* phase) const; }; diff --git a/src/hotspot/share/opto/node.hpp b/src/hotspot/share/opto/node.hpp index 755374e18a9..50134fe0e02 100644 --- a/src/hotspot/share/opto/node.hpp +++ b/src/hotspot/share/opto/node.hpp @@ -129,6 +129,8 @@ class MoveNode; class MulNode; class MultiNode; class MultiBranchNode; +class NegNode; +class NegVNode; class NeverBranchNode; class Opaque1Node; class OuterStripMinedLoopNode; @@ -725,6 +727,7 @@ public: DEFINE_CLASS_ID(CompressM, Vector, 6) DEFINE_CLASS_ID(Reduction, Vector, 7) DEFINE_CLASS_ID(UnorderedReduction, Reduction, 0) + DEFINE_CLASS_ID(NegV, Vector, 8) DEFINE_CLASS_ID(Con, Type, 8) DEFINE_CLASS_ID(ConI, Con, 0) DEFINE_CLASS_ID(SafePointScalarMerge, Type, 9) @@ -780,8 +783,9 @@ public: DEFINE_CLASS_ID(Opaque1, Node, 16) DEFINE_CLASS_ID(Move, Node, 17) DEFINE_CLASS_ID(LShift, Node, 18) + DEFINE_CLASS_ID(Neg, Node, 19) - _max_classes = ClassMask_LShift + _max_classes = ClassMask_Neg }; #undef DEFINE_CLASS_ID @@ -941,6 +945,8 @@ public: DEFINE_CLASS_QUERY(Mul) DEFINE_CLASS_QUERY(Multi) DEFINE_CLASS_QUERY(MultiBranch) + DEFINE_CLASS_QUERY(Neg) + DEFINE_CLASS_QUERY(NegV) DEFINE_CLASS_QUERY(NeverBranch) DEFINE_CLASS_QUERY(Opaque1) DEFINE_CLASS_QUERY(OuterStripMinedLoop) diff --git a/src/hotspot/share/opto/subnode.hpp b/src/hotspot/share/opto/subnode.hpp index a4edeb4053f..f424e258db2 100644 --- a/src/hotspot/share/opto/subnode.hpp +++ b/src/hotspot/share/opto/subnode.hpp @@ -430,7 +430,9 @@ public: //------------------------------NegNode---------------------------------------- class NegNode : public Node { public: - NegNode( Node *in1 ) : Node(0,in1) {} + NegNode(Node* in1) : Node(0, in1) { + init_class_id(Class_Neg); + } }; //------------------------------NegINode--------------------------------------- diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp index 00926f6ab2f..8b6ba1f5cb9 100644 --- a/src/hotspot/share/opto/vectornode.cpp +++ b/src/hotspot/share/opto/vectornode.cpp @@ -1876,6 +1876,21 @@ Node* VectorLongToMaskNode::Ideal(PhaseGVN* phase, bool can_reshape) { return nullptr; } +Node* FmaVNode::Ideal(PhaseGVN* phase, bool can_reshape) { + // We canonicalize the node by converting "(-a)*b+c" into "b*(-a)+c" + // This reduces the number of rules in the matcher, as we only need to check + // for negations on the second argument, and not the symmetric case where + // the first argument is negated. + // We cannot do this if the FmaV is masked, since the inactive lanes have to return + // the first input (i.e. "-a"). If we were to swap the inputs, the inactive lanes would + // incorrectly return "b". + if (!is_predicated_vector() && in(1)->is_NegV() && !in(2)->is_NegV()) { + swap_edges(1, 2); + return this; + } + return nullptr; +} + // Generate other vector nodes to implement the masked/non-masked vector negation. Node* NegVNode::degenerate_integral_negate(PhaseGVN* phase, bool is_predicated) { const TypeVect* vt = vect_type(); diff --git a/src/hotspot/share/opto/vectornode.hpp b/src/hotspot/share/opto/vectornode.hpp index 3ac54c5d535..fbe9b939991 100644 --- a/src/hotspot/share/opto/vectornode.hpp +++ b/src/hotspot/share/opto/vectornode.hpp @@ -376,19 +376,29 @@ class MulAddVS2VINode : public VectorNode { virtual int Opcode() const; }; -//------------------------------FmaVDNode-------------------------------------- -// Vector multiply double -class FmaVDNode : public VectorNode { +//------------------------------FmaVNode-------------------------------------- +// Vector fused-multiply-add +class FmaVNode : public VectorNode { public: - FmaVDNode(Node* in1, Node* in2, Node* in3, const TypeVect* vt) : VectorNode(in1, in2, in3, vt) {} + FmaVNode(Node* in1, Node* in2, Node* in3, const TypeVect* vt) : VectorNode(in1, in2, in3, vt) { + assert(UseFMA, "Needs FMA instructions support."); + } + virtual Node* Ideal(PhaseGVN* phase, bool can_reshape); +}; + +//------------------------------FmaVDNode-------------------------------------- +// Vector fused-multiply-add double +class FmaVDNode : public FmaVNode { +public: + FmaVDNode(Node* in1, Node* in2, Node* in3, const TypeVect* vt) : FmaVNode(in1, in2, in3, vt) {} virtual int Opcode() const; }; //------------------------------FmaVFNode-------------------------------------- -// Vector multiply float -class FmaVFNode : public VectorNode { +// Vector fused-multiply-add float +class FmaVFNode : public FmaVNode { public: - FmaVFNode(Node* in1, Node* in2, Node* in3, const TypeVect* vt) : VectorNode(in1, in2, in3, vt) {} + FmaVFNode(Node* in1, Node* in2, Node* in3, const TypeVect* vt) : FmaVNode(in1, in2, in3, vt) {} virtual int Opcode() const; }; @@ -508,7 +518,9 @@ class AbsVDNode : public VectorNode { // Vector Neg parent class (not for code generation). class NegVNode : public VectorNode { public: - NegVNode(Node* in, const TypeVect* vt) : VectorNode(in, vt) {} + NegVNode(Node* in, const TypeVect* vt) : VectorNode(in, vt) { + init_class_id(Class_NegV); + } virtual int Opcode() const = 0; virtual Node* Ideal(PhaseGVN* phase, bool can_reshape); diff --git a/test/hotspot/jtreg/compiler/c2/irTests/TestIRFma.java b/test/hotspot/jtreg/compiler/c2/irTests/TestIRFma.java new file mode 100644 index 00000000000..0e2cd067a13 --- /dev/null +++ b/test/hotspot/jtreg/compiler/c2/irTests/TestIRFma.java @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2023, Arm Limited. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package compiler.c2.irTests; + +import compiler.lib.ir_framework.*; +import java.util.Random; +import jdk.test.lib.Asserts; +import jdk.test.lib.Utils; + +/* + * @test + * @bug 8308340 + * @key randomness + * @summary Test fma match rule after C2 optimizer. + * @library /test/lib / + * @run driver compiler.c2.irTests.TestIRFma + */ + +public class TestIRFma { + + private static final Random RANDOM = Utils.getRandomInstance(); + + public static void main(String[] args) { + TestFramework.run(); + } + + @Run(test = {"test1", "test2", "test3", + "test4", "test5", "test6", + "test7", "test8", "test9", + "test10", "test11", "test12", + "test13", "test14"}) + public void runMethod() { + float fa = RANDOM.nextFloat(); + float fb = RANDOM.nextFloat(); + float fc = RANDOM.nextFloat(); + assertResult(fa, fb, fc); + + double da = RANDOM.nextDouble(); + double db = RANDOM.nextDouble(); + double dc = RANDOM.nextDouble(); + assertResult(da, db, dc); + } + + @DontCompile + public void assertResult(float a, float b, float c) { + Asserts.assertEquals(Math.fma(-a, -b, c) , test1(a, b, c)); + Asserts.assertEquals(Math.fma(-a, b, c) , test3(a, b, c)); + Asserts.assertEquals(Math.fma(a, -b, c) , test5(a, b, c)); + Asserts.assertEquals(Math.fma(-a, b, -c) , test7(a, b, c)); + Asserts.assertEquals(Math.fma(a, -b, -c) , test9(a, b, c)); + Asserts.assertEquals(Math.fma(a, b, -c) , test11(a, b, c)); + Asserts.assertEquals(Math.fma(-a, -b, -c) , test13(a, b, c)); + } + + @DontCompile + public void assertResult(double a, double b, double c) { + Asserts.assertEquals(Math.fma(-a, -b, c) , test2(a, b, c)); + Asserts.assertEquals(Math.fma(-a, b, c) , test4(a, b, c)); + Asserts.assertEquals(Math.fma(a, -b, c) , test6(a, b, c)); + Asserts.assertEquals(Math.fma(-a, b, -c) , test8(a, b, c)); + Asserts.assertEquals(Math.fma(a, -b, -c) , test10(a, b, c)); + Asserts.assertEquals(Math.fma(a, b, -c) , test12(a, b, c)); + Asserts.assertEquals(Math.fma(-a, -b, -c) , test14(a, b, c)); + } + + @Test + @IR(counts = {IRNode.FMSUB, "> 0"}, + applyIfCPUFeature = {"asimd", "true"}) + static float test1(float a, float b, float c) { + return Math.fma(-a, -b, c); + } + + @Test + @IR(counts = {IRNode.FMSUB, "> 0"}, + applyIfCPUFeature = {"asimd", "true"}) + static double test2(double a, double b, double c) { + return Math.fma(-a, -b, c); + } + + @Test + @IR(counts = {IRNode.FMSUB, "> 0"}, + applyIfCPUFeature = {"asimd", "true"}) + static float test3(float a, float b, float c) { + return Math.fma(-a, b, c); + } + + @Test + @IR(counts = {IRNode.FMSUB, "> 0"}, + applyIfCPUFeature = {"asimd", "true"}) + static double test4(double a, double b, double c) { + return Math.fma(-a, b, c); + } + + @Test + @IR(counts = {IRNode.FMSUB, "> 0"}, + applyIfCPUFeature = {"asimd", "true"}) + static float test5(float a, float b, float c) { + return Math.fma(a, -b, c); + } + + @Test + @IR(counts = {IRNode.FMSUB, "> 0"}, + applyIfCPUFeature = {"asimd", "true"}) + static double test6(double a, double b, double c) { + return Math.fma(a, -b, c); + } + + @Test + @IR(counts = {IRNode.FNMADD, "> 0"}, + applyIfCPUFeature = {"asimd", "true"}) + static float test7(float a, float b, float c) { + return Math.fma(-a, b, -c); + } + + @Test + @IR(counts = {IRNode.FNMADD, "> 0"}, + applyIfCPUFeature = {"asimd", "true"}) + static double test8(double a, double b, double c) { + return Math.fma(-a, b, -c); + } + + @Test + @IR(counts = {IRNode.FNMADD, "> 0"}, + applyIfCPUFeature = {"asimd", "true"}) + static float test9(float a, float b, float c) { + return Math.fma(a, -b, -c); + } + + @Test + @IR(counts = {IRNode.FNMADD, "> 0"}, + applyIfCPUFeature = {"asimd", "true"}) + static double test10(double a, double b, double c) { + return Math.fma(a, -b, -c); + } + + @Test + @IR(counts = {IRNode.FNMSUB, "> 0"}, + applyIfCPUFeature = {"asimd", "true"}) + static float test11(float a, float b, float c) { + return Math.fma(a, b, -c); + } + + @Test + @IR(counts = {IRNode.FNMSUB, "> 0"}, + applyIfCPUFeature = {"asimd", "true"}) + static double test12(double a, double b, double c) { + return Math.fma(a, b, -c); + } + + @Test + @IR(counts = {IRNode.FNMADD, "> 0"}, + applyIfCPUFeature = {"asimd", "true"}) + static float test13(float a, float b, float c) { + return Math.fma(-a, -b, -c); + } + + @Test + @IR(counts = {IRNode.FNMADD, "> 0"}, + applyIfCPUFeature = {"asimd", "true"}) + static double test14(double a, double b, double c) { + return Math.fma(-a, -b, -c); + } + + } diff --git a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java index 89900a639ee..cbd95fcac24 100644 --- a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java +++ b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java @@ -1388,6 +1388,11 @@ public class IRNode { machOnlyNameRegex(VFNMSB_MASKED, "vfnmsb_masked"); } + public static final String VFMAD_MASKED = PREFIX + "VFMAD_MASKED" + POSTFIX; + static { + machOnlyNameRegex(VFMAD_MASKED, "vfmad_masked"); + } + public static final String VMASK_AND_NOT_L = PREFIX + "VMASK_AND_NOT_L" + POSTFIX; static { machOnlyNameRegex(VMASK_AND_NOT_L, "vmask_and_notL"); @@ -1403,6 +1408,36 @@ public class IRNode { machOnlyNameRegex(VMLA_MASKED, "vmla_masked"); } + public static final String FMSUB = PREFIX + "FMSUB" + POSTFIX; + static { + machOnlyNameRegex(FMSUB, "msub(F|D)_reg_reg"); + } + + public static final String FNMADD = PREFIX + "FNMADD" + POSTFIX; + static { + machOnlyNameRegex(FNMADD, "mnadd(F|D)_reg_reg"); + } + + public static final String FNMSUB = PREFIX + "FNMSUB" + POSTFIX; + static { + machOnlyNameRegex(FNMSUB, "mnsub(F|D)_reg_reg"); + } + + public static final String VFMLA = PREFIX + "VFMLA" + POSTFIX; + static { + machOnlyNameRegex(VFMLA, "vfmla"); + } + + public static final String VFMLS = PREFIX + "VFMLS" + POSTFIX; + static { + machOnlyNameRegex(VFMLS, "vfmls"); + } + + public static final String VFNMLA = PREFIX + "VFNMLA" + POSTFIX; + static { + machOnlyNameRegex(VFNMLA, "vfnmla"); + } + public static final String VMLS = PREFIX + "VMLS" + POSTFIX; static { machOnlyNameRegex(VMLS, "vmls"); diff --git a/test/hotspot/jtreg/compiler/vectorapi/VectorFusedMultiplyAddSubTest.java b/test/hotspot/jtreg/compiler/vectorapi/VectorFusedMultiplyAddSubTest.java index ece446bd197..cc65fa69e5d 100644 --- a/test/hotspot/jtreg/compiler/vectorapi/VectorFusedMultiplyAddSubTest.java +++ b/test/hotspot/jtreg/compiler/vectorapi/VectorFusedMultiplyAddSubTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, Arm Limited. All rights reserved. + * Copyright (c) 2022, 2023, Arm Limited. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -60,7 +60,7 @@ public class VectorFusedMultiplyAddSubTest { private static final VectorSpecies L_SPECIES = LongVector.SPECIES_MAX; private static final VectorSpecies S_SPECIES = ShortVector.SPECIES_MAX; - private static int LENGTH = 1024; + private static int LENGTH = 128; private static final Random RD = Utils.getRandomInstance(); private static byte[] ba; @@ -223,6 +223,26 @@ public class VectorFusedMultiplyAddSubTest { } } + private static void assertArrayEqualsNeg(float[] r, float[] a, float[] b, float[] c, boolean[] m, FTenOp f) { + for (int i = 0; i < LENGTH; i++) { + if (m[i % F_SPECIES.length()]) { + Asserts.assertEquals(f.apply(a[i], b[i], c[i]), r[i]); + } else { + Asserts.assertEquals(-a[i], r[i]); + } + } + } + + private static void assertArrayEqualsNeg(double[] r, double[] a, double[] b, double[] c, boolean[] m, DTenOp f) { + for (int i = 0; i < LENGTH; i++) { + if (m[i % D_SPECIES.length()]) { + Asserts.assertEquals(f.apply(a[i], b[i], c[i]), r[i]); + } else { + Asserts.assertEquals(-a[i], r[i]); + } + } + } + @Test @IR(counts = { IRNode.VMLA_MASKED, ">= 1" }) public static void testByteMultiplyAddMasked() { @@ -340,6 +360,19 @@ public class VectorFusedMultiplyAddSubTest { assertArrayEquals(fr, fa, fb, fc, m, (a, b, c) -> (float) Math.fma(a, -b, c)); } + @Test + @IR(counts = { IRNode.VFMAD_MASKED, ">= 1" }) + public static void testFloatMultiplyNegAMasked() { + VectorMask mask = VectorMask.fromArray(F_SPECIES, m, 0); + for (int i = 0; i < LENGTH; i += F_SPECIES.length()) { + FloatVector av = FloatVector.fromArray(F_SPECIES, fa, i); + FloatVector bv = FloatVector.fromArray(F_SPECIES, fb, i); + FloatVector cv = FloatVector.fromArray(F_SPECIES, fc, i); + av.neg().lanewise(VectorOperators.FMA, bv, cv, mask).intoArray(fr, i); + } + assertArrayEqualsNeg(fr, fa, fb, fc, m, (a, b, c) -> (float) Math.fma(-a, b, c)); + } + @Test @IR(counts = { IRNode.VFNMAD_MASKED, ">= 1" }) public static void testFloatNegatedMultiplyAddMasked() { @@ -353,6 +386,19 @@ public class VectorFusedMultiplyAddSubTest { assertArrayEquals(fr, fa, fb, fc, m, (a, b, c) -> (float) Math.fma(a, -b, -c)); } + @Test + @IR(counts = { IRNode.VFNMSB_MASKED, ">= 1" }) + public static void testFloatNegatedMultiplyNegAMasked() { + VectorMask mask = VectorMask.fromArray(F_SPECIES, m, 0); + for (int i = 0; i < LENGTH; i += F_SPECIES.length()) { + FloatVector av = FloatVector.fromArray(F_SPECIES, fa, i); + FloatVector bv = FloatVector.fromArray(F_SPECIES, fb, i); + FloatVector cv = FloatVector.fromArray(F_SPECIES, fc, i); + av.neg().lanewise(VectorOperators.FMA, bv, cv.neg(), mask).intoArray(fr, i); + } + assertArrayEqualsNeg(fr, fa, fb, fc, m, (a, b, c) -> (float) Math.fma(-a, b, -c)); + } + @Test @IR(counts = { IRNode.VFNMSB_MASKED, ">= 1" }) public static void testFloatNegatedMultiplySubMasked() { @@ -379,6 +425,19 @@ public class VectorFusedMultiplyAddSubTest { assertArrayEquals(dr, da, db, dc, m, (a, b, c) -> (double) Math.fma(a, -b, c)); } + @Test + @IR(counts = { IRNode.VFMAD_MASKED, ">= 1" }) + public static void testDoubleMultiplyNegAMasked() { + VectorMask mask = VectorMask.fromArray(D_SPECIES, m, 0); + for (int i = 0; i < LENGTH; i += D_SPECIES.length()) { + DoubleVector av = DoubleVector.fromArray(D_SPECIES, da, i); + DoubleVector bv = DoubleVector.fromArray(D_SPECIES, db, i); + DoubleVector cv = DoubleVector.fromArray(D_SPECIES, dc, i); + av.neg().lanewise(VectorOperators.FMA, bv, cv, mask).intoArray(dr, i); + } + assertArrayEqualsNeg(dr, da, db, dc, m, (a, b, c) -> (double) Math.fma(-a, b, c)); + } + @Test @IR(counts = { IRNode.VFNMAD_MASKED, ">= 1" }) public static void testDoubleNegatedMultiplyAddMasked() { @@ -392,6 +451,19 @@ public class VectorFusedMultiplyAddSubTest { assertArrayEquals(dr, da, db, dc, m, (a, b, c) -> (double) Math.fma(a, -b, -c)); } + @Test + @IR(counts = { IRNode.VFNMSB_MASKED, ">= 1" }) + public static void testDoubleNegatedMultiplyNegAMasked() { + VectorMask mask = VectorMask.fromArray(D_SPECIES, m, 0); + for (int i = 0; i < LENGTH; i += D_SPECIES.length()) { + DoubleVector av = DoubleVector.fromArray(D_SPECIES, da, i); + DoubleVector bv = DoubleVector.fromArray(D_SPECIES, db, i); + DoubleVector cv = DoubleVector.fromArray(D_SPECIES, dc, i); + av.neg().lanewise(VectorOperators.FMA, bv, cv.neg(), mask).intoArray(dr, i); + } + assertArrayEqualsNeg(dr, da, db, dc, m, (a, b, c) -> (double) Math.fma(-a, b, -c)); + } + @Test @IR(counts = { IRNode.VFNMSB_MASKED, ">= 1" }) public static void testDoubleNegatedMultiplySubMasked() { @@ -406,7 +478,9 @@ public class VectorFusedMultiplyAddSubTest { } public static void main(String[] args) { - TestFramework.runWithFlags("--add-modules=jdk.incubator.vector", - "-XX:UseSVE=1"); + TestFramework testFramework = new TestFramework(); + testFramework.setDefaultWarmup(5000) + .addFlags("--add-modules=jdk.incubator.vector", "-XX:UseSVE=1") + .start(); } } diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/BasicDoubleOpTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/BasicDoubleOpTest.java index 9bb0c670c55..60dc8a5e148 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/BasicDoubleOpTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/BasicDoubleOpTest.java @@ -197,7 +197,7 @@ public class BasicDoubleOpTest extends VectorizationTestRunner { @Test @IR(applyIfCPUFeature = {"asimd", "true"}, - counts = {IRNode.FMA_V, ">0"}) + counts = {IRNode.FMA_V, ">0", IRNode.VFMLA, ">0"}) @IR(applyIfCPUFeatureAnd = {"fma", "true", "avx", "true"}, counts = {IRNode.FMA_V, ">0"}) public double[] vectorMulAdd() { @@ -210,7 +210,7 @@ public class BasicDoubleOpTest extends VectorizationTestRunner { @Test @IR(applyIfCPUFeature = {"asimd", "true"}, - counts = {IRNode.FMA_V, ">0"}) + counts = {IRNode.FMA_V, ">0", IRNode.VFMLS, ">0"}) @IR(applyIfCPUFeatureAnd = {"fma", "true", "avx", "true"}, counts = {IRNode.FMA_V, ">0"}) public double[] vectorMulSub1() { @@ -223,7 +223,7 @@ public class BasicDoubleOpTest extends VectorizationTestRunner { @Test @IR(applyIfCPUFeature = {"asimd", "true"}, - counts = {IRNode.FMA_V, ">0"}) + counts = {IRNode.FMA_V, ">0", IRNode.VFMLS, ">0"}) @IR(applyIfCPUFeatureAnd = {"fma", "true", "avx", "true"}, counts = {IRNode.FMA_V, ">0"}) public double[] vectorMulSub2() { @@ -237,6 +237,8 @@ public class BasicDoubleOpTest extends VectorizationTestRunner { @Test @IR(applyIfCPUFeature = {"asimd", "true"}, counts = {IRNode.FMA_V, ">0"}) + @IR(applyIfCPUFeature = {"sve", "true"}, + counts = {IRNode.VFNMLA, ">0"}) @IR(applyIfCPUFeatureAnd = {"fma", "true", "avx", "true"}, counts = {IRNode.FMA_V, ">0"}) public double[] vectorNegateMulAdd1() { @@ -250,6 +252,8 @@ public class BasicDoubleOpTest extends VectorizationTestRunner { @Test @IR(applyIfCPUFeature = {"asimd", "true"}, counts = {IRNode.FMA_V, ">0"}) + @IR(applyIfCPUFeature = {"sve", "true"}, + counts = {IRNode.VFNMLA, ">0"}) @IR(applyIfCPUFeatureAnd = {"fma", "true", "avx", "true"}, counts = {IRNode.FMA_V, ">0"}) public double[] vectorNegateMulAdd2() { diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/BasicFloatOpTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/BasicFloatOpTest.java index a07fcfa9ca2..b52fb5a1364 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/BasicFloatOpTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/BasicFloatOpTest.java @@ -164,7 +164,7 @@ public class BasicFloatOpTest extends VectorizationTestRunner { @Test @IR(applyIfCPUFeature = {"asimd", "true"}, - counts = {IRNode.FMA_V, ">0"}) + counts = {IRNode.FMA_V, ">0", IRNode.VFMLA, ">0"}) @IR(applyIfCPUFeatureAnd = {"fma", "true", "avx", "true"}, counts = {IRNode.FMA_V, ">0"}) public float[] vectorMulAdd() { @@ -177,7 +177,7 @@ public class BasicFloatOpTest extends VectorizationTestRunner { @Test @IR(applyIfCPUFeature = {"asimd", "true"}, - counts = {IRNode.FMA_V, ">0"}) + counts = {IRNode.FMA_V, ">0", IRNode.VFMLS, ">0"}) @IR(applyIfCPUFeatureAnd = {"fma", "true", "avx", "true"}, counts = {IRNode.FMA_V, ">0"}) public float[] vectorMulSub1() { @@ -190,7 +190,7 @@ public class BasicFloatOpTest extends VectorizationTestRunner { @Test @IR(applyIfCPUFeature = {"asimd", "true"}, - counts = {IRNode.FMA_V, ">0"}) + counts = {IRNode.FMA_V, ">0", IRNode.VFMLS, ">0"}) @IR(applyIfCPUFeatureAnd = {"fma", "true", "avx", "true"}, counts = {IRNode.FMA_V, ">0"}) public float[] vectorMulSub2() { @@ -204,6 +204,8 @@ public class BasicFloatOpTest extends VectorizationTestRunner { @Test @IR(applyIfCPUFeature = {"asimd", "true"}, counts = {IRNode.FMA_V, ">0"}) + @IR(applyIfCPUFeature = {"sve", "true"}, + counts = {IRNode.VFNMLA, ">0"}) @IR(applyIfCPUFeatureAnd = {"fma", "true", "avx", "true"}, counts = {IRNode.FMA_V, ">0"}) public float[] vectorNegateMulAdd1() { @@ -217,6 +219,8 @@ public class BasicFloatOpTest extends VectorizationTestRunner { @Test @IR(applyIfCPUFeature = {"asimd", "true"}, counts = {IRNode.FMA_V, ">0"}) + @IR(applyIfCPUFeature = {"sve", "true"}, + counts = {IRNode.VFNMLA, ">0"}) @IR(applyIfCPUFeatureAnd = {"fma", "true", "avx", "true"}, counts = {IRNode.FMA_V, ">0"}) public float[] vectorNegateMulAdd2() {