8333964: RISC-V: C2: Check "requires_strict_order" flag for floating-point add reduction

Reviewed-by: fyang
This commit is contained in:
Gui Cao 2024-06-18 05:24:33 +00:00 committed by Fei Yang
parent ba5a4670b8
commit e95f092862
3 changed files with 55 additions and 13 deletions
src/hotspot/cpu/riscv
test/hotspot/jtreg/compiler

@ -2007,11 +2007,20 @@ instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{
ins_pipe(pipe_slow);
%}
instruct reduce_addF(fRegF dst, fRegF src1, vReg src2, vReg tmp) %{
// Distinguish two cases based on requires_strict_order
// 1. Non strictly-ordered AddReductionVF/D. For example, AddReductionVF/D
// generated by Vector API. It is more beneficial performance-wise to do
// an unordered FP reduction sum (vfredusum.vs).
// 2. Strictly-ordered AddReductionVF/D. For example, AddReductionVF/D
// generated by auto-vectorization. Must do an ordered FP reduction sum
// (vfredosum.vs).
instruct reduce_addF_ordered(fRegF dst, fRegF src1, vReg src2, vReg tmp) %{
predicate(n->as_Reduction()->requires_strict_order());
match(Set dst (AddReductionVF src1 src2));
effect(TEMP tmp);
ins_cost(VEC_COST);
format %{ "reduce_addF $dst, $src1, $src2\t# KILL $tmp" %}
format %{ "reduce_addF_ordered $dst, $src1, $src2\t# KILL $tmp" %}
ins_encode %{
__ vsetvli_helper(T_FLOAT, Matcher::vector_length(this, $src2));
__ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1$$FloatRegister);
@ -2022,11 +2031,28 @@ instruct reduce_addF(fRegF dst, fRegF src1, vReg src2, vReg tmp) %{
ins_pipe(pipe_slow);
%}
instruct reduce_addD(fRegD dst, fRegD src1, vReg src2, vReg tmp) %{
// Float add reduction, non strictly-ordered variant.
// Matches AddReductionVF only when the node does NOT require strict ordering
// (see predicate), and emits the unordered reduction sum vfredusum.vs.
// tmp is declared as a TEMP vector register and is overwritten by the encoding.
instruct reduce_addF_unordered(fRegF dst, fRegF src1, vReg src2, vReg tmp) %{
predicate(!n->as_Reduction()->requires_strict_order());
match(Set dst (AddReductionVF src1 src2));
effect(TEMP tmp);
ins_cost(VEC_COST);
format %{ "reduce_addF_unordered $dst, $src1, $src2\t# KILL $tmp" %}
ins_encode %{
// Configure the vector unit for T_FLOAT elements and the vector length of src2.
__ vsetvli_helper(T_FLOAT, Matcher::vector_length(this, $src2));
// Seed tmp with the scalar input src1 (presumably into element 0, per the
// RVV vfmv.s.f definition - confirm against the assembler helper).
__ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1$$FloatRegister);
// Unordered FP reduction sum of src2, with tmp as both scalar source and destination.
__ vfredusum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg),
as_VectorRegister($tmp$$reg));
// Move the reduced scalar from tmp into the float result register dst.
__ vfmv_f_s($dst$$FloatRegister, as_VectorRegister($tmp$$reg));
%}
ins_pipe(pipe_slow);
%}
instruct reduce_addD_ordered(fRegD dst, fRegD src1, vReg src2, vReg tmp) %{
predicate(n->as_Reduction()->requires_strict_order());
match(Set dst (AddReductionVD src1 src2));
effect(TEMP tmp);
ins_cost(VEC_COST);
format %{ "reduce_addD $dst, $src1, $src2\t# KILL $tmp" %}
format %{ "reduce_addD_ordered $dst, $src1, $src2\t# KILL $tmp" %}
ins_encode %{
__ vsetvli_helper(T_DOUBLE, Matcher::vector_length(this, $src2));
__ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1$$FloatRegister);
@ -2037,6 +2063,22 @@ instruct reduce_addD(fRegD dst, fRegD src1, vReg src2, vReg tmp) %{
ins_pipe(pipe_slow);
%}
// Double add reduction, non strictly-ordered variant.
// Matches AddReductionVD only when the node does NOT require strict ordering
// (see predicate), and emits the unordered reduction sum vfredusum.vs.
// tmp is declared as a TEMP vector register and is overwritten by the encoding.
instruct reduce_addD_unordered(fRegD dst, fRegD src1, vReg src2, vReg tmp) %{
predicate(!n->as_Reduction()->requires_strict_order());
match(Set dst (AddReductionVD src1 src2));
effect(TEMP tmp);
ins_cost(VEC_COST);
format %{ "reduce_addD_unordered $dst, $src1, $src2\t# KILL $tmp" %}
ins_encode %{
// Configure the vector unit for T_DOUBLE elements and the vector length of src2.
__ vsetvli_helper(T_DOUBLE, Matcher::vector_length(this, $src2));
// Seed tmp with the scalar input src1 (presumably into element 0, per the
// RVV vfmv.s.f definition - confirm against the assembler helper).
__ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1$$FloatRegister);
// Unordered FP reduction sum of src2, with tmp as both scalar source and destination.
__ vfredusum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg),
as_VectorRegister($tmp$$reg));
// Move the reduced scalar from tmp into the double result register dst.
__ vfmv_f_s($dst$$FloatRegister, as_VectorRegister($tmp$$reg));
%}
ins_pipe(pipe_slow);
%}
// vector add reduction - predicated
instruct reduce_addI_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{

@ -54,7 +54,7 @@ public class TestVectorFPReduction {
applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"})
@IR(counts = {"requires_strict_order", ">=1", IRNode.ADD_REDUCTION_VF, ">=1"},
failOn = {"no_strict_order"},
applyIfCPUFeatureOr = {"sve", "true", "sse2", "true"},
applyIfCPUFeatureOr = {"sve", "true", "sse2", "true", "rvv", "true"},
phase = CompilePhase.PRINT_IDEAL)
private static void testAddReductionVF() {
float result = 1;
@ -69,7 +69,7 @@ public class TestVectorFPReduction {
applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"})
@IR(counts = {"requires_strict_order", ">=1", IRNode.ADD_REDUCTION_VD, ">=1"},
failOn = {"no_strict_order"},
applyIfCPUFeatureOr = {"sve", "true", "sse2", "true"},
applyIfCPUFeatureOr = {"sve", "true", "sse2", "true", "rvv", "true"},
phase = CompilePhase.PRINT_IDEAL)
private static void testAddReductionVD() {
double result = 1;

@ -78,7 +78,7 @@ public class TestVectorAddMulReduction {
@Test
@IR(counts = {IRNode.ADD_REDUCTION_VF, ">=1", "no_strict_order", ">=1"},
failOn = {"requires_strict_order"},
applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true", "rvv", "true"},
applyIf = {"MaxVectorSize", ">=8"},
phase = CompilePhase.PRINT_IDEAL)
public static void testFloatAdd_64() {
@ -88,7 +88,7 @@ public class TestVectorAddMulReduction {
@Test
@IR(counts = {IRNode.ADD_REDUCTION_VF, ">=1", "no_strict_order", ">=1"},
failOn = {"requires_strict_order"},
applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true", "rvv", "true"},
applyIf = {"MaxVectorSize", ">=16"},
phase = CompilePhase.PRINT_IDEAL)
public static void testFloatAdd_128() {
@ -98,7 +98,7 @@ public class TestVectorAddMulReduction {
@Test
@IR(counts = {IRNode.ADD_REDUCTION_VF, ">=1", "no_strict_order", ">=1"},
failOn = {"requires_strict_order"},
applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true", "rvv", "true"},
applyIf = {"MaxVectorSize", ">=32"},
phase = CompilePhase.PRINT_IDEAL)
public static void testFloatAdd_256() {
@ -108,7 +108,7 @@ public class TestVectorAddMulReduction {
@Test
@IR(counts = {IRNode.ADD_REDUCTION_VF, ">=1", "no_strict_order", ">=1"},
failOn = {"requires_strict_order"},
applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true", "rvv", "true"},
applyIf = {"MaxVectorSize", ">=64"},
phase = CompilePhase.PRINT_IDEAL)
public static void testFloatAdd_512() {
@ -127,7 +127,7 @@ public class TestVectorAddMulReduction {
@Test
@IR(counts = {IRNode.ADD_REDUCTION_VD, ">=1", "no_strict_order", ">=1"},
failOn = {"requires_strict_order"},
applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true", "rvv", "true"},
applyIf = {"MaxVectorSize", ">=16"},
phase = CompilePhase.PRINT_IDEAL)
public static void testDoubleAdd_128() {
@ -137,7 +137,7 @@ public class TestVectorAddMulReduction {
@Test
@IR(counts = {IRNode.ADD_REDUCTION_VD, ">=1", "no_strict_order", ">=1"},
failOn = {"requires_strict_order"},
applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true", "rvv", "true"},
applyIf = {"MaxVectorSize", ">=32"},
phase = CompilePhase.PRINT_IDEAL)
public static void testDoubleAdd_256() {
@ -147,7 +147,7 @@ public class TestVectorAddMulReduction {
@Test
@IR(counts = {IRNode.ADD_REDUCTION_VD, ">=1", "no_strict_order", ">=1"},
failOn = {"requires_strict_order"},
applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true", "rvv", "true"},
applyIf = {"MaxVectorSize", ">=64"},
phase = CompilePhase.PRINT_IDEAL)
public static void testDoubleAdd_512() {