8333964: RISC-V: C2: Check "requires_strict_order" flag for floating-point add reduction
Reviewed-by: fyang
This commit is contained in:
parent
ba5a4670b8
commit
e95f092862
src/hotspot/cpu/riscv
test/hotspot/jtreg/compiler
@ -2007,11 +2007,20 @@ instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct reduce_addF(fRegF dst, fRegF src1, vReg src2, vReg tmp) %{
|
||||
// Distinguish two cases based on requires_strict_order
|
||||
// 1. Non strictly-ordered AddReductionVF/D. For example, AddReductionVF/D
|
||||
// generated by Vector API. It is more beneficial performance-wise to do
|
||||
// an unordered FP reduction sum (vfredusum.vs).
|
||||
// 2. Strictly-ordered AddReductionVF/D. For example, AddReductionVF/D
|
||||
// generated by auto-vectorization. Must do an ordered FP reduction sum
|
||||
// (vfredosum.vs).
|
||||
|
||||
instruct reduce_addF_ordered(fRegF dst, fRegF src1, vReg src2, vReg tmp) %{
|
||||
predicate(n->as_Reduction()->requires_strict_order());
|
||||
match(Set dst (AddReductionVF src1 src2));
|
||||
effect(TEMP tmp);
|
||||
ins_cost(VEC_COST);
|
||||
format %{ "reduce_addF $dst, $src1, $src2\t# KILL $tmp" %}
|
||||
format %{ "reduce_addF_ordered $dst, $src1, $src2\t# KILL $tmp" %}
|
||||
ins_encode %{
|
||||
__ vsetvli_helper(T_FLOAT, Matcher::vector_length(this, $src2));
|
||||
__ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1$$FloatRegister);
|
||||
@ -2022,11 +2031,28 @@ instruct reduce_addF(fRegF dst, fRegF src1, vReg src2, vReg tmp) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct reduce_addD(fRegD dst, fRegD src1, vReg src2, vReg tmp) %{
|
||||
// Float add reduction for the non-strictly-ordered case.
// Matches an AddReductionVF node (scalar input src1, vector input src2) only
// when the node's requires_strict_order() flag is false, and emits the
// unordered reduction sum (vfredusum_vs) for it.
// tmp is a TEMP vector register: it is overwritten by the encoding (the format
// string reports it as KILL).
instruct reduce_addF_unordered(fRegF dst, fRegF src1, vReg src2, vReg tmp) %{
|
||||
// Select this rule only when strict ordering is NOT required.
predicate(!n->as_Reduction()->requires_strict_order());
|
||||
match(Set dst (AddReductionVF src1 src2));
|
||||
effect(TEMP tmp);
|
||||
ins_cost(VEC_COST);
|
||||
format %{ "reduce_addF_unordered $dst, $src1, $src2\t# KILL $tmp" %}
|
||||
ins_encode %{
|
||||
// Set up the vector configuration from the element type (float) and the
// vector length of src2.
__ vsetvli_helper(T_FLOAT, Matcher::vector_length(this, $src2));
|
||||
// Seed tmp with the scalar input src1.
// NOTE(review): presumably only element 0 of tmp is written (RVV vfmv.s.f) - confirm.
__ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1$$FloatRegister);
|
||||
// Unordered FP sum reduction over src2; tmp is passed both as the destination
// and as the operand carrying the scalar seed.
// NOTE(review): operand roles assumed to follow RVV's vd, vs2, vs1 order - confirm
// against the assembler declaration.
__ vfredusum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg),
|
||||
as_VectorRegister($tmp$$reg));
|
||||
// Move the reduction result from tmp into the scalar destination register.
__ vfmv_f_s($dst$$FloatRegister, as_VectorRegister($tmp$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct reduce_addD_ordered(fRegD dst, fRegD src1, vReg src2, vReg tmp) %{
|
||||
predicate(n->as_Reduction()->requires_strict_order());
|
||||
match(Set dst (AddReductionVD src1 src2));
|
||||
effect(TEMP tmp);
|
||||
ins_cost(VEC_COST);
|
||||
format %{ "reduce_addD $dst, $src1, $src2\t# KILL $tmp" %}
|
||||
format %{ "reduce_addD_ordered $dst, $src1, $src2\t# KILL $tmp" %}
|
||||
ins_encode %{
|
||||
__ vsetvli_helper(T_DOUBLE, Matcher::vector_length(this, $src2));
|
||||
__ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1$$FloatRegister);
|
||||
@ -2037,6 +2063,22 @@ instruct reduce_addD(fRegD dst, fRegD src1, vReg src2, vReg tmp) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Double add reduction for the non-strictly-ordered case.
// Matches an AddReductionVD node (scalar input src1, vector input src2) only
// when the node's requires_strict_order() flag is false, and emits the
// unordered reduction sum (vfredusum_vs) for it.
// tmp is a TEMP vector register: it is overwritten by the encoding (the format
// string reports it as KILL).
instruct reduce_addD_unordered(fRegD dst, fRegD src1, vReg src2, vReg tmp) %{
|
||||
// Select this rule only when strict ordering is NOT required.
predicate(!n->as_Reduction()->requires_strict_order());
|
||||
match(Set dst (AddReductionVD src1 src2));
|
||||
effect(TEMP tmp);
|
||||
ins_cost(VEC_COST);
|
||||
format %{ "reduce_addD_unordered $dst, $src1, $src2\t# KILL $tmp" %}
|
||||
ins_encode %{
|
||||
// Set up the vector configuration from the element type (double) and the
// vector length of src2.
__ vsetvli_helper(T_DOUBLE, Matcher::vector_length(this, $src2));
|
||||
// Seed tmp with the scalar input src1.
// NOTE(review): presumably only element 0 of tmp is written (RVV vfmv.s.f) - confirm.
__ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1$$FloatRegister);
|
||||
// Unordered FP sum reduction over src2; tmp is passed both as the destination
// and as the operand carrying the scalar seed.
// NOTE(review): operand roles assumed to follow RVV's vd, vs2, vs1 order - confirm
// against the assembler declaration.
__ vfredusum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg),
|
||||
as_VectorRegister($tmp$$reg));
|
||||
// Move the reduction result from tmp into the scalar destination register.
__ vfmv_f_s($dst$$FloatRegister, as_VectorRegister($tmp$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// vector add reduction - predicated
|
||||
|
||||
instruct reduce_addI_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{
|
||||
|
@ -54,7 +54,7 @@ public class TestVectorFPReduction {
|
||||
applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"})
|
||||
@IR(counts = {"requires_strict_order", ">=1", IRNode.ADD_REDUCTION_VF, ">=1"},
|
||||
failOn = {"no_strict_order"},
|
||||
applyIfCPUFeatureOr = {"sve", "true", "sse2", "true"},
|
||||
applyIfCPUFeatureOr = {"sve", "true", "sse2", "true", "rvv", "true"},
|
||||
phase = CompilePhase.PRINT_IDEAL)
|
||||
private static void testAddReductionVF() {
|
||||
float result = 1;
|
||||
@ -69,7 +69,7 @@ public class TestVectorFPReduction {
|
||||
applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"})
|
||||
@IR(counts = {"requires_strict_order", ">=1", IRNode.ADD_REDUCTION_VD, ">=1"},
|
||||
failOn = {"no_strict_order"},
|
||||
applyIfCPUFeatureOr = {"sve", "true", "sse2", "true"},
|
||||
applyIfCPUFeatureOr = {"sve", "true", "sse2", "true", "rvv", "true"},
|
||||
phase = CompilePhase.PRINT_IDEAL)
|
||||
private static void testAddReductionVD() {
|
||||
double result = 1;
|
||||
|
@ -78,7 +78,7 @@ public class TestVectorAddMulReduction {
|
||||
@Test
|
||||
@IR(counts = {IRNode.ADD_REDUCTION_VF, ">=1", "no_strict_order", ">=1"},
|
||||
failOn = {"requires_strict_order"},
|
||||
applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
|
||||
applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true", "rvv", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=8"},
|
||||
phase = CompilePhase.PRINT_IDEAL)
|
||||
public static void testFloatAdd_64() {
|
||||
@ -88,7 +88,7 @@ public class TestVectorAddMulReduction {
|
||||
@Test
|
||||
@IR(counts = {IRNode.ADD_REDUCTION_VF, ">=1", "no_strict_order", ">=1"},
|
||||
failOn = {"requires_strict_order"},
|
||||
applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
|
||||
applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true", "rvv", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=16"},
|
||||
phase = CompilePhase.PRINT_IDEAL)
|
||||
public static void testFloatAdd_128() {
|
||||
@ -98,7 +98,7 @@ public class TestVectorAddMulReduction {
|
||||
@Test
|
||||
@IR(counts = {IRNode.ADD_REDUCTION_VF, ">=1", "no_strict_order", ">=1"},
|
||||
failOn = {"requires_strict_order"},
|
||||
applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
|
||||
applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true", "rvv", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=32"},
|
||||
phase = CompilePhase.PRINT_IDEAL)
|
||||
public static void testFloatAdd_256() {
|
||||
@ -108,7 +108,7 @@ public class TestVectorAddMulReduction {
|
||||
@Test
|
||||
@IR(counts = {IRNode.ADD_REDUCTION_VF, ">=1", "no_strict_order", ">=1"},
|
||||
failOn = {"requires_strict_order"},
|
||||
applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
|
||||
applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true", "rvv", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=64"},
|
||||
phase = CompilePhase.PRINT_IDEAL)
|
||||
public static void testFloatAdd_512() {
|
||||
@ -127,7 +127,7 @@ public class TestVectorAddMulReduction {
|
||||
@Test
|
||||
@IR(counts = {IRNode.ADD_REDUCTION_VD, ">=1", "no_strict_order", ">=1"},
|
||||
failOn = {"requires_strict_order"},
|
||||
applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
|
||||
applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true", "rvv", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=16"},
|
||||
phase = CompilePhase.PRINT_IDEAL)
|
||||
public static void testDoubleAdd_128() {
|
||||
@ -137,7 +137,7 @@ public class TestVectorAddMulReduction {
|
||||
@Test
|
||||
@IR(counts = {IRNode.ADD_REDUCTION_VD, ">=1", "no_strict_order", ">=1"},
|
||||
failOn = {"requires_strict_order"},
|
||||
applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
|
||||
applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true", "rvv", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=32"},
|
||||
phase = CompilePhase.PRINT_IDEAL)
|
||||
public static void testDoubleAdd_256() {
|
||||
@ -147,7 +147,7 @@ public class TestVectorAddMulReduction {
|
||||
@Test
|
||||
@IR(counts = {IRNode.ADD_REDUCTION_VD, ">=1", "no_strict_order", ">=1"},
|
||||
failOn = {"requires_strict_order"},
|
||||
applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
|
||||
applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true", "rvv", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=64"},
|
||||
phase = CompilePhase.PRINT_IDEAL)
|
||||
public static void testDoubleAdd_512() {
|
||||
|
Loading…
x
Reference in New Issue
Block a user