8282431: AArch64: Add optimized rules for masked vector multiply-add/sub for SVE

Reviewed-by: njian, ngasson
This commit is contained in:
Xiaohong Gong 2022-04-01 09:33:46 +00:00 committed by Ningsheng Jian
parent 9156c0b213
commit 05ea7a51e1
6 changed files with 973 additions and 108 deletions

@ -1544,10 +1544,10 @@ instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{
ins_pipe(pipe_slow);
%}
// vector fmla - predicated
// vector fmad - predicated
// dst_src1 = dst_src1 * src2 + src3
instruct vfmlaF_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
instruct vfmadF_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
predicate(UseFMA && UseSVE > 0);
match(Set dst_src1 (FmaVF (Binary dst_src1 src2) (Binary src3 pg)));
ins_cost(SVE_COST);
@ -1560,7 +1560,7 @@ instruct vfmlaF_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
%}
// dst_src1 = dst_src1 * src2 + src3
instruct vfmlaD_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
instruct vfmadD_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
predicate(UseFMA && UseSVE > 0);
match(Set dst_src1 (FmaVD (Binary dst_src1 src2) (Binary src3 pg)));
ins_cost(SVE_COST);
@ -1575,10 +1575,25 @@ instruct vfmlaD_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
// vector fmls
// dst_src1 = dst_src1 + -src2 * src3
// dst_src1 = dst_src1 + src2 * -src3
instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{
predicate(UseFMA && UseSVE > 0);
// Unpredicated fmls, variant 1 (element size S): the negation is on the first
// multiplicand, i.e. the rule matches FmaVF dst_src1 (Binary (NegVF src2) src3)
// and is encoded as one sve_fmls with ptrue in the governing-predicate slot.
// The NegVF must not be predicated: n->in(2)->in(1) is the (NegVF src2) input
// of the match rule, and the rule is rejected when that node is a predicated
// vector.
instruct vfmlsF1(vReg dst_src1, vReg src2, vReg src3) %{
predicate(UseFMA && UseSVE > 0 &&
!n->in(2)->in(1)->as_Vector()->is_predicated_vector());
match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3)));
ins_cost(SVE_COST);
format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
ins_encode %{
__ sve_fmls(as_FloatRegister($dst_src1$$reg), __ S,
ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
%}
ins_pipe(pipe_slow);
%}
// dst_src1 = dst_src1 + src2 * -src3
// The NegVF must not be predicated.
instruct vfmlsF2(vReg dst_src1, vReg src2, vReg src3) %{
predicate(UseFMA && UseSVE > 0 &&
!n->in(2)->in(2)->as_Vector()->is_predicated_vector());
match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3))));
ins_cost(SVE_COST);
format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
@ -1590,10 +1605,25 @@ instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{
%}
// dst_src1 = dst_src1 + -src2 * src3
// dst_src1 = dst_src1 + src2 * -src3
instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{
predicate(UseFMA && UseSVE > 0);
// Unpredicated fmls, variant 1 (element size D): the negation is on the first
// multiplicand, i.e. the rule matches FmaVD dst_src1 (Binary (NegVD src2) src3)
// and is encoded as one sve_fmls with ptrue in the governing-predicate slot.
// The NegVD must not be predicated: n->in(2)->in(1) is the (NegVD src2) input
// of the match rule, and the rule is rejected when that node is a predicated
// vector.
instruct vfmlsD1(vReg dst_src1, vReg src2, vReg src3) %{
predicate(UseFMA && UseSVE > 0 &&
!n->in(2)->in(1)->as_Vector()->is_predicated_vector());
match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3)));
ins_cost(SVE_COST);
format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
ins_encode %{
__ sve_fmls(as_FloatRegister($dst_src1$$reg), __ D,
ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
%}
ins_pipe(pipe_slow);
%}
// dst_src1 = dst_src1 + src2 * -src3
// The NegVD must not be predicated.
instruct vfmlsD2(vReg dst_src1, vReg src2, vReg src3) %{
predicate(UseFMA && UseSVE > 0 &&
!n->in(2)->in(2)->as_Vector()->is_predicated_vector());
match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3))));
ins_cost(SVE_COST);
format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
@ -1604,13 +1634,62 @@ instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{
ins_pipe(pipe_slow);
%}
// vector fmsb - predicated
// dst_src1 = dst_src1 * -src2 + src3
// Predicated rule (element size S): a FmaVF that carries a governing
// predicate pg and whose second multiplicand is (NegVF src2) is emitted as a
// single sve_fmsb governed by pg.
// The NegVF must not be predicated: n->in(1)->in(2) is the (NegVF src2) input
// of the match rule, and the rule is rejected when that node is itself a
// predicated vector.
instruct vfmsbF_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
predicate(UseFMA && UseSVE > 0 &&
!n->in(1)->in(2)->as_Vector()->is_predicated_vector());
match(Set dst_src1 (FmaVF (Binary dst_src1 (NegVF src2)) (Binary src3 pg)));
ins_cost(SVE_COST);
format %{ "sve_fmsb $dst_src1, $pg, $src2, $src3\t # vector (sve) (S)" %}
ins_encode %{
__ sve_fmsb(as_FloatRegister($dst_src1$$reg), __ S, as_PRegister($pg$$reg),
as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
%}
ins_pipe(pipe_slow);
%}
// dst_src1 = dst_src1 * -src2 + src3
// Predicated rule (element size D): a FmaVD that carries a governing
// predicate pg and whose second multiplicand is (NegVD src2) is emitted as a
// single sve_fmsb governed by pg.
// The NegVD must not be predicated: n->in(1)->in(2) is the (NegVD src2) input
// of the match rule, and the rule is rejected when that node is itself a
// predicated vector.
instruct vfmsbD_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
predicate(UseFMA && UseSVE > 0 &&
!n->in(1)->in(2)->as_Vector()->is_predicated_vector());
match(Set dst_src1 (FmaVD (Binary dst_src1 (NegVD src2)) (Binary src3 pg)));
ins_cost(SVE_COST);
format %{ "sve_fmsb $dst_src1, $pg, $src2, $src3\t # vector (sve) (D)" %}
ins_encode %{
__ sve_fmsb(as_FloatRegister($dst_src1$$reg), __ D, as_PRegister($pg$$reg),
as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
%}
ins_pipe(pipe_slow);
%}
// vector fnmla
// dst_src1 = -dst_src1 + -src2 * src3
// dst_src1 = -dst_src1 + src2 * -src3
instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{
predicate(UseFMA && UseSVE > 0);
// Unpredicated fnmla, variant 1 (element size S): both the addend and the
// first multiplicand are negated, i.e. the rule matches
// FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3) and is encoded as one
// sve_fnmla with ptrue in the governing-predicate slot.
// The NegVF must not be predicated: n->in(1) is (NegVF dst_src1) and
// n->in(2)->in(1) is (NegVF src2); the rule is rejected when either of them
// is a predicated vector.
instruct vfnmlaF1(vReg dst_src1, vReg src2, vReg src3) %{
predicate(UseFMA && UseSVE > 0 &&
!n->in(1)->as_Vector()->is_predicated_vector() &&
!n->in(2)->in(1)->as_Vector()->is_predicated_vector());
match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3)));
ins_cost(SVE_COST);
format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
ins_encode %{
__ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ S,
ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
%}
ins_pipe(pipe_slow);
%}
// dst_src1 = -dst_src1 + src2 * -src3
// The NegVF must not be predicated.
instruct vfnmlaF2(vReg dst_src1, vReg src2, vReg src3) %{
predicate(UseFMA && UseSVE > 0 &&
!n->in(1)->as_Vector()->is_predicated_vector() &&
!n->in(2)->in(2)->as_Vector()->is_predicated_vector());
match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3))));
ins_cost(SVE_COST);
format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
@ -1622,10 +1701,27 @@ instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{
%}
// dst_src1 = -dst_src1 + -src2 * src3
// dst_src1 = -dst_src1 + src2 * -src3
instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{
predicate(UseFMA && UseSVE > 0);
// Unpredicated fnmla, variant 1 (element size D): both the addend and the
// first multiplicand are negated, i.e. the rule matches
// FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3) and is encoded as one
// sve_fnmla with ptrue in the governing-predicate slot.
// The NegVD must not be predicated: n->in(1) is (NegVD dst_src1) and
// n->in(2)->in(1) is (NegVD src2); the rule is rejected when either of them
// is a predicated vector.
instruct vfnmlaD1(vReg dst_src1, vReg src2, vReg src3) %{
predicate(UseFMA && UseSVE > 0 &&
!n->in(1)->as_Vector()->is_predicated_vector() &&
!n->in(2)->in(1)->as_Vector()->is_predicated_vector());
match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3)));
ins_cost(SVE_COST);
format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
ins_encode %{
__ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ D,
ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
%}
ins_pipe(pipe_slow);
%}
// dst_src1 = -dst_src1 + src2 * -src3
// The NegVD must not be predicated.
instruct vfnmlaD2(vReg dst_src1, vReg src2, vReg src3) %{
predicate(UseFMA && UseSVE > 0 &&
!n->in(1)->as_Vector()->is_predicated_vector() &&
!n->in(2)->in(2)->as_Vector()->is_predicated_vector());
match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3))));
ins_cost(SVE_COST);
format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
@ -1636,11 +1732,47 @@ instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{
ins_pipe(pipe_slow);
%}
// vector fnmad - predicated
// dst_src1 = -src3 + dst_src1 * -src2
// Predicated rule (element size S): a FmaVF that carries a governing
// predicate pg, whose second multiplicand is (NegVF src2) and whose addend is
// (NegVF src3), is emitted as a single sve_fnmad governed by pg.
// The NegVF must not be predicated: n->in(1)->in(2) is (NegVF src2) and
// n->in(2)->in(1) is (NegVF src3); the rule is rejected when either of them
// is a predicated vector.
instruct vfnmadF_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
predicate(UseFMA && UseSVE > 0 &&
!n->in(1)->in(2)->as_Vector()->is_predicated_vector() &&
!n->in(2)->in(1)->as_Vector()->is_predicated_vector());
match(Set dst_src1 (FmaVF (Binary dst_src1 (NegVF src2)) (Binary (NegVF src3) pg)));
ins_cost(SVE_COST);
format %{ "sve_fnmad $dst_src1, $pg, $src2, $src3\t # vector (sve) (S)" %}
ins_encode %{
__ sve_fnmad(as_FloatRegister($dst_src1$$reg), __ S, as_PRegister($pg$$reg),
as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
%}
ins_pipe(pipe_slow);
%}
// dst_src1 = -src3 + dst_src1 * -src2
// Predicated rule (element size D): a FmaVD that carries a governing
// predicate pg, whose second multiplicand is (NegVD src2) and whose addend is
// (NegVD src3), is emitted as a single sve_fnmad governed by pg.
// The NegVD must not be predicated: n->in(1)->in(2) is (NegVD src2) and
// n->in(2)->in(1) is (NegVD src3); the rule is rejected when either of them
// is a predicated vector.
instruct vfnmadD_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
predicate(UseFMA && UseSVE > 0 &&
!n->in(1)->in(2)->as_Vector()->is_predicated_vector() &&
!n->in(2)->in(1)->as_Vector()->is_predicated_vector());
match(Set dst_src1 (FmaVD (Binary dst_src1 (NegVD src2)) (Binary (NegVD src3) pg)));
ins_cost(SVE_COST);
format %{ "sve_fnmad $dst_src1, $pg, $src2, $src3\t # vector (sve) (D)" %}
ins_encode %{
__ sve_fnmad(as_FloatRegister($dst_src1$$reg), __ D, as_PRegister($pg$$reg),
as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
%}
ins_pipe(pipe_slow);
%}
// vector fnmls
// dst_src1 = -dst_src1 + src2 * src3
// The NegVF must not be predicated.
instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{
predicate(UseFMA && UseSVE > 0);
predicate(UseFMA && UseSVE > 0 &&
!n->in(1)->as_Vector()->is_predicated_vector());
match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3)));
ins_cost(SVE_COST);
format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
@ -1652,8 +1784,10 @@ instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{
%}
// dst_src1 = -dst_src1 + src2 * src3
// The NegVD must not be predicated.
instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{
predicate(UseFMA && UseSVE > 0);
predicate(UseFMA && UseSVE > 0 &&
!n->in(1)->as_Vector()->is_predicated_vector());
match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3)));
ins_cost(SVE_COST);
format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
@ -1664,6 +1798,38 @@ instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{
ins_pipe(pipe_slow);
%}
// vector fnmsb - predicated
// dst_src1 = -src3 + dst_src1 * src2
// Predicated rule (element size S): a FmaVF that carries a governing
// predicate pg and whose addend is (NegVF src3) is emitted as a single
// sve_fnmsb governed by pg.
// The NegVF must not be predicated: n->in(2)->in(1) is the (NegVF src3) input
// of the match rule, and the rule is rejected when that node is itself a
// predicated vector.
instruct vfnmsbF_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
predicate(UseFMA && UseSVE > 0 &&
!n->in(2)->in(1)->as_Vector()->is_predicated_vector());
match(Set dst_src1 (FmaVF (Binary dst_src1 src2) (Binary (NegVF src3) pg)));
ins_cost(SVE_COST);
format %{ "sve_fnmsb $dst_src1, $pg, $src2, $src3\t # vector (sve) (S)" %}
ins_encode %{
__ sve_fnmsb(as_FloatRegister($dst_src1$$reg), __ S, as_PRegister($pg$$reg),
as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
%}
ins_pipe(pipe_slow);
%}
// dst_src1 = -src3 + dst_src1 * src2
// Predicated rule (element size D): a FmaVD that carries a governing
// predicate pg and whose addend is (NegVD src3) is emitted as a single
// sve_fnmsb governed by pg.
// The NegVD must not be predicated: n->in(2)->in(1) is the (NegVD src3) input
// of the match rule, and the rule is rejected when that node is itself a
// predicated vector.
instruct vfnmsbD_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
predicate(UseFMA && UseSVE > 0 &&
!n->in(2)->in(1)->as_Vector()->is_predicated_vector());
match(Set dst_src1 (FmaVD (Binary dst_src1 src2) (Binary (NegVD src3) pg)));
ins_cost(SVE_COST);
format %{ "sve_fnmsb $dst_src1, $pg, $src2, $src3\t # vector (sve) (D)" %}
ins_encode %{
__ sve_fnmsb(as_FloatRegister($dst_src1$$reg), __ D, as_PRegister($pg$$reg),
as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
%}
ins_pipe(pipe_slow);
%}
// vector mla
// dst_src1 = dst_src1 + src2 * src3
@ -1722,6 +1888,64 @@ instruct vmlaL(vReg dst_src1, vReg src2, vReg src3)
ins_pipe(pipe_slow);
%}
// vector mla - predicated
// dst_src1 = dst_src1 + src2 * src3
// Predicated rule (element size B): an AddVB that carries a governing
// predicate pg and whose second addend is (MulVB src2 src3) is emitted as a
// single sve_mla governed by pg.
// The format string marks every operand with a leading $ so that src2 and
// src3 are substituted the same way as dst_src1 and pg.
instruct vmlaB_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg)
%{
  predicate(UseSVE > 0);
  match(Set dst_src1 (AddVB (Binary dst_src1 (MulVB src2 src3)) pg));
  ins_cost(SVE_COST);
  format %{ "sve_mla $dst_src1, $pg, $src2, $src3\t # vector (sve) (B)" %}
  ins_encode %{
    __ sve_mla(as_FloatRegister($dst_src1$$reg), __ B, as_PRegister($pg$$reg),
               as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}
// dst_src1 = dst_src1 + src2 * src3
// Predicated rule (element size H): an AddVS that carries a governing
// predicate pg and whose second addend is (MulVS src2 src3) is emitted as a
// single sve_mla governed by pg.
// The format string marks every operand with a leading $ so that src2 and
// src3 are substituted the same way as dst_src1 and pg.
instruct vmlaS_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg)
%{
  predicate(UseSVE > 0);
  match(Set dst_src1 (AddVS (Binary dst_src1 (MulVS src2 src3)) pg));
  ins_cost(SVE_COST);
  format %{ "sve_mla $dst_src1, $pg, $src2, $src3\t # vector (sve) (H)" %}
  ins_encode %{
    __ sve_mla(as_FloatRegister($dst_src1$$reg), __ H, as_PRegister($pg$$reg),
               as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}
// dst_src1 = dst_src1 + src2 * src3
// Predicated rule (element size S): an AddVI that carries a governing
// predicate pg and whose second addend is (MulVI src2 src3) is emitted as a
// single sve_mla governed by pg.
// The format string marks every operand with a leading $ so that src2 and
// src3 are substituted the same way as dst_src1 and pg.
instruct vmlaI_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg)
%{
  predicate(UseSVE > 0);
  match(Set dst_src1 (AddVI (Binary dst_src1 (MulVI src2 src3)) pg));
  ins_cost(SVE_COST);
  format %{ "sve_mla $dst_src1, $pg, $src2, $src3\t # vector (sve) (S)" %}
  ins_encode %{
    __ sve_mla(as_FloatRegister($dst_src1$$reg), __ S, as_PRegister($pg$$reg),
               as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}
// dst_src1 = dst_src1 + src2 * src3
// Predicated rule (element size D): an AddVL that carries a governing
// predicate pg and whose second addend is (MulVL src2 src3) is emitted as a
// single sve_mla governed by pg.
// The format string marks every operand with a leading $ so that src2 and
// src3 are substituted the same way as dst_src1 and pg.
instruct vmlaL_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg)
%{
  predicate(UseSVE > 0);
  match(Set dst_src1 (AddVL (Binary dst_src1 (MulVL src2 src3)) pg));
  ins_cost(SVE_COST);
  format %{ "sve_mla $dst_src1, $pg, $src2, $src3\t # vector (sve) (D)" %}
  ins_encode %{
    __ sve_mla(as_FloatRegister($dst_src1$$reg), __ D, as_PRegister($pg$$reg),
               as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}
// vector mls
// dst_src1 = dst_src1 - src2 * src3
@ -1780,6 +2004,64 @@ instruct vmlsL(vReg dst_src1, vReg src2, vReg src3)
ins_pipe(pipe_slow);
%}
// vector mls - predicated
// dst_src1 = dst_src1 - src2 * src3
// Predicated rule (element size B): a SubVB that carries a governing
// predicate pg and whose subtrahend is (MulVB src2 src3) is emitted as a
// single sve_mls governed by pg.
// The format string marks every operand with a leading $ so that src2 and
// src3 are substituted the same way as dst_src1 and pg.
instruct vmlsB_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg)
%{
  predicate(UseSVE > 0);
  match(Set dst_src1 (SubVB (Binary dst_src1 (MulVB src2 src3)) pg));
  ins_cost(SVE_COST);
  format %{ "sve_mls $dst_src1, $pg, $src2, $src3\t # vector (sve) (B)" %}
  ins_encode %{
    __ sve_mls(as_FloatRegister($dst_src1$$reg), __ B, as_PRegister($pg$$reg),
               as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}
// dst_src1 = dst_src1 - src2 * src3
// Predicated rule (element size H): a SubVS that carries a governing
// predicate pg and whose subtrahend is (MulVS src2 src3) is emitted as a
// single sve_mls governed by pg.
// The format string marks every operand with a leading $ so that src2 and
// src3 are substituted the same way as dst_src1 and pg.
instruct vmlsS_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg)
%{
  predicate(UseSVE > 0);
  match(Set dst_src1 (SubVS (Binary dst_src1 (MulVS src2 src3)) pg));
  ins_cost(SVE_COST);
  format %{ "sve_mls $dst_src1, $pg, $src2, $src3\t # vector (sve) (H)" %}
  ins_encode %{
    __ sve_mls(as_FloatRegister($dst_src1$$reg), __ H, as_PRegister($pg$$reg),
               as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}
// dst_src1 = dst_src1 - src2 * src3
// Predicated rule (element size S): a SubVI that carries a governing
// predicate pg and whose subtrahend is (MulVI src2 src3) is emitted as a
// single sve_mls governed by pg.
// The format string marks every operand with a leading $ so that src2 and
// src3 are substituted the same way as dst_src1 and pg.
instruct vmlsI_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg)
%{
  predicate(UseSVE > 0);
  match(Set dst_src1 (SubVI (Binary dst_src1 (MulVI src2 src3)) pg));
  ins_cost(SVE_COST);
  format %{ "sve_mls $dst_src1, $pg, $src2, $src3\t # vector (sve) (S)" %}
  ins_encode %{
    __ sve_mls(as_FloatRegister($dst_src1$$reg), __ S, as_PRegister($pg$$reg),
               as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}
// dst_src1 = dst_src1 - src2 * src3
// Predicated rule (element size D): a SubVL that carries a governing
// predicate pg and whose subtrahend is (MulVL src2 src3) is emitted as a
// single sve_mls governed by pg.
// The format string marks every operand with a leading $ so that src2 and
// src3 are substituted the same way as dst_src1 and pg.
instruct vmlsL_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg)
%{
  predicate(UseSVE > 0);
  match(Set dst_src1 (SubVL (Binary dst_src1 (MulVL src2 src3)) pg));
  ins_cost(SVE_COST);
  format %{ "sve_mls $dst_src1, $pg, $src2, $src3\t # vector (sve) (D)" %}
  ins_encode %{
    __ sve_mls(as_FloatRegister($dst_src1$$reg), __ D, as_PRegister($pg$$reg),
               as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}
// vector mul
instruct vmulB(vReg dst_src1, vReg src2) %{

@ -907,8 +907,8 @@ VMINMAX_PREDICATE(min, MinV, sve_fmin, sve_smin)
VMINMAX_PREDICATE(max, MaxV, sve_fmax, sve_smax)
dnl
dnl VFMLA($1 $2 $3 )
dnl VFMLA(name_suffix, size, min_vec_len)
dnl VFMLA($1 $2 )
dnl VFMLA(name_suffix, size)
define(`VFMLA', `
// dst_src1 = dst_src1 + src2 * src3
instruct vfmla$1(vReg dst_src1, vReg src2, vReg src3) %{
@ -924,15 +924,15 @@ instruct vfmla$1(vReg dst_src1, vReg src2, vReg src3) %{
%}')dnl
dnl
// vector fmla
VFMLA(F, S, 4)
VFMLA(D, D, 2)
VFMLA(F, S)
VFMLA(D, D)
dnl
dnl VFMLA_PREDICATE($1, $2 )
dnl VFMLA_PREDICATE(type, size)
define(`VFMLA_PREDICATE', `
dnl VFMAD_PREDICATE($1 $2 )
dnl VFMAD_PREDICATE(name_suffix, size)
define(`VFMAD_PREDICATE', `
// dst_src1 = dst_src1 * src2 + src3
instruct vfmla$1_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
instruct vfmad$1_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
predicate(UseFMA && UseSVE > 0);
match(Set dst_src1 (FmaV$1 (Binary dst_src1 src2) (Binary src3 pg)));
ins_cost(SVE_COST);
@ -944,19 +944,37 @@ instruct vfmla$1_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
ins_pipe(pipe_slow);
%}')dnl
dnl
// vector fmla - predicated
VFMLA_PREDICATE(F, S)
VFMLA_PREDICATE(D, D)
// vector fmad - predicated
VFMAD_PREDICATE(F, S)
VFMAD_PREDICATE(D, D)
dnl
dnl VFMLS($1 $2 $3 )
dnl VFMLS(name_suffix, size, min_vec_len)
define(`VFMLS', `
dnl VFMLS1($1 $2 )
dnl VFMLS1(name_suffix, size)
define(`VFMLS1', `
// dst_src1 = dst_src1 + -src2 * src3
// dst_src1 = dst_src1 + src2 * -src3
instruct vfmls$1(vReg dst_src1, vReg src2, vReg src3) %{
predicate(UseFMA && UseSVE > 0);
// The NegV$1 must not be predicated.
instruct vfmls`$1'1(vReg dst_src1, vReg src2, vReg src3) %{
predicate(UseFMA && UseSVE > 0 &&
!n->in(2)->in(1)->as_Vector()->is_predicated_vector());
match(Set dst_src1 (FmaV$1 dst_src1 (Binary (NegV$1 src2) src3)));
ins_cost(SVE_COST);
format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
ins_encode %{
__ sve_fmls(as_FloatRegister($dst_src1$$reg), __ $2,
ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
%}
ins_pipe(pipe_slow);
%}')dnl
dnl
dnl VFMLS2($1 $2 )
dnl VFMLS2(name_suffix, size)
define(`VFMLS2', `
// dst_src1 = dst_src1 + src2 * -src3
// The NegV$1 must not be predicated.
instruct vfmls`$1'2(vReg dst_src1, vReg src2, vReg src3) %{
predicate(UseFMA && UseSVE > 0 &&
!n->in(2)->in(2)->as_Vector()->is_predicated_vector());
match(Set dst_src1 (FmaV$1 dst_src1 (Binary src2 (NegV$1 src3))));
ins_cost(SVE_COST);
format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
@ -968,18 +986,63 @@ instruct vfmls$1(vReg dst_src1, vReg src2, vReg src3) %{
%}')dnl
dnl
// vector fmls
VFMLS(F, S, 4)
VFMLS(D, D, 2)
VFMLS1(F, S)
VFMLS2(F, S)
VFMLS1(D, D)
VFMLS2(D, D)
dnl
dnl VFNMLA($1 $2 $3 )
dnl VFNMLA(name_suffix, size, min_vec_len)
define(`VFNMLA', `
dnl VFMSB_PREDICATE($1 $2 )
dnl VFMSB_PREDICATE(name_suffix, size)
define(`VFMSB_PREDICATE', `
// dst_src1 = dst_src1 * -src2 + src3
// Predicated rule: a FmaV$1 that carries a governing predicate pg and whose
// second multiplicand is (NegV$1 src2) is emitted as a single sve_fmsb
// governed by pg.
// The NegV$1 must not be predicated: n->in(1)->in(2) is the (NegV$1 src2)
// input of the match rule, and the rule is rejected when that node is itself
// a predicated vector.
instruct vfmsb$1_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
predicate(UseFMA && UseSVE > 0 &&
!n->in(1)->in(2)->as_Vector()->is_predicated_vector());
match(Set dst_src1 (FmaV$1 (Binary dst_src1 (NegV$1 src2)) (Binary src3 pg)));
ins_cost(SVE_COST);
format %{ "sve_fmsb $dst_src1, $pg, $src2, $src3\t # vector (sve) ($2)" %}
ins_encode %{
__ sve_fmsb(as_FloatRegister($dst_src1$$reg), __ $2, as_PRegister($pg$$reg),
as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
%}
ins_pipe(pipe_slow);
%}')dnl
dnl
// vector fmsb - predicated
VFMSB_PREDICATE(F, S)
VFMSB_PREDICATE(D, D)
dnl
dnl VFNMLA1($1 $2 )
dnl VFNMLA1(name_suffix, size)
define(`VFNMLA1', `
// dst_src1 = -dst_src1 + -src2 * src3
// dst_src1 = -dst_src1 + src2 * -src3
instruct vfnmla$1(vReg dst_src1, vReg src2, vReg src3) %{
predicate(UseFMA && UseSVE > 0);
// The NegV$1 must not be predicated.
instruct vfnmla`$1'1(vReg dst_src1, vReg src2, vReg src3) %{
predicate(UseFMA && UseSVE > 0 &&
!n->in(1)->as_Vector()->is_predicated_vector() &&
!n->in(2)->in(1)->as_Vector()->is_predicated_vector());
match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary (NegV$1 src2) src3)));
ins_cost(SVE_COST);
format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
ins_encode %{
__ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ $2,
ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
%}
ins_pipe(pipe_slow);
%}')dnl
dnl
dnl VFNMLA2($1 $2 )
dnl VFNMLA2(name_suffix, size)
define(`VFNMLA2', `
// dst_src1 = -dst_src1 + src2 * -src3
// The NegV$1 must not be predicated.
instruct vfnmla`$1'2(vReg dst_src1, vReg src2, vReg src3) %{
predicate(UseFMA && UseSVE > 0 &&
!n->in(1)->as_Vector()->is_predicated_vector() &&
!n->in(2)->in(2)->as_Vector()->is_predicated_vector());
match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary src2 (NegV$1 src3))));
ins_cost(SVE_COST);
format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
@ -991,16 +1054,44 @@ instruct vfnmla$1(vReg dst_src1, vReg src2, vReg src3) %{
%}')dnl
dnl
// vector fnmla
VFNMLA(F, S, 4)
VFNMLA(D, D, 2)
VFNMLA1(F, S)
VFNMLA2(F, S)
VFNMLA1(D, D)
VFNMLA2(D, D)
dnl
dnl VFNMLS($1 $2 $3 )
dnl VFNMLS(name_suffix, size, min_vec_len)
dnl VFNMAD_PREDICATE($1 $2 )
dnl VFNMAD_PREDICATE(name_suffix, size)
define(`VFNMAD_PREDICATE', `
// dst_src1 = -src3 + dst_src1 * -src2
// Predicated rule: a FmaV$1 that carries a governing predicate pg, whose
// second multiplicand is (NegV$1 src2) and whose addend is (NegV$1 src3), is
// emitted as a single sve_fnmad governed by pg.
// The NegV$1 must not be predicated: n->in(1)->in(2) is (NegV$1 src2) and
// n->in(2)->in(1) is (NegV$1 src3); the rule is rejected when either of them
// is a predicated vector.
instruct vfnmad$1_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
predicate(UseFMA && UseSVE > 0 &&
!n->in(1)->in(2)->as_Vector()->is_predicated_vector() &&
!n->in(2)->in(1)->as_Vector()->is_predicated_vector());
match(Set dst_src1 (FmaV$1 (Binary dst_src1 (NegV$1 src2)) (Binary (NegV$1 src3) pg)));
ins_cost(SVE_COST);
format %{ "sve_fnmad $dst_src1, $pg, $src2, $src3\t # vector (sve) ($2)" %}
ins_encode %{
__ sve_fnmad(as_FloatRegister($dst_src1$$reg), __ $2, as_PRegister($pg$$reg),
as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
%}
ins_pipe(pipe_slow);
%}')dnl
dnl
// vector fnmad - predicated
VFNMAD_PREDICATE(F, S)
VFNMAD_PREDICATE(D, D)
dnl
dnl VFNMLS($1 $2 )
dnl VFNMLS(name_suffix, size)
define(`VFNMLS', `
// dst_src1 = -dst_src1 + src2 * src3
// The NegV$1 must not be predicated.
instruct vfnmls$1(vReg dst_src1, vReg src2, vReg src3) %{
predicate(UseFMA && UseSVE > 0);
predicate(UseFMA && UseSVE > 0 &&
!n->in(1)->as_Vector()->is_predicated_vector());
match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary src2 src3)));
ins_cost(SVE_COST);
format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
@ -1012,12 +1103,35 @@ instruct vfnmls$1(vReg dst_src1, vReg src2, vReg src3) %{
%}')dnl
dnl
// vector fnmls
VFNMLS(F, S, 4)
VFNMLS(D, D, 2)
VFNMLS(F, S)
VFNMLS(D, D)
dnl
dnl VMLA($1 $2 $3 )
dnl VMLA(name_suffix, size, min_vec_len)
dnl VFNMSB_PREDICATE($1 $2 )
dnl VFNMSB_PREDICATE(name_suffix, size)
define(`VFNMSB_PREDICATE', `
// dst_src1 = -src3 + dst_src1 * src2
// Predicated rule: a FmaV$1 that carries a governing predicate pg and whose
// addend is (NegV$1 src3) is emitted as a single sve_fnmsb governed by pg.
// The NegV$1 must not be predicated: n->in(2)->in(1) is the (NegV$1 src3)
// input of the match rule, and the rule is rejected when that node is itself
// a predicated vector.
instruct vfnmsb$1_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
predicate(UseFMA && UseSVE > 0 &&
!n->in(2)->in(1)->as_Vector()->is_predicated_vector());
match(Set dst_src1 (FmaV$1 (Binary dst_src1 src2) (Binary (NegV$1 src3) pg)));
ins_cost(SVE_COST);
format %{ "sve_fnmsb $dst_src1, $pg, $src2, $src3\t # vector (sve) ($2)" %}
ins_encode %{
__ sve_fnmsb(as_FloatRegister($dst_src1$$reg), __ $2, as_PRegister($pg$$reg),
as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
%}
ins_pipe(pipe_slow);
%}')dnl
dnl
// vector fnmsb - predicated
VFNMSB_PREDICATE(F, S)
VFNMSB_PREDICATE(D, D)
dnl
dnl VMLA($1 $2 )
dnl VMLA(name_suffix, size)
define(`VMLA', `
// dst_src1 = dst_src1 + src2 * src3
instruct vmla$1(vReg dst_src1, vReg src2, vReg src3)
@ -1034,14 +1148,38 @@ instruct vmla$1(vReg dst_src1, vReg src2, vReg src3)
%}')dnl
dnl
// vector mla
VMLA(B, B, 16)
VMLA(S, H, 8)
VMLA(I, S, 4)
VMLA(L, D, 2)
VMLA(B, B)
VMLA(S, H)
VMLA(I, S)
VMLA(L, D)
dnl
dnl VMLS($1 $2 $3 )
dnl VMLS(name_suffix, size, min_vec_len)
dnl VMLA_PREDICATE($1 $2 )
dnl VMLA_PREDICATE(name_suffix, size)
define(`VMLA_PREDICATE', `
// dst_src1 = dst_src1 + src2 * src3
// Predicated rule: an AddV$1 that carries a governing predicate pg and whose
// second addend is (MulV$1 src2 src3) is emitted as a single sve_mla governed
// by pg.
// The format string marks every operand with a leading $ so that src2 and
// src3 are substituted the same way as dst_src1 and pg.
instruct vmla$1_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg)
%{
  predicate(UseSVE > 0);
  match(Set dst_src1 (AddV$1 (Binary dst_src1 (MulV$1 src2 src3)) pg));
  ins_cost(SVE_COST);
  format %{ "sve_mla $dst_src1, $pg, $src2, $src3\t # vector (sve) ($2)" %}
  ins_encode %{
    __ sve_mla(as_FloatRegister($dst_src1$$reg), __ $2, as_PRegister($pg$$reg),
         as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl
// vector mla - predicated
VMLA_PREDICATE(B, B)
VMLA_PREDICATE(S, H)
VMLA_PREDICATE(I, S)
VMLA_PREDICATE(L, D)
dnl
dnl VMLS($1 $2 )
dnl VMLS(name_suffix, size)
define(`VMLS', `
// dst_src1 = dst_src1 - src2 * src3
instruct vmls$1(vReg dst_src1, vReg src2, vReg src3)
@ -1058,10 +1196,34 @@ instruct vmls$1(vReg dst_src1, vReg src2, vReg src3)
%}')dnl
dnl
// vector mls
VMLS(B, B, 16)
VMLS(S, H, 8)
VMLS(I, S, 4)
VMLS(L, D, 2)
VMLS(B, B)
VMLS(S, H)
VMLS(I, S)
VMLS(L, D)
dnl
dnl VMLS_PREDICATE($1 $2 )
dnl VMLS_PREDICATE(name_suffix, size)
define(`VMLS_PREDICATE', `
// dst_src1 = dst_src1 - src2 * src3
// Predicated rule: a SubV$1 that carries a governing predicate pg and whose
// subtrahend is (MulV$1 src2 src3) is emitted as a single sve_mls governed
// by pg.
// The format string marks every operand with a leading $ so that src2 and
// src3 are substituted the same way as dst_src1 and pg.
instruct vmls$1_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg)
%{
  predicate(UseSVE > 0);
  match(Set dst_src1 (SubV$1 (Binary dst_src1 (MulV$1 src2 src3)) pg));
  ins_cost(SVE_COST);
  format %{ "sve_mls $dst_src1, $pg, $src2, $src3\t # vector (sve) ($2)" %}
  ins_encode %{
    __ sve_mls(as_FloatRegister($dst_src1$$reg), __ $2, as_PRegister($pg$$reg),
         as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl
// vector mls - predicated
VMLS_PREDICATE(B, B)
VMLS_PREDICATE(S, H)
VMLS_PREDICATE(I, S)
VMLS_PREDICATE(L, D)
dnl
dnl BINARY_OP_TRUE_PREDICATE($1, $2, $3, $4, $5 )

@ -3173,8 +3173,11 @@ public:
INSN(sve_fnmla, 0b01100101, 1, 0b010); // floating-point negated fused multiply-add: Zda = -Zda + -Zn * Zm
INSN(sve_fnmls, 0b01100101, 1, 0b011); // floating-point negated fused multiply-subtract: Zda = -Zda + Zn * Zm
INSN(sve_fmad, 0b01100101, 1, 0b100); // floating-point fused multiply-add, writing multiplicand: Zda = Zm + Zda * Zn
INSN(sve_mla, 0b00000100, 0, 0b010); // multiply-add: Zda = Zda + Zn*Zm
INSN(sve_mls, 0b00000100, 0, 0b011); // multiply-subtract: Zda = Zda + -Zn*Zm
INSN(sve_fmsb, 0b01100101, 1, 0b101); // floating-point fused multiply-subtract, writing multiplicand: Zda = Zm + -Zda * Zn
INSN(sve_fnmad, 0b01100101, 1, 0b110); // floating-point negated fused multiply-add, writing multiplicand: Zda = -Zm + -Zda * Zn
INSN(sve_fnmsb, 0b01100101, 1, 0b111); // floating-point negated fused multiply-subtract, writing multiplicand: Zda = -Zm + Zda * Zn
INSN(sve_mla, 0b00000100, 0, 0b010); // multiply-add, writing addend: Zda = Zda + Zn*Zm
INSN(sve_mls, 0b00000100, 0, 0b011); // multiply-subtract, writing addend: Zda = Zda + -Zn*Zm
#undef INSN
// SVE bitwise logical - unpredicated

@ -1849,6 +1849,9 @@ generate(SVEVectorOp, [["add", "ZZZ"],
["fmad", "ZPZZ", "m"],
["fmla", "ZPZZ", "m"],
["fmls", "ZPZZ", "m"],
["fmsb", "ZPZZ", "m"],
["fnmad", "ZPZZ", "m"],
["fnmsb", "ZPZZ", "m"],
["fnmla", "ZPZZ", "m"],
["fnmls", "ZPZZ", "m"],
["mla", "ZPZZ", "m"],

@ -1153,27 +1153,30 @@
__ sve_fmad(z17, __ D, p2, z16, z17); // fmad z17.d, p2/m, z16.d, z17.d
__ sve_fmla(z0, __ S, p1, z2, z23); // fmla z0.s, p1/m, z2.s, z23.s
__ sve_fmls(z6, __ D, p2, z20, z14); // fmls z6.d, p2/m, z20.d, z14.d
__ sve_fnmla(z29, __ D, p3, z3, z3); // fnmla z29.d, p3/m, z3.d, z3.d
__ sve_fnmls(z9, __ S, p0, z24, z27); // fnmls z9.s, p0/m, z24.s, z27.s
__ sve_mla(z19, __ S, p5, z7, z25); // mla z19.s, p5/m, z7.s, z25.s
__ sve_mls(z13, __ B, p1, z7, z25); // mls z13.b, p1/m, z7.b, z25.b
__ sve_and(z21, z17, z17); // and z21.d, z17.d, z17.d
__ sve_eor(z3, z9, z19); // eor z3.d, z9.d, z19.d
__ sve_orr(z7, z11, z14); // orr z7.d, z11.d, z14.d
__ sve_bic(z17, z11, z13); // bic z17.d, z11.d, z13.d
__ sve_uzp1(z17, __ H, z30, z17); // uzp1 z17.h, z30.h, z17.h
__ sve_uzp2(z15, __ S, z14, z26); // uzp2 z15.s, z14.s, z26.s
__ sve_fmsb(z29, __ D, p3, z3, z3); // fmsb z29.d, p3/m, z3.d, z3.d
__ sve_fnmad(z9, __ S, p0, z24, z27); // fnmad z9.s, p0/m, z24.s, z27.s
__ sve_fnmsb(z19, __ D, p5, z7, z25); // fnmsb z19.d, p5/m, z7.d, z25.d
__ sve_fnmla(z13, __ S, p1, z7, z25); // fnmla z13.s, p1/m, z7.s, z25.s
__ sve_fnmls(z21, __ S, p4, z17, z0); // fnmls z21.s, p4/m, z17.s, z0.s
__ sve_mla(z9, __ H, p5, z11, z7); // mla z9.h, p5/m, z11.h, z7.h
__ sve_mls(z14, __ H, p4, z17, z11); // mls z14.h, p4/m, z17.h, z11.h
__ sve_and(z24, z17, z30); // and z24.d, z17.d, z30.d
__ sve_eor(z8, z15, z14); // eor z8.d, z15.d, z14.d
__ sve_orr(z22, z27, z22); // orr z22.d, z27.d, z22.d
__ sve_bic(z8, z5, z27); // bic z8.d, z5.d, z27.d
__ sve_uzp1(z10, __ D, z0, z14); // uzp1 z10.d, z0.d, z14.d
__ sve_uzp2(z21, __ B, z20, z0); // uzp2 z21.b, z20.b, z0.b
// SVEReductionOp
__ sve_andv(v27, __ H, p5, z7); // andv h27, p5, z7.h
__ sve_orv(v5, __ H, p7, z27); // orv h5, p7, z27.h
__ sve_eorv(v0, __ S, p3, z24); // eorv s0, p3, z24.s
__ sve_smaxv(v20, __ S, p0, z3); // smaxv s20, p0, z3.s
__ sve_sminv(v25, __ D, p1, z25); // sminv d25, p1, z25.d
__ sve_fminv(v17, __ S, p4, z1); // fminv s17, p4, z1.s
__ sve_fmaxv(v14, __ S, p7, z13); // fmaxv s14, p7, z13.s
__ sve_fadda(v17, __ D, p0, z30); // fadda d17, p0, d17, z30.d
__ sve_uaddv(v22, __ H, p5, z29); // uaddv d22, p5, z29.h
__ sve_andv(v22, __ D, p6, z5); // andv d22, p6, z5.d
__ sve_orv(v29, __ B, p4, z17); // orv b29, p4, z17.b
__ sve_eorv(v12, __ H, p3, z29); // eorv h12, p3, z29.h
__ sve_smaxv(v0, __ D, p4, z2); // smaxv d0, p4, z2.d
__ sve_sminv(v20, __ D, p5, z21); // sminv d20, p5, z21.d
__ sve_fminv(v12, __ S, p2, z2); // fminv s12, p2, z2.s
__ sve_fmaxv(v14, __ S, p5, z22); // fmaxv s14, p5, z22.s
__ sve_fadda(v19, __ D, p6, z26); // fadda d19, p6, d19, z26.d
__ sve_uaddv(v12, __ B, p5, z21); // uaddv d12, p5, z21.b
__ bind(forth);
@ -1192,30 +1195,30 @@
0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061,
0x120cb166, 0x321764bc, 0x52174681, 0x720c0227,
0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01,
0x14000000, 0x17ffffd7, 0x140003d2, 0x94000000,
0x97ffffd4, 0x940003cf, 0x3400000a, 0x34fffa2a,
0x3400798a, 0x35000008, 0x35fff9c8, 0x35007928,
0xb400000b, 0xb4fff96b, 0xb40078cb, 0xb500001d,
0xb5fff91d, 0xb500787d, 0x10000013, 0x10fff8b3,
0x10007813, 0x90000013, 0x36300016, 0x3637f836,
0x36307796, 0x3758000c, 0x375ff7cc, 0x3758772c,
0x14000000, 0x17ffffd7, 0x140003d5, 0x94000000,
0x97ffffd4, 0x940003d2, 0x3400000a, 0x34fffa2a,
0x340079ea, 0x35000008, 0x35fff9c8, 0x35007988,
0xb400000b, 0xb4fff96b, 0xb400792b, 0xb500001d,
0xb5fff91d, 0xb50078dd, 0x10000013, 0x10fff8b3,
0x10007873, 0x90000013, 0x36300016, 0x3637f836,
0x363077f6, 0x3758000c, 0x375ff7cc, 0x3758778c,
0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc,
0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f,
0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016,
0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0,
0x54007500, 0x54000001, 0x54fff541, 0x540074a1,
0x54000002, 0x54fff4e2, 0x54007442, 0x54000002,
0x54fff482, 0x540073e2, 0x54000003, 0x54fff423,
0x54007383, 0x54000003, 0x54fff3c3, 0x54007323,
0x54000004, 0x54fff364, 0x540072c4, 0x54000005,
0x54fff305, 0x54007265, 0x54000006, 0x54fff2a6,
0x54007206, 0x54000007, 0x54fff247, 0x540071a7,
0x54000008, 0x54fff1e8, 0x54007148, 0x54000009,
0x54fff189, 0x540070e9, 0x5400000a, 0x54fff12a,
0x5400708a, 0x5400000b, 0x54fff0cb, 0x5400702b,
0x5400000c, 0x54fff06c, 0x54006fcc, 0x5400000d,
0x54fff00d, 0x54006f6d, 0x5400000e, 0x54ffefae,
0x54006f0e, 0x5400000f, 0x54ffef4f, 0x54006eaf,
0x54007560, 0x54000001, 0x54fff541, 0x54007501,
0x54000002, 0x54fff4e2, 0x540074a2, 0x54000002,
0x54fff482, 0x54007442, 0x54000003, 0x54fff423,
0x540073e3, 0x54000003, 0x54fff3c3, 0x54007383,
0x54000004, 0x54fff364, 0x54007324, 0x54000005,
0x54fff305, 0x540072c5, 0x54000006, 0x54fff2a6,
0x54007266, 0x54000007, 0x54fff247, 0x54007207,
0x54000008, 0x54fff1e8, 0x540071a8, 0x54000009,
0x54fff189, 0x54007149, 0x5400000a, 0x54fff12a,
0x540070ea, 0x5400000b, 0x54fff0cb, 0x5400708b,
0x5400000c, 0x54fff06c, 0x5400702c, 0x5400000d,
0x54fff00d, 0x54006fcd, 0x5400000e, 0x54ffefae,
0x54006f6e, 0x5400000f, 0x54ffef4f, 0x54006f0f,
0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60,
0xd44cad80, 0xd503201f, 0xd503203f, 0xd503205f,
0xd503209f, 0xd50320bf, 0xd503219f, 0xd50323bf,
@ -1432,11 +1435,11 @@
0x65cd85ef, 0x65c68145, 0x6587801a, 0x65c29d53,
0x04ddb4e3, 0x6582aebc, 0x65c0ae3a, 0x65c1ac51,
0x658db690, 0x65c18033, 0x65f18a11, 0x65b70440,
0x65ee2a86, 0x65e34c7d, 0x65bb6309, 0x049954f3,
0x041964ed, 0x04313235, 0x04b33123, 0x046e3167,
0x04ed3171, 0x05716bd1, 0x05ba6dcf, 0x045a34fb,
0x04583f65, 0x04992f00, 0x04882074, 0x04ca2739,
0x65873031, 0x65863dae, 0x65d823d1, 0x044137b6,
0x65ee2a86, 0x65e3ac7d, 0x65bbc309, 0x65f9f4f3,
0x65b944ed, 0x65a07235, 0x04475569, 0x044b722e,
0x043e3238, 0x04ae31e8, 0x04763376, 0x04fb30a8,
0x05ee680a, 0x05206e95, 0x04da38b6, 0x0418323d,
0x04592fac, 0x04c83040, 0x04ca36b4, 0x6587284c,
0x658636ce, 0x65d83b53, 0x040136ac,
};
// END Generated code -- do not edit

@ -0,0 +1,412 @@
/*
* Copyright (c) 2022, Arm Limited. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package compiler.vectorapi;
import compiler.lib.ir_framework.*;
import java.util.Random;
import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.DoubleVector;
import jdk.incubator.vector.FloatVector;
import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.LongVector;
import jdk.incubator.vector.ShortVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorOperators;
import jdk.incubator.vector.VectorSpecies;
import jdk.test.lib.Asserts;
import jdk.test.lib.Utils;
/**
* @test
* @bug 8282431
* @key randomness
* @library /test/lib /
* @requires vm.cpu.features ~= ".*sve.*"
* @summary AArch64: Add optimized rules for masked vector multiply-add/sub for SVE
* @modules jdk.incubator.vector
*
* @run driver compiler.vectorapi.VectorFusedMultiplyAddSubTest
*/
// IR-framework test for masked vector multiply-add / multiply-subtract shapes.
//
// Every @Test method below loads three input vectors per loop step, combines
// them under a lane mask taken from the first lanes of `m`, stores the result,
// and then compares that result against a scalar re-computation. Each method
// also carries an @IR "counts" rule naming the SVE instruction expected to show
// up at least once.
public class VectorFusedMultiplyAddSubTest {
    private static final VectorSpecies<Byte> B_SPECIES = ByteVector.SPECIES_MAX;
    private static final VectorSpecies<Double> D_SPECIES = DoubleVector.SPECIES_MAX;
    private static final VectorSpecies<Float> F_SPECIES = FloatVector.SPECIES_MAX;
    private static final VectorSpecies<Integer> I_SPECIES = IntVector.SPECIES_MAX;
    private static final VectorSpecies<Long> L_SPECIES = LongVector.SPECIES_MAX;
    private static final VectorSpecies<Short> S_SPECIES = ShortVector.SPECIES_MAX;

    // Number of elements in every input/output array.
    private static int LENGTH = 1024;

    private static final Random RD = Utils.getRandomInstance();

    // Naming scheme: <type letter><role>, where role a/b/c are the three
    // inputs and r receives the vector result.
    private static byte[] ba = new byte[LENGTH];
    private static byte[] bb = new byte[LENGTH];
    private static byte[] bc = new byte[LENGTH];
    private static byte[] br = new byte[LENGTH];

    private static short[] sa = new short[LENGTH];
    private static short[] sb = new short[LENGTH];
    private static short[] sc = new short[LENGTH];
    private static short[] sr = new short[LENGTH];

    private static int[] ia = new int[LENGTH];
    private static int[] ib = new int[LENGTH];
    private static int[] ic = new int[LENGTH];
    private static int[] ir = new int[LENGTH];

    private static long[] la = new long[LENGTH];
    private static long[] lb = new long[LENGTH];
    private static long[] lc = new long[LENGTH];
    private static long[] lr = new long[LENGTH];

    private static float[] fa = new float[LENGTH];
    private static float[] fb = new float[LENGTH];
    private static float[] fc = new float[LENGTH];
    private static float[] fr = new float[LENGTH];

    private static double[] da = new double[LENGTH];
    private static double[] db = new double[LENGTH];
    private static double[] dc = new double[LENGTH];
    private static double[] dr = new double[LENGTH];

    // Source of the lane masks; only the leading lanes are ever loaded.
    private static boolean[] m = new boolean[LENGTH];

    static {
        // One pass over all indices; for each index the inputs of every
        // element type are drawn with bound 25, followed by the mask bit.
        for (int idx = 0; idx < LENGTH; idx++) {
            ba[idx] = (byte) RD.nextInt(25);
            bb[idx] = (byte) RD.nextInt(25);
            bc[idx] = (byte) RD.nextInt(25);

            sa[idx] = (short) RD.nextInt(25);
            sb[idx] = (short) RD.nextInt(25);
            sc[idx] = (short) RD.nextInt(25);

            ia[idx] = RD.nextInt(25);
            ib[idx] = RD.nextInt(25);
            ic[idx] = RD.nextInt(25);

            la[idx] = RD.nextLong(25);
            lb[idx] = RD.nextLong(25);
            lc[idx] = RD.nextLong(25);

            fa[idx] = RD.nextFloat((float) 25.0);
            fb[idx] = RD.nextFloat((float) 25.0);
            fc[idx] = RD.nextFloat((float) 25.0);

            da[idx] = RD.nextDouble(25.0);
            db[idx] = RD.nextDouble(25.0);
            dc[idx] = RD.nextDouble(25.0);

            m[idx] = RD.nextBoolean();
        }
    }

    // Scalar three-operand reference operations, one per element type.
    interface BTenOp {
        byte apply(byte a, byte b, byte c);
    }

    interface STenOp {
        short apply(short a, short b, short c);
    }

    interface ITenOp {
        int apply(int a, int b, int c);
    }

    interface LTenOp {
        long apply(long a, long b, long c);
    }

    interface FTenOp {
        float apply(float a, float b, float c);
    }

    interface DTenOp {
        double apply(double a, double b, double c);
    }

    // Result checkers, one overload per element type.
    //
    // For element i the mask bit is mask[i % <species length>], because the
    // tests reuse one vector mask for every loop step. Where the bit is set
    // the expected value is op.apply(a[i], b[i], c[i]); elsewhere the result
    // must equal a[i] unchanged.

    private static void assertArrayEquals(byte[] res, byte[] a, byte[] b, byte[] c, boolean[] mask, BTenOp op) {
        final int lanes = B_SPECIES.length();
        for (int i = 0; i < LENGTH; i++) {
            byte expected = mask[i % lanes] ? op.apply(a[i], b[i], c[i]) : a[i];
            Asserts.assertEquals(expected, res[i]);
        }
    }

    private static void assertArrayEquals(short[] res, short[] a, short[] b, short[] c, boolean[] mask, STenOp op) {
        final int lanes = S_SPECIES.length();
        for (int i = 0; i < LENGTH; i++) {
            short expected = mask[i % lanes] ? op.apply(a[i], b[i], c[i]) : a[i];
            Asserts.assertEquals(expected, res[i]);
        }
    }

    private static void assertArrayEquals(int[] res, int[] a, int[] b, int[] c, boolean[] mask, ITenOp op) {
        final int lanes = I_SPECIES.length();
        for (int i = 0; i < LENGTH; i++) {
            int expected = mask[i % lanes] ? op.apply(a[i], b[i], c[i]) : a[i];
            Asserts.assertEquals(expected, res[i]);
        }
    }

    private static void assertArrayEquals(long[] res, long[] a, long[] b, long[] c, boolean[] mask, LTenOp op) {
        final int lanes = L_SPECIES.length();
        for (int i = 0; i < LENGTH; i++) {
            long expected = mask[i % lanes] ? op.apply(a[i], b[i], c[i]) : a[i];
            Asserts.assertEquals(expected, res[i]);
        }
    }

    private static void assertArrayEquals(float[] res, float[] a, float[] b, float[] c, boolean[] mask, FTenOp op) {
        final int lanes = F_SPECIES.length();
        for (int i = 0; i < LENGTH; i++) {
            float expected = mask[i % lanes] ? op.apply(a[i], b[i], c[i]) : a[i];
            Asserts.assertEquals(expected, res[i]);
        }
    }

    private static void assertArrayEquals(double[] res, double[] a, double[] b, double[] c, boolean[] mask, DTenOp op) {
        final int lanes = D_SPECIES.length();
        for (int i = 0; i < LENGTH; i++) {
            double expected = mask[i % lanes] ? op.apply(a[i], b[i], c[i]) : a[i];
            Asserts.assertEquals(expected, res[i]);
        }
    }

    // ---- byte: a + b * c and a - b * c under mask ----

    @Test
    @IR(counts = { "sve_mla", ">= 1" })
    public static void testByteMultiplyAddMasked() {
        VectorMask<Byte> laneMask = VectorMask.fromArray(B_SPECIES, m, 0);
        final int lanes = B_SPECIES.length();
        for (int off = 0; off < LENGTH; off += lanes) {
            ByteVector acc = ByteVector.fromArray(B_SPECIES, ba, off);
            ByteVector lhs = ByteVector.fromArray(B_SPECIES, bb, off);
            ByteVector rhs = ByteVector.fromArray(B_SPECIES, bc, off);
            ByteVector product = lhs.mul(rhs);
            acc.add(product, laneMask).intoArray(br, off);
        }
        assertArrayEquals(br, ba, bb, bc, m, (x, y, z) -> (byte) (x + y * z));
    }

    @Test
    @IR(counts = { "sve_mls", ">= 1" })
    public static void testByteMultiplySubMasked() {
        VectorMask<Byte> laneMask = VectorMask.fromArray(B_SPECIES, m, 0);
        final int lanes = B_SPECIES.length();
        for (int off = 0; off < LENGTH; off += lanes) {
            ByteVector acc = ByteVector.fromArray(B_SPECIES, ba, off);
            ByteVector lhs = ByteVector.fromArray(B_SPECIES, bb, off);
            ByteVector rhs = ByteVector.fromArray(B_SPECIES, bc, off);
            ByteVector product = lhs.mul(rhs);
            acc.sub(product, laneMask).intoArray(br, off);
        }
        assertArrayEquals(br, ba, bb, bc, m, (x, y, z) -> (byte) (x - y * z));
    }

    // ---- short: a + b * c and a - b * c under mask ----

    @Test
    @IR(counts = { "sve_mla", ">= 1" })
    public static void testShortMultiplyAddMasked() {
        VectorMask<Short> laneMask = VectorMask.fromArray(S_SPECIES, m, 0);
        final int lanes = S_SPECIES.length();
        for (int off = 0; off < LENGTH; off += lanes) {
            ShortVector acc = ShortVector.fromArray(S_SPECIES, sa, off);
            ShortVector lhs = ShortVector.fromArray(S_SPECIES, sb, off);
            ShortVector rhs = ShortVector.fromArray(S_SPECIES, sc, off);
            ShortVector product = lhs.mul(rhs);
            acc.add(product, laneMask).intoArray(sr, off);
        }
        assertArrayEquals(sr, sa, sb, sc, m, (x, y, z) -> (short) (x + y * z));
    }

    @Test
    @IR(counts = { "sve_mls", ">= 1" })
    public static void testShortMultiplySubMasked() {
        VectorMask<Short> laneMask = VectorMask.fromArray(S_SPECIES, m, 0);
        final int lanes = S_SPECIES.length();
        for (int off = 0; off < LENGTH; off += lanes) {
            ShortVector acc = ShortVector.fromArray(S_SPECIES, sa, off);
            ShortVector lhs = ShortVector.fromArray(S_SPECIES, sb, off);
            ShortVector rhs = ShortVector.fromArray(S_SPECIES, sc, off);
            ShortVector product = lhs.mul(rhs);
            acc.sub(product, laneMask).intoArray(sr, off);
        }
        assertArrayEquals(sr, sa, sb, sc, m, (x, y, z) -> (short) (x - y * z));
    }

    // ---- int: a + b * c and a - b * c under mask ----

    @Test
    @IR(counts = { "sve_mla", ">= 1" })
    public static void testIntMultiplyAddMasked() {
        VectorMask<Integer> laneMask = VectorMask.fromArray(I_SPECIES, m, 0);
        final int lanes = I_SPECIES.length();
        for (int off = 0; off < LENGTH; off += lanes) {
            IntVector acc = IntVector.fromArray(I_SPECIES, ia, off);
            IntVector lhs = IntVector.fromArray(I_SPECIES, ib, off);
            IntVector rhs = IntVector.fromArray(I_SPECIES, ic, off);
            IntVector product = lhs.mul(rhs);
            acc.add(product, laneMask).intoArray(ir, off);
        }
        assertArrayEquals(ir, ia, ib, ic, m, (x, y, z) -> x + y * z);
    }

    @Test
    @IR(counts = { "sve_mls", ">= 1" })
    public static void testIntMultiplySubMasked() {
        VectorMask<Integer> laneMask = VectorMask.fromArray(I_SPECIES, m, 0);
        final int lanes = I_SPECIES.length();
        for (int off = 0; off < LENGTH; off += lanes) {
            IntVector acc = IntVector.fromArray(I_SPECIES, ia, off);
            IntVector lhs = IntVector.fromArray(I_SPECIES, ib, off);
            IntVector rhs = IntVector.fromArray(I_SPECIES, ic, off);
            IntVector product = lhs.mul(rhs);
            acc.sub(product, laneMask).intoArray(ir, off);
        }
        assertArrayEquals(ir, ia, ib, ic, m, (x, y, z) -> x - y * z);
    }

    // ---- long: a + b * c and a - b * c under mask ----

    @Test
    @IR(counts = { "sve_mla", ">= 1" })
    public static void testLongMultiplyAddMasked() {
        VectorMask<Long> laneMask = VectorMask.fromArray(L_SPECIES, m, 0);
        final int lanes = L_SPECIES.length();
        for (int off = 0; off < LENGTH; off += lanes) {
            LongVector acc = LongVector.fromArray(L_SPECIES, la, off);
            LongVector lhs = LongVector.fromArray(L_SPECIES, lb, off);
            LongVector rhs = LongVector.fromArray(L_SPECIES, lc, off);
            LongVector product = lhs.mul(rhs);
            acc.add(product, laneMask).intoArray(lr, off);
        }
        assertArrayEquals(lr, la, lb, lc, m, (x, y, z) -> x + y * z);
    }

    @Test
    @IR(counts = { "sve_mls", ">= 1" })
    public static void testLongMultiplySubMasked() {
        VectorMask<Long> laneMask = VectorMask.fromArray(L_SPECIES, m, 0);
        final int lanes = L_SPECIES.length();
        for (int off = 0; off < LENGTH; off += lanes) {
            LongVector acc = LongVector.fromArray(L_SPECIES, la, off);
            LongVector lhs = LongVector.fromArray(L_SPECIES, lb, off);
            LongVector rhs = LongVector.fromArray(L_SPECIES, lc, off);
            LongVector product = lhs.mul(rhs);
            acc.sub(product, laneMask).intoArray(lr, off);
        }
        assertArrayEquals(lr, la, lb, lc, m, (x, y, z) -> x - y * z);
    }

    // ---- float: masked FMA with one or both of the trailing operands negated ----

    // fma(a, -b, c)
    @Test
    @IR(counts = { "sve_fmsb", ">= 1" })
    public static void testFloatMultiplySubMasked() {
        VectorMask<Float> laneMask = VectorMask.fromArray(F_SPECIES, m, 0);
        final int lanes = F_SPECIES.length();
        for (int off = 0; off < LENGTH; off += lanes) {
            FloatVector first = FloatVector.fromArray(F_SPECIES, fa, off);
            FloatVector second = FloatVector.fromArray(F_SPECIES, fb, off);
            FloatVector third = FloatVector.fromArray(F_SPECIES, fc, off);
            FloatVector negSecond = second.neg();
            first.lanewise(VectorOperators.FMA, negSecond, third, laneMask).intoArray(fr, off);
        }
        assertArrayEquals(fr, fa, fb, fc, m, (x, y, z) -> Math.fma(x, -y, z));
    }

    // fma(a, -b, -c)
    @Test
    @IR(counts = { "sve_fnmad", ">= 1" })
    public static void testFloatNegatedMultiplyAddMasked() {
        VectorMask<Float> laneMask = VectorMask.fromArray(F_SPECIES, m, 0);
        final int lanes = F_SPECIES.length();
        for (int off = 0; off < LENGTH; off += lanes) {
            FloatVector first = FloatVector.fromArray(F_SPECIES, fa, off);
            FloatVector second = FloatVector.fromArray(F_SPECIES, fb, off);
            FloatVector third = FloatVector.fromArray(F_SPECIES, fc, off);
            FloatVector negSecond = second.neg();
            FloatVector negThird = third.neg();
            first.lanewise(VectorOperators.FMA, negSecond, negThird, laneMask).intoArray(fr, off);
        }
        assertArrayEquals(fr, fa, fb, fc, m, (x, y, z) -> Math.fma(x, -y, -z));
    }

    // fma(a, b, -c)
    @Test
    @IR(counts = { "sve_fnmsb", ">= 1" })
    public static void testFloatNegatedMultiplySubMasked() {
        VectorMask<Float> laneMask = VectorMask.fromArray(F_SPECIES, m, 0);
        final int lanes = F_SPECIES.length();
        for (int off = 0; off < LENGTH; off += lanes) {
            FloatVector first = FloatVector.fromArray(F_SPECIES, fa, off);
            FloatVector second = FloatVector.fromArray(F_SPECIES, fb, off);
            FloatVector third = FloatVector.fromArray(F_SPECIES, fc, off);
            FloatVector negThird = third.neg();
            first.lanewise(VectorOperators.FMA, second, negThird, laneMask).intoArray(fr, off);
        }
        assertArrayEquals(fr, fa, fb, fc, m, (x, y, z) -> Math.fma(x, y, -z));
    }

    // ---- double: masked FMA with one or both of the trailing operands negated ----

    // fma(a, -b, c)
    @Test
    @IR(counts = { "sve_fmsb", ">= 1" })
    public static void testDoubleMultiplySubMasked() {
        VectorMask<Double> laneMask = VectorMask.fromArray(D_SPECIES, m, 0);
        final int lanes = D_SPECIES.length();
        for (int off = 0; off < LENGTH; off += lanes) {
            DoubleVector first = DoubleVector.fromArray(D_SPECIES, da, off);
            DoubleVector second = DoubleVector.fromArray(D_SPECIES, db, off);
            DoubleVector third = DoubleVector.fromArray(D_SPECIES, dc, off);
            DoubleVector negSecond = second.neg();
            first.lanewise(VectorOperators.FMA, negSecond, third, laneMask).intoArray(dr, off);
        }
        assertArrayEquals(dr, da, db, dc, m, (x, y, z) -> Math.fma(x, -y, z));
    }

    // fma(a, -b, -c)
    @Test
    @IR(counts = { "sve_fnmad", ">= 1" })
    public static void testDoubleNegatedMultiplyAddMasked() {
        VectorMask<Double> laneMask = VectorMask.fromArray(D_SPECIES, m, 0);
        final int lanes = D_SPECIES.length();
        for (int off = 0; off < LENGTH; off += lanes) {
            DoubleVector first = DoubleVector.fromArray(D_SPECIES, da, off);
            DoubleVector second = DoubleVector.fromArray(D_SPECIES, db, off);
            DoubleVector third = DoubleVector.fromArray(D_SPECIES, dc, off);
            DoubleVector negSecond = second.neg();
            DoubleVector negThird = third.neg();
            first.lanewise(VectorOperators.FMA, negSecond, negThird, laneMask).intoArray(dr, off);
        }
        assertArrayEquals(dr, da, db, dc, m, (x, y, z) -> Math.fma(x, -y, -z));
    }

    // fma(a, b, -c)
    @Test
    @IR(counts = { "sve_fnmsb", ">= 1" })
    public static void testDoubleNegatedMultiplySubMasked() {
        VectorMask<Double> laneMask = VectorMask.fromArray(D_SPECIES, m, 0);
        final int lanes = D_SPECIES.length();
        for (int off = 0; off < LENGTH; off += lanes) {
            DoubleVector first = DoubleVector.fromArray(D_SPECIES, da, off);
            DoubleVector second = DoubleVector.fromArray(D_SPECIES, db, off);
            DoubleVector third = DoubleVector.fromArray(D_SPECIES, dc, off);
            DoubleVector negThird = third.neg();
            first.lanewise(VectorOperators.FMA, second, negThird, laneMask).intoArray(dr, off);
        }
        assertArrayEquals(dr, da, db, dc, m, (x, y, z) -> Math.fma(x, y, -z));
    }

    // Entry point: hands the class to the IR test framework with the vector
    // module enabled and UseSVE set to 1.
    public static void main(String[] args) {
        TestFramework.runWithFlags("--add-modules=jdk.incubator.vector",
                                   "-XX:UseSVE=1");
    }
}