8275275: AArch64: Fix performance regression after auto-vectorization on NEON

Reviewed-by: aph, xgong
This commit is contained in:
Fei Gao 2022-09-13 03:13:38 +00:00 committed by Pengfei Li
parent cbee0bc9ef
commit ec2629c052
5 changed files with 462 additions and 26 deletions
src/hotspot/cpu/aarch64
test/hotspot/jtreg/compiler

@ -127,11 +127,14 @@ source %{
const bool Matcher::match_rule_supported_superword(int opcode, int vlen, BasicType bt) {
if (UseSVE == 0) {
// ConvD2I and ConvL2F are not profitable to be vectorized on NEON, because no direct
// These operations are not profitable to be vectorized on NEON, because no direct
// NEON instructions support them. But the match rule support for them is profitable for
// Vector API intrinsics.
if ((opcode == Op_VectorCastD2X && bt == T_INT) ||
(opcode == Op_VectorCastL2X && bt == T_FLOAT)) {
(opcode == Op_VectorCastL2X && bt == T_FLOAT) ||
opcode == Op_AddReductionVD || opcode == Op_AddReductionVF ||
opcode == Op_MulReductionVD || opcode == Op_MulReductionVF ||
opcode == Op_MulVL) {
return false;
}
}
@ -153,7 +156,6 @@ source %{
// Check whether specific Op is supported.
// Fail fast, otherwise fall through to common vector_size_supported() check.
switch (opcode) {
case Op_MulVL:
case Op_AndVMask:
case Op_OrVMask:
case Op_XorVMask:
@ -2053,13 +2055,11 @@ instruct vmla(vReg dst_src1, vReg src2, vReg src3) %{
match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3)));
match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3)));
match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3)));
match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3)));
format %{ "vmla $dst_src1, src2, src3" %}
format %{ "vmla $dst_src1, $src2, $src3" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
if (VM_Version::use_neon_for_vector(length_in_bytes) && bt != T_LONG) {
// NEON mlav does not accept T2D arrangement.
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
__ mlav($dst_src1$$FloatRegister, get_arrangement(this),
$src2$$FloatRegister, $src3$$FloatRegister);
} else {
@ -2071,13 +2071,25 @@ instruct vmla(vReg dst_src1, vReg src2, vReg src3) %{
ins_pipe(pipe_slow);
%}
instruct vmlaL(vReg dst_src1, vReg src2, vReg src3) %{
predicate(UseSVE > 0);
match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3)));
format %{ "vmlaL $dst_src1, $src2, $src3" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
__ sve_mla($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),
ptrue, $src2$$FloatRegister, $src3$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
instruct vmla_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
predicate(UseSVE > 0);
match(Set dst_src1 (AddVB (Binary dst_src1 (MulVB src2 src3)) pg));
match(Set dst_src1 (AddVS (Binary dst_src1 (MulVS src2 src3)) pg));
match(Set dst_src1 (AddVI (Binary dst_src1 (MulVI src2 src3)) pg));
match(Set dst_src1 (AddVL (Binary dst_src1 (MulVL src2 src3)) pg));
format %{ "vmla_masked $dst_src1, $pg, src2, src3" %}
format %{ "vmla_masked $dst_src1, $pg, $src2, $src3" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
__ sve_mla($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),
@ -2132,13 +2144,11 @@ instruct vmls(vReg dst_src1, vReg src2, vReg src3) %{
match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3)));
match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3)));
match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3)));
match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3)));
format %{ "vmls $dst_src1, src2, src3" %}
format %{ "vmls $dst_src1, $src2, $src3" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
if (VM_Version::use_neon_for_vector(length_in_bytes) && bt != T_LONG) {
// NEON mlsv does not accept T2D arrangement.
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
__ mlsv($dst_src1$$FloatRegister, get_arrangement(this),
$src2$$FloatRegister, $src3$$FloatRegister);
} else {
@ -2150,13 +2160,25 @@ instruct vmls(vReg dst_src1, vReg src2, vReg src3) %{
ins_pipe(pipe_slow);
%}
instruct vmlsL(vReg dst_src1, vReg src2, vReg src3) %{
predicate(UseSVE > 0);
match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3)));
format %{ "vmlsL $dst_src1, $src2, $src3" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
__ sve_mls($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),
ptrue, $src2$$FloatRegister, $src3$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
instruct vmls_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
predicate(UseSVE > 0);
match(Set dst_src1 (SubVB (Binary dst_src1 (MulVB src2 src3)) pg));
match(Set dst_src1 (SubVS (Binary dst_src1 (MulVS src2 src3)) pg));
match(Set dst_src1 (SubVI (Binary dst_src1 (MulVI src2 src3)) pg));
match(Set dst_src1 (SubVL (Binary dst_src1 (MulVL src2 src3)) pg));
format %{ "vmls_masked $dst_src1, $pg, src2, src3" %}
format %{ "vmls_masked $dst_src1, $pg, $src2, $src3" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
__ sve_mls($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),

@ -117,11 +117,14 @@ source %{
const bool Matcher::match_rule_supported_superword(int opcode, int vlen, BasicType bt) {
if (UseSVE == 0) {
// ConvD2I and ConvL2F are not profitable to be vectorized on NEON, because no direct
// These operations are not profitable to be vectorized on NEON, because no direct
// NEON instructions support them. But the match rule support for them is profitable for
// Vector API intrinsics.
if ((opcode == Op_VectorCastD2X && bt == T_INT) ||
(opcode == Op_VectorCastL2X && bt == T_FLOAT)) {
(opcode == Op_VectorCastL2X && bt == T_FLOAT) ||
opcode == Op_AddReductionVD || opcode == Op_AddReductionVF ||
opcode == Op_MulReductionVD || opcode == Op_MulReductionVF ||
opcode == Op_MulVL) {
return false;
}
}
@ -143,7 +146,6 @@ source %{
// Check whether specific Op is supported.
// Fail fast, otherwise fall through to common vector_size_supported() check.
switch (opcode) {
case Op_MulVL:
case Op_AndVMask:
case Op_OrVMask:
case Op_XorVMask:
@ -1085,13 +1087,11 @@ instruct vmla(vReg dst_src1, vReg src2, vReg src3) %{
match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3)));
match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3)));
match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3)));
match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3)));
format %{ "vmla $dst_src1, src2, src3" %}
format %{ "vmla $dst_src1, $src2, $src3" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
if (VM_Version::use_neon_for_vector(length_in_bytes) && bt != T_LONG) {
// NEON mlav does not accept T2D arrangement.
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
__ mlav($dst_src1$$FloatRegister, get_arrangement(this),
$src2$$FloatRegister, $src3$$FloatRegister);
} else {
@ -1103,13 +1103,25 @@ instruct vmla(vReg dst_src1, vReg src2, vReg src3) %{
ins_pipe(pipe_slow);
%}
instruct vmlaL(vReg dst_src1, vReg src2, vReg src3) %{
predicate(UseSVE > 0);
match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3)));
format %{ "vmlaL $dst_src1, $src2, $src3" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
__ sve_mla($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),
ptrue, $src2$$FloatRegister, $src3$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
instruct vmla_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
predicate(UseSVE > 0);
match(Set dst_src1 (AddVB (Binary dst_src1 (MulVB src2 src3)) pg));
match(Set dst_src1 (AddVS (Binary dst_src1 (MulVS src2 src3)) pg));
match(Set dst_src1 (AddVI (Binary dst_src1 (MulVI src2 src3)) pg));
match(Set dst_src1 (AddVL (Binary dst_src1 (MulVL src2 src3)) pg));
format %{ "vmla_masked $dst_src1, $pg, src2, src3" %}
format %{ "vmla_masked $dst_src1, $pg, $src2, $src3" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
__ sve_mla($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),
@ -1164,13 +1176,11 @@ instruct vmls(vReg dst_src1, vReg src2, vReg src3) %{
match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3)));
match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3)));
match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3)));
match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3)));
format %{ "vmls $dst_src1, src2, src3" %}
format %{ "vmls $dst_src1, $src2, $src3" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
if (VM_Version::use_neon_for_vector(length_in_bytes) && bt != T_LONG) {
// NEON mlsv does not accept T2D arrangement.
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
__ mlsv($dst_src1$$FloatRegister, get_arrangement(this),
$src2$$FloatRegister, $src3$$FloatRegister);
} else {
@ -1182,13 +1192,25 @@ instruct vmls(vReg dst_src1, vReg src2, vReg src3) %{
ins_pipe(pipe_slow);
%}
instruct vmlsL(vReg dst_src1, vReg src2, vReg src3) %{
predicate(UseSVE > 0);
match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3)));
format %{ "vmlsL $dst_src1, $src2, $src3" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
__ sve_mls($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),
ptrue, $src2$$FloatRegister, $src3$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
instruct vmls_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
predicate(UseSVE > 0);
match(Set dst_src1 (SubVB (Binary dst_src1 (MulVB src2 src3)) pg));
match(Set dst_src1 (SubVS (Binary dst_src1 (MulVS src2 src3)) pg));
match(Set dst_src1 (SubVI (Binary dst_src1 (MulVI src2 src3)) pg));
match(Set dst_src1 (SubVL (Binary dst_src1 (MulVL src2 src3)) pg));
format %{ "vmls_masked $dst_src1, $pg, src2, src3" %}
format %{ "vmls_masked $dst_src1, $pg, $src2, $src3" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
__ sve_mls($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),

@ -0,0 +1,119 @@
/*
* Copyright (c) 2022, Arm Limited. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package compiler.c2.irTests;
import compiler.lib.ir_framework.*;
/*
* @test
* @bug 8275275
* @summary Fix performance regression after auto-vectorization on aarch64 NEON.
* @requires os.arch=="aarch64"
* @library /test/lib /
* @run driver compiler.c2.irTests.TestDisableAutoVectOpcodes
*/
// Verifies that, when SVE is disabled (-XX:UseSVE=0, i.e. NEON-only code
// generation), C2 does NOT auto-vectorize operations that have no direct
// NEON instruction support: D2I / L2F vector casts, MulVL, and the
// float/double add/mul reductions. Each @Test is a scalar loop that the
// SuperWord pass would otherwise vectorize; the @IR(failOn = ...)
// annotation asserts the corresponding vector IR node is absent.
public class TestDisableAutoVectOpcodes {
// Array length; large enough that the loops are hot and eligible for
// auto-vectorization under the IR framework's compilation thresholds.
final private static int SIZE = 3000;
private static double[] doublea = new double[SIZE];
private static double[] doubleb = new double[SIZE];
private static long[] longa = new long[SIZE];
private static long[] longb = new long[SIZE];
private static int[] inta = new int[SIZE];
private static float[] floata = new float[SIZE];
private static float[] floatb = new float[SIZE];
// Sinks for reduction results so the reduction loops are not dead-code
// eliminated.
private static float fresult;
private static double dresult;
public static void main(String[] args) {
// Force NEON-only code generation; all IR checks below apply to this mode.
TestFramework.runWithFlags("-XX:UseSVE=0");
}
// double -> int conversion loop: must not produce a VectorCastD2X node.
@Test
@IR(failOn = {IRNode.VECTOR_CAST_D2X})
private static void testConvD2I() {
for(int i = 0; i < SIZE; i++) {
inta[i] = (int) (doublea[i]);
}
}
// long -> float conversion loop: must not produce a VectorCastL2X node.
@Test
@IR(failOn = {IRNode.VECTOR_CAST_L2X})
private static void testConvL2F() {
for(int i = 0; i < SIZE; i++) {
floata[i] = (float) (longa[i]);
}
}
// long multiply loop: must not produce a MulVL node (NEON has no 64-bit
// element integer multiply).
@Test
@IR(failOn = {IRNode.MUL_VL})
private static void testMulVL() {
for(int i = 0; i < SIZE; i++) {
longa[i] = longa[i] * longb[i];
}
}
// float add-reduction: must not produce an AddReductionVF node.
@Test
@IR(failOn = {IRNode.ADD_REDUCTION_VF})
private static void testAddReductionVF() {
float result = 1;
for(int i = 0; i < SIZE; i++) {
result += (floata[i] + floatb[i]);
}
fresult += result;
}
// double add-reduction: must not produce an AddReductionVD node.
@Test
@IR(failOn = {IRNode.ADD_REDUCTION_VD})
private static void testAddReductionVD() {
double result = 1;
for(int i = 0; i < SIZE; i++) {
result += (doublea[i] + doubleb[i]);
}
dresult += result;
}
// float mul-reduction: must not produce a MulReductionVF node.
@Test
@IR(failOn = {IRNode.MUL_REDUCTION_VF})
private static void testMulReductionVF() {
float result = 1;
for(int i = 0; i < SIZE; i++) {
result *= (floata[i] + floatb[i]);
}
fresult += result;
}
// double mul-reduction: must not produce a MulReductionVD node.
@Test
@IR(failOn = {IRNode.MUL_REDUCTION_VD})
private static void testMulReductionVD() {
double result = 1;
for(int i = 0; i < SIZE; i++) {
result *= (doublea[i] + doubleb[i]);
}
dresult += result;
}
}

@ -216,6 +216,12 @@ public class IRNode {
public static final String Max_I = START + "MaxI" + MID + END;
public static final String Min_V = START + "MinV" + MID + END;
public static final String Max_V = START + "MaxV" + MID + END;
public static final String MUL_VL = START + "MulVL" + MID + END;
public static final String ADD_REDUCTION_VF = START + "AddReductionVF" + MID + END;
public static final String ADD_REDUCTION_VD = START + "AddReductionVD" + MID + END;
public static final String MUL_REDUCTION_VF = START + "MulReductionVF" + MID + END;
public static final String MUL_REDUCTION_VD = START + "MulReductionVD" + MID + END;
public static final String FAST_LOCK = START + "FastLock" + MID + END;
public static final String FAST_UNLOCK = START + "FastUnlock" + MID + END;

@ -0,0 +1,267 @@
/*
* Copyright (c) 2022, Arm Limited. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package compiler.vectorapi;
import compiler.lib.ir_framework.*;
import java.util.Random;
import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.ShortVector;
import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.LongVector;
import jdk.incubator.vector.VectorSpecies;
import jdk.test.lib.Asserts;
import jdk.test.lib.Utils;
/**
* @test
* @bug 8275275
* @key randomness
* @library /test/lib /
* @requires os.arch=="aarch64"
* @summary AArch64: Fix performance regression after auto-vectorization on NEON
* @modules jdk.incubator.vector
*
* @run driver compiler.vectorapi.TestVectorMulAddSub
*/
// Exercises the aarch64 multiply-accumulate match rules via the Vector API:
// each test computes a +/- b * c element-wise and the @IR annotation asserts
// the fused "vmla"/"vmls" rule was matched. The long variants additionally
// require UseSVE > 0 (applyIf), since NEON has no 64-bit element
// multiply-accumulate. Each @Run runner re-checks results against a scalar
// recomputation.
public class TestVectorMulAddSub {
// SPECIES_MAX adapts vector length to the widest supported on the CPU.
private static final VectorSpecies<Byte> B_SPECIES = ByteVector.SPECIES_MAX;
private static final VectorSpecies<Short> S_SPECIES = ShortVector.SPECIES_MAX;
private static final VectorSpecies<Integer> I_SPECIES = IntVector.SPECIES_MAX;
private static final VectorSpecies<Long> L_SPECIES = LongVector.SPECIES_MAX;
private static int LENGTH = 1024;
private static final Random RD = Utils.getRandomInstance();
// Per element type: three random input arrays (a, b, c) and one result
// array (r) written by the vectorized kernel.
private static byte[] ba;
private static byte[] bb;
private static byte[] bc;
private static byte[] br;
private static short[] sa;
private static short[] sb;
private static short[] sc;
private static short[] sr;
private static int[] ia;
private static int[] ib;
private static int[] ic;
private static int[] ir;
private static long[] la;
private static long[] lb;
private static long[] lc;
private static long[] lr;
static {
ba = new byte[LENGTH];
bb = new byte[LENGTH];
bc = new byte[LENGTH];
br = new byte[LENGTH];
sa = new short[LENGTH];
sb = new short[LENGTH];
sc = new short[LENGTH];
sr = new short[LENGTH];
ia = new int[LENGTH];
ib = new int[LENGTH];
ic = new int[LENGTH];
ir = new int[LENGTH];
la = new long[LENGTH];
lb = new long[LENGTH];
lc = new long[LENGTH];
lr = new long[LENGTH];
// Randomized inputs (seed is logged by Utils for reproducibility).
for (int i = 0; i < LENGTH; i++) {
ba[i] = (byte) RD.nextInt();
bb[i] = (byte) RD.nextInt();
bc[i] = (byte) RD.nextInt();
sa[i] = (short) RD.nextInt();
sb[i] = (short) RD.nextInt();
sc[i] = (short) RD.nextInt();
ia[i] = RD.nextInt();
ib[i] = RD.nextInt();
ic[i] = RD.nextInt();
la[i] = RD.nextLong();
lb[i] = RD.nextLong();
lc[i] = RD.nextLong();
}
}
// byte: a + b * c must match the fused vmla rule.
@Test
@IR(counts = {"vmla", "> 0"})
public static void testByteMulAdd() {
for (int i = 0; i < LENGTH; i += B_SPECIES.length()) {
ByteVector av = ByteVector.fromArray(B_SPECIES, ba, i);
ByteVector bv = ByteVector.fromArray(B_SPECIES, bb, i);
ByteVector cv = ByteVector.fromArray(B_SPECIES, bc, i);
av.add(bv.mul(cv)).intoArray(br, i);
}
}
@Run(test = "testByteMulAdd")
public static void testByteMulAdd_runner() {
testByteMulAdd();
// Cross-check every lane against the scalar computation.
for (int i = 0; i < LENGTH; i++) {
Asserts.assertEquals((byte) (ba[i] + bb[i] * bc[i]), br[i]);
}
}
// short: a + b * c must match the fused vmla rule.
@Test
@IR(counts = {"vmla", "> 0"})
public static void testShortMulAdd() {
for (int i = 0; i < LENGTH; i += S_SPECIES.length()) {
ShortVector av = ShortVector.fromArray(S_SPECIES, sa, i);
ShortVector bv = ShortVector.fromArray(S_SPECIES, sb, i);
ShortVector cv = ShortVector.fromArray(S_SPECIES, sc, i);
av.add(bv.mul(cv)).intoArray(sr, i);
}
}
@Run(test = "testShortMulAdd")
public static void testShortMulAdd_runner() {
testShortMulAdd();
for (int i = 0; i < LENGTH; i++) {
Asserts.assertEquals((short) (sa[i] + sb[i] * sc[i]), sr[i]);
}
}
// int: a + b * c must match the fused vmla rule.
@Test
@IR(counts = {"vmla", "> 0"})
public static void testIntMulAdd() {
for (int i = 0; i < LENGTH; i += I_SPECIES.length()) {
IntVector av = IntVector.fromArray(I_SPECIES, ia, i);
IntVector bv = IntVector.fromArray(I_SPECIES, ib, i);
IntVector cv = IntVector.fromArray(I_SPECIES, ic, i);
av.add(bv.mul(cv)).intoArray(ir, i);
}
}
@Run(test = "testIntMulAdd")
public static void testIntMulAdd_runner() {
testIntMulAdd();
for (int i = 0; i < LENGTH; i++) {
Asserts.assertEquals((ia[i] + ib[i] * ic[i]), ir[i]);
}
}
// long: the fused rule is SVE-only, hence the applyIf guard on UseSVE.
@Test
@IR(applyIf = {"UseSVE", " > 0"}, counts = {"vmla", "> 0"})
public static void testLongMulAdd() {
for (int i = 0; i < LENGTH; i += L_SPECIES.length()) {
LongVector av = LongVector.fromArray(L_SPECIES, la, i);
LongVector bv = LongVector.fromArray(L_SPECIES, lb, i);
LongVector cv = LongVector.fromArray(L_SPECIES, lc, i);
av.add(bv.mul(cv)).intoArray(lr, i);
}
}
@Run(test = "testLongMulAdd")
public static void testLongMulAdd_runner() {
testLongMulAdd();
for (int i = 0; i < LENGTH; i++) {
Asserts.assertEquals((la[i] + lb[i] * lc[i]), lr[i]);
}
}
// byte: a - b * c must match the fused vmls rule.
@Test
@IR(counts = {"vmls", "> 0"})
public static void testByteMulSub() {
for (int i = 0; i < LENGTH; i += B_SPECIES.length()) {
ByteVector av = ByteVector.fromArray(B_SPECIES, ba, i);
ByteVector bv = ByteVector.fromArray(B_SPECIES, bb, i);
ByteVector cv = ByteVector.fromArray(B_SPECIES, bc, i);
av.sub(bv.mul(cv)).intoArray(br, i);
}
}
@Run(test = "testByteMulSub")
public static void testByteMulSub_runner() {
testByteMulSub();
for (int i = 0; i < LENGTH; i++) {
Asserts.assertEquals((byte) (ba[i] - bb[i] * bc[i]), br[i]);
}
}
// short: a - b * c must match the fused vmls rule.
@Test
@IR(counts = {"vmls", "> 0"})
public static void testShortMulSub() {
for (int i = 0; i < LENGTH; i += S_SPECIES.length()) {
ShortVector av = ShortVector.fromArray(S_SPECIES, sa, i);
ShortVector bv = ShortVector.fromArray(S_SPECIES, sb, i);
ShortVector cv = ShortVector.fromArray(S_SPECIES, sc, i);
av.sub(bv.mul(cv)).intoArray(sr, i);
}
}
@Run(test = "testShortMulSub")
public static void testShortMulSub_runner() {
testShortMulSub();
for (int i = 0; i < LENGTH; i++) {
Asserts.assertEquals((short) (sa[i] - sb[i] * sc[i]), sr[i]);
}
}
// int: a - b * c must match the fused vmls rule.
@Test
@IR(counts = {"vmls", "> 0"})
public static void testIntMulSub() {
for (int i = 0; i < LENGTH; i += I_SPECIES.length()) {
IntVector av = IntVector.fromArray(I_SPECIES, ia, i);
IntVector bv = IntVector.fromArray(I_SPECIES, ib, i);
IntVector cv = IntVector.fromArray(I_SPECIES, ic, i);
av.sub(bv.mul(cv)).intoArray(ir, i);
}
}
@Run(test = "testIntMulSub")
public static void testIntMulSub_runner() {
testIntMulSub();
for (int i = 0; i < LENGTH; i++) {
Asserts.assertEquals((ia[i] - ib[i] * ic[i]), ir[i]);
}
}
// long: the fused rule is SVE-only, hence the applyIf guard on UseSVE.
@Test
@IR(applyIf = {"UseSVE", " > 0"}, counts = {"vmls", "> 0"})
public static void testLongMulSub() {
for (int i = 0; i < LENGTH; i += L_SPECIES.length()) {
LongVector av = LongVector.fromArray(L_SPECIES, la, i);
LongVector bv = LongVector.fromArray(L_SPECIES, lb, i);
LongVector cv = LongVector.fromArray(L_SPECIES, lc, i);
av.sub(bv.mul(cv)).intoArray(lr, i);
}
}
@Run(test = "testLongMulSub")
public static void testLongMulSub_runner() {
testLongMulSub();
for (int i = 0; i < LENGTH; i++) {
Asserts.assertEquals((la[i] - lb[i] * lc[i]), lr[i]);
}
}
public static void main(String[] args) {
// Incubator module must be added explicitly for the Vector API.
TestFramework.runWithFlags("--add-modules=jdk.incubator.vector");
}
}