8275275: AArch64: Fix performance regression after auto-vectorization on NEON
Reviewed-by: aph, xgong
This commit is contained in:
parent
cbee0bc9ef
commit
ec2629c052
src/hotspot/cpu/aarch64
test/hotspot/jtreg/compiler
c2/irTests
lib/ir_framework
vectorapi
@ -127,11 +127,14 @@ source %{
|
||||
|
||||
const bool Matcher::match_rule_supported_superword(int opcode, int vlen, BasicType bt) {
|
||||
if (UseSVE == 0) {
|
||||
// ConvD2I and ConvL2F are not profitable to be vectorized on NEON, because no direct
|
||||
// These operations are not profitable to be vectorized on NEON, because no direct
|
||||
// NEON instructions support them. But the match rule support for them is profitable for
|
||||
// Vector API intrinsics.
|
||||
if ((opcode == Op_VectorCastD2X && bt == T_INT) ||
|
||||
(opcode == Op_VectorCastL2X && bt == T_FLOAT)) {
|
||||
(opcode == Op_VectorCastL2X && bt == T_FLOAT) ||
|
||||
opcode == Op_AddReductionVD || opcode == Op_AddReductionVF ||
|
||||
opcode == Op_MulReductionVD || opcode == Op_MulReductionVF ||
|
||||
opcode == Op_MulVL) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -153,7 +156,6 @@ source %{
|
||||
// Check whether specific Op is supported.
|
||||
// Fail fast, otherwise fall through to common vector_size_supported() check.
|
||||
switch (opcode) {
|
||||
case Op_MulVL:
|
||||
case Op_AndVMask:
|
||||
case Op_OrVMask:
|
||||
case Op_XorVMask:
|
||||
@ -2053,13 +2055,11 @@ instruct vmla(vReg dst_src1, vReg src2, vReg src3) %{
|
||||
match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3)));
|
||||
match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3)));
|
||||
match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3)));
|
||||
match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3)));
|
||||
format %{ "vmla $dst_src1, src2, src3" %}
|
||||
format %{ "vmla $dst_src1, $src2, $src3" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
if (VM_Version::use_neon_for_vector(length_in_bytes) && bt != T_LONG) {
|
||||
// NEON mlav does not accept T2D arrangement.
|
||||
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
|
||||
__ mlav($dst_src1$$FloatRegister, get_arrangement(this),
|
||||
$src2$$FloatRegister, $src3$$FloatRegister);
|
||||
} else {
|
||||
@ -2071,13 +2071,25 @@ instruct vmla(vReg dst_src1, vReg src2, vReg src3) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Multiply-add on long vectors: dst_src1 = dst_src1 + src2 * src3
// (matches AddVL whose second input is a MulVL).
// The rule is only selectable when UseSVE > 0 and always emits sve_mla.
// NOTE(review): presumably split out of vmla because NEON mlav does not accept
// the T2D arrangement -- confirm against the vmla rule.
instruct vmlaL(vReg dst_src1, vReg src2, vReg src3) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3)));
|
||||
format %{ "vmlaL $dst_src1, $src2, $src3" %}
|
||||
ins_encode %{
|
||||
// Element type of this node; it selects the SVE register variant passed below.
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
// dst_src1 is both the accumulator input and the result; ptrue is the governing predicate.
__ sve_mla($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),
|
||||
ptrue, $src2$$FloatRegister, $src3$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct vmla_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src1 (AddVB (Binary dst_src1 (MulVB src2 src3)) pg));
|
||||
match(Set dst_src1 (AddVS (Binary dst_src1 (MulVS src2 src3)) pg));
|
||||
match(Set dst_src1 (AddVI (Binary dst_src1 (MulVI src2 src3)) pg));
|
||||
match(Set dst_src1 (AddVL (Binary dst_src1 (MulVL src2 src3)) pg));
|
||||
format %{ "vmla_masked $dst_src1, $pg, src2, src3" %}
|
||||
format %{ "vmla_masked $dst_src1, $pg, $src2, $src3" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
__ sve_mla($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),
|
||||
@ -2132,13 +2144,11 @@ instruct vmls(vReg dst_src1, vReg src2, vReg src3) %{
|
||||
match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3)));
|
||||
match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3)));
|
||||
match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3)));
|
||||
match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3)));
|
||||
format %{ "vmls $dst_src1, src2, src3" %}
|
||||
format %{ "vmls $dst_src1, $src2, $src3" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
if (VM_Version::use_neon_for_vector(length_in_bytes) && bt != T_LONG) {
|
||||
// NEON mlsv does not accept T2D arrangement.
|
||||
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
|
||||
__ mlsv($dst_src1$$FloatRegister, get_arrangement(this),
|
||||
$src2$$FloatRegister, $src3$$FloatRegister);
|
||||
} else {
|
||||
@ -2150,13 +2160,25 @@ instruct vmls(vReg dst_src1, vReg src2, vReg src3) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Multiply-subtract on long vectors: dst_src1 = dst_src1 - src2 * src3
// (matches SubVL whose second input is a MulVL).
// The rule is only selectable when UseSVE > 0 and always emits sve_mls.
// NOTE(review): presumably split out of vmls because NEON mlsv does not accept
// the T2D arrangement -- confirm against the vmls rule.
instruct vmlsL(vReg dst_src1, vReg src2, vReg src3) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3)));
|
||||
format %{ "vmlsL $dst_src1, $src2, $src3" %}
|
||||
ins_encode %{
|
||||
// Element type of this node; it selects the SVE register variant passed below.
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
// dst_src1 is both the minuend input and the result; ptrue is the governing predicate.
__ sve_mls($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),
|
||||
ptrue, $src2$$FloatRegister, $src3$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct vmls_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src1 (SubVB (Binary dst_src1 (MulVB src2 src3)) pg));
|
||||
match(Set dst_src1 (SubVS (Binary dst_src1 (MulVS src2 src3)) pg));
|
||||
match(Set dst_src1 (SubVI (Binary dst_src1 (MulVI src2 src3)) pg));
|
||||
match(Set dst_src1 (SubVL (Binary dst_src1 (MulVL src2 src3)) pg));
|
||||
format %{ "vmls_masked $dst_src1, $pg, src2, src3" %}
|
||||
format %{ "vmls_masked $dst_src1, $pg, $src2, $src3" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
__ sve_mls($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),
|
||||
|
@ -117,11 +117,14 @@ source %{
|
||||
|
||||
const bool Matcher::match_rule_supported_superword(int opcode, int vlen, BasicType bt) {
|
||||
if (UseSVE == 0) {
|
||||
// ConvD2I and ConvL2F are not profitable to be vectorized on NEON, because no direct
|
||||
// These operations are not profitable to be vectorized on NEON, because no direct
|
||||
// NEON instructions support them. But the match rule support for them is profitable for
|
||||
// Vector API intrinsics.
|
||||
if ((opcode == Op_VectorCastD2X && bt == T_INT) ||
|
||||
(opcode == Op_VectorCastL2X && bt == T_FLOAT)) {
|
||||
(opcode == Op_VectorCastL2X && bt == T_FLOAT) ||
|
||||
opcode == Op_AddReductionVD || opcode == Op_AddReductionVF ||
|
||||
opcode == Op_MulReductionVD || opcode == Op_MulReductionVF ||
|
||||
opcode == Op_MulVL) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -143,7 +146,6 @@ source %{
|
||||
// Check whether specific Op is supported.
|
||||
// Fail fast, otherwise fall through to common vector_size_supported() check.
|
||||
switch (opcode) {
|
||||
case Op_MulVL:
|
||||
case Op_AndVMask:
|
||||
case Op_OrVMask:
|
||||
case Op_XorVMask:
|
||||
@ -1085,13 +1087,11 @@ instruct vmla(vReg dst_src1, vReg src2, vReg src3) %{
|
||||
match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3)));
|
||||
match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3)));
|
||||
match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3)));
|
||||
match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3)));
|
||||
format %{ "vmla $dst_src1, src2, src3" %}
|
||||
format %{ "vmla $dst_src1, $src2, $src3" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
if (VM_Version::use_neon_for_vector(length_in_bytes) && bt != T_LONG) {
|
||||
// NEON mlav does not accept T2D arrangement.
|
||||
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
|
||||
__ mlav($dst_src1$$FloatRegister, get_arrangement(this),
|
||||
$src2$$FloatRegister, $src3$$FloatRegister);
|
||||
} else {
|
||||
@ -1103,13 +1103,25 @@ instruct vmla(vReg dst_src1, vReg src2, vReg src3) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Multiply-add on long vectors: dst_src1 = dst_src1 + src2 * src3
// (matches AddVL whose second input is a MulVL).
// The rule is only selectable when UseSVE > 0 and always emits sve_mla.
// NOTE(review): presumably split out of vmla because NEON mlav does not accept
// the T2D arrangement -- confirm against the vmla rule.
instruct vmlaL(vReg dst_src1, vReg src2, vReg src3) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3)));
|
||||
format %{ "vmlaL $dst_src1, $src2, $src3" %}
|
||||
ins_encode %{
|
||||
// Element type of this node; it selects the SVE register variant passed below.
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
// dst_src1 is both the accumulator input and the result; ptrue is the governing predicate.
__ sve_mla($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),
|
||||
ptrue, $src2$$FloatRegister, $src3$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct vmla_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src1 (AddVB (Binary dst_src1 (MulVB src2 src3)) pg));
|
||||
match(Set dst_src1 (AddVS (Binary dst_src1 (MulVS src2 src3)) pg));
|
||||
match(Set dst_src1 (AddVI (Binary dst_src1 (MulVI src2 src3)) pg));
|
||||
match(Set dst_src1 (AddVL (Binary dst_src1 (MulVL src2 src3)) pg));
|
||||
format %{ "vmla_masked $dst_src1, $pg, src2, src3" %}
|
||||
format %{ "vmla_masked $dst_src1, $pg, $src2, $src3" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
__ sve_mla($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),
|
||||
@ -1164,13 +1176,11 @@ instruct vmls(vReg dst_src1, vReg src2, vReg src3) %{
|
||||
match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3)));
|
||||
match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3)));
|
||||
match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3)));
|
||||
match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3)));
|
||||
format %{ "vmls $dst_src1, src2, src3" %}
|
||||
format %{ "vmls $dst_src1, $src2, $src3" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
if (VM_Version::use_neon_for_vector(length_in_bytes) && bt != T_LONG) {
|
||||
// NEON mlsv does not accept T2D arrangement.
|
||||
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
|
||||
__ mlsv($dst_src1$$FloatRegister, get_arrangement(this),
|
||||
$src2$$FloatRegister, $src3$$FloatRegister);
|
||||
} else {
|
||||
@ -1182,13 +1192,25 @@ instruct vmls(vReg dst_src1, vReg src2, vReg src3) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Multiply-subtract on long vectors: dst_src1 = dst_src1 - src2 * src3
// (matches SubVL whose second input is a MulVL).
// The rule is only selectable when UseSVE > 0 and always emits sve_mls.
// NOTE(review): presumably split out of vmls because NEON mlsv does not accept
// the T2D arrangement -- confirm against the vmls rule.
instruct vmlsL(vReg dst_src1, vReg src2, vReg src3) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3)));
|
||||
format %{ "vmlsL $dst_src1, $src2, $src3" %}
|
||||
ins_encode %{
|
||||
// Element type of this node; it selects the SVE register variant passed below.
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
// dst_src1 is both the minuend input and the result; ptrue is the governing predicate.
__ sve_mls($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),
|
||||
ptrue, $src2$$FloatRegister, $src3$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct vmls_masked(vReg dst_src1, vReg src2, vReg src3, pRegGov pg) %{
|
||||
predicate(UseSVE > 0);
|
||||
match(Set dst_src1 (SubVB (Binary dst_src1 (MulVB src2 src3)) pg));
|
||||
match(Set dst_src1 (SubVS (Binary dst_src1 (MulVS src2 src3)) pg));
|
||||
match(Set dst_src1 (SubVI (Binary dst_src1 (MulVI src2 src3)) pg));
|
||||
match(Set dst_src1 (SubVL (Binary dst_src1 (MulVL src2 src3)) pg));
|
||||
format %{ "vmls_masked $dst_src1, $pg, src2, src3" %}
|
||||
format %{ "vmls_masked $dst_src1, $pg, $src2, $src3" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
__ sve_mls($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),
|
||||
|
@ -0,0 +1,119 @@
|
||||
/*
|
||||
* Copyright (c) 2022, Arm Limited. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package compiler.c2.irTests;
|
||||
|
||||
import compiler.lib.ir_framework.*;
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @bug 8275275
|
||||
* @summary Fix performance regression after auto-vectorization on aarch64 NEON.
|
||||
* @requires os.arch=="aarch64"
|
||||
* @library /test/lib /
|
||||
* @run driver compiler.c2.irTests.TestDisableAutoVectOpcodes
|
||||
*/
|
||||
|
||||
public class TestDisableAutoVectOpcodes {
|
||||
|
||||
final private static int SIZE = 3000;
|
||||
|
||||
private static double[] doublea = new double[SIZE];
|
||||
private static double[] doubleb = new double[SIZE];
|
||||
private static long[] longa = new long[SIZE];
|
||||
private static long[] longb = new long[SIZE];
|
||||
private static int[] inta = new int[SIZE];
|
||||
private static float[] floata = new float[SIZE];
|
||||
private static float[] floatb = new float[SIZE];
|
||||
private static float fresult;
|
||||
private static double dresult;
|
||||
|
||||
public static void main(String[] args) {
|
||||
TestFramework.runWithFlags("-XX:UseSVE=0");
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.VECTOR_CAST_D2X})
|
||||
private static void testConvD2I() {
|
||||
for(int i = 0; i < SIZE; i++) {
|
||||
inta[i] = (int) (doublea[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.VECTOR_CAST_L2X})
|
||||
private static void testConvL2F() {
|
||||
for(int i = 0; i < SIZE; i++) {
|
||||
floata[i] = (float) (longa[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.MUL_VL})
|
||||
private static void testMulVL() {
|
||||
for(int i = 0; i < SIZE; i++) {
|
||||
longa[i] = longa[i] * longb[i];
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.ADD_REDUCTION_VF})
|
||||
private static void testAddReductionVF() {
|
||||
float result = 1;
|
||||
for(int i = 0; i < SIZE; i++) {
|
||||
result += (floata[i] + floatb[i]);
|
||||
}
|
||||
fresult += result;
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.ADD_REDUCTION_VD})
|
||||
private static void testAddReductionVD() {
|
||||
double result = 1;
|
||||
for(int i = 0; i < SIZE; i++) {
|
||||
result += (doublea[i] + doubleb[i]);
|
||||
}
|
||||
dresult += result;
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.MUL_REDUCTION_VF})
|
||||
private static void testMulReductionVF() {
|
||||
float result = 1;
|
||||
for(int i = 0; i < SIZE; i++) {
|
||||
result *= (floata[i] + floatb[i]);
|
||||
}
|
||||
fresult += result;
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.MUL_REDUCTION_VD})
|
||||
private static void testMulReductionVD() {
|
||||
double result = 1;
|
||||
for(int i = 0; i < SIZE; i++) {
|
||||
result *= (doublea[i] + doubleb[i]);
|
||||
}
|
||||
dresult += result;
|
||||
}
|
||||
|
||||
}
|
@ -216,6 +216,12 @@ public class IRNode {
|
||||
public static final String Max_I = START + "MaxI" + MID + END;
|
||||
public static final String Min_V = START + "MinV" + MID + END;
|
||||
public static final String Max_V = START + "MaxV" + MID + END;
|
||||
// Match string for the MulVL (long vector multiply) node, built from the node
// name in the same START/MID/END form as the neighbouring constants.
public static final String MUL_VL = START + "MulVL" + MID + END;
|
||||
|
||||
// Match strings for the float/double add and multiply vector reduction nodes.
public static final String ADD_REDUCTION_VF = START + "AddReductionVF" + MID + END;
|
||||
public static final String ADD_REDUCTION_VD = START + "AddReductionVD" + MID + END;
|
||||
public static final String MUL_REDUCTION_VF = START + "MulReductionVF" + MID + END;
|
||||
public static final String MUL_REDUCTION_VD = START + "MulReductionVD" + MID + END;
|
||||
|
||||
public static final String FAST_LOCK = START + "FastLock" + MID + END;
|
||||
public static final String FAST_UNLOCK = START + "FastUnlock" + MID + END;
|
||||
|
267
test/hotspot/jtreg/compiler/vectorapi/TestVectorMulAddSub.java
Normal file
267
test/hotspot/jtreg/compiler/vectorapi/TestVectorMulAddSub.java
Normal file
@ -0,0 +1,267 @@
|
||||
/*
|
||||
* Copyright (c) 2022, Arm Limited. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package compiler.vectorapi;
|
||||
|
||||
import compiler.lib.ir_framework.*;
|
||||
|
||||
import java.util.Random;
|
||||
|
||||
import jdk.incubator.vector.ByteVector;
|
||||
import jdk.incubator.vector.ShortVector;
|
||||
import jdk.incubator.vector.IntVector;
|
||||
import jdk.incubator.vector.LongVector;
|
||||
import jdk.incubator.vector.VectorSpecies;
|
||||
|
||||
import jdk.test.lib.Asserts;
|
||||
import jdk.test.lib.Utils;
|
||||
|
||||
/**
|
||||
* @test
|
||||
* @bug 8275275
|
||||
* @key randomness
|
||||
* @library /test/lib /
|
||||
* @requires os.arch=="aarch64"
|
||||
* @summary AArch64: Fix performance regression after auto-vectorization on NEON
|
||||
* @modules jdk.incubator.vector
|
||||
*
|
||||
* @run driver compiler.vectorapi.TestVectorMulAddSub
|
||||
*/
|
||||
|
||||
public class TestVectorMulAddSub {
|
||||
|
||||
private static final VectorSpecies<Byte> B_SPECIES = ByteVector.SPECIES_MAX;
|
||||
private static final VectorSpecies<Short> S_SPECIES = ShortVector.SPECIES_MAX;
|
||||
private static final VectorSpecies<Integer> I_SPECIES = IntVector.SPECIES_MAX;
|
||||
private static final VectorSpecies<Long> L_SPECIES = LongVector.SPECIES_MAX;
|
||||
|
||||
private static int LENGTH = 1024;
|
||||
private static final Random RD = Utils.getRandomInstance();
|
||||
|
||||
private static byte[] ba;
|
||||
private static byte[] bb;
|
||||
private static byte[] bc;
|
||||
private static byte[] br;
|
||||
private static short[] sa;
|
||||
private static short[] sb;
|
||||
private static short[] sc;
|
||||
private static short[] sr;
|
||||
private static int[] ia;
|
||||
private static int[] ib;
|
||||
private static int[] ic;
|
||||
private static int[] ir;
|
||||
private static long[] la;
|
||||
private static long[] lb;
|
||||
private static long[] lc;
|
||||
private static long[] lr;
|
||||
|
||||
static {
|
||||
ba = new byte[LENGTH];
|
||||
bb = new byte[LENGTH];
|
||||
bc = new byte[LENGTH];
|
||||
br = new byte[LENGTH];
|
||||
sa = new short[LENGTH];
|
||||
sb = new short[LENGTH];
|
||||
sc = new short[LENGTH];
|
||||
sr = new short[LENGTH];
|
||||
ia = new int[LENGTH];
|
||||
ib = new int[LENGTH];
|
||||
ic = new int[LENGTH];
|
||||
ir = new int[LENGTH];
|
||||
la = new long[LENGTH];
|
||||
lb = new long[LENGTH];
|
||||
lc = new long[LENGTH];
|
||||
lr = new long[LENGTH];
|
||||
|
||||
for (int i = 0; i < LENGTH; i++) {
|
||||
ba[i] = (byte) RD.nextInt();
|
||||
bb[i] = (byte) RD.nextInt();
|
||||
bc[i] = (byte) RD.nextInt();
|
||||
sa[i] = (short) RD.nextInt();
|
||||
sb[i] = (short) RD.nextInt();
|
||||
sc[i] = (short) RD.nextInt();
|
||||
ia[i] = RD.nextInt();
|
||||
ib[i] = RD.nextInt();
|
||||
ic[i] = RD.nextInt();
|
||||
la[i] = RD.nextLong();
|
||||
lb[i] = RD.nextLong();
|
||||
lc[i] = RD.nextLong();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {"vmla", "> 0"})
|
||||
public static void testByteMulAdd() {
|
||||
for (int i = 0; i < LENGTH; i += B_SPECIES.length()) {
|
||||
ByteVector av = ByteVector.fromArray(B_SPECIES, ba, i);
|
||||
ByteVector bv = ByteVector.fromArray(B_SPECIES, bb, i);
|
||||
ByteVector cv = ByteVector.fromArray(B_SPECIES, bc, i);
|
||||
av.add(bv.mul(cv)).intoArray(br, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = "testByteMulAdd")
|
||||
public static void testByteMulAdd_runner() {
|
||||
testByteMulAdd();
|
||||
for (int i = 0; i < LENGTH; i++) {
|
||||
Asserts.assertEquals((byte) (ba[i] + bb[i] * bc[i]), br[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {"vmla", "> 0"})
|
||||
public static void testShortMulAdd() {
|
||||
for (int i = 0; i < LENGTH; i += S_SPECIES.length()) {
|
||||
ShortVector av = ShortVector.fromArray(S_SPECIES, sa, i);
|
||||
ShortVector bv = ShortVector.fromArray(S_SPECIES, sb, i);
|
||||
ShortVector cv = ShortVector.fromArray(S_SPECIES, sc, i);
|
||||
av.add(bv.mul(cv)).intoArray(sr, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = "testShortMulAdd")
|
||||
public static void testShortMulAdd_runner() {
|
||||
testShortMulAdd();
|
||||
for (int i = 0; i < LENGTH; i++) {
|
||||
Asserts.assertEquals((short) (sa[i] + sb[i] * sc[i]), sr[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {"vmla", "> 0"})
|
||||
public static void testIntMulAdd() {
|
||||
for (int i = 0; i < LENGTH; i += I_SPECIES.length()) {
|
||||
IntVector av = IntVector.fromArray(I_SPECIES, ia, i);
|
||||
IntVector bv = IntVector.fromArray(I_SPECIES, ib, i);
|
||||
IntVector cv = IntVector.fromArray(I_SPECIES, ic, i);
|
||||
av.add(bv.mul(cv)).intoArray(ir, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = "testIntMulAdd")
|
||||
public static void testIntMulAdd_runner() {
|
||||
testIntMulAdd();
|
||||
for (int i = 0; i < LENGTH; i++) {
|
||||
Asserts.assertEquals((ia[i] + ib[i] * ic[i]), ir[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(applyIf = {"UseSVE", " > 0"}, counts = {"vmla", "> 0"})
|
||||
public static void testLongMulAdd() {
|
||||
for (int i = 0; i < LENGTH; i += L_SPECIES.length()) {
|
||||
LongVector av = LongVector.fromArray(L_SPECIES, la, i);
|
||||
LongVector bv = LongVector.fromArray(L_SPECIES, lb, i);
|
||||
LongVector cv = LongVector.fromArray(L_SPECIES, lc, i);
|
||||
av.add(bv.mul(cv)).intoArray(lr, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = "testLongMulAdd")
|
||||
public static void testLongMulAdd_runner() {
|
||||
testLongMulAdd();
|
||||
for (int i = 0; i < LENGTH; i++) {
|
||||
Asserts.assertEquals((la[i] + lb[i] * lc[i]), lr[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {"vmls", "> 0"})
|
||||
public static void testByteMulSub() {
|
||||
for (int i = 0; i < LENGTH; i += B_SPECIES.length()) {
|
||||
ByteVector av = ByteVector.fromArray(B_SPECIES, ba, i);
|
||||
ByteVector bv = ByteVector.fromArray(B_SPECIES, bb, i);
|
||||
ByteVector cv = ByteVector.fromArray(B_SPECIES, bc, i);
|
||||
av.sub(bv.mul(cv)).intoArray(br, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = "testByteMulSub")
|
||||
public static void testByteMulSub_runner() {
|
||||
testByteMulSub();
|
||||
for (int i = 0; i < LENGTH; i++) {
|
||||
Asserts.assertEquals((byte) (ba[i] - bb[i] * bc[i]), br[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {"vmls", "> 0"})
|
||||
public static void testShortMulSub() {
|
||||
for (int i = 0; i < LENGTH; i += S_SPECIES.length()) {
|
||||
ShortVector av = ShortVector.fromArray(S_SPECIES, sa, i);
|
||||
ShortVector bv = ShortVector.fromArray(S_SPECIES, sb, i);
|
||||
ShortVector cv = ShortVector.fromArray(S_SPECIES, sc, i);
|
||||
av.sub(bv.mul(cv)).intoArray(sr, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = "testShortMulSub")
|
||||
public static void testShortMulSub_runner() {
|
||||
testShortMulSub();
|
||||
for (int i = 0; i < LENGTH; i++) {
|
||||
Asserts.assertEquals((short) (sa[i] - sb[i] * sc[i]), sr[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {"vmls", "> 0"})
|
||||
public static void testIntMulSub() {
|
||||
for (int i = 0; i < LENGTH; i += I_SPECIES.length()) {
|
||||
IntVector av = IntVector.fromArray(I_SPECIES, ia, i);
|
||||
IntVector bv = IntVector.fromArray(I_SPECIES, ib, i);
|
||||
IntVector cv = IntVector.fromArray(I_SPECIES, ic, i);
|
||||
av.sub(bv.mul(cv)).intoArray(ir, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = "testIntMulSub")
|
||||
public static void testIntMulSub_runner() {
|
||||
testIntMulSub();
|
||||
for (int i = 0; i < LENGTH; i++) {
|
||||
Asserts.assertEquals((ia[i] - ib[i] * ic[i]), ir[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(applyIf = {"UseSVE", " > 0"}, counts = {"vmls", "> 0"})
|
||||
public static void testLongMulSub() {
|
||||
for (int i = 0; i < LENGTH; i += L_SPECIES.length()) {
|
||||
LongVector av = LongVector.fromArray(L_SPECIES, la, i);
|
||||
LongVector bv = LongVector.fromArray(L_SPECIES, lb, i);
|
||||
LongVector cv = LongVector.fromArray(L_SPECIES, lc, i);
|
||||
av.sub(bv.mul(cv)).intoArray(lr, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = "testLongMulSub")
|
||||
public static void testLongMulSub_runner() {
|
||||
testLongMulSub();
|
||||
for (int i = 0; i < LENGTH; i++) {
|
||||
Asserts.assertEquals((la[i] - lb[i] * lc[i]), lr[i]);
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
TestFramework.runWithFlags("--add-modules=jdk.incubator.vector");
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user