// Source path: jdk-24/test/hotspot/jtreg/compiler/vectorapi/VectorFusedMultiplyAddSubTest.java
// (Listing metadata from the web view: 413 lines, 16 KiB, Java.)

/*
* Copyright (c) 2022, Arm Limited. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package compiler.vectorapi;
import compiler.lib.ir_framework.*;
import java.util.Random;
import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.DoubleVector;
import jdk.incubator.vector.FloatVector;
import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.LongVector;
import jdk.incubator.vector.ShortVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorOperators;
import jdk.incubator.vector.VectorSpecies;
import jdk.test.lib.Asserts;
import jdk.test.lib.Utils;
/**
* @test
* @bug 8282431
* @key randomness
* @library /test/lib /
* @requires vm.cpu.features ~= ".*sve.*"
* @summary AArch64: Add optimized rules for masked vector multiply-add/sub for SVE
* @modules jdk.incubator.vector
*
* @run driver compiler.vectorapi.VectorFusedMultiplyAddSubTest
*/
public class VectorFusedMultiplyAddSubTest {
    /*
     * Each @Test method below applies one masked Vector API expression
     * (multiply-add, multiply-sub, or an FMA variant with negated operands)
     * across the shared input arrays, then verifies the stored result against
     * a scalar reference. The attached @IR rule additionally requires that
     * the compiled test method contains at least one node of the named kind.
     */

    // Widest available species for every element type under test.
    private static final VectorSpecies<Byte> B_SPECIES = ByteVector.SPECIES_MAX;
    private static final VectorSpecies<Double> D_SPECIES = DoubleVector.SPECIES_MAX;
    private static final VectorSpecies<Float> F_SPECIES = FloatVector.SPECIES_MAX;
    private static final VectorSpecies<Integer> I_SPECIES = IntVector.SPECIES_MAX;
    private static final VectorSpecies<Long> L_SPECIES = LongVector.SPECIES_MAX;
    private static final VectorSpecies<Short> S_SPECIES = ShortVector.SPECIES_MAX;

    // Element count of every input, result and mask array. The test loops
    // advance by a full species length, so they rely on this being a multiple
    // of each species length.
    private static final int LENGTH = 1024;

    private static final Random RD = Utils.getRandomInstance();

    // Naming: <type prefix><role>, where a/b/c are inputs and r is the result.
    private static final byte[] ba = new byte[LENGTH];
    private static final byte[] bb = new byte[LENGTH];
    private static final byte[] bc = new byte[LENGTH];
    private static final byte[] br = new byte[LENGTH];

    private static final short[] sa = new short[LENGTH];
    private static final short[] sb = new short[LENGTH];
    private static final short[] sc = new short[LENGTH];
    private static final short[] sr = new short[LENGTH];

    private static final int[] ia = new int[LENGTH];
    private static final int[] ib = new int[LENGTH];
    private static final int[] ic = new int[LENGTH];
    private static final int[] ir = new int[LENGTH];

    private static final long[] la = new long[LENGTH];
    private static final long[] lb = new long[LENGTH];
    private static final long[] lc = new long[LENGTH];
    private static final long[] lr = new long[LENGTH];

    private static final float[] fa = new float[LENGTH];
    private static final float[] fb = new float[LENGTH];
    private static final float[] fc = new float[LENGTH];
    private static final float[] fr = new float[LENGTH];

    private static final double[] da = new double[LENGTH];
    private static final double[] db = new double[LENGTH];
    private static final double[] dc = new double[LENGTH];
    private static final double[] dr = new double[LENGTH];

    // Mask source shared by all tests; every test loads its mask from offset 0.
    private static final boolean[] m = new boolean[LENGTH];

    static {
        // Inputs are drawn from [0, 25). The order of the draws is kept
        // exactly as it was so that a given seed keeps producing the same data.
        for (int i = 0; i < LENGTH; i++) {
            ba[i] = (byte) RD.nextInt(25);
            bb[i] = (byte) RD.nextInt(25);
            bc[i] = (byte) RD.nextInt(25);

            sa[i] = (short) RD.nextInt(25);
            sb[i] = (short) RD.nextInt(25);
            sc[i] = (short) RD.nextInt(25);

            ia[i] = RD.nextInt(25);
            ib[i] = RD.nextInt(25);
            ic[i] = RD.nextInt(25);

            la[i] = RD.nextLong(25);
            lb[i] = RD.nextLong(25);
            lc[i] = RD.nextLong(25);

            fa[i] = RD.nextFloat(25.0f);
            fb[i] = RD.nextFloat(25.0f);
            fc[i] = RD.nextFloat(25.0f);

            da[i] = RD.nextDouble(25.0);
            db[i] = RD.nextDouble(25.0);
            dc[i] = RD.nextDouble(25.0);

            m[i] = RD.nextBoolean();
        }
    }

    /** Scalar reference for a three-operand {@code byte} operation. */
    @FunctionalInterface
    interface BTenOp {
        byte apply(byte a, byte b, byte c);
    }

    /** Scalar reference for a three-operand {@code short} operation. */
    @FunctionalInterface
    interface STenOp {
        short apply(short a, short b, short c);
    }

    /** Scalar reference for a three-operand {@code int} operation. */
    @FunctionalInterface
    interface ITenOp {
        int apply(int a, int b, int c);
    }

    /** Scalar reference for a three-operand {@code long} operation. */
    @FunctionalInterface
    interface LTenOp {
        long apply(long a, long b, long c);
    }

    /** Scalar reference for a three-operand {@code float} operation. */
    @FunctionalInterface
    interface FTenOp {
        float apply(float a, float b, float c);
    }

    /** Scalar reference for a three-operand {@code double} operation. */
    @FunctionalInterface
    interface DTenOp {
        double apply(double a, double b, double c);
    }

    /**
     * Builds the failure text for a single mismatching element. Only called on
     * the failure path, so the boxing and string concatenation here never run
     * while a test is passing.
     *
     * @param type     element type name used as the message prefix
     * @param index    array index of the mismatching element
     * @param active   whether the mask lane covering {@code index} is set
     * @param expected value the scalar reference produced
     * @param actual   value found in the result array
     * @param a        first input at {@code index}
     * @param b        second input at {@code index}
     * @param c        third input at {@code index}
     * @return a message naming the index, mask state, values and operands
     */
    private static String mismatch(String type, int index, boolean active,
                                   Object expected, Object actual,
                                   Object a, Object b, Object c) {
        return type + " mismatch at index " + index
                + " (mask lane " + (active ? "set" : "unset") + ")"
                + ": expected " + expected + " but got " + actual
                + " [a=" + a + ", b=" + b + ", c=" + c + "]";
    }

    /**
     * Verifies a masked byte result. Element {@code i} is governed by mask
     * entry {@code m[i % B_SPECIES.length()]}: when set, {@code r[i]} must
     * equal {@code f.apply(a[i], b[i], c[i])}; otherwise it must equal
     * {@code a[i]}. Fails with a {@link RuntimeException} via {@link Asserts}
     * on the first mismatch.
     */
    private static void assertArrayEquals(byte[] r, byte[] a, byte[] b, byte[] c, boolean[] m, BTenOp f) {
        final int lanes = B_SPECIES.length();
        for (int i = 0; i < LENGTH; i++) {
            boolean active = m[i % lanes];
            byte expected = active ? f.apply(a[i], b[i], c[i]) : a[i];
            if (expected != r[i]) {
                Asserts.fail(mismatch("byte", i, active, expected, r[i], a[i], b[i], c[i]));
            }
        }
    }

    /**
     * Verifies a masked short result. Element {@code i} is governed by mask
     * entry {@code m[i % S_SPECIES.length()]}: when set, {@code r[i]} must
     * equal {@code f.apply(a[i], b[i], c[i])}; otherwise it must equal
     * {@code a[i]}.
     */
    private static void assertArrayEquals(short[] r, short[] a, short[] b, short[] c, boolean[] m, STenOp f) {
        final int lanes = S_SPECIES.length();
        for (int i = 0; i < LENGTH; i++) {
            boolean active = m[i % lanes];
            short expected = active ? f.apply(a[i], b[i], c[i]) : a[i];
            if (expected != r[i]) {
                Asserts.fail(mismatch("short", i, active, expected, r[i], a[i], b[i], c[i]));
            }
        }
    }

    /**
     * Verifies a masked int result. Element {@code i} is governed by mask
     * entry {@code m[i % I_SPECIES.length()]}: when set, {@code r[i]} must
     * equal {@code f.apply(a[i], b[i], c[i])}; otherwise it must equal
     * {@code a[i]}.
     */
    private static void assertArrayEquals(int[] r, int[] a, int[] b, int[] c, boolean[] m, ITenOp f) {
        final int lanes = I_SPECIES.length();
        for (int i = 0; i < LENGTH; i++) {
            boolean active = m[i % lanes];
            int expected = active ? f.apply(a[i], b[i], c[i]) : a[i];
            if (expected != r[i]) {
                Asserts.fail(mismatch("int", i, active, expected, r[i], a[i], b[i], c[i]));
            }
        }
    }

    /**
     * Verifies a masked long result. Element {@code i} is governed by mask
     * entry {@code m[i % L_SPECIES.length()]}: when set, {@code r[i]} must
     * equal {@code f.apply(a[i], b[i], c[i])}; otherwise it must equal
     * {@code a[i]}.
     */
    private static void assertArrayEquals(long[] r, long[] a, long[] b, long[] c, boolean[] m, LTenOp f) {
        final int lanes = L_SPECIES.length();
        for (int i = 0; i < LENGTH; i++) {
            boolean active = m[i % lanes];
            long expected = active ? f.apply(a[i], b[i], c[i]) : a[i];
            if (expected != r[i]) {
                Asserts.fail(mismatch("long", i, active, expected, r[i], a[i], b[i], c[i]));
            }
        }
    }

    /**
     * Verifies a masked float result. Element {@code i} is governed by mask
     * entry {@code m[i % F_SPECIES.length()]}: when set, {@code r[i]} must
     * equal {@code f.apply(a[i], b[i], c[i])}; otherwise it must equal
     * {@code a[i]}. Equality is decided by {@link Float#compare}, which
     * matches boxed {@code Float.equals} (NaN equals NaN, 0.0f differs from
     * -0.0f).
     */
    private static void assertArrayEquals(float[] r, float[] a, float[] b, float[] c, boolean[] m, FTenOp f) {
        final int lanes = F_SPECIES.length();
        for (int i = 0; i < LENGTH; i++) {
            boolean active = m[i % lanes];
            float expected = active ? f.apply(a[i], b[i], c[i]) : a[i];
            if (Float.compare(expected, r[i]) != 0) {
                Asserts.fail(mismatch("float", i, active, expected, r[i], a[i], b[i], c[i]));
            }
        }
    }

    /**
     * Verifies a masked double result. Element {@code i} is governed by mask
     * entry {@code m[i % D_SPECIES.length()]}: when set, {@code r[i]} must
     * equal {@code f.apply(a[i], b[i], c[i])}; otherwise it must equal
     * {@code a[i]}. Equality is decided by {@link Double#compare}, which
     * matches boxed {@code Double.equals} (NaN equals NaN, 0.0 differs from
     * -0.0).
     */
    private static void assertArrayEquals(double[] r, double[] a, double[] b, double[] c, boolean[] m, DTenOp f) {
        final int lanes = D_SPECIES.length();
        for (int i = 0; i < LENGTH; i++) {
            boolean active = m[i % lanes];
            double expected = active ? f.apply(a[i], b[i], c[i]) : a[i];
            if (Double.compare(expected, r[i]) != 0) {
                Asserts.fail(mismatch("double", i, active, expected, r[i], a[i], b[i], c[i]));
            }
        }
    }

    /** Masked {@code a + b * c} on bytes; the IR rule requires at least one VMLA_MASKED node. */
    @Test
    @IR(counts = { IRNode.VMLA_MASKED, ">= 1" })
    public static void testByteMultiplyAddMasked() {
        VectorMask<Byte> mask = VectorMask.fromArray(B_SPECIES, m, 0);
        for (int i = 0; i < LENGTH; i += B_SPECIES.length()) {
            ByteVector av = ByteVector.fromArray(B_SPECIES, ba, i);
            ByteVector bv = ByteVector.fromArray(B_SPECIES, bb, i);
            ByteVector cv = ByteVector.fromArray(B_SPECIES, bc, i);
            av.add(bv.mul(cv), mask).intoArray(br, i);
        }
        assertArrayEquals(br, ba, bb, bc, m, (a, b, c) -> (byte) (a + b * c));
    }

    /** Masked {@code a - b * c} on bytes; the IR rule requires at least one VMLS_MASKED node. */
    @Test
    @IR(counts = { IRNode.VMLS_MASKED, ">= 1" })
    public static void testByteMultiplySubMasked() {
        VectorMask<Byte> mask = VectorMask.fromArray(B_SPECIES, m, 0);
        for (int i = 0; i < LENGTH; i += B_SPECIES.length()) {
            ByteVector av = ByteVector.fromArray(B_SPECIES, ba, i);
            ByteVector bv = ByteVector.fromArray(B_SPECIES, bb, i);
            ByteVector cv = ByteVector.fromArray(B_SPECIES, bc, i);
            av.sub(bv.mul(cv), mask).intoArray(br, i);
        }
        assertArrayEquals(br, ba, bb, bc, m, (a, b, c) -> (byte) (a - b * c));
    }

    /** Masked {@code a + b * c} on shorts; the IR rule requires at least one VMLA_MASKED node. */
    @Test
    @IR(counts = { IRNode.VMLA_MASKED, ">= 1" })
    public static void testShortMultiplyAddMasked() {
        VectorMask<Short> mask = VectorMask.fromArray(S_SPECIES, m, 0);
        for (int i = 0; i < LENGTH; i += S_SPECIES.length()) {
            ShortVector av = ShortVector.fromArray(S_SPECIES, sa, i);
            ShortVector bv = ShortVector.fromArray(S_SPECIES, sb, i);
            ShortVector cv = ShortVector.fromArray(S_SPECIES, sc, i);
            av.add(bv.mul(cv), mask).intoArray(sr, i);
        }
        assertArrayEquals(sr, sa, sb, sc, m, (a, b, c) -> (short) (a + b * c));
    }

    /** Masked {@code a - b * c} on shorts; the IR rule requires at least one VMLS_MASKED node. */
    @Test
    @IR(counts = { IRNode.VMLS_MASKED, ">= 1" })
    public static void testShortMultiplySubMasked() {
        VectorMask<Short> mask = VectorMask.fromArray(S_SPECIES, m, 0);
        for (int i = 0; i < LENGTH; i += S_SPECIES.length()) {
            ShortVector av = ShortVector.fromArray(S_SPECIES, sa, i);
            ShortVector bv = ShortVector.fromArray(S_SPECIES, sb, i);
            ShortVector cv = ShortVector.fromArray(S_SPECIES, sc, i);
            av.sub(bv.mul(cv), mask).intoArray(sr, i);
        }
        assertArrayEquals(sr, sa, sb, sc, m, (a, b, c) -> (short) (a - b * c));
    }

    /** Masked {@code a + b * c} on ints; the IR rule requires at least one VMLA_MASKED node. */
    @Test
    @IR(counts = { IRNode.VMLA_MASKED, ">= 1" })
    public static void testIntMultiplyAddMasked() {
        VectorMask<Integer> mask = VectorMask.fromArray(I_SPECIES, m, 0);
        for (int i = 0; i < LENGTH; i += I_SPECIES.length()) {
            IntVector av = IntVector.fromArray(I_SPECIES, ia, i);
            IntVector bv = IntVector.fromArray(I_SPECIES, ib, i);
            IntVector cv = IntVector.fromArray(I_SPECIES, ic, i);
            av.add(bv.mul(cv), mask).intoArray(ir, i);
        }
        assertArrayEquals(ir, ia, ib, ic, m, (a, b, c) -> a + b * c);
    }

    /** Masked {@code a - b * c} on ints; the IR rule requires at least one VMLS_MASKED node. */
    @Test
    @IR(counts = { IRNode.VMLS_MASKED, ">= 1" })
    public static void testIntMultiplySubMasked() {
        VectorMask<Integer> mask = VectorMask.fromArray(I_SPECIES, m, 0);
        for (int i = 0; i < LENGTH; i += I_SPECIES.length()) {
            IntVector av = IntVector.fromArray(I_SPECIES, ia, i);
            IntVector bv = IntVector.fromArray(I_SPECIES, ib, i);
            IntVector cv = IntVector.fromArray(I_SPECIES, ic, i);
            av.sub(bv.mul(cv), mask).intoArray(ir, i);
        }
        assertArrayEquals(ir, ia, ib, ic, m, (a, b, c) -> a - b * c);
    }

    /** Masked {@code a + b * c} on longs; the IR rule requires at least one VMLA_MASKED node. */
    @Test
    @IR(counts = { IRNode.VMLA_MASKED, ">= 1" })
    public static void testLongMultiplyAddMasked() {
        VectorMask<Long> mask = VectorMask.fromArray(L_SPECIES, m, 0);
        for (int i = 0; i < LENGTH; i += L_SPECIES.length()) {
            LongVector av = LongVector.fromArray(L_SPECIES, la, i);
            LongVector bv = LongVector.fromArray(L_SPECIES, lb, i);
            LongVector cv = LongVector.fromArray(L_SPECIES, lc, i);
            av.add(bv.mul(cv), mask).intoArray(lr, i);
        }
        assertArrayEquals(lr, la, lb, lc, m, (a, b, c) -> a + b * c);
    }

    /** Masked {@code a - b * c} on longs; the IR rule requires at least one VMLS_MASKED node. */
    @Test
    @IR(counts = { IRNode.VMLS_MASKED, ">= 1" })
    public static void testLongMultiplySubMasked() {
        VectorMask<Long> mask = VectorMask.fromArray(L_SPECIES, m, 0);
        for (int i = 0; i < LENGTH; i += L_SPECIES.length()) {
            LongVector av = LongVector.fromArray(L_SPECIES, la, i);
            LongVector bv = LongVector.fromArray(L_SPECIES, lb, i);
            LongVector cv = LongVector.fromArray(L_SPECIES, lc, i);
            av.sub(bv.mul(cv), mask).intoArray(lr, i);
        }
        assertArrayEquals(lr, la, lb, lc, m, (a, b, c) -> a - b * c);
    }

    /** Masked {@code fma(a, -b, c)} on floats; the IR rule requires at least one VFMSB_MASKED node. */
    @Test
    @IR(counts = { IRNode.VFMSB_MASKED, ">= 1" })
    public static void testFloatMultiplySubMasked() {
        VectorMask<Float> mask = VectorMask.fromArray(F_SPECIES, m, 0);
        for (int i = 0; i < LENGTH; i += F_SPECIES.length()) {
            FloatVector av = FloatVector.fromArray(F_SPECIES, fa, i);
            FloatVector bv = FloatVector.fromArray(F_SPECIES, fb, i);
            FloatVector cv = FloatVector.fromArray(F_SPECIES, fc, i);
            av.lanewise(VectorOperators.FMA, bv.neg(), cv, mask).intoArray(fr, i);
        }
        assertArrayEquals(fr, fa, fb, fc, m, (a, b, c) -> Math.fma(a, -b, c));
    }

    /** Masked {@code fma(a, -b, -c)} on floats; the IR rule requires at least one VFNMAD_MASKED node. */
    @Test
    @IR(counts = { IRNode.VFNMAD_MASKED, ">= 1" })
    public static void testFloatNegatedMultiplyAddMasked() {
        VectorMask<Float> mask = VectorMask.fromArray(F_SPECIES, m, 0);
        for (int i = 0; i < LENGTH; i += F_SPECIES.length()) {
            FloatVector av = FloatVector.fromArray(F_SPECIES, fa, i);
            FloatVector bv = FloatVector.fromArray(F_SPECIES, fb, i);
            FloatVector cv = FloatVector.fromArray(F_SPECIES, fc, i);
            av.lanewise(VectorOperators.FMA, bv.neg(), cv.neg(), mask).intoArray(fr, i);
        }
        assertArrayEquals(fr, fa, fb, fc, m, (a, b, c) -> Math.fma(a, -b, -c));
    }

    /** Masked {@code fma(a, b, -c)} on floats; the IR rule requires at least one VFNMSB_MASKED node. */
    @Test
    @IR(counts = { IRNode.VFNMSB_MASKED, ">= 1" })
    public static void testFloatNegatedMultiplySubMasked() {
        VectorMask<Float> mask = VectorMask.fromArray(F_SPECIES, m, 0);
        for (int i = 0; i < LENGTH; i += F_SPECIES.length()) {
            FloatVector av = FloatVector.fromArray(F_SPECIES, fa, i);
            FloatVector bv = FloatVector.fromArray(F_SPECIES, fb, i);
            FloatVector cv = FloatVector.fromArray(F_SPECIES, fc, i);
            av.lanewise(VectorOperators.FMA, bv, cv.neg(), mask).intoArray(fr, i);
        }
        assertArrayEquals(fr, fa, fb, fc, m, (a, b, c) -> Math.fma(a, b, -c));
    }

    /** Masked {@code fma(a, -b, c)} on doubles; the IR rule requires at least one VFMSB_MASKED node. */
    @Test
    @IR(counts = { IRNode.VFMSB_MASKED, ">= 1" })
    public static void testDoubleMultiplySubMasked() {
        VectorMask<Double> mask = VectorMask.fromArray(D_SPECIES, m, 0);
        for (int i = 0; i < LENGTH; i += D_SPECIES.length()) {
            DoubleVector av = DoubleVector.fromArray(D_SPECIES, da, i);
            DoubleVector bv = DoubleVector.fromArray(D_SPECIES, db, i);
            DoubleVector cv = DoubleVector.fromArray(D_SPECIES, dc, i);
            av.lanewise(VectorOperators.FMA, bv.neg(), cv, mask).intoArray(dr, i);
        }
        assertArrayEquals(dr, da, db, dc, m, (a, b, c) -> Math.fma(a, -b, c));
    }

    /** Masked {@code fma(a, -b, -c)} on doubles; the IR rule requires at least one VFNMAD_MASKED node. */
    @Test
    @IR(counts = { IRNode.VFNMAD_MASKED, ">= 1" })
    public static void testDoubleNegatedMultiplyAddMasked() {
        VectorMask<Double> mask = VectorMask.fromArray(D_SPECIES, m, 0);
        for (int i = 0; i < LENGTH; i += D_SPECIES.length()) {
            DoubleVector av = DoubleVector.fromArray(D_SPECIES, da, i);
            DoubleVector bv = DoubleVector.fromArray(D_SPECIES, db, i);
            DoubleVector cv = DoubleVector.fromArray(D_SPECIES, dc, i);
            av.lanewise(VectorOperators.FMA, bv.neg(), cv.neg(), mask).intoArray(dr, i);
        }
        assertArrayEquals(dr, da, db, dc, m, (a, b, c) -> Math.fma(a, -b, -c));
    }

    /** Masked {@code fma(a, b, -c)} on doubles; the IR rule requires at least one VFNMSB_MASKED node. */
    @Test
    @IR(counts = { IRNode.VFNMSB_MASKED, ">= 1" })
    public static void testDoubleNegatedMultiplySubMasked() {
        VectorMask<Double> mask = VectorMask.fromArray(D_SPECIES, m, 0);
        for (int i = 0; i < LENGTH; i += D_SPECIES.length()) {
            DoubleVector av = DoubleVector.fromArray(D_SPECIES, da, i);
            DoubleVector bv = DoubleVector.fromArray(D_SPECIES, db, i);
            DoubleVector cv = DoubleVector.fromArray(D_SPECIES, dc, i);
            av.lanewise(VectorOperators.FMA, bv, cv.neg(), mask).intoArray(dr, i);
        }
        assertArrayEquals(dr, da, db, dc, m, (a, b, c) -> Math.fma(a, b, -c));
    }

    /**
     * Entry point for the jtreg driver: runs every {@code @Test} method in
     * this class through the IR test framework with the incubating vector
     * module added and {@code -XX:UseSVE=1}.
     */
    public static void main(String[] args) {
        TestFramework.runWithFlags("--add-modules=jdk.incubator.vector",
                                   "-XX:UseSVE=1");
    }
}