8289422: Fix and re-enable vector conditional move

Reviewed-by: thartmann, kvn
This commit is contained in:
Fei Gao 2022-09-27 01:24:17 +00:00 committed by Pengfei Li
parent 1ddc92fef5
commit aa48705ddd
7 changed files with 374 additions and 15 deletions

View File

@ -5691,6 +5691,48 @@ instruct vblend_sve(vReg dst, vReg src1, vReg src2, pReg pg) %{
ins_pipe(pipe_slow);
%}
// ------------------------- Vector conditional move --------------------------
// Vector conditional move on float/double lanes, NEON flavour.
// Chosen when SVE is disabled, or when NEON is used for this vector length and
// the condition constant is not BoolTest::ne.
instruct vcmove_neon(vReg dst, vReg src1, vReg src2, immI cond, cmpOp copnd) %{
  predicate(UseSVE == 0 ||
            (VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)) &&
             n->in(1)->in(2)->get_int() != BoolTest::ne));
  match(Set dst (CMoveVF (Binary copnd cond) (Binary src1 src2)));
  match(Set dst (CMoveVD (Binary copnd cond) (Binary src1 src2)));
  effect(TEMP_DEF dst);
  format %{ "vcmove_neon.$copnd $dst, $src1, $src2\t# vector conditional move fp" %}
  ins_encode %{
    // Only 64-bit and 128-bit vectors are expected on this path.
    const uint vlen_bytes = Matcher::vector_length_in_bytes(this);
    assert(vlen_bytes == 8 || vlen_bytes == 16, "must be");
    const bool is_quad = (vlen_bytes == 16);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // First the compare of src1 against src2 is emitted with dst as its
    // destination register ...
    __ neon_compare($dst$$FloatRegister, elem_bt, $src1$$FloatRegister,
                    $src2$$FloatRegister, (int)($cond$$constant),
                    /* isQ */ is_quad);
    // ... then bsl combines src2 and src1 under control of the value in dst.
    __ bsl($dst$$FloatRegister, is_quad ? __ T16B : __ T8B,
           $src2$$FloatRegister, $src1$$FloatRegister);
  %}
  ins_pipe(pipe_slow);
%}
// Vector conditional move on float/double lanes, SVE flavour.
// Chosen when NEON is not used for this vector length, or when SVE is enabled
// and the condition constant is BoolTest::ne.
// pgtmp is a temporary governing predicate register that receives the compare
// result and then drives the select.
instruct vcmove_sve(vReg dst, vReg src1, vReg src2, immI cond, cmpOp copnd, pRegGov pgtmp) %{
  predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)) ||
            (UseSVE > 0 && n->in(1)->in(2)->get_int() == BoolTest::ne));
  match(Set dst (CMoveVF (Binary copnd cond) (Binary src1 src2)));
  match(Set dst (CMoveVD (Binary copnd cond) (Binary src1 src2)));
  effect(TEMP pgtmp);
  format %{ "vcmove_sve.$copnd $dst, $src1, $src2\t# vector conditional move fp. KILL $pgtmp" %}
  ins_encode %{
    assert(UseSVE > 0, "must be sve");
    BasicType bt = Matcher::vector_element_basic_type(this);
    // Compare src1 with src2 under an all-true governing predicate; the
    // outcome is written to pgtmp.
    __ sve_compare($pgtmp$$PRegister, bt, ptrue, $src1$$FloatRegister,
                   $src2$$FloatRegister, (int)($cond$$constant));
    // Select between src2 and src1 as directed by pgtmp.
    __ sve_sel($dst$$FloatRegister, __ elemType_to_regVariant(bt),
               $pgtmp$$PRegister, $src2$$FloatRegister, $src1$$FloatRegister);
  %}
  ins_pipe(pipe_slow);
%}
// ------------------------------ Vector round ---------------------------------
// vector Math.round

View File

@ -4063,6 +4063,48 @@ instruct vblend_sve(vReg dst, vReg src1, vReg src2, pReg pg) %{
ins_pipe(pipe_slow);
%}
// ------------------------- Vector conditional move --------------------------
// Vector conditional move on float/double lanes, NEON flavour.
// Chosen when SVE is disabled, or when NEON is used for this vector length and
// the condition constant is not BoolTest::ne.
instruct vcmove_neon(vReg dst, vReg src1, vReg src2, immI cond, cmpOp copnd) %{
  predicate(UseSVE == 0 ||
            (VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)) &&
             n->in(1)->in(2)->get_int() != BoolTest::ne));
  match(Set dst (CMoveVF (Binary copnd cond) (Binary src1 src2)));
  match(Set dst (CMoveVD (Binary copnd cond) (Binary src1 src2)));
  effect(TEMP_DEF dst);
  format %{ "vcmove_neon.$copnd $dst, $src1, $src2\t# vector conditional move fp" %}
  ins_encode %{
    // Only 64-bit and 128-bit vectors are expected on this path.
    const uint vlen_bytes = Matcher::vector_length_in_bytes(this);
    assert(vlen_bytes == 8 || vlen_bytes == 16, "must be");
    const bool is_quad = (vlen_bytes == 16);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // First the compare of src1 against src2 is emitted with dst as its
    // destination register ...
    __ neon_compare($dst$$FloatRegister, elem_bt, $src1$$FloatRegister,
                    $src2$$FloatRegister, (int)($cond$$constant),
                    /* isQ */ is_quad);
    // ... then bsl combines src2 and src1 under control of the value in dst.
    __ bsl($dst$$FloatRegister, is_quad ? __ T16B : __ T8B,
           $src2$$FloatRegister, $src1$$FloatRegister);
  %}
  ins_pipe(pipe_slow);
%}
// Vector conditional move on float/double lanes, SVE flavour.
// Chosen when NEON is not used for this vector length, or when SVE is enabled
// and the condition constant is BoolTest::ne.
// pgtmp is a temporary governing predicate register that receives the compare
// result and then drives the select.
instruct vcmove_sve(vReg dst, vReg src1, vReg src2, immI cond, cmpOp copnd, pRegGov pgtmp) %{
  predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)) ||
            (UseSVE > 0 && n->in(1)->in(2)->get_int() == BoolTest::ne));
  match(Set dst (CMoveVF (Binary copnd cond) (Binary src1 src2)));
  match(Set dst (CMoveVD (Binary copnd cond) (Binary src1 src2)));
  effect(TEMP pgtmp);
  format %{ "vcmove_sve.$copnd $dst, $src1, $src2\t# vector conditional move fp. KILL $pgtmp" %}
  ins_encode %{
    assert(UseSVE > 0, "must be sve");
    BasicType bt = Matcher::vector_element_basic_type(this);
    // Compare src1 with src2 under an all-true governing predicate; the
    // outcome is written to pgtmp.
    __ sve_compare($pgtmp$$PRegister, bt, ptrue, $src1$$FloatRegister,
                   $src2$$FloatRegister, (int)($cond$$constant));
    // Select between src2 and src1 as directed by pgtmp.
    __ sve_sel($dst$$FloatRegister, __ elemType_to_regVariant(bt),
               $pgtmp$$PRegister, $src2$$FloatRegister, $src1$$FloatRegister);
  %}
  ins_pipe(pipe_slow);
%}
// ------------------------------ Vector round ---------------------------------
// vector Math.round

View File

@ -2356,9 +2356,7 @@ void Matcher::find_shared_post_visit(Node* n, uint opcode) {
case Op_CMoveI:
case Op_CMoveL:
case Op_CMoveN:
case Op_CMoveP:
case Op_CMoveVF:
case Op_CMoveVD: {
case Op_CMoveP: {
// Restructure into a binary tree for Matching. It's possible that
// we could move this code up next to the graph reshaping for IfNodes
// or vice-versa, but I do not want to debug this for Ladybird.
@ -2370,6 +2368,20 @@ void Matcher::find_shared_post_visit(Node* n, uint opcode) {
n->del_req(3);
break;
}
case Op_CMoveVF:
case Op_CMoveVD: {
  // Reshape the vector cmove into the binary-tree form used for matching:
  //   CMoveVF (Binary bool mask) (Binary src1 src2)
  // Input 1 holds the condition code as an integer constant; a BoolNode is
  // built on top of it and paired with the constant itself.
  Node* cond_con = n->in(1);
  assert(cond_con->is_Con(), "The condition input of cmove vector node must be a constant.");
  BoolTest::mask test_mask = (BoolTest::mask)cond_con->get_int();
  Node* cond_pair = new BinaryNode(new BoolNode(cond_con, test_mask), cond_con);
  Node* value_pair = new BinaryNode(n->in(2), n->in(3));
  // Rewire: slot 1 = (bool, mask), slot 2 = (src1, src2), slot 3 goes away.
  n->set_req(1, cond_pair);
  n->set_req(2, value_pair);
  n->del_req(3);
  break;
}
case Op_VectorCmpMasked: {
Node* pair1 = new BinaryNode(n->in(2), n->in(3));
n->set_req(2, pair1);

View File

@ -2064,12 +2064,7 @@ bool SuperWord::implemented(Node_List* p) {
opc = Op_RShiftI;
}
retValue = VectorNode::implemented(opc, size, velt_basic_type(p0));
}
if (!retValue) {
if (is_cmov_pack(p)) {
NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::implemented: found cmpd pack"); print_pack(p);})
return true;
}
NOT_PRODUCT(if(retValue && is_trace_cmov() && is_cmov_pack(p)) {tty->print_cr("SWPointer::implemented: found cmpd pack"); print_pack(p);})
}
}
return retValue;
@ -2695,12 +2690,33 @@ bool SuperWord::output() {
ShouldNotReachHere();
}
int cond = (int)bol->as_Bool()->_test._test;
Node* in_cc = _igvn.intcon(cond);
NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created intcon in_cc node %d", in_cc->_idx); in_cc->dump();})
Node* cc = bol->clone();
cc->set_req(1, in_cc);
NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created bool cc node %d", cc->_idx); cc->dump();})
BoolTest boltest = bol->as_Bool()->_test;
BoolTest::mask cond = boltest._test;
Node* cmp = bol->in(1);
// The vector cmove carries only the condition code, so the scalar test has
// to be expressed in the operand order of the cmove inputs.
//
// When the src order of cmp node and cmove node are the same:
//   cmp:    CmpD src1 src2
//   bool:   Bool cmp mask
//   cmove:  CMoveD bool src1 src2
//   =====> vectorized, equivalent to
//   cmovev: CMoveVD mask src_vector1 src_vector2
//
// When the src order of cmp node and cmove node are different:
//   cmp:    CmpD src2 src1
//   bool:   Bool cmp mask
//   cmove:  CMoveD bool src1 src2
//   =====> equivalent to
//   cmp:    CmpD src1 src2
//   bool:   Bool cmp commute(mask)
//   cmove:  CMoveD bool src1 src2
//   (Note: ne and eq are symmetric, so they stay as they are after swapping.)
//   =====> vectorized, equivalent to
//   cmovev: CMoveVD commute(mask) src_vector1 src_vector2
//
// Swapping the operands of a comparison mirrors the test (src2 > src1 is
// src1 < src2); it does not invert it. Using negate() here would turn it
// into src1 <= src2, which selects the other input whenever both operands
// compare equal, and that is observable for 0.0 versus -0.0.
if (cmp->in(2) == n->in(CMoveNode::IfFalse) && cond != BoolTest::ne && cond != BoolTest::eq) {
  assert(cmp->in(1) == n->in(CMoveNode::IfTrue), "cmpnode and cmovenode don't share the same inputs.");
  cond = boltest.commute();
}
Node* cc = _igvn.intcon((int)cond);
NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created intcon in_cc node %d", cc->_idx); cc->dump();})
Node* src1 = vector_opd(p, 2); //2=CMoveNode::IfFalse
if (src1 == NULL) {

View File

@ -0,0 +1,229 @@
/*
* Copyright (c) 2022, Arm Limited. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package compiler.c2.irTests;
import compiler.lib.ir_framework.*;
import java.util.Random;
import jdk.test.lib.Asserts;
import jdk.test.lib.Utils;
/*
* @test
* @bug 8289422
* @key randomness
* @summary Auto-vectorization enhancement to support vector conditional move on AArch64
* @requires os.arch=="aarch64"
* @library /test/lib /
* @run driver compiler.c2.irTests.TestVectorConditionalMove
*/
// IR-framework test for auto-vectorized conditional moves on float and double
// arrays. Each @Test method is a simple element-wise "cond ? a[i] : b[i]" loop
// whose compiled IR must contain vector loads, a vector cmove node and vector
// stores; the runner then compares every output element against a scalar
// helper that evaluates the same expression.
public class TestVectorConditionalMove {
// Length of every input/output array.
final private static int SIZE = 3000;
private static final Random RANDOM = Utils.getRandomInstance();
// floata/floatb and doublea/doubleb are inputs, floatc/doublec receive results.
private static float[] floata = new float[SIZE];
private static float[] floatb = new float[SIZE];
private static float[] floatc = new float[SIZE];
private static double[] doublea = new double[SIZE];
private static double[] doubleb = new double[SIZE];
private static double[] doublec = new double[SIZE];
// Launches the test VM with the flags listed below.
// NOTE(review): the CompileCommand pattern "*.cmove*" matches the scalar
// cmove* helpers in this class, presumably so they are never compiled and
// can serve as an independent reference - confirm.
public static void main(String[] args) {
TestFramework.runWithFlags("-Xcomp", "-XX:-TieredCompilation", "-XX:+UseCMoveUnconditionally",
"-XX:+UseVectorCmov", "-XX:CompileCommand=exclude,*.cmove*");
}
// ---- Scalar reference helpers: one per @Test loop body below. ----
// The *Swap variants compare (b, a) while still selecting between a and b.
private float cmoveFloatGT(float a, float b) {
return (a > b) ? a : b;
}
private float cmoveFloatGTSwap(float a, float b) {
return (b > a) ? a : b;
}
private float cmoveFloatLT(float a, float b) {
return (a < b) ? a : b;
}
private float cmoveFloatLTSwap(float a, float b) {
return (b < a) ? a : b;
}
private float cmoveFloatEQ(float a, float b) {
return (a == b) ? a : b;
}
private double cmoveDoubleLE(double a, double b) {
return (a <= b) ? a : b;
}
private double cmoveDoubleLESwap(double a, double b) {
return (b <= a) ? a : b;
}
private double cmoveDoubleGE(double a, double b) {
return (a >= b) ? a : b;
}
private double cmoveDoubleGESwap(double a, double b) {
return (b >= a) ? a : b;
}
private double cmoveDoubleNE(double a, double b) {
return (a != b) ? a : b;
}
// ---- Tests under IR verification. ----
// Every rule requires at least one LOAD_VECTOR, one CMOVEVF/CMOVEVD and one
// STORE_VECTOR match in the compiled method.
// float, a > b
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVEVF, ">0", IRNode.STORE_VECTOR, ">0"})
private static void testCMoveVFGT(float[] a, float[] b, float[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] > b[i]) ? a[i] : b[i];
}
}
// float, b > a (compare operands swapped relative to the selected values)
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVEVF, ">0", IRNode.STORE_VECTOR, ">0"})
private static void testCMoveVFGTSwap(float[] a, float[] b, float[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (b[i] > a[i]) ? a[i] : b[i];
}
}
// float, a < b
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVEVF, ">0", IRNode.STORE_VECTOR, ">0"})
private static void testCMoveVFLT(float[] a, float[] b, float[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] < b[i]) ? a[i] : b[i];
}
}
// float, b < a (swapped)
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVEVF, ">0", IRNode.STORE_VECTOR, ">0"})
private static void testCMoveVFLTSwap(float[] a, float[] b, float[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (b[i] < a[i]) ? a[i] : b[i];
}
}
// float, a == b
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVEVF, ">0", IRNode.STORE_VECTOR, ">0"})
private static void testCMoveVFEQ(float[] a, float[] b, float[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] == b[i]) ? a[i] : b[i];
}
}
// double, a <= b
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVEVD, ">0", IRNode.STORE_VECTOR, ">0"})
private static void testCMoveVDLE(double[] a, double[] b, double[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] <= b[i]) ? a[i] : b[i];
}
}
// double, b <= a (swapped)
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVEVD, ">0", IRNode.STORE_VECTOR, ">0"})
private static void testCMoveVDLESwap(double[] a, double[] b, double[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (b[i] <= a[i]) ? a[i] : b[i];
}
}
// double, a >= b
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVEVD, ">0", IRNode.STORE_VECTOR, ">0"})
private static void testCMoveVDGE(double[] a, double[] b, double[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] >= b[i]) ? a[i] : b[i];
}
}
// double, b >= a (swapped)
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVEVD, ">0", IRNode.STORE_VECTOR, ">0"})
private static void testCMoveVDGESwap(double[] a, double[] b, double[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (b[i] >= a[i]) ? a[i] : b[i];
}
}
// double, a != b
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVEVD, ">0", IRNode.STORE_VECTOR, ">0"})
private static void testCMoveVDNE(double[] a, double[] b, double[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] != b[i]) ? a[i] : b[i];
}
}
// Shared runner for all ten tests: fills the inputs with random values, calls
// the tests pairwise (one float, one double) and checks each output element
// against the matching scalar helper.
@Run(test = {"testCMoveVFGT", "testCMoveVFLT","testCMoveVDLE", "testCMoveVDGE", "testCMoveVFEQ", "testCMoveVDNE",
"testCMoveVFGTSwap", "testCMoveVFLTSwap","testCMoveVDLESwap", "testCMoveVDGESwap"})
private void testCMove_runner() {
for (int i = 0; i < SIZE; i++) {
floata[i] = RANDOM.nextFloat();
floatb[i] = RANDOM.nextFloat();
doublea[i] = RANDOM.nextDouble();
doubleb[i] = RANDOM.nextDouble();
}
testCMoveVFGT(floata, floatb, floatc);
testCMoveVDLE(doublea, doubleb, doublec);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(floatc[i], cmoveFloatGT(floata[i], floatb[i]));
Asserts.assertEquals(doublec[i], cmoveDoubleLE(doublea[i], doubleb[i]));
}
testCMoveVFLT(floata, floatb, floatc);
testCMoveVDGE(doublea, doubleb, doublec);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(floatc[i], cmoveFloatLT(floata[i], floatb[i]));
Asserts.assertEquals(doublec[i], cmoveDoubleGE(doublea[i], doubleb[i]));
}
// Make every third pair equal so the EQ/NE tests see both outcomes. The
// inputs stay in this state for the Swap tests that follow as well.
for (int i = 0; i < SIZE; i++) {
if (i % 3 == 0) {
floatb[i] = floata[i];
doubleb[i] = doublea[i];
}
}
testCMoveVFEQ(floata, floatb, floatc);
testCMoveVDNE(doublea, doubleb, doublec);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(floatc[i], cmoveFloatEQ(floata[i], floatb[i]));
Asserts.assertEquals(doublec[i], cmoveDoubleNE(doublea[i], doubleb[i]));
}
testCMoveVFGTSwap(floata, floatb, floatc);
testCMoveVDLESwap(doublea, doubleb, doublec);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(floatc[i], cmoveFloatGTSwap(floata[i], floatb[i]));
Asserts.assertEquals(doublec[i], cmoveDoubleLESwap(doublea[i], doubleb[i]));
}
testCMoveVFLTSwap(floata, floatb, floatc);
testCMoveVDGESwap(doublea, doubleb, doublec);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(floatc[i], cmoveFloatLTSwap(floata[i], floatb[i]));
Asserts.assertEquals(doublec[i], cmoveDoubleGESwap(doublea[i], doubleb[i]));
}
}
}

View File

@ -142,6 +142,8 @@ public class IRNode {
public static final String SAFEPOINT = START + "SafePoint" + MID + END;
public static final String CMOVEI = START + "CMoveI" + MID + END;
public static final String CMOVEVF = START + "CMoveVF" + MID + END;
public static final String CMOVEVD = START + "CMoveVD" + MID + END;
public static final String ABS_I = START + "AbsI" + MID + END;
public static final String ABS_L = START + "AbsL" + MID + END;
public static final String ABS_F = START + "AbsF" + MID + END;

View File

@ -366,6 +366,22 @@ public abstract class TypeVectorOperations {
}
}
// Benchmark kernel: for each index, keeps resD[i] when it is less than
// doubles[i] and otherwise overwrites it with doubles[i]. The fork adds
// -XX:+UseCMoveUnconditionally and -XX:+UseVectorCmov to the JVM arguments.
@Benchmark
@Fork(jvmArgsPrepend = {"-XX:+UseCMoveUnconditionally", "-XX:+UseVectorCmov"})
public void cmoveD() {
for (int i = 0; i < COUNT; i++) {
resD[i] = resD[i] < doubles[i] ? resD[i] : doubles[i];
}
}
// Benchmark kernel: for each index, keeps resF[i] when it is less than
// floats[i] and otherwise overwrites it with floats[i]. The fork adds
// -XX:+UseCMoveUnconditionally and -XX:+UseVectorCmov to the JVM arguments.
@Benchmark
@Fork(jvmArgsPrepend = {"-XX:+UseCMoveUnconditionally", "-XX:+UseVectorCmov"})
public void cmoveF() {
for (int i = 0; i < COUNT; i++) {
resF[i] = resF[i] < floats[i] ? resF[i] : floats[i];
}
}
@Fork(value = 2, jvmArgsPrepend = {
"-XX:+UseSuperWord"
})