8301012: [vectorapi]: Intrinsify CompressBitsV/ExpandBitsV and add the AArch64 SVE backend implementation

Co-authored-by: Xiaohong Gong <xgong@openjdk.org>
Co-authored-by: Jatin Bhateja <jbhateja@openjdk.org>
Reviewed-by: ngasson, eliu, thartmann
Author: Bhavana Kilambi, 2023-03-27 08:50:05 +00:00 (committed by Xiaohong Gong)
parent 10fa7d1f9b
commit de1c12ed63
9 changed files with 257 additions and 5 deletions
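
For context, the scalar operations being vectorized here are Integer/Long.compress and Integer/Long.expand (added in JDK 19). A minimal, self-contained illustration of their semantics (class name and values are mine, for illustration only):

    // Integer.compress packs the mask-selected bits of src toward bit 0;
    // Integer.expand scatters the low-order bits of src to the mask's one-bit positions.
    public class CompressExpandDemo {
        public static void main(String[] args) {
            int src  = 0b1011_0110;
            int mask = 0b1010_1010;
            // bits 7,5,3,1 of src (1,1,0,1), packed toward bit 0
            System.out.println(Integer.toBinaryString(Integer.compress(src, mask))); // 1101
            // bits 3..0 of src (0,1,1,0), scattered to positions 7,5,3,1
            System.out.println(Integer.toBinaryString(Integer.expand(src, mask)));   // 101000
        }
    }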

src/hotspot/cpu/aarch64/aarch64_vector.ad
@@ -220,6 +220,12 @@ source %{
return false;
}
break;
case Op_CompressBitsV:
case Op_ExpandBitsV:
if (UseSVE < 2 || !VM_Version::supports_svebitperm()) {
return false;
}
break;
default:
break;
}
@@ -240,6 +246,8 @@ source %{
case Op_MulReductionVF:
case Op_MulReductionVI:
case Op_MulReductionVL:
case Op_CompressBitsV:
case Op_ExpandBitsV:
return false;
// We use Op_LoadVectorMasked to implement the predicated Op_LoadVector.
// Hence we turn to check whether Op_LoadVectorMasked is supported. The
@@ -6619,3 +6627,31 @@ instruct vsignum_gt128b(vReg dst, vReg src, vReg zero, vReg one, vReg tmp, pRegGov pgtmp)
%}
ins_pipe(pipe_slow);
%}
// ---------------------------------- CompressBitsV --------------------------------
instruct vcompressBits(vReg dst, vReg src1, vReg src2) %{
match(Set dst (CompressBitsV src1 src2));
format %{ "vcompressBits $dst, $src1, $src2\t# vector (sve)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
__ sve_bext($dst$$FloatRegister, size,
$src1$$FloatRegister, $src2$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
// ----------------------------------- ExpandBitsV ---------------------------------
instruct vexpandBits(vReg dst, vReg src1, vReg src2) %{
match(Set dst (ExpandBitsV src1 src2));
format %{ "vexpandBits $dst, $src1, $src2\t# vector (sve)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
__ sve_bdep($dst$$FloatRegister, size,
$src1$$FloatRegister, $src2$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
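
The two rules emit the SVE2 BITPERM instructions BEXT (bit extract) and BDEP (bit deposit). As a scalar model of what each vector lane computes (a sketch of mine, not JDK code), BEXT behaves like Long.compress and BDEP like Long.expand:

    public class BitPermModel {
        static long bext(long src, long mask) {          // bit extract == compress
            long result = 0;
            for (long bit = 1; mask != 0; bit <<= 1) {
                if ((src & mask & -mask) != 0) {         // src bit at mask's lowest set bit
                    result |= bit;
                }
                mask &= mask - 1;                        // clear mask's lowest set bit
            }
            return result;
        }
        static long bdep(long src, long mask) {          // bit deposit == expand
            long result = 0;
            for (long bit = 1; mask != 0; bit <<= 1) {
                if ((src & bit) != 0) {                  // next low-order bit of src
                    result |= mask & -mask;              // deposit at mask's lowest set bit
                }
                mask &= mask - 1;
            }
            return result;
        }
        public static void main(String[] args) {
            long src = 0xB6L, mask = 0xAAL;
            assert bext(src, mask) == Long.compress(src, mask);
            assert bdep(src, mask) == Long.expand(src, mask);
        }
    }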

src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
@@ -210,6 +210,12 @@ source %{
return false;
}
break;
case Op_CompressBitsV:
case Op_ExpandBitsV:
if (UseSVE < 2 || !VM_Version::supports_svebitperm()) {
return false;
}
break;
default:
break;
}
@@ -230,6 +236,8 @@ source %{
case Op_MulReductionVF:
case Op_MulReductionVI:
case Op_MulReductionVL:
case Op_CompressBitsV:
case Op_ExpandBitsV:
return false;
// We use Op_LoadVectorMasked to implement the predicated Op_LoadVector.
// Hence we turn to check whether Op_LoadVectorMasked is supported. The
@@ -4950,3 +4958,25 @@ instruct vsignum_gt128b(vReg dst, vReg src, vReg zero, vReg one, vReg tmp, pRegGov pgtmp)
%}
ins_pipe(pipe_slow);
%}
dnl
dnl BITPERM($1, $2, $3 )
dnl BITPERM(insn_name, op_name, insn)
define(`BITPERM', `
instruct $1(vReg dst, vReg src1, vReg src2) %{
match(Set dst ($2 src1 src2));
format %{ "$1 $dst, $src1, $src2\t# vector (sve)" %}
ins_encode %{
BasicType bt = Matcher::vector_element_basic_type(this);
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
__ $3($dst$$FloatRegister, size,
$src1$$FloatRegister, $src2$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}')dnl
dnl
// ---------------------------------- CompressBitsV --------------------------------
BITPERM(vcompressBits, CompressBitsV, sve_bext)
// ----------------------------------- ExpandBitsV ---------------------------------
BITPERM(vexpandBits, ExpandBitsV, sve_bdep)
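
aarch64_vector.ad is generated from this .m4 source during the build, so the BITPERM macro here expands into exactly the vcompressBits/vexpandBits instruct blocks shown in the aarch64_vector.ad hunk above, keeping the two files in sync.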

src/hotspot/share/adlc/formssel.cpp
@@ -4210,7 +4210,7 @@ bool MatchRule::is_vector() const {
"SqrtVD","SqrtVF",
"AndV" ,"XorV" ,"OrV",
"MaxV", "MinV",
- "CompressV", "ExpandV", "CompressM",
+ "CompressV", "ExpandV", "CompressM", "CompressBitsV", "ExpandBitsV",
"AddReductionVI", "AddReductionVL",
"AddReductionVF", "AddReductionVD",
"MulReductionVI", "MulReductionVL",

src/hotspot/share/opto/classes.hpp
@@ -77,6 +77,8 @@ macro(CheckCastPP)
macro(ClearArray)
macro(CompressBits)
macro(ExpandBits)
macro(CompressBitsV)
macro(ExpandBitsV)
macro(ConstraintCast)
macro(CMoveD)
macro(CMoveVD)

src/hotspot/share/opto/vectornode.cpp
@@ -182,11 +182,9 @@ int VectorNode::opcode(int sopc, BasicType bt) {
case Op_ReverseBytesL:
return (bt == T_LONG ? Op_ReverseBytesV : 0);
case Op_CompressBits:
- // Not implemented. Returning 0 temporarily
- return 0;
+ return (bt == T_INT || bt == T_LONG ? Op_CompressBitsV : 0);
case Op_ExpandBits:
- // Not implemented. Returning 0 temporarily
- return 0;
+ return (bt == T_INT || bt == T_LONG ? Op_ExpandBitsV : 0);
case Op_LShiftI:
switch (bt) {
case T_BOOLEAN:
@@ -703,6 +701,8 @@ VectorNode* VectorNode::make(int vopc, Node* n1, Node* n2, const TypeVect* vt, bool is_mask, bool is_var_shift) {
case Op_ExpandV: return new ExpandVNode(n1, n2, vt);
case Op_CompressV: return new CompressVNode(n1, n2, vt);
case Op_CompressM: assert(n1 == nullptr, ""); return new CompressMNode(n2, vt);
case Op_CompressBitsV: return new CompressBitsVNode(n1, n2, vt);
case Op_ExpandBitsV: return new ExpandBitsVNode(n1, n2, vt);
case Op_CountLeadingZerosV: return new CountLeadingZerosVNode(n1, vt);
case Op_CountTrailingZerosV: return new CountTrailingZerosVNode(n1, vt);
default:
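
VectorNode::opcode is the scalar-to-vector opcode mapping C2 consults during both Vector API intrinsification and SuperWord auto-vectorization, so a plain scalar loop like the hypothetical one below can in principle now be vectorized too, subject to the usual profitability checks and the SVE2 BITPERM predicate above:

    // Hypothetical example: if vectorized, each Integer.compress maps to a
    // CompressBitsV node, which the AArch64 rules above match to sve_bext.
    static void compressLoop(int[] a, int[] b, int[] r) {
        for (int i = 0; i < a.length; i++) {
            r[i] = Integer.compress(a[i], b[i]);
        }
    }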

src/hotspot/share/opto/vectornode.hpp
@@ -1804,4 +1804,18 @@ public:
virtual int Opcode() const;
};
class CompressBitsVNode : public VectorNode {
public:
CompressBitsVNode(Node* in, Node* mask, const TypeVect* vt)
: VectorNode(in, mask, vt) {}
virtual int Opcode() const;
};
class ExpandBitsVNode : public VectorNode {
public:
ExpandBitsVNode(Node* in, Node* mask, const TypeVect* vt)
: VectorNode(in, mask, vt) {}
virtual int Opcode() const;
};
#endif // SHARE_OPTO_VECTORNODE_HPP

src/hotspot/share/runtime/vmStructs.cpp
@@ -1764,6 +1764,8 @@
declare_c2_type(CompressVNode, VectorNode) \
declare_c2_type(CompressMNode, VectorNode) \
declare_c2_type(ExpandVNode, VectorNode) \
declare_c2_type(CompressBitsVNode, VectorNode) \
declare_c2_type(ExpandBitsVNode, VectorNode) \
declare_c2_type(MulReductionVDNode, ReductionNode) \
declare_c2_type(DivVFNode, VectorNode) \
declare_c2_type(DivVDNode, VectorNode) \

test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java
@@ -1408,6 +1408,16 @@ public class IRNode {
machOnlyNameRegex(XOR3_SVE, "veor3_sve");
}
public static final String COMPRESS_BITSV = PREFIX + "COMPRESS_BITSV" + POSTFIX;
static {
beforeMatchingNameRegex(COMPRESS_BITSV, "CompressBitsV");
}
public static final String EXPAND_BITSV = PREFIX + "EXPAND_BITSV" + POSTFIX;
static {
beforeMatchingNameRegex(EXPAND_BITSV, "ExpandBitsV");
}
/*
* Utility methods to set up IR_NODE_MAPPINGS.
*/
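
These IRNode constants let IR-framework tests assert that CompressBitsV/ExpandBitsV nodes appear in C2's IR before matching; the new test below uses them in its @IR(counts = {IRNode.COMPRESS_BITSV, "> 0"}) annotations.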

test/hotspot/jtreg/compiler/vectorapi/TestVectorCompressExpandBits.java (new file)
@@ -0,0 +1,158 @@
/*
* Copyright (c) 2023, Arm Limited. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package compiler.vectorapi;
import compiler.lib.ir_framework.*;
import java.util.Random;
import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.LongVector;
import jdk.incubator.vector.VectorOperators;
import jdk.incubator.vector.VectorSpecies;
import jdk.test.lib.Asserts;
import jdk.test.lib.Utils;
/**
* @test
* @bug 8301012
* @library /test/lib /
* @requires os.arch == "aarch64" & vm.cpu.features ~= ".*sve2.*" & vm.cpu.features ~= ".*svebitperm.*"
* @summary [vectorapi]: Intrinsify CompressBitsV/ExpandBitsV and add the AArch64 SVE backend implementation
* @modules jdk.incubator.vector
* @run driver compiler.vectorapi.TestVectorCompressExpandBits
*/
public class TestVectorCompressExpandBits {
private static final VectorSpecies<Integer> I_SPECIES = IntVector.SPECIES_PREFERRED;
private static final VectorSpecies<Long> L_SPECIES = LongVector.SPECIES_PREFERRED;
private static int LENGTH = 1024;
private static final Random RD = Utils.getRandomInstance();
private static int[] ia;
private static int[] ib;
private static int[] ir;
private static long[] la;
private static long[] lb;
private static long[] lr;
static {
ia = new int[LENGTH];
ib = new int[LENGTH];
ir = new int[LENGTH];
la = new long[LENGTH];
lb = new long[LENGTH];
lr = new long[LENGTH];
for (int i = 0; i < LENGTH; i++) {
ia[i] = RD.nextInt(25);
ib[i] = RD.nextInt(25);
la[i] = RD.nextLong(25);
lb[i] = RD.nextLong(25);
}
}
// Test for vectorized Integer.compress operation in SVE2
@Test
@IR(counts = {IRNode.COMPRESS_BITSV, "> 0"})
public static void testIntCompress() {
for (int i = 0; i < LENGTH; i += I_SPECIES.length()) {
IntVector av = IntVector.fromArray(I_SPECIES, ia, i);
IntVector bv = IntVector.fromArray(I_SPECIES, ib, i);
av.lanewise(VectorOperators.COMPRESS_BITS, bv).intoArray(ir, i);
}
}
@Run(test = "testIntCompress")
public static void testIntCompress_runner() {
testIntCompress();
for (int i = 0; i < LENGTH; i++) {
Asserts.assertEquals(Integer.compress(ia[i], ib[i]), ir[i]);
}
}
// Test for vectorized Integer.expand operation in SVE2
@Test
@IR(counts = {IRNode.EXPAND_BITSV, "> 0"})
public static void testIntExpand() {
for (int i = 0; i < LENGTH; i += I_SPECIES.length()) {
IntVector av = IntVector.fromArray(I_SPECIES, ia, i);
IntVector bv = IntVector.fromArray(I_SPECIES, ib, i);
av.lanewise(VectorOperators.EXPAND_BITS, bv).intoArray(ir, i);
}
}
@Run(test = "testIntExpand")
public static void testIntExpand_runner() {
testIntExpand();
for (int i = 0; i < LENGTH; i++) {
Asserts.assertEquals(Integer.expand(ia[i], ib[i]), ir[i]);
}
}
// Test for vectorized Long.compress operation in SVE2
@Test
@IR(counts = {IRNode.COMPRESS_BITSV, "> 0"})
public static void testLongCompress() {
for (int i = 0; i < LENGTH; i += L_SPECIES.length()) {
LongVector av = LongVector.fromArray(L_SPECIES, la, i);
LongVector bv = LongVector.fromArray(L_SPECIES, lb, i);
av.lanewise(VectorOperators.COMPRESS_BITS, bv).intoArray(lr, i);
}
}
@Run(test = "testLongCompress")
public static void testLongCompress_runner() {
testLongCompress();
for (int i = 0; i < LENGTH; i++) {
Asserts.assertEquals(Long.compress(la[i], lb[i]), lr[i]);
}
}
// Test for vectorized Long.expand operation in SVE2
@Test
@IR(counts = {IRNode.EXPAND_BITSV, "> 0"})
public static void testLongExpand() {
for (int i = 0; i < LENGTH; i += L_SPECIES.length()) {
LongVector av = LongVector.fromArray(L_SPECIES, la, i);
LongVector bv = LongVector.fromArray(L_SPECIES, lb, i);
av.lanewise(VectorOperators.EXPAND_BITS, bv).intoArray(lr, i);
}
}
@Run(test = "testLongExpand")
public static void testLongExpand_runner() {
testLongExpand();
for (int i = 0; i < LENGTH; i++) {
Asserts.assertEquals(Long.expand(la[i], lb[i]), lr[i]);
}
}
public static void main(String[] args) {
TestFramework.runWithFlags("--add-modules=jdk.incubator.vector",
"-XX:UseSVE=2");
}
}
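
As with other hotspot jtreg tests, this can be run from a JDK checkout with, for example, make test TEST="compiler/vectorapi/TestVectorCompressExpandBits.java"; the @requires clause keeps it from being selected on hardware without SVE2 and the BITPERM extension.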