8301012: [vectorapi]: Intrinsify CompressBitsV/ExpandBitsV and add the AArch64 SVE backend implementation
Co-authored-by: Xiaohong Gong <xgong@openjdk.org>
Co-authored-by: Jatin Bhateja <jbhateja@openjdk.org>
Reviewed-by: ngasson, eliu, thartmann
This commit is contained in:
parent
10fa7d1f9b
commit
de1c12ed63
src/hotspot
cpu/aarch64
share
test/hotspot/jtreg/compiler
@ -220,6 +220,12 @@ source %{
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_CompressBitsV:
|
||||
case Op_ExpandBitsV:
|
||||
if (UseSVE < 2 || !VM_Version::supports_svebitperm()) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -240,6 +246,8 @@ source %{
|
||||
case Op_MulReductionVF:
|
||||
case Op_MulReductionVI:
|
||||
case Op_MulReductionVL:
|
||||
case Op_CompressBitsV:
|
||||
case Op_ExpandBitsV:
|
||||
return false;
|
||||
// We use Op_LoadVectorMasked to implement the predicated Op_LoadVector.
|
||||
// Hence we turn to check whether Op_LoadVectorMasked is supported. The
|
||||
@ -6619,3 +6627,31 @@ instruct vsignum_gt128b(vReg dst, vReg src, vReg zero, vReg one, vReg tmp, pRegG
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// ---------------------------------- CompressBitsV --------------------------------
|
||||
|
||||
// Matcher rule for CompressBitsV: takes two vector-register inputs (src1, src2)
// and produces a vector-register result (dst) with a single sve_bext call.
// The opcode switch earlier in this file returns false for Op_CompressBitsV
// unless UseSVE >= 2 and VM_Version::supports_svebitperm() holds.
// NOTE(review): this rule has the same shape as the BITPERM m4 template
// expansion; presumably generated from it - confirm before hand-editing.
instruct vcompressBits(vReg dst, vReg src1, vReg src2) %{
|
||||
match(Set dst (CompressBitsV src1 src2));
|
||||
format %{ "vcompressBits $dst, $src1, $src2\t# vector (sve)" %}
|
||||
ins_encode %{
|
||||
// Translate the vector element type of this node into the SIMD_RegVariant
// that is passed to the assembler.
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
|
||||
// Operand order: destination, register variant, src1, src2.
__ sve_bext($dst$$FloatRegister, size,
|
||||
$src1$$FloatRegister, $src2$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// ----------------------------------- ExpandBitsV ---------------------------------
|
||||
|
||||
// Matcher rule for ExpandBitsV: takes two vector-register inputs (src1, src2)
// and produces a vector-register result (dst) with a single sve_bdep call.
// The opcode switch earlier in this file returns false for Op_ExpandBitsV
// unless UseSVE >= 2 and VM_Version::supports_svebitperm() holds.
// NOTE(review): this rule has the same shape as the BITPERM m4 template
// expansion; presumably generated from it - confirm before hand-editing.
instruct vexpandBits(vReg dst, vReg src1, vReg src2) %{
|
||||
match(Set dst (ExpandBitsV src1 src2));
|
||||
format %{ "vexpandBits $dst, $src1, $src2\t# vector (sve)" %}
|
||||
ins_encode %{
|
||||
// Translate the vector element type of this node into the SIMD_RegVariant
// that is passed to the assembler.
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
|
||||
// Operand order: destination, register variant, src1, src2.
__ sve_bdep($dst$$FloatRegister, size,
|
||||
$src1$$FloatRegister, $src2$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
@ -210,6 +210,12 @@ source %{
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_CompressBitsV:
|
||||
case Op_ExpandBitsV:
|
||||
if (UseSVE < 2 || !VM_Version::supports_svebitperm()) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -230,6 +236,8 @@ source %{
|
||||
case Op_MulReductionVF:
|
||||
case Op_MulReductionVI:
|
||||
case Op_MulReductionVL:
|
||||
case Op_CompressBitsV:
|
||||
case Op_ExpandBitsV:
|
||||
return false;
|
||||
// We use Op_LoadVectorMasked to implement the predicated Op_LoadVector.
|
||||
// Hence we turn to check whether Op_LoadVectorMasked is supported. The
|
||||
@ -4950,3 +4958,25 @@ instruct vsignum_gt128b(vReg dst, vReg src, vReg zero, vReg one, vReg tmp, pRegG
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
dnl
|
||||
dnl BITPERM($1, $2, $3 )
|
||||
dnl BITPERM(insn_name, op_name, insn)
dnl
dnl Expands to one instruct rule named insn_name that matches the two-input
dnl vector node op_name (operands src1 and src2, result dst) and encodes it
dnl with the assembler call insn. The register variant handed to insn is
dnl derived from the vector element type of the matched node.
dnl Used below for CompressBitsV (sve_bext) and ExpandBitsV (sve_bdep).
|
||||
define(`BITPERM', `
|
||||
instruct $1(vReg dst, vReg src1, vReg src2) %{
|
||||
match(Set dst ($2 src1 src2));
|
||||
format %{ "$1 $dst, $src1, $src2\t# vector (sve)" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
|
||||
__ $3($dst$$FloatRegister, size,
|
||||
$src1$$FloatRegister, $src2$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
dnl
|
||||
// ---------------------------------- CompressBitsV --------------------------------
|
||||
BITPERM(vcompressBits, CompressBitsV, sve_bext)
|
||||
|
||||
// ----------------------------------- ExpandBitsV ---------------------------------
|
||||
BITPERM(vexpandBits, ExpandBitsV, sve_bdep)
|
||||
|
@ -4210,7 +4210,7 @@ bool MatchRule::is_vector() const {
|
||||
"SqrtVD","SqrtVF",
|
||||
"AndV" ,"XorV" ,"OrV",
|
||||
"MaxV", "MinV",
|
||||
"CompressV", "ExpandV", "CompressM",
|
||||
"CompressV", "ExpandV", "CompressM", "CompressBitsV", "ExpandBitsV",
|
||||
"AddReductionVI", "AddReductionVL",
|
||||
"AddReductionVF", "AddReductionVD",
|
||||
"MulReductionVI", "MulReductionVL",
|
||||
|
@ -77,6 +77,8 @@ macro(CheckCastPP)
|
||||
macro(ClearArray)
|
||||
macro(CompressBits)
|
||||
macro(ExpandBits)
|
||||
macro(CompressBitsV)
|
||||
macro(ExpandBitsV)
|
||||
macro(ConstraintCast)
|
||||
macro(CMoveD)
|
||||
macro(CMoveVD)
|
||||
|
@ -182,11 +182,9 @@ int VectorNode::opcode(int sopc, BasicType bt) {
|
||||
case Op_ReverseBytesL:
|
||||
return (bt == T_LONG ? Op_ReverseBytesV : 0);
|
||||
case Op_CompressBits:
|
||||
// Not implemented. Returning 0 temporarily
|
||||
return 0;
|
||||
return (bt == T_INT || bt == T_LONG ? Op_CompressBitsV : 0);
|
||||
case Op_ExpandBits:
|
||||
// Not implemented. Returning 0 temporarily
|
||||
return 0;
|
||||
return (bt == T_INT || bt == T_LONG ? Op_ExpandBitsV : 0);
|
||||
case Op_LShiftI:
|
||||
switch (bt) {
|
||||
case T_BOOLEAN:
|
||||
@ -703,6 +701,8 @@ VectorNode* VectorNode::make(int vopc, Node* n1, Node* n2, const TypeVect* vt, b
|
||||
case Op_ExpandV: return new ExpandVNode(n1, n2, vt);
|
||||
case Op_CompressV: return new CompressVNode(n1, n2, vt);
|
||||
case Op_CompressM: assert(n1 == nullptr, ""); return new CompressMNode(n2, vt);
|
||||
case Op_CompressBitsV: return new CompressBitsVNode(n1, n2, vt);
|
||||
case Op_ExpandBitsV: return new ExpandBitsVNode(n1, n2, vt);
|
||||
case Op_CountLeadingZerosV: return new CountLeadingZerosVNode(n1, vt);
|
||||
case Op_CountTrailingZerosV: return new CountTrailingZerosVNode(n1, vt);
|
||||
default:
|
||||
|
@ -1804,4 +1804,18 @@ public:
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
class CompressBitsVNode : public VectorNode {
|
||||
public:
|
||||
CompressBitsVNode(Node* in, Node* mask, const TypeVect* vt)
|
||||
: VectorNode(in, mask, vt) {}
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
class ExpandBitsVNode : public VectorNode {
|
||||
public:
|
||||
ExpandBitsVNode(Node* in, Node* mask, const TypeVect* vt)
|
||||
: VectorNode(in, mask, vt) {}
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
#endif // SHARE_OPTO_VECTORNODE_HPP
|
||||
|
@ -1764,6 +1764,8 @@
|
||||
declare_c2_type(CompressVNode, VectorNode) \
|
||||
declare_c2_type(CompressMNode, VectorNode) \
|
||||
declare_c2_type(ExpandVNode, VectorNode) \
|
||||
declare_c2_type(CompressBitsVNode, VectorNode) \
|
||||
declare_c2_type(ExpandBitsVNode, VectorNode) \
|
||||
declare_c2_type(MulReductionVDNode, ReductionNode) \
|
||||
declare_c2_type(DivVFNode, VectorNode) \
|
||||
declare_c2_type(DivVDNode, VectorNode) \
|
||||
|
@ -1408,6 +1408,16 @@ public class IRNode {
|
||||
machOnlyNameRegex(XOR3_SVE, "veor3_sve");
|
||||
}
|
||||
|
||||
public static final String COMPRESS_BITSV = PREFIX + "COMPRESS_BITSV" + POSTFIX;
|
||||
static {
|
||||
beforeMatchingNameRegex(COMPRESS_BITSV, "CompressBitsV");
|
||||
}
|
||||
|
||||
public static final String EXPAND_BITSV = PREFIX + "EXPAND_BITSV" + POSTFIX;
|
||||
static {
|
||||
beforeMatchingNameRegex(EXPAND_BITSV, "ExpandBitsV");
|
||||
}
|
||||
|
||||
/*
|
||||
* Utility methods to set up IR_NODE_MAPPINGS.
|
||||
*/
|
||||
|
@ -0,0 +1,158 @@
|
||||
/*
|
||||
* Copyright (c) 2023, Arm Limited. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package compiler.vectorapi;
|
||||
|
||||
import compiler.lib.ir_framework.*;
|
||||
|
||||
import java.util.Random;
|
||||
|
||||
import jdk.incubator.vector.IntVector;
|
||||
import jdk.incubator.vector.LongVector;
|
||||
import jdk.incubator.vector.VectorOperators;
|
||||
import jdk.incubator.vector.VectorSpecies;
|
||||
|
||||
import jdk.test.lib.Asserts;
|
||||
import jdk.test.lib.Utils;
|
||||
|
||||
/**
|
||||
* @test
|
||||
* @bug 8301012
|
||||
* @library /test/lib /
|
||||
* @requires os.arch == "aarch64" & vm.cpu.features ~= ".*sve2.*" & vm.cpu.features ~= ".*svebitperm.*"
|
||||
* @summary [vectorapi]: Intrinsify CompressBitsV/ExpandBitsV and add the AArch64 SVE backend implementation
|
||||
* @modules jdk.incubator.vector
|
||||
* @run driver compiler.vectorapi.TestVectorCompressExpandBits
|
||||
*/
|
||||
|
||||
public class TestVectorCompressExpandBits {
|
||||
private static final VectorSpecies<Integer> I_SPECIES = IntVector.SPECIES_PREFERRED;
|
||||
private static final VectorSpecies<Long> L_SPECIES = LongVector.SPECIES_PREFERRED;
|
||||
|
||||
private static int LENGTH = 1024;
|
||||
private static final Random RD = Utils.getRandomInstance();
|
||||
|
||||
private static int[] ia;
|
||||
private static int[] ib;
|
||||
private static int[] ir;
|
||||
private static long[] la;
|
||||
private static long[] lb;
|
||||
private static long[] lr;
|
||||
|
||||
static {
|
||||
ia = new int[LENGTH];
|
||||
ib = new int[LENGTH];
|
||||
ir = new int[LENGTH];
|
||||
la = new long[LENGTH];
|
||||
lb = new long[LENGTH];
|
||||
lr = new long[LENGTH];
|
||||
|
||||
for (int i = 0; i < LENGTH; i++) {
|
||||
ia[i] = RD.nextInt(25);
|
||||
ib[i] = RD.nextInt(25);
|
||||
la[i] = RD.nextLong(25);
|
||||
lb[i] = RD.nextLong(25);
|
||||
}
|
||||
}
|
||||
|
||||
// Test for vectorized Integer.compress operation in SVE2
|
||||
@Test
|
||||
@IR(counts = {IRNode.COMPRESS_BITSV, "> 0"})
|
||||
public static void testIntCompress() {
|
||||
for (int i = 0; i < LENGTH; i += I_SPECIES.length()) {
|
||||
IntVector av = IntVector.fromArray(I_SPECIES, ia, i);
|
||||
IntVector bv = IntVector.fromArray(I_SPECIES, ib, i);
|
||||
av.lanewise(VectorOperators.COMPRESS_BITS, bv).intoArray(ir, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = "testIntCompress")
|
||||
public static void testIntCompress_runner() {
|
||||
testIntCompress();
|
||||
for (int i = 0; i < LENGTH; i++) {
|
||||
Asserts.assertEquals(Integer.compress(ia[i], ib[i]), ir[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Test for vectorized Integer.expand operation in SVE2
|
||||
@Test
|
||||
@IR(counts = {IRNode.EXPAND_BITSV, "> 0"})
|
||||
public static void testIntExpand() {
|
||||
for (int i = 0; i < LENGTH; i += I_SPECIES.length()) {
|
||||
IntVector av = IntVector.fromArray(I_SPECIES, ia, i);
|
||||
IntVector bv = IntVector.fromArray(I_SPECIES, ib, i);
|
||||
av.lanewise(VectorOperators.EXPAND_BITS, bv).intoArray(ir, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = "testIntExpand")
|
||||
public static void testIntExpand_runner() {
|
||||
testIntExpand();
|
||||
for (int i = 0; i < LENGTH; i++) {
|
||||
Asserts.assertEquals(Integer.expand(ia[i], ib[i]), ir[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Test for vectorized Long.compress operation in SVE2
|
||||
@Test
|
||||
@IR(counts = {IRNode.COMPRESS_BITSV, "> 0"})
|
||||
public static void testLongCompress() {
|
||||
for (int i = 0; i < LENGTH; i += L_SPECIES.length()) {
|
||||
LongVector av = LongVector.fromArray(L_SPECIES, la, i);
|
||||
LongVector bv = LongVector.fromArray(L_SPECIES, lb, i);
|
||||
av.lanewise(VectorOperators.COMPRESS_BITS, bv).intoArray(lr, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = "testLongCompress")
|
||||
public static void testLongCompress_runner() {
|
||||
testLongCompress();
|
||||
for (int i = 0; i < LENGTH; i++) {
|
||||
Asserts.assertEquals(Long.compress(la[i], lb[i]), lr[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Test for vectorized Long.expand operation in SVE2
|
||||
@Test
|
||||
@IR(counts = {IRNode.EXPAND_BITSV, "> 0"})
|
||||
public static void testLongExpand() {
|
||||
for (int i = 0; i < LENGTH; i += L_SPECIES.length()) {
|
||||
LongVector av = LongVector.fromArray(L_SPECIES, la, i);
|
||||
LongVector bv = LongVector.fromArray(L_SPECIES, lb, i);
|
||||
av.lanewise(VectorOperators.EXPAND_BITS, bv).intoArray(lr, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = "testLongExpand")
|
||||
public static void testLongExpand_runner() {
|
||||
testLongExpand();
|
||||
for (int i = 0; i < LENGTH; i++) {
|
||||
Asserts.assertEquals(Long.expand(la[i], lb[i]), lr[i]);
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
TestFramework.runWithFlags("--add-modules=jdk.incubator.vector",
|
||||
"-XX:UseSVE=2");
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user