8280510: AArch64: Vectorize operations with loop induction variable

Reviewed-by: adinn, thartmann
This commit is contained in:
Pengfei Li 2022-04-28 14:13:24 +00:00
parent 36bf6fbe08
commit ea83b4455b
12 changed files with 177 additions and 6 deletions

@ -2465,6 +2465,7 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
}
break;
case Op_MulVL:
case Op_PopulateIndex:
return false;
case Op_VectorLoadShuffle:
case Op_VectorRearrange:

@ -5380,6 +5380,21 @@ instruct loadconB(vReg dst, immI0 src) %{
ins_pipe(pipe_slow);
%}
// -------------------------- Populate Index to a Vector --------------------------
// Matches a PopulateIndex vector node and emits one SVE index instruction.
//   dst  - destination vector register
//   src1 - general-purpose register operand handed to sve_index
//   src2 - int constant operand handed to sve_index as its immediate
// The predicate limits this rule to UseSVE > 0.
instruct populateindex(vReg dst, iRegIorL2I src1, immI src2) %{
predicate(UseSVE > 0);
match(Set dst (PopulateIndex src1 src2));
ins_cost(SVE_COST);
format %{ "sve_index $dst, $src1, $src2\t # populate index (sve)" %}
ins_encode %{
// The element size variant is derived from the element type of this vector node.
BasicType bt = Matcher::vector_element_basic_type(this);
// NOTE(review): immI shows no range restriction on the constant here; confirm the
// value always fits the immediate field that sve_index encodes.
__ sve_index(as_FloatRegister($dst$$reg), __ elemType_to_regVariant(bt),
as_Register($src1$$reg), $src2$$constant);
%}
ins_pipe(pipe_slow);
%}
// Intrinsics for String.indexOf(char)

@ -2962,6 +2962,21 @@ instruct loadconB(vReg dst, immI0 src) %{
ins_pipe(pipe_slow);
%}
// -------------------------- Populate Index to a Vector --------------------------
// Matches a PopulateIndex vector node and emits one SVE index instruction.
//   dst  - destination vector register
//   src1 - general-purpose register operand handed to sve_index
//   src2 - int constant operand handed to sve_index as its immediate
// The predicate limits this rule to UseSVE > 0.
instruct populateindex(vReg dst, iRegIorL2I src1, immI src2) %{
predicate(UseSVE > 0);
match(Set dst (PopulateIndex src1 src2));
ins_cost(SVE_COST);
format %{ "sve_index $dst, $src1, $src2\t # populate index (sve)" %}
ins_encode %{
// The element size variant is derived from the element type of this vector node.
BasicType bt = Matcher::vector_element_basic_type(this);
// NOTE review: immI shows no range restriction on the constant here; confirm the
// value always fits the immediate field that sve_index encodes.
__ sve_index(as_FloatRegister($dst$$reg), __ elemType_to_regVariant(bt),
as_Register($src1$$reg), $src2$$constant);
%}
ins_pipe(pipe_slow);
%}
// Intrinsics for String.indexOf(char)
dnl

@ -3793,9 +3793,19 @@ void sve_fcm(Condition cond, PRegister Pd, SIMD_RegVariant T,
INSN(sve_lastb, 0b1);
#undef INSN
// SVE INDEX, scalar + immediate form: fills Zd with an index sequence that
// starts from the value in general-purpose register Rn and is incremented by
// the immediate imm.
//   Zd  - destination vector register, written to bits 4..0
//   T   - element size variant, written to bits 23..22; Q is rejected
//   Rn  - start register, written to bits 9..5
//   imm - increment, written through sf into bits 20..16
void sve_index(FloatRegister Zd, SIMD_RegVariant T, Register Rn, int imm) {
  starti;
  assert(T != Q, "invalid size");

  // Leading opcode byte, element size and the fixed bit 21.
  f(0b00000100, 31, 24);
  f(T, 23, 22);
  f(0b1, 21);

  // Increment immediate, then the opcode bits that select this operand form.
  sf(imm, 20, 16);
  f(0b010001, 15, 10);

  // Start register and destination vector register.
  rf(Rn, 5);
  rf(Zd, 0);
}
// SVE create index starting from and incremented by immediate
void sve_index(FloatRegister Zd, SIMD_RegVariant T, int imm1, int imm2) {
starti;
assert(T != Q, "invalid size");
f(0b00000100, 31, 24), f(T, 23, 22), f(0b1, 21);
sf(imm2, 20, 16), f(0b010000, 15, 10);
sf(imm1, 9, 5), rf(Zd, 0);

@ -4103,6 +4103,7 @@ int MatchRule::is_expensive() const {
strcmp(opType,"ReplicateL")==0 ||
strcmp(opType,"ReplicateF")==0 ||
strcmp(opType,"ReplicateD")==0 ||
strcmp(opType,"PopulateIndex")==0 ||
strcmp(opType,"AddReductionVI")==0 ||
strcmp(opType,"AddReductionVL")==0 ||
strcmp(opType,"AddReductionVF")==0 ||
@ -4227,7 +4228,7 @@ bool MatchRule::is_vector() const {
"LShiftVB","LShiftVS","LShiftVI","LShiftVL",
"RShiftVB","RShiftVS","RShiftVI","RShiftVL",
"URShiftVB","URShiftVS","URShiftVI","URShiftVL",
"ReplicateB","ReplicateS","ReplicateI","ReplicateL","ReplicateF","ReplicateD",
"ReplicateB","ReplicateS","ReplicateI","ReplicateL","ReplicateF","ReplicateD","PopulateIndex",
"RoundDoubleModeV","RotateLeftV" , "RotateRightV", "LoadVector","StoreVector",
"LoadVectorGather", "StoreVectorScatter", "LoadVectorGatherMasked", "StoreVectorScatterMasked",
"VectorTest", "VectorLoadMask", "VectorStoreMask", "VectorBlend", "VectorInsert",

@ -284,6 +284,7 @@ macro(PopCountI)
macro(PopCountL)
macro(PopCountVI)
macro(PopCountVL)
macro(PopulateIndex)
macro(PrefetchAllocation)
macro(Proj)
macro(RShiftI)

@ -149,6 +149,7 @@ class PhaseTransform;
class PhaseValues;
class PhiNode;
class Pipeline;
class PopulateIndexNode;
class ProjNode;
class RangeCheckNode;
class RegMask;

@ -1312,7 +1312,16 @@ bool SuperWord::have_similar_inputs(Node* s1, Node* s2) {
// assert(independent(s1, s2) == true, "check independent");
if (s1->req() > 1 && !s1->is_Store() && !s1->is_Load()) {
for (uint i = 1; i < s1->req(); i++) {
if (s1->in(i)->Opcode() != s2->in(i)->Opcode()) return false;
Node* s1_in = s1->in(i);
Node* s2_in = s2->in(i);
if (s1_in->is_Phi() && s2_in->is_Add() && s2_in->in(1) == s1_in) {
// Special handling for expressions with loop iv, like "b[i] = a[i] * i".
// In this case, one node has an input from the tripcount iv and another
// node has an input from iv plus an offset.
if (!s1_in->as_Phi()->is_tripcount(T_INT)) return false;
} else {
if (s1_in->Opcode() != s2_in->Opcode()) return false;
}
}
}
return true;
@ -2837,6 +2846,23 @@ Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
vlen = cl->slp_max_unroll();
}
// Insert index population operation
if (opd == iv()) {
BasicType p0_bt = velt_basic_type(p0);
BasicType iv_bt = is_subword_type(p0_bt) ? p0_bt : T_INT;
const TypeVect* vt = TypeVect::make(iv_bt, vlen);
Node* vn = new PopulateIndexNode(iv(), _igvn.intcon(1), vt);
#ifdef ASSERT
if (TraceNewVectors) {
tty->print("new Vector node: ");
vn->dump();
}
#endif
_igvn.register_new_node_with_optimizer(vn);
_phase->set_ctrl(vn, _phase->get_ctrl(opd));
return vn;
}
if (same_inputs(p, opd_idx)) {
if (opd->is_Vector() || opd->is_LoadVector()) {
assert(((opd_idx != 2) || !VectorNode::is_shift(p0)), "shift's count can't be vector");
@ -2847,7 +2873,6 @@ Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
return opd; // input is matching vector
}
if ((opd_idx == 2) && VectorNode::is_shift(p0)) {
Compile* C = _phase->C;
Node* cnt = opd;
// Vector instructions do not mask shift count, do it here.
juint mask = (p0->bottom_type() == TypeInt::INT) ? (BitsPerInt - 1) : (BitsPerLong - 1);
@ -3008,10 +3033,25 @@ bool SuperWord::is_vector_use(Node* use, int u_idx) {
Node* def = use->in(u_idx);
Node_List* d_pk = my_pack(def);
if (d_pk == NULL) {
// check for scalar promotion
Node* n = u_pk->at(0)->in(u_idx);
for (uint i = 1; i < u_pk->size(); i++) {
if (u_pk->at(i)->in(u_idx) != n) return false;
if (n == iv()) {
// check for index population
BasicType bt = velt_basic_type(use);
if (!VectorNode::is_populate_index_supported(bt)) return false;
for (uint i = 1; i < u_pk->size(); i++) {
// We can create a vector filled with iv indices if all other nodes
// in use pack have inputs of iv plus node index.
Node* use_in = u_pk->at(i)->in(u_idx);
if (!use_in->is_Add() || use_in->in(1) != n) return false;
const TypeInt* offset_t = use_in->in(2)->bottom_type()->is_int();
if (offset_t == NULL || !offset_t->is_con() ||
offset_t->get_con() != (jint) i) return false;
}
} else {
// check for scalar promotion
for (uint i = 1; i < u_pk->size(); i++) {
if (u_pk->at(i)->in(u_idx) != n) return false;
}
}
return true;
}

@ -395,6 +395,11 @@ bool VectorNode::is_vector_integral_negate_supported(int opc, uint vlen, BasicTy
return false;
}
// Reports whether the matcher has a PopulateIndex rule for element type bt.
// The query is made with Matcher::max_vector_size(bt) as the vector length.
bool VectorNode::is_populate_index_supported(BasicType bt) {
  const int max_vlen = Matcher::max_vector_size(bt);
  const bool supported =
      Matcher::match_rule_supported_vector(Op_PopulateIndex, max_vlen, bt);
  return supported;
}
bool VectorNode::is_shift_opcode(int opc) {
switch (opc) {
case Op_LShiftI:

@ -98,6 +98,7 @@ class VectorNode : public TypeNode {
static bool is_scalar_rotate(Node* n);
static bool is_vector_rotate_supported(int opc, uint vlen, BasicType bt);
static bool is_vector_integral_negate_supported(int opc, uint vlen, BasicType bt, bool use_predicate);
static bool is_populate_index_supported(BasicType bt);
static bool is_invariant_vector(Node* n);
static bool is_all_ones_vector(Node* n);
static bool is_vector_bitwise_not_pattern(Node* n);
@ -1104,6 +1105,13 @@ class ReplicateDNode : public VectorNode {
virtual int Opcode() const;
};
//======================Populate_Indices_into_a_Vector=========================
// Vector node that populates a vector with indices. The two inputs and the
// vector type are forwarded unchanged to the VectorNode constructor.
//   start - first input (SuperWord passes the loop induction variable)
//   step  - second input (SuperWord passes the int constant 1)
//   vt    - type of the resulting vector
class PopulateIndexNode : public VectorNode {
 public:
  PopulateIndexNode(Node* start, Node* step, const TypeVect* vt)
    : VectorNode(start, step, vt) {}

  virtual int Opcode() const;
};
//========================Pack_Scalars_into_a_Vector===========================
//------------------------------PackNode---------------------------------------

@ -1811,6 +1811,7 @@
declare_c2_type(ReplicateLNode, VectorNode) \
declare_c2_type(ReplicateFNode, VectorNode) \
declare_c2_type(ReplicateDNode, VectorNode) \
declare_c2_type(PopulateIndexNode, VectorNode) \
declare_c2_type(PackNode, VectorNode) \
declare_c2_type(PackBNode, PackNode) \
declare_c2_type(PackSNode, PackNode) \

@ -0,0 +1,73 @@
/*
* Copyright (c) 2022, Arm Limited. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.bench.vm.compiler;
import java.util.Random;
import org.openjdk.jmh.annotations.*;
/**
 * JMH benchmark state holding four arrays of {@code count} elements, plus three
 * benchmarks whose loop bodies use the loop counter {@code i} itself as a value.
 * The loops are kept in their plain counted form on purpose: their exact shape is
 * what the benchmarks exercise.
 */
@State(Scope.Benchmark)
public class IndexVector {
// Length of every array and trip count of every benchmark loop; set through @Param.
@Param({"65536"})
private int count;
// Destination of indexArrayFill.
private int[] idx;
// Random input values read by exprWithIndex1.
private int[] src;
// Destination of exprWithIndex1.
private int[] dst;
// Destination of exprWithIndex2.
private float[] f;
/**
 * Allocates the four arrays with {@code count} elements each and fills
 * {@code src} with values from a {@link Random} created with the fixed seed 0.
 * The other three arrays are left with their default contents.
 */
@Setup
public void init() {
idx = new int[count];
src = new int[count];
dst = new int[count];
f = new float[count];
Random ran = new Random(0);
for (int i = 0; i < count; i++) {
src[i] = ran.nextInt();
}
}
/** Stores the loop index itself into each element: {@code idx[i] = i}. */
@Benchmark
public void indexArrayFill() {
for (int i = 0; i < count; i++) {
idx[i] = i;
}
}
/** Multiplies each source element by the low three bits of its index: {@code src[i] * (i & 7)}. */
@Benchmark
public void exprWithIndex1() {
for (int i = 0; i < count; i++) {
dst[i] = src[i] * (i & 7);
}
}
/**
 * Stores {@code i * i + 100} into a float array. The expression is evaluated in
 * int arithmetic and converted to float only on the store.
 */
@Benchmark
public void exprWithIndex2() {
for (int i = 0; i < count; i++) {
// NOTE(review): with count = 65536, i * i exceeds the int range for larger i and
// wraps around; presumably acceptable for a throughput benchmark - confirm.
f[i] = i * i + 100;
}
}
}