8280510: AArch64: Vectorize operations with loop induction variable
Reviewed-by: adinn, thartmann
This commit is contained in:
parent
36bf6fbe08
commit
ea83b4455b
src/hotspot
cpu/aarch64
share
test/micro/org/openjdk/bench/vm/compiler
@ -2465,6 +2465,7 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
|
||||
}
|
||||
break;
|
||||
case Op_MulVL:
|
||||
case Op_PopulateIndex:
|
||||
return false;
|
||||
case Op_VectorLoadShuffle:
|
||||
case Op_VectorRearrange:
|
||||
|
@ -5380,6 +5380,21 @@ instruct loadconB(vReg dst, immI0 src) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// -------------------------- Populate Index to a Vector --------------------------
|
||||
|
||||
// Matches a PopulateIndex node and emits one sve_index instruction whose start
// value comes from register src1 and whose increment is the constant src2.
// NOTE(review): src2 is matched as immI, but Assembler::sve_index writes its
// immediate with sf(imm, 20, 16). The only producer visible in this change,
// SuperWord::vector_opd, passes intcon(1). Confirm that no other constant can
// reach this rule.
instruct populateindex(vReg dst, iRegIorL2I src1, immI src2) %{
|
||||
// Rule is only available when UseSVE > 0.
predicate(UseSVE > 0);
|
||||
match(Set dst (PopulateIndex src1 src2));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "sve_index $dst, $src1, $src2\t # populate index (sve)" %}
|
||||
ins_encode %{
|
||||
// The register variant is derived from the element type of this vector node.
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
__ sve_index(as_FloatRegister($dst$$reg), __ elemType_to_regVariant(bt),
|
||||
as_Register($src1$$reg), $src2$$constant);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Intrinsics for String.indexOf(char)
|
||||
|
||||
|
||||
|
@ -2962,6 +2962,21 @@ instruct loadconB(vReg dst, immI0 src) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// -------------------------- Populate Index to a Vector --------------------------
|
||||
|
||||
// Matches a PopulateIndex node and emits one sve_index instruction whose start
// value comes from register src1 and whose increment is the constant src2.
// NOTE(review): src2 is matched as immI, but Assembler::sve_index writes its
// immediate with sf(imm, 20, 16). The only producer visible in this change,
// SuperWord::vector_opd, passes intcon(1). Confirm that no other constant can
// reach this rule.
instruct populateindex(vReg dst, iRegIorL2I src1, immI src2) %{
|
||||
// Rule is only available when UseSVE > 0.
predicate(UseSVE > 0);
|
||||
match(Set dst (PopulateIndex src1 src2));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "sve_index $dst, $src1, $src2\t # populate index (sve)" %}
|
||||
ins_encode %{
|
||||
// The register variant is derived from the element type of this vector node.
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
__ sve_index(as_FloatRegister($dst$$reg), __ elemType_to_regVariant(bt),
|
||||
as_Register($src1$$reg), $src2$$constant);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Intrinsics for String.indexOf(char)
|
||||
|
||||
dnl
|
||||
|
@ -3793,9 +3793,19 @@ void sve_fcm(Condition cond, PRegister Pd, SIMD_RegVariant T,
|
||||
INSN(sve_lastb, 0b1);
|
||||
#undef INSN
|
||||
|
||||
// SVE Create index starting from general-purpose register and incremented by immediate
|
||||
void sve_index(FloatRegister Zd, SIMD_RegVariant T, Register Rn, int imm) {
|
||||
starti;
|
||||
assert(T != Q, "invalid size");
|
||||
f(0b00000100, 31, 24), f(T, 23, 22), f(0b1, 21);
|
||||
sf(imm, 20, 16), f(0b010001, 15, 10);
|
||||
rf(Rn, 5), rf(Zd, 0);
|
||||
}
|
||||
|
||||
// SVE create index starting from and incremented by immediate
|
||||
void sve_index(FloatRegister Zd, SIMD_RegVariant T, int imm1, int imm2) {
|
||||
starti;
|
||||
assert(T != Q, "invalid size");
|
||||
f(0b00000100, 31, 24), f(T, 23, 22), f(0b1, 21);
|
||||
sf(imm2, 20, 16), f(0b010000, 15, 10);
|
||||
sf(imm1, 9, 5), rf(Zd, 0);
|
||||
|
@ -4103,6 +4103,7 @@ int MatchRule::is_expensive() const {
|
||||
strcmp(opType,"ReplicateL")==0 ||
|
||||
strcmp(opType,"ReplicateF")==0 ||
|
||||
strcmp(opType,"ReplicateD")==0 ||
|
||||
strcmp(opType,"PopulateIndex")==0 ||
|
||||
strcmp(opType,"AddReductionVI")==0 ||
|
||||
strcmp(opType,"AddReductionVL")==0 ||
|
||||
strcmp(opType,"AddReductionVF")==0 ||
|
||||
@ -4227,7 +4228,7 @@ bool MatchRule::is_vector() const {
|
||||
"LShiftVB","LShiftVS","LShiftVI","LShiftVL",
|
||||
"RShiftVB","RShiftVS","RShiftVI","RShiftVL",
|
||||
"URShiftVB","URShiftVS","URShiftVI","URShiftVL",
|
||||
"ReplicateB","ReplicateS","ReplicateI","ReplicateL","ReplicateF","ReplicateD",
|
||||
"ReplicateB","ReplicateS","ReplicateI","ReplicateL","ReplicateF","ReplicateD","PopulateIndex",
|
||||
"RoundDoubleModeV","RotateLeftV" , "RotateRightV", "LoadVector","StoreVector",
|
||||
"LoadVectorGather", "StoreVectorScatter", "LoadVectorGatherMasked", "StoreVectorScatterMasked",
|
||||
"VectorTest", "VectorLoadMask", "VectorStoreMask", "VectorBlend", "VectorInsert",
|
||||
|
@ -284,6 +284,7 @@ macro(PopCountI)
|
||||
macro(PopCountL)
|
||||
macro(PopCountVI)
|
||||
macro(PopCountVL)
|
||||
macro(PopulateIndex)
|
||||
macro(PrefetchAllocation)
|
||||
macro(Proj)
|
||||
macro(RShiftI)
|
||||
|
@ -149,6 +149,7 @@ class PhaseTransform;
|
||||
class PhaseValues;
|
||||
class PhiNode;
|
||||
class Pipeline;
|
||||
class PopulateIndexNode;
|
||||
class ProjNode;
|
||||
class RangeCheckNode;
|
||||
class RegMask;
|
||||
|
@ -1312,7 +1312,16 @@ bool SuperWord::have_similar_inputs(Node* s1, Node* s2) {
|
||||
// assert(independent(s1, s2) == true, "check independent");
|
||||
if (s1->req() > 1 && !s1->is_Store() && !s1->is_Load()) {
|
||||
for (uint i = 1; i < s1->req(); i++) {
|
||||
if (s1->in(i)->Opcode() != s2->in(i)->Opcode()) return false;
|
||||
Node* s1_in = s1->in(i);
|
||||
Node* s2_in = s2->in(i);
|
||||
if (s1_in->is_Phi() && s2_in->is_Add() && s2_in->in(1) == s1_in) {
|
||||
// Special handling for expressions with loop iv, like "b[i] = a[i] * i".
|
||||
// In this case, one node has an input from the tripcount iv and another
|
||||
// node has an input from iv plus an offset.
|
||||
if (!s1_in->as_Phi()->is_tripcount(T_INT)) return false;
|
||||
} else {
|
||||
if (s1_in->Opcode() != s2_in->Opcode()) return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
@ -2837,6 +2846,23 @@ Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
|
||||
vlen = cl->slp_max_unroll();
|
||||
}
|
||||
|
||||
// Insert index population operation
|
||||
if (opd == iv()) {
|
||||
BasicType p0_bt = velt_basic_type(p0);
|
||||
BasicType iv_bt = is_subword_type(p0_bt) ? p0_bt : T_INT;
|
||||
const TypeVect* vt = TypeVect::make(iv_bt, vlen);
|
||||
Node* vn = new PopulateIndexNode(iv(), _igvn.intcon(1), vt);
|
||||
#ifdef ASSERT
|
||||
if (TraceNewVectors) {
|
||||
tty->print("new Vector node: ");
|
||||
vn->dump();
|
||||
}
|
||||
#endif
|
||||
_igvn.register_new_node_with_optimizer(vn);
|
||||
_phase->set_ctrl(vn, _phase->get_ctrl(opd));
|
||||
return vn;
|
||||
}
|
||||
|
||||
if (same_inputs(p, opd_idx)) {
|
||||
if (opd->is_Vector() || opd->is_LoadVector()) {
|
||||
assert(((opd_idx != 2) || !VectorNode::is_shift(p0)), "shift's count can't be vector");
|
||||
@ -2847,7 +2873,6 @@ Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
|
||||
return opd; // input is matching vector
|
||||
}
|
||||
if ((opd_idx == 2) && VectorNode::is_shift(p0)) {
|
||||
Compile* C = _phase->C;
|
||||
Node* cnt = opd;
|
||||
// Vector instructions do not mask shift count, do it here.
|
||||
juint mask = (p0->bottom_type() == TypeInt::INT) ? (BitsPerInt - 1) : (BitsPerLong - 1);
|
||||
@ -3008,10 +3033,25 @@ bool SuperWord::is_vector_use(Node* use, int u_idx) {
|
||||
Node* def = use->in(u_idx);
|
||||
Node_List* d_pk = my_pack(def);
|
||||
if (d_pk == NULL) {
|
||||
// check for scalar promotion
|
||||
Node* n = u_pk->at(0)->in(u_idx);
|
||||
for (uint i = 1; i < u_pk->size(); i++) {
|
||||
if (u_pk->at(i)->in(u_idx) != n) return false;
|
||||
if (n == iv()) {
|
||||
// check for index population
|
||||
BasicType bt = velt_basic_type(use);
|
||||
if (!VectorNode::is_populate_index_supported(bt)) return false;
|
||||
for (uint i = 1; i < u_pk->size(); i++) {
|
||||
// We can create a vector filled with iv indices if all other nodes
|
||||
// in use pack have inputs of iv plus node index.
|
||||
Node* use_in = u_pk->at(i)->in(u_idx);
|
||||
if (!use_in->is_Add() || use_in->in(1) != n) return false;
|
||||
const TypeInt* offset_t = use_in->in(2)->bottom_type()->is_int();
|
||||
if (offset_t == NULL || !offset_t->is_con() ||
|
||||
offset_t->get_con() != (jint) i) return false;
|
||||
}
|
||||
} else {
|
||||
// check for scalar promotion
|
||||
for (uint i = 1; i < u_pk->size(); i++) {
|
||||
if (u_pk->at(i)->in(u_idx) != n) return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -395,6 +395,11 @@ bool VectorNode::is_vector_integral_negate_supported(int opc, uint vlen, BasicTy
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns whether the matcher reports vector support for Op_PopulateIndex on
// element type bt. The vector length used for the query is whatever
// Matcher::max_vector_size(bt) returns for that element type.
bool VectorNode::is_populate_index_supported(BasicType bt) {
  return Matcher::match_rule_supported_vector(Op_PopulateIndex,
                                              Matcher::max_vector_size(bt),
                                              bt);
}
|
||||
|
||||
bool VectorNode::is_shift_opcode(int opc) {
|
||||
switch (opc) {
|
||||
case Op_LShiftI:
|
||||
|
@ -98,6 +98,7 @@ class VectorNode : public TypeNode {
|
||||
static bool is_scalar_rotate(Node* n);
|
||||
static bool is_vector_rotate_supported(int opc, uint vlen, BasicType bt);
|
||||
static bool is_vector_integral_negate_supported(int opc, uint vlen, BasicType bt, bool use_predicate);
|
||||
static bool is_populate_index_supported(BasicType bt);
|
||||
static bool is_invariant_vector(Node* n);
|
||||
static bool is_all_ones_vector(Node* n);
|
||||
static bool is_vector_bitwise_not_pattern(Node* n);
|
||||
@ -1104,6 +1105,13 @@ class ReplicateDNode : public VectorNode {
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
//======================Populate_Indices_into_a_Vector=========================
|
||||
class PopulateIndexNode : public VectorNode {
|
||||
public:
|
||||
PopulateIndexNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1, in2, vt) {}
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
//========================Pack_Scalars_into_a_Vector===========================
|
||||
|
||||
//------------------------------PackNode---------------------------------------
|
||||
|
@ -1811,6 +1811,7 @@
|
||||
declare_c2_type(ReplicateLNode, VectorNode) \
|
||||
declare_c2_type(ReplicateFNode, VectorNode) \
|
||||
declare_c2_type(ReplicateDNode, VectorNode) \
|
||||
declare_c2_type(PopulateIndexNode, VectorNode) \
|
||||
declare_c2_type(PackNode, VectorNode) \
|
||||
declare_c2_type(PackBNode, PackNode) \
|
||||
declare_c2_type(PackSNode, PackNode) \
|
||||
|
73
test/micro/org/openjdk/bench/vm/compiler/IndexVector.java
Normal file
73
test/micro/org/openjdk/bench/vm/compiler/IndexVector.java
Normal file
@ -0,0 +1,73 @@
|
||||
/*
|
||||
* Copyright (c) 2022, Arm Limited. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package org.openjdk.bench.vm.compiler;
|
||||
|
||||
import java.util.Random;
|
||||
|
||||
import org.openjdk.jmh.annotations.*;
|
||||
|
||||
/**
 * JMH benchmark state whose loops use the loop index itself as a value:
 * stored directly, combined with a loaded element, or used in arithmetic.
 * All arrays have length {@code count}.
 */
@State(Scope.Benchmark)
|
||||
public class IndexVector {
|
||||
// Number of elements in every array and trip count of every benchmark loop.
@Param({"65536"})
|
||||
private int count;
|
||||
|
||||
// Written by indexArrayFill().
private int[] idx;
|
||||
// Filled once in init(); read by exprWithIndex1().
private int[] src;
|
||||
// Written by exprWithIndex1().
private int[] dst;
|
||||
// Written by exprWithIndex2().
private float[] f;
|
||||
|
||||
/**
 * Allocates the four arrays with {@code count} elements and fills
 * {@code src} with ints drawn from a {@code Random} seeded with 0.
 */
@Setup
|
||||
public void init() {
|
||||
idx = new int[count];
|
||||
src = new int[count];
|
||||
dst = new int[count];
|
||||
f = new float[count];
|
||||
Random ran = new Random(0);
|
||||
for (int i = 0; i < count; i++) {
|
||||
src[i] = ran.nextInt();
|
||||
}
|
||||
}
|
||||
|
||||
/** Stores the loop index into each element of {@code idx}. */
@Benchmark
|
||||
public void indexArrayFill() {
|
||||
for (int i = 0; i < count; i++) {
|
||||
idx[i] = i;
|
||||
}
|
||||
}
|
||||
|
||||
/** Multiplies each {@code src} element by the low three bits of its index and stores it in {@code dst}. */
@Benchmark
|
||||
public void exprWithIndex1() {
|
||||
for (int i = 0; i < count; i++) {
|
||||
dst[i] = src[i] * (i & 7);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Stores {@code i * i + 100} into each element of {@code f}. The expression
 * is evaluated in int arithmetic and converted to float on assignment, so
 * with the default count of 65536 the product wraps once i reaches 46341.
 */
@Benchmark
|
||||
public void exprWithIndex2() {
|
||||
for (int i = 0; i < count; i++) {
|
||||
f[i] = i * i + 100;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user