8255949: AArch64: Add support for vectorized shift right and accumulate
Reviewed-by: aph
This commit is contained in:
parent
1332ba3c3c
commit
f71f9dc93a
@ -18922,6 +18922,216 @@ instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
|
||||
ins_pipe(vshift128_imm);
|
||||
%}
|
||||
|
||||
// Signed shift right by an immediate and accumulate, 8 byte lanes (vecD):
// matches dst = dst + (src >> shift) and emits one ssra.
instruct vsraa8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst (RShiftVB src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "ssra $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    // Counts of 8 or more are capped at 7 before being handed to ssra.
    const int requested = (int)$shift$$constant;
    const int count = (requested >= 8) ? 7 : requested;
    __ ssra(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg), count);
  %}
  ins_pipe(vshift64_imm);
%}
|
||||
|
||||
// Signed shift right by an immediate and accumulate, 16 byte lanes (vecX):
// matches dst = dst + (src >> shift) and emits one ssra.
instruct vsraa16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst (RShiftVB src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "ssra $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    // Counts of 8 or more are capped at 7 before being handed to ssra.
    const int requested = (int)$shift$$constant;
    const int count = (requested >= 8) ? 7 : requested;
    __ ssra(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg), count);
  %}
  ins_pipe(vshift128_imm);
%}
|
||||
|
||||
// Signed shift right by an immediate and accumulate, 4 short lanes (vecD):
// matches dst = dst + (src >> shift) and emits one ssra on T4H.
instruct vsraa4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (RShiftVS src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "ssra $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    // Counts of 16 or more are capped at 15 before being handed to ssra.
    const int requested = (int)$shift$$constant;
    const int count = (requested >= 16) ? 15 : requested;
    __ ssra(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg), count);
  %}
  ins_pipe(vshift64_imm);
%}
|
||||
|
||||
// Signed shift right by an immediate and accumulate, 8 short lanes (vecX):
// matches dst = dst + (src >> shift) and emits one ssra on T8H.
instruct vsraa8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (RShiftVS src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "ssra $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    // Counts of 16 or more are capped at 15 before being handed to ssra.
    const int requested = (int)$shift$$constant;
    const int count = (requested >= 16) ? 15 : requested;
    __ ssra(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg), count);
  %}
  ins_pipe(vshift128_imm);
%}
|
||||
|
||||
// Signed shift right by an immediate and accumulate, 2 int lanes (vecD):
// matches dst = dst + (src >> shift) and emits one ssra on T2S.
instruct vsraa2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst (RShiftVI src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "ssra $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    // The immediate is passed through unchanged.
    const int count = (int)$shift$$constant;
    __ ssra(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg), count);
  %}
  ins_pipe(vshift64_imm);
%}
|
||||
|
||||
// Signed shift right by an immediate and accumulate, 4 int lanes (vecX):
// matches dst = dst + (src >> shift) and emits one ssra on T4S.
instruct vsraa4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst (RShiftVI src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "ssra $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    // The immediate is passed through unchanged.
    const int count = (int)$shift$$constant;
    __ ssra(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg), count);
  %}
  ins_pipe(vshift128_imm);
%}
|
||||
|
||||
// Signed shift right by an immediate and accumulate, 2 long lanes (vecX):
// matches dst = dst + (src >> shift) and emits one ssra on T2D.
instruct vsraa2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst (RShiftVL src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "ssra $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    // The immediate is passed through unchanged.
    const int count = (int)$shift$$constant;
    __ ssra(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg), count);
  %}
  ins_pipe(vshift128_imm);
%}
|
||||
|
||||
// Unsigned shift right by an immediate and accumulate, 8 byte lanes (vecD):
// matches dst = dst + (src >>> shift) and emits at most one usra.
instruct vsrla8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst (URShiftVB src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "usra $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    // A logical shift by the lane width (8) or more leaves zero in every lane,
    // so the accumulate adds nothing and dst already holds the result: emit no
    // code. src is only an input of this rule (the match sets dst alone), so it
    // must never be written here.
    if (sh < 8) {
      __ usra(as_FloatRegister($dst$$reg), __ T8B,
              as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
|
||||
|
||||
// Unsigned shift right by an immediate and accumulate, 16 byte lanes (vecX):
// matches dst = dst + (src >>> shift) and emits at most one usra.
instruct vsrla16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst (URShiftVB src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "usra $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    // A logical shift by the lane width (8) or more leaves zero in every lane,
    // so the accumulate adds nothing and dst already holds the result: emit no
    // code. src is only an input of this rule (the match sets dst alone), so it
    // must never be written here.
    if (sh < 8) {
      __ usra(as_FloatRegister($dst$$reg), __ T16B,
              as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
|
||||
|
||||
// Unsigned shift right by an immediate and accumulate, 4 short lanes (vecD):
// matches dst = dst + (src >>> shift) and emits at most one usra on T4H.
instruct vsrla4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (URShiftVS src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "usra $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    // A logical shift by the lane width (16) or more leaves zero in every lane,
    // so the accumulate adds nothing and dst already holds the result: emit no
    // code. src is only an input of this rule (the match sets dst alone), so it
    // must never be written here.
    if (sh < 16) {
      // Must be usra, not ushr: the rule adds the shifted value to dst, whereas
      // a plain shift would overwrite dst and drop the accumulated operand.
      __ usra(as_FloatRegister($dst$$reg), __ T4H,
              as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
|
||||
|
||||
// Unsigned shift right by an immediate and accumulate, 8 short lanes (vecX):
// matches dst = dst + (src >>> shift) and emits at most one usra on T8H.
instruct vsrla8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (URShiftVS src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "usra $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    // A logical shift by the lane width (16) or more leaves zero in every lane,
    // so the accumulate adds nothing and dst already holds the result: emit no
    // code. src is only an input of this rule (the match sets dst alone), so it
    // must never be written here.
    if (sh < 16) {
      __ usra(as_FloatRegister($dst$$reg), __ T8H,
              as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
|
||||
|
||||
// Unsigned shift right by an immediate and accumulate, 2 int lanes (vecD):
// matches dst = dst + (src >>> shift) and emits one usra on T2S.
instruct vsrla2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst (URShiftVI src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "usra $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    // The immediate is passed through unchanged.
    const int count = (int)$shift$$constant;
    __ usra(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg), count);
  %}
  ins_pipe(vshift64_imm);
%}
|
||||
|
||||
// Unsigned shift right by an immediate and accumulate, 4 int lanes (vecX):
// matches dst = dst + (src >>> shift) and emits one usra on T4S.
instruct vsrla4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst (URShiftVI src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "usra $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    // The immediate is passed through unchanged.
    const int count = (int)$shift$$constant;
    __ usra(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg), count);
  %}
  ins_pipe(vshift128_imm);
%}
|
||||
|
||||
// Unsigned shift right by an immediate and accumulate, 2 long lanes (vecX):
// matches dst = dst + (src >>> shift) and emits one usra on T2D.
instruct vsrla2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst (URShiftVL src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "usra $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    // The immediate is passed through unchanged.
    const int count = (int)$shift$$constant;
    __ usra(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg), count);
  %}
  ins_pipe(vshift128_imm);
%}
|
||||
|
||||
instruct vmax2F(vecD dst, vecD src1, vecD src2)
|
||||
%{
|
||||
predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
|
||||
|
@ -2688,6 +2688,8 @@ public:
|
||||
// Vector shift-by-immediate instructions generated through INSN (the macro
// itself is defined outside this excerpt). The last argument marks right
// shifts (isSHR). NOTE(review): the second argument looks like the
// unsigned/signed selector (1 for ushr/usra, 0 for sshr/ssra); confirm
// against the INSN definition.
INSN(shl, 0, 0b010101, /* isSHR = */ false);
|
||||
INSN(sshr, 0, 0b000001, /* isSHR = */ true);
|
||||
INSN(ushr, 1, 0b000001, /* isSHR = */ true);
|
||||
// usra / ssra are the accumulating right shifts used by the vsrla* / vsraa*
// matcher rules.
INSN(usra, 1, 0b000101, /* isSHR = */ true);
|
||||
INSN(ssra, 0, 0b000101, /* isSHR = */ true);
|
||||
|
||||
#undef INSN
|
||||
|
||||
|
@ -0,0 +1,137 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Huawei Technologies Co. Ltd. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
package org.openjdk.bench.vm.compiler;
|
||||
|
||||
import org.openjdk.jmh.annotations.*;
|
||||
import org.openjdk.jmh.infra.*;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.Random;
|
||||
|
||||
@BenchmarkMode(Mode.AverageTime)
|
||||
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||
@State(Scope.Thread)
|
||||
public class VectorShiftAccumulate {
|
||||
@Param({"1028"})
|
||||
public int count;
|
||||
|
||||
private byte[] bytesA, bytesB, bytesD;
|
||||
private short[] shortsA, shortsB, shortsD;
|
||||
private char[] charsA, charsB, charsD;
|
||||
private int[] intsA, intsB, intsD;
|
||||
private long[] longsA, longsB, longsD;
|
||||
|
||||
@Param("0")
|
||||
private int seed;
|
||||
private Random r = new Random(seed);
|
||||
|
||||
@Setup
|
||||
public void init() {
|
||||
bytesA = new byte[count];
|
||||
shortsA = new short[count];
|
||||
charsA = new char[count];
|
||||
intsA = new int[count];
|
||||
longsA = new long[count];
|
||||
|
||||
bytesB = new byte[count];
|
||||
shortsB = new short[count];
|
||||
charsB = new char[count];
|
||||
intsB = new int[count];
|
||||
longsB = new long[count];
|
||||
|
||||
bytesD = new byte[count];
|
||||
shortsD = new short[count];
|
||||
charsD = new char[count];
|
||||
intsD = new int[count];
|
||||
longsD = new long[count];
|
||||
|
||||
for (int i = 0; i < count; i++) {
|
||||
bytesA[i] = (byte) r.nextInt();
|
||||
shortsA[i] = (short) r.nextInt();
|
||||
intsA[i] = r.nextInt();
|
||||
longsA[i] = r.nextLong();
|
||||
|
||||
bytesB[i] = (byte) r.nextInt();
|
||||
shortsB[i] = (short) r.nextInt();
|
||||
intsB[i] = r.nextInt();
|
||||
longsB[i] = r.nextLong();
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void shiftRightAccumulateByte() {
|
||||
for (int i = 0; i < count; i++) {
|
||||
bytesD[i] = (byte) (bytesA[i] + (bytesB[i] >> 1));
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void shiftURightAccumulateByte() {
|
||||
for (int i = 0; i < count; i++) {
|
||||
bytesD[i] = (byte) (bytesA[i] + (((byte) (bytesB[i] >>> 3))));
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void shiftRightAccumulateShort() {
|
||||
for (int i = 0; i < count; i++) {
|
||||
shortsD[i] = (short) (shortsA[i] + (shortsB[i] >> 5));
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void shiftURightAccumulateChar() {
|
||||
for (int i = 0; i < count; i++) {
|
||||
charsD[i] = (char) (charsA[i] + (charsB[i] >>> 4));
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void shiftRightAccumulateInt() {
|
||||
for (int i = 0; i < count; i++) {
|
||||
intsD[i] = intsA[i] + (intsB[i] >> 2);
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void shiftURightAccumulateInt() {
|
||||
for (int i = 0; i < count; i++) {
|
||||
intsD[i] = (intsB[i] >>> 2) + intsA[i];
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void shiftRightAccumulateLong() {
|
||||
for (int i = 0; i < count; i++) {
|
||||
longsD[i] = longsA[i] + (longsB[i] >> 5);
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void shiftURightAccumulateLong() {
|
||||
for (int i = 0; i < count; i++) {
|
||||
longsD[i] = (longsB[i] >>> 2) + longsA[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user