8307795: AArch64: Optimize VectorMask.truecount() on Neon
Reviewed-by: aph, eliu
This commit is contained in:
parent
07f2070411
commit
f600d0369a
@ -5512,6 +5512,30 @@ instruct vmask_truecount_sve(iRegINoSp dst, pReg src) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Combined rule for VectorMaskTrueCount (VectorStoreMask) when the vector element type is not T_BYTE.
|
||||
|
||||
instruct vstoremask_truecount_neon(iRegINoSp dst, vReg src, immI_gt_1 size, vReg vtmp) %{
  match(Set dst (VectorMaskTrueCount (VectorStoreMask src size)));
  effect(TEMP vtmp);
  format %{ "vstoremask_truecount_neon $dst, $src\t# KILL $vtmp" %}
  ins_encode %{
    // Input "src" is a vector mask represented as lanes with
    // 0/-1 as element values, so the sum of all lanes is the
    // negated number of true lanes.
    uint esize = (uint)$size$$constant;
    if (esize == 8) {
      // 64-bit lanes: a scalar pairwise add of the two lanes gives the sum.
      __ addpd($vtmp$$FloatRegister, $src$$FloatRegister);
    } else {
      uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src);
      Assembler::SIMD_Arrangement arrangement = Assembler::esize2arrangement(esize,
                                                                             /* isQ */ length_in_bytes == 16);
      if (arrangement == __ T2S) {
        // ADDV has no 2S form, so a 64-bit vector holding two 32-bit
        // lanes is summed by a pairwise add of "src" with itself.
        __ addpv($vtmp$$FloatRegister, arrangement, $src$$FloatRegister, $src$$FloatRegister);
      } else {
        __ addv($vtmp$$FloatRegister, arrangement, $src$$FloatRegister);
      }
    }
    // Read the low byte of the (negative) sum as a signed value and
    // negate it to obtain the true count.
    __ smov($dst$$Register, $vtmp$$FloatRegister, __ B, 0);
    __ neg($dst$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
|
||||
|
||||
// first true
|
||||
|
||||
instruct vmask_firsttrue_lt8e(iRegINoSp dst, vReg src, rFlagsReg cr) %{
|
||||
|
@ -3822,6 +3822,30 @@ instruct vmask_truecount_sve(iRegINoSp dst, pReg src) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Combined rule for VectorMaskTrueCount (VectorStoreMask) when the vector element type is not T_BYTE.
|
||||
|
||||
instruct vstoremask_truecount_neon(iRegINoSp dst, vReg src, immI_gt_1 size, vReg vtmp) %{
  match(Set dst (VectorMaskTrueCount (VectorStoreMask src size)));
  effect(TEMP vtmp);
  format %{ "vstoremask_truecount_neon $dst, $src\t# KILL $vtmp" %}
  ins_encode %{
    // Input "src" is a vector mask represented as lanes with
    // 0/-1 as element values, so the sum of all lanes is the
    // negated number of true lanes.
    uint esize = (uint)$size$$constant;
    if (esize == 8) {
      // 64-bit lanes: a scalar pairwise add of the two lanes gives the sum.
      __ addpd($vtmp$$FloatRegister, $src$$FloatRegister);
    } else {
      uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src);
      Assembler::SIMD_Arrangement arrangement = Assembler::esize2arrangement(esize,
                                                                             /* isQ */ length_in_bytes == 16);
      if (arrangement == __ T2S) {
        // ADDV has no 2S form, so a 64-bit vector holding two 32-bit
        // lanes is summed by a pairwise add of "src" with itself.
        __ addpv($vtmp$$FloatRegister, arrangement, $src$$FloatRegister, $src$$FloatRegister);
      } else {
        __ addv($vtmp$$FloatRegister, arrangement, $src$$FloatRegister);
      }
    }
    // Read the low byte of the (negative) sum as a signed value and
    // negate it to obtain the true count.
    __ smov($dst$$Register, $vtmp$$FloatRegister, __ B, 0);
    __ neg($dst$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
|
||||
|
||||
// first true
|
||||
|
||||
instruct vmask_firsttrue_lt8e(iRegINoSp dst, vReg src, rFlagsReg cr) %{
|
||||
|
@ -1463,6 +1463,11 @@ public class IRNode {
|
||||
machOnlyNameRegex(VNOT_L_MASKED, "vnotL_masked");
|
||||
}
|
||||
|
||||
// IR node placeholder built from PREFIX/POSTFIX and registered below via
// machOnlyNameRegex under the name "vstoremask_truecount_neon".
// NOTE(review): presumably this matches the machine instruct of that name
// in the compiled output - confirm against machOnlyNameRegex.
public static final String VSTOREMASK_TRUECOUNT = PREFIX + "VSTOREMASK_TRUECOUNT" + POSTFIX;
|
||||
static {
|
||||
machOnlyNameRegex(VSTOREMASK_TRUECOUNT, "vstoremask_truecount_neon");
|
||||
}
|
||||
|
||||
public static final String XOR = PREFIX + "XOR" + POSTFIX;
|
||||
static {
|
||||
beforeMatchingNameRegex(XOR, "Xor(I|L)");
|
||||
|
@ -0,0 +1,101 @@
|
||||
/*
|
||||
* Copyright (c) 2023, Arm Limited. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package compiler.vectorapi;
|
||||
|
||||
import compiler.lib.ir_framework.*;
|
||||
import java.util.Random;
|
||||
import jdk.incubator.vector.*;
|
||||
import jdk.test.lib.Asserts;
|
||||
import jdk.test.lib.Utils;
|
||||
|
||||
/**
|
||||
* @test
|
||||
* @bug 8307795
|
||||
* @key randomness
|
||||
* @library /test/lib /
|
||||
* @requires os.arch=="aarch64"
|
||||
* @summary AArch64: Optimize VectorMask.truecount() on Neon
|
||||
* @modules jdk.incubator.vector
|
||||
*
|
||||
* @run driver compiler.vectorapi.TestVectorMaskTrueCount
|
||||
*/
|
||||
|
||||
public class TestVectorMaskTrueCount {
|
||||
private static final VectorSpecies<Double> SPECIES = DoubleVector.SPECIES_PREFERRED;
|
||||
private static final int LENGTH = 1024;
|
||||
private static final Random RD = new Random();
|
||||
private static boolean[] ba;
|
||||
private static boolean[] bb;
|
||||
|
||||
static {
|
||||
ba = new boolean[LENGTH];
|
||||
bb = new boolean[LENGTH];
|
||||
for (int i = 0; i < LENGTH; i++) {
|
||||
ba[i] = RD.nextBoolean();
|
||||
bb[i] = RD.nextBoolean();
|
||||
}
|
||||
}
|
||||
|
||||
static int maskAndTrueCount(boolean[] a, boolean[] b, int idx) {
|
||||
int trueCount = 0;
|
||||
boolean[] c = new boolean[SPECIES.length()];
|
||||
|
||||
for (int i = idx; i < idx + SPECIES.length(); i++) {
|
||||
c[i - idx] = a[i] & b[i];
|
||||
}
|
||||
|
||||
for (int i = 0; i < c.length; i++) {
|
||||
trueCount += c[i] ? 1 : 0;
|
||||
}
|
||||
|
||||
return trueCount;
|
||||
}
|
||||
|
||||
static void assertArrayEquals(int[] r, boolean[] a, boolean[] b) {
|
||||
for (int i = 0; i < a.length; i += SPECIES.length()) {
|
||||
Asserts.assertEquals(r[i], maskAndTrueCount(a, b, i));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = { IRNode.VSTOREMASK_TRUECOUNT, ">= 1" })
|
||||
public static void test() {
|
||||
int[] r = new int[LENGTH];
|
||||
for (int i = 0; i < LENGTH; i += SPECIES.length()) {
|
||||
VectorMask<Double> ma = VectorMask.fromArray(SPECIES, ba, i);
|
||||
VectorMask<Double> mb = VectorMask.fromArray(SPECIES, bb, i);
|
||||
r[i] = ma.and(mb).trueCount();
|
||||
}
|
||||
|
||||
assertArrayEquals(r, ba, bb);
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
TestFramework testFramework = new TestFramework();
|
||||
testFramework.setDefaultWarmup(10000)
|
||||
.addFlags("--add-modules=jdk.incubator.vector")
|
||||
.addFlags("-XX:UseSVE=0")
|
||||
.start();
|
||||
}
|
||||
}
|
@ -0,0 +1,81 @@
|
||||
//
|
||||
// Copyright (c) 2023, Arm Limited. All rights reserved.
|
||||
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
//
|
||||
// This code is free software; you can redistribute it and/or modify it
|
||||
// under the terms of the GNU General Public License version 2 only, as
|
||||
// published by the Free Software Foundation.
|
||||
//
|
||||
// This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
// version 2 for more details (a copy is included in the LICENSE file that
|
||||
// accompanied this code).
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License version
|
||||
// 2 along with this work; if not, write to the Free Software Foundation,
|
||||
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
//
|
||||
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
// or visit www.oracle.com if you need additional information or have any
|
||||
// questions.
|
||||
//
|
||||
//
|
||||
package org.openjdk.bench.jdk.incubator.vector;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.Random;
|
||||
import jdk.incubator.vector.*;
|
||||
import org.openjdk.jmh.annotations.*;
|
||||
|
||||
@OutputTimeUnit(TimeUnit.MILLISECONDS)
|
||||
@State(Scope.Thread)
|
||||
@Fork(jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
|
||||
public class StoreMaskTrueCount {
|
||||
private static final VectorSpecies<Short> S_SPECIES = ShortVector.SPECIES_PREFERRED;
|
||||
private static final VectorSpecies<Integer> I_SPECIES = IntVector.SPECIES_PREFERRED;
|
||||
private static final VectorSpecies<Long> L_SPECIES = LongVector.SPECIES_PREFERRED;
|
||||
private static final int LENGTH = 128;
|
||||
private static final Random RD = new Random();
|
||||
private static boolean[] ba;
|
||||
|
||||
static {
|
||||
ba = new boolean[LENGTH];
|
||||
for (int i = 0; i < LENGTH; i++) {
|
||||
ba[i] = RD.nextBoolean();
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public static int testShort() {
|
||||
int res = 0;
|
||||
for (int i = 0; i < LENGTH; i += S_SPECIES.length()) {
|
||||
VectorMask<Short> m = VectorMask.fromArray(S_SPECIES, ba, i);
|
||||
res += m.not().trueCount();
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public static int testInt() {
|
||||
int res = 0;
|
||||
for (int i = 0; i < LENGTH; i += I_SPECIES.length()) {
|
||||
VectorMask<Integer> m = VectorMask.fromArray(I_SPECIES, ba, i);
|
||||
res += m.not().trueCount();
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public static int testLong() {
|
||||
int res = 0;
|
||||
for (int i = 0; i < LENGTH; i += L_SPECIES.length()) {
|
||||
VectorMask<Long> m = VectorMask.fromArray(L_SPECIES, ba, i);
|
||||
res += m.not().trueCount();
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user