8282711: Accelerate Math.signum function for AVX and AVX512 target.

Reviewed-by: sviswanathan, thartmann
This commit is contained in:
Jatin Bhateja 2022-04-29 06:34:09 +00:00
parent 0a4a6403bb
commit e4066628ad
13 changed files with 337 additions and 2 deletions

View File

@ -4420,6 +4420,48 @@ void C2_MacroAssembler::vector_mask_operation(int opc, Register dst, XMMRegister
}
#endif
// Emits the opmask-based (EVEX) instruction sequence for a vector signum.
//
//   opcode  - Op_SignumVD selects the packed-double form; anything else is
//             asserted to be Op_SignumVF and uses the packed-single form.
//   dst     - result vector; written before src is last read, so it must not
//             alias src.
//   src     - input vector.
//   zero    - vector operand named for 0.0 (assumed to be a broadcast 0.0 - confirm at call site).
//   one     - vector operand named for 1.0 (assumed to be a broadcast 1.0 - confirm at call site).
//   ktmp1   - opmask temporary, overwritten by both compares.
//   vec_enc - vector length encoding forwarded to every instruction.
void C2_MacroAssembler::vector_signum_evex(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one,
                                           KRegister ktmp1, int vec_enc) {
  switch (opcode) {
    case Op_SignumVD:
      // dst = zero - one, i.e. the "-1" candidate for every lane.
      vsubpd(dst, zero, one, vec_enc);
      // Lanes with src < 0 keep -1, all other lanes take 'one'.
      evcmppd(ktmp1, k0, src, zero, Assembler::LT_OQ, vec_enc);
      evblendmpd(dst, ktmp1, one, dst, true, vec_enc);
      // Lanes where src is NaN, -0.0 or 0.0 (equal-or-unordered vs. zero)
      // are replaced by src itself.
      evcmppd(ktmp1, k0, src, zero, Assembler::EQ_UQ, vec_enc);
      evblendmpd(dst, ktmp1, dst, src, true, vec_enc);
      break;
    default:
      assert(opcode == Op_SignumVF, "");
      // Same sequence as above using the packed-single instructions.
      vsubps(dst, zero, one, vec_enc);
      // Lanes with src < 0 keep -1, all other lanes take 'one'.
      evcmpps(ktmp1, k0, src, zero, Assembler::LT_OQ, vec_enc);
      evblendmps(dst, ktmp1, one, dst, true, vec_enc);
      // Lanes where src is NaN, -0.0 or 0.0 are replaced by src itself.
      evcmpps(ktmp1, k0, src, zero, Assembler::EQ_UQ, vec_enc);
      evblendmps(dst, ktmp1, dst, src, true, vec_enc);
      break;
  }
}
// Emits the vector-register-mask (vblendv/vcmp) instruction sequence for a
// vector signum.
//
//   opcode  - Op_SignumVD selects the packed-double form; anything else is
//             asserted to be Op_SignumVF and uses the packed-single form.
//   dst     - result vector; written before src is last read, so it must not
//             alias src.
//   src     - input vector; also passed as the selector operand of the first blend.
//   zero    - vector operand named for 0.0 (assumed to be a broadcast 0.0 - confirm at call site).
//   one     - vector operand named for 1.0 (assumed to be a broadcast 1.0 - confirm at call site).
//   xtmp1   - vector temporary that receives the compare result.
//   vec_enc - vector length encoding forwarded to every instruction.
void C2_MacroAssembler::vector_signum_avx(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one,
                                          XMMRegister xtmp1, int vec_enc) {
  if (opcode != Op_SignumVD) {
    assert(opcode == Op_SignumVF, "");
    // dst = zero - one, i.e. the "-1" candidate for every lane.
    vsubps(dst, zero, one, vec_enc);
    // Select between 'one' and the -1 candidate using src as the selector:
    // if src < 0 ? -1 : 1
    vblendvps(dst, one, dst, src, vec_enc);
    // Lanes where src is NaN, -0.0 or 0.0 (equal-or-unordered vs. zero)
    // are replaced by src itself.
    vcmpps(xtmp1, src, zero, Assembler::EQ_UQ, vec_enc);
    vblendvps(dst, dst, src, xtmp1, vec_enc);
    return;
  }

  // Packed-double variant of the same sequence.
  vsubpd(dst, zero, one, vec_enc);
  // if src < 0 ? -1 : 1
  vblendvpd(dst, one, dst, src, vec_enc);
  // Lanes where src is NaN, -0.0 or 0.0 are replaced by src itself.
  vcmppd(xtmp1, src, zero, Assembler::EQ_UQ, vec_enc);
  vblendvpd(dst, dst, src, xtmp1, vec_enc);
}
void C2_MacroAssembler::vector_maskall_operation(KRegister dst, Register src, int mask_len) {
if (VM_Version::supports_avx512bw()) {
if (mask_len > 32) {

View File

@ -340,6 +340,12 @@ public:
void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, Address src3,
bool merge, BasicType bt, int vlen_enc);
// Vector signum for Op_SignumVD / Op_SignumVF built from vsub, vblendv and vcmp
// instructions. dst is written early (must not alias src); xtmp1 is a vector
// temporary holding the compare result used by the final blend.
void vector_signum_avx(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one,
XMMRegister xtmp1, int vec_enc);
// Vector signum for Op_SignumVD / Op_SignumVF built from vsub, evcmp and evblendm
// instructions. dst is written early (must not alias src); ktmp1 is an opmask
// temporary overwritten by both compares.
void vector_signum_evex(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one,
KRegister ktmp1, int vec_enc);
void udivI(Register rax, Register divisor, Register rdx);
void umodI(Register rax, Register divisor, Register rdx);
void udivmodI(Register rax, Register divisor, Register rdx, Register tmp);
@ -349,6 +355,7 @@ public:
void umodL(Register rax, Register divisor, Register rdx);
void udivmodL(Register rax, Register divisor, Register rdx, Register tmp);
#endif
void vector_popcount_int(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp,
int vec_enc);

View File

@ -1886,6 +1886,12 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
return false;
}
break;
case Op_SignumVD:
case Op_SignumVF:
if (UseAVX < 1) {
return false;
}
break;
case Op_PopCountVI:
if (!VM_Version::supports_avx512_vpopcntdq() &&
(vlen == 16) && !VM_Version::supports_avx512bw()) {
@ -6089,6 +6095,36 @@ instruct signumD_reg(regD dst, regD zero, regD one, rRegP scratch, rFlagsReg cr)
ins_pipe( pipe_slow );
%}
// Vector signum (SignumVF / SignumVD) for targets without AVX512VL and vectors
// of at most 32 bytes. The (zero, one) operands arrive packed in a Binary node.
// dst is declared TEMP because the encoding writes it before its last read of
// src; xtmp1 is a scratch vector for the compare result.
instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
match(Set dst (SignumVF src (Binary zero one)));
match(Set dst (SignumVD src (Binary zero one)));
effect(TEMP dst, TEMP xtmp1);
format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
int vec_enc = vector_length_encoding(this);
__ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
$xtmp1$$XMMRegister, vec_enc);
%}
ins_pipe( pipe_slow );
%}
// Vector signum (SignumVF / SignumVD) for targets with AVX512VL, or for
// 64-byte vectors. This predicate is the exact complement of the one on
// signumV_reg_avx for vector sizes up to 64 bytes. dst is declared TEMP because
// the encoding writes it before its last read of src; ktmp1 is a scratch opmask
// register.
instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
match(Set dst (SignumVF src (Binary zero one)));
match(Set dst (SignumVD src (Binary zero one)));
effect(TEMP dst, TEMP ktmp1);
format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
int vec_enc = vector_length_encoding(this);
__ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
$ktmp1$$KRegister, vec_enc);
%}
ins_pipe( pipe_slow );
%}
// ---------------------------------------
// For copySign use 0xE4 as writemask for vpternlog
// Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit

View File

@ -4237,7 +4237,7 @@ bool MatchRule::is_vector() const {
"VectorCastL2X", "VectorCastF2X", "VectorCastD2X",
"VectorUCastB2X", "VectorUCastS2X", "VectorUCastI2X",
"VectorMaskWrapper","VectorMaskCmp","VectorReinterpret","LoadVectorMasked","StoreVectorMasked",
"FmaVD","FmaVF","PopCountVI", "PopCountVL", "VectorLongToMask",
"FmaVD","FmaVF","PopCountVI", "PopCountVL", "SignumVF", "SignumVD", "VectorLongToMask",
// Next are vector mask ops.
"MaskAll", "AndVMask", "OrVMask", "XorVMask", "VectorMaskCast",
"RoundVF", "RoundVD",

View File

@ -321,6 +321,8 @@ macro(CopySignD)
macro(CopySignF)
macro(SignumD)
macro(SignumF)
macro(SignumVF)
macro(SignumVD)
macro(SqrtD)
macro(SqrtF)
macro(RoundF)

View File

@ -2456,6 +2456,8 @@ void Matcher::find_shared_post_visit(Node* n, uint opcode) {
break;
}
case Op_CopySignD:
case Op_SignumVF:
case Op_SignumVD:
case Op_SignumF:
case Op_SignumD: {
Node* pair = new BinaryNode(n->in(2), n->in(3));

View File

@ -2525,6 +2525,13 @@ bool SuperWord::output() {
Node* in2 = vector_opd(p, 2);
vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n));
vlen_in_bytes = vn->as_Vector()->length_in_bytes();
} else if (opc == Op_SignumF || opc == Op_SignumD) {
assert(n->req() == 4, "four inputs expected");
Node* in = vector_opd(p, 1);
Node* zero = vector_opd(p, 2);
Node* one = vector_opd(p, 3);
vn = VectorNode::make(opc, in, zero, one, vlen, velt_basic_type(n));
vlen_in_bytes = vn->as_Vector()->length_in_bytes();
} else if (n->req() == 3 && !is_cmov_pack(p)) {
// Promote operands to vector
Node* in1 = NULL;

View File

@ -245,6 +245,10 @@ int VectorNode::opcode(int sopc, BasicType bt) {
return Op_VectorCastF2X;
case Op_ConvD2L:
return Op_VectorCastD2X;
case Op_SignumF:
return Op_SignumVF;
case Op_SignumD:
return Op_SignumVD;
default:
return 0; // Unimplemented
@ -646,6 +650,8 @@ VectorNode* VectorNode::make(int vopc, Node* n1, Node* n2, Node* n3, const TypeV
switch (vopc) {
case Op_FmaVD: return new FmaVDNode(n1, n2, n3, vt);
case Op_FmaVF: return new FmaVFNode(n1, n2, n3, vt);
case Op_SignumVD: return new SignumVDNode(n1, n2, n3, vt);
case Op_SignumVF: return new SignumVFNode(n1, n2, n3, vt);
default:
fatal("Missed vector creation for '%s'", NodeClassNames[vopc]);
return NULL;

View File

@ -1674,4 +1674,20 @@ public:
virtual int Opcode() const;
Node* Ideal(PhaseGVN* phase, bool can_reshape);
};
// Vector counterpart of the scalar SignumF node (VectorNode::opcode maps
// Op_SignumF to Op_SignumVF). Carries three inputs: the vector whose signum is
// taken, followed by the 'zero' and 'one' operands.
class SignumVFNode : public VectorNode {
public:
// in1 is the value input; zero and one are forwarded unchanged as inputs 2 and 3.
SignumVFNode(Node* in1, Node* zero, Node* one, const TypeVect* vt)
: VectorNode(in1, zero, one, vt) {}
virtual int Opcode() const;
};
// Vector counterpart of the scalar SignumD node (VectorNode::opcode maps
// Op_SignumD to Op_SignumVD). Carries three inputs: the vector whose signum is
// taken, followed by the 'zero' and 'one' operands.
class SignumVDNode : public VectorNode {
public:
// in1 is the value input; zero and one are forwarded unchanged as inputs 2 and 3.
SignumVDNode(Node* in1, Node* zero, Node* one, const TypeVect* vt)
: VectorNode(in1, zero, one, vt) {}
virtual int Opcode() const;
};
#endif // SHARE_OPTO_VECTORNODE_HPP

View File

@ -89,6 +89,7 @@ public class TestDoubleVect {
test_divv(a0, a1, -VALUE);
test_diva(a0, a1, a3);
test_negc(a0, a1);
test_signum(a0, a1);
test_rint(a0, a1);
test_ceil(a0, a1);
test_floor(a0, a1);
@ -426,6 +427,19 @@ public class TestDoubleVect {
errn += verify("test_sqrt: ", i, a0[i], Math.sqrt((double)(ADD_INIT+i)));
}
test_signum(a0, a1);
errn += verify("test_signum: ", 0, a0[0], (Double.NaN));
errn += verify("test_signum: ", 1, a0[1], 1.0);
errn += verify("test_signum: ", 2, a0[2], -1.0);
errn += verify("test_signum: ", 3, a0[3], 1.0);
errn += verify("test_signum: ", 4, a0[4], 1.0);
errn += verify("test_signum: ", 5, a0[5], 1.0);
errn += verify("test_signum: ", 6, a0[6], 0.0);
errn += verify("test_signum: ", 7, a0[7], -0.0);
for (int i=8; i<ARRLEN; i++) {
errn += verify("test_signum: ", i, a0[i], (double)(((double)(ADD_INIT+i)) > 0.0 ? 1.0 : -1.0));
}
a1[6] = +0x1.fffffffffffffp-2;
a1[7] = +0x1.0p-1;
a1[8] = +0x1.0000000000001p-1;
@ -590,6 +604,13 @@ public class TestDoubleVect {
end = System.currentTimeMillis();
System.out.println("test_negc_n: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_signum(a0, a1);
}
end = System.currentTimeMillis();
System.out.println("test_signum_n: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_sqrt(a0, a1);
@ -693,6 +714,12 @@ public class TestDoubleVect {
}
}
// Element-wise signum kernel: writes Math.signum(a1[k]) into a0[k] for every
// index of a0. Kept as a simple counted loop over the arrays.
static void test_signum(double[] a0, double[] a1) {
  final int len = a0.length;
  for (int k = 0; k < len; k++) {
    final double value = a1[k];
    a0[k] = Math.signum(value);
  }
}
static void test_rint(double[] a0, double[] a1) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = Math.rint(a1[i] + ((double)(i))/1000);

View File

@ -88,6 +88,7 @@ public class TestFloatVect {
test_divv(a0, a1, -VALUE);
test_diva(a0, a1, a3);
test_negc(a0, a1);
test_signum(a0, a1);
test_sqrt(a0, a1);
test_round(i0, a1);
}
@ -345,6 +346,7 @@ public class TestFloatVect {
errn += verify("test_diva_n: ", i, a0[i], ((ADD_INIT+i)/(-VALUE)));
}
test_negc(a0, a1);
errn += verify("test_negc: ", 0, a0[0], (Float.NaN));
errn += verify("test_negc: ", 1, a0[1], (Float.NEGATIVE_INFINITY));
@ -372,6 +374,19 @@ public class TestFloatVect {
errn += verify("test_sqrt: ", i, a0[i], (float)(Math.sqrt((double)(ADD_INIT+i))));
}
test_signum(a0, a1);
errn += verify("test_signum: ", 0, a0[0], (Float.NaN));
errn += verify("test_signum: ", 1, a0[1], 1.0f);
errn += verify("test_signum: ", 2, a0[2], -1.0f);
errn += verify("test_signum: ", 3, a0[3], 1.0f);
errn += verify("test_signum: ", 4, a0[4], 1.0f);
errn += verify("test_signum: ", 5, a0[5], 1.0f);
errn += verify("test_signum: ", 6, a0[6], 0.0f);
errn += verify("test_signum: ", 7, a0[7], -0.0f);
for (int i=8; i<ARRLEN; i++) {
errn += verify("test_signum: ", i, a0[i], (((float)(ADD_INIT+i)) > 0.0f ? 1.0f : -1.0f));
}
a1[6] = +0x1.fffffep-2f;
a1[7] = +0x1.0p-1f;
a1[8] = +0x1.000002p-1f;
@ -400,7 +415,6 @@ public class TestFloatVect {
for (int i=14; i<ARRLEN; i++) {
errn += verify("test_round: ", i, i0[i], Math.round(((float)(ADD_INIT+i))));
}
}
if (errn > 0)
@ -537,6 +551,13 @@ public class TestFloatVect {
end = System.currentTimeMillis();
System.out.println("test_negc_n: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_signum(a0, a1);
}
end = System.currentTimeMillis();
System.out.println("test_signum_n: " + (end - start));
start = System.currentTimeMillis();
for (int i=0; i<ITERS; i++) {
test_sqrt(a0, a1);
@ -635,6 +656,12 @@ public class TestFloatVect {
}
}
// Element-wise signum kernel: writes Math.signum(a1[k]) into a0[k] for every
// index of a0. Kept as a simple counted loop over the arrays.
static void test_signum(float[] a0, float[] a1) {
  final int len = a0.length;
  for (int k = 0; k < len; k++) {
    final float value = a1[k];
    a0[k] = Math.signum(value);
  }
}
static void test_negc(float[] a0, float[] a1) {
for (int i = 0; i < a0.length; i+=1) {
a0[i] = (float)(-((float)a1[i]));

View File

@ -0,0 +1,93 @@
/*
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/**
* @test
* @bug 8282711
* @summary Accelerate Math.signum function for AVX and AVX512.
* @requires vm.compiler2.enabled
* @requires vm.cpu.features ~= ".*avx.*"
* @requires os.simpleArch == "x64"
* @library /test/lib /
* @run driver compiler.vectorization.TestSignumVector
*/
package compiler.vectorization;
import compiler.lib.ir_framework.*;
/**
 * IR test checking that loops over Math.signum are auto-vectorized into
 * SignumVD / SignumVF nodes, and that the compiled code produces the values
 * required by the Math.signum specification (including NaN, signed zeros,
 * infinities and negative inputs).
 */
public class TestSignumVector {
  private static final int ARRLEN = 1024;
  private static final int ITERS = 11000;

  // Special inputs that Math.signum must pass through or map to +/-1.0.
  private static final double[] DOUBLE_SPECIALS = {
    Double.NaN, 0.0, -0.0, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY
  };
  private static final float[] FLOAT_SPECIALS = {
    Float.NaN, 0.0f, -0.0f, Float.POSITIVE_INFINITY, Float.NEGATIVE_INFINITY
  };

  private static double [] dinp;
  private static double [] dout;
  private static float [] finp;
  private static float [] fout;

  public static void main(String args[]) {
    TestFramework.runWithFlags("-XX:-TieredCompilation",
                               "-XX:CompileThresholdScaling=0.3");
    System.out.println("PASSED");
  }

  // Reference result per the Math.signum(double) specification.
  private static double expectedSignum(double v) {
    if (v > 0.0) {
      return 1.0;
    }
    if (v < 0.0) {
      return -1.0;
    }
    return v; // NaN, +0.0 and -0.0 are returned unchanged
  }

  // Reference result per the Math.signum(float) specification.
  private static float expectedSignum(float v) {
    if (v > 0.0f) {
      return 1.0f;
    }
    if (v < 0.0f) {
      return -1.0f;
    }
    return v; // NaN, +0.0f and -0.0f are returned unchanged
  }

  @Test
  @IR(counts = {"SignumVD" , " > 0 "})
  public void test_signum_double(double[] dout, double[] dinp) {
    for (int i = 0; i < dout.length; i+=1) {
      dout[i] = Math.signum(dinp[i]);
    }
  }

  @Run(test = {"test_signum_double"}, mode = RunMode.STANDALONE)
  public void kernel_test_signum_double() {
    dinp = new double[ARRLEN];
    dout = new double[ARRLEN];
    for (int i = 0; i < ARRLEN; i++) {
      if (i % 7 == 0) {
        // Spread the special values over the whole array so they are not
        // confined to the first few elements.
        dinp[i] = DOUBLE_SPECIALS[(i / 7) % DOUBLE_SPECIALS.length];
      } else {
        // Ramp centred on zero so that both signs are exercised.
        dinp[i] = (double)(i - ARRLEN / 2) * 1.4;
      }
    }
    for (int i = 0; i < ITERS; i++) {
      test_signum_double(dout , dinp);
    }
    for (int i = 0; i < ARRLEN; i++) {
      double expected = expectedSignum(dinp[i]);
      // Bit comparison distinguishes -0.0 from 0.0 and treats all NaNs as equal.
      if (Double.doubleToLongBits(dout[i]) != Double.doubleToLongBits(expected)) {
        throw new RuntimeException("test_signum_double: [" + i + "] input " + dinp[i] +
                                   " expected " + expected + " but got " + dout[i]);
      }
    }
  }

  @Test
  @IR(counts = {"SignumVF" , " > 0 "})
  public void test_signum_float(float[] fout, float[] finp) {
    for (int i = 0; i < fout.length; i+=1) {
      fout[i] = Math.signum(finp[i]);
    }
  }

  // NOTE(review): the method name says "round" but it drives test_signum_float;
  // the name is kept unchanged here, consider renaming it.
  @Run(test = {"test_signum_float"}, mode = RunMode.STANDALONE)
  public void kernel_test_round() {
    finp = new float[ARRLEN];
    fout = new float[ARRLEN];
    for (int i = 0; i < ARRLEN; i++) {
      if (i % 7 == 0) {
        // Spread the special values over the whole array so they are not
        // confined to the first few elements.
        finp[i] = FLOAT_SPECIALS[(i / 7) % FLOAT_SPECIALS.length];
      } else {
        // Ramp centred on zero so that both signs are exercised.
        finp[i] = (float)(i - ARRLEN / 2) * 1.4f;
      }
    }
    for (int i = 0; i < ITERS; i++) {
      test_signum_float(fout , finp);
    }
    for (int i = 0; i < ARRLEN; i++) {
      float expected = expectedSignum(finp[i]);
      // Bit comparison distinguishes -0.0f from 0.0f and treats all NaNs as equal.
      if (Float.floatToIntBits(fout[i]) != Float.floatToIntBits(expected)) {
        throw new RuntimeException("test_signum_float: [" + i + "] input " + finp[i] +
                                   " expected " + expected + " but got " + fout[i]);
      }
    }
  }
}

View File

@ -0,0 +1,70 @@
/*
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.bench.vm.compiler;
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.*;
import java.util.concurrent.TimeUnit;
import java.util.Random;
/**
 * Measures the average time of applying Math.signum to every element of a
 * float and a double array of SIZE elements.
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Thread)
public class VectorSignum {
    @Param({"256", "512", "1024", "2048"})
    private static int SIZE;

    // All arrays are allocated in init(), after SIZE has been injected.
    // Allocating them in field initializers would size them from a SIZE value
    // that has not been set for this trial yet, and init() replaces them anyway.
    private double[] res_doubles;
    private double[] doubles;
    private float[] res_floats;
    private float[] floats;
    // Fixed seed keeps the generated inputs identical from run to run.
    private Random r = new Random(1024);

    /** Allocates the input/result arrays and fills the inputs with random values. */
    @Setup
    public void init() {
        doubles = new double[SIZE];
        floats = new float[SIZE];
        res_doubles = new double[SIZE];
        res_floats = new float[SIZE];
        for (int i=0; i<SIZE; i++) {
            floats[i] = r.nextFloat();
            doubles[i] = r.nextDouble();
        }
    }

    /** Math.signum over the float input array. */
    @Benchmark
    public void floatSignum() {
        for(int i = 0; i < SIZE; i++) {
            res_floats[i] = Math.signum(floats[i]);
        }
    }

    /** Math.signum over the double input array. */
    @Benchmark
    public void doubleSignum() {
        for(int i = 0; i < SIZE; i++) {
            res_doubles[i] = Math.signum(doubles[i]);
        }
    }
}