8265491: Math Signum optimization for x86

Reviewed-by: jiefu, jbhateja, neliasso
This commit is contained in:
Marcus G K Williams 2021-05-03 16:28:14 +00:00 committed by Sandhya Viswanathan
parent 55cc0af404
commit ff65920cd1
7 changed files with 217 additions and 6 deletions

View File

@ -1045,6 +1045,35 @@ void C2_MacroAssembler::evminmax_fp(int opcode, BasicType elem_bt,
}
}
// Float/Double signum
void C2_MacroAssembler::signum_fp(int opcode, XMMRegister dst,
XMMRegister zero, XMMRegister one,
Register scratch) {
assert(opcode == Op_SignumF || opcode == Op_SignumD, "sanity");
Label DONE_LABEL;
if (opcode == Op_SignumF) {
assert(UseSSE > 0, "required");
ucomiss(dst, zero);
jcc(Assembler::equal, DONE_LABEL); // handle special case +0.0/-0.0, if argument is +0.0/-0.0, return argument
jcc(Assembler::parity, DONE_LABEL); // handle special case NaN, if argument NaN, return NaN
movflt(dst, one);
jcc(Assembler::above, DONE_LABEL);
xorps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), scratch);
} else if (opcode == Op_SignumD) {
assert(UseSSE > 1, "required");
ucomisd(dst, zero);
jcc(Assembler::equal, DONE_LABEL); // handle special case +0.0/-0.0, if argument is +0.0/-0.0, return argument
jcc(Assembler::parity, DONE_LABEL); // handle special case NaN, if argument NaN, return NaN
movdbl(dst, one);
jcc(Assembler::above, DONE_LABEL);
xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), scratch);
}
bind(DONE_LABEL);
}
void C2_MacroAssembler::vextendbw(bool sign, XMMRegister dst, XMMRegister src) {
if (sign) {
pmovsxbw(dst, src);

View File

@ -89,6 +89,10 @@ public:
KRegister ktmp, XMMRegister atmp, XMMRegister btmp,
int vlen_enc);
// Float/Double signum. opcode must be Op_SignumF or Op_SignumD.
// dst holds the argument on entry and the result on exit; a +0.0/-0.0 or
// NaN argument is returned unchanged. zero and one are expected to hold the
// constants 0.0 and 1.0 in the matching precision. scratch is passed to the
// xorps/xorpd that applies the sign-flip mask for negative arguments.
void signum_fp(int opcode, XMMRegister dst,
XMMRegister zero, XMMRegister one,
Register scratch);
void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
void vextendbw(bool sign, XMMRegister dst, XMMRegister src);
void vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);

View File

@ -1702,6 +1702,9 @@ void VM_Version::get_processor_features() {
}
}
#endif // !PRODUCT
if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
}
}
void VM_Version::print_platform_virtualization_info(outputStream* st) {

View File

@ -1599,6 +1599,16 @@ const bool Matcher::match_rule_supported(int opcode) {
}
break;
#endif // !LP64
case Op_SignumF:
if (UseSSE < 1) {
return false;
}
break;
case Op_SignumD:
if (UseSSE < 2) {
return false;
}
break;
}
return true; // Match rules are supported by default.
}
@ -5779,6 +5789,30 @@ instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktm
ins_pipe( pipe_slow );
%}
// --------------------------------- Signum ---------------------------
// Single-precision signum.
// Matches (SignumF dst (Binary zero one)) where dst is both the input and the
// result register. A pointer register is reserved as TEMP for use as scratch
// and the flags register is declared killed. Code generation is delegated to
// C2_MacroAssembler::signum_fp, which selects the float path from the ideal
// opcode.
instruct signumF_reg(regF dst, regF zero, regF one, rRegP scratch, rFlagsReg cr) %{
match(Set dst (SignumF dst (Binary zero one)));
effect(TEMP scratch, KILL cr);
format %{ "signumF $dst, $dst\t! using $scratch as TEMP" %}
ins_encode %{
// Ideal opcode of this node; passed through so signum_fp picks the float path.
int opcode = this->ideal_Opcode();
__ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, $scratch$$Register);
%}
ins_pipe( pipe_slow );
%}
// Double-precision signum.
// Matches (SignumD dst (Binary zero one)) where dst is both the input and the
// result register. A pointer register is reserved as TEMP for use as scratch
// and the flags register is declared killed. Code generation is delegated to
// C2_MacroAssembler::signum_fp, which selects the double path from the ideal
// opcode.
instruct signumD_reg(regD dst, regD zero, regD one, rRegP scratch, rFlagsReg cr) %{
match(Set dst (SignumD dst (Binary zero one)));
effect(TEMP scratch, KILL cr);
format %{ "signumD $dst, $dst\t! using $scratch as TEMP" %}
ins_encode %{
// Ideal opcode of this node; passed through so signum_fp picks the double path.
int opcode = this->ideal_Opcode();
__ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, $scratch$$Register);
%}
ins_pipe( pipe_slow );
%}
// --------------------------------- Sqrt --------------------------------------
instruct vsqrtF_reg(vec dst, vec src) %{

View File

@ -1737,8 +1737,8 @@ bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) {
case vmIntrinsics::_dpow: return inline_math_pow();
case vmIntrinsics::_dcopySign: return inline_double_math(id);
case vmIntrinsics::_fcopySign: return inline_math(id);
case vmIntrinsics::_dsignum: return inline_double_math(id);
case vmIntrinsics::_fsignum: return inline_math(id);
case vmIntrinsics::_dsignum: return Matcher::match_rule_supported(Op_SignumD) ? inline_double_math(id) : false;
case vmIntrinsics::_fsignum: return Matcher::match_rule_supported(Op_SignumF) ? inline_math(id) : false;
// These intrinsics are not yet correctly implemented
case vmIntrinsics::_datan2:

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, BELLSOFT. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -25,7 +25,6 @@
/*
* @test
* @summary Test compiler intrinsics for signum
* @requires os.arch=="aarch64"
* @library /test/lib
*
* @run main/othervm
@ -100,7 +99,7 @@ public class TestSignumIntrinsic {
float arg = fcase[0];
float expected = fcase[1];
float calculated = Math.signum(arg);
Asserts.assertEQ(expected, calculated, "Unexpected float result");
Asserts.assertEQ(expected, calculated, "Unexpected float result from " + arg);
accum += calculated;
}
return accum;
@ -112,7 +111,7 @@ public class TestSignumIntrinsic {
double arg = dcase[0];
double expected = dcase[1];
double calculated = Math.signum(arg);
Asserts.assertEQ(expected, calculated, "Unexpected double result");
Asserts.assertEQ(expected, calculated, "Unexpected double result from " + arg);
accum += calculated;
}
return accum;

View File

@ -0,0 +1,142 @@
/*
* Copyright (c) Intel, 2021 All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.bench.vm.compiler;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OperationsPerInvocation;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;
import java.util.concurrent.TimeUnit;
/**
 * JMH micro-benchmark for {@link Math#signum(float)} and
 * {@link Math#signum(double)}.
 *
 * <p>Each signum benchmark walks a fixed table of inputs (ordinary values,
 * extremes, signed zeros, infinities, NaN and very small magnitudes)
 * {@link #ITERATIONS} times and feeds every result to a {@link Blackhole}.
 * The matching "overhead" benchmarks run the same loops without the signum
 * call so the cost of the loop and of the Blackhole can be subtracted.
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Thread)
@Warmup(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@Measurement(iterations = 5, time = 1000, timeUnit = TimeUnit.MILLISECONDS)
@Fork(3)
public class Signum {

    /** Number of passes over the input table per benchmark invocation. */
    private static final int ITERATIONS = 15000;

    /**
     * Number of entries in {@link #float_values} and {@link #double_values}.
     * Annotation arguments must be compile-time constants, so the array
     * length cannot be used directly; keep this in sync with both tables.
     */
    private static final int VALUES_PER_ITERATION = 17;

    /** Float inputs; must contain exactly {@link #VALUES_PER_ITERATION} entries. */
    private static final float[] float_values = {
        123.4f,
        -56.7f,
        7e30f,
        -0.3e30f,
        Float.MAX_VALUE,
        -Float.MAX_VALUE,
        Float.MIN_VALUE,
        -Float.MIN_VALUE,
        0.0f,
        -0.0f,
        Float.POSITIVE_INFINITY,
        Float.NEGATIVE_INFINITY,
        Float.NaN,
        Float.MIN_NORMAL,
        -Float.MIN_NORMAL,
        0x0.0002P-126f,
        -0x0.0002P-126f
    };

    /** Double inputs; must contain exactly {@link #VALUES_PER_ITERATION} entries. */
    private static final double[] double_values = {
        123.4d,
        -56.7d,
        7e30d,
        -0.3e30d,
        Double.MAX_VALUE,
        -Double.MAX_VALUE,
        Double.MIN_VALUE,
        -Double.MIN_VALUE,
        0.0d,
        -0.0d,
        Double.POSITIVE_INFINITY,
        Double.NEGATIVE_INFINITY,
        Double.NaN,
        Double.MIN_NORMAL,
        -Double.MIN_NORMAL,
        0x0.00000001P-1022,
        -0x0.00000001P-1022,
    };

    /** Thin wrapper around {@link Math#signum(double)}. */
    private static double Signum_Kernel(double data) {
        return Math.signum(data);
    }

    /** Thin wrapper around {@link Math#signum(float)}. */
    private static float Signum_Kernel(float data) {
        return Math.signum(data);
    }

    /** Computes signum for every float input, {@link #ITERATIONS} times over. */
    @Benchmark
    @OperationsPerInvocation(ITERATIONS * VALUES_PER_ITERATION)
    public void _1_signumFloatTest(Blackhole bh) {
        for (int i = 0; i < ITERATIONS; i++) {
            for (float f : float_values) {
                bh.consume(Signum_Kernel(f));
            }
        }
    }

    /** Same loops as the float signum benchmark, consuming the raw input instead. */
    @Benchmark
    @OperationsPerInvocation(ITERATIONS * VALUES_PER_ITERATION)
    public void _2_overheadFloat(Blackhole bh) {
        for (int i = 0; i < ITERATIONS; i++) {
            for (float f : float_values) {
                bh.consume(f);
            }
        }
    }

    /** Computes signum for every double input, {@link #ITERATIONS} times over. */
    @Benchmark
    @OperationsPerInvocation(ITERATIONS * VALUES_PER_ITERATION)
    public void _3_signumDoubleTest(Blackhole bh) {
        for (int i = 0; i < ITERATIONS; i++) {
            for (double d : double_values) {
                bh.consume(Signum_Kernel(d));
            }
        }
    }

    /** Same loops as the double signum benchmark, consuming the raw input instead. */
    @Benchmark
    @OperationsPerInvocation(ITERATIONS * VALUES_PER_ITERATION)
    public void _4_overheadDouble(Blackhole bh) {
        for (int i = 0; i < ITERATIONS; i++) {
            for (double d : double_values) {
                bh.consume(d);
            }
        }
    }
}