8265491: Math Signum optimization for x86
Reviewed-by: jiefu, jbhateja, neliasso
This commit is contained in:
parent
55cc0af404
commit
ff65920cd1
@ -1045,6 +1045,35 @@ void C2_MacroAssembler::evminmax_fp(int opcode, BasicType elem_bt,
|
||||
}
|
||||
}
|
||||
|
||||
// Float/Double signum
|
||||
// Emits the scalar signum sequence for a float (Op_SignumF) or double
// (Op_SignumD) value held in 'dst'; the result is produced in 'dst' as well.
//
//   opcode  - ideal opcode; must be Op_SignumF or Op_SignumD (asserted below).
//   dst     - argument on entry, result on exit.
//   zero    - register compared against 'dst' (presumably holds 0.0 - confirm
//             against the matcher rule that supplies it).
//   one     - register copied into 'dst' for non-zero, non-NaN arguments
//             (presumably holds 1.0 - confirm against the matcher rule).
//   scratch - general purpose register handed to xorps/xorpd together with the
//             ExternalAddress of the sign-flip constant.
//
// The compare sets the condition flags, so callers must treat flags as killed.
void C2_MacroAssembler::signum_fp(int opcode, XMMRegister dst,
|
||||
XMMRegister zero, XMMRegister one,
|
||||
Register scratch) {
|
||||
assert(opcode == Op_SignumF || opcode == Op_SignumD, "sanity");
|
||||
|
||||
// Single exit point shared by every early-out below.
Label DONE_LABEL;
|
||||
|
||||
if (opcode == Op_SignumF) {
|
||||
assert(UseSSE > 0, "required");
|
||||
// Compare the argument with 'zero'; every jcc below consumes these flags.
ucomiss(dst, zero);
|
||||
jcc(Assembler::equal, DONE_LABEL); // handle special case +0.0/-0.0, if argument is +0.0/-0.0, return argument
|
||||
// NOTE(review): per the x86 UCOMISS definition an unordered compare is
// expected to set ZF as well as PF, in which case the 'equal' branch above
// already catches NaN - confirm before removing either branch.
jcc(Assembler::parity, DONE_LABEL); // handle special case NaN, if argument NaN, return NaN
|
||||
// Overwrite dst with 'one' before testing the sign. The branch below still
// relies on the flags from ucomiss, so this move must not modify them.
movflt(dst, one);
|
||||
// Argument compared above 'zero': dst already holds 'one', done.
jcc(Assembler::above, DONE_LABEL);
|
||||
// Remaining case (argument below 'zero'): xor dst with the sign-flip
// constant, presumably turning 'one' into its negation.
xorps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), scratch);
|
||||
} else if (opcode == Op_SignumD) {
|
||||
assert(UseSSE > 1, "required");
|
||||
// Double-precision variant of the sequence above; same flag dependencies.
ucomisd(dst, zero);
|
||||
jcc(Assembler::equal, DONE_LABEL); // handle special case +0.0/-0.0, if argument is +0.0/-0.0, return argument
|
||||
jcc(Assembler::parity, DONE_LABEL); // handle special case NaN, if argument NaN, return NaN
|
||||
// As in the float path, the following jcc reuses the ucomisd flags.
movdbl(dst, one);
|
||||
jcc(Assembler::above, DONE_LABEL);
|
||||
// Argument below 'zero': apply the double sign-flip constant to dst.
xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), scratch);
|
||||
}
|
||||
|
||||
bind(DONE_LABEL);
|
||||
}
|
||||
|
||||
void C2_MacroAssembler::vextendbw(bool sign, XMMRegister dst, XMMRegister src) {
|
||||
if (sign) {
|
||||
pmovsxbw(dst, src);
|
||||
|
@ -89,6 +89,10 @@ public:
|
||||
KRegister ktmp, XMMRegister atmp, XMMRegister btmp,
|
||||
int vlen_enc);
|
||||
|
||||
// Float/Double signum. 'opcode' selects the precision (Op_SignumF or
// Op_SignumD); 'dst' carries the argument in and the result out; 'zero' and
// 'one' are the registers the argument is compared with / replaced by;
// 'scratch' is a general purpose register used when applying the sign-flip
// constant.
void signum_fp(int opcode, XMMRegister dst,
|
||||
XMMRegister zero, XMMRegister one,
|
||||
Register scratch);
|
||||
|
||||
void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
|
||||
void vextendbw(bool sign, XMMRegister dst, XMMRegister src);
|
||||
void vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
|
||||
|
@ -1702,6 +1702,9 @@ void VM_Version::get_processor_features() {
|
||||
}
|
||||
}
|
||||
#endif // !PRODUCT
|
||||
if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
|
||||
FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
|
||||
}
|
||||
}
|
||||
|
||||
void VM_Version::print_platform_virtualization_info(outputStream* st) {
|
||||
|
@ -1599,6 +1599,16 @@ const bool Matcher::match_rule_supported(int opcode) {
|
||||
}
|
||||
break;
|
||||
#endif // !LP64
|
||||
case Op_SignumF:
|
||||
if (UseSSE < 1) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_SignumD:
|
||||
if (UseSSE < 2) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return true; // Match rules are supported by default.
|
||||
}
|
||||
@ -5779,6 +5789,30 @@ instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktm
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// --------------------------------- Signum ---------------------------
|
||||
|
||||
// Scalar float signum. Matches the ideal SignumF node, whose first input is
// also the destination ($dst is read and then overwritten) and whose second
// input is a Binary pair of the $zero and $one registers.
instruct signumF_reg(regF dst, regF zero, regF one, rRegP scratch, rFlagsReg cr) %{
|
||||
match(Set dst (SignumF dst (Binary zero one)));
|
||||
// $scratch is a temporary for the encoding; the flags register is clobbered.
effect(TEMP scratch, KILL cr);
|
||||
format %{ "signumF $dst, $dst\t! using $scratch as TEMP" %}
|
||||
ins_encode %{
|
||||
// The ideal opcode is passed to the shared float/double emitter signum_fp.
int opcode = this->ideal_Opcode();
|
||||
__ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, $scratch$$Register);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// Scalar double signum. Matches the ideal SignumD node, whose first input is
// also the destination ($dst is read and then overwritten) and whose second
// input is a Binary pair of the $zero and $one registers.
instruct signumD_reg(regD dst, regD zero, regD one, rRegP scratch, rFlagsReg cr) %{
|
||||
match(Set dst (SignumD dst (Binary zero one)));
|
||||
// $scratch is a temporary for the encoding; the flags register is clobbered.
effect(TEMP scratch, KILL cr);
|
||||
format %{ "signumD $dst, $dst\t! using $scratch as TEMP" %}
|
||||
ins_encode %{
|
||||
// The ideal opcode is passed to the shared float/double emitter signum_fp.
int opcode = this->ideal_Opcode();
|
||||
__ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, $scratch$$Register);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// --------------------------------- Sqrt --------------------------------------
|
||||
|
||||
instruct vsqrtF_reg(vec dst, vec src) %{
|
||||
|
@ -1737,8 +1737,8 @@ bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) {
|
||||
case vmIntrinsics::_dpow: return inline_math_pow();
|
||||
case vmIntrinsics::_dcopySign: return inline_double_math(id);
|
||||
case vmIntrinsics::_fcopySign: return inline_math(id);
|
||||
case vmIntrinsics::_dsignum: return inline_double_math(id);
|
||||
case vmIntrinsics::_fsignum: return inline_math(id);
|
||||
case vmIntrinsics::_dsignum: return Matcher::match_rule_supported(Op_SignumD) ? inline_double_math(id) : false;
|
||||
case vmIntrinsics::_fsignum: return Matcher::match_rule_supported(Op_SignumF) ? inline_math(id) : false;
|
||||
|
||||
// These intrinsics are not yet correctly implemented
|
||||
case vmIntrinsics::_datan2:
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2020, BELLSOFT. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
@ -25,7 +25,6 @@
|
||||
/*
|
||||
* @test
|
||||
* @summary Test compiler intrinsics for signum
|
||||
* @requires os.arch=="aarch64"
|
||||
* @library /test/lib
|
||||
*
|
||||
* @run main/othervm
|
||||
@ -100,7 +99,7 @@ public class TestSignumIntrinsic {
|
||||
float arg = fcase[0];
|
||||
float expected = fcase[1];
|
||||
float calculated = Math.signum(arg);
|
||||
Asserts.assertEQ(expected, calculated, "Unexpected float result");
|
||||
Asserts.assertEQ(expected, calculated, "Unexpected float result from " + arg);
|
||||
accum += calculated;
|
||||
}
|
||||
return accum;
|
||||
@ -112,7 +111,7 @@ public class TestSignumIntrinsic {
|
||||
double arg = dcase[0];
|
||||
double expected = dcase[1];
|
||||
double calculated = Math.signum(arg);
|
||||
Asserts.assertEQ(expected, calculated, "Unexpected double result");
|
||||
Asserts.assertEQ(expected, calculated, "Unexpected double result from " + arg);
|
||||
accum += calculated;
|
||||
}
|
||||
return accum;
|
||||
|
142
test/micro/org/openjdk/bench/vm/compiler/Signum.java
Normal file
142
test/micro/org/openjdk/bench/vm/compiler/Signum.java
Normal file
@ -0,0 +1,142 @@
|
||||
/*
|
||||
* Copyright (c) Intel, 2021 All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package org.openjdk.bench.vm.compiler;
|
||||
|
||||
import org.openjdk.jmh.annotations.Benchmark;
|
||||
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||
import org.openjdk.jmh.annotations.Fork;
|
||||
import org.openjdk.jmh.annotations.Measurement;
|
||||
import org.openjdk.jmh.annotations.Mode;
|
||||
import org.openjdk.jmh.annotations.OperationsPerInvocation;
|
||||
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||
import org.openjdk.jmh.annotations.Scope;
|
||||
import org.openjdk.jmh.annotations.State;
|
||||
import org.openjdk.jmh.annotations.Warmup;
|
||||
import org.openjdk.jmh.infra.Blackhole;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
@BenchmarkMode(Mode.AverageTime)
|
||||
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||
@State(Scope.Thread)
|
||||
@Warmup(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
|
||||
@Measurement(iterations = 5, time = 1000, timeUnit = TimeUnit.MILLISECONDS)
|
||||
@Fork(3)
|
||||
public class Signum {
|
||||
|
||||
private final int ITERATIONS = 15000;
|
||||
|
||||
private double doubleValue = 1D;
|
||||
private float floatValue = 1F;
|
||||
|
||||
private static final float[] float_values = {
|
||||
123.4f,
|
||||
-56.7f,
|
||||
7e30f,
|
||||
-0.3e30f,
|
||||
Float.MAX_VALUE,
|
||||
-Float.MAX_VALUE,
|
||||
Float.MIN_VALUE,
|
||||
-Float.MIN_VALUE,
|
||||
0.0f,
|
||||
-0.0f,
|
||||
Float.POSITIVE_INFINITY,
|
||||
Float.NEGATIVE_INFINITY,
|
||||
Float.NaN,
|
||||
Float.MIN_NORMAL,
|
||||
-Float.MIN_NORMAL,
|
||||
0x0.0002P-126f,
|
||||
-0x0.0002P-126f
|
||||
};
|
||||
|
||||
private static final double[] double_values = {
|
||||
123.4d,
|
||||
-56.7d,
|
||||
7e30d,
|
||||
-0.3e30d,
|
||||
Double.MAX_VALUE,
|
||||
-Double.MAX_VALUE,
|
||||
Double.MIN_VALUE,
|
||||
-Double.MIN_VALUE,
|
||||
0.0d,
|
||||
-0.0d,
|
||||
Double.POSITIVE_INFINITY,
|
||||
Double.NEGATIVE_INFINITY,
|
||||
Double.NaN,
|
||||
Double.MIN_NORMAL,
|
||||
-Double.MIN_NORMAL,
|
||||
0x0.00000001P-1022,
|
||||
-0x0.00000001P-1022,
|
||||
};
|
||||
|
||||
private static double Signum_Kernel(double data)
|
||||
{
|
||||
return Math.signum(data);
|
||||
}
|
||||
|
||||
private static float Signum_Kernel(float data)
|
||||
{
|
||||
return Math.signum(data);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
@OperationsPerInvocation(ITERATIONS * 17)
|
||||
public void _1_signumFloatTest(Blackhole bh) {
|
||||
for (int i = 0; i < ITERATIONS; i++) {
|
||||
for (float f : float_values) {
|
||||
bh.consume(Signum_Kernel(f));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
@OperationsPerInvocation(ITERATIONS * 17)
|
||||
public void _2_overheadFloat(Blackhole bh) {
|
||||
for (int i = 0; i < ITERATIONS; i++) {
|
||||
for (float f : float_values) {
|
||||
bh.consume(f);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
@OperationsPerInvocation(ITERATIONS * 17)
|
||||
public void _3_signumDoubleTest(Blackhole bh) {
|
||||
for (int i = 0; i < ITERATIONS; i++) {
|
||||
for (double d : double_values) {
|
||||
bh.consume(Signum_Kernel(d));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
@OperationsPerInvocation(ITERATIONS * 17)
|
||||
public void _4_overheadDouble(Blackhole bh) {
|
||||
for (int i = 0; i < ITERATIONS; i++) {
|
||||
for (double d : double_values) {
|
||||
bh.consume(d);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user