8271883: Math CopySign optimization for x86
Reviewed-by: jbhateja, sviswanathan, kvn
This commit is contained in:
parent
6b8b160e37
commit
87d2761f1b
@ -1736,6 +1736,9 @@ void VM_Version::get_processor_features() {
|
||||
if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
|
||||
FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
|
||||
}
|
||||
if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
|
||||
FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
|
||||
}
|
||||
}
|
||||
|
||||
void VM_Version::print_platform_virtualization_info(outputStream* st) {
|
||||
|
@ -1560,6 +1560,15 @@ const bool Matcher::match_rule_supported(int opcode) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_CopySignD:
|
||||
case Op_CopySignF:
|
||||
if (UseAVX < 3 || !is_LP64) {
|
||||
return false;
|
||||
}
|
||||
if (!VM_Version::supports_avx512vl()) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
#ifndef _LP64
|
||||
case Op_AddReductionVF:
|
||||
case Op_AddReductionVD:
|
||||
@ -5776,7 +5785,7 @@ instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktm
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// --------------------------------- Signum ---------------------------
|
||||
// --------------------------------- Signum/CopySign ---------------------------
|
||||
|
||||
instruct signumF_reg(regF dst, regF zero, regF one, rRegP scratch, rFlagsReg cr) %{
|
||||
match(Set dst (SignumF dst (Binary zero one)));
|
||||
@ -5800,6 +5809,53 @@ instruct signumD_reg(regD dst, regD zero, regD one, rRegP scratch, rFlagsReg cr)
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// ---------------------------------------
|
||||
// For copySign use 0xE4 as the imm8 truth-table operand for vpternlog
|
||||
// Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
|
||||
// C (xmm2) is set to 0x7FFFFFFF (float) or 0x7FFFFFFFFFFFFFFF (double), i.e. every bit except the sign bit
|
||||
// Wherever xmm2 is 0, we want to pick from B (sign)
|
||||
// Wherever xmm2 is 1, we want to pick from A (src)
|
||||
//
|
||||
// A B C Result
|
||||
// 0 0 0 0
|
||||
// 0 0 1 0
|
||||
// 0 1 0 1
|
||||
// 0 1 1 0
|
||||
// 1 0 0 0
|
||||
// 1 0 1 1
|
||||
// 1 1 0 1
|
||||
// 1 1 1 1
|
||||
//
|
||||
// Result going from high bit to low bit is 0b11100100 = 0xE4
|
||||
// ---------------------------------------
|
||||
|
||||
#ifdef _LP64
|
||||
// Scalar float copySign rule: matches CopySignF where $dst is both an input and the result.
// $dst keeps all of its non-sign bits and receives the sign bit of $src.
// tmp2 (general-purpose) and tmp1 (XMM) are temporaries used to build the bit-select mask.
instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
|
||||
match(Set dst (CopySignF dst src));
|
||||
effect(TEMP tmp1, TEMP tmp2);
|
||||
format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
|
||||
ins_encode %{
|
||||
// Load the mask that has every bit set except bit 31 (the float sign bit).
__ movl($tmp2$$Register, 0x7FFFFFFF);
|
||||
// Transfer the mask from the general-purpose temp into the XMM temp.
__ movdl($tmp1$$XMMRegister, $tmp2$$Register);
|
||||
// imm8 0xE4 selects per bit: mask bit 1 -> take the bit from $dst, mask bit 0 -> take it from $src.
__ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// Scalar double copySign rule: matches CopySignD whose second input is (Binary src zero).
// $dst keeps all of its non-sign bits and receives the sign bit of $src.
// The immD operand `zero` appears only in the match pattern; the encoding does not reference it.
// tmp2 (general-purpose) and tmp1 (XMM) are temporaries used to build the bit-select mask.
instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
|
||||
match(Set dst (CopySignD dst (Binary src zero)));
|
||||
ins_cost(100);
|
||||
effect(TEMP tmp1, TEMP tmp2);
|
||||
format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
|
||||
ins_encode %{
|
||||
// Load the mask that has every bit set except bit 63 (the double sign bit).
__ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
|
||||
// Transfer the 64-bit mask from the general-purpose temp into the XMM temp.
__ movq($tmp1$$XMMRegister, $tmp2$$Register);
|
||||
// imm8 0xE4 selects per bit: mask bit 1 -> take the bit from $dst, mask bit 0 -> take it from $src.
__ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
#endif // _LP64
|
||||
|
||||
// --------------------------------- Sqrt --------------------------------------
|
||||
|
||||
instruct vsqrtF_reg(vec dst, vec src) %{
|
||||
|
@ -100,6 +100,16 @@ public class Signum {
|
||||
return Math.signum(data);
|
||||
}
|
||||
|
||||
/**
 * Double-precision kernel under test: forwards directly to {@link Math#copySign(double, double)}.
 *
 * @param data value passed as the first (magnitude) argument of Math.copySign
 * @param sign value passed as the second (sign) argument of Math.copySign
 * @return the result of Math.copySign(data, sign)
 */
private static double Copysign_Kernel(double data, double sign)
|
||||
{
|
||||
return Math.copySign(data, sign);
|
||||
}
|
||||
|
||||
/**
 * Single-precision kernel under test: forwards directly to {@link Math#copySign(float, float)}.
 *
 * @param data value passed as the first (magnitude) argument of Math.copySign
 * @param sign value passed as the second (sign) argument of Math.copySign
 * @return the result of Math.copySign(data, sign)
 */
private static float Copysign_Kernel(float data, float sign)
|
||||
{
|
||||
return Math.copySign(data, sign);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
@OperationsPerInvocation(ITERATIONS * 17)
|
||||
public void _1_signumFloatTest(Blackhole bh) {
|
||||
@ -139,4 +149,45 @@ public class Signum {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Float copySign benchmark: for each of ITERATIONS passes, calls Copysign_Kernel(floatValue, f)
// for every f in float_values and hands the result to bh.consume.
// NOTE(review): the factor 17 presumably equals float_values.length — confirm against the field.
@Benchmark
|
||||
@OperationsPerInvocation(ITERATIONS * 17)
|
||||
public void _5_copySignFloatTest(Blackhole bh) {
|
||||
for (int i = 0; i < ITERATIONS; i++) {
|
||||
for (float f : float_values) {
|
||||
bh.consume(Copysign_Kernel(floatValue, f));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Same loop shape as _5_copySignFloatTest but without the copySign call: each f from
// float_values is passed straight to bh.consume.
// NOTE(review): presumably serves as the loop/consume baseline for the float test — confirm.
@Benchmark
|
||||
@OperationsPerInvocation(ITERATIONS * 17)
|
||||
public void _6_overheadCopySignFloat(Blackhole bh) {
|
||||
for (int i = 0; i < ITERATIONS; i++) {
|
||||
for (float f : float_values) {
|
||||
bh.consume(f);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Double copySign benchmark: for each of ITERATIONS passes, calls Copysign_Kernel(doubleValue, d)
// for every d in double_values and hands the result to bh.consume.
// NOTE(review): the factor 17 presumably equals double_values.length — confirm against the field.
@Benchmark
|
||||
@OperationsPerInvocation(ITERATIONS * 17)
|
||||
public void _7_copySignDoubleTest(Blackhole bh) {
|
||||
for (int i = 0; i < ITERATIONS; i++) {
|
||||
for (double d : double_values) {
|
||||
bh.consume(Copysign_Kernel(doubleValue, d));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Same loop shape as _7_copySignDoubleTest but without the copySign call: each d from
// double_values is passed straight to bh.consume.
// NOTE(review): presumably serves as the loop/consume baseline for the double test — confirm.
@Benchmark
|
||||
@OperationsPerInvocation(ITERATIONS * 17)
|
||||
public void _8_overheadCopySignDouble(Blackhole bh) {
|
||||
for (int i = 0; i < ITERATIONS; i++) {
|
||||
for (double d : double_values) {
|
||||
bh.consume(d);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user