8323116: [REDO] Computational test more than 2x slower when AVX instructions are used

Reviewed-by: sviswanathan, kvn
This commit is contained in:
vamsi-parasa 2024-04-08 18:41:32 +00:00 committed by Sandhya Viswanathan
parent 94677200fb
commit 7e5ef79f95
4 changed files with 223 additions and 6 deletions

View File

@ -2031,7 +2031,7 @@ void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_rex_vex_w_reverted();
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int16(0x5A, (0xC0 | encode));
}
@ -2090,7 +2090,7 @@ void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int16(0x5A, (0xC0 | encode));
}

View File

@ -9959,6 +9959,7 @@ instruct convF2D_reg_reg(regD dst, regF src)
instruct convF2D_reg_mem(regD dst, memory src)
%{
predicate(UseAVX == 0);
match(Set dst (ConvF2D (LoadF src)));
format %{ "cvtss2sd $dst, $src" %}
@ -9981,6 +9982,7 @@ instruct convD2F_reg_reg(regF dst, regD src)
instruct convD2F_reg_mem(regF dst, memory src)
%{
predicate(UseAVX == 0);
match(Set dst (ConvD2F (LoadD src)));
format %{ "cvtsd2ss $dst, $src" %}
@ -10057,13 +10059,16 @@ instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsRe
ins_pipe(pipe_slow);
%}
instruct convI2F_reg_reg(regF dst, rRegI src)
instruct convI2F_reg_reg(vlRegF dst, rRegI src)
%{
predicate(!UseXmmI2F);
match(Set dst (ConvI2F src));
format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
ins_encode %{
if (UseAVX > 0) {
__ pxor($dst$$XMMRegister, $dst$$XMMRegister);
}
__ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
%}
ins_pipe(pipe_slow); // XXX
@ -10071,6 +10076,7 @@ instruct convI2F_reg_reg(regF dst, rRegI src)
instruct convI2F_reg_mem(regF dst, memory src)
%{
predicate(UseAVX == 0);
match(Set dst (ConvI2F (LoadI src)));
format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
@ -10080,13 +10086,16 @@ instruct convI2F_reg_mem(regF dst, memory src)
ins_pipe(pipe_slow); // XXX
%}
instruct convI2D_reg_reg(regD dst, rRegI src)
instruct convI2D_reg_reg(vlRegD dst, rRegI src)
%{
predicate(!UseXmmI2D);
match(Set dst (ConvI2D src));
format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
ins_encode %{
if (UseAVX > 0) {
__ pxor($dst$$XMMRegister, $dst$$XMMRegister);
}
__ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
%}
ins_pipe(pipe_slow); // XXX
@ -10094,6 +10103,7 @@ instruct convI2D_reg_reg(regD dst, rRegI src)
instruct convI2D_reg_mem(regD dst, memory src)
%{
predicate(UseAVX == 0);
match(Set dst (ConvI2D (LoadI src)));
format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
@ -10131,12 +10141,15 @@ instruct convXI2D_reg(regD dst, rRegI src)
ins_pipe(pipe_slow); // XXX
%}
instruct convL2F_reg_reg(regF dst, rRegL src)
instruct convL2F_reg_reg(vlRegF dst, rRegL src)
%{
match(Set dst (ConvL2F src));
format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
ins_encode %{
if (UseAVX > 0) {
__ pxor($dst$$XMMRegister, $dst$$XMMRegister);
}
__ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
%}
ins_pipe(pipe_slow); // XXX
@ -10144,6 +10157,7 @@ instruct convL2F_reg_reg(regF dst, rRegL src)
instruct convL2F_reg_mem(regF dst, memory src)
%{
predicate(UseAVX == 0);
match(Set dst (ConvL2F (LoadL src)));
format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
@ -10153,12 +10167,15 @@ instruct convL2F_reg_mem(regF dst, memory src)
ins_pipe(pipe_slow); // XXX
%}
instruct convL2D_reg_reg(regD dst, rRegL src)
instruct convL2D_reg_reg(vlRegD dst, rRegL src)
%{
match(Set dst (ConvL2D src));
format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
ins_encode %{
if (UseAVX > 0) {
__ pxor($dst$$XMMRegister, $dst$$XMMRegister);
}
__ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
%}
ins_pipe(pipe_slow); // XXX
@ -10166,6 +10183,7 @@ instruct convL2D_reg_reg(regD dst, rRegL src)
instruct convL2D_reg_mem(regD dst, memory src)
%{
predicate(UseAVX == 0);
match(Set dst (ConvL2D (LoadL src)));
format %{ "cvtsi2sdq $dst, $src\t# l2d" %}

View File

@ -0,0 +1,57 @@
/*
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @bug 8318562
* @run main/othervm/timeout=200 -XX:CompileCommand=compileonly,TestConvertImplicitNullCheck::test -XX:-TieredCompilation -Xbatch TestConvertImplicitNullCheck
* @summary Exercise float to double conversion with implicit null check
*
*/
/**
 * Regression test: reads a {@code float} field through a possibly-null
 * reference and widens it to {@code double}. The access to the field is the
 * only null check in {@link #test}, so a {@code null} argument must still
 * surface as a {@link NullPointerException} once the method is compiled.
 */
public class TestConvertImplicitNullCheck {

    /** Field loaded (and widened to double) by {@link #test}. */
    float f = 42;

    /**
     * Returns {@code t.f} widened to {@code double}.
     *
     * @param t object whose field is read; dereferencing it is the implicit
     *          null check under test
     * @return the value of {@code t.f} as a double
     * @throws NullPointerException if {@code t} is {@code null}
     */
    static double test(TestConvertImplicitNullCheck t) {
        return t.f;
    }

    /**
     * Warms up {@link #test} with a valid receiver, then calls it with
     * {@code null} and fails unless a NullPointerException is raised.
     */
    public static void main(String[] args) {
        final TestConvertImplicitNullCheck receiver = new TestConvertImplicitNullCheck();

        // Run the method often enough for the JIT compiler to pick it up.
        int remaining = 50_000;
        while (remaining-- > 0) {
            test(receiver);
        }

        // A null receiver has to hit the implicit null check.
        boolean sawNullPointerException = false;
        try {
            test(null);
        } catch (NullPointerException expected) {
            sawNullPointerException = true;
        }

        if (!sawNullPointerException) {
            throw new RuntimeException("Test failed as no NullPointerException is thrown");
        }
    }
}

View File

@ -0,0 +1,142 @@
/*
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.bench.vm.compiler;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import java.util.concurrent.TimeUnit;
/**
 * JMH micro-benchmarks that approximate pi with the alternating series
 * {@code 4 - 4/3 + 4/5 - 4/7 + ...}, using the odd denominators 3..999.
 *
 * <p>Every benchmark runs the same loop; they differ only in the type of the
 * loop index and the type of the accumulator, which determines the numeric
 * conversion performed in each iteration. Methods are named
 * {@code compute_pi_<index type>_<accumulator type>}.
 *
 * <p>All methods return {@code double}; a {@code float} accumulator is widened
 * on return.
 */
@State(Scope.Thread)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Warmup(iterations = 5, time = 5, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 5, time = 5, timeUnit = TimeUnit.SECONDS)
@Fork(value = 3)
public class ComputePI {

    /**
     * {@code int} index, {@code double} accumulator: each iteration converts
     * the index from int to double for the division.
     *
     * @return the series approximation of pi
     */
    @Benchmark
    public double compute_pi_int_dbl() {
        double pi = 4.0;
        boolean sign = false;

        for (int i = 3; i < 1000; i += 2) {
            if (sign) {
                pi += 4.0 / i;
            } else {
                pi -= 4.0 / i;
            }
            sign = !sign;
        }
        return pi;
    }

    /**
     * {@code int} index, {@code float} accumulator: each iteration converts
     * the index from int to float for the division.
     *
     * @return the series approximation of pi, widened to double
     */
    @Benchmark
    public double compute_pi_int_flt() {
        float pi = 4.0f;
        boolean sign = false;

        for (int i = 3; i < 1000; i += 2) {
            if (sign) {
                pi += 4.0f / i;
            } else {
                pi -= 4.0f / i;
            }
            sign = !sign;
        }
        return pi;
    }

    /**
     * {@code long} index, {@code double} accumulator: each iteration converts
     * the index from long to double for the division.
     *
     * @return the series approximation of pi
     */
    @Benchmark
    public double compute_pi_long_dbl() {
        double pi = 4.0;
        boolean sign = false;

        for (long i = 3; i < 1000; i += 2) {
            if (sign) {
                pi += 4.0 / i;
            } else {
                pi -= 4.0 / i;
            }
            sign = !sign;
        }
        return pi;
    }

    /**
     * {@code long} index, {@code float} accumulator: each iteration converts
     * the index from long to float for the division.
     *
     * @return the series approximation of pi, widened to double
     */
    @Benchmark
    public double compute_pi_long_flt() {
        float pi = 4.0f;
        boolean sign = false;

        for (long i = 3; i < 1000; i += 2) {
            if (sign) {
                pi += 4.0f / i;
            } else {
                pi -= 4.0f / i;
            }
            sign = !sign;
        }
        return pi;
    }

    /**
     * {@code float} index, {@code double} accumulator: each iteration widens
     * the index from float to double for the division.
     *
     * @return the series approximation of pi
     */
    @Benchmark
    public double compute_pi_flt_dbl() {
        double pi = 4.0;
        boolean sign = false;

        for (float i = 3.0f; i < 1000.0f; i += 2.0f) {
            if (sign) {
                pi += 4.0 / i;
            } else {
                pi -= 4.0 / i;
            }
            sign = !sign;
        }
        return pi;
    }

    /**
     * {@code double} index, {@code float} accumulator: the division and the
     * add/subtract are evaluated in double, and the compound assignment
     * narrows the result back to float in every iteration.
     *
     * @return the series approximation of pi, widened to double
     */
    @Benchmark
    public double compute_pi_dbl_flt() {
        float pi = 4.0f;
        boolean sign = false;

        // The index must be a double: with a float index this loop is pure
        // float arithmetic and no double-to-float conversion is measured.
        for (double i = 3.0; i < 1000.0; i += 2.0) {
            if (sign) {
                pi += 4.0f / i;
            } else {
                pi -= 4.0f / i;
            }
            sign = !sign;
        }
        return pi;
    }
}