From 153ad911f9fa3389ab92a1acab44526e3f4be4a2 Mon Sep 17 00:00:00 2001 From: Sandhya Viswanathan Date: Mon, 21 Oct 2024 14:58:43 +0000 Subject: [PATCH] 8338126: C2 SuperWord: VectorCastF2HF / vcvtps2ph produces wrong results for vector length 2 Reviewed-by: thartmann, jbhateja, epeter --- src/hotspot/cpu/x86/x86.ad | 1 + .../ir_framework/test/IREncodingPrinter.java | 3 +- .../TestFloatConversionsVector.java | 31 +++++++++++++++---- 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad index 7684febb8ae..43c959bb917 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -3686,6 +3686,7 @@ instruct vconvF2HF(vec dst, vec src) %{ %} instruct vconvF2HF_mem_reg(memory mem, vec src) %{ + predicate(n->as_StoreVector()->memory_size() >= 16); match(Set mem (StoreVector mem (VectorCastF2HF src))); format %{ "vcvtps2ph $mem,$src" %} ins_encode %{ diff --git a/test/hotspot/jtreg/compiler/lib/ir_framework/test/IREncodingPrinter.java b/test/hotspot/jtreg/compiler/lib/ir_framework/test/IREncodingPrinter.java index 73943db3f53..cd524622f4e 100644 --- a/test/hotspot/jtreg/compiler/lib/ir_framework/test/IREncodingPrinter.java +++ b/test/hotspot/jtreg/compiler/lib/ir_framework/test/IREncodingPrinter.java @@ -109,7 +109,8 @@ public class IREncodingPrinter { "sve", // Riscv64 "rvv", - "zvbb" + "zvbb", + "zvfh" )); public IREncodingPrinter() { diff --git a/test/hotspot/jtreg/compiler/vectorization/TestFloatConversionsVector.java b/test/hotspot/jtreg/compiler/vectorization/TestFloatConversionsVector.java index 96324c62c32..2fd5364a78b 100644 --- a/test/hotspot/jtreg/compiler/vectorization/TestFloatConversionsVector.java +++ b/test/hotspot/jtreg/compiler/vectorization/TestFloatConversionsVector.java @@ -26,9 +26,6 @@ * @bug 8294588 * @summary Auto-vectorize Float.floatToFloat16, Float.float16ToFloat APIs * @requires vm.compiler2.enabled - * @requires (os.simpleArch == "x64" & (vm.cpu.features ~= ".*avx512f.*" | vm.cpu.features ~= ".*f16c.*")) | - * os.arch == "aarch64" | - * (os.arch == "riscv64" & vm.cpu.features ~= ".*zvfh.*") * @library /test/lib / * @run driver compiler.vectorization.TestFloatConversionsVector */ @@ -53,7 +50,9 @@ public class TestFloatConversionsVector { } @Test - @IR(counts = {IRNode.VECTOR_CAST_F2HF, IRNode.VECTOR_SIZE + "min(max_float, max_short)", "> 0"}) + @IR(counts = {IRNode.VECTOR_CAST_F2HF, IRNode.VECTOR_SIZE + "min(max_float, max_short)", "> 0"}, + applyIfPlatformOr = {"x64", "true", "aarch64", "true", "riscv64", "true"}, + applyIfCPUFeatureOr = {"f16c", "true", "avx512f", "true", "zvfh", "true", "asimd", "true", "sve", "true"}) public void test_float_float16(short[] sout, float[] finp) { for (int i = 0; i < finp.length; i++) { sout[i] = Float.floatToFloat16(finp[i]); @@ -67,7 +66,16 @@ public class TestFloatConversionsVector { } } - @Run(test = {"test_float_float16", "test_float_float16_strided"}, mode = RunMode.STANDALONE) + @Test + public void test_float_float16_short_vector(short[] sout, float[] finp) { + for (int i = 0; i < finp.length; i+= 4) { + sout[i+0] = Float.floatToFloat16(finp[i+0]); + sout[i+1] = Float.floatToFloat16(finp[i+1]); + } + } + + @Run(test = {"test_float_float16", "test_float_float16_strided", + "test_float_float16_short_vector"}, mode = RunMode.STANDALONE) public void kernel_test_float_float16() { finp = new float[ARRLEN]; sout = new short[ARRLEN]; @@ -93,10 +101,21 @@ public class TestFloatConversionsVector { for (int i = 0; i < ARRLEN/2; i++) { Asserts.assertEquals(Float.floatToFloat16(finp[i*2]), sout[i*2]); } + + for (int i = 0; i < ITERS; i++) { + test_float_float16_short_vector(sout, finp); + } + + // Verifying the result + for (int i = 0; i < ARRLEN; i++) { + Asserts.assertEquals(Float.floatToFloat16(finp[i]), sout[i]); + } } @Test - @IR(counts = {IRNode.VECTOR_CAST_HF2F, IRNode.VECTOR_SIZE + "min(max_float, max_short)", "> 0"}) + @IR(counts = {IRNode.VECTOR_CAST_HF2F, IRNode.VECTOR_SIZE + "min(max_float, max_short)", "> 0"}, + applyIfPlatformOr = {"x64", "true", "aarch64", "true", "riscv64", "true"}, + applyIfCPUFeatureOr = {"f16c", "true", "avx512f", "true", "zvfh", "true", "asimd", "true", "sve", "true"}) public void test_float16_float(float[] fout, short[] sinp) { for (int i = 0; i < sinp.length; i++) { fout[i] = Float.float16ToFloat(sinp[i]);