8299038: Add AArch64 backend support for auto-vectorized FP16 conversions
Reviewed-by: xgong, ngasson
This commit is contained in:
parent
cac72a6018
commit
98d75f1879
@ -1,6 +1,6 @@
|
||||
//
|
||||
// Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
// Copyright (c) 2020, 2022, Arm Limited. All rights reserved.
|
||||
// Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
// Copyright (c) 2020, 2023, Arm Limited. All rights reserved.
|
||||
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
//
|
||||
// This code is free software; you can redistribute it and/or modify it
|
||||
@ -4159,6 +4159,52 @@ instruct vcvtDtoF_gt64b(vReg dst, vReg src, vReg tmp) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// VectorCastHF2F
|
||||
|
||||
// Matcher rule for VectorCastHF2F: converts the half-precision elements of
// $src into single-precision elements in $dst. A single rule covers both
// code paths; the encoding chooses NEON or SVE from the byte length of the
// result vector (vector_length_in_bytes(this)).
instruct vcvtHFtoF(vReg dst, vReg src) %{
|
||||
match(Set dst (VectorCastHF2F src));
|
||||
format %{ "vcvtHFtoF $dst, $src" %}
|
||||
ins_encode %{
|
||||
// Byte length of this node's (i.e. the float result's) vector type.
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
|
||||
// 4HF to 4F
|
||||
__ fcvtl($dst$$FloatRegister, __ T4S, $src$$FloatRegister, __ T4H);
|
||||
} else {
|
||||
assert(UseSVE > 0, "must be sve");
|
||||
// Two steps: first move the H elements of $src into S-sized lanes of $dst
// (presumably a plain widening with no value conversion - confirm against
// sve_vector_extend), then convert H -> S in place under the ptrue predicate.
__ sve_vector_extend($dst$$FloatRegister, __ S, $src$$FloatRegister, __ H);
|
||||
__ sve_fcvt($dst$$FloatRegister, __ S, ptrue, $dst$$FloatRegister, __ H);
|
||||
}
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// VectorCastF2HF
|
||||
|
||||
// NEON variant of VectorCastF2HF. Selected when use_neon_for_vector() accepts
// the byte length of the input vector (n->in(1), the float source), making it
// the complement of vcvtFtoHF_sve. Emits a single fcvtn.
instruct vcvtFtoHF_neon(vReg dst, vReg src) %{
|
||||
// The predicate inspects the source vector's size, not the result's.
predicate(VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1))));
|
||||
match(Set dst (VectorCastF2HF src));
|
||||
format %{ "vcvtFtoHF_neon $dst, $src\t# 4F to 4HF" %}
|
||||
ins_encode %{
|
||||
// 4F to 4HF
|
||||
__ fcvtn($dst$$FloatRegister, __ T4H, $src$$FloatRegister, __ T4S);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// SVE variant of VectorCastF2HF. Selected when use_neon_for_vector() rejects
// the byte length of the input vector (n->in(1)), i.e. exactly when
// vcvtFtoHF_neon does not apply. Needs one scratch vector register ($tmp).
instruct vcvtFtoHF_sve(vReg dst, vReg src, vReg tmp) %{
|
||||
predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1))));
|
||||
match(Set dst (VectorCastF2HF src));
|
||||
// $dst is written by the first instruction and read again by the second;
// $tmp is handed to sve_vector_narrow as its scratch register.
effect(TEMP_DEF dst, TEMP tmp);
|
||||
format %{ "vcvtFtoHF_sve $dst, $src\t# KILL $tmp" %}
|
||||
ins_encode %{
|
||||
assert(UseSVE > 0, "must be sve");
|
||||
// Step 1: convert S -> H under the ptrue predicate, result left in $dst.
__ sve_fcvt($dst$$FloatRegister, __ H, ptrue, $src$$FloatRegister, __ S);
|
||||
// Step 2: narrow $dst from S-sized to H-sized lanes in place
// (presumably packs the converted halves contiguously - confirm against
// sve_vector_narrow).
__ sve_vector_narrow($dst$$FloatRegister, __ H,
|
||||
$dst$$FloatRegister, __ S, $tmp$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// ------------------------------ Replicate ------------------------------------
|
||||
|
||||
// replicate from reg
|
||||
|
@ -1,6 +1,6 @@
|
||||
//
|
||||
// Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
// Copyright (c) 2020, 2022, Arm Limited. All rights reserved.
|
||||
// Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
// Copyright (c) 2020, 2023, Arm Limited. All rights reserved.
|
||||
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
//
|
||||
// This code is free software; you can redistribute it and/or modify it
|
||||
@ -2731,6 +2731,52 @@ instruct vcvtDtoF_gt64b(vReg dst, vReg src, vReg tmp) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// VectorCastHF2F
|
||||
|
||||
// Matcher rule for VectorCastHF2F: converts the half-precision elements of
// $src into single-precision elements in $dst. A single rule covers both
// code paths; the encoding chooses NEON or SVE from the byte length of the
// result vector (vector_length_in_bytes(this)).
instruct vcvtHFtoF(vReg dst, vReg src) %{
|
||||
match(Set dst (VectorCastHF2F src));
|
||||
format %{ "vcvtHFtoF $dst, $src" %}
|
||||
ins_encode %{
|
||||
// Byte length of this node's (i.e. the float result's) vector type.
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
|
||||
// 4HF to 4F
|
||||
__ fcvtl($dst$$FloatRegister, __ T4S, $src$$FloatRegister, __ T4H);
|
||||
} else {
|
||||
assert(UseSVE > 0, "must be sve");
|
||||
// Two steps: first move the H elements of $src into S-sized lanes of $dst
// (presumably a plain widening with no value conversion - confirm against
// sve_vector_extend), then convert H -> S in place under the ptrue predicate.
__ sve_vector_extend($dst$$FloatRegister, __ S, $src$$FloatRegister, __ H);
|
||||
__ sve_fcvt($dst$$FloatRegister, __ S, ptrue, $dst$$FloatRegister, __ H);
|
||||
}
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// VectorCastF2HF
|
||||
|
||||
// NEON variant of VectorCastF2HF. Selected when use_neon_for_vector() accepts
// the byte length of the input vector (n->in(1), the float source), making it
// the complement of vcvtFtoHF_sve. Emits a single fcvtn.
instruct vcvtFtoHF_neon(vReg dst, vReg src) %{
|
||||
// The predicate inspects the source vector's size, not the result's.
predicate(VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1))));
|
||||
match(Set dst (VectorCastF2HF src));
|
||||
format %{ "vcvtFtoHF_neon $dst, $src\t# 4F to 4HF" %}
|
||||
ins_encode %{
|
||||
// 4F to 4HF
|
||||
__ fcvtn($dst$$FloatRegister, __ T4H, $src$$FloatRegister, __ T4S);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// SVE variant of VectorCastF2HF. Selected when use_neon_for_vector() rejects
// the byte length of the input vector (n->in(1)), i.e. exactly when
// vcvtFtoHF_neon does not apply. Needs one scratch vector register ($tmp).
instruct vcvtFtoHF_sve(vReg dst, vReg src, vReg tmp) %{
|
||||
predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1))));
|
||||
match(Set dst (VectorCastF2HF src));
|
||||
// $dst is written by the first instruction and read again by the second;
// $tmp is handed to sve_vector_narrow as its scratch register.
effect(TEMP_DEF dst, TEMP tmp);
|
||||
format %{ "vcvtFtoHF_sve $dst, $src\t# KILL $tmp" %}
|
||||
ins_encode %{
|
||||
assert(UseSVE > 0, "must be sve");
|
||||
// Step 1: convert S -> H under the ptrue predicate, result left in $dst.
__ sve_fcvt($dst$$FloatRegister, __ H, ptrue, $src$$FloatRegister, __ S);
|
||||
// Step 2: narrow $dst from S-sized to H-sized lanes in place
// (presumably packs the converted halves contiguously - confirm against
// sve_vector_narrow).
__ sve_vector_narrow($dst$$FloatRegister, __ H,
|
||||
$dst$$FloatRegister, __ S, $tmp$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// ------------------------------ Replicate ------------------------------------
|
||||
|
||||
dnl REPLICATE_INT($1, $2, $3 )
|
||||
|
@ -3943,9 +3943,29 @@ void sve_fcm(Condition cond, PRegister Pd, SIMD_RegVariant T,
|
||||
starti;
|
||||
assert(T_src != B && T_dst != B && T_src != Q && T_dst != Q &&
|
||||
T_src != T_dst, "invalid register variant");
|
||||
guarantee(T_src != H && T_dst != H, "half-precision unsupported");
|
||||
f(0b01100101, 31, 24), f(0b11, 23, 22), f(0b0010, 21, 18);
|
||||
f(T_dst, 17, 16), f(0b101, 15, 13);
|
||||
// The encodings of fields op1 (bits 17-16) and op2 (bits 23-22)
|
||||
// depend on T_src and T_dst as given below -
|
||||
// +-----+------+---------------------------------------------+
|
||||
// | op2 | op1 | Instruction Details |
|
||||
// +-----+------+---------------------------------------------+
|
||||
// | 10 | 01 | FCVT - half-precision to single-precision |
|
||||
// | 11 | 01 | FCVT - half-precision to double-precision |
|
||||
// | 10 | 00 | FCVT - single-precision to half-precision |
|
||||
// | 11 | 11 | FCVT - single-precision to double-precision |
|
||||
// | 11 | 00 | FCVT - double-precision to half-precision |
|
||||
// | 11 | 10 | FCVT - double-precision to single-precision |
|
||||
// +-----+------+---------------------------------------------+
|
||||
int op1 = 0b00;
|
||||
int op2 = (T_src == D || T_dst == D) ? 0b11 : 0b10;
|
||||
if (T_src == H) {
|
||||
op1 = 0b01;
|
||||
} else if (T_dst == S) {
|
||||
op1 = 0b10;
|
||||
} else if (T_dst == D) {
|
||||
op1 = 0b11;
|
||||
}
|
||||
f(0b01100101, 31, 24), f(op2, 23, 22), f(0b0010, 21, 18);
|
||||
f(op1, 17, 16), f(0b101, 15, 13);
|
||||
pgrf(Pg, 10), rf(Zn, 5), rf(Zd, 0);
|
||||
}
|
||||
|
||||
|
@ -1772,6 +1772,10 @@ generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);",
|
||||
["scvtf", "__ sve_scvtf(z6, __ H, p3, z1, __ H);", "scvtf\tz6.h, p3/m, z1.h"],
|
||||
["fcvt", "__ sve_fcvt(z5, __ D, p3, z4, __ S);", "fcvt\tz5.d, p3/m, z4.s"],
|
||||
["fcvt", "__ sve_fcvt(z1, __ S, p3, z0, __ D);", "fcvt\tz1.s, p3/m, z0.d"],
|
||||
["fcvt", "__ sve_fcvt(z5, __ S, p3, z4, __ H);", "fcvt\tz5.s, p3/m, z4.h"],
|
||||
["fcvt", "__ sve_fcvt(z1, __ H, p3, z0, __ S);", "fcvt\tz1.h, p3/m, z0.s"],
|
||||
["fcvt", "__ sve_fcvt(z5, __ D, p3, z4, __ H);", "fcvt\tz5.d, p3/m, z4.h"],
|
||||
["fcvt", "__ sve_fcvt(z1, __ H, p3, z0, __ D);", "fcvt\tz1.h, p3/m, z0.d"],
|
||||
["fcvtzs", "__ sve_fcvtzs(z19, __ D, p2, z1, __ D);", "fcvtzs\tz19.d, p2/m, z1.d"],
|
||||
["fcvtzs", "__ sve_fcvtzs(z9, __ S, p1, z8, __ S);", "fcvtzs\tz9.s, p1/m, z8.s"],
|
||||
["fcvtzs", "__ sve_fcvtzs(z1, __ S, p2, z0, __ D);", "fcvtzs\tz1.s, p2/m, z0.d"],
|
||||
|
@ -915,6 +915,10 @@
|
||||
__ sve_scvtf(z6, __ H, p3, z1, __ H); // scvtf z6.h, p3/m, z1.h
|
||||
__ sve_fcvt(z5, __ D, p3, z4, __ S); // fcvt z5.d, p3/m, z4.s
|
||||
__ sve_fcvt(z1, __ S, p3, z0, __ D); // fcvt z1.s, p3/m, z0.d
|
||||
__ sve_fcvt(z5, __ S, p3, z4, __ H); // fcvt z5.s, p3/m, z4.h
|
||||
__ sve_fcvt(z1, __ H, p3, z0, __ S); // fcvt z1.h, p3/m, z0.s
|
||||
__ sve_fcvt(z5, __ D, p3, z4, __ H); // fcvt z5.d, p3/m, z4.h
|
||||
__ sve_fcvt(z1, __ H, p3, z0, __ D); // fcvt z1.h, p3/m, z0.d
|
||||
__ sve_fcvtzs(z19, __ D, p2, z1, __ D); // fcvtzs z19.d, p2/m, z1.d
|
||||
__ sve_fcvtzs(z9, __ S, p1, z8, __ S); // fcvtzs z9.s, p1/m, z8.s
|
||||
__ sve_fcvtzs(z1, __ S, p2, z0, __ D); // fcvtzs z1.s, p2/m, z0.d
|
||||
@ -1245,30 +1249,30 @@
|
||||
0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061,
|
||||
0x120cb166, 0x321764bc, 0x52174681, 0x720c0227,
|
||||
0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01,
|
||||
0x14000000, 0x17ffffd7, 0x14000405, 0x94000000,
|
||||
0x97ffffd4, 0x94000402, 0x3400000a, 0x34fffa2a,
|
||||
0x34007fea, 0x35000008, 0x35fff9c8, 0x35007f88,
|
||||
0xb400000b, 0xb4fff96b, 0xb4007f2b, 0xb500001d,
|
||||
0xb5fff91d, 0xb5007edd, 0x10000013, 0x10fff8b3,
|
||||
0x10007e73, 0x90000013, 0x36300016, 0x3637f836,
|
||||
0x36307df6, 0x3758000c, 0x375ff7cc, 0x37587d8c,
|
||||
0x14000000, 0x17ffffd7, 0x14000409, 0x94000000,
|
||||
0x97ffffd4, 0x94000406, 0x3400000a, 0x34fffa2a,
|
||||
0x3400806a, 0x35000008, 0x35fff9c8, 0x35008008,
|
||||
0xb400000b, 0xb4fff96b, 0xb4007fab, 0xb500001d,
|
||||
0xb5fff91d, 0xb5007f5d, 0x10000013, 0x10fff8b3,
|
||||
0x10007ef3, 0x90000013, 0x36300016, 0x3637f836,
|
||||
0x36307e76, 0x3758000c, 0x375ff7cc, 0x37587e0c,
|
||||
0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc,
|
||||
0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f,
|
||||
0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016,
|
||||
0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0,
|
||||
0x54007b60, 0x54000001, 0x54fff541, 0x54007b01,
|
||||
0x54000002, 0x54fff4e2, 0x54007aa2, 0x54000002,
|
||||
0x54fff482, 0x54007a42, 0x54000003, 0x54fff423,
|
||||
0x540079e3, 0x54000003, 0x54fff3c3, 0x54007983,
|
||||
0x54000004, 0x54fff364, 0x54007924, 0x54000005,
|
||||
0x54fff305, 0x540078c5, 0x54000006, 0x54fff2a6,
|
||||
0x54007866, 0x54000007, 0x54fff247, 0x54007807,
|
||||
0x54000008, 0x54fff1e8, 0x540077a8, 0x54000009,
|
||||
0x54fff189, 0x54007749, 0x5400000a, 0x54fff12a,
|
||||
0x540076ea, 0x5400000b, 0x54fff0cb, 0x5400768b,
|
||||
0x5400000c, 0x54fff06c, 0x5400762c, 0x5400000d,
|
||||
0x54fff00d, 0x540075cd, 0x5400000e, 0x54ffefae,
|
||||
0x5400756e, 0x5400000f, 0x54ffef4f, 0x5400750f,
|
||||
0x54007be0, 0x54000001, 0x54fff541, 0x54007b81,
|
||||
0x54000002, 0x54fff4e2, 0x54007b22, 0x54000002,
|
||||
0x54fff482, 0x54007ac2, 0x54000003, 0x54fff423,
|
||||
0x54007a63, 0x54000003, 0x54fff3c3, 0x54007a03,
|
||||
0x54000004, 0x54fff364, 0x540079a4, 0x54000005,
|
||||
0x54fff305, 0x54007945, 0x54000006, 0x54fff2a6,
|
||||
0x540078e6, 0x54000007, 0x54fff247, 0x54007887,
|
||||
0x54000008, 0x54fff1e8, 0x54007828, 0x54000009,
|
||||
0x54fff189, 0x540077c9, 0x5400000a, 0x54fff12a,
|
||||
0x5400776a, 0x5400000b, 0x54fff0cb, 0x5400770b,
|
||||
0x5400000c, 0x54fff06c, 0x540076ac, 0x5400000d,
|
||||
0x54fff00d, 0x5400764d, 0x5400000e, 0x54ffefae,
|
||||
0x540075ee, 0x5400000f, 0x54ffef4f, 0x5400758f,
|
||||
0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60,
|
||||
0xd44cad80, 0xd503201f, 0xd503203f, 0xd503205f,
|
||||
0xd503209f, 0xd50320bf, 0xd503219f, 0xd50323bf,
|
||||
@ -1434,7 +1438,8 @@
|
||||
0x05733820, 0x05b238a4, 0x05f138e6, 0x0570396a,
|
||||
0x65d0a001, 0x65d6a443, 0x65d4a826, 0x6594ac26,
|
||||
0x6554ac26, 0x6556ac26, 0x6552ac26, 0x65cbac85,
|
||||
0x65caac01, 0x65dea833, 0x659ca509, 0x65d8a801,
|
||||
0x65caac01, 0x6589ac85, 0x6588ac01, 0x65c9ac85,
|
||||
0x65c8ac01, 0x65dea833, 0x659ca509, 0x65d8a801,
|
||||
0x65dcac01, 0x655cb241, 0x0520a1e0, 0x0521a601,
|
||||
0x052281e0, 0x05238601, 0x04a14026, 0x042244a6,
|
||||
0x046344a6, 0x04a444a6, 0x04e544a7, 0x0568aca7,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2022, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -26,7 +26,7 @@
|
||||
* @bug 8294588
|
||||
* @summary Auto-vectorize Float.floatToFloat16, Float.float16ToFloat APIs
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires os.simpleArch == "x64"
|
||||
* @requires (os.simpleArch == "x64" & (vm.cpu.features ~= ".*avx512f.*" | vm.cpu.features ~= ".*f16c.*")) | os.arch == "aarch64"
|
||||
* @library /test/lib /
|
||||
* @run driver compiler.vectorization.TestFloatConversionsVector
|
||||
*/
|
||||
@ -34,62 +34,73 @@
|
||||
package compiler.vectorization;
|
||||
|
||||
import compiler.lib.ir_framework.*;
|
||||
import jdk.test.lib.Asserts;
|
||||
|
||||
public class TestFloatConversionsVector {
|
||||
private static final int ARRLEN = 1024;
|
||||
private static final int ITERS = 11000;
|
||||
private static float [] finp;
|
||||
private static short [] sout;
|
||||
private static short [] sinp;
|
||||
private static float [] fout;
|
||||
private static final int ARRLEN = 1024;
|
||||
private static final int ITERS = 11000;
|
||||
private static float [] finp;
|
||||
private static short [] sout;
|
||||
private static short [] sinp;
|
||||
private static float [] fout;
|
||||
|
||||
public static void main(String args[]) {
|
||||
TestFramework.runWithFlags("-XX:-TieredCompilation",
|
||||
"-XX:CompileThresholdScaling=0.3");
|
||||
System.out.println("PASSED");
|
||||
}
|
||||
public static void main(String args[]) {
|
||||
TestFramework.runWithFlags("-XX:-TieredCompilation",
|
||||
"-XX:CompileThresholdScaling=0.3");
|
||||
System.out.println("PASSED");
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.VECTOR_CAST_F2HF, "> 0"}, applyIfCPUFeatureOr = {"avx512f", "true", "f16c", "true"})
|
||||
public void test_float_float16(short[] sout, float[] finp) {
|
||||
for (int i = 0; i < finp.length; i++) {
|
||||
sout[i] = Float.floatToFloat16(finp[i]);
|
||||
}
|
||||
}
|
||||
@Test
|
||||
@IR(counts = {IRNode.VECTOR_CAST_F2HF, "> 0"})
|
||||
public void test_float_float16(short[] sout, float[] finp) {
|
||||
for (int i = 0; i < finp.length; i++) {
|
||||
sout[i] = Float.floatToFloat16(finp[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = {"test_float_float16"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_test_float_float16() {
|
||||
finp = new float[ARRLEN];
|
||||
sout = new short[ARRLEN];
|
||||
@Run(test = {"test_float_float16"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_test_float_float16() {
|
||||
finp = new float[ARRLEN];
|
||||
sout = new short[ARRLEN];
|
||||
|
||||
for (int i = 0; i < ARRLEN; i++) {
|
||||
finp[i] = (float) i * 1.4f;
|
||||
}
|
||||
for (int i = 0; i < ARRLEN; i++) {
|
||||
finp[i] = (float) i * 1.4f;
|
||||
}
|
||||
|
||||
for (int i = 0; i < ITERS; i++) {
|
||||
test_float_float16(sout, finp);
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < ITERS; i++) {
|
||||
test_float_float16(sout, finp);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.VECTOR_CAST_HF2F, "> 0"}, applyIfCPUFeatureOr = {"avx512f", "true", "f16c", "true"})
|
||||
public void test_float16_float(float[] fout, short[] sinp) {
|
||||
for (int i = 0; i < sinp.length; i++) {
|
||||
fout[i] = Float.float16ToFloat(sinp[i]);
|
||||
}
|
||||
}
|
||||
// Verifying the result
|
||||
for (int i = 0; i < ARRLEN; i++) {
|
||||
Asserts.assertEquals(Float.floatToFloat16(finp[i]), sout[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = {"test_float16_float"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_test_float16_float() {
|
||||
sinp = new short[ARRLEN];
|
||||
fout = new float[ARRLEN];
|
||||
@Test
|
||||
@IR(counts = {IRNode.VECTOR_CAST_HF2F, "> 0"})
|
||||
public void test_float16_float(float[] fout, short[] sinp) {
|
||||
for (int i = 0; i < sinp.length; i++) {
|
||||
fout[i] = Float.float16ToFloat(sinp[i]);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < ARRLEN; i++) {
|
||||
sinp[i] = (short)i;
|
||||
}
|
||||
@Run(test = {"test_float16_float"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_test_float16_float() {
|
||||
sinp = new short[ARRLEN];
|
||||
fout = new float[ARRLEN];
|
||||
|
||||
for (int i = 0; i < ITERS; i++) {
|
||||
test_float16_float(fout , sinp);
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < ARRLEN; i++) {
|
||||
sinp[i] = (short)i;
|
||||
}
|
||||
|
||||
for (int i = 0; i < ITERS; i++) {
|
||||
test_float16_float(fout, sinp);
|
||||
}
|
||||
|
||||
// Verifying the result
|
||||
for (int i = 0; i < ARRLEN; i++) {
|
||||
Asserts.assertEquals(Float.float16ToFloat(sinp[i]), fout[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user