8289552: Make intrinsic conversions between bit representations of half precision values and floats
Reviewed-by: kvn, sviswanathan, jbhateja
This commit is contained in:
parent
2586b1a3c1
commit
07946aa49c
@ -1930,6 +1930,34 @@ void Assembler::vcvtdq2pd(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||
emit_int16((unsigned char)0xE6, (0xC0 | encode));
|
||||
}
|
||||
|
||||
// Emits the register-to-register form of VCVTPS2PH: opcode 0x1D with the 66
// SIMD prefix in the 0F 3A opcode map, followed by a ModRM byte and imm8.
//
//   dst        - XMM register receiving the converted half-precision values
//   src        - XMM register holding the single-precision inputs
//   imm8       - emitted verbatim as the instruction's trailing immediate byte
//                (NOTE(review): the ISA uses it as rounding control - confirm)
//   vector_len - vector length forwarded to the instruction attributes
//
// Asserts that the CPU reports either AVX512VL or F16C.
void Assembler::vcvtps2ph(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
  assert(VM_Version::supports_avx512vl() || VM_Version::supports_f16c(), "");
  InstructionAttr attrs(vector_len,
                        /* rex_w */ false,
                        /* legacy_mode */ false,
                        /* no_mask_reg */ true,
                        /* uses_vl */ true);
  // Note the operand order: the src encoding is passed first and the dst
  // encoding last for this instruction.
  const int reg_bits = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(),
                                             VEX_SIMD_66, VEX_OPCODE_0F_3A, &attrs);
  const int modrm = 0xC0 | reg_bits;
  emit_int24(0x1D, modrm, imm8);
}
|
||||
|
||||
// Emits the EVEX-encoded, opmask-predicated, store-to-memory form of
// VCVTPS2PH: opcode 0x1D with the 66 SIMD prefix in the 0F 3A opcode map,
// followed by the memory operand and imm8.
//
//   dst        - memory destination
//   mask       - opmask register embedded in the EVEX prefix
//   src        - XMM register holding the single-precision inputs
//   imm8       - emitted verbatim as the instruction's trailing immediate byte
//   vector_len - vector length forwarded to the instruction attributes
//
// Asserts that the CPU reports AVX512VL.
void Assembler::evcvtps2ph(Address dst, KRegister mask, XMMRegister src, int imm8, int vector_len) {
  assert(VM_Version::supports_avx512vl(), "");
  InstructionMark im(this);
  InstructionAttr attrs(vector_len,
                        /* rex_w */ false,
                        /* legacy_mode */ false,
                        /* no_mask_reg */ false,
                        /* uses_vl */ true);
  // Force the EVEX encoding and attach the opmask; the clear-context flag is
  // reset so the instruction is emitted without zeroing-masking.
  attrs.set_is_evex_instruction();
  attrs.set_embedded_opmask_register_specifier(mask);
  attrs.reset_is_clear_context();
  attrs.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_64bit);
  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attrs);
  emit_int8(0x1D);
  // NOTE(review): the trailing 1 presumably accounts for the single immediate
  // byte emitted after the operand - confirm against emit_operand.
  emit_operand(src, dst, 1);
  emit_int8(imm8);
}
|
||||
|
||||
// Emits the register-to-register form of VCVTPH2PS: opcode 0x13 with the 66
// SIMD prefix in the 0F 38 opcode map, followed by a ModRM byte.
//
//   dst        - XMM register receiving the single-precision results
//   src        - XMM register holding the half-precision inputs
//   vector_len - vector length forwarded to the instruction attributes
//
// Asserts that the CPU reports either AVX512VL or F16C.
void Assembler::vcvtph2ps(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx512vl() || VM_Version::supports_f16c(), "");
  InstructionAttr attrs(vector_len,
                        /* rex_w */ false,
                        /* legacy_mode */ false,
                        /* no_mask_reg */ true,
                        /* uses_vl */ true);
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(),
                                             VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  const int modrm = 0xC0 | reg_bits;
  emit_int16(0x13, modrm);
}
|
||||
|
||||
void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
|
@ -1156,6 +1156,11 @@ private:
|
||||
void cvtdq2pd(XMMRegister dst, XMMRegister src);
|
||||
void vcvtdq2pd(XMMRegister dst, XMMRegister src, int vector_len);
|
||||
|
||||
// Convert between half-precision (binary16) and single-precision floating-point values
|
||||
void vcvtps2ph(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
|
||||
void vcvtph2ps(XMMRegister dst, XMMRegister src, int vector_len);
|
||||
void evcvtps2ph(Address dst, KRegister mask, XMMRegister src, int imm8, int vector_len);
|
||||
|
||||
// Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Value
|
||||
void cvtdq2ps(XMMRegister dst, XMMRegister src);
|
||||
void vcvtdq2ps(XMMRegister dst, XMMRegister src, int vector_len);
|
||||
|
@ -2883,6 +2883,8 @@ uint64_t VM_Version::feature_flags() {
|
||||
_cpuid_info.xem_xcr0_eax.bits.ymm != 0) {
|
||||
result |= CPU_AVX;
|
||||
result |= CPU_VZEROUPPER;
|
||||
if (_cpuid_info.std_cpuid1_ecx.bits.f16c != 0)
|
||||
result |= CPU_F16C;
|
||||
if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
|
||||
result |= CPU_AVX2;
|
||||
if (_cpuid_info.sef_cpuid7_ebx.bits.avx512f != 0 &&
|
||||
|
@ -89,7 +89,8 @@ class VM_Version : public Abstract_VM_Version {
|
||||
: 1,
|
||||
osxsave : 1,
|
||||
avx : 1,
|
||||
: 2,
|
||||
f16c : 1,
|
||||
: 1,
|
||||
hv : 1;
|
||||
} bits;
|
||||
};
|
||||
@ -374,7 +375,8 @@ protected:
|
||||
decl(RDPID, "rdpid", 49) /* RDPID instruction */ \
|
||||
decl(FSRM, "fsrm", 50) /* Fast Short REP MOV */ \
|
||||
decl(GFNI, "gfni", 51) /* Vector GFNI instructions */ \
|
||||
decl(AVX512_BITALG, "avx512_bitalg", 52) /* Vector sub-word popcount and bit gather instructions */
|
||||
decl(AVX512_BITALG, "avx512_bitalg", 52) /* Vector sub-word popcount and bit gather instructions */\
|
||||
decl(F16C, "f16c", 53) /* Half-precision and single precision FP conversion instructions*/
|
||||
|
||||
#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1ULL << bit),
|
||||
CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG)
|
||||
@ -681,6 +683,7 @@ public:
|
||||
static bool supports_avx512_vbmi2() { return (_features & CPU_AVX512_VBMI2) != 0; }
|
||||
static bool supports_hv() { return (_features & CPU_HV) != 0; }
|
||||
static bool supports_serialize() { return (_features & CPU_SERIALIZE) != 0; }
|
||||
static bool supports_f16c() { return (_features & CPU_F16C) != 0; }
|
||||
|
||||
// Intel features
|
||||
static bool is_intel_family_core() { return is_intel() &&
|
||||
|
@ -1678,6 +1678,12 @@ const bool Matcher::match_rule_supported(int opcode) {
|
||||
// Together with common x86 rules, this handles all UseSSE cases.
|
||||
#endif
|
||||
break;
|
||||
case Op_ConvF2HF:
|
||||
case Op_ConvHF2F:
|
||||
if (!VM_Version::supports_f16c() && !VM_Version::supports_avx512vl()) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return true; // Match rules are supported by default.
|
||||
}
|
||||
@ -3652,6 +3658,41 @@ instruct sqrtD_reg(regD dst) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Float -> half-float bits, register to register.
// The conversion result is produced in the XMM temporary and then moved to
// the integer destination register.
instruct convF2HF_reg_reg(rRegI dst, regF src, regF tmp) %{
  match(Set dst (ConvF2HF src));
  effect(TEMP tmp);
  ins_cost(125);
  format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
  ins_encode %{
    // Convert in the vector domain; 0x04 is passed through as the imm8 byte.
    __ vcvtps2ph($tmp$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
    // Move the low doubleword of the temporary into the general register ...
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    // ... and sign-extend its low 16 bits so the result is a proper short.
    __ movswl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}
|
||||
|
||||
// Float -> half-float bits stored directly to memory (StoreC of a ConvF2HF).
// Only selected when AVX-512 with the VL extension is available, because it
// relies on the opmask-predicated store form of the conversion.
instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
  predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
  match(Set mem (StoreC mem (ConvF2HF src)));
  effect(TEMP ktmp, TEMP rtmp);
  format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    // Build an opmask with only bit 0 set (presumably so that just the lowest
    // element is written to memory).
    __ movl($rtmp$$Register, 0x1);
    __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
    // Masked convert-and-store; 0x04 is passed through as the imm8 byte.
    __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
|
||||
|
||||
// Half-float bits -> float, register to register.
// The integer source is first moved into the destination XMM register, which
// is then converted in place.
instruct convHF2F_reg_reg(regF dst, rRegI src) %{
  match(Set dst (ConvHF2F src));
  format %{ "vcvtph2ps $dst,$src" %}
  ins_encode %{
    // Move the general register into the vector register ...
    __ movdl($dst$$XMMRegister, $src$$Register);
    // ... and widen the half-precision value(s) it now holds.
    __ vcvtph2ps($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
|
||||
|
||||
// ---------------------------------------- VectorReinterpret ------------------------------------
|
||||
instruct reinterpret_mask(kReg dst) %{
|
||||
|
@ -224,6 +224,12 @@ class methodHandle;
|
||||
do_name( doubleToLongBits_name, "doubleToLongBits") \
|
||||
do_intrinsic(_longBitsToDouble, java_lang_Double, longBitsToDouble_name, long_double_signature, F_SN)\
|
||||
do_name( longBitsToDouble_name, "longBitsToDouble") \
|
||||
do_intrinsic(_float16ToFloat, java_lang_Float, float16ToFloat_name, f16_float_signature, F_S) \
|
||||
do_name( float16ToFloat_name, "float16ToFloat") \
|
||||
do_signature(f16_float_signature, "(S)F") \
|
||||
do_intrinsic(_floatToFloat16, java_lang_Float, floatToFloat16_name, float_f16_signature, F_S) \
|
||||
do_name( floatToFloat16_name, "floatToFloat16") \
|
||||
do_signature(float_f16_signature, "(F)S") \
|
||||
\
|
||||
do_intrinsic(_compareUnsigned_i, java_lang_Integer, compareUnsigned_name, int2_int_signature, F_S) \
|
||||
do_intrinsic(_compareUnsigned_l, java_lang_Long, compareUnsigned_name, long2_int_signature, F_S) \
|
||||
|
@ -300,6 +300,12 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
|
||||
case vmIntrinsics::_remainderUnsigned_l:
|
||||
if (!Matcher::match_rule_supported(Op_UModL)) return false;
|
||||
break;
|
||||
case vmIntrinsics::_float16ToFloat:
|
||||
if (!Matcher::match_rule_supported(Op_ConvHF2F)) return false;
|
||||
break;
|
||||
case vmIntrinsics::_floatToFloat16:
|
||||
if (!Matcher::match_rule_supported(Op_ConvF2HF)) return false;
|
||||
break;
|
||||
|
||||
/* CompareAndSet, Object: */
|
||||
case vmIntrinsics::_compareAndSetReference:
|
||||
|
@ -149,6 +149,8 @@ macro(ConvI2L)
|
||||
macro(ConvL2D)
|
||||
macro(ConvL2F)
|
||||
macro(ConvL2I)
|
||||
macro(ConvF2HF)
|
||||
macro(ConvHF2F)
|
||||
macro(CountedLoop)
|
||||
macro(CountedLoopEnd)
|
||||
macro(OuterStripMinedLoop)
|
||||
|
@ -161,6 +161,21 @@ const Type* ConvF2DNode::Value(PhaseGVN* phase) const {
|
||||
return TypeD::make( (double)tf->getf() );
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
//------------------------------Value------------------------------------------
|
||||
// Computes the type of this node from the type of its input:
//   - TOP input            -> TOP
//   - Type::FLOAT input    -> TypeInt::SHORT
//   - otherwise the input is treated as a float constant and folded through
//     SharedRuntime::f2hf into an integer constant type.
const Type* ConvF2HFNode::Value(PhaseGVN* phase) const {
  const Type* in_type = phase->type(in(1));
  if (in_type == Type::TOP) {
    return Type::TOP;
  }
  if (in_type == Type::FLOAT) {
    return TypeInt::SHORT;
  }
  // Constant folding: convert the constant at compile time.
  const TypeF* float_con = in_type->is_float_constant();
  return TypeInt::make(SharedRuntime::f2hf(float_con->getf()));
}
|
||||
|
||||
//------------------------------Identity---------------------------------------
|
||||
// A float->half conversion whose input is itself a half->float conversion
// collapses to the original half-float value; otherwise the node is kept.
Node* ConvF2HFNode::Identity(PhaseGVN* phase) {
  if (in(1)->Opcode() != Op_ConvHF2F) {
    return this;
  }
  // ConvF2HF(ConvHF2F(x)) => x
  return in(1)->in(1);
}
|
||||
|
||||
//=============================================================================
|
||||
//------------------------------Value------------------------------------------
|
||||
const Type* ConvF2INode::Value(PhaseGVN* phase) const {
|
||||
@ -219,6 +234,18 @@ Node *ConvF2LNode::Ideal(PhaseGVN *phase, bool can_reshape) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
//------------------------------Value------------------------------------------
|
||||
// Computes the type of this node from the type of its input:
//   - TOP input             -> TOP
//   - TypeInt::SHORT input  -> Type::FLOAT
//   - integer constant      -> float constant folded through SharedRuntime::hf2f
//   - anything else         -> bottom_type()
const Type* ConvHF2FNode::Value(PhaseGVN* phase) const {
  const Type* in_type = phase->type(in(1));
  if (in_type == Type::TOP) {
    return Type::TOP;
  }
  if (in_type == TypeInt::SHORT) {
    return Type::FLOAT;
  }
  const TypeInt* int_type = in_type->is_int();
  if (!int_type->is_con()) {
    return bottom_type();
  }
  // Constant folding: convert the constant at compile time.
  return TypeF::make(SharedRuntime::hf2f(int_type->get_con()));
}
|
||||
|
||||
//=============================================================================
|
||||
//------------------------------Value------------------------------------------
|
||||
const Type* ConvI2DNode::Value(PhaseGVN* phase) const {
|
||||
|
@ -100,6 +100,18 @@ class ConvF2DNode : public Node {
|
||||
virtual uint ideal_reg() const { return Op_RegD; }
|
||||
};
|
||||
|
||||
//------------------------------ConvF2HFNode------------------------------------
|
||||
// Convert Float to Halffloat
|
||||
class ConvF2HFNode : public Node {
|
||||
public:
|
||||
ConvF2HFNode( Node *in1 ) : Node(0,in1) {}
|
||||
virtual int Opcode() const;
|
||||
virtual const Type *bottom_type() const { return TypeInt::SHORT; }
|
||||
virtual const Type* Value(PhaseGVN* phase) const;
|
||||
virtual Node* Identity(PhaseGVN* phase);
|
||||
virtual uint ideal_reg() const { return Op_RegI; }
|
||||
};
|
||||
|
||||
//------------------------------ConvF2INode------------------------------------
|
||||
// Convert float to integer
|
||||
class ConvF2INode : public Node {
|
||||
@ -127,6 +139,17 @@ class ConvF2LNode : public Node {
|
||||
virtual uint ideal_reg() const { return Op_RegL; }
|
||||
};
|
||||
|
||||
//------------------------------ConvHF2FNode------------------------------------
|
||||
// Convert Halffloat to float
|
||||
class ConvHF2FNode : public Node {
|
||||
public:
|
||||
ConvHF2FNode( Node *in1 ) : Node(0,in1) {}
|
||||
virtual int Opcode() const;
|
||||
virtual const Type *bottom_type() const { return Type::FLOAT; }
|
||||
virtual const Type* Value(PhaseGVN* phase) const;
|
||||
virtual uint ideal_reg() const { return Op_RegF; }
|
||||
};
|
||||
|
||||
//------------------------------ConvI2DNode------------------------------------
|
||||
// Convert Integer to Double
|
||||
class ConvI2DNode : public Node {
|
||||
|
@ -514,7 +514,9 @@ bool LibraryCallKit::try_to_inline(int predicate) {
|
||||
case vmIntrinsics::_intBitsToFloat:
|
||||
case vmIntrinsics::_doubleToRawLongBits:
|
||||
case vmIntrinsics::_doubleToLongBits:
|
||||
case vmIntrinsics::_longBitsToDouble: return inline_fp_conversions(intrinsic_id());
|
||||
case vmIntrinsics::_longBitsToDouble:
|
||||
case vmIntrinsics::_floatToFloat16:
|
||||
case vmIntrinsics::_float16ToFloat: return inline_fp_conversions(intrinsic_id());
|
||||
|
||||
case vmIntrinsics::_floatIsFinite:
|
||||
case vmIntrinsics::_floatIsInfinite:
|
||||
@ -4440,6 +4442,8 @@ bool LibraryCallKit::inline_fp_conversions(vmIntrinsics::ID id) {
|
||||
case vmIntrinsics::_intBitsToFloat: result = new MoveI2FNode(arg); break;
|
||||
case vmIntrinsics::_doubleToRawLongBits: result = new MoveD2LNode(arg); break;
|
||||
case vmIntrinsics::_longBitsToDouble: result = new MoveL2DNode(arg); break;
|
||||
case vmIntrinsics::_floatToFloat16: result = new ConvF2HFNode(arg); break;
|
||||
case vmIntrinsics::_float16ToFloat: result = new ConvHF2FNode(arg); break;
|
||||
|
||||
case vmIntrinsics::_doubleToLongBits: {
|
||||
// two paths (plus control) merge in a wood
|
||||
|
@ -271,6 +271,10 @@ JRT_LEAF(jdouble, SharedRuntime::drem(jdouble x, jdouble y))
|
||||
#endif
|
||||
JRT_END
|
||||
|
||||
JRT_LEAF(jfloat, SharedRuntime::i2f(jint x))
|
||||
return (jfloat)x;
|
||||
JRT_END
|
||||
|
||||
#ifdef __SOFTFP__
|
||||
JRT_LEAF(jfloat, SharedRuntime::fadd(jfloat x, jfloat y))
|
||||
return x + y;
|
||||
@ -304,10 +308,6 @@ JRT_LEAF(jdouble, SharedRuntime::ddiv(jdouble x, jdouble y))
|
||||
return x / y;
|
||||
JRT_END
|
||||
|
||||
JRT_LEAF(jfloat, SharedRuntime::i2f(jint x))
|
||||
return (jfloat)x;
|
||||
JRT_END
|
||||
|
||||
JRT_LEAF(jdouble, SharedRuntime::i2d(jint x))
|
||||
return (jdouble)x;
|
||||
JRT_END
|
||||
@ -448,6 +448,86 @@ JRT_LEAF(jdouble, SharedRuntime::l2d(jlong x))
|
||||
return (jdouble)x;
|
||||
JRT_END
|
||||
|
||||
// Reference implementation at src/java.base/share/classes/java/lang/Float.java:floatToFloat16
|
||||
JRT_LEAF(jshort, SharedRuntime::f2hf(jfloat x))
|
||||
jint doppel = SharedRuntime::f2i(x);
|
||||
jshort sign_bit = (jshort) ((doppel & 0x80000000) >> 16);
|
||||
if (g_isnan(x))
|
||||
return (jshort)(sign_bit | 0x7c00 | (doppel & 0x007fe000) >> 13 | (doppel & 0x00001ff0) >> 4 | (doppel & 0x0000000f));
|
||||
|
||||
jfloat abs_f = (x >= 0.0f) ? x : (x * -1.0f);
|
||||
|
||||
// Overflow threshold is halffloat max value + 1/2 ulp
|
||||
if (abs_f >= (65504.0f + 16.0f)) {
|
||||
return (jshort)(sign_bit | 0x7c00); // Positive or negative infinity
|
||||
}
|
||||
|
||||
// Smallest magnitude of Halffloat is 0x1.0p-24, half-way or smaller rounds to zero
|
||||
if (abs_f <= (pow(2, -24) * 0.5f)) { // Covers float zeros and subnormals.
|
||||
return sign_bit; // Positive or negative zero
|
||||
}
|
||||
|
||||
jint exp = 0x7f800000 & doppel;
|
||||
|
||||
// For binary16 subnormals, beside forcing exp to -15, retain
|
||||
// the difference exp_delta = E_min - exp. This is the excess
|
||||
// shift value, in addition to 13, to be used in the
|
||||
// computations below. Further the (hidden) msb with value 1
|
||||
// in f must be involved as well
|
||||
jint exp_delta = 0;
|
||||
jint msb = 0x00000000;
|
||||
if (exp < -14) {
|
||||
exp_delta = -14 - exp;
|
||||
exp = -15;
|
||||
msb = 0x00800000;
|
||||
}
|
||||
jint f_signif_bits = ((doppel & 0x007fffff) | msb);
|
||||
|
||||
// Significand bits as if using rounding to zero
|
||||
jshort signif_bits = (jshort)(f_signif_bits >> (13 + exp_delta));
|
||||
|
||||
jint lsb = f_signif_bits & (1 << (13 + exp_delta));
|
||||
jint round = f_signif_bits & (1 << (12 + exp_delta));
|
||||
jint sticky = f_signif_bits & ((1 << (12 + exp_delta)) - 1);
|
||||
|
||||
if (round != 0 && ((lsb | sticky) != 0 )) {
|
||||
signif_bits++;
|
||||
}
|
||||
|
||||
return (jshort)(sign_bit | ( ((exp + 15) << 10) + signif_bits ) );
|
||||
JRT_END
|
||||
|
||||
// Reference implementation at src/java.base/share/classes/java/lang/Float.java:float16ToFloat
|
||||
JRT_LEAF(jfloat, SharedRuntime::hf2f(jshort x))
|
||||
// Halffloat format has 1 signbit, 5 exponent bits and
|
||||
// 10 significand bits
|
||||
jint hf_arg = (jint)x;
|
||||
jint hf_sign_bit = 0x8000 & hf_arg;
|
||||
jint hf_exp_bits = 0x7c00 & hf_arg;
|
||||
jint hf_significand_bits = 0x03ff & hf_arg;
|
||||
|
||||
jint significand_shift = 13; //difference between float and halffloat precision
|
||||
|
||||
jfloat sign = (hf_sign_bit != 0) ? -1.0f : 1.0f;
|
||||
|
||||
// Extract halffloat exponent, remove its bias
|
||||
jint hf_exp = (hf_exp_bits >> 10) - 15;
|
||||
|
||||
if (hf_exp == -15) {
|
||||
// For subnormal values, return 2^-24 * significand bits
|
||||
return (sign * (pow(2,-24)) * hf_significand_bits);
|
||||
}else if (hf_exp == 16) {
|
||||
return (hf_significand_bits == 0) ? sign * float_infinity : (SharedRuntime::i2f((hf_sign_bit << 16) | 0x7f800000 |
|
||||
(hf_significand_bits << significand_shift)));
|
||||
}
|
||||
|
||||
// Add the bias of float exponent and shift
|
||||
int float_exp_bits = (hf_exp + 127) << (24 - 1);
|
||||
|
||||
// Combine sign, exponent and significand bits
|
||||
return SharedRuntime::i2f((hf_sign_bit << 16) | float_exp_bits | (hf_significand_bits << significand_shift));
|
||||
JRT_END
|
||||
|
||||
// Exception handling across interpreter/compiler boundaries
|
||||
//
|
||||
// exception_handler_for_return_address(...) returns the continuation address.
|
||||
|
@ -129,9 +129,11 @@ class SharedRuntime: AllStatic {
|
||||
static jfloat d2f (jdouble x);
|
||||
static jfloat l2f (jlong x);
|
||||
static jdouble l2d (jlong x);
|
||||
static jfloat hf2f(jshort x);
|
||||
static jshort f2hf(jfloat x);
|
||||
static jfloat i2f (jint x);
|
||||
|
||||
#ifdef __SOFTFP__
|
||||
static jfloat i2f (jint x);
|
||||
static jdouble i2d (jint x);
|
||||
static jdouble f2d (jfloat x);
|
||||
#endif // __SOFTFP__
|
||||
|
@ -1013,7 +1013,7 @@ public final class Float extends Number
|
||||
* @param floatBinary16 the binary16 value to convert to {@code float}
|
||||
* @since 20
|
||||
*/
|
||||
// @IntrinsicCandidate
|
||||
@IntrinsicCandidate
|
||||
public static float float16ToFloat(short floatBinary16) {
|
||||
/*
|
||||
* The binary16 format has 1 sign bit, 5 exponent bits, and 10
|
||||
@ -1088,7 +1088,7 @@ public final class Float extends Number
|
||||
* @param f the {@code float} value to convert to binary16
|
||||
* @since 20
|
||||
*/
|
||||
// @IntrinsicCandidate
|
||||
@IntrinsicCandidate
|
||||
public static short floatToFloat16(float f) {
|
||||
int doppel = Float.floatToRawIntBits(f);
|
||||
short sign_bit = (short)((doppel & 0x8000_0000) >> 16);
|
||||
|
@ -226,6 +226,7 @@ public class AMD64 extends Architecture {
|
||||
FSRM,
|
||||
GFNI,
|
||||
AVX512_BITALG,
|
||||
F16C,
|
||||
}
|
||||
|
||||
private final EnumSet<CPUFeature> features;
|
||||
|
@ -28,6 +28,8 @@
|
||||
* @library ../Math
|
||||
* @build FloatConsts
|
||||
* @run main Binary16Conversion
|
||||
* @run main/othervm -XX:+UnlockDiagnosticVMOptions
|
||||
* -XX:DisableIntrinsic=_float16ToFloat,_floatToFloat16 Binary16Conversion
|
||||
*/
|
||||
|
||||
public class Binary16Conversion {
|
||||
|
@ -26,6 +26,9 @@
|
||||
* @bug 8289551
|
||||
* @requires (os.arch != "x86" & os.arch != "i386") | vm.opt.UseSSE == "null" | vm.opt.UseSSE > 0
|
||||
* @summary Verify NaN sign and significand bits are preserved across conversions
|
||||
* @run main/othervm -XX:-TieredCompilation -XX:CompileThresholdScaling=0.1 Binary16ConversionNaN
|
||||
* @run main/othervm -XX:+UnlockDiagnosticVMOptions
|
||||
* -XX:DisableIntrinsic=_float16ToFloat,_floatToFloat16 Binary16ConversionNaN
|
||||
*/
|
||||
|
||||
/*
|
||||
|
@ -63,7 +63,8 @@ public class CPUInfoTest {
|
||||
"vzeroupper", "avx512_vpopcntdq", "avx512_vpclmulqdq", "avx512_vaes",
|
||||
"avx512_vnni", "clflush", "clflushopt", "clwb",
|
||||
"avx512_vbmi2", "avx512_vbmi", "rdtscp", "rdpid",
|
||||
"hv", "fsrm", "avx512_bitalg", "gfni"
|
||||
"hv", "fsrm", "avx512_bitalg", "gfni",
|
||||
"f16c"
|
||||
);
|
||||
// @formatter:on
|
||||
// Checkstyle: resume
|
||||
|
@ -0,0 +1,98 @@
|
||||
/*
|
||||
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package org.openjdk.bench.java.math;
|
||||
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import org.openjdk.jmh.annotations.*;
|
||||
|
||||
@OutputTimeUnit(TimeUnit.MILLISECONDS)
|
||||
@State(Scope.Thread)
|
||||
@Warmup(iterations = 5, time = 1)
|
||||
@Measurement(iterations = 5, time = 1)
|
||||
@Fork(value = 3)
|
||||
public class Fp16ConversionBenchmark {
|
||||
|
||||
@Param({"2048"})
|
||||
public int size;
|
||||
|
||||
public short[] f16in;
|
||||
public short[] f16out;
|
||||
public float[] fin;
|
||||
public float[] fout;
|
||||
public static short f16, s;
|
||||
public static float f;
|
||||
|
||||
@Setup(Level.Trial)
|
||||
public void BmSetup() {
|
||||
int i = 0;
|
||||
Random r = new Random(1024);
|
||||
|
||||
f16in = new short[size];
|
||||
f16out = new short[size];
|
||||
f16 = (short) r.nextInt();
|
||||
|
||||
for (; i < size; i++) {
|
||||
f16in[i] = Float.floatToFloat16(r.nextFloat());;
|
||||
}
|
||||
|
||||
fin = new float[size];
|
||||
fout = new float[size];
|
||||
f = r.nextFloat();
|
||||
|
||||
i = 0;
|
||||
|
||||
for (; i < size; i++) {
|
||||
fin[i] = Float.float16ToFloat((short)r.nextInt());
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public short[] floatToFloat16() {
|
||||
for (int i = 0; i < fin.length; i++) {
|
||||
f16out[i] = Float.floatToFloat16(fin[i]);
|
||||
}
|
||||
return f16out;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public float[] float16ToFloat() {
|
||||
for (int i = 0; i < f16in.length; i++) {
|
||||
fout[i] = Float.float16ToFloat(f16in[i]);
|
||||
}
|
||||
return fout;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public float float16ToFloatMemory() {
|
||||
f = Float.float16ToFloat(f16);
|
||||
return f;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public short floatToFloat16Memory() {
|
||||
s = Float.floatToFloat16(f);
|
||||
return s;
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user