8217561: X86: Add floating-point Math.min/max intrinsics
Implementation taking care of +/-0.0 and NaN, which uses a specific pattern for reductions.
Reviewed-by: aph, kvn, neliasso, sviswanathan, adinn
This commit is contained in:
parent
367ae10733
commit
c2ec1085e1
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -7765,9 +7765,43 @@ int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegis
|
||||
}
|
||||
}
|
||||
|
||||
// Emits the AVX three-operand, register-register form of the scalar
// single-precision maximum (F3 prefix, 0F map, opcode 0x5F).
void Assembler::vmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attr(AVX_128bit,
                       /* vex_w */ false,
                       /* legacy_mode */ false,
                       /* no_mask_reg */ true,
                       /* uses_vl */ false);
  // Must run before the opcode is emitted; its result supplies the register
  // bits that are merged into the ModRM byte below.
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
                                             VEX_SIMD_F3, VEX_OPCODE_0F, &attr);
  emit_int8(0x5F);                              // opcode
  emit_int8((unsigned char)(0xC0 | reg_bits));  // ModRM, register-direct (0xC0)
}
|
||||
|
||||
// Emits the AVX three-operand, register-register form of the scalar
// double-precision maximum (F2 prefix, 0F map, opcode 0x5F).
void Assembler::vmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  // vex_w is requested only when EVEX is available, and is then marked as reverted.
  const bool want_vex_w = VM_Version::supports_evex();
  InstructionAttr attr(AVX_128bit,
                       /* vex_w */ want_vex_w,
                       /* legacy_mode */ false,
                       /* no_mask_reg */ true,
                       /* uses_vl */ false);
  attr.set_rex_vex_w_reverted();
  // Must run before the opcode is emitted; its result supplies the register
  // bits that are merged into the ModRM byte below.
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
                                             VEX_SIMD_F2, VEX_OPCODE_0F, &attr);
  emit_int8(0x5F);                              // opcode
  emit_int8((unsigned char)(0xC0 | reg_bits));  // ModRM, register-direct (0xC0)
}
|
||||
|
||||
// Emits the AVX three-operand, register-register form of the scalar
// single-precision minimum (F3 prefix, 0F map, opcode 0x5D).
void Assembler::vminss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attr(AVX_128bit,
                       /* vex_w */ false,
                       /* legacy_mode */ false,
                       /* no_mask_reg */ true,
                       /* uses_vl */ false);
  // Must run before the opcode is emitted; its result supplies the register
  // bits that are merged into the ModRM byte below.
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
                                             VEX_SIMD_F3, VEX_OPCODE_0F, &attr);
  emit_int8(0x5D);                              // opcode
  emit_int8((unsigned char)(0xC0 | reg_bits));  // ModRM, register-direct (0xC0)
}
|
||||
|
||||
// Emits the AVX three-operand, register-register form of the scalar
// double-precision minimum (F2 prefix, 0F map, opcode 0x5D).
void Assembler::vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  // vex_w is requested only when EVEX is available, and is then marked as reverted.
  const bool want_vex_w = VM_Version::supports_evex();
  InstructionAttr attr(AVX_128bit,
                       /* vex_w */ want_vex_w,
                       /* legacy_mode */ false,
                       /* no_mask_reg */ true,
                       /* uses_vl */ false);
  attr.set_rex_vex_w_reverted();
  // Must run before the opcode is emitted; its result supplies the register
  // bits that are merged into the ModRM byte below.
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
                                             VEX_SIMD_F2, VEX_OPCODE_0F, &attr);
  emit_int8(0x5D);                              // opcode
  emit_int8((unsigned char)(0xC0 | reg_bits));  // ModRM, register-direct (0xC0)
}
|
||||
|
||||
void Assembler::cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
assert(!VM_Version::supports_evex(), "");
|
||||
assert(vector_len <= AVX_256bit, "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8((unsigned char)0xC2);
|
||||
@ -7777,7 +7811,7 @@ void Assembler::cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop
|
||||
|
||||
void Assembler::blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
assert(!VM_Version::supports_evex(), "");
|
||||
assert(vector_len <= AVX_256bit, "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
emit_int8((unsigned char)0x4B);
|
||||
@ -7788,7 +7822,7 @@ void Assembler::blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMM
|
||||
|
||||
void Assembler::cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
assert(!VM_Version::supports_evex(), "");
|
||||
assert(vector_len <= AVX_256bit, "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8((unsigned char)0xC2);
|
||||
@ -7798,7 +7832,7 @@ void Assembler::cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop
|
||||
|
||||
void Assembler::blendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
assert(!VM_Version::supports_evex(), "");
|
||||
assert(vector_len <= AVX_256bit, "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
emit_int8((unsigned char)0x4A);
|
||||
|
@ -1934,6 +1934,11 @@ private:
|
||||
void vsubss(XMMRegister dst, XMMRegister nds, Address src);
|
||||
void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
|
||||
|
||||
void vmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src);
|
||||
void vmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
|
||||
void vminss(XMMRegister dst, XMMRegister nds, XMMRegister src);
|
||||
void vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
|
||||
|
||||
void shlxl(Register dst, Register src1, Register src2);
|
||||
void shlxq(Register dst, Register src1, Register src2);
|
||||
|
||||
|
@ -165,6 +165,7 @@ class MacroAssembler: public Assembler {
|
||||
|
||||
// Support optimal SSE move instructions.
|
||||
// Register-to-register float move. Emits nothing when source and
// destination are the same register; otherwise UseXmmRegToRegMoveAll
// selects between movaps and movss.
void movflt(XMMRegister dst, XMMRegister src) {
  const bool same_register = (dst->encoding() == src->encoding());
  if (same_register) {
    return;  // self-move: no instruction needed
  }
  if (UseXmmRegToRegMoveAll) {
    movaps(dst, src);
  } else {
    movss(dst, src);
  }
}
|
||||
@ -173,6 +174,7 @@ class MacroAssembler: public Assembler {
|
||||
void movflt(Address dst, XMMRegister src) { movss(dst, src); }
|
||||
|
||||
// Register-to-register double move. Emits nothing when source and
// destination are the same register; otherwise UseXmmRegToRegMoveAll
// selects between movapd and movsd.
void movdbl(XMMRegister dst, XMMRegister src) {
  const bool same_register = (dst->encoding() == src->encoding());
  if (same_register) {
    return;  // self-move: no instruction needed
  }
  if (UseXmmRegToRegMoveAll) {
    movapd(dst, src);
  } else {
    movsd(dst, src);
  }
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
//
|
||||
// Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
// Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
//
|
||||
// This code is free software; you can redistribute it and/or modify it
|
||||
@ -1450,6 +1450,15 @@ const bool Matcher::match_rule_supported(int opcode) {
|
||||
if (UseSSE < 2)
|
||||
ret_value = false;
|
||||
break;
|
||||
#ifdef _LP64
|
||||
case Op_MaxD:
|
||||
case Op_MaxF:
|
||||
case Op_MinD:
|
||||
case Op_MinF:
|
||||
if (UseAVX < 1) // enabled for AVX only
|
||||
ret_value = false;
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
|
||||
return ret_value; // Per default match rules are supported.
|
||||
|
@ -698,6 +698,87 @@ void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
|
||||
__ bind(done);
|
||||
}
|
||||
|
||||
// Math.min() # Math.max()
|
||||
// --------------------------
|
||||
// ucomis[s/d] #
|
||||
// ja -> b # a
|
||||
// jp -> NaN # NaN
|
||||
// jb -> a # b
|
||||
// je #
|
||||
// |-jz -> a | b # a & b
|
||||
// | -> a #
|
||||
void emit_fp_min_max(MacroAssembler& _masm, XMMRegister dst,
|
||||
XMMRegister a, XMMRegister b,
|
||||
XMMRegister xmmt, Register rt,
|
||||
bool min, bool single) {
|
||||
|
||||
Label nan, zero, below, above, done;
|
||||
|
||||
if (single)
|
||||
__ ucomiss(a, b);
|
||||
else
|
||||
__ ucomisd(a, b);
|
||||
|
||||
if (dst->encoding() != (min ? b : a)->encoding())
|
||||
__ jccb(Assembler::above, above); // CF=0 & ZF=0
|
||||
else
|
||||
__ jccb(Assembler::above, done);
|
||||
|
||||
__ jccb(Assembler::parity, nan); // PF=1
|
||||
__ jccb(Assembler::below, below); // CF=1
|
||||
|
||||
// equal
|
||||
__ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
|
||||
if (single) {
|
||||
__ ucomiss(a, xmmt);
|
||||
__ jccb(Assembler::equal, zero);
|
||||
|
||||
__ movflt(dst, a);
|
||||
__ jmp(done);
|
||||
}
|
||||
else {
|
||||
__ ucomisd(a, xmmt);
|
||||
__ jccb(Assembler::equal, zero);
|
||||
|
||||
__ movdbl(dst, a);
|
||||
__ jmp(done);
|
||||
}
|
||||
|
||||
__ bind(zero);
|
||||
if (min)
|
||||
__ vpor(dst, a, b, Assembler::AVX_128bit);
|
||||
else
|
||||
__ vpand(dst, a, b, Assembler::AVX_128bit);
|
||||
|
||||
__ jmp(done);
|
||||
|
||||
__ bind(above);
|
||||
if (single)
|
||||
__ movflt(dst, min ? b : a);
|
||||
else
|
||||
__ movdbl(dst, min ? b : a);
|
||||
|
||||
__ jmp(done);
|
||||
|
||||
__ bind(nan);
|
||||
if (single) {
|
||||
__ movl(rt, 0x7fc00000); // Float.NaN
|
||||
__ movdl(dst, rt);
|
||||
}
|
||||
else {
|
||||
__ mov64(rt, 0x7ff8000000000000L); // Double.NaN
|
||||
__ movdq(dst, rt);
|
||||
}
|
||||
__ jmp(done);
|
||||
|
||||
__ bind(below);
|
||||
if (single)
|
||||
__ movflt(dst, min ? a : b);
|
||||
else
|
||||
__ movdbl(dst, min ? a : b);
|
||||
|
||||
__ bind(done);
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
|
||||
@ -3547,6 +3628,15 @@ operand regF() %{
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Float register operand constrained to the float_reg_legacy register class
|
||||
operand legRegF() %{
|
||||
constraint(ALLOC_IN_RC(float_reg_legacy));
|
||||
match(RegF);
|
||||
|
||||
format %{ %}
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Float register operands
|
||||
operand vlRegF() %{
|
||||
constraint(ALLOC_IN_RC(float_reg_vl));
|
||||
@ -3565,6 +3655,15 @@ operand regD() %{
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Double register operand constrained to the double_reg_legacy register class
|
||||
operand legRegD() %{
|
||||
constraint(ALLOC_IN_RC(double_reg_legacy));
|
||||
match(RegD);
|
||||
|
||||
format %{ %}
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Double register operands
|
||||
operand vlRegD() %{
|
||||
constraint(ALLOC_IN_RC(double_reg_vl));
|
||||
@ -5303,6 +5402,16 @@ instruct MoveF2VL(vlRegF dst, regF src) %{
|
||||
ins_pipe( fpu_reg_reg );
|
||||
%}
|
||||
|
||||
// Move Float: register-to-register copy from a regF into a legRegF operand
|
||||
instruct MoveF2LEG(legRegF dst, regF src) %{
|
||||
match(Set dst src);
|
||||
format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
|
||||
ins_encode %{
|
||||
__ movflt($dst$$XMMRegister, $src$$XMMRegister);
|
||||
%}
|
||||
ins_pipe( fpu_reg_reg );
|
||||
%}
|
||||
|
||||
// Load Float
|
||||
instruct MoveVL2F(regF dst, vlRegF src) %{
|
||||
match(Set dst src);
|
||||
@ -5313,6 +5422,16 @@ instruct MoveVL2F(regF dst, vlRegF src) %{
|
||||
ins_pipe( fpu_reg_reg );
|
||||
%}
|
||||
|
||||
// Move Float: register-to-register copy from a legRegF back into a regF operand
|
||||
instruct MoveLEG2F(regF dst, legRegF src) %{
|
||||
match(Set dst src);
|
||||
format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
|
||||
ins_encode %{
|
||||
__ movflt($dst$$XMMRegister, $src$$XMMRegister);
|
||||
%}
|
||||
ins_pipe( fpu_reg_reg );
|
||||
%}
|
||||
|
||||
// Load Double
|
||||
instruct loadD_partial(regD dst, memory mem)
|
||||
%{
|
||||
@ -5350,6 +5469,16 @@ instruct MoveD2VL(vlRegD dst, regD src) %{
|
||||
ins_pipe( fpu_reg_reg );
|
||||
%}
|
||||
|
||||
// Move Double: register-to-register copy from a regD into a legRegD operand
|
||||
instruct MoveD2LEG(legRegD dst, regD src) %{
|
||||
match(Set dst src);
|
||||
format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
|
||||
ins_encode %{
|
||||
__ movdbl($dst$$XMMRegister, $src$$XMMRegister);
|
||||
%}
|
||||
ins_pipe( fpu_reg_reg );
|
||||
%}
|
||||
|
||||
// Load Double
|
||||
instruct MoveVL2D(regD dst, vlRegD src) %{
|
||||
match(Set dst src);
|
||||
@ -5360,6 +5489,167 @@ instruct MoveVL2D(regD dst, vlRegD src) %{
|
||||
ins_pipe( fpu_reg_reg );
|
||||
%}
|
||||
|
||||
// Move Double: register-to-register copy from a legRegD back into a regD operand
|
||||
instruct MoveLEG2D(regD dst, legRegD src) %{
|
||||
match(Set dst src);
|
||||
format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
|
||||
ins_encode %{
|
||||
__ movdbl($dst$$XMMRegister, $src$$XMMRegister);
|
||||
%}
|
||||
ins_pipe( fpu_reg_reg );
|
||||
%}
|
||||
|
||||
// Following pseudo code describes the algorithm for max[FD]:
|
||||
// The min[FD] algorithm is analogous, except that the blends select on a instead of b.
|
||||
// btmp = (sign bit of b is set) ? a : b
|
||||
// atmp = (sign bit of b is set) ? b : a
|
||||
// Tmp = Max_Float(atmp , btmp)
|
||||
// Res = isNaN(atmp) ? atmp : Tmp   (NaN test = unordered compare of atmp with itself)
|
||||
|
||||
// max = java.lang.Math.max(float a, float b)
|
||||
instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
|
||||
predicate(UseAVX > 0 && !n->is_reduction());
|
||||
match(Set dst (MaxF a b));
|
||||
effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
|
||||
format %{
|
||||
"blendvps $btmp,$b,$a,$b \n\t"
|
||||
"blendvps $atmp,$a,$b,$b \n\t"
|
||||
"vmaxss $tmp,$atmp,$btmp \n\t"
|
||||
"cmpps.unordered $btmp,$atmp,$atmp \n\t"
|
||||
"blendvps $dst,$tmp,$atmp,$btmp \n\t"
|
||||
%}
|
||||
ins_encode %{
|
||||
int vector_len = Assembler::AVX_128bit;
|
||||
__ blendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
|
||||
__ blendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
|
||||
__ vmaxss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
|
||||
__ cmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
|
||||
__ blendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct maxF_reduction_reg(regF dst, regF a, regF b, regF xmmt, rRegI tmp, rFlagsReg cr) %{
|
||||
predicate(UseAVX > 0 && n->is_reduction());
|
||||
match(Set dst (MaxF a b));
|
||||
effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
|
||||
|
||||
format %{ "$dst = max($a, $b)\t# intrinsic (float)" %}
|
||||
ins_encode %{
|
||||
emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
|
||||
false /*min*/, true /*single*/);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// max = java.lang.Math.max(double a, double b)
|
||||
instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
|
||||
predicate(UseAVX > 0 && !n->is_reduction());
|
||||
match(Set dst (MaxD a b));
|
||||
effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
|
||||
format %{
|
||||
"blendvpd $btmp,$b,$a,$b \n\t"
|
||||
"blendvpd $atmp,$a,$b,$b \n\t"
|
||||
"vmaxsd $tmp,$atmp,$btmp \n\t"
|
||||
"cmppd.unordered $btmp,$atmp,$atmp \n\t"
|
||||
"blendvpd $dst,$tmp,$atmp,$btmp \n\t"
|
||||
%}
|
||||
ins_encode %{
|
||||
int vector_len = Assembler::AVX_128bit;
|
||||
__ blendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
|
||||
__ blendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
|
||||
__ vmaxsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
|
||||
__ cmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
|
||||
__ blendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct maxD_reduction_reg(regD dst, regD a, regD b, regD xmmt, rRegL tmp, rFlagsReg cr) %{
|
||||
predicate(UseAVX > 0 && n->is_reduction());
|
||||
match(Set dst (MaxD a b));
|
||||
effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
|
||||
|
||||
format %{ "$dst = max($a, $b)\t# intrinsic (double)" %}
|
||||
ins_encode %{
|
||||
emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
|
||||
false /*min*/, false /*single*/);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// min = java.lang.Math.min(float a, float b)
|
||||
instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
|
||||
predicate(UseAVX > 0 && !n->is_reduction());
|
||||
match(Set dst (MinF a b));
|
||||
effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
|
||||
format %{
|
||||
"blendvps $atmp,$a,$b,$a \n\t"
|
||||
"blendvps $btmp,$b,$a,$a \n\t"
|
||||
"vminss $tmp,$atmp,$btmp \n\t"
|
||||
"cmpps.unordered $btmp,$atmp,$atmp \n\t"
|
||||
"blendvps $dst,$tmp,$atmp,$btmp \n\t"
|
||||
%}
|
||||
ins_encode %{
|
||||
int vector_len = Assembler::AVX_128bit;
|
||||
__ blendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
|
||||
__ blendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
|
||||
__ vminss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
|
||||
__ cmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
|
||||
__ blendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct minF_reduction_reg(regF dst, regF a, regF b, regF xmmt, rRegI tmp, rFlagsReg cr) %{
|
||||
predicate(UseAVX > 0 && n->is_reduction());
|
||||
match(Set dst (MinF a b));
|
||||
effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
|
||||
|
||||
format %{ "$dst = min($a, $b)\t# intrinsic (float)" %}
|
||||
ins_encode %{
|
||||
emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
|
||||
true /*min*/, true /*single*/);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// min = java.lang.Math.min(double a, double b)
|
||||
instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
|
||||
predicate(UseAVX > 0 && !n->is_reduction());
|
||||
match(Set dst (MinD a b));
|
||||
effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
|
||||
format %{
|
||||
"blendvpd $atmp,$a,$b,$a \n\t"
|
||||
"blendvpd $btmp,$b,$a,$a \n\t"
|
||||
"vminsd $tmp,$atmp,$btmp \n\t"
|
||||
"cmppd.unordered $btmp,$atmp,$atmp \n\t"
|
||||
"blendvpd $dst,$tmp,$atmp,$btmp \n\t"
|
||||
%}
|
||||
ins_encode %{
|
||||
int vector_len = Assembler::AVX_128bit;
|
||||
__ blendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
|
||||
__ blendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
|
||||
__ vminsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
|
||||
__ cmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
|
||||
__ blendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct minD_reduction_reg(regD dst, regD a, regD b, regD xmmt, rRegL tmp, rFlagsReg cr) %{
|
||||
predicate(UseAVX > 0 && n->is_reduction());
|
||||
match(Set dst (MinD a b));
|
||||
effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
|
||||
|
||||
format %{ "$dst = min($a, $b)\t# intrinsic (double)" %}
|
||||
ins_encode %{
|
||||
emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
|
||||
true /*min*/, false /*single*/);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// Load Effective Address
|
||||
instruct leaP8(rRegP dst, indOffset8 mem)
|
||||
%{
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -6609,6 +6609,40 @@ bool LibraryCallKit::inline_character_compare(vmIntrinsics::ID id) {
|
||||
|
||||
//------------------------------inline_fp_min_max------------------------------
|
||||
bool LibraryCallKit::inline_fp_min_max(vmIntrinsics::ID id) {
|
||||
/* DISABLED BECAUSE METHOD DATA ISN'T COLLECTED PER CALL-SITE, SEE JDK-8015416.
|
||||
|
||||
// The intrinsic should be used only when the API branches aren't predictable,
|
||||
// the last one performing the most important comparison. The following heuristic
|
||||
// uses the branch statistics to eventually bail out if necessary.
|
||||
|
||||
ciMethodData *md = callee()->method_data();
|
||||
|
||||
if ( md != NULL && md->is_mature() && md->invocation_count() > 0 ) {
|
||||
ciCallProfile cp = caller()->call_profile_at_bci(bci());
|
||||
|
||||
if ( ((double)cp.count()) / ((double)md->invocation_count()) < 0.8 ) {
|
||||
// Bail out if the call-site didn't contribute enough to the statistics.
|
||||
return false;
|
||||
}
|
||||
|
||||
uint taken = 0, not_taken = 0;
|
||||
|
||||
for (ciProfileData *p = md->first_data(); md->is_valid(p); p = md->next_data(p)) {
|
||||
if (p->is_BranchData()) {
|
||||
taken = ((ciBranchData*)p)->taken();
|
||||
not_taken = ((ciBranchData*)p)->not_taken();
|
||||
}
|
||||
}
|
||||
|
||||
double balance = (((double)taken) - ((double)not_taken)) / ((double)md->invocation_count());
|
||||
balance = balance < 0 ? -balance : balance;
|
||||
if ( balance > 0.2 ) {
|
||||
// Bail out if the most important branch is predictable enough.
|
||||
return false;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
Node *a = NULL;
|
||||
Node *b = NULL;
|
||||
Node *n = NULL;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -2039,7 +2039,8 @@ void PhaseIdealLoop::mark_reductions(IdealLoopTree *loop) {
|
||||
if (n_ctrl != NULL && loop->is_member(get_loop(n_ctrl))) {
|
||||
// Now test it to see if it fits the standard pattern for a reduction operator.
|
||||
int opc = def_node->Opcode();
|
||||
if (opc != ReductionNode::opcode(opc, def_node->bottom_type()->basic_type())) {
|
||||
if (opc != ReductionNode::opcode(opc, def_node->bottom_type()->basic_type())
|
||||
|| opc == Op_MinD || opc == Op_MinF || opc == Op_MaxD || opc == Op_MaxF) {
|
||||
if (!def_node->is_reduction()) { // Not marked yet
|
||||
// To be a reduction, the arithmetic node must have the phi as input and provide a def to it
|
||||
bool ok = false;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2018, Arm Limited. All rights reserved.
|
||||
* Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2018, 2019, Arm Limited. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -27,20 +27,41 @@
|
||||
* @bug 8212043
|
||||
* @summary Test compiler intrinsics of floating-point Math.min/max
|
||||
*
|
||||
* @run main/othervm -Xint compiler.intrinsics.math.TestFpMinMaxIntrinsics
|
||||
* @run main/othervm -Xint compiler.intrinsics.math.TestFpMinMaxIntrinsics sanityTests 1
|
||||
* @run main/othervm -XX:+UnlockDiagnosticVMOptions
|
||||
* -Xcomp -XX:TieredStopAtLevel=1
|
||||
* -XX:CompileOnly=java/lang/Math
|
||||
* compiler.intrinsics.math.TestFpMinMaxIntrinsics
|
||||
* compiler.intrinsics.math.TestFpMinMaxIntrinsics sanityTests 1
|
||||
* @run main/othervm -XX:+UnlockDiagnosticVMOptions
|
||||
* -Xcomp -XX:-TieredCompilation
|
||||
* -XX:CompileOnly=java/lang/Math
|
||||
* compiler.intrinsics.math.TestFpMinMaxIntrinsics
|
||||
* compiler.intrinsics.math.TestFpMinMaxIntrinsics sanityTests 1
|
||||
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions
|
||||
* -XX:-TieredCompilation -XX:CompileThresholdScaling=0.1
|
||||
* -XX:CompileCommand=print,compiler/intrinsics/math/TestFpMinMaxIntrinsics.*Test*
|
||||
* compiler.intrinsics.math.TestFpMinMaxIntrinsics sanityTests 10000
|
||||
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions
|
||||
* -XX:-TieredCompilation -Xcomp
|
||||
* -XX:CompileCommand=print,compiler/intrinsics/math/TestFpMinMaxIntrinsics.*Test*
|
||||
* -XX:CompileCommand=compileonly,compiler/intrinsics/math/TestFpMinMaxIntrinsics.*Test*
|
||||
* compiler.intrinsics.math.TestFpMinMaxIntrinsics reductionTests 100
|
||||
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions
|
||||
* -XX:+TieredCompilation
|
||||
* -XX:CompileCommand=print,compiler/intrinsics/math/TestFpMinMaxIntrinsics.min*
|
||||
* -XX:CompileCommand=dontinline,compiler/intrinsics/math/TestFpMinMaxIntrinsics.min*
|
||||
* compiler.intrinsics.math.TestFpMinMaxIntrinsics randomSearchTree 1
|
||||
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions
|
||||
* -XX:+TieredCompilation
|
||||
* -XX:CompileCommand=print,compiler/intrinsics/math/TestFpMinMaxIntrinsics.min*
|
||||
* -XX:CompileCommand=dontinline,compiler/intrinsics/math/TestFpMinMaxIntrinsics.min*
|
||||
* compiler.intrinsics.math.TestFpMinMaxIntrinsics sortedSearchTree 1
|
||||
*/
|
||||
|
||||
package compiler.intrinsics.math;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Random;
|
||||
import java.lang.reflect.Method;
|
||||
|
||||
public class TestFpMinMaxIntrinsics {
|
||||
|
||||
@ -63,63 +84,220 @@ public class TestFpMinMaxIntrinsics {
|
||||
private static final float[][] f_cases = {
|
||||
// a b min max
|
||||
{ fPos, fPos, fPos, fPos },
|
||||
{ fNeg, fNeg, fNeg, fNeg },
|
||||
{ fPos, fNeg, fNeg, fPos },
|
||||
{ fNeg, fPos, fNeg, fPos },
|
||||
|
||||
{ fPosZero, fNegZero, fNegZero, fPosZero },
|
||||
{ fNegZero, fPosZero, fNegZero, fPosZero },
|
||||
{ fNegZero, fNegZero, fNegZero, fNegZero },
|
||||
|
||||
{ fPos, fPosInf, fPos, fPosInf },
|
||||
{ fNeg, fNegInf, fNegInf, fNeg },
|
||||
|
||||
{ fPos, fNaN, fNaN, fNaN },
|
||||
{ fNaN, fPos, fNaN, fNaN },
|
||||
{ fNeg, fNaN, fNaN, fNaN },
|
||||
{ fNaN, fNeg, fNaN, fNaN },
|
||||
|
||||
{ fPosInf, fNaN, fNaN, fNaN },
|
||||
{ fNaN, fPosInf, fNaN, fNaN },
|
||||
{ fNegInf, fNaN, fNaN, fNaN },
|
||||
{ fNaN, fNegInf, fNaN, fNaN }
|
||||
};
|
||||
|
||||
private static final double[][] d_cases = {
|
||||
// a b min max
|
||||
{ dPos, dPos, dPos, dPos },
|
||||
{ dNeg, dNeg, dNeg, dNeg },
|
||||
{ dPos, dNeg, dNeg, dPos },
|
||||
{ dNeg, dPos, dNeg, dPos },
|
||||
|
||||
{ dPosZero, dNegZero, dNegZero, dPosZero },
|
||||
{ dNegZero, dPosZero, dNegZero, dPosZero },
|
||||
{ dNegZero, dNegZero, dNegZero, dNegZero },
|
||||
|
||||
{ dPos, dPosInf, dPos, dPosInf },
|
||||
{ dNeg, dNegInf, dNegInf, dNeg },
|
||||
|
||||
{ dPos, dNaN, dNaN, dNaN },
|
||||
{ dNaN, dPos, dNaN, dNaN },
|
||||
{ dNeg, dNaN, dNaN, dNaN },
|
||||
{ dNaN, dNeg, dNaN, dNaN },
|
||||
|
||||
{ dPosInf, dNaN, dNaN, dNaN },
|
||||
{ dNaN, dPosInf, dNaN, dNaN },
|
||||
{ dNegInf, dNaN, dNaN, dNaN },
|
||||
{ dNaN, dNegInf, dNaN, dNaN }
|
||||
};
|
||||
|
||||
private static void fTest(float[] row) {
|
||||
float min = Math.min(row[0], row[1]);
|
||||
float max = Math.max(row[0], row[1]);
|
||||
if (Float.isNaN(min) && Float.isNaN(max)
|
||||
&& Float.isNaN(row[2]) && Float.isNaN(row[3])) {
|
||||
// Return if all of them are NaN
|
||||
return;
|
||||
fCheck(row[0], row[1], Math.min(row[0], row[1]), Math.max(row[0], row[1]), row[2], row[3]);
|
||||
}
|
||||
|
||||
private static void fReductionTest(float[] row) {
|
||||
float fmin = row[0], fmax = row[0];
|
||||
|
||||
for (int i=0; i<100; i++) {
|
||||
fmin = Math.min(fmin, row[1]);
|
||||
fmax = Math.max(fmax, row[1]);
|
||||
}
|
||||
if (min != row[2] || max != row[3]) {
|
||||
|
||||
fCheck(row[0], row[1], fmin, fmax, row[2], row[3]);
|
||||
}
|
||||
|
||||
private static void fCheck(float a, float b, float fmin, float fmax, float efmin, float efmax) {
|
||||
int min = Float.floatToRawIntBits(fmin);
|
||||
int max = Float.floatToRawIntBits(fmax);
|
||||
int emin = Float.floatToRawIntBits(efmin);
|
||||
int emax = Float.floatToRawIntBits(efmax);
|
||||
|
||||
if (min != emin || max != emax) {
|
||||
throw new AssertionError("Unexpected result of float min/max: " +
|
||||
"a = " + row[0] + ", b = " + row[1] + ", " +
|
||||
"result = (" + min + ", " + max + "), " +
|
||||
"expected = (" + row[2] + ", " + row[3] + ")");
|
||||
"a = " + a + ", b = " + b + ", " +
|
||||
"result = (" + fmin + ", " + fmax + "), " +
|
||||
"expected = (" + efmin + ", " + efmax + ")");
|
||||
}
|
||||
}
|
||||
|
||||
private static void dTest(double[] row) {
|
||||
double min = Math.min(row[0], row[1]);
|
||||
double max = Math.max(row[0], row[1]);
|
||||
if (Double.isNaN(min) && Double.isNaN(max)
|
||||
&& Double.isNaN(row[2]) && Double.isNaN(row[3])) {
|
||||
// Return if all of them are NaN
|
||||
return;
|
||||
dCheck(row[0], row[1], Math.min(row[0], row[1]), Math.max(row[0], row[1]), row[2], row[3]);
|
||||
}
|
||||
|
||||
private static void dReductionTest(double[] row) {
|
||||
double dmin = row[0], dmax = row[0];
|
||||
|
||||
for (int i=0; i<100; i++) {
|
||||
dmin = Math.min(dmin, row[1]);
|
||||
dmax = Math.max(dmax, row[1]);
|
||||
}
|
||||
if (min != row[2] || max != row[3]) {
|
||||
throw new AssertionError("Unexpected result of double min/max" +
|
||||
"a = " + row[0] + ", b = " + row[1] + ", " +
|
||||
"result = (" + min + ", " + max + "), " +
|
||||
"expected = (" + row[2] + ", " + row[3] + ")");
|
||||
|
||||
dCheck(row[0], row[1], dmin, dmax, row[2], row[3]);
|
||||
}
|
||||
|
||||
private static void dCheck(double a, double b, double dmin, double dmax, double edmin, double edmax) {
|
||||
double min = Double.doubleToRawLongBits(dmin);
|
||||
double max = Double.doubleToRawLongBits(dmax);
|
||||
double emin = Double.doubleToRawLongBits(edmin);
|
||||
double emax = Double.doubleToRawLongBits(edmax);
|
||||
|
||||
if (min != emin || max != emax) {
|
||||
throw new AssertionError("Unexpected result of double min/max: " +
|
||||
"a = " + a + ", b = " + b + ", " +
|
||||
"result = (" + dmin + ", " + dmax + "), " +
|
||||
"expected = (" + edmin + ", " + edmax + ")");
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
public static void sanityTests() {
|
||||
Arrays.stream(f_cases).forEach(TestFpMinMaxIntrinsics::fTest);
|
||||
Arrays.stream(d_cases).forEach(TestFpMinMaxIntrinsics::dTest);
|
||||
System.out.println("PASS");
|
||||
}
|
||||
|
||||
public static void reductionTests() {
|
||||
Arrays.stream(f_cases).forEach(TestFpMinMaxIntrinsics::fReductionTest);
|
||||
Arrays.stream(d_cases).forEach(TestFpMinMaxIntrinsics::dReductionTest);
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
Method m = TestFpMinMaxIntrinsics.class.getDeclaredMethod(args[0]);
|
||||
for (int i = 0 ; i < Integer.parseInt(args[1]) ; i++)
|
||||
m.invoke(null);
|
||||
}
|
||||
|
||||
private static final int COUNT = 1000;
|
||||
private static final int LOOPS = 100;
|
||||
|
||||
private static Random r = new Random();
|
||||
|
||||
private static Node[] pool = new Node[COUNT];
|
||||
|
||||
private static long time = 0;
|
||||
private static long times = 0;
|
||||
|
||||
public static void init() {
|
||||
for (int i=0; i<COUNT; i++)
|
||||
pool[i] = new Node(Double.NaN);
|
||||
}
|
||||
|
||||
public static void finish() {
|
||||
// String sorted = pool[0].toString();
|
||||
// System.out.println("Sorted: {" + sorted.substring(0, Math.min(sorted.length(), 180)) + "... }");
|
||||
System.out.println("Average time: " + (time/times) + " ns");
|
||||
}
|
||||
|
||||
public static void randomSearchTree() {
|
||||
init();
|
||||
for (int l=0; l < LOOPS; l++) {
|
||||
Node root = pool[0].reset(r.nextDouble());
|
||||
|
||||
for (int i=1; i<COUNT; i++)
|
||||
insert(root, pool[i].reset(r.nextDouble()));
|
||||
}
|
||||
finish();
|
||||
}
|
||||
|
||||
public static void sortedSearchTree() {
|
||||
init();
|
||||
for (int l=0; l < LOOPS; l++) {
|
||||
Node root = pool[0].reset(-0.0);
|
||||
|
||||
for (int i=1; i<COUNT; i++)
|
||||
insert(root, pool[i].reset(i-1));
|
||||
}
|
||||
finish();
|
||||
}
|
||||
|
||||
private static class Node {
|
||||
private double value;
|
||||
private Node min;
|
||||
private Node max;
|
||||
|
||||
public Node(double d) { value = d; }
|
||||
|
||||
public Node reset(double d) { value = d; min = max = null; return this; }
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return (min != null ? min + ", " : "") +
|
||||
value +
|
||||
(max != null ? ", " + max : "");
|
||||
}
|
||||
}
|
||||
|
||||
private static Node insert(Node root, Node d) {
|
||||
for ( ; ; ) {
|
||||
long rootBits = Double.doubleToRawLongBits(root.value);
|
||||
long dBits = Double.doubleToRawLongBits(d.value);
|
||||
|
||||
// No duplicates
|
||||
if (rootBits == dBits)
|
||||
return root;
|
||||
|
||||
long delta = System.nanoTime();
|
||||
|
||||
double dmin = min(root.value, d.value);
|
||||
|
||||
time += System.nanoTime() - delta;
|
||||
times++;
|
||||
|
||||
long minBits = Double.doubleToRawLongBits(dmin);
|
||||
|
||||
if (minBits == dBits)
|
||||
if (root.min != null)
|
||||
root = root.min;
|
||||
else
|
||||
return root.min = d;
|
||||
else
|
||||
if (root.max != null)
|
||||
root = root.max;
|
||||
else
|
||||
return root.max = d;
|
||||
}
|
||||
}
|
||||
|
||||
// Wrapper method to prevent code reordering from affecting measures (JLS 17.4).
|
||||
private static double min(double a, double b) {
|
||||
return Math.min(a, b);
|
||||
}
|
||||
}
|
||||
|
||||
|
127
test/micro/org/openjdk/bench/vm/compiler/FpMinMaxIntrinsics.java
Normal file
127
test/micro/org/openjdk/bench/vm/compiler/FpMinMaxIntrinsics.java
Normal file
@ -0,0 +1,127 @@
|
||||
/*
|
||||
* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
package org.openjdk.bench.vm.compiler;
|
||||
|
||||
import org.openjdk.jmh.annotations.*;
|
||||
import org.openjdk.jmh.infra.*;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.Random;
|
||||
|
||||
@BenchmarkMode(Mode.AverageTime)
|
||||
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||
@State(Scope.Thread)
|
||||
public class FpMinMaxIntrinsics {
|
||||
private static final int COUNT = 1000;
|
||||
|
||||
private double[] doubles = new double[COUNT];
|
||||
private float[] floats = new float[COUNT];
|
||||
|
||||
private int c1, c2, s1, s2;
|
||||
|
||||
private Random r = new Random();
|
||||
|
||||
@Setup
|
||||
public void init() {
|
||||
c1 = s1 = step();
|
||||
c2 = COUNT - (s2 = step());
|
||||
|
||||
for (int i=0; i<COUNT; i++) {
|
||||
floats[i] = r.nextFloat();
|
||||
doubles[i] = r.nextDouble();
|
||||
}
|
||||
}
|
||||
|
||||
private int step() {
|
||||
return (r.nextInt() & 0xf) + 1;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void dMax(Blackhole bh) {
|
||||
for (int i=0; i<COUNT; i++)
|
||||
bh.consume(dMaxBench());
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void dMin(Blackhole bh) {
|
||||
for (int i=0; i<COUNT; i++)
|
||||
bh.consume(dMinBench());
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void fMax(Blackhole bh) {
|
||||
for (int i=0; i<COUNT; i++)
|
||||
bh.consume(fMaxBench());
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void fMin(Blackhole bh) {
|
||||
for (int i=0; i<COUNT; i++)
|
||||
bh.consume(fMinBench());
|
||||
}
|
||||
|
||||
private double dMaxBench() {
|
||||
inc();
|
||||
return Math.max(doubles[c1], doubles[c2]);
|
||||
}
|
||||
|
||||
private double dMinBench() {
|
||||
inc();
|
||||
return Math.min(doubles[c1], doubles[c2]);
|
||||
}
|
||||
|
||||
private float fMaxBench() {
|
||||
inc();
|
||||
return Math.max(floats[c1], floats[c2]);
|
||||
}
|
||||
|
||||
private float fMinBench() {
|
||||
inc();
|
||||
return Math.min(floats[c1], floats[c2]);
|
||||
}
|
||||
|
||||
private void inc() {
|
||||
c1 = c1 + s1 < COUNT ? c1 + s1 : (s1 = step());
|
||||
c2 = c2 - s2 > 0 ? c2 - s2 : COUNT - (s2 = step());
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public float fMinReduce() {
|
||||
float result = Float.MAX_VALUE;
|
||||
|
||||
for (int i=0; i<COUNT; i++)
|
||||
result = Math.min(result, floats[i]);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public double dMinReduce() {
|
||||
double result = Double.MAX_VALUE;
|
||||
|
||||
for (int i=0; i<COUNT; i++)
|
||||
result = Math.min(result, doubles[i]);
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user