8241040: Support for AVX-512 Ternary Logic Instruction
A new pass has been added which folds expression tree involving vector boolean logic operations into a MacroLogic node. Reviewed-by: vlivanov, neliasso
This commit is contained in:
parent
fb56759d08
commit
5532b27d22
@ -6216,6 +6216,30 @@ void Assembler::evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vecto
|
|||||||
emit_operand(dst, src);
|
emit_operand(dst, src);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Assembler::vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len) {
|
||||||
|
assert(VM_Version::supports_evex(), "requires EVEX support");
|
||||||
|
assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
|
||||||
|
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||||
|
attributes.set_is_evex_instruction();
|
||||||
|
int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src3->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||||
|
emit_int8(0x25);
|
||||||
|
emit_int8((unsigned char)(0xC0 | encode));
|
||||||
|
emit_int8(imm8);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Assembler::vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, Address src3, int vector_len) {
|
||||||
|
assert(VM_Version::supports_evex(), "requires EVEX support");
|
||||||
|
assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
|
||||||
|
assert(dst != xnoreg, "sanity");
|
||||||
|
InstructionMark im(this);
|
||||||
|
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||||
|
attributes.set_is_evex_instruction();
|
||||||
|
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
|
||||||
|
vex_prefix(src3, src2->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||||
|
emit_int8(0x25);
|
||||||
|
emit_operand(dst, src3);
|
||||||
|
emit_int8(imm8);
|
||||||
|
}
|
||||||
|
|
||||||
// vinserti forms
|
// vinserti forms
|
||||||
|
|
||||||
|
@ -2198,6 +2198,9 @@ private:
|
|||||||
void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||||
void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
|
void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
|
||||||
|
|
||||||
|
// Ternary logic instruction.
|
||||||
|
void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);
|
||||||
|
void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, Address src3, int vector_len);
|
||||||
|
|
||||||
// vinserti forms
|
// vinserti forms
|
||||||
void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
|
void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
|
||||||
|
@ -3757,6 +3757,14 @@ void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file,
|
|||||||
BLOCK_COMMENT("} verify_oop");
|
BLOCK_COMMENT("} verify_oop");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void MacroAssembler::vallones(XMMRegister dst, int vector_len) {
|
||||||
|
if (UseAVX > 2 && (vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl())) {
|
||||||
|
vpternlogd(dst, 0xFF, dst, dst, vector_len);
|
||||||
|
} else {
|
||||||
|
assert(UseAVX > 0, "");
|
||||||
|
vpcmpeqb(dst, dst, dst, vector_len);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
|
RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
|
||||||
Register tmp,
|
Register tmp,
|
||||||
|
@ -1721,6 +1721,8 @@ public:
|
|||||||
void cache_wb(Address line);
|
void cache_wb(Address line);
|
||||||
void cache_wbsync(bool is_pre);
|
void cache_wbsync(bool is_pre);
|
||||||
#endif // _LP64
|
#endif // _LP64
|
||||||
|
|
||||||
|
void vallones(XMMRegister dst, int vector_len);
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1363,6 +1363,11 @@ const bool Matcher::match_rule_supported(int opcode) {
|
|||||||
return false; // 128bit vroundpd is not available
|
return false; // 128bit vroundpd is not available
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case Op_MacroLogicV:
|
||||||
|
if (UseAVX < 3 || !UseVectorMacroLogic) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
break;
|
||||||
#ifndef _LP64
|
#ifndef _LP64
|
||||||
case Op_AddReductionVF:
|
case Op_AddReductionVF:
|
||||||
case Op_AddReductionVD:
|
case Op_AddReductionVD:
|
||||||
@ -1408,6 +1413,7 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
|
|||||||
// * implementation limitations
|
// * implementation limitations
|
||||||
// * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
|
// * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
|
||||||
// * 128bit vroundpd instruction is present only in AVX1
|
// * 128bit vroundpd instruction is present only in AVX1
|
||||||
|
int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
|
||||||
switch (opcode) {
|
switch (opcode) {
|
||||||
case Op_AbsVF:
|
case Op_AbsVF:
|
||||||
case Op_NegVF:
|
case Op_NegVF:
|
||||||
@ -1426,6 +1432,12 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
|
|||||||
return false; // implementation limitation (only vcmov8F_reg is present)
|
return false; // implementation limitation (only vcmov8F_reg is present)
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case Op_MacroLogicV:
|
||||||
|
if (!VM_Version::supports_evex() ||
|
||||||
|
((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
break;
|
||||||
case Op_CMoveVD:
|
case Op_CMoveVD:
|
||||||
if (vlen != 4) {
|
if (vlen != 4) {
|
||||||
return false; // implementation limitation (only vcmov4D_reg is present)
|
return false; // implementation limitation (only vcmov4D_reg is present)
|
||||||
@ -3356,6 +3368,20 @@ instruct ReplI_zero(vec dst, immI0 zero) %{
|
|||||||
ins_pipe( fpu_reg_reg );
|
ins_pipe( fpu_reg_reg );
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
instruct ReplI_M1(vec dst, immI_M1 con) %{
|
||||||
|
predicate(UseAVX > 0);
|
||||||
|
match(Set dst (ReplicateB con));
|
||||||
|
match(Set dst (ReplicateS con));
|
||||||
|
match(Set dst (ReplicateI con));
|
||||||
|
effect(TEMP dst);
|
||||||
|
format %{ "vallones $dst" %}
|
||||||
|
ins_encode %{
|
||||||
|
int vector_len = vector_length_encoding(this);
|
||||||
|
__ vallones($dst$$XMMRegister, vector_len);
|
||||||
|
%}
|
||||||
|
ins_pipe( pipe_slow );
|
||||||
|
%}
|
||||||
|
|
||||||
// ====================ReplicateL=======================================
|
// ====================ReplicateL=======================================
|
||||||
|
|
||||||
#ifdef _LP64
|
#ifdef _LP64
|
||||||
@ -3488,6 +3514,18 @@ instruct ReplL_zero(vec dst, immL0 zero) %{
|
|||||||
ins_pipe( fpu_reg_reg );
|
ins_pipe( fpu_reg_reg );
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
instruct ReplL_M1(vec dst, immL_M1 con) %{
|
||||||
|
predicate(UseAVX > 0);
|
||||||
|
match(Set dst (ReplicateL con));
|
||||||
|
effect(TEMP dst);
|
||||||
|
format %{ "vallones $dst" %}
|
||||||
|
ins_encode %{
|
||||||
|
int vector_len = vector_length_encoding(this);
|
||||||
|
__ vallones($dst$$XMMRegister, vector_len);
|
||||||
|
%}
|
||||||
|
ins_pipe( pipe_slow );
|
||||||
|
%}
|
||||||
|
|
||||||
// ====================ReplicateF=======================================
|
// ====================ReplicateF=======================================
|
||||||
|
|
||||||
instruct ReplF_reg(vec dst, vlRegF src) %{
|
instruct ReplF_reg(vec dst, vlRegF src) %{
|
||||||
@ -5154,3 +5192,27 @@ instruct vpopcountI(vec dst, vec src) %{
|
|||||||
%}
|
%}
|
||||||
ins_pipe( pipe_slow );
|
ins_pipe( pipe_slow );
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
// --------------------------------- Bitwise Ternary Logic ----------------------------------
|
||||||
|
|
||||||
|
instruct vpternlogdB(vec dst, vec src2, vec src3, immU8 func) %{
|
||||||
|
match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
|
||||||
|
effect(TEMP dst);
|
||||||
|
format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
|
||||||
|
ins_encode %{
|
||||||
|
int vector_len = vector_length_encoding(this);
|
||||||
|
__ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
|
||||||
|
%}
|
||||||
|
ins_pipe( pipe_slow );
|
||||||
|
%}
|
||||||
|
|
||||||
|
instruct vpternlogdB_mem(vec dst, vec src2, memory src3, immU8 func) %{
|
||||||
|
match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
|
||||||
|
effect(TEMP dst);
|
||||||
|
format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
|
||||||
|
ins_encode %{
|
||||||
|
int vector_len = vector_length_encoding(this);
|
||||||
|
__ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
|
||||||
|
%}
|
||||||
|
ins_pipe( pipe_slow );
|
||||||
|
%}
|
||||||
|
@ -4168,7 +4168,7 @@ bool MatchRule::is_vector() const {
|
|||||||
"MulReductionVF", "MulReductionVD",
|
"MulReductionVF", "MulReductionVD",
|
||||||
"MaxReductionV", "MinReductionV",
|
"MaxReductionV", "MinReductionV",
|
||||||
"AndReductionV", "OrReductionV", "XorReductionV",
|
"AndReductionV", "OrReductionV", "XorReductionV",
|
||||||
"MulAddVS2VI",
|
"MulAddVS2VI", "MacroLogicV",
|
||||||
"LShiftCntV","RShiftCntV",
|
"LShiftCntV","RShiftCntV",
|
||||||
"LShiftVB","LShiftVS","LShiftVI","LShiftVL",
|
"LShiftVB","LShiftVS","LShiftVI","LShiftVL",
|
||||||
"RShiftVB","RShiftVS","RShiftVI","RShiftVL",
|
"RShiftVB","RShiftVS","RShiftVI","RShiftVL",
|
||||||
|
@ -186,6 +186,9 @@
|
|||||||
notproduct(bool, TraceSuperWordLoopUnrollAnalysis, false, \
|
notproduct(bool, TraceSuperWordLoopUnrollAnalysis, false, \
|
||||||
"Trace what Superword Level Parallelism analysis applies") \
|
"Trace what Superword Level Parallelism analysis applies") \
|
||||||
\
|
\
|
||||||
|
diagnostic(bool, UseVectorMacroLogic, true, \
|
||||||
|
"Use ternary macro logic instructions") \
|
||||||
|
\
|
||||||
product(intx, LoopUnrollMin, 4, \
|
product(intx, LoopUnrollMin, 4, \
|
||||||
"Minimum number of unroll loop bodies before checking progress" \
|
"Minimum number of unroll loop bodies before checking progress" \
|
||||||
"of rounds of unroll,optimize,..") \
|
"of rounds of unroll,optimize,..") \
|
||||||
|
@ -312,6 +312,7 @@ macro(SubI)
|
|||||||
macro(SubL)
|
macro(SubL)
|
||||||
macro(TailCall)
|
macro(TailCall)
|
||||||
macro(TailJump)
|
macro(TailJump)
|
||||||
|
macro(MacroLogicV)
|
||||||
macro(ThreadLocal)
|
macro(ThreadLocal)
|
||||||
macro(Unlock)
|
macro(Unlock)
|
||||||
macro(URShiftI)
|
macro(URShiftI)
|
||||||
|
@ -76,6 +76,7 @@
|
|||||||
#include "utilities/align.hpp"
|
#include "utilities/align.hpp"
|
||||||
#include "utilities/copy.hpp"
|
#include "utilities/copy.hpp"
|
||||||
#include "utilities/macros.hpp"
|
#include "utilities/macros.hpp"
|
||||||
|
#include "utilities/resourceHash.hpp"
|
||||||
|
|
||||||
|
|
||||||
// -------------------- Compile::mach_constant_base_node -----------------------
|
// -------------------- Compile::mach_constant_base_node -----------------------
|
||||||
@ -2228,6 +2229,11 @@ void Compile::Optimize() {
|
|||||||
igvn.optimize();
|
igvn.optimize();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (C->max_vector_size() > 0) {
|
||||||
|
C->optimize_logic_cones(igvn);
|
||||||
|
igvn.optimize();
|
||||||
|
}
|
||||||
|
|
||||||
DEBUG_ONLY( _modified_nodes = NULL; )
|
DEBUG_ONLY( _modified_nodes = NULL; )
|
||||||
} // (End scope of igvn; run destructor if necessary for asserts.)
|
} // (End scope of igvn; run destructor if necessary for asserts.)
|
||||||
|
|
||||||
@ -2244,6 +2250,317 @@ void Compile::Optimize() {
|
|||||||
print_method(PHASE_OPTIMIZE_FINISHED, 2);
|
print_method(PHASE_OPTIMIZE_FINISHED, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//---------------------------- Bitwise operation packing optimization ---------------------------
|
||||||
|
|
||||||
|
static bool is_vector_unary_bitwise_op(Node* n) {
|
||||||
|
return n->Opcode() == Op_XorV &&
|
||||||
|
VectorNode::is_vector_bitwise_not_pattern(n);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool is_vector_binary_bitwise_op(Node* n) {
|
||||||
|
switch (n->Opcode()) {
|
||||||
|
case Op_AndV:
|
||||||
|
case Op_OrV:
|
||||||
|
return true;
|
||||||
|
|
||||||
|
case Op_XorV:
|
||||||
|
return !is_vector_unary_bitwise_op(n);
|
||||||
|
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool is_vector_ternary_bitwise_op(Node* n) {
|
||||||
|
return n->Opcode() == Op_MacroLogicV;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool is_vector_bitwise_op(Node* n) {
|
||||||
|
return is_vector_unary_bitwise_op(n) ||
|
||||||
|
is_vector_binary_bitwise_op(n) ||
|
||||||
|
is_vector_ternary_bitwise_op(n);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool is_vector_bitwise_cone_root(Node* n) {
|
||||||
|
if (!is_vector_bitwise_op(n)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
|
||||||
|
if (is_vector_bitwise_op(n->fast_out(i))) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static uint collect_unique_inputs(Node* n, Unique_Node_List& partition, Unique_Node_List& inputs) {
|
||||||
|
uint cnt = 0;
|
||||||
|
if (is_vector_bitwise_op(n)) {
|
||||||
|
if (VectorNode::is_vector_bitwise_not_pattern(n)) {
|
||||||
|
for (uint i = 1; i < n->req(); i++) {
|
||||||
|
Node* in = n->in(i);
|
||||||
|
bool skip = VectorNode::is_all_ones_vector(in);
|
||||||
|
if (!skip && !inputs.member(in)) {
|
||||||
|
inputs.push(in);
|
||||||
|
cnt++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert(cnt <= 1, "not unary");
|
||||||
|
} else {
|
||||||
|
uint last_req = n->req();
|
||||||
|
if (is_vector_ternary_bitwise_op(n)) {
|
||||||
|
last_req = n->req() - 1; // skip last input
|
||||||
|
}
|
||||||
|
for (uint i = 1; i < last_req; i++) {
|
||||||
|
Node* def = n->in(i);
|
||||||
|
if (!inputs.member(def)) {
|
||||||
|
inputs.push(def);
|
||||||
|
cnt++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
partition.push(n);
|
||||||
|
} else { // not a bitwise operations
|
||||||
|
if (!inputs.member(n)) {
|
||||||
|
inputs.push(n);
|
||||||
|
cnt++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return cnt;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Compile::collect_logic_cone_roots(Unique_Node_List& list) {
|
||||||
|
Unique_Node_List useful_nodes;
|
||||||
|
C->identify_useful_nodes(useful_nodes);
|
||||||
|
|
||||||
|
for (uint i = 0; i < useful_nodes.size(); i++) {
|
||||||
|
Node* n = useful_nodes.at(i);
|
||||||
|
if (is_vector_bitwise_cone_root(n)) {
|
||||||
|
list.push(n);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Node* Compile::xform_to_MacroLogicV(PhaseIterGVN& igvn,
|
||||||
|
const TypeVect* vt,
|
||||||
|
Unique_Node_List& partition,
|
||||||
|
Unique_Node_List& inputs) {
|
||||||
|
assert(partition.size() == 2 || partition.size() == 3, "not supported");
|
||||||
|
assert(inputs.size() == 2 || inputs.size() == 3, "not supported");
|
||||||
|
assert(Matcher::match_rule_supported_vector(Op_MacroLogicV, vt->length(), vt->element_basic_type()), "not supported");
|
||||||
|
|
||||||
|
Node* in1 = inputs.at(0);
|
||||||
|
Node* in2 = inputs.at(1);
|
||||||
|
Node* in3 = (inputs.size() == 3 ? inputs.at(2) : in2);
|
||||||
|
|
||||||
|
uint func = compute_truth_table(partition, inputs);
|
||||||
|
return igvn.transform(MacroLogicVNode::make(igvn, in3, in2, in1, func, vt));
|
||||||
|
}
|
||||||
|
|
||||||
|
static uint extract_bit(uint func, uint pos) {
|
||||||
|
return (func & (1 << pos)) >> pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// A macro logic node represents a truth table. It has 4 inputs,
|
||||||
|
// First three inputs corresponds to 3 columns of a truth table
|
||||||
|
// and fourth input captures the logic function.
|
||||||
|
//
|
||||||
|
// eg. fn = (in1 AND in2) OR in3;
|
||||||
|
//
|
||||||
|
// MacroNode(in1,in2,in3,fn)
|
||||||
|
//
|
||||||
|
// -----------------
|
||||||
|
// in1 in2 in3 fn
|
||||||
|
// -----------------
|
||||||
|
// 0 0 0 0
|
||||||
|
// 0 0 1 1
|
||||||
|
// 0 1 0 0
|
||||||
|
// 0 1 1 1
|
||||||
|
// 1 0 0 0
|
||||||
|
// 1 0 1 1
|
||||||
|
// 1 1 0 1
|
||||||
|
// 1 1 1 1
|
||||||
|
//
|
||||||
|
|
||||||
|
uint Compile::eval_macro_logic_op(uint func, uint in1 , uint in2, uint in3) {
|
||||||
|
int res = 0;
|
||||||
|
for (int i = 0; i < 8; i++) {
|
||||||
|
int bit1 = extract_bit(in1, i);
|
||||||
|
int bit2 = extract_bit(in2, i);
|
||||||
|
int bit3 = extract_bit(in3, i);
|
||||||
|
|
||||||
|
int func_bit_pos = (bit1 << 2 | bit2 << 1 | bit3);
|
||||||
|
int func_bit = extract_bit(func, func_bit_pos);
|
||||||
|
|
||||||
|
res |= func_bit << i;
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
static uint eval_operand(Node* n, ResourceHashtable<Node*,uint>& eval_map) {
|
||||||
|
assert(n != NULL, "");
|
||||||
|
assert(eval_map.contains(n), "absent");
|
||||||
|
return *(eval_map.get(n));
|
||||||
|
}
|
||||||
|
|
||||||
|
static void eval_operands(Node* n,
|
||||||
|
uint& func1, uint& func2, uint& func3,
|
||||||
|
ResourceHashtable<Node*,uint>& eval_map) {
|
||||||
|
assert(is_vector_bitwise_op(n), "");
|
||||||
|
func1 = eval_operand(n->in(1), eval_map);
|
||||||
|
|
||||||
|
if (is_vector_binary_bitwise_op(n)) {
|
||||||
|
func2 = eval_operand(n->in(2), eval_map);
|
||||||
|
} else if (is_vector_ternary_bitwise_op(n)) {
|
||||||
|
func2 = eval_operand(n->in(2), eval_map);
|
||||||
|
func3 = eval_operand(n->in(3), eval_map);
|
||||||
|
} else {
|
||||||
|
assert(is_vector_unary_bitwise_op(n), "not unary");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint Compile::compute_truth_table(Unique_Node_List& partition, Unique_Node_List& inputs) {
|
||||||
|
assert(inputs.size() <= 3, "sanity");
|
||||||
|
ResourceMark rm;
|
||||||
|
uint res = 0;
|
||||||
|
ResourceHashtable<Node*,uint> eval_map;
|
||||||
|
|
||||||
|
// Populate precomputed functions for inputs.
|
||||||
|
// Each input corresponds to one column of 3 input truth-table.
|
||||||
|
uint input_funcs[] = { 0xAA, // (_, _, a) -> a
|
||||||
|
0xCC, // (_, b, _) -> b
|
||||||
|
0xF0 }; // (c, _, _) -> c
|
||||||
|
for (uint i = 0; i < inputs.size(); i++) {
|
||||||
|
eval_map.put(inputs.at(i), input_funcs[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (uint i = 0; i < partition.size(); i++) {
|
||||||
|
Node* n = partition.at(i);
|
||||||
|
|
||||||
|
uint func1 = 0, func2 = 0, func3 = 0;
|
||||||
|
eval_operands(n, func1, func2, func3, eval_map);
|
||||||
|
|
||||||
|
switch (n->Opcode()) {
|
||||||
|
case Op_OrV:
|
||||||
|
assert(func3 == 0, "not binary");
|
||||||
|
res = func1 | func2;
|
||||||
|
break;
|
||||||
|
case Op_AndV:
|
||||||
|
assert(func3 == 0, "not binary");
|
||||||
|
res = func1 & func2;
|
||||||
|
break;
|
||||||
|
case Op_XorV:
|
||||||
|
if (VectorNode::is_vector_bitwise_not_pattern(n)) {
|
||||||
|
assert(func2 == 0 && func3 == 0, "not unary");
|
||||||
|
res = (~func1) & 0xFF;
|
||||||
|
} else {
|
||||||
|
assert(func3 == 0, "not binary");
|
||||||
|
res = func1 ^ func2;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case Op_MacroLogicV:
|
||||||
|
// Ordering of inputs may change during evaluation of sub-tree
|
||||||
|
// containing MacroLogic node as a child node, thus a re-evaluation
|
||||||
|
// makes sure that function is evaluated in context of current
|
||||||
|
// inputs.
|
||||||
|
res = eval_macro_logic_op(n->in(4)->get_int(), func1, func2, func3);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default: assert(false, "not supported: %s", n->Name());
|
||||||
|
}
|
||||||
|
assert(res <= 0xFF, "invalid");
|
||||||
|
eval_map.put(n, res);
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Compile::compute_logic_cone(Node* n, Unique_Node_List& partition, Unique_Node_List& inputs) {
|
||||||
|
assert(partition.size() == 0, "not empty");
|
||||||
|
assert(inputs.size() == 0, "not empty");
|
||||||
|
assert(!is_vector_ternary_bitwise_op(n), "not supported");
|
||||||
|
|
||||||
|
bool is_unary_op = is_vector_unary_bitwise_op(n);
|
||||||
|
if (is_unary_op) {
|
||||||
|
assert(collect_unique_inputs(n, partition, inputs) == 1, "not unary");
|
||||||
|
return false; // too few inputs
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(is_vector_binary_bitwise_op(n), "not binary");
|
||||||
|
Node* in1 = n->in(1);
|
||||||
|
Node* in2 = n->in(2);
|
||||||
|
|
||||||
|
int in1_unique_inputs_cnt = collect_unique_inputs(in1, partition, inputs);
|
||||||
|
int in2_unique_inputs_cnt = collect_unique_inputs(in2, partition, inputs);
|
||||||
|
partition.push(n);
|
||||||
|
|
||||||
|
// Too many inputs?
|
||||||
|
if (inputs.size() > 3) {
|
||||||
|
partition.clear();
|
||||||
|
inputs.clear();
|
||||||
|
{ // Recompute in2 inputs
|
||||||
|
Unique_Node_List not_used;
|
||||||
|
in2_unique_inputs_cnt = collect_unique_inputs(in2, not_used, not_used);
|
||||||
|
}
|
||||||
|
// Pick the node with minimum number of inputs.
|
||||||
|
if (in1_unique_inputs_cnt >= 3 && in2_unique_inputs_cnt >= 3) {
|
||||||
|
return false; // still too many inputs
|
||||||
|
}
|
||||||
|
// Recompute partition & inputs.
|
||||||
|
Node* child = (in1_unique_inputs_cnt < in2_unique_inputs_cnt ? in1 : in2);
|
||||||
|
collect_unique_inputs(child, partition, inputs);
|
||||||
|
|
||||||
|
Node* other_input = (in1_unique_inputs_cnt < in2_unique_inputs_cnt ? in2 : in1);
|
||||||
|
inputs.push(other_input);
|
||||||
|
|
||||||
|
partition.push(n);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (partition.size() == 2 || partition.size() == 3) &&
|
||||||
|
(inputs.size() == 2 || inputs.size() == 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Compile::process_logic_cone_root(PhaseIterGVN &igvn, Node *n, VectorSet &visited) {
|
||||||
|
assert(is_vector_bitwise_op(n), "not a root");
|
||||||
|
|
||||||
|
visited.set(n->_idx);
|
||||||
|
|
||||||
|
// 1) Do a DFS walk over the logic cone.
|
||||||
|
for (uint i = 1; i < n->req(); i++) {
|
||||||
|
Node* in = n->in(i);
|
||||||
|
if (!visited.test(in->_idx) && is_vector_bitwise_op(in)) {
|
||||||
|
process_logic_cone_root(igvn, in, visited);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2) Bottom up traversal: Merge node[s] with
|
||||||
|
// the parent to form macro logic node.
|
||||||
|
Unique_Node_List partition;
|
||||||
|
Unique_Node_List inputs;
|
||||||
|
if (compute_logic_cone(n, partition, inputs)) {
|
||||||
|
const TypeVect* vt = n->bottom_type()->is_vect();
|
||||||
|
Node* macro_logic = xform_to_MacroLogicV(igvn, vt, partition, inputs);
|
||||||
|
igvn.replace_node(n, macro_logic);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Compile::optimize_logic_cones(PhaseIterGVN &igvn) {
|
||||||
|
ResourceMark rm;
|
||||||
|
if (Matcher::match_rule_supported(Op_MacroLogicV)) {
|
||||||
|
Unique_Node_List list;
|
||||||
|
collect_logic_cone_roots(list);
|
||||||
|
|
||||||
|
while (list.size() > 0) {
|
||||||
|
Node* n = list.pop();
|
||||||
|
const TypeVect* vt = n->bottom_type()->is_vect();
|
||||||
|
bool supported = Matcher::match_rule_supported_vector(Op_MacroLogicV, vt->length(), vt->element_basic_type());
|
||||||
|
if (supported) {
|
||||||
|
VectorSet visited(comp_arena());
|
||||||
|
process_logic_cone_root(igvn, n, visited);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//------------------------------Code_Gen---------------------------------------
|
//------------------------------Code_Gen---------------------------------------
|
||||||
// Given a graph, generate code for it
|
// Given a graph, generate code for it
|
||||||
@ -4225,6 +4542,7 @@ void CloneMap::dump(node_idx_t key) const {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Move Allocate nodes to the start of the list
|
// Move Allocate nodes to the start of the list
|
||||||
void Compile::sort_macro_nodes() {
|
void Compile::sort_macro_nodes() {
|
||||||
int count = macro_count();
|
int count = macro_count();
|
||||||
|
@ -83,6 +83,7 @@ class TypeInt;
|
|||||||
class TypePtr;
|
class TypePtr;
|
||||||
class TypeOopPtr;
|
class TypeOopPtr;
|
||||||
class TypeFunc;
|
class TypeFunc;
|
||||||
|
class TypeVect;
|
||||||
class Unique_Node_List;
|
class Unique_Node_List;
|
||||||
class nmethod;
|
class nmethod;
|
||||||
class WarmCallInfo;
|
class WarmCallInfo;
|
||||||
@ -1106,6 +1107,15 @@ class Compile : public Phase {
|
|||||||
void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &frc );
|
void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &frc );
|
||||||
void eliminate_redundant_card_marks(Node* n);
|
void eliminate_redundant_card_marks(Node* n);
|
||||||
|
|
||||||
|
// Logic cone optimization.
|
||||||
|
void optimize_logic_cones(PhaseIterGVN &igvn);
|
||||||
|
void collect_logic_cone_roots(Unique_Node_List& list);
|
||||||
|
void process_logic_cone_root(PhaseIterGVN &igvn, Node* n, VectorSet& visited);
|
||||||
|
bool compute_logic_cone(Node* n, Unique_Node_List& partition, Unique_Node_List& inputs);
|
||||||
|
uint compute_truth_table(Unique_Node_List& partition, Unique_Node_List& inputs);
|
||||||
|
uint eval_macro_logic_op(uint func, uint op1, uint op2, uint op3);
|
||||||
|
Node* xform_to_MacroLogicV(PhaseIterGVN &igvn, const TypeVect* vt, Unique_Node_List& partitions, Unique_Node_List& inputs);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
// Note: Histogram array size is about 1 Kb.
|
// Note: Histogram array size is about 1 Kb.
|
||||||
|
@ -2220,6 +2220,7 @@ bool Matcher::find_shared_visit(MStack& mstack, Node* n, uint opcode, bool& mem_
|
|||||||
case Op_FmaF:
|
case Op_FmaF:
|
||||||
case Op_FmaVD:
|
case Op_FmaVD:
|
||||||
case Op_FmaVF:
|
case Op_FmaVF:
|
||||||
|
case Op_MacroLogicV:
|
||||||
set_shared(n); // Force result into register (it will be anyways)
|
set_shared(n); // Force result into register (it will be anyways)
|
||||||
break;
|
break;
|
||||||
case Op_ConP: { // Convert pointers above the centerline to NUL
|
case Op_ConP: { // Convert pointers above the centerline to NUL
|
||||||
@ -2313,6 +2314,15 @@ void Matcher::find_shared_post_visit(Node* n, uint opcode) {
|
|||||||
n->del_req(3);
|
n->del_req(3);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case Op_MacroLogicV: {
|
||||||
|
Node* pair1 = new BinaryNode(n->in(1), n->in(2));
|
||||||
|
Node* pair2 = new BinaryNode(n->in(3), n->in(4));
|
||||||
|
n->set_req(1, pair1);
|
||||||
|
n->set_req(2, pair2);
|
||||||
|
n->del_req(4);
|
||||||
|
n->del_req(3);
|
||||||
|
break;
|
||||||
|
}
|
||||||
case Op_LoopLimit: {
|
case Op_LoopLimit: {
|
||||||
Node* pair1 = new BinaryNode(n->in(1), n->in(2));
|
Node* pair1 = new BinaryNode(n->in(1), n->in(2));
|
||||||
n->set_req(1, pair1);
|
n->set_req(1, pair1);
|
||||||
|
@ -518,6 +518,39 @@ bool VectorNode::is_vector_shift_count(int opc) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool is_con_M1(Node* n) {
|
||||||
|
if (n->is_Con()) {
|
||||||
|
const Type* t = n->bottom_type();
|
||||||
|
if (t->isa_int() && t->is_int()->get_con() == -1) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (t->isa_long() && t->is_long()->get_con() == -1) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool VectorNode::is_all_ones_vector(Node* n) {
|
||||||
|
switch (n->Opcode()) {
|
||||||
|
case Op_ReplicateB:
|
||||||
|
case Op_ReplicateS:
|
||||||
|
case Op_ReplicateI:
|
||||||
|
case Op_ReplicateL:
|
||||||
|
return is_con_M1(n->in(1));
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool VectorNode::is_vector_bitwise_not_pattern(Node* n) {
|
||||||
|
if (n->Opcode() == Op_XorV) {
|
||||||
|
return is_all_ones_vector(n->in(1)) ||
|
||||||
|
is_all_ones_vector(n->in(2));
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
// Return initial Pack node. Additional operands added with add_opd() calls.
|
// Return initial Pack node. Additional operands added with add_opd() calls.
|
||||||
PackNode* PackNode::make(Node* s, uint vlen, BasicType bt) {
|
PackNode* PackNode::make(Node* s, uint vlen, BasicType bt) {
|
||||||
const TypeVect* vt = TypeVect::make(bt, vlen);
|
const TypeVect* vt = TypeVect::make(bt, vlen);
|
||||||
@ -740,3 +773,13 @@ bool ReductionNode::implemented(int opc, uint vlen, BasicType bt) {
|
|||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
MacroLogicVNode* MacroLogicVNode::make(PhaseGVN& gvn, Node* v1, Node* v2, Node* v3,
|
||||||
|
uint truth_table, const TypeVect* vt) {
|
||||||
|
assert(truth_table <= 0xFF, "invalid");
|
||||||
|
assert(v1->bottom_type() == vt, "mismatch");
|
||||||
|
assert(v2->bottom_type() == vt, "mismatch");
|
||||||
|
assert(v3->bottom_type() == vt, "mismatch");
|
||||||
|
Node* fn = gvn.intcon(truth_table);
|
||||||
|
return new MacroLogicVNode(v1, v2, v3, fn, vt);
|
||||||
|
}
|
||||||
|
@ -51,6 +51,14 @@ class VectorNode : public TypeNode {
|
|||||||
init_req(3, n3);
|
init_req(3, n3);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VectorNode(Node *n0, Node* n1, Node* n2, Node* n3, const TypeVect* vt) : TypeNode(vt, 5) {
|
||||||
|
init_class_id(Class_Vector);
|
||||||
|
init_req(1, n0);
|
||||||
|
init_req(2, n1);
|
||||||
|
init_req(3, n2);
|
||||||
|
init_req(4, n3);
|
||||||
|
}
|
||||||
|
|
||||||
const TypeVect* vect_type() const { return type()->is_vect(); }
|
const TypeVect* vect_type() const { return type()->is_vect(); }
|
||||||
uint length() const { return vect_type()->length(); } // Vector length
|
uint length() const { return vect_type()->length(); } // Vector length
|
||||||
uint length_in_bytes() const { return vect_type()->length_in_bytes(); }
|
uint length_in_bytes() const { return vect_type()->length_in_bytes(); }
|
||||||
@ -72,6 +80,9 @@ class VectorNode : public TypeNode {
|
|||||||
static bool is_muladds2i(Node* n);
|
static bool is_muladds2i(Node* n);
|
||||||
static bool is_roundopD(Node * n);
|
static bool is_roundopD(Node * n);
|
||||||
static bool is_invariant_vector(Node* n);
|
static bool is_invariant_vector(Node* n);
|
||||||
|
static bool is_all_ones_vector(Node* n);
|
||||||
|
static bool is_vector_bitwise_not_pattern(Node* n);
|
||||||
|
|
||||||
// [Start, end) half-open range defining which operands are vectors
|
// [Start, end) half-open range defining which operands are vectors
|
||||||
static void vector_operands(Node* n, uint* start, uint* end);
|
static void vector_operands(Node* n, uint* start, uint* end);
|
||||||
|
|
||||||
@ -982,4 +993,17 @@ public:
|
|||||||
virtual const Type *Value(PhaseGVN *phase) const { return TypeInt::INT; }
|
virtual const Type *Value(PhaseGVN *phase) const { return TypeInt::INT; }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
//------------------------------MacroLogicVNode-------------------------------
|
||||||
|
// Vector logical operations packing node.
|
||||||
|
class MacroLogicVNode : public VectorNode {
|
||||||
|
private:
|
||||||
|
MacroLogicVNode(Node* in1, Node* in2, Node* in3, Node* fn, const TypeVect* vt)
|
||||||
|
: VectorNode(in1, in2, in3, fn, vt) {}
|
||||||
|
|
||||||
|
public:
|
||||||
|
virtual int Opcode() const;
|
||||||
|
|
||||||
|
static MacroLogicVNode* make(PhaseGVN& igvn, Node* in1, Node* in2, Node* in3, uint truth_table, const TypeVect* vt);
|
||||||
|
};
|
||||||
|
|
||||||
#endif // SHARE_OPTO_VECTORNODE_HPP
|
#endif // SHARE_OPTO_VECTORNODE_HPP
|
||||||
|
@ -0,0 +1,208 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @test
|
||||||
|
* @bug 8241040
|
||||||
|
* @library /test/lib
|
||||||
|
*
|
||||||
|
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions
|
||||||
|
* -Xbatch -XX:-TieredCompilation -XX:CICompilerCount=1 -XX:UseAVX=3
|
||||||
|
* -XX:CompileCommand=quiet -XX:CompileCommand=compileonly,*::test*
|
||||||
|
* -XX:+TraceNewVectors compiler.vectorization.TestMacroLogicVector
|
||||||
|
*/
|
||||||
|
package compiler.vectorization;
|
||||||
|
|
||||||
|
import jdk.test.lib.Utils;
|
||||||
|
|
||||||
|
import java.util.Random;
|
||||||
|
import java.util.concurrent.Callable;
|
||||||
|
|
||||||
|
public class TestMacroLogicVector {
|
||||||
|
static int intFunc1(int a, int b, int c) {
|
||||||
|
int v1 = (a & b) ^ (a & c) ^ (b & c);
|
||||||
|
int v2 = (~a & b) | (~b & c) | (~c & a);
|
||||||
|
return v1 & v2;
|
||||||
|
}
|
||||||
|
static void testInt1(int[] r, int[] a, int[] b, int[] c) {
|
||||||
|
for (int i = 0; i < r.length; i++) {
|
||||||
|
r[i] = intFunc1(a[i], b[i], c[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
static void verifyInt1(int[] r, int[] a, int[] b, int[] c) {
|
||||||
|
for (int i = 0; i < r.length; i++) {
|
||||||
|
int expected = intFunc1(a[i], b[i], c[i]);
|
||||||
|
if (r[i] != expected) {
|
||||||
|
throw new AssertionError(String.format("at #%d: r=%d, expected = %d = intFunc1(%d,%d,%d)",
|
||||||
|
i, r[i], expected, a[i], b[i], c[i]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
static int intFunc2(int a) {
|
||||||
|
int v1 = (a & a) ^ (a & a) ^ (a & a);
|
||||||
|
int v2 = (~a & ~a) | (~a & a) | (a & a);
|
||||||
|
return v1 & v2;
|
||||||
|
}
|
||||||
|
static void testInt2(int[] r, int[] a) {
|
||||||
|
for (int i = 0; i < r.length; i++) {
|
||||||
|
r[i] = intFunc2(a[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
static void verifyInt2(int[] r, int[] a) {
|
||||||
|
for (int i = 0; i < r.length; i++) {
|
||||||
|
int expected = intFunc2(a[i]);
|
||||||
|
if (r[i] != expected) {
|
||||||
|
throw new AssertionError(String.format("at #%d: r=%d, expected = %d = intFunc2(%d)",
|
||||||
|
i, r[i], expected, a[i]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static int intFunc3(int a, int b) {
|
||||||
|
return (~a & b) ^ (a | b);
|
||||||
|
}
|
||||||
|
static void testInt3(int[] r, int[] a, int[] b) {
|
||||||
|
for (int i = 0; i < r.length; i++) {
|
||||||
|
r[i] = intFunc3(a[i], b[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
static void verifyInt3(int[] r, int[] a, int[] b) {
|
||||||
|
for (int i = 0; i < r.length; i++) {
|
||||||
|
int expected = intFunc3(a[i], b[i]);
|
||||||
|
if (r[i] != expected) {
|
||||||
|
throw new AssertionError(String.format("at #%d: r=%d, expected = %d = intFunc3(%d,%d)",
|
||||||
|
i, r[i], expected, a[i], b[i]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static int intFunc4(int a, int b, int c, int d, int e, int f) {
|
||||||
|
int v1 = (~a | ~b) & ~c;
|
||||||
|
int v2 = ~d & (~e & f);
|
||||||
|
return v1 | v2;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void testInt4(int[] r, int[] a, int[] b, int[] c, int[] d, int[] e, int[] f) {
|
||||||
|
for (int i = 0; i < r.length; i++) {
|
||||||
|
r[i] = intFunc4(a[i], b[i], c[i], d[i], e[i], f[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void verifyTest4(int[] r, int[] a, int[] b, int[] c, int[] d, int[] e, int[] f) {
|
||||||
|
for (int i = 0; i < r.length; i++) {
|
||||||
|
long expected = intFunc4(a[i], b[i], c[i], d[i], e[i], f[i]);
|
||||||
|
if (r[i] != expected) {
|
||||||
|
throw new AssertionError(
|
||||||
|
String.format("at #%d: r=%d, expected = %d = intFunc4(%d,%d,%d,%d,%d,%d)",
|
||||||
|
i, r[i], expected, a[i], b[i], c[i], d[i], e[i], f[i]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// ===================================================== //
|
||||||
|
|
||||||
|
static long longFunc(long a, long b, long c) {
|
||||||
|
long v1 = (a & b) ^ (a & c) ^ (b & c);
|
||||||
|
long v2 = (~a & b) | (~b & c) | (~c & a);
|
||||||
|
return v1 & v2;
|
||||||
|
}
|
||||||
|
static void testLong(long[] r, long[] a, long[] b, long[] c) {
|
||||||
|
for (int i = 0; i < r.length; i++) {
|
||||||
|
r[i] = longFunc(a[i], b[i], c[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
static void verifyLong(long[] r, long[] a, long[] b, long[] c) {
|
||||||
|
for (int i = 0; i < r.length; i++) {
|
||||||
|
long expected = longFunc(a[i], b[i], c[i]);
|
||||||
|
if (r[i] != expected) {
|
||||||
|
throw new AssertionError(
|
||||||
|
String.format("at #%d: r=%d, expected = %d = longFunc(%d,%d,%d)",
|
||||||
|
i, r[i], expected, a[i], b[i], c[i]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ===================================================== //
|
||||||
|
|
||||||
|
private static final Random R = Utils.getRandomInstance();
|
||||||
|
|
||||||
|
static int[] fillIntRandom(Callable<int[]> factory) {
|
||||||
|
try {
|
||||||
|
int[] arr = factory.call();
|
||||||
|
for (int i = 0; i < arr.length; i++) {
|
||||||
|
arr[i] = R.nextInt();
|
||||||
|
}
|
||||||
|
return arr;
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new InternalError(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
static long[] fillLongRandom(Callable<long[]> factory) {
|
||||||
|
try {
|
||||||
|
long[] arr = factory.call();
|
||||||
|
for (int i = 0; i < arr.length; i++) {
|
||||||
|
arr[i] = R.nextLong();
|
||||||
|
}
|
||||||
|
return arr;
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new InternalError(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ===================================================== //
|
||||||
|
|
||||||
|
static final int SIZE = 128;
|
||||||
|
|
||||||
|
public static void main(String[] args) {
|
||||||
|
|
||||||
|
int[] r = new int[SIZE];
|
||||||
|
int[] a = fillIntRandom(()-> new int[SIZE]);
|
||||||
|
int[] b = fillIntRandom(()-> new int[SIZE]);
|
||||||
|
int[] c = fillIntRandom(()-> new int[SIZE]);
|
||||||
|
int[] d = fillIntRandom(()-> new int[SIZE]);
|
||||||
|
int[] e = fillIntRandom(()-> new int[SIZE]);
|
||||||
|
int[] f = fillIntRandom(()-> new int[SIZE]);
|
||||||
|
|
||||||
|
long[] rl = new long[SIZE];
|
||||||
|
long[] al = fillLongRandom(() -> new long[SIZE]);
|
||||||
|
long[] bl = fillLongRandom(() -> new long[SIZE]);
|
||||||
|
long[] cl = fillLongRandom(() -> new long[SIZE]);
|
||||||
|
|
||||||
|
for (int i = 0; i < 20_000; i++) {
|
||||||
|
testInt1(r, a, b, c);
|
||||||
|
verifyInt1(r, a, b, c);
|
||||||
|
|
||||||
|
testInt2(r, a);
|
||||||
|
verifyInt2(r, a);
|
||||||
|
|
||||||
|
testInt3(r, a, b);
|
||||||
|
verifyInt3(r, a, b);
|
||||||
|
|
||||||
|
testInt4(r, a, b, c, d, e, f);
|
||||||
|
verifyTest4(r, a, b, c, d, e, f);
|
||||||
|
|
||||||
|
testLong(rl, al, bl, cl);
|
||||||
|
verifyLong(rl, al, bl, cl);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
125
test/micro/org/openjdk/bench/vm/compiler/MacroLogicOpt.java
Normal file
125
test/micro/org/openjdk/bench/vm/compiler/MacroLogicOpt.java
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*/
|
||||||
|
package org.openjdk.bench.vm.compiler;
|
||||||
|
|
||||||
|
import org.openjdk.jmh.annotations.*;
|
||||||
|
import org.openjdk.jmh.infra.*;
|
||||||
|
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
|
@BenchmarkMode(Mode.Throughput)
|
||||||
|
@OutputTimeUnit(TimeUnit.SECONDS)
|
||||||
|
@State(Scope.Thread)
|
||||||
|
public class MacroLogicOpt {
|
||||||
|
@Param({"64","128","256","512","1024","2048","4096"}) private int VECLEN;
|
||||||
|
|
||||||
|
private int [] ai = new int[VECLEN];
|
||||||
|
private int [] bi = new int[VECLEN];
|
||||||
|
private int [] ci = new int[VECLEN];
|
||||||
|
private int [] ri = new int[VECLEN];
|
||||||
|
|
||||||
|
private long [] al = new long[VECLEN];
|
||||||
|
private long [] bl = new long[VECLEN];
|
||||||
|
private long [] cl = new long[VECLEN];
|
||||||
|
private long [] dl = new long[VECLEN];
|
||||||
|
private long [] el = new long[VECLEN];
|
||||||
|
private long [] fl = new long[VECLEN];
|
||||||
|
private long [] rl = new long[VECLEN];
|
||||||
|
|
||||||
|
private Random r = new Random();
|
||||||
|
|
||||||
|
@Setup
|
||||||
|
public void init() {
|
||||||
|
ai = new int[VECLEN];
|
||||||
|
bi = new int[VECLEN];
|
||||||
|
ci = new int[VECLEN];
|
||||||
|
ri = new int[VECLEN];
|
||||||
|
|
||||||
|
al = new long[VECLEN];
|
||||||
|
bl = new long[VECLEN];
|
||||||
|
cl = new long[VECLEN];
|
||||||
|
dl = new long[VECLEN];
|
||||||
|
el = new long[VECLEN];
|
||||||
|
fl = new long[VECLEN];
|
||||||
|
rl = new long[VECLEN];
|
||||||
|
for (int i=0; i<VECLEN; i++) {
|
||||||
|
ai[i] = r.nextInt();
|
||||||
|
bi[i] = r.nextInt();
|
||||||
|
ci[i] = r.nextInt();
|
||||||
|
|
||||||
|
al[i] = r.nextLong();
|
||||||
|
bl[i] = r.nextLong();
|
||||||
|
cl[i] = r.nextLong();
|
||||||
|
dl[i] = r.nextLong();
|
||||||
|
el[i] = r.nextLong();
|
||||||
|
fl[i] = r.nextLong();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
|
||||||
|
private int run_workload1(int count, int [] a , int [] b, int [] c, int [] r) {
|
||||||
|
for(int i = 0 ; i < r.length ; i++)
|
||||||
|
r[i] = (((a[i] & b[i]) ^ (a[i] & c[i]) ^ (b[i] & c[i])) & ((~a[i] & b[i]) | (~b[i] & c[i]) | ~c[i] & a[i]));
|
||||||
|
return r[count];
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void workload1_caller(Blackhole bh) {
|
||||||
|
int r = 0;
|
||||||
|
for(int i = 0 ; i < 10000; i++)
|
||||||
|
r += run_workload1(i&(ri.length-1), ai, bi, ci, ri);
|
||||||
|
bh.consume(r);
|
||||||
|
}
|
||||||
|
|
||||||
|
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
|
||||||
|
private long run_workload2(int count, long [] a , long [] b, long [] c, long [] r) {
|
||||||
|
for(int i = 0 ; i < r.length ; i++)
|
||||||
|
r[i] = (((a[i] & b[i]) ^ (a[i] & c[i]) ^ (b[i] & c[i])) & ((~a[i] & b[i]) | (~b[i] & c[i]) | ~c[i] & a[i]));
|
||||||
|
return r[count];
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void workload2_caller(Blackhole bh) {
|
||||||
|
long r = 0;
|
||||||
|
for(int i = 0 ; i < 100000; i++)
|
||||||
|
r += run_workload2(i&(rl.length-1), al, bl, cl, rl);
|
||||||
|
bh.consume(r);
|
||||||
|
}
|
||||||
|
|
||||||
|
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
|
||||||
|
private long run_workload3(int count, long [] a , long [] b, long [] c,
|
||||||
|
long [] d, long [] e, long [] f, long [] r) {
|
||||||
|
for(int i = 0 ; i < r.length ; i++)
|
||||||
|
r[i] = (((~a[i] | ~b[i]) & (~c[i])) | (~d[i] & (~e[i] & f[i])));
|
||||||
|
return r[count];
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void workload3_caller(Blackhole bh) {
|
||||||
|
long r = 0;
|
||||||
|
for(int i = 0 ; i < 10000; i++)
|
||||||
|
r += run_workload3(i&(ri.length-1), al, bl, cl, dl, el, fl, rl);
|
||||||
|
bh.consume(r);
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user