8273322: Enhance macro logic optimization for masked logic operations.
Reviewed-by: kvn, sviswanathan
This commit is contained in:
parent
bc12381105
commit
8703f14808
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -9725,6 +9725,68 @@ void Assembler::evpmaxsq(XMMRegister dst, KRegister mask, XMMRegister nds, Addre
|
||||
emit_operand(dst, src);
|
||||
}
|
||||
|
||||
// Register-source form of the opmask-predicated doubleword ternary-logic instruction
// (opcode byte 0x25, VEX_SIMD_66 / VEX_OPCODE_0F_3A, vex_w = false).
//   imm8       - emitted as the trailing immediate byte
//   mask       - installed as the embedded opmask register specifier
//   merge      - when true, the clear-context attribute is reset before encoding
//   vector_len - anything other than AVX_512bit requires AVX512VL support
void Assembler::evpternlogd(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, XMMRegister src3, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "requires EVEX support");
  assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");

  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  attributes.set_embedded_opmask_register_specifier(mask);
  if (merge) attributes.reset_is_clear_context();

  // Second emitted byte is 0xC0 or-ed with the value returned by the prefix encoder.
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src3->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int24(0x25, (unsigned char)(0xC0 | reg_bits), imm8);
}
|
||||
|
||||
// Memory-source form of the opmask-predicated doubleword ternary-logic instruction:
// the third operand is read from the address src3.
// Emission order is prefix, opcode 0x25, operand bytes for (dst, src3), then imm8.
// NOTE(review): the address attributes use EVEX_64bit as the input size, the same value
// the quadword overload passes - confirm this is intended for the doubleword form.
void Assembler::evpternlogd(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, Address src3, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "requires EVEX support");
  assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
  assert(dst != xnoreg, "sanity");

  InstructionMark im(this);

  // vex_w = false, legacy_mode = false, no_mask_reg = false, uses_vl = true
  InstructionAttr attributes(vector_len, false, false, false, true);
  attributes.set_is_evex_instruction();
  attributes.set_embedded_opmask_register_specifier(mask);
  // tuple type EVEX_FV, input size EVEX_64bit
  attributes.set_address_attributes(EVEX_FV, EVEX_64bit);
  if (merge) attributes.reset_is_clear_context();

  vex_prefix(src3, src2->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x25);
  emit_operand(dst, src3);
  emit_int8(imm8);
}
|
||||
|
||||
// Register-source form of the opmask-predicated quadword ternary-logic instruction.
// Same emission sequence as the doubleword register form, but the instruction
// attributes are built with vex_w = true.
//   imm8  - emitted as the trailing immediate byte
//   merge - when true, the clear-context attribute is reset before encoding
void Assembler::evpternlogq(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, XMMRegister src3, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "requires EVEX support");
  assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");

  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  attributes.set_embedded_opmask_register_specifier(mask);
  if (merge) attributes.reset_is_clear_context();

  // Second emitted byte is 0xC0 or-ed with the value returned by the prefix encoder.
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src3->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int24(0x25, (unsigned char)(0xC0 | reg_bits), imm8);
}
|
||||
|
||||
// Memory-source form of the opmask-predicated quadword ternary-logic instruction:
// the third operand is read from the address src3 (vex_w = true).
// Emission order is prefix, opcode 0x25, operand bytes for (dst, src3), then imm8.
void Assembler::evpternlogq(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, Address src3, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "requires EVEX support");
  assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
  assert(dst != xnoreg, "sanity");

  InstructionMark im(this);

  // vex_w = true, legacy_mode = false, no_mask_reg = false, uses_vl = true
  InstructionAttr attributes(vector_len, true, false, false, true);
  attributes.set_is_evex_instruction();
  attributes.set_embedded_opmask_register_specifier(mask);
  // tuple type EVEX_FV, input size EVEX_64bit
  attributes.set_address_attributes(EVEX_FV, EVEX_64bit);
  if (merge) attributes.reset_is_clear_context();

  vex_prefix(src3, src2->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x25);
  emit_operand(dst, src3);
  emit_int8(imm8);
}
|
||||
|
||||
// duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL
|
||||
void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||
assert(UseAVX >= 2, "");
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -2413,6 +2413,12 @@ private:
|
||||
void evprorvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
|
||||
void evprorvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
|
||||
|
||||
void evpternlogd(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, XMMRegister src3, bool merge, int vector_len);
|
||||
void evpternlogd(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, Address src3, bool merge, int vector_len);
|
||||
void evpternlogq(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, XMMRegister src3, bool merge, int vector_len);
|
||||
void evpternlogq(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, Address src3, bool merge, int vector_len);
|
||||
|
||||
|
||||
// Sub packed integers
|
||||
void psubb(XMMRegister dst, XMMRegister src);
|
||||
void psubw(XMMRegister dst, XMMRegister src);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -4152,6 +4152,26 @@ void C2_MacroAssembler::vector_castF2I_evex(XMMRegister dst, XMMRegister src, XM
|
||||
bind(done);
|
||||
}
|
||||
|
||||
// Element-type dispatch for the masked ternary-logic instruction, register source:
// T_INT selects evpternlogd; every other type is asserted to be T_LONG and selects evpternlogq.
// All remaining arguments are forwarded unchanged.
void C2_MacroAssembler::evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, XMMRegister src3,
                                   bool merge, BasicType bt, int vlen_enc) {
  if (bt != T_INT) {
    assert(bt == T_LONG, "");
    evpternlogq(dst, func, mask, src2, src3, merge, vlen_enc);
    return;
  }
  evpternlogd(dst, func, mask, src2, src3, merge, vlen_enc);
}
|
||||
|
||||
// Element-type dispatch for the masked ternary-logic instruction, memory source:
// T_INT selects evpternlogd; every other type is asserted to be T_LONG and selects evpternlogq.
// All remaining arguments are forwarded unchanged.
void C2_MacroAssembler::evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, Address src3,
                                   bool merge, BasicType bt, int vlen_enc) {
  if (bt != T_INT) {
    assert(bt == T_LONG, "");
    evpternlogq(dst, func, mask, src2, src3, merge, vlen_enc);
    return;
  }
  evpternlogd(dst, func, mask, src2, src3, merge, vlen_enc);
}
|
||||
|
||||
#ifdef _LP64
|
||||
void C2_MacroAssembler::vector_long_to_maskvec(XMMRegister dst, Register src, Register rtmp1,
|
||||
Register rtmp2, XMMRegister xtmp, int mask_len,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -311,4 +311,11 @@ public:
|
||||
void vector_castD2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
|
||||
KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
|
||||
Register scratch, int vec_enc);
|
||||
|
||||
void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, XMMRegister src3,
|
||||
bool merge, BasicType bt, int vlen_enc);
|
||||
|
||||
void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, Address src3,
|
||||
bool merge, BasicType bt, int vlen_enc);
|
||||
|
||||
#endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP
|
||||
|
@ -1888,6 +1888,12 @@ const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, Bas
|
||||
case Op_FmaVD:
|
||||
return true;
|
||||
|
||||
case Op_MacroLogicV:
|
||||
if(bt != T_INT && bt != T_LONG) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
||||
// Binary masked operations
|
||||
case Op_AddVB:
|
||||
case Op_AddVS:
|
||||
@ -9560,6 +9566,29 @@ instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// Masked ternary-logic rule, all-register operands.
// Matches a MacroLogicV whose first input is dst itself, followed by src2, src3,
// the immU8 truth-table constant func and the kReg opmask.
// The element type and vector-length encoding are derived from this node.
instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
|
||||
match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
|
||||
format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
|
||||
ins_encode %{
|
||||
int vlen_enc = vector_length_encoding(this);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
// The literal `true` below is the `merge` argument of evpternlog.
__ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
|
||||
$src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// Masked ternary-logic rule with a memory operand as the third source.
// Same match shape as the register rule, but src3 is declared as `memory`
// and is passed to evpternlog as an Address.
// The element type and vector-length encoding are derived from this node.
instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
|
||||
match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
|
||||
format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
|
||||
ins_encode %{
|
||||
int vlen_enc = vector_length_encoding(this);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
// The literal `true` below is the `merge` argument of evpternlog.
__ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
|
||||
$src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct castMM(kReg dst)
|
||||
%{
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -2375,7 +2375,6 @@ bool Compile::has_vbox_nodes() {
|
||||
|
||||
static bool is_vector_unary_bitwise_op(Node* n) {
|
||||
return n->Opcode() == Op_XorV &&
|
||||
n->req() == 2 &&
|
||||
VectorNode::is_vector_bitwise_not_pattern(n);
|
||||
}
|
||||
|
||||
@ -2383,7 +2382,7 @@ static bool is_vector_binary_bitwise_op(Node* n) {
|
||||
switch (n->Opcode()) {
|
||||
case Op_AndV:
|
||||
case Op_OrV:
|
||||
return n->req() == 2;
|
||||
return true;
|
||||
|
||||
case Op_XorV:
|
||||
return !is_vector_unary_bitwise_op(n);
|
||||
@ -2415,11 +2414,12 @@ static bool is_vector_bitwise_cone_root(Node* n) {
|
||||
return true;
|
||||
}
|
||||
|
||||
static uint collect_unique_inputs(Node* n, Unique_Node_List& partition, Unique_Node_List& inputs) {
|
||||
static uint collect_unique_inputs(Node* n, Unique_Node_List& inputs) {
|
||||
uint cnt = 0;
|
||||
if (is_vector_bitwise_op(n)) {
|
||||
uint inp_cnt = n->is_predicated_vector() ? n->req()-1 : n->req();
|
||||
if (VectorNode::is_vector_bitwise_not_pattern(n)) {
|
||||
for (uint i = 1; i < n->req(); i++) {
|
||||
for (uint i = 1; i < inp_cnt; i++) {
|
||||
Node* in = n->in(i);
|
||||
bool skip = VectorNode::is_all_ones_vector(in);
|
||||
if (!skip && !inputs.member(in)) {
|
||||
@ -2429,9 +2429,9 @@ static uint collect_unique_inputs(Node* n, Unique_Node_List& partition, Unique_N
|
||||
}
|
||||
assert(cnt <= 1, "not unary");
|
||||
} else {
|
||||
uint last_req = n->req();
|
||||
uint last_req = inp_cnt;
|
||||
if (is_vector_ternary_bitwise_op(n)) {
|
||||
last_req = n->req() - 1; // skip last input
|
||||
last_req = inp_cnt - 1; // skip last input
|
||||
}
|
||||
for (uint i = 1; i < last_req; i++) {
|
||||
Node* def = n->in(i);
|
||||
@ -2441,7 +2441,6 @@ static uint collect_unique_inputs(Node* n, Unique_Node_List& partition, Unique_N
|
||||
}
|
||||
}
|
||||
}
|
||||
partition.push(n);
|
||||
} else { // not a bitwise operations
|
||||
if (!inputs.member(n)) {
|
||||
inputs.push(n);
|
||||
@ -2476,7 +2475,10 @@ Node* Compile::xform_to_MacroLogicV(PhaseIterGVN& igvn,
|
||||
Node* in3 = (inputs.size() == 3 ? inputs.at(2) : in2);
|
||||
|
||||
uint func = compute_truth_table(partition, inputs);
|
||||
return igvn.transform(MacroLogicVNode::make(igvn, in3, in2, in1, func, vt));
|
||||
|
||||
Node* pn = partition.at(partition.size() - 1);
|
||||
Node* mask = pn->is_predicated_vector() ? pn->in(pn->req()-1) : NULL;
|
||||
return igvn.transform(MacroLogicVNode::make(igvn, in1, in2, in3, mask, func, vt));
|
||||
}
|
||||
|
||||
static uint extract_bit(uint func, uint pos) {
|
||||
@ -2556,11 +2558,11 @@ uint Compile::compute_truth_table(Unique_Node_List& partition, Unique_Node_List&
|
||||
|
||||
// Populate precomputed functions for inputs.
|
||||
// Each input corresponds to one column of 3 input truth-table.
|
||||
uint input_funcs[] = { 0xAA, // (_, _, a) -> a
|
||||
uint input_funcs[] = { 0xAA, // (_, _, c) -> c
|
||||
0xCC, // (_, b, _) -> b
|
||||
0xF0 }; // (c, _, _) -> c
|
||||
0xF0 }; // (a, _, _) -> a
|
||||
for (uint i = 0; i < inputs.size(); i++) {
|
||||
eval_map.put(inputs.at(i), input_funcs[i]);
|
||||
eval_map.put(inputs.at(i), input_funcs[2-i]);
|
||||
}
|
||||
|
||||
for (uint i = 0; i < partition.size(); i++) {
|
||||
@ -2603,6 +2605,14 @@ uint Compile::compute_truth_table(Unique_Node_List& partition, Unique_Node_List&
|
||||
return res;
|
||||
}
|
||||
|
||||
// Criteria under which nodes get packed into a macro logic node:
|
||||
// 1) Parent and both child nodes are all unmasked or masked with
|
||||
// same predicates.
|
||||
// 2) Masked parent can be packed with left child if it is predicated
|
||||
// and both have same predicates.
|
||||
// 3) Masked parent can be packed with right child if it is un-predicated
|
||||
// or has matching predication condition.
|
||||
// 4) An unmasked parent can be packed with an unmasked child.
|
||||
bool Compile::compute_logic_cone(Node* n, Unique_Node_List& partition, Unique_Node_List& inputs) {
|
||||
assert(partition.size() == 0, "not empty");
|
||||
assert(inputs.size() == 0, "not empty");
|
||||
@ -2612,37 +2622,65 @@ bool Compile::compute_logic_cone(Node* n, Unique_Node_List& partition, Unique_No
|
||||
|
||||
bool is_unary_op = is_vector_unary_bitwise_op(n);
|
||||
if (is_unary_op) {
|
||||
assert(collect_unique_inputs(n, partition, inputs) == 1, "not unary");
|
||||
assert(collect_unique_inputs(n, inputs) == 1, "not unary");
|
||||
return false; // too few inputs
|
||||
}
|
||||
|
||||
assert(is_vector_binary_bitwise_op(n), "not binary");
|
||||
Node* in1 = n->in(1);
|
||||
Node* in2 = n->in(2);
|
||||
bool pack_left_child = true;
|
||||
bool pack_right_child = true;
|
||||
|
||||
int in1_unique_inputs_cnt = collect_unique_inputs(in1, partition, inputs);
|
||||
int in2_unique_inputs_cnt = collect_unique_inputs(in2, partition, inputs);
|
||||
partition.push(n);
|
||||
bool left_child_LOP = is_vector_bitwise_op(n->in(1));
|
||||
bool right_child_LOP = is_vector_bitwise_op(n->in(2));
|
||||
|
||||
int left_child_input_cnt = 0;
|
||||
int right_child_input_cnt = 0;
|
||||
|
||||
bool parent_is_predicated = n->is_predicated_vector();
|
||||
bool left_child_predicated = n->in(1)->is_predicated_vector();
|
||||
bool right_child_predicated = n->in(2)->is_predicated_vector();
|
||||
|
||||
Node* parent_pred = parent_is_predicated ? n->in(n->req()-1) : NULL;
|
||||
Node* left_child_pred = left_child_predicated ? n->in(1)->in(n->in(1)->req()-1) : NULL;
|
||||
// The right child's predicate is the last input of n->in(2); reading it from n->in(1)
// would compare the parent's predicate against an input of the left child instead.
Node* right_child_pred = right_child_predicated ? n->in(2)->in(n->in(2)->req()-1) : NULL;
|
||||
|
||||
do {
|
||||
if (pack_left_child && left_child_LOP &&
|
||||
((!parent_is_predicated && !left_child_predicated) ||
|
||||
((parent_is_predicated && left_child_predicated &&
|
||||
parent_pred == left_child_pred)))) {
|
||||
partition.push(n->in(1));
|
||||
left_child_input_cnt = collect_unique_inputs(n->in(1), inputs);
|
||||
} else {
|
||||
inputs.push(n->in(1));
|
||||
left_child_input_cnt = 1;
|
||||
}
|
||||
|
||||
if (pack_right_child && right_child_LOP &&
|
||||
(!right_child_predicated ||
|
||||
(right_child_predicated && parent_is_predicated &&
|
||||
parent_pred == right_child_pred))) {
|
||||
partition.push(n->in(2));
|
||||
right_child_input_cnt = collect_unique_inputs(n->in(2), inputs);
|
||||
} else {
|
||||
inputs.push(n->in(2));
|
||||
right_child_input_cnt = 1;
|
||||
}
|
||||
|
||||
// Too many inputs?
|
||||
if (inputs.size() > 3) {
|
||||
partition.clear();
|
||||
assert(partition.size() > 0, "");
|
||||
inputs.clear();
|
||||
{ // Recompute in2 inputs
|
||||
Unique_Node_List not_used;
|
||||
in2_unique_inputs_cnt = collect_unique_inputs(in2, not_used, not_used);
|
||||
partition.clear();
|
||||
if (left_child_input_cnt > right_child_input_cnt) {
|
||||
pack_left_child = false;
|
||||
} else {
|
||||
pack_right_child = false;
|
||||
}
|
||||
// Pick the node with minimum number of inputs.
|
||||
if (in1_unique_inputs_cnt >= 3 && in2_unique_inputs_cnt >= 3) {
|
||||
return false; // still too many inputs
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
// Recompute partition & inputs.
|
||||
Node* child = (in1_unique_inputs_cnt < in2_unique_inputs_cnt ? in1 : in2);
|
||||
collect_unique_inputs(child, partition, inputs);
|
||||
|
||||
Node* other_input = (in1_unique_inputs_cnt < in2_unique_inputs_cnt ? in2 : in1);
|
||||
inputs.push(other_input);
|
||||
} while(true);
|
||||
|
||||
if(partition.size()) {
|
||||
partition.push(n);
|
||||
}
|
||||
|
||||
@ -2650,7 +2688,6 @@ bool Compile::compute_logic_cone(Node* n, Unique_Node_List& partition, Unique_No
|
||||
(inputs.size() == 2 || inputs.size() == 3);
|
||||
}
|
||||
|
||||
|
||||
void Compile::process_logic_cone_root(PhaseIterGVN &igvn, Node *n, VectorSet &visited) {
|
||||
assert(is_vector_bitwise_op(n), "not a root");
|
||||
|
||||
@ -2670,9 +2707,20 @@ void Compile::process_logic_cone_root(PhaseIterGVN &igvn, Node *n, VectorSet &vi
|
||||
Unique_Node_List inputs;
|
||||
if (compute_logic_cone(n, partition, inputs)) {
|
||||
const TypeVect* vt = n->bottom_type()->is_vect();
|
||||
Node* pn = partition.at(partition.size() - 1);
|
||||
Node* mask = pn->is_predicated_vector() ? pn->in(pn->req()-1) : NULL;
|
||||
if (mask == NULL ||
|
||||
Matcher::match_rule_supported_vector_masked(Op_MacroLogicV, vt->length(), vt->element_basic_type())) {
|
||||
Node* macro_logic = xform_to_MacroLogicV(igvn, vt, partition, inputs);
|
||||
#ifdef ASSERT
|
||||
if (TraceNewVectors) {
|
||||
tty->print("new Vector node: ");
|
||||
macro_logic->dump();
|
||||
}
|
||||
#endif
|
||||
igvn.replace_node(n, macro_logic);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Compile::optimize_logic_cones(PhaseIterGVN &igvn) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -2320,6 +2320,14 @@ void Matcher::find_shared_post_visit(Node* n, uint opcode) {
|
||||
n->set_req(2, new BinaryNode(n->in(3), n->in(4)));
|
||||
n->del_req(4);
|
||||
n->del_req(3);
|
||||
} else if (n->req() == 6) {
|
||||
Node* b3 = new BinaryNode(n->in(4), n->in(5));
|
||||
Node* b2 = new BinaryNode(n->in(3), b3);
|
||||
Node* b1 = new BinaryNode(n->in(2), b2);
|
||||
n->set_req(2, b1);
|
||||
n->del_req(5);
|
||||
n->del_req(4);
|
||||
n->del_req(3);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2007, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -1201,13 +1201,14 @@ bool ReductionNode::implemented(int opc, uint vlen, BasicType bt) {
|
||||
}
|
||||
|
||||
MacroLogicVNode* MacroLogicVNode::make(PhaseGVN& gvn, Node* in1, Node* in2, Node* in3,
|
||||
uint truth_table, const TypeVect* vt) {
|
||||
Node* mask, uint truth_table, const TypeVect* vt) {
|
||||
assert(truth_table <= 0xFF, "invalid");
|
||||
assert(in1->bottom_type()->is_vect()->length_in_bytes() == vt->length_in_bytes(), "mismatch");
|
||||
assert(in2->bottom_type()->is_vect()->length_in_bytes() == vt->length_in_bytes(), "mismatch");
|
||||
assert(in3->bottom_type()->is_vect()->length_in_bytes() == vt->length_in_bytes(), "mismatch");
|
||||
assert(!mask || mask->bottom_type()->isa_vectmask(), "predicated register type expected");
|
||||
Node* fn = gvn.intcon(truth_table);
|
||||
return new MacroLogicVNode(in1, in2, in3, fn, vt);
|
||||
return new MacroLogicVNode(in1, in2, in3, fn, mask, vt);
|
||||
}
|
||||
|
||||
Node* VectorNode::degenerate_vector_rotate(Node* src, Node* cnt, bool is_rotate_left,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2007, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -1275,13 +1275,19 @@ public:
|
||||
// Vector logical operations packing node.
|
||||
class MacroLogicVNode : public VectorNode {
|
||||
private:
|
||||
MacroLogicVNode(Node* in1, Node* in2, Node* in3, Node* fn, const TypeVect* vt)
|
||||
: VectorNode(in1, in2, in3, fn, vt) {}
|
||||
MacroLogicVNode(Node* in1, Node* in2, Node* in3, Node* fn, Node* mask, const TypeVect* vt)
|
||||
: VectorNode(in1, in2, in3, fn, vt) {
|
||||
if (mask) {
|
||||
this->add_req(mask);
|
||||
this->add_flag(Node::Flag_is_predicated_vector);
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
virtual int Opcode() const;
|
||||
|
||||
static MacroLogicVNode* make(PhaseGVN& igvn, Node* in1, Node* in2, Node* in3, uint truth_table, const TypeVect* vt);
|
||||
static MacroLogicVNode* make(PhaseGVN& igvn, Node* in1, Node* in2, Node* in3,
|
||||
Node* mask, uint truth_table, const TypeVect* vt);
|
||||
};
|
||||
|
||||
class VectorMaskCmpNode : public VectorNode {
|
||||
|
@ -0,0 +1,844 @@
|
||||
/*
|
||||
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @test
|
||||
* @bug 8273322
|
||||
* @key randomness
|
||||
* @summary Enhance macro logic optimization for masked logic operations.
|
||||
* @modules jdk.incubator.vector
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires os.simpleArch == "x64"
|
||||
* @library /test/lib /
|
||||
* @run driver compiler.vectorapi.TestMaskedMacroLogicVector
|
||||
*/
|
||||
|
||||
package compiler.vectorapi;
|
||||
|
||||
import java.util.concurrent.Callable;
|
||||
import compiler.lib.ir_framework.*;
|
||||
import compiler.lib.ir_framework.driver.IRViolationException;
|
||||
import jdk.test.lib.Asserts;
|
||||
import jdk.test.lib.Asserts;
|
||||
import jdk.test.lib.Utils;
|
||||
import java.util.Random;
|
||||
|
||||
import jdk.incubator.vector.*;
|
||||
|
||||
public class TestMaskedMacroLogicVector {
|
||||
boolean [] br;
|
||||
boolean [] ba;
|
||||
boolean [] bb;
|
||||
|
||||
short [] sr;
|
||||
char [] ca;
|
||||
char [] cb;
|
||||
|
||||
int [] r;
|
||||
int [] a;
|
||||
int [] b;
|
||||
int [] c;
|
||||
int [] d;
|
||||
int [] e;
|
||||
int [] f;
|
||||
|
||||
long [] rl;
|
||||
long [] al;
|
||||
long [] bl;
|
||||
long [] cl;
|
||||
|
||||
boolean [] mask;
|
||||
|
||||
static boolean booleanFunc1(boolean a, boolean b) {
|
||||
return a & b;
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV" , " > 0 "})
|
||||
public void testSubWordBoolean(boolean[] r, boolean[] a, boolean[] b) {
|
||||
for (int i = 0; i < r.length; i++) {
|
||||
r[i] = booleanFunc1(a[i], b[i]);
|
||||
}
|
||||
}
|
||||
public void verifySubWordBoolean(boolean[] r, boolean[] a, boolean[] b) {
|
||||
for (int i = 0; i < r.length; i++) {
|
||||
boolean expected = booleanFunc1(a[i], b[i]);
|
||||
if (r[i] != expected) {
|
||||
throw new AssertionError(
|
||||
String.format("at #%d: r=%b, expected = %b = booleanFunc1(%b,%b)",
|
||||
i, r[i], expected, a[i], b[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static short charFunc1(char a, char b) {
|
||||
return (short)((a & b) & 1);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV" , " > 0 "})
|
||||
public void testSubWordChar(short[] r, char[] a, char[] b) {
|
||||
for (int i = 0; i < r.length; i++) {
|
||||
r[i] = charFunc1(a[i], b[i]);
|
||||
}
|
||||
}
|
||||
public void verifySubWordChar(short[] r, char[] a, char[] b) {
|
||||
for (int i = 0; i < r.length; i++) {
|
||||
short expected = charFunc1(a[i], b[i]);
|
||||
if (r[i] != expected) {
|
||||
throw new AssertionError(
|
||||
String.format("testSubWordChar: at #%d: r=%d, expected = %d = booleanFunc1(%d,%d)",
|
||||
i, r[i], expected, (int)a[i], (int)b[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Case 1): Unmasked expression tree.
|
||||
// P_LOP
|
||||
// L_LOP R_LOP
|
||||
|
||||
static int intFunc1(int a, int b, int c) {
|
||||
return (a & b) ^ (a & c);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
public void testInt1Kernel(VectorSpecies SPECIES, int [] r, int [] a, int [] b, int [] c) {
|
||||
for (int i = 0; i < SPECIES.loopBound(r.length); i += SPECIES.length()) {
|
||||
IntVector va = IntVector.fromArray(SPECIES, a, i);
|
||||
IntVector vb = IntVector.fromArray(SPECIES, b, i);
|
||||
IntVector vc = IntVector.fromArray(SPECIES, c, i);
|
||||
va.lanewise(VectorOperators.AND, vc)
|
||||
.lanewise(VectorOperators.XOR, va.lanewise(VectorOperators.AND, vb))
|
||||
.intoArray(r, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
|
||||
public void testInt1_Int128(int[] r, int[] a, int[] b, int[] c) {
|
||||
testInt1Kernel(IntVector.SPECIES_128, r, a, b, c);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
|
||||
public void testInt1_Int256(int[] r, int[] a, int[] b, int[] c) {
|
||||
testInt1Kernel(IntVector.SPECIES_256, r, a, b, c);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
|
||||
public void testInt1_Int512(int[] r, int[] a, int[] b, int[] c) {
|
||||
testInt1Kernel(IntVector.SPECIES_512, r, a, b, c);
|
||||
}
|
||||
|
||||
public void verifyInt1(int[] r, int[] a, int[] b, int[] c) {
|
||||
for (int i = 0; i < r.length; i++) {
|
||||
int expected = intFunc1(a[i], b[i], c[i]);
|
||||
if (r[i] != expected) {
|
||||
throw new AssertionError(String.format("testInt1: at #%d: r=%d, expected = %d = intFunc1(%d,%d,%d)",
|
||||
i, r[i], expected, a[i], b[i], c[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Case 2): Only right child is masked.
|
||||
// P_LOP
|
||||
// L_LOP R_LOP(mask)
|
||||
|
||||
static int intFunc2(int a, int b, int c, boolean mask) {
|
||||
return (a & b) ^ (mask == true ? a & c : a);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
public void testInt2Kernel(VectorSpecies SPECIES, int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
for (int i = 0; i < SPECIES.loopBound(r.length); i += SPECIES.length()) {
|
||||
VectorMask<Integer> vmask = VectorMask.fromArray(SPECIES, mask , i);
|
||||
IntVector va = IntVector.fromArray(SPECIES, a, i);
|
||||
IntVector vb = IntVector.fromArray(SPECIES, b, i);
|
||||
IntVector vc = IntVector.fromArray(SPECIES, c, i);
|
||||
va.lanewise(VectorOperators.AND, vb)
|
||||
.lanewise(VectorOperators.XOR,
|
||||
va.lanewise(VectorOperators.AND, vc, vmask))
|
||||
.intoArray(r, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
|
||||
public void testInt2_Int128(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
testInt2Kernel(IntVector.SPECIES_128, r, a, b, c, mask);
|
||||
}
|
||||
@Test
|
||||
@IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
|
||||
public void testInt2_Int256(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
testInt2Kernel(IntVector.SPECIES_256, r, a, b, c, mask);
|
||||
}
|
||||
@Test
|
||||
@IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
|
||||
public void testInt2_Int512(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
testInt2Kernel(IntVector.SPECIES_512, r, a, b, c, mask);
|
||||
}
|
||||
|
||||
public void verifyInt2(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
for (int i = 0; i < r.length; i++) {
|
||||
int expected = intFunc2(a[i], b[i], c[i], mask[i]);
|
||||
if (r[i] != expected) {
|
||||
throw new AssertionError(String.format("testInt2: at #%d: r=%d, expected = %d = intFunc2(%d,%d,%d,%b)",
|
||||
i, r[i], expected, a[i], b[i], c[i], mask[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Case 3): Only left child is masked.
|
||||
// P_LOP
|
||||
// L_LOP(mask) R_LOP
|
||||
|
||||
static int intFunc3(int a, int b, int c, boolean mask) {
|
||||
return (mask == true ? a & b : a) ^ (a & c);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
public void testInt3Kernel(VectorSpecies SPECIES, int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
for (int i = 0; i < SPECIES.loopBound(r.length); i += SPECIES.length()) {
|
||||
VectorMask<Integer> vmask = VectorMask.fromArray(SPECIES, mask , i);
|
||||
IntVector va = IntVector.fromArray(SPECIES, a, i);
|
||||
IntVector vb = IntVector.fromArray(SPECIES, b, i);
|
||||
IntVector vc = IntVector.fromArray(SPECIES, c, i);
|
||||
va.lanewise(VectorOperators.AND, vb, vmask)
|
||||
.lanewise(VectorOperators.XOR,
|
||||
va.lanewise(VectorOperators.AND, vc))
|
||||
.intoArray(r, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
|
||||
public void testInt3_Int128(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
testInt3Kernel(IntVector.SPECIES_128, r, a, b, c, mask);
|
||||
}
|
||||
@Test
|
||||
@IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
|
||||
public void testInt3_Int256(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
testInt3Kernel(IntVector.SPECIES_256, r, a, b, c, mask);
|
||||
}
|
||||
@Test
|
||||
@IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
|
||||
public void testInt3_Int512(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
testInt3Kernel(IntVector.SPECIES_512, r, a, b, c, mask);
|
||||
}
|
||||
|
||||
|
||||
@ForceInline
|
||||
public void verifyInt3(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
for (int i = 0; i < r.length; i++) {
|
||||
int expected = intFunc3(a[i], b[i], c[i], mask[i]);
|
||||
if (r[i] != expected) {
|
||||
throw new AssertionError(String.format("testInt3: at #%d: r=%d, expected = %d = intFunc3(%d,%d,%d,%b)",
|
||||
i, r[i], expected, a[i], b[i], c[i], mask[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Case 4): Both child nodes are masked.
|
||||
// P_LOP
|
||||
// L_LOP(mask) R_LOP(mask)
|
||||
|
||||
static int intFunc4(int a, int b, int c, boolean mask) {
|
||||
return (mask == true ? b & a : b) ^ (mask == true ? c & a : c);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
public void testInt4Kernel(VectorSpecies SPECIES, int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
for (int i = 0; i < SPECIES.loopBound(r.length); i += SPECIES.length()) {
|
||||
VectorMask<Integer> vmask = VectorMask.fromArray(SPECIES, mask , i);
|
||||
IntVector va = IntVector.fromArray(SPECIES, a, i);
|
||||
IntVector vb = IntVector.fromArray(SPECIES, b, i);
|
||||
IntVector vc = IntVector.fromArray(SPECIES, c, i);
|
||||
vb.lanewise(VectorOperators.AND, va, vmask)
|
||||
.lanewise(VectorOperators.XOR,
|
||||
vc.lanewise(VectorOperators.AND, va, vmask))
|
||||
.intoArray(r, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(applyIf = {"UseAVX", "3"}, counts = {"AndV", " > 0 ", "XorV", " > 0 "})
|
||||
public void testInt4_Int128(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
testInt4Kernel(IntVector.SPECIES_128, r, a, b, c, mask);
|
||||
}
|
||||
@Test
|
||||
@IR(applyIf = {"UseAVX", "3"}, counts = {"AndV", " > 0 ", "XorV", " > 0 "})
|
||||
public void testInt4_Int256(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
testInt4Kernel(IntVector.SPECIES_256, r, a, b, c, mask);
|
||||
}
|
||||
@Test
|
||||
@IR(applyIf = {"UseAVX", "3"}, counts = {"AndV", " > 0 ", "XorV", " > 0 "})
|
||||
public void testInt4_Int512(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
testInt4Kernel(IntVector.SPECIES_512, r, a, b, c, mask);
|
||||
}
|
||||
|
||||
public void verifyInt4(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
for (int i = 0; i < r.length; i++) {
|
||||
int expected = intFunc4(a[i], b[i], c[i], mask[i]);
|
||||
if (r[i] != expected) {
|
||||
throw new AssertionError(String.format("testInt4: at #%d: r=%d, expected = %d = intFunc4(%d,%d,%d,%b)",
|
||||
i, r[i], expected, a[i], b[i], c[i], mask[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Case 5): Parent is masked with unmasked child expressions.
|
||||
// P_LOP(mask)
|
||||
// L_LOP R_LOP
|
||||
|
||||
static int intFunc5(int a, int b, int c, boolean mask) {
|
||||
return mask == true ? ((a & b) ^ (a & c)) : (a & b);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
public void testInt5Kernel(VectorSpecies SPECIES, int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
for (int i = 0; i < SPECIES.loopBound(r.length); i += SPECIES.length()) {
|
||||
VectorMask<Integer> vmask = VectorMask.fromArray(SPECIES, mask , i);
|
||||
IntVector va = IntVector.fromArray(SPECIES, a, i);
|
||||
IntVector vb = IntVector.fromArray(SPECIES, b, i);
|
||||
IntVector vc = IntVector.fromArray(SPECIES, c, i);
|
||||
va.lanewise(VectorOperators.AND, vb)
|
||||
.lanewise(VectorOperators.XOR,
|
||||
va.lanewise(VectorOperators.AND, vc), vmask)
|
||||
.intoArray(r, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
|
||||
public void testInt5_Int128(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
testInt5Kernel(IntVector.SPECIES_128, r, a, b, c, mask);
|
||||
}
|
||||
@Test
|
||||
@IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
|
||||
public void testInt5_Int256(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
testInt5Kernel(IntVector.SPECIES_256, r, a, b, c, mask);
|
||||
}
|
||||
@Test
|
||||
@IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
|
||||
public void testInt5_Int512(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
testInt5Kernel(IntVector.SPECIES_512, r, a, b, c, mask);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
public void verifyInt5(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
for (int i = 0; i < r.length; i++) {
|
||||
int expected = intFunc5(a[i], b[i], c[i], mask[i]);
|
||||
if (r[i] != expected) {
|
||||
throw new AssertionError(String.format("testInt5: at #%d: r=%d, expected = %d = intFunc5(%d,%d,%d,%b)",
|
||||
i, r[i], expected, a[i], b[i], c[i], mask[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Case 6): Parent and right child are masked.
|
||||
// P_LOP(mask)
|
||||
// L_LOP R_LOP(mask)
|
||||
|
||||
static int intFunc6(int a, int b, int c, boolean mask) {
|
||||
return mask == true ? ((a & b) ^ (mask == true ? a & c : a)) : (a & b);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
public void testInt6Kernel(VectorSpecies SPECIES, int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
for (int i = 0; i < SPECIES.loopBound(r.length); i += SPECIES.length()) {
|
||||
VectorMask<Integer> vmask = VectorMask.fromArray(SPECIES, mask , i);
|
||||
IntVector va = IntVector.fromArray(SPECIES, a, i);
|
||||
IntVector vb = IntVector.fromArray(SPECIES, b, i);
|
||||
IntVector vc = IntVector.fromArray(SPECIES, c, i);
|
||||
va.lanewise(VectorOperators.AND, vb)
|
||||
.lanewise(VectorOperators.XOR,
|
||||
va.lanewise(VectorOperators.AND, vc, vmask), vmask)
|
||||
.intoArray(r, i);
|
||||
}
|
||||
}
|
||||
@Test
|
||||
@IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
|
||||
public void testInt6_Int128(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
testInt6Kernel(IntVector.SPECIES_128, r, a, b, c, mask);
|
||||
}
|
||||
@Test
|
||||
@IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
|
||||
public void testInt6_Int256(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
testInt6Kernel(IntVector.SPECIES_256, r, a, b, c, mask);
|
||||
}
|
||||
@Test
|
||||
@IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
|
||||
public void testInt6_Int512(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
testInt6Kernel(IntVector.SPECIES_512, r, a, b, c, mask);
|
||||
}
|
||||
|
||||
|
||||
public void verifyInt6(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
for (int i = 0; i < r.length; i++) {
|
||||
int expected = intFunc6(a[i], b[i], c[i], mask[i]);
|
||||
if (r[i] != expected) {
|
||||
throw new AssertionError(String.format("testInt6: at #%d: r=%d, expected = %d = intFunc6(%d,%d,%d,%b)",
|
||||
i, r[i], expected, a[i], b[i], c[i], mask[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Case 7): Parent and left child are masked.
|
||||
// P_LOP(mask)
|
||||
// L_LOP(mask) R_LOP
|
||||
|
||||
static int intFunc7(int a, int b, int c, boolean mask) {
|
||||
return mask == true ? ((mask == true ? a & b : a) ^ (a & c)) : a;
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
public void testInt7Kernel(VectorSpecies SPECIES, int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
for (int i = 0; i < SPECIES.loopBound(r.length); i += SPECIES.length()) {
|
||||
VectorMask<Integer> vmask = VectorMask.fromArray(SPECIES, mask , i);
|
||||
IntVector va = IntVector.fromArray(SPECIES, a, i);
|
||||
IntVector vb = IntVector.fromArray(SPECIES, b, i);
|
||||
IntVector vc = IntVector.fromArray(SPECIES, c, i);
|
||||
va.lanewise(VectorOperators.AND, vb, vmask)
|
||||
.lanewise(VectorOperators.XOR,
|
||||
va.lanewise(VectorOperators.AND, vc), vmask)
|
||||
.intoArray(r, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
|
||||
public void testInt7_Int128(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
testInt7Kernel(IntVector.SPECIES_128, r, a, b, c, mask);
|
||||
}
|
||||
@Test
|
||||
@IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
|
||||
public void testInt7_Int256(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
testInt7Kernel(IntVector.SPECIES_256, r, a, b, c, mask);
|
||||
}
|
||||
@Test
|
||||
@IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
|
||||
public void testInt7_Int512(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
testInt7Kernel(IntVector.SPECIES_512, r, a, b, c, mask);
|
||||
}
|
||||
|
||||
public void verifyInt7(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
for (int i = 0; i < r.length; i++) {
|
||||
int expected = intFunc7(a[i], b[i], c[i], mask[i]);
|
||||
if (r[i] != expected) {
|
||||
throw new AssertionError(String.format("testInt7: at #%d: r=%d, expected = %d = intFunc7(%d,%d,%d,%b)",
|
||||
i, r[i], expected, a[i], b[i], c[i], mask[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Case 8): Parent and both child expressions are masked.
|
||||
// P_LOP(mask)
|
||||
// L_LOP(mask) R_LOP(mask)
|
||||
|
||||
static int intFunc8(int a, int b, int c, boolean mask) {
|
||||
return mask == true ? ((mask == true ? b & a : b) ^ (mask == true ? c & a : c)) : b;
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
public void testInt8Kernel(VectorSpecies SPECIES, int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
for (int i = 0; i < SPECIES.loopBound(r.length); i += SPECIES.length()) {
|
||||
VectorMask<Integer> vmask = VectorMask.fromArray(SPECIES, mask , i);
|
||||
IntVector va = IntVector.fromArray(SPECIES, a, i);
|
||||
IntVector vb = IntVector.fromArray(SPECIES, b, i);
|
||||
IntVector vc = IntVector.fromArray(SPECIES, c, i);
|
||||
vb.lanewise(VectorOperators.AND, va, vmask)
|
||||
.lanewise(VectorOperators.XOR,
|
||||
vc.lanewise(VectorOperators.AND, va, vmask), vmask)
|
||||
.intoArray(r, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
|
||||
public void testInt8_Int128(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
testInt8Kernel(IntVector.SPECIES_128, r, a, b, c, mask);
|
||||
}
|
||||
@Test
|
||||
@IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
|
||||
public void testInt8_Int256(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
testInt8Kernel(IntVector.SPECIES_256, r, a, b, c, mask);
|
||||
}
|
||||
@Test
|
||||
@IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
|
||||
public void testInt8_Int512(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
testInt8Kernel(IntVector.SPECIES_512, r, a, b, c, mask);
|
||||
}
|
||||
|
||||
public void verifyInt8(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
|
||||
for (int i = 0; i < r.length; i++) {
|
||||
int expected = intFunc8(a[i], b[i], c[i], mask[i]);
|
||||
if (r[i] != expected) {
|
||||
throw new AssertionError(String.format("testInt8: at #%d: r=%d, expected = %d = intFunc8(%d,%d,%d,%b)",
|
||||
i, r[i], expected, a[i], b[i], c[i], mask[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// ===================================================== //
|
||||
|
||||
static long longFunc(long a, long b, long c) {
|
||||
long v1 = (a & b) ^ (a & c) ^ (b & c);
|
||||
long v2 = (~a & b) | (~b & c) | (~c & a);
|
||||
return v1 & v2;
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
public void testLongKernel(VectorSpecies SPECIES, long[] r, long[] a, long[] b, long[] c) {
|
||||
for (int i = 0; i < SPECIES.loopBound(r.length); i += SPECIES.length()) {
|
||||
LongVector va = LongVector.fromArray(SPECIES, a, i);
|
||||
LongVector vb = LongVector.fromArray(SPECIES, b, i);
|
||||
LongVector vc = LongVector.fromArray(SPECIES, c, i);
|
||||
|
||||
va.lanewise(VectorOperators.AND, vb)
|
||||
.lanewise(VectorOperators.XOR, va.lanewise(VectorOperators.AND, vc))
|
||||
.lanewise(VectorOperators.XOR, vb.lanewise(VectorOperators.AND, vc))
|
||||
.lanewise(VectorOperators.AND,
|
||||
va.lanewise(VectorOperators.NOT).lanewise(VectorOperators.AND, vb)
|
||||
.lanewise(VectorOperators.OR, vb.lanewise(VectorOperators.NOT).lanewise(VectorOperators.AND, vc))
|
||||
.lanewise(VectorOperators.OR, vc.lanewise(VectorOperators.NOT).lanewise(VectorOperators.AND, va)))
|
||||
.intoArray(r, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
|
||||
public void testLong_Long256(long[] r, long[] a, long[] b, long[] c) {
|
||||
testLongKernel(LongVector.SPECIES_256, r, a, b, c);
|
||||
}
|
||||
@Test
|
||||
@IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
|
||||
public void testLong_Long512(long[] r, long[] a, long[] b, long[] c) {
|
||||
testLongKernel(LongVector.SPECIES_512, r, a, b, c);
|
||||
}
|
||||
|
||||
public void verifyLong(long[] r, long[] a, long[] b, long[] c) {
|
||||
for (int i = 0; i < r.length; i++) {
|
||||
long expected = longFunc(a[i], b[i], c[i]);
|
||||
if (r[i] != expected) {
|
||||
throw new AssertionError(
|
||||
String.format("testLong: at #%d: r=%d, expected = %d = longFunc(%d,%d,%d)",
|
||||
i, r[i], expected, a[i], b[i], c[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ===================================================== //
|
||||
|
||||
// Random source used by all fill*Random helpers below.
private static final Random R = Utils.getRandomInstance();
|
||||
|
||||
static boolean[] fillBooleanRandom(Callable<boolean[]> factory) {
|
||||
try {
|
||||
boolean[] arr = factory.call();
|
||||
for (int i = 0; i < arr.length; i++) {
|
||||
arr[i] = R.nextBoolean();
|
||||
}
|
||||
return arr;
|
||||
} catch (Exception e) {
|
||||
throw new InternalError(e);
|
||||
}
|
||||
}
|
||||
static char[] fillCharRandom(Callable<char[]> factory) {
|
||||
try {
|
||||
char[] arr = factory.call();
|
||||
for (int i = 0; i < arr.length; i++) {
|
||||
arr[i] = (char)R.nextInt();
|
||||
}
|
||||
return arr;
|
||||
} catch (Exception e) {
|
||||
throw new InternalError(e);
|
||||
}
|
||||
}
|
||||
static int[] fillIntRandom(Callable<int[]> factory) {
|
||||
try {
|
||||
int[] arr = factory.call();
|
||||
for (int i = 0; i < arr.length; i++) {
|
||||
arr[i] = R.nextInt();
|
||||
}
|
||||
return arr;
|
||||
} catch (Exception e) {
|
||||
throw new InternalError(e);
|
||||
}
|
||||
}
|
||||
static long[] fillLongRandom(Callable<long[]> factory) {
|
||||
try {
|
||||
long[] arr = factory.call();
|
||||
for (int i = 0; i < arr.length; i++) {
|
||||
arr[i] = R.nextLong();
|
||||
}
|
||||
return arr;
|
||||
} catch (Exception e) {
|
||||
throw new InternalError(e);
|
||||
}
|
||||
}
|
||||
|
||||
// ===================================================== //
|
||||
|
||||
// Length of every input/result array allocated in the constructor.
static final int SIZE = 512;
|
||||
|
||||
@Run(test = {"testInt4_Int128"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_testInt4_Int128() {
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
testInt4_Int128(r, a, b, c, mask);
|
||||
verifyInt4(r, a, b, c, mask);
|
||||
}
|
||||
}
|
||||
@Run(test = {"testInt4_Int256"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_testInt4_Int256() {
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
testInt4_Int256(r, a, b, c, mask);
|
||||
verifyInt4(r, a, b, c, mask);
|
||||
}
|
||||
}
|
||||
@Run(test = {"testInt4_Int512"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_testInt4_Int512() {
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
testInt4_Int512(r, a, b, c, mask);
|
||||
verifyInt4(r, a, b, c, mask);
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = {"testSubWordBoolean"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_test_SubWordBoolean() {
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
testSubWordBoolean(br, ba, bb);
|
||||
verifySubWordBoolean(br, ba, bb);
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = {"testSubWordChar"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_test_SubWordChar() {
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
testSubWordChar(sr, ca, cb);
|
||||
verifySubWordChar(sr, ca, cb);
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = {"testInt1_Int128"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_testInt1_Int128() {
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
testInt1_Int128(r, a, b, c);
|
||||
verifyInt1(r, a, b, c);
|
||||
}
|
||||
}
|
||||
@Run(test = {"testInt1_Int256"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_testInt1_Int256() {
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
testInt1_Int256(r, a, b, c);
|
||||
verifyInt1(r, a, b, c);
|
||||
}
|
||||
}
|
||||
@Run(test = {"testInt1_Int512"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_testInt1_Int512() {
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
testInt1_Int512(r, a, b, c);
|
||||
verifyInt1(r, a, b, c);
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = {"testInt2_Int128"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_testInt2_Int128() {
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
testInt2_Int128(r, a, b, c, mask);
|
||||
verifyInt2(r, a, b, c, mask);
|
||||
}
|
||||
}
|
||||
@Run(test = {"testInt2_Int256"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_testInt2_Int256() {
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
testInt2_Int256(r, a, b, c, mask);
|
||||
verifyInt2(r, a, b, c, mask);
|
||||
}
|
||||
}
|
||||
@Run(test = {"testInt2_Int512"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_testInt2_Int512() {
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
testInt2_Int512(r, a, b, c, mask);
|
||||
verifyInt2(r, a, b, c, mask);
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = {"testInt3_Int128"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_testInt3_Int128() {
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
testInt3_Int128(r, a, b, c, mask);
|
||||
verifyInt3(r, a, b, c, mask);
|
||||
}
|
||||
}
|
||||
@Run(test = {"testInt3_Int256"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_testInt3_Int256() {
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
testInt3_Int256(r, a, b, c, mask);
|
||||
verifyInt3(r, a, b, c, mask);
|
||||
}
|
||||
}
|
||||
@Run(test = {"testInt3_Int512"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_testInt3_Int512() {
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
testInt3_Int512(r, a, b, c, mask);
|
||||
verifyInt3(r, a, b, c, mask);
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = {"testInt5_Int128"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_testInt5_128() {
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
testInt5_Int128(r, a, b, c, mask);
|
||||
verifyInt5(r, a, b, c, mask);
|
||||
}
|
||||
}
|
||||
@Run(test = {"testInt5_Int256"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_testInt5_256() {
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
testInt5_Int256(r, a, b, c, mask);
|
||||
verifyInt5(r, a, b, c, mask);
|
||||
}
|
||||
}
|
||||
@Run(test = {"testInt5_Int512"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_testInt5_Int512() {
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
testInt5_Int512(r, a, b, c, mask);
|
||||
verifyInt5(r, a, b, c, mask);
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = {"testInt6_Int128"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_testInt6_Int128() {
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
testInt6_Int128(r, a, b, c, mask);
|
||||
verifyInt6(r, a, b, c, mask);
|
||||
}
|
||||
}
|
||||
@Run(test = {"testInt6_Int256"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_testInt6_Int256() {
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
testInt6_Int256(r, a, b, c, mask);
|
||||
verifyInt6(r, a, b, c, mask);
|
||||
}
|
||||
}
|
||||
@Run(test = {"testInt6_Int512"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_testInt6_Int512() {
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
testInt6_Int512(r, a, b, c, mask);
|
||||
verifyInt6(r, a, b, c, mask);
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = {"testInt7_Int128"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_testInt7_Int128() {
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
testInt7_Int128(r, a, b, c, mask);
|
||||
verifyInt7(r, a, b, c, mask);
|
||||
}
|
||||
}
|
||||
@Run(test = {"testInt7_Int256"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_testInt7_Int256() {
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
testInt7_Int256(r, a, b, c, mask);
|
||||
verifyInt7(r, a, b, c, mask);
|
||||
}
|
||||
}
|
||||
@Run(test = {"testInt7_Int512"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_testInt7_Int512() {
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
testInt7_Int512(r, a, b, c, mask);
|
||||
verifyInt7(r, a, b, c, mask);
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = {"testInt8_Int128"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_testInt8_Int128() {
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
testInt8_Int128(r, a, b, c, mask);
|
||||
verifyInt8(r, a, b, c, mask);
|
||||
}
|
||||
}
|
||||
@Run(test = {"testInt8_Int256"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_testInt8_Int256() {
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
testInt8_Int256(r, a, b, c, mask);
|
||||
verifyInt8(r, a, b, c, mask);
|
||||
}
|
||||
}
|
||||
@Run(test = {"testInt8_Int512"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_testInt8_Int512() {
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
testInt8_Int512(r, a, b, c, mask);
|
||||
verifyInt8(r, a, b, c, mask);
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = {"testLong_Long256"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_testLong_Long256() {
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
testLong_Long256(rl, al, bl, cl);
|
||||
verifyLong(rl, al, bl, cl);
|
||||
}
|
||||
}
|
||||
@Run(test = {"testLong_Long512"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_testLong_Long512() {
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
testLong_Long512(rl, al, bl, cl);
|
||||
verifyLong(rl, al, bl, cl);
|
||||
}
|
||||
}
|
||||
|
||||
public TestMaskedMacroLogicVector() {
|
||||
br = new boolean[SIZE];
|
||||
ba = fillBooleanRandom((()-> new boolean[SIZE]));
|
||||
bb = fillBooleanRandom((()-> new boolean[SIZE]));
|
||||
|
||||
sr = new short[SIZE];
|
||||
ca = fillCharRandom((()-> new char[SIZE]));
|
||||
cb = fillCharRandom((()-> new char[SIZE]));
|
||||
|
||||
r = new int[SIZE];
|
||||
a = fillIntRandom(()-> new int[SIZE]);
|
||||
b = fillIntRandom(()-> new int[SIZE]);
|
||||
c = fillIntRandom(()-> new int[SIZE]);
|
||||
d = fillIntRandom(()-> new int[SIZE]);
|
||||
e = fillIntRandom(()-> new int[SIZE]);
|
||||
f = fillIntRandom(()-> new int[SIZE]);
|
||||
|
||||
rl = new long[SIZE];
|
||||
al = fillLongRandom(() -> new long[SIZE]);
|
||||
bl = fillLongRandom(() -> new long[SIZE]);
|
||||
cl = fillLongRandom(() -> new long[SIZE]);
|
||||
|
||||
mask = fillBooleanRandom((()-> new boolean[SIZE]));
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
TestFramework.runWithFlags("-XX:-TieredCompilation",
|
||||
"-XX:UseAVX=3",
|
||||
"--add-modules=jdk.incubator.vector",
|
||||
"-XX:CompileThresholdScaling=0.3");
|
||||
}
|
||||
}
|
@ -0,0 +1,331 @@
|
||||
/*
|
||||
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
package org.openjdk.bench.jdk.incubator.vector;
|
||||
|
||||
import org.openjdk.jmh.annotations.*;
|
||||
import org.openjdk.jmh.infra.*;
|
||||
|
||||
import jdk.incubator.vector.*;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.Random;
|
||||
|
||||
@BenchmarkMode(Mode.Throughput)
|
||||
@OutputTimeUnit(TimeUnit.SECONDS)
|
||||
@State(Scope.Thread)
|
||||
public class MaskedLogicOpts {
|
||||
@Param({"256","512","1024"})
|
||||
private int ARRAYLEN;
|
||||
|
||||
boolean [] mask_arr = {
|
||||
false, false, false, true, false, false, false, false,
|
||||
false, false, false, true, false, false, false, false,
|
||||
false, false, false, true, false, false, false, false,
|
||||
true, true, true, true, true, true, true, true,
|
||||
true, true, true, true, true, true, true, true,
|
||||
false, false, false, true, false, false, false, false,
|
||||
false, false, false, true, false, false, false, false,
|
||||
false, false, false, true, false, false, false, false
|
||||
};
|
||||
|
||||
int INVOC_COUNTER = 4096;
|
||||
|
||||
int [] i1 = new int[ARRAYLEN];
|
||||
int [] i2 = new int[ARRAYLEN];
|
||||
int [] i3 = new int[ARRAYLEN];
|
||||
int [] i4 = new int[ARRAYLEN];
|
||||
int [] i5 = new int[ARRAYLEN];
|
||||
|
||||
long [] l1 = new long[ARRAYLEN];
|
||||
long [] l2 = new long[ARRAYLEN];
|
||||
long [] l3 = new long[ARRAYLEN];
|
||||
long [] l4 = new long[ARRAYLEN];
|
||||
long [] l5 = new long[ARRAYLEN];
|
||||
|
||||
Vector<Integer> iv1;
|
||||
Vector<Integer> iv2;
|
||||
Vector<Integer> iv3;
|
||||
Vector<Integer> iv4;
|
||||
Vector<Integer> iv5;
|
||||
|
||||
Vector<Long> lv1;
|
||||
Vector<Long> lv2;
|
||||
Vector<Long> lv3;
|
||||
Vector<Long> lv4;
|
||||
Vector<Long> lv5;
|
||||
|
||||
VectorMask<Integer> imask;
|
||||
VectorMask<Long> lmask;
|
||||
|
||||
VectorSpecies<Integer> ispecies;
|
||||
VectorSpecies<Long> lspecies;
|
||||
|
||||
int int512_arr_idx;
|
||||
int int256_arr_idx;
|
||||
int int128_arr_idx;
|
||||
int long256_arr_idx;
|
||||
int long512_arr_idx;
|
||||
|
||||
private Random r = new Random();
|
||||
|
||||
@Setup(Level.Trial)
|
||||
public void init() {
|
||||
int512_arr_idx = 0;
|
||||
int256_arr_idx = 0;
|
||||
int128_arr_idx = 0;
|
||||
long256_arr_idx = 0;
|
||||
long512_arr_idx = 0;
|
||||
i1 = new int[ARRAYLEN];
|
||||
i2 = new int[ARRAYLEN];
|
||||
i3 = new int[ARRAYLEN];
|
||||
i4 = new int[ARRAYLEN];
|
||||
i5 = new int[ARRAYLEN];
|
||||
|
||||
l1 = new long[ARRAYLEN];
|
||||
l2 = new long[ARRAYLEN];
|
||||
l3 = new long[ARRAYLEN];
|
||||
l4 = new long[ARRAYLEN];
|
||||
l5 = new long[ARRAYLEN];
|
||||
|
||||
for (int i=0; i<ARRAYLEN; i++) {
|
||||
i1[i] = r.nextInt();
|
||||
i2[i] = r.nextInt();
|
||||
i3[i] = r.nextInt();
|
||||
i4[i] = r.nextInt();
|
||||
i5[i] = r.nextInt();
|
||||
|
||||
l1[i] = r.nextLong();
|
||||
l2[i] = r.nextLong();
|
||||
l3[i] = r.nextLong();
|
||||
l4[i] = r.nextLong();
|
||||
l5[i] = r.nextLong();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Setup(Level.Invocation)
|
||||
public void init_per_invoc() {
|
||||
int512_arr_idx = (int512_arr_idx + 16) & (ARRAYLEN-1);
|
||||
int256_arr_idx = (int256_arr_idx + 8) & (ARRAYLEN-1);
|
||||
int128_arr_idx = (int128_arr_idx + 4) & (ARRAYLEN-1);
|
||||
long512_arr_idx = (long512_arr_idx + 8) & (ARRAYLEN-1);
|
||||
long256_arr_idx = (long256_arr_idx + 4) & (ARRAYLEN-1);
|
||||
}
|
||||
|
||||
// Fully masked int kernel shared by the maskedLogicOperationsInt* benchmarks.
// Loads the lane mask from mask_arr at offset 0 and the operand vectors from
// i2..i5 at int512_arr_idx, then makes INVOC_COUNTER passes over i1, applying
// a chain of masked AND/OR/XOR operations to each vector and storing it back.
@CompilerControl(CompilerControl.Mode.INLINE)
|
||||
public void maskedLogicKernel(VectorSpecies<Integer> SPECIES) {
|
||||
imask = VectorMask.fromArray(SPECIES, mask_arr, 0);
|
||||
iv2 = IntVector.fromArray(SPECIES, i2, int512_arr_idx);
|
||||
iv3 = IntVector.fromArray(SPECIES, i3, int512_arr_idx);
|
||||
iv4 = IntVector.fromArray(SPECIES, i4, int512_arr_idx);
|
||||
iv5 = IntVector.fromArray(SPECIES, i5, int512_arr_idx);
|
||||
for(int i = 0; i < INVOC_COUNTER; i++) {
|
||||
for(int j = 0 ; j < ARRAYLEN; j+= SPECIES.length()) {
|
||||
// Every operation in the chain is applied under imask.
IntVector.fromArray(SPECIES, i1, j)
|
||||
.lanewise(VectorOperators.AND, iv2, imask)
|
||||
.lanewise(VectorOperators.OR, iv2, imask)
|
||||
.lanewise(VectorOperators.AND, iv3, imask)
|
||||
.lanewise(VectorOperators.OR, iv3, imask)
|
||||
.lanewise(VectorOperators.AND, iv4, imask)
|
||||
.lanewise(VectorOperators.OR, iv4, imask)
|
||||
.lanewise(VectorOperators.AND, iv5, imask)
|
||||
.lanewise(VectorOperators.XOR, iv5, imask)
|
||||
.intoArray(i1, j);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void maskedLogicOperationsInt512() {
|
||||
maskedLogicKernel(IntVector.SPECIES_512);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void maskedLogicOperationsInt256() {
|
||||
maskedLogicKernel(IntVector.SPECIES_256);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void maskedLogicOperationsInt128() {
|
||||
maskedLogicKernel(IntVector.SPECIES_128);
|
||||
}
|
||||
|
||||
// Partially masked int kernel shared by the partiallyMaskedLogicOperationsInt*
// benchmarks. Loads the lane mask from mask_arr at offset 0 and the operand
// vectors from i2..i5 at int512_arr_idx, then makes INVOC_COUNTER passes over
// i1, applying a chain that mixes masked and unmasked operations in place.
@CompilerControl(CompilerControl.Mode.INLINE)
|
||||
public void partiallyMaskedLogicOperationsIntKernel(VectorSpecies<Integer> SPECIES) {
|
||||
imask = VectorMask.fromArray(SPECIES, mask_arr, 0);
|
||||
iv2 = IntVector.fromArray(SPECIES, i2, int512_arr_idx);
|
||||
iv3 = IntVector.fromArray(SPECIES, i3, int512_arr_idx);
|
||||
iv4 = IntVector.fromArray(SPECIES, i4, int512_arr_idx);
|
||||
iv5 = IntVector.fromArray(SPECIES, i5, int512_arr_idx);
|
||||
for(int i = 0; i < INVOC_COUNTER; i++) {
|
||||
for(int j = 0 ; j < ARRAYLEN; j+= SPECIES.length()) {
|
||||
// The two iv3 operations are unmasked; all others are applied under imask.
IntVector.fromArray(SPECIES, i1, j)
|
||||
.lanewise(VectorOperators.AND, iv2, imask)
|
||||
.lanewise(VectorOperators.OR, iv2, imask)
|
||||
.lanewise(VectorOperators.AND, iv3)
|
||||
.lanewise(VectorOperators.OR, iv3)
|
||||
.lanewise(VectorOperators.OR, iv4, imask)
|
||||
.lanewise(VectorOperators.AND, iv4, imask)
|
||||
.lanewise(VectorOperators.XOR, iv5, imask)
|
||||
.intoArray(i1, j);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void partiallyMaskedLogicOperationsInt512() {
|
||||
partiallyMaskedLogicOperationsIntKernel(IntVector.SPECIES_512);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void partiallyMaskedLogicOperationsInt256() {
|
||||
partiallyMaskedLogicOperationsIntKernel(IntVector.SPECIES_256);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void partiallyMaskedLogicOperationsInt128() {
|
||||
partiallyMaskedLogicOperationsIntKernel(IntVector.SPECIES_128);
|
||||
}
|
||||
|
||||
// Masked BITWISE_BLEND int kernel shared by the bitwiseBlendOperationInt*
// benchmarks. Loads the lane mask from mask_arr at offset 0 and the operand
// vectors from i2..i5 at int512_arr_idx, then makes INVOC_COUNTER passes over
// i1, applying three masked BITWISE_BLEND operations to each vector in place.
@CompilerControl(CompilerControl.Mode.INLINE)
|
||||
public void bitwiseBlendOperationIntKernel(VectorSpecies<Integer> SPECIES) {
|
||||
imask = VectorMask.fromArray(SPECIES, mask_arr, 0);
|
||||
iv2 = IntVector.fromArray(SPECIES, i2, int512_arr_idx);
|
||||
iv3 = IntVector.fromArray(SPECIES, i3, int512_arr_idx);
|
||||
iv4 = IntVector.fromArray(SPECIES, i4, int512_arr_idx);
|
||||
iv5 = IntVector.fromArray(SPECIES, i5, int512_arr_idx);
|
||||
for(int i = 0; i < INVOC_COUNTER; i++) {
|
||||
for(int j = 0 ; j < ARRAYLEN; j+= SPECIES.length()) {
|
||||
IntVector.fromArray(SPECIES, i1, j)
|
||||
.lanewise(VectorOperators.BITWISE_BLEND, iv2, iv3, imask)
|
||||
.lanewise(VectorOperators.BITWISE_BLEND, iv3, iv4, imask)
|
||||
.lanewise(VectorOperators.BITWISE_BLEND, iv4, iv5, imask)
|
||||
.intoArray(i1, j);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void bitwiseBlendOperationInt512() {
|
||||
bitwiseBlendOperationIntKernel(IntVector.SPECIES_512);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void bitwiseBlendOperationInt256() {
|
||||
bitwiseBlendOperationIntKernel(IntVector.SPECIES_256);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void bitwiseBlendOperationInt128() {
|
||||
bitwiseBlendOperationIntKernel(IntVector.SPECIES_128);
|
||||
}
|
||||
|
||||
@CompilerControl(CompilerControl.Mode.INLINE)
|
||||
public void maskedLogicOperationsLongKernel(VectorSpecies<Long> SPECIES) {
|
||||
lmask = VectorMask.fromArray(SPECIES, mask_arr, 0);
|
||||
lv2 = LongVector.fromArray(SPECIES, l2, long256_arr_idx);
|
||||
lv3 = LongVector.fromArray(SPECIES, l3, long256_arr_idx);
|
||||
lv4 = LongVector.fromArray(SPECIES, l4, long256_arr_idx);
|
||||
lv5 = LongVector.fromArray(SPECIES, l5, long256_arr_idx);
|
||||
for(int i = 0; i < INVOC_COUNTER; i++) {
|
||||
for(int j = 0 ; j < ARRAYLEN; j+= SPECIES.length()) {
|
||||
LongVector.fromArray(SPECIES, l1, j)
|
||||
.lanewise(VectorOperators.AND, lv2, lmask)
|
||||
.lanewise(VectorOperators.OR, lv3, lmask)
|
||||
.lanewise(VectorOperators.AND, lv3, lmask)
|
||||
.lanewise(VectorOperators.OR, lv4, lmask)
|
||||
.lanewise(VectorOperators.AND, lv4, lmask)
|
||||
.lanewise(VectorOperators.XOR, lv5, lmask)
|
||||
.intoArray(l1, j);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void maskedLogicOperationsLong512() {
|
||||
maskedLogicOperationsLongKernel(LongVector.SPECIES_512);
|
||||
}
|
||||
@Benchmark
|
||||
public void maskedLogicOperationsLong256() {
|
||||
maskedLogicOperationsLongKernel(LongVector.SPECIES_256);
|
||||
}
|
||||
|
||||
@CompilerControl(CompilerControl.Mode.INLINE)
|
||||
public void partiallyMaskedLogicOperationsLongKernel(VectorSpecies<Long> SPECIES) {
|
||||
lmask = VectorMask.fromArray(SPECIES, mask_arr, 0);
|
||||
lv2 = LongVector.fromArray(SPECIES, l2, long512_arr_idx);
|
||||
lv3 = LongVector.fromArray(SPECIES, l3, long512_arr_idx);
|
||||
lv4 = LongVector.fromArray(SPECIES, l4, long512_arr_idx);
|
||||
lv5 = LongVector.fromArray(SPECIES, l5, long512_arr_idx);
|
||||
for(int i = 0; i < INVOC_COUNTER; i++) {
|
||||
for(int j = 0 ; j < ARRAYLEN; j+= SPECIES.length()) {
|
||||
LongVector.fromArray(SPECIES, l1, j)
|
||||
.lanewise(VectorOperators.AND, lv2, lmask)
|
||||
.lanewise(VectorOperators.OR, lv2, lmask)
|
||||
.lanewise(VectorOperators.AND, lv3)
|
||||
.lanewise(VectorOperators.OR, lv3)
|
||||
.lanewise(VectorOperators.AND, lv4)
|
||||
.lanewise(VectorOperators.OR, lv4, lmask)
|
||||
.lanewise(VectorOperators.XOR, lv5, lmask)
|
||||
.intoArray(l1, j);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void partiallyMaskedLogicOperationsLong512() {
|
||||
partiallyMaskedLogicOperationsLongKernel(LongVector.SPECIES_512);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void partiallyMaskedLogicOperationsLong256() {
|
||||
partiallyMaskedLogicOperationsLongKernel(LongVector.SPECIES_256);
|
||||
}
|
||||
|
||||
@CompilerControl(CompilerControl.Mode.INLINE)
|
||||
public void bitwiseBlendOperationLongKernel(VectorSpecies<Long> SPECIES) {
|
||||
lmask = VectorMask.fromArray(SPECIES, mask_arr, 0);
|
||||
lv2 = LongVector.fromArray(SPECIES, l2, long512_arr_idx);
|
||||
lv3 = LongVector.fromArray(SPECIES, l3, long512_arr_idx);
|
||||
lv4 = LongVector.fromArray(SPECIES, l4, long512_arr_idx);
|
||||
lv5 = LongVector.fromArray(SPECIES, l5, long512_arr_idx);
|
||||
for(int i = 0; i < INVOC_COUNTER; i++) {
|
||||
for(int j = 0 ; j < ARRAYLEN; j+= SPECIES.length()) {
|
||||
LongVector.fromArray(SPECIES, l1, j)
|
||||
.lanewise(VectorOperators.BITWISE_BLEND, lv2, lv3, lmask)
|
||||
.lanewise(VectorOperators.BITWISE_BLEND, lv3, lv4, lmask)
|
||||
.lanewise(VectorOperators.BITWISE_BLEND, lv4, lv5, lmask)
|
||||
.intoArray(l1, j);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void bitwiseBlendOperationLong512() {
|
||||
bitwiseBlendOperationLongKernel(LongVector.SPECIES_512);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void bitwiseBlendOperationLong256() {
|
||||
bitwiseBlendOperationLongKernel(LongVector.SPECIES_256);
|
||||
}
|
||||
}
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -32,7 +32,7 @@ import java.util.Random;
|
||||
@OutputTimeUnit(TimeUnit.SECONDS)
|
||||
@State(Scope.Thread)
|
||||
public class MacroLogicOpt {
|
||||
@Param({"64","128","256","512","1024","2048","4096"}) private int VECLEN;
|
||||
@Param({"64","128","256","512","1024"}) private int VECLEN;
|
||||
|
||||
private int [] ai = new int[VECLEN];
|
||||
private int [] bi = new int[VECLEN];
|
||||
|
Loading…
x
Reference in New Issue
Block a user