8266951: Partial in-lining for vectorized mismatch operation using AVX512 masked instructions

Reviewed-by: psandoz, vlivanov
This commit is contained in:
Jatin Bhateja 2021-06-05 18:07:14 +00:00
parent f768fbf7a9
commit b05c40ca3b
22 changed files with 725 additions and 97 deletions

View File

@ -2572,6 +2572,13 @@ void Assembler::knotwl(KRegister dst, KRegister src) {
emit_int16(0x44, (0xC0 | encode));
}
// Emits KNOTQ dst, src: dst receives the bitwise complement of the 64-bit
// opmask register src.
void Assembler::knotql(KRegister dst, KRegister src) {
// The 64-bit opmask instruction forms are part of AVX512BW, hence this check.
assert(VM_Version::supports_avx512bw(), "");
// rex_w = true selects the 64-bit (W=1) form; no opmask is applied to the
// instruction itself. NOTE(review): legacy_mode presumably forces a VEX
// rather than EVEX prefix -- confirm against InstructionAttr.
InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
// No SIMD prefix, 0F opcode map; dst and src are passed by their register encodings.
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
// Opcode byte 0x44 followed by a register-direct ModRM byte (0xC0 sets mod = 11b).
emit_int16(0x44, (0xC0 | encode));
}
// This instruction produces ZF or CF flags
void Assembler::kortestbl(KRegister src1, KRegister src2) {
assert(VM_Version::supports_avx512dq(), "");

View File

@ -1480,6 +1480,7 @@ private:
void kmovql(Register dst, KRegister src);
void knotwl(KRegister dst, KRegister src);
void knotql(KRegister dst, KRegister src);
void kortestbl(KRegister dst, KRegister src);
void kortestwl(KRegister dst, KRegister src);

View File

@ -1923,7 +1923,7 @@ void C2_MacroAssembler::reduce8L(int opcode, Register dst, Register src1, XMMReg
}
void C2_MacroAssembler::genmask(KRegister dst, Register len, Register temp) {
assert(ArrayCopyPartialInlineSize <= 64,"");
assert(ArrayOperationPartialInlineSize > 0 && ArrayOperationPartialInlineSize <= 64, "invalid");
mov64(temp, -1L);
bzhiq(temp, temp, len);
kmovql(dst, temp);
@ -2140,11 +2140,37 @@ void C2_MacroAssembler::get_elem(BasicType typ, XMMRegister dst, XMMRegister src
}
}
// Register/register vector compare, dispatched on the element type.
//
// typ selects the element width of the compare that gets emitted: 8-bit for
// T_BYTE/T_BOOLEAN, 16-bit for T_SHORT/T_CHAR, 32-bit for T_INT/T_FLOAT and
// 64-bit for T_LONG/T_DOUBLE. Floating-point element types are therefore
// routed to the integer compare of the same width. All remaining arguments
// (kdmask, ksmask, src1, src2, comparison, vector_len) are forwarded unchanged
// to the width-specific emitter. Any other typ trips the assert.
void C2_MacroAssembler::evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, XMMRegister src2, int comparison, int vector_len) {
  const bool is_8bit  = (typ == T_BYTE  || typ == T_BOOLEAN);
  const bool is_16bit = (typ == T_SHORT || typ == T_CHAR);
  const bool is_32bit = (typ == T_INT   || typ == T_FLOAT);
  const bool is_64bit = (typ == T_LONG  || typ == T_DOUBLE);

  if (is_8bit) {
    evpcmpb(kdmask, ksmask, src1, src2, comparison, vector_len);
    return;
  }
  if (is_16bit) {
    evpcmpw(kdmask, ksmask, src1, src2, comparison, vector_len);
    return;
  }
  if (is_32bit) {
    evpcmpd(kdmask, ksmask, src1, src2, comparison, vector_len);
    return;
  }
  if (is_64bit) {
    evpcmpq(kdmask, ksmask, src1, src2, comparison, vector_len);
    return;
  }
  // Unsupported element type.
  assert(false,"Should not reach here.");
}
void C2_MacroAssembler::evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch) {
switch(typ) {
case T_BOOLEAN:
case T_BYTE:
evpcmpb(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch);
break;
case T_CHAR:
case T_SHORT:
evpcmpw(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch);
break;

View File

@ -139,6 +139,7 @@ public:
// blend
void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch = rscratch1);
void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, XMMRegister src2, int comparison, int vector_len);
void evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len);
void load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt);

View File

@ -1419,12 +1419,12 @@ void VM_Version::get_processor_features() {
}
#ifdef COMPILER2
if (UseAVX > 2) {
if (FLAG_IS_DEFAULT(ArrayCopyPartialInlineSize) ||
(!FLAG_IS_DEFAULT(ArrayCopyPartialInlineSize) &&
ArrayCopyPartialInlineSize != 0 &&
ArrayCopyPartialInlineSize != 32 &&
ArrayCopyPartialInlineSize != 16 &&
ArrayCopyPartialInlineSize != 64)) {
if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
ArrayOperationPartialInlineSize != 0 &&
ArrayOperationPartialInlineSize != 16 &&
ArrayOperationPartialInlineSize != 32 &&
ArrayOperationPartialInlineSize != 64)) {
int inline_size = 0;
if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
inline_size = 64;
@ -1433,18 +1433,18 @@ void VM_Version::get_processor_features() {
} else if (MaxVectorSize >= 16) {
inline_size = 16;
}
if(!FLAG_IS_DEFAULT(ArrayCopyPartialInlineSize)) {
warning("Setting ArrayCopyPartialInlineSize as %d", inline_size);
if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
}
ArrayCopyPartialInlineSize = inline_size;
ArrayOperationPartialInlineSize = inline_size;
}
if (ArrayCopyPartialInlineSize > MaxVectorSize) {
ArrayCopyPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
if (ArrayCopyPartialInlineSize) {
warning("Setting ArrayCopyPartialInlineSize as MaxVectorSize" INTX_FORMAT ")", MaxVectorSize);
if (ArrayOperationPartialInlineSize > MaxVectorSize) {
ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
if (ArrayOperationPartialInlineSize) {
// Message reads "... as MaxVectorSize (<n>)": the value is separated from the
// flag name and enclosed in a matched pair of parentheses.
warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize (" INTX_FORMAT ")", MaxVectorSize);
} else {
warning("Setting ArrayCopyPartialInlineSize as " INTX_FORMAT, ArrayCopyPartialInlineSize);
warning("Setting ArrayOperationPartialInlineSize as " INTX_FORMAT, ArrayOperationPartialInlineSize);
}
}
}

View File

@ -1578,6 +1578,7 @@ const bool Matcher::match_rule_supported(int opcode) {
}
break;
case Op_VectorCmpMasked:
case Op_VectorMaskGen:
case Op_LoadVectorMasked:
case Op_StoreVectorMasked:
@ -1678,6 +1679,7 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
break;
case Op_ClearArray:
case Op_VectorMaskGen:
case Op_VectorCmpMasked:
case Op_LoadVectorMasked:
case Op_StoreVectorMasked:
if (!is_LP64 || !VM_Version::supports_avx512bw()) {
@ -8084,7 +8086,34 @@ instruct vprorate(vec dst, vec src, vec shift) %{
%}
#ifdef _LP64
// ---------------------------------- Masked Block Copy ------------------------------------
// ---------------------------------- Masked Operations ------------------------------------
// Matches VectorCmpMasked: compares $src1 and $src2 for equality under $mask
// and leaves an int in $dst. $dst is -1 when the carry-set branch below is
// taken, otherwise it is the position of the lowest clear bit in the
// equality mask. Uses two temporary opmask registers and kills the flags.
instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
ins_encode %{
// Both vector operands must agree in length encoding and element type.
assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
assert(vector_element_basic_type(this, $src1) == vector_element_basic_type(this, $src2), "mismatch");
Label DONE;
int vlen_enc = vector_length_encoding(this, $src1);
BasicType elem_bt = vector_element_basic_type(this, $src1);
// ktmp2 = ~mask, i.e. ones in every bit position the mask does not select.
__ knotql($ktmp2$$KRegister, $mask$$KRegister);
// Preload -1; this value survives if the branch to DONE is taken.
__ mov64($dst$$Register, -1L);
// ktmp1 = per-element equality bits, with $mask passed as the source mask.
__ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
// KORTESTQ sets CF when (ktmp2 | ktmp1) is all ones, which means every bit
// selected by $mask compared equal; in that case keep -1 and skip ahead.
__ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
__ jccb(Assembler::carrySet, DONE);
// Otherwise dst = tzcnt(~ktmp1): the index of the lowest clear bit of ktmp1.
// NOTE(review): this is the first mismatching element only if $mask is a
// contiguous run of low bits and masked-off elements yield 0 in ktmp1 -- confirm.
__ kmovql($dst$$Register, $ktmp1$$KRegister);
__ notq($dst$$Register);
__ tzcntq($dst$$Register, $dst$$Register);
__ bind(DONE);
%}
ins_pipe( pipe_slow );
%}
instruct vmasked_load64(vec dst, memory mem, kReg mask) %{
match(Set dst (LoadVectorMasked mem mask));
format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}

View File

@ -789,6 +789,7 @@ bool InstructForm::captures_bottom_type(FormDict &globals) const {
!strcmp(_matrule->_rChild->_opType,"ShenandoahCompareAndExchangeN") ||
#endif
!strcmp(_matrule->_rChild->_opType,"StrInflatedCopy") ||
!strcmp(_matrule->_rChild->_opType,"VectorCmpMasked")||
!strcmp(_matrule->_rChild->_opType,"VectorMaskGen")||
!strcmp(_matrule->_rChild->_opType,"CompareAndExchangeP") ||
!strcmp(_matrule->_rChild->_opType,"CompareAndExchangeN"))) return true;

View File

@ -738,7 +738,7 @@ bool ArrayCopyNode::modifies(intptr_t offset_lo, intptr_t offset_hi, PhaseTransf
// As an optimization, choose optimum vector size for copy length known at compile time.
int ArrayCopyNode::get_partial_inline_vector_lane_count(BasicType type, int const_len) {
int lane_count = ArrayCopyPartialInlineSize/type2aelembytes(type);
int lane_count = ArrayOperationPartialInlineSize/type2aelembytes(type);
if (const_len > 0) {
int size_in_bytes = const_len * type2aelembytes(type);
if (size_in_bytes <= 16)

View File

@ -82,9 +82,10 @@
"actual size could be less depending on elements type") \
range(0, max_jint) \
\
product(intx, ArrayCopyPartialInlineSize, -1, DIAGNOSTIC, \
"Partial inline size used for array copy acceleration.") \
range(-1, 64) \
product(intx, ArrayOperationPartialInlineSize, 0, DIAGNOSTIC, \
/* Adjacent literals are concatenated: keep the trailing space. */ \
"Partial inline size used for small array operations " \
"(e.g. copy,cmp) acceleration.") \
range(0, 64) \
\
product(bool, AlignVector, true, \
"Perform vector store/load alignment in loop") \

View File

@ -78,6 +78,11 @@ class CastIINode: public ConstraintCastNode {
: ConstraintCastNode(n, t, carry_dependency), _range_check_dependency(range_check_dependency) {
init_class_id(Class_CastII);
}
// Constructor variant that takes an explicit control node. It forwards n, t
// and carry_dependency to ConstraintCastNode, records range_check_dependency,
// tags the node as Class_CastII and then installs ctrl as input 0.
CastIINode(Node* ctrl, Node* n, const Type* t, bool carry_dependency = false, bool range_check_dependency = false)
: ConstraintCastNode(n, t, carry_dependency), _range_check_dependency(range_check_dependency) {
init_class_id(Class_CastII);
// Input slot 0 receives the supplied control node.
init_req(0, ctrl);
}
virtual int Opcode() const;
virtual uint ideal_reg() const { return Op_RegI; }
virtual Node* Identity(PhaseGVN* phase);
@ -103,6 +108,11 @@ class CastIINode: public ConstraintCastNode {
class CastLLNode: public ConstraintCastNode {
public:
// Constructor variant that takes an explicit control node. It forwards n, t
// and carry_dependency to ConstraintCastNode, tags the node as Class_CastLL
// and then installs ctrl as input 0.
CastLLNode(Node* ctrl, Node* n, const Type* t, bool carry_dependency = false)
: ConstraintCastNode(n, t, carry_dependency) {
init_class_id(Class_CastLL);
// Input slot 0 receives the supplied control node.
init_req(0, ctrl);
}
CastLLNode(Node* n, const Type* t, bool carry_dependency = false)
: ConstraintCastNode(n, t, carry_dependency){
init_class_id(Class_CastLL);

View File

@ -417,6 +417,7 @@ macro(StoreVector)
macro(StoreVectorScatter)
macro(LoadVectorMasked)
macro(StoreVectorMasked)
macro(VectorCmpMasked)
macro(VectorMaskGen)
macro(VectorMaskOp)
macro(VectorMaskTrueCount)

View File

@ -3410,6 +3410,7 @@ void Compile::final_graph_reshaping_main_switch(Node* n, Final_Reshape_Counts& f
case Op_StoreVector:
case Op_LoadVectorGather:
case Op_StoreVectorScatter:
case Op_VectorCmpMasked:
case Op_VectorMaskGen:
case Op_LoadVectorMasked:
case Op_StoreVectorMasked:

View File

@ -5236,46 +5236,134 @@ bool LibraryCallKit::inline_bigIntegerShift(bool isRightShift) {
//-------------inline_vectorizedMismatch------------------------------
bool LibraryCallKit::inline_vectorizedMismatch() {
assert(UseVectorizedMismatchIntrinsic, "not implementated on this platform");
assert(UseVectorizedMismatchIntrinsic, "not implemented on this platform");
address stubAddr = StubRoutines::vectorizedMismatch();
if (stubAddr == NULL) {
return false; // Intrinsic's stub is not implemented on this platform
}
const char* stubName = "vectorizedMismatch";
int size_l = callee()->signature()->size();
assert(callee()->signature()->size() == 8, "vectorizedMismatch has 6 parameters");
Node* obja = argument(0); // Object
Node* aoffset = argument(1); // long
Node* objb = argument(3); // Object
Node* boffset = argument(4); // long
Node* length = argument(6); // int
Node* scale = argument(7); // int
Node* obja = argument(0);
Node* aoffset = argument(1);
Node* objb = argument(3);
Node* boffset = argument(4);
Node* length = argument(6);
Node* scale = argument(7);
const Type* a_type = obja->Value(&_gvn);
const Type* b_type = objb->Value(&_gvn);
const TypeAryPtr* top_a = a_type->isa_aryptr();
const TypeAryPtr* top_b = b_type->isa_aryptr();
if (top_a == NULL || top_a->klass() == NULL ||
top_b == NULL || top_b->klass() == NULL) {
// failed array check
return false;
const TypeAryPtr* obja_t = _gvn.type(obja)->isa_aryptr();
const TypeAryPtr* objb_t = _gvn.type(objb)->isa_aryptr();
if (obja_t == NULL || obja_t->klass() == NULL ||
objb_t == NULL || objb_t->klass() == NULL ||
scale == top()) {
return false; // failed input validation
}
Node* call;
jvms()->set_should_reexecute(true);
Node* obja_adr = make_unsafe_address(obja, aoffset);
Node* objb_adr = make_unsafe_address(objb, boffset);
call = make_runtime_call(RC_LEAF,
OptoRuntime::vectorizedMismatch_Type(),
stubAddr, stubName, TypePtr::BOTTOM,
obja_adr, objb_adr, length, scale);
// Partial inlining handling for inputs smaller than ArrayOperationPartialInlineSize bytes in size.
//
// inline_limit = ArrayOperationPartialInlineSize / element_size;
// if (length <= inline_limit) {
// inline_path:
// vmask = VectorMaskGen length
// vload1 = LoadVectorMasked obja, vmask
// vload2 = LoadVectorMasked objb, vmask
// result1 = VectorCmpMasked vload1, vload2, vmask
// } else {
// call_stub_path:
// result2 = call vectorizedMismatch_stub(obja, objb, length, scale)
// }
// exit_block:
// return Phi(result1, result2);
//
enum { inline_path = 1, // input is small enough to process it all at once
stub_path = 2, // input is too large; call into the VM
PATH_LIMIT = 3
};
Node* exit_block = new RegionNode(PATH_LIMIT);
Node* result_phi = new PhiNode(exit_block, TypeInt::INT);
Node* memory_phi = new PhiNode(exit_block, Type::MEMORY, TypePtr::BOTTOM);
Node* call_stub_path = control();
BasicType elem_bt = T_ILLEGAL;
const TypeInt* scale_t = _gvn.type(scale)->is_int();
if (scale_t->is_con()) {
switch (scale_t->get_con()) {
case 0: elem_bt = T_BYTE; break;
case 1: elem_bt = T_SHORT; break;
case 2: elem_bt = T_INT; break;
case 3: elem_bt = T_LONG; break;
default: elem_bt = T_ILLEGAL; break; // not supported
}
}
int inline_limit = 0;
bool do_partial_inline = false;
if (elem_bt != T_ILLEGAL && ArrayOperationPartialInlineSize > 0) {
inline_limit = ArrayOperationPartialInlineSize / type2aelembytes(elem_bt);
do_partial_inline = inline_limit >= 16;
}
if (do_partial_inline) {
assert(elem_bt != T_ILLEGAL, "sanity");
const TypeVect* vt = TypeVect::make(elem_bt, inline_limit);
if (Matcher::match_rule_supported_vector(Op_VectorMaskGen, inline_limit, elem_bt) &&
Matcher::match_rule_supported_vector(Op_LoadVectorMasked, inline_limit, elem_bt) &&
Matcher::match_rule_supported_vector(Op_VectorCmpMasked, inline_limit, elem_bt)) {
Node* cmp_length = _gvn.transform(new CmpINode(length, intcon(inline_limit)));
Node* bol_gt = _gvn.transform(new BoolNode(cmp_length, BoolTest::gt));
call_stub_path = generate_guard(bol_gt, NULL, PROB_MIN);
if (!stopped()) {
Node* casted_length = _gvn.transform(new CastIINode(control(), length, TypeInt::make(0, inline_limit, Type::WidenMin)));
const TypePtr* obja_adr_t = _gvn.type(obja_adr)->isa_ptr();
const TypePtr* objb_adr_t = _gvn.type(objb_adr)->isa_ptr();
Node* obja_adr_mem = memory(C->get_alias_index(obja_adr_t));
Node* objb_adr_mem = memory(C->get_alias_index(objb_adr_t));
Node* vmask = _gvn.transform(new VectorMaskGenNode(ConvI2X(casted_length), TypeVect::VECTMASK, elem_bt));
Node* vload_obja = _gvn.transform(new LoadVectorMaskedNode(control(), obja_adr_mem, obja_adr, obja_adr_t, vt, vmask));
Node* vload_objb = _gvn.transform(new LoadVectorMaskedNode(control(), objb_adr_mem, objb_adr, objb_adr_t, vt, vmask));
Node* result = _gvn.transform(new VectorCmpMaskedNode(vload_obja, vload_objb, vmask, TypeInt::INT));
exit_block->init_req(inline_path, control());
memory_phi->init_req(inline_path, map()->memory());
result_phi->init_req(inline_path, result);
C->set_max_vector_size(MAX2((uint)ArrayOperationPartialInlineSize, C->max_vector_size()));
clear_upper_avx();
}
}
}
if (call_stub_path != NULL) {
set_control(call_stub_path);
Node* call = make_runtime_call(RC_LEAF,
OptoRuntime::vectorizedMismatch_Type(),
StubRoutines::vectorizedMismatch(), "vectorizedMismatch", TypePtr::BOTTOM,
obja_adr, objb_adr, length, scale);
exit_block->init_req(stub_path, control());
memory_phi->init_req(stub_path, map()->memory());
result_phi->init_req(stub_path, _gvn.transform(new ProjNode(call, TypeFunc::Parms)));
}
exit_block = _gvn.transform(exit_block);
memory_phi = _gvn.transform(memory_phi);
result_phi = _gvn.transform(result_phi);
set_control(exit_block);
set_all_memory(memory_phi);
set_result(result_phi);
Node* result = _gvn.transform(new ProjNode(call, TypeFunc::Parms));
set_result(result);
return true;
}

View File

@ -32,6 +32,7 @@
#include "opto/graphKit.hpp"
#include "opto/macro.hpp"
#include "opto/runtime.hpp"
#include "opto/castnode.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/align.hpp"
#include "utilities/powerOfTwo.hpp"
@ -174,8 +175,8 @@ void PhaseMacroExpand::generate_limit_guard(Node** ctrl, Node* offset, Node* sub
//
// Partial in-lining handling for smaller conjoint/disjoint array copies having
// length(in bytes) less than ArrayCopyPartialInlineSize.
// if (length <= ArrayCopyPartialInlineSize) {
// length(in bytes) less than ArrayOperationPartialInlineSize.
// if (length <= ArrayOperationPartialInlineSize) {
// partial_inlining_block:
// mask = Mask_Gen
// vload = LoadVectorMasked src , mask
@ -216,24 +217,27 @@ void PhaseMacroExpand::generate_partial_inlining_block(Node** ctrl, MergeMemNode
// Return if copy length is greater than partial inline size limit or
// target does not support masked load/stores.
int lane_count = ArrayCopyNode::get_partial_inline_vector_lane_count(type, const_len);
if ( const_len > ArrayCopyPartialInlineSize ||
if ( const_len > ArrayOperationPartialInlineSize ||
!Matcher::match_rule_supported_vector(Op_LoadVectorMasked, lane_count, type) ||
!Matcher::match_rule_supported_vector(Op_StoreVectorMasked, lane_count, type) ||
!Matcher::match_rule_supported_vector(Op_VectorMaskGen, lane_count, type)) {
return;
}
int inline_limit = ArrayOperationPartialInlineSize / type2aelembytes(type);
Node* casted_length = new CastLLNode(*ctrl, length, TypeLong::make(0, inline_limit, Type::WidenMin));
transform_later(casted_length);
Node* copy_bytes = new LShiftXNode(length, intcon(shift));
transform_later(copy_bytes);
Node* cmp_le = new CmpULNode(copy_bytes, longcon(ArrayCopyPartialInlineSize));
Node* cmp_le = new CmpULNode(copy_bytes, longcon(ArrayOperationPartialInlineSize));
transform_later(cmp_le);
Node* bol_le = new BoolNode(cmp_le, BoolTest::le);
transform_later(bol_le);
inline_block = generate_guard(ctrl, bol_le, NULL, PROB_FAIR);
stub_block = *ctrl;
Node* mask_gen = new VectorMaskGenNode(length, TypeVect::VECTMASK, Type::get_const_basic_type(type));
Node* mask_gen = new VectorMaskGenNode(casted_length, TypeVect::VECTMASK, type);
transform_later(mask_gen);
unsigned vec_size = lane_count * type2aelembytes(type);
@ -1187,7 +1191,7 @@ bool PhaseMacroExpand::generate_unchecked_arraycopy(Node** ctrl, MergeMemNode**
Node* result_memory = NULL;
RegionNode* exit_block = NULL;
if (ArrayCopyPartialInlineSize > 0 && is_subword_type(basic_elem_type) &&
if (ArrayOperationPartialInlineSize > 0 && is_subword_type(basic_elem_type) &&
Matcher::vector_width_in_bytes(basic_elem_type) >= 16) {
generate_partial_inlining_block(ctrl, mem, adr_type, &exit_block, &result_memory,
copy_length, src_start, dest_start, basic_elem_type);

View File

@ -2227,6 +2227,7 @@ bool Matcher::find_shared_visit(MStack& mstack, Node* n, uint opcode, bool& mem_
case Op_FmaVF:
case Op_MacroLogicV:
case Op_LoadVectorMasked:
case Op_VectorCmpMasked:
set_shared(n); // Force result into register (it will be anyways)
break;
case Op_ConP: { // Convert pointers above the centerline to NUL
@ -2320,6 +2321,12 @@ void Matcher::find_shared_post_visit(Node* n, uint opcode) {
n->del_req(3);
break;
}
case Op_VectorCmpMasked: {
Node* pair1 = new BinaryNode(n->in(2), n->in(3));
n->set_req(2, pair1);
n->del_req(3);
break;
}
case Op_MacroLogicV: {
Node* pair1 = new BinaryNode(n->in(1), n->in(2));
Node* pair2 = new BinaryNode(n->in(3), n->in(4));

View File

@ -721,35 +721,39 @@ StoreVectorNode* StoreVectorNode::make(int opc, Node* ctl, Node* mem,
}
Node* LoadVectorMaskedNode::Ideal(PhaseGVN* phase, bool can_reshape) {
Node* mask_len = in(3)->in(1);
const TypeLong* ty = phase->type(mask_len)->isa_long();
if (ty && ty->is_con()) {
BasicType mask_bt = ((VectorMaskGenNode*)in(3))->get_elem_type()->array_element_basic_type();
uint load_sz = type2aelembytes(mask_bt) * ty->get_con();
if ( load_sz == 32 || load_sz == 64) {
assert(load_sz == 32 || MaxVectorSize > 32, "Unexpected load size");
Node* ctr = in(MemNode::Control);
Node* mem = in(MemNode::Memory);
Node* adr = in(MemNode::Address);
return phase->transform(new LoadVectorNode(ctr, mem, adr, adr_type(), vect_type()));
if (!in(3)->is_top() && in(3)->Opcode() == Op_VectorMaskGen) {
Node* mask_len = in(3)->in(1);
const TypeLong* ty = phase->type(mask_len)->isa_long();
if (ty && ty->is_con()) {
BasicType mask_bt = ((VectorMaskGenNode*)in(3))->get_elem_type();
uint load_sz = type2aelembytes(mask_bt) * ty->get_con();
if ( load_sz == 32 || load_sz == 64) {
assert(load_sz == 32 || MaxVectorSize > 32, "Unexpected load size");
Node* ctr = in(MemNode::Control);
Node* mem = in(MemNode::Memory);
Node* adr = in(MemNode::Address);
return phase->transform(new LoadVectorNode(ctr, mem, adr, adr_type(), vect_type()));
}
}
}
return NULL;
}
Node* StoreVectorMaskedNode::Ideal(PhaseGVN* phase, bool can_reshape) {
Node* mask_len = in(4)->in(1);
const TypeLong* ty = phase->type(mask_len)->isa_long();
if (ty && ty->is_con()) {
BasicType mask_bt = ((VectorMaskGenNode*)in(4))->get_elem_type()->array_element_basic_type();
uint load_sz = type2aelembytes(mask_bt) * ty->get_con();
if ( load_sz == 32 || load_sz == 64) {
assert(load_sz == 32 || MaxVectorSize > 32, "Unexpected store size");
Node* ctr = in(MemNode::Control);
Node* mem = in(MemNode::Memory);
Node* adr = in(MemNode::Address);
Node* val = in(MemNode::ValueIn);
return phase->transform(new StoreVectorNode(ctr, mem, adr, adr_type(), val));
if (!in(4)->is_top() && in(4)->Opcode() == Op_VectorMaskGen) {
Node* mask_len = in(4)->in(1);
const TypeLong* ty = phase->type(mask_len)->isa_long();
if (ty && ty->is_con()) {
BasicType mask_bt = ((VectorMaskGenNode*)in(4))->get_elem_type();
uint load_sz = type2aelembytes(mask_bt) * ty->get_con();
if ( load_sz == 32 || load_sz == 64) {
assert(load_sz == 32 || MaxVectorSize > 32, "Unexpected store size");
Node* ctr = in(MemNode::Control);
Node* mem = in(MemNode::Memory);
Node* adr = in(MemNode::Address);
Node* val = in(MemNode::ValueIn);
return phase->transform(new StoreVectorNode(ctr, mem, adr, adr_type(), val));
}
}
}
return NULL;

View File

@ -800,6 +800,8 @@ class StoreVectorNode : public StoreNode {
idx == MemNode::ValueIn + 1; }
};
//------------------------------StoreVectorMaskedNode--------------------------------
// Store Vector to memory under the influence of a predicate register(mask).
class StoreVectorMaskedNode : public StoreVectorNode {
public:
StoreVectorMaskedNode(Node* c, Node* mem, Node* dst, Node* src, const TypePtr* at, Node* mask)
@ -818,6 +820,8 @@ class StoreVectorMaskedNode : public StoreVectorNode {
Node* Ideal(PhaseGVN* phase, bool can_reshape);
};
//------------------------------LoadVectorMaskedNode--------------------------------
// Load Vector from memory under the influence of a predicate register(mask).
class LoadVectorMaskedNode : public LoadVectorNode {
public:
LoadVectorMaskedNode(Node* c, Node* mem, Node* src, const TypePtr* at, const TypeVect* vt, Node* mask)
@ -836,21 +840,36 @@ class LoadVectorMaskedNode : public LoadVectorNode {
Node* Ideal(PhaseGVN* phase, bool can_reshape);
};
//------------------------------VectorCmpMaskedNode--------------------------------
// Vector Comparison under the influence of a predicate register(mask).
// Ideal node with three data inputs: two source vectors and a mask.
// The node's type is whatever the creator passes as ty.
class VectorCmpMaskedNode : public TypeNode {
public:
// Reserves four input slots; slot 0 is left unset here, slots 1..3 hold
// src1, src2 and mask in that order.
VectorCmpMaskedNode(Node* src1, Node* src2, Node* mask, const Type* ty): TypeNode(ty, 4) {
init_req(1, src1);
init_req(2, src2);
init_req(3, mask);
}
virtual int Opcode() const;
};
class VectorMaskGenNode : public TypeNode {
public:
VectorMaskGenNode(Node* length, const Type* ty, const Type* ety): TypeNode(ty, 2), _elemType(ety) {
VectorMaskGenNode(Node* length, const Type* ty, BasicType ety): TypeNode(ty, 2), _elemType(ety) {
init_req(1, length);
}
virtual int Opcode() const;
const Type* get_elem_type() { return _elemType;}
BasicType get_elem_type() { return _elemType;}
virtual uint size_of() const { return sizeof(VectorMaskGenNode); }
virtual uint ideal_reg() const {
return Op_RegVectMask;
}
private:
const Type* _elemType;
BasicType _elemType;
};
class VectorMaskOpNode : public TypeNode {

View File

@ -30,25 +30,25 @@ import java.util.Random;
* @summary Optimize arrayCopy using AVX-512 masked instructions.
*
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayCopyPartialInlineSize=0 -XX:MaxVectorSize=32 -XX:+UnlockDiagnosticVMOptions
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayOperationPartialInlineSize=0 -XX:MaxVectorSize=32 -XX:+UnlockDiagnosticVMOptions
* compiler.arraycopy.TestArrayCopyConjoint
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayCopyPartialInlineSize=0 -XX:MaxVectorSize=64
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayOperationPartialInlineSize=0 -XX:MaxVectorSize=64
* compiler.arraycopy.TestArrayCopyConjoint
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayCopyPartialInlineSize=32 -XX:+UnlockDiagnosticVMOptions -XX:MaxVectorSize=32 -XX:+UnlockDiagnosticVMOption
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayOperationPartialInlineSize=32 -XX:+UnlockDiagnosticVMOptions -XX:MaxVectorSize=32 -XX:+UnlockDiagnosticVMOption
* compiler.arraycopy.TestArrayCopyConjoint
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayCopyPartialInlineSize=32 -XX:+UnlockDiagnosticVMOptions -XX:MaxVectorSize=64
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayOperationPartialInlineSize=32 -XX:+UnlockDiagnosticVMOptions -XX:MaxVectorSize=64
* compiler.arraycopy.TestArrayCopyConjoint
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayCopyPartialInlineSize=64 -XX:MaxVectorSize=64
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayOperationPartialInlineSize=64 -XX:MaxVectorSize=64
* compiler.arraycopy.TestArrayCopyConjoint
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayCopyPartialInlineSize=32 -XX:+UnlockDiagnosticVMOptions -XX:MaxVectorSize=32 -XX:+UnlockDiagnosticVMOption -XX:ArrayCopyLoadStoreMaxElem=16
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayOperationPartialInlineSize=32 -XX:+UnlockDiagnosticVMOptions -XX:MaxVectorSize=32 -XX:+UnlockDiagnosticVMOption -XX:ArrayCopyLoadStoreMaxElem=16
* compiler.arraycopy.TestArrayCopyConjoint
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayCopyPartialInlineSize=64 -XX:MaxVectorSize=64 -XX:ArrayCopyLoadStoreMaxElem=16
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayOperationPartialInlineSize=64 -XX:MaxVectorSize=64 -XX:ArrayCopyLoadStoreMaxElem=16
* compiler.arraycopy.TestArrayCopyConjoint
*
*/

View File

@ -30,25 +30,25 @@ import java.util.Random;
* @summary Optimize arrayCopy using AVX-512 masked instructions.
*
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayCopyPartialInlineSize=0 -XX:MaxVectorSize=32 -XX:+UnlockDiagnosticVMOptions
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayOperationPartialInlineSize=0 -XX:MaxVectorSize=32 -XX:+UnlockDiagnosticVMOptions
* compiler.arraycopy.TestArrayCopyDisjoint
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayCopyPartialInlineSize=0 -XX:MaxVectorSize=64
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayOperationPartialInlineSize=0 -XX:MaxVectorSize=64
* compiler.arraycopy.TestArrayCopyDisjoint
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayCopyPartialInlineSize=32 -XX:+UnlockDiagnosticVMOptions -XX:MaxVectorSize=32 -XX:+UnlockDiagnosticVMOption
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayOperationPartialInlineSize=32 -XX:+UnlockDiagnosticVMOptions -XX:MaxVectorSize=32 -XX:+UnlockDiagnosticVMOption
* compiler.arraycopy.TestArrayCopyDisjoint
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayCopyPartialInlineSize=32 -XX:+UnlockDiagnosticVMOptions -XX:MaxVectorSize=64
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayOperationPartialInlineSize=32 -XX:+UnlockDiagnosticVMOptions -XX:MaxVectorSize=64
* compiler.arraycopy.TestArrayCopyDisjoint
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayCopyPartialInlineSize=64 -XX:MaxVectorSize=64
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayOperationPartialInlineSize=64 -XX:MaxVectorSize=64
* compiler.arraycopy.TestArrayCopyDisjoint
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayCopyPartialInlineSize=32 -XX:+UnlockDiagnosticVMOptions -XX:MaxVectorSize=32 -XX:+UnlockDiagnosticVMOption -XX:ArrayCopyLoadStoreMaxElem=16
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayOperationPartialInlineSize=32 -XX:+UnlockDiagnosticVMOptions -XX:MaxVectorSize=32 -XX:+UnlockDiagnosticVMOption -XX:ArrayCopyLoadStoreMaxElem=16
* compiler.arraycopy.TestArrayCopyDisjoint
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayCopyPartialInlineSize=64 -XX:MaxVectorSize=64 -XX:ArrayCopyLoadStoreMaxElem=16
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayOperationPartialInlineSize=64 -XX:MaxVectorSize=64 -XX:ArrayCopyLoadStoreMaxElem=16
* compiler.arraycopy.TestArrayCopyDisjoint
*
*/

View File

@ -0,0 +1,288 @@
/*
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package compiler.intrinsics;
/*
* @test
* @requires vm.opt.final.UseVectorizedMismatchIntrinsic == true
* @modules java.base/jdk.internal.misc
* java.base/jdk.internal.util
*
* @run main/othervm -XX:CompileCommand=quiet -XX:CompileCommand=compileonly,*::test*
* -Xbatch -XX:-TieredCompilation
* -XX:UseAVX=3
* compiler.intrinsics.VectorizedMismatchTest
*
* @run main/othervm -XX:CompileCommand=quiet -XX:CompileCommand=compileonly,*::test*
* -Xbatch -XX:-TieredCompilation
* -XX:UseAVX=3 -XX:AVX3Threshold=0
* compiler.intrinsics.VectorizedMismatchTest
*/
import jdk.internal.misc.Unsafe;
import jdk.internal.util.ArraysSupport;
/**
 * Repeatedly invokes {@code ArraysSupport.vectorizedMismatch} on pairs of
 * freshly allocated (and therefore identical) primitive arrays.
 *
 * <p>For every primitive element type there is one worker method
 * ({@code testXxxConstantLength(int)}) and four thin wrappers that pass a
 * literal length: 0, 1, half of the array and the whole array. In addition
 * there is a scenario that calls the method from a static initializer
 * ({@link ClassInitTest}) and two scenarios that call it from inside a loop.
 *
 * <p>Return values are not verified; {@link #main} only calls every scenario
 * 20,000 times.
 *
 * <p>NOTE: every scenario method name starts with {@code test}. Keep it that
 * way when adding scenarios; the jtreg command line of this test restricts
 * compilation with the pattern {@code *::test*}.
 */
public class VectorizedMismatchTest {
    // boolean[]: 128 elements, lengths 0 / 1 / 64 / 128.
    private boolean[] boolean_a = new boolean[128];
    private boolean[] boolean_b = new boolean[128];

    /** Compares the first {@code length} elements of the two boolean arrays. */
    int testBooleanConstantLength(int length) {
        boolean[] obja = boolean_a;
        boolean[] objb = boolean_b;
        long offset = Unsafe.ARRAY_BOOLEAN_BASE_OFFSET;
        int  scale  = ArraysSupport.LOG2_ARRAY_BOOLEAN_INDEX_SCALE;
        return ArraysSupport.vectorizedMismatch(obja, offset, objb, offset, length, scale);
    }

    int testBooleanConstantLength0()   { return testBooleanConstantLength(0);   }
    int testBooleanConstantLength1()   { return testBooleanConstantLength(1);   }
    int testBooleanConstantLength64()  { return testBooleanConstantLength(64);  }
    int testBooleanConstantLength128() { return testBooleanConstantLength(128); }

    /* ==================================================================================== */

    // byte[]: 128 elements, lengths 0 / 1 / 64 / 128.
    private byte[] byte_a = new byte[128];
    private byte[] byte_b = new byte[128];

    /** Compares the first {@code length} elements of the two byte arrays. */
    int testByteConstantLength(int length) {
        byte[] obja = byte_a;
        byte[] objb = byte_b;
        long offset = Unsafe.ARRAY_BYTE_BASE_OFFSET;
        int  scale  = ArraysSupport.LOG2_ARRAY_BYTE_INDEX_SCALE;
        return ArraysSupport.vectorizedMismatch(obja, offset, objb, offset, length, scale);
    }

    int testByteConstantLength0()   { return testByteConstantLength(0);   }
    int testByteConstantLength1()   { return testByteConstantLength(1);   }
    int testByteConstantLength64()  { return testByteConstantLength(64);  }
    int testByteConstantLength128() { return testByteConstantLength(128); }

    /* ==================================================================================== */

    // short[]: 64 elements, lengths 0 / 1 / 32 / 64.
    private short[] short_a = new short[64];
    private short[] short_b = new short[64];

    /** Compares the first {@code length} elements of the two short arrays. */
    int testShortConstantLength(int length) {
        short[] obja = short_a;
        short[] objb = short_b;
        long offset = Unsafe.ARRAY_SHORT_BASE_OFFSET;
        int  scale  = ArraysSupport.LOG2_ARRAY_SHORT_INDEX_SCALE;
        return ArraysSupport.vectorizedMismatch(obja, offset, objb, offset, length, scale);
    }

    int testShortConstantLength0()  { return testShortConstantLength(0);  }
    int testShortConstantLength1()  { return testShortConstantLength(1);  }
    int testShortConstantLength32() { return testShortConstantLength(32); }
    int testShortConstantLength64() { return testShortConstantLength(64); }

    /* ==================================================================================== */

    // char[]: 64 elements, lengths 0 / 1 / 32 / 64.
    private char[] char_a = new char[64];
    private char[] char_b = new char[64];

    /** Compares the first {@code length} elements of the two char arrays. */
    int testCharConstantLength(int length) {
        char[] obja = char_a;
        char[] objb = char_b;
        long offset = Unsafe.ARRAY_CHAR_BASE_OFFSET;
        int  scale  = ArraysSupport.LOG2_ARRAY_CHAR_INDEX_SCALE;
        return ArraysSupport.vectorizedMismatch(obja, offset, objb, offset, length, scale);
    }

    int testCharConstantLength0()  { return testCharConstantLength(0);  }
    int testCharConstantLength1()  { return testCharConstantLength(1);  }
    int testCharConstantLength32() { return testCharConstantLength(32); }
    int testCharConstantLength64() { return testCharConstantLength(64); }

    /* ==================================================================================== */

    // int[]: 32 elements, lengths 0 / 1 / 16 / 32.
    private int[] int_a = new int[32];
    private int[] int_b = new int[32];

    /** Compares the first {@code length} elements of the two int arrays. */
    int testIntConstantLength(int length) {
        int[] obja = int_a;
        int[] objb = int_b;
        long offset = Unsafe.ARRAY_INT_BASE_OFFSET;
        int  scale  = ArraysSupport.LOG2_ARRAY_INT_INDEX_SCALE;
        return ArraysSupport.vectorizedMismatch(obja, offset, objb, offset, length, scale);
    }

    int testIntConstantLength0()  { return testIntConstantLength(0);  }
    int testIntConstantLength1()  { return testIntConstantLength(1);  }
    int testIntConstantLength16() { return testIntConstantLength(16); }
    int testIntConstantLength32() { return testIntConstantLength(32); }

    /* ==================================================================================== */

    // float[]: 32 elements, lengths 0 / 1 / 16 / 32.
    private float[] float_a = new float[32];
    private float[] float_b = new float[32];

    /** Compares the first {@code length} elements of the two float arrays. */
    int testFloatConstantLength(int length) {
        float[] obja = float_a;
        float[] objb = float_b;
        long offset = Unsafe.ARRAY_FLOAT_BASE_OFFSET;
        int  scale  = ArraysSupport.LOG2_ARRAY_FLOAT_INDEX_SCALE;
        return ArraysSupport.vectorizedMismatch(obja, offset, objb, offset, length, scale);
    }

    int testFloatConstantLength0()  { return testFloatConstantLength(0);  }
    int testFloatConstantLength1()  { return testFloatConstantLength(1);  }
    int testFloatConstantLength16() { return testFloatConstantLength(16); }
    int testFloatConstantLength32() { return testFloatConstantLength(32); }

    /* ==================================================================================== */

    // long[]: 16 elements, lengths 0 / 1 / 8 / 16.
    private long[] long_a = new long[16];
    private long[] long_b = new long[16];

    /** Compares the first {@code length} elements of the two long arrays. */
    int testLongConstantLength(int length) {
        long[] obja = long_a;
        long[] objb = long_b;
        long offset = Unsafe.ARRAY_LONG_BASE_OFFSET;
        int  scale  = ArraysSupport.LOG2_ARRAY_LONG_INDEX_SCALE;
        return ArraysSupport.vectorizedMismatch(obja, offset, objb, offset, length, scale);
    }

    int testLongConstantLength0()  { return testLongConstantLength(0);  }
    int testLongConstantLength1()  { return testLongConstantLength(1);  }
    int testLongConstantLength8()  { return testLongConstantLength(8);  }
    int testLongConstantLength16() { return testLongConstantLength(16); }

    /* ==================================================================================== */

    // double[]: 16 elements, lengths 0 / 1 / 8 / 16.
    private double[] double_a = new double[16];
    private double[] double_b = new double[16];

    /** Compares the first {@code length} elements of the two double arrays. */
    int testDoubleConstantLength(int length) {
        double[] obja = double_a;
        double[] objb = double_b;
        long offset = Unsafe.ARRAY_DOUBLE_BASE_OFFSET;
        int  scale  = ArraysSupport.LOG2_ARRAY_DOUBLE_INDEX_SCALE;
        return ArraysSupport.vectorizedMismatch(obja, offset, objb, offset, length, scale);
    }

    int testDoubleConstantLength0()  { return testDoubleConstantLength(0);  }
    int testDoubleConstantLength1()  { return testDoubleConstantLength(1);  }
    int testDoubleConstantLength8()  { return testDoubleConstantLength(8);  }
    int testDoubleConstantLength16() { return testDoubleConstantLength(16); }

    /* ==================================================================================== */

    /**
     * Calls {@code vectorizedMismatch} 20,001 times from its own static
     * initializer, i.e. while the class that declares the length is still
     * being initialized. The last result is kept in {@link #RESULT}.
     */
    static class ClassInitTest {
        static final int LENGTH = 64;
        static final int RESULT;
        static {
            byte[] arr1 = new byte[LENGTH];
            byte[] arr2 = new byte[LENGTH];
            for (int i = 0; i < 20_000; i++) {
                test(arr1, arr2);
            }
            RESULT = test(arr1, arr2);
        }

        static int test(byte[] obja, byte[] objb) {
            long offset = Unsafe.ARRAY_BYTE_BASE_OFFSET;
            int  scale  = ArraysSupport.LOG2_ARRAY_BYTE_INDEX_SCALE;
            return ArraysSupport.vectorizedMismatch(obja, offset, objb, offset, LENGTH, scale); // LENGTH is not considered a constant
        }
    }

    /** Reads {@link ClassInitTest#RESULT}; the first read runs the static initializer. */
    int testConstantBeingInitialized() {
        return ClassInitTest.RESULT; // trigger class initialization
    }

    /* ==================================================================================== */

    /** Sums 32 identical calls; neither the arguments nor the call depend on the loop index. */
    int testLoopUnswitch(int length) {
        long offset = Unsafe.ARRAY_BYTE_BASE_OFFSET;
        int  scale  = ArraysSupport.LOG2_ARRAY_BYTE_INDEX_SCALE;

        int acc = 0;
        for (int i = 0; i < 32; i++) {
            acc += ArraysSupport.vectorizedMismatch(byte_a, offset, byte_b, offset, length, scale);
        }
        return acc;
    }

    /** Same as {@link #testLoopUnswitch} but the loop advances by a caller-supplied stride. */
    int testLoopHoist(int length, int stride) {
        long offset = Unsafe.ARRAY_BYTE_BASE_OFFSET;
        int  scale  = ArraysSupport.LOG2_ARRAY_BYTE_INDEX_SCALE;

        int acc = 0;
        for (int i = 0; i < 32; i += stride) {
            acc += ArraysSupport.vectorizedMismatch(byte_a, offset, byte_b, offset, length, scale);
        }
        return acc;
    }

    /* ==================================================================================== */

    /**
     * Test driver: runs every scenario 20,000 times. Results are discarded.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        VectorizedMismatchTest t = new VectorizedMismatchTest();
        for (int i = 0; i < 20_000; i++) {
            t.testBooleanConstantLength0();
            t.testBooleanConstantLength1();
            t.testBooleanConstantLength64();
            t.testBooleanConstantLength128();

            t.testByteConstantLength0();
            t.testByteConstantLength1();
            t.testByteConstantLength64();
            t.testByteConstantLength128();

            t.testShortConstantLength0();
            t.testShortConstantLength1();
            t.testShortConstantLength32();
            t.testShortConstantLength64();

            t.testCharConstantLength0();
            t.testCharConstantLength1();
            t.testCharConstantLength32();
            t.testCharConstantLength64();

            t.testIntConstantLength0();
            t.testIntConstantLength1();
            t.testIntConstantLength16();
            t.testIntConstantLength32();

            t.testFloatConstantLength0();
            t.testFloatConstantLength1();
            t.testFloatConstantLength16();
            t.testFloatConstantLength32();

            t.testLongConstantLength0();
            t.testLongConstantLength1();
            t.testLongConstantLength8();
            t.testLongConstantLength16();

            t.testDoubleConstantLength0();
            t.testDoubleConstantLength1();
            t.testDoubleConstantLength8();
            t.testDoubleConstantLength16();

            // Without this call the ClassInitTest scenario is dead code: nothing
            // else touches ClassInitTest, so its static initializer never runs.
            t.testConstantBeingInitialized();

            t.testLoopUnswitch(32);
            t.testLoopHoist(128, 2);
        }
    }
}

View File

@ -311,6 +311,7 @@ public class VMProps implements Callable<Map<String, String>> {
vmOptFinalFlag(map, "ClassUnloading");
vmOptFinalFlag(map, "ClassUnloadingWithConcurrentMark");
vmOptFinalFlag(map, "UseCompressedOops");
vmOptFinalFlag(map, "UseVectorizedMismatchIntrinsic");
vmOptFinalFlag(map, "EnableJVMCI");
vmOptFinalFlag(map, "EliminateAllocations");
vmOptFinalFlag(map, "UseVtableBasedCHA");

View File

@ -0,0 +1,139 @@
/*
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.bench.java.util;
import java.util.Arrays;
import java.util.concurrent.TimeUnit;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
/**
 * Throughput benchmark for {@code Arrays.mismatch} over each primitive array
 * type except {@code boolean}.
 *
 * <p>For every type two arrays of {@code size} elements are created. Both are
 * filled with the same value and then the last element of the second array is
 * overwritten with a different value, so each pair differs only at index
 * {@code size - 1}.
 */
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Thread)
public class ArraysMismatchPartialInlining {

    // Element count of every array. Kept as an instance field so that the
    // injected parameter belongs to this state object rather than being
    // shared by all instances of the class.
    @Param({"3", "4", "5", "6", "7", "15", "31", "63", "95", "800"})
    private int size;

    // First operand of each comparison.
    byte [] barray1;
    char [] carray1;
    short [] sarray1;
    int [] iarray1;
    long [] larray1;
    float [] farray1;
    double [] darray1;

    // Second operand of each comparison; differs from the first in the last element only.
    byte [] barray2;
    char [] carray2;
    short [] sarray2;
    int [] iarray2;
    long [] larray2;
    float [] farray2;
    double [] darray2;

    /** Allocates and populates all array pairs for the current {@code size}. */
    @Setup
    public void setup() {
        barray1 = new byte[size];
        carray1 = new char[size];
        sarray1 = new short[size];
        iarray1 = new int[size];
        larray1 = new long[size];
        farray1 = new float[size];
        darray1 = new double[size];

        barray2 = new byte[size];
        carray2 = new char[size];
        sarray2 = new short[size];
        iarray2 = new int[size];
        larray2 = new long[size];
        farray2 = new float[size];
        darray2 = new double[size];

        Arrays.fill(barray1 , (byte)0xF);
        Arrays.fill(carray1 , (char)0xFF);
        Arrays.fill(sarray1 , (short)0xFF);
        Arrays.fill(iarray1 , -1);
        Arrays.fill(larray1 , -1L);
        Arrays.fill(farray1 , -1.0f);
        Arrays.fill(darray1, -1.0);

        Arrays.fill(barray2 , (byte)0xF);
        Arrays.fill(carray2 , (char)0xFF);
        Arrays.fill(sarray2 , (short)0xFF);
        Arrays.fill(iarray2 , -1);
        Arrays.fill(larray2 , -1L);
        Arrays.fill(farray2 , -1.0F);
        Arrays.fill(darray2, -1.0);

        // Plant the single difference at the very end of each second array.
        final int last = size - 1;
        barray2[last] = (byte)1;
        carray2[last] = (char)1;
        sarray2[last] = (short)1;
        iarray2[last] = 1;
        larray2[last] = 1L;
        farray2[last] = 1.0f;
        darray2[last] = 1.0;
    }

    /** @return result of {@code Arrays.mismatch} on the byte array pair */
    @Benchmark
    public int testByteMatch() {
        return Arrays.mismatch(barray1, barray2);
    }

    /** @return result of {@code Arrays.mismatch} on the char array pair */
    @Benchmark
    public int testCharMatch() {
        return Arrays.mismatch(carray1, carray2);
    }

    /** @return result of {@code Arrays.mismatch} on the short array pair */
    @Benchmark
    public int testShortMatch() {
        return Arrays.mismatch(sarray1, sarray2);
    }

    /** @return result of {@code Arrays.mismatch} on the int array pair */
    @Benchmark
    public int testIntMatch() {
        return Arrays.mismatch(iarray1, iarray2);
    }

    /** @return result of {@code Arrays.mismatch} on the long array pair */
    @Benchmark
    public int testLongMatch() {
        return Arrays.mismatch(larray1, larray2);
    }

    /** @return result of {@code Arrays.mismatch} on the float array pair */
    @Benchmark
    public int testFloatMatch() {
        return Arrays.mismatch(farray1, farray2);
    }

    /** @return result of {@code Arrays.mismatch} on the double array pair */
    @Benchmark
    public int testDoubleMatch() {
        return Arrays.mismatch(darray1, darray2);
    }
}