8266951: Partial in-lining for vectorized mismatch operation using AVX512 masked instructions
Reviewed-by: psandoz, vlivanov
This commit is contained in:
parent
f768fbf7a9
commit
b05c40ca3b
@ -2572,6 +2572,13 @@ void Assembler::knotwl(KRegister dst, KRegister src) {
|
||||
emit_int16(0x44, (0xC0 | encode));
|
||||
}
|
||||
|
||||
// Emits the 64-bit opmask NOT instruction (KNOTQ in the Intel SDM): dst receives the
// bitwise complement of src. Both operands are opmask (K) registers.
// Asserts that the CPU reports AVX512BW support.
void Assembler::knotql(KRegister dst, KRegister src) {
|
||||
assert(VM_Version::supports_avx512bw(), "");
|
||||
// Attributes: rex_w set, legacy_mode set, no mask register, vector length not used.
InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||
// The second (nds) operand slot is unused for this two-operand form, hence 0.
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
|
||||
// Opcode byte 0x44 followed by a register-direct ModRM byte (0xC0 | encode).
emit_int16(0x44, (0xC0 | encode));
|
||||
}
|
||||
|
||||
// This instruction produces ZF or CF flags
|
||||
void Assembler::kortestbl(KRegister src1, KRegister src2) {
|
||||
assert(VM_Version::supports_avx512dq(), "");
|
||||
|
@ -1480,6 +1480,7 @@ private:
|
||||
void kmovql(Register dst, KRegister src);
|
||||
|
||||
void knotwl(KRegister dst, KRegister src);
|
||||
void knotql(KRegister dst, KRegister src);
|
||||
|
||||
void kortestbl(KRegister dst, KRegister src);
|
||||
void kortestwl(KRegister dst, KRegister src);
|
||||
|
@ -1923,7 +1923,7 @@ void C2_MacroAssembler::reduce8L(int opcode, Register dst, Register src1, XMMReg
|
||||
}
|
||||
|
||||
void C2_MacroAssembler::genmask(KRegister dst, Register len, Register temp) {
|
||||
assert(ArrayCopyPartialInlineSize <= 64,"");
|
||||
assert(ArrayOperationPartialInlineSize > 0 && ArrayOperationPartialInlineSize <= 64, "invalid");
|
||||
mov64(temp, -1L);
|
||||
bzhiq(temp, temp, len);
|
||||
kmovql(dst, temp);
|
||||
@ -2140,11 +2140,37 @@ void C2_MacroAssembler::get_elem(BasicType typ, XMMRegister dst, XMMRegister src
|
||||
}
|
||||
}
|
||||
|
||||
// Emits an opmask-producing vector compare, choosing the element-width-specific
// encoder (evpcmpb / evpcmpw / evpcmpd / evpcmpq) from the basic type `typ`.
//   typ         - element type of the vectors; only selects the lane width here
//   kdmask, ksmask, src1, src2, comparison, vector_len
//               - forwarded unchanged to the selected encoder
//                 (kdmask/ksmask are presumably the result mask and the governing
//                 mask -- confirm against the evpcmp{b,w,d,q} declarations)
// T_FLOAT and T_DOUBLE share the 32-bit and 64-bit branches with T_INT and T_LONG.
// Any other type trips the assert in the default branch; no instruction is emitted for it.
void C2_MacroAssembler::evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, XMMRegister src2, int comparison, int vector_len) {
|
||||
switch(typ) {
|
||||
// 8-bit lanes
case T_BYTE:
|
||||
case T_BOOLEAN:
|
||||
evpcmpb(kdmask, ksmask, src1, src2, comparison, vector_len);
|
||||
break;
|
||||
// 16-bit lanes
case T_SHORT:
|
||||
case T_CHAR:
|
||||
evpcmpw(kdmask, ksmask, src1, src2, comparison, vector_len);
|
||||
break;
|
||||
// 32-bit lanes
case T_INT:
|
||||
case T_FLOAT:
|
||||
evpcmpd(kdmask, ksmask, src1, src2, comparison, vector_len);
|
||||
break;
|
||||
// 64-bit lanes
case T_LONG:
|
||||
case T_DOUBLE:
|
||||
evpcmpq(kdmask, ksmask, src1, src2, comparison, vector_len);
|
||||
break;
|
||||
default:
|
||||
// Unsupported element type: flagged by the assert, otherwise falls out without emitting code.
assert(false,"Should not reach here.");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void C2_MacroAssembler::evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch) {
|
||||
switch(typ) {
|
||||
case T_BOOLEAN:
|
||||
case T_BYTE:
|
||||
evpcmpb(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch);
|
||||
break;
|
||||
case T_CHAR:
|
||||
case T_SHORT:
|
||||
evpcmpw(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch);
|
||||
break;
|
||||
|
@ -139,6 +139,7 @@ public:
|
||||
|
||||
// blend
|
||||
void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch = rscratch1);
|
||||
void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, XMMRegister src2, int comparison, int vector_len);
|
||||
void evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len);
|
||||
|
||||
void load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt);
|
||||
|
@ -1419,12 +1419,12 @@ void VM_Version::get_processor_features() {
|
||||
}
|
||||
#ifdef COMPILER2
|
||||
if (UseAVX > 2) {
|
||||
if (FLAG_IS_DEFAULT(ArrayCopyPartialInlineSize) ||
|
||||
(!FLAG_IS_DEFAULT(ArrayCopyPartialInlineSize) &&
|
||||
ArrayCopyPartialInlineSize != 0 &&
|
||||
ArrayCopyPartialInlineSize != 32 &&
|
||||
ArrayCopyPartialInlineSize != 16 &&
|
||||
ArrayCopyPartialInlineSize != 64)) {
|
||||
if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
|
||||
(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
|
||||
ArrayOperationPartialInlineSize != 0 &&
|
||||
ArrayOperationPartialInlineSize != 16 &&
|
||||
ArrayOperationPartialInlineSize != 32 &&
|
||||
ArrayOperationPartialInlineSize != 64)) {
|
||||
int inline_size = 0;
|
||||
if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
|
||||
inline_size = 64;
|
||||
@ -1433,18 +1433,18 @@ void VM_Version::get_processor_features() {
|
||||
} else if (MaxVectorSize >= 16) {
|
||||
inline_size = 16;
|
||||
}
|
||||
if(!FLAG_IS_DEFAULT(ArrayCopyPartialInlineSize)) {
|
||||
warning("Setting ArrayCopyPartialInlineSize as %d", inline_size);
|
||||
if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
|
||||
warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
|
||||
}
|
||||
ArrayCopyPartialInlineSize = inline_size;
|
||||
ArrayOperationPartialInlineSize = inline_size;
|
||||
}
|
||||
|
||||
if (ArrayCopyPartialInlineSize > MaxVectorSize) {
|
||||
ArrayCopyPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
|
||||
if (ArrayCopyPartialInlineSize) {
|
||||
warning("Setting ArrayCopyPartialInlineSize as MaxVectorSize" INTX_FORMAT ")", MaxVectorSize);
|
||||
if (ArrayOperationPartialInlineSize > MaxVectorSize) {
|
||||
ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
|
||||
if (ArrayOperationPartialInlineSize) {
|
||||
warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize" INTX_FORMAT ")", MaxVectorSize);
|
||||
} else {
|
||||
warning("Setting ArrayCopyPartialInlineSize as " INTX_FORMAT, ArrayCopyPartialInlineSize);
|
||||
warning("Setting ArrayOperationPartialInlineSize as " INTX_FORMAT, ArrayOperationPartialInlineSize);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1578,6 +1578,7 @@ const bool Matcher::match_rule_supported(int opcode) {
|
||||
}
|
||||
break;
|
||||
|
||||
case Op_VectorCmpMasked:
|
||||
case Op_VectorMaskGen:
|
||||
case Op_LoadVectorMasked:
|
||||
case Op_StoreVectorMasked:
|
||||
@ -1678,6 +1679,7 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
|
||||
break;
|
||||
case Op_ClearArray:
|
||||
case Op_VectorMaskGen:
|
||||
case Op_VectorCmpMasked:
|
||||
case Op_LoadVectorMasked:
|
||||
case Op_StoreVectorMasked:
|
||||
if (!is_LP64 || !VM_Version::supports_avx512bw()) {
|
||||
@ -8084,7 +8086,34 @@ instruct vprorate(vec dst, vec src, vec shift) %{
|
||||
%}
|
||||
|
||||
#ifdef _LP64
|
||||
// ---------------------------------- Masked Block Copy ------------------------------------
|
||||
// ---------------------------------- Masked Operations ------------------------------------
|
||||
|
||||
// Matches VectorCmpMasked: compares src1 and src2 for lane-wise equality under `mask`
// and produces an int in dst:
//   -1  when the carry branch below is taken (KORTESTQ of ~mask and the compare result),
//   otherwise the trailing-zero count of the inverted compare result, i.e. the index
//   of the lowest lane whose "equal" bit is clear.
// ktmp1 and ktmp2 are scratch opmask registers; the flags register is killed.
instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
|
||||
match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
|
||||
effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
|
||||
format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
|
||||
ins_encode %{
|
||||
// Both vector operands must agree in length encoding and element type.
assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
|
||||
assert(vector_element_basic_type(this, $src1) == vector_element_basic_type(this, $src2), "mismatch");
|
||||
|
||||
Label DONE;
|
||||
int vlen_enc = vector_length_encoding(this, $src1);
|
||||
BasicType elem_bt = vector_element_basic_type(this, $src1);
|
||||
|
||||
// ktmp2 = ~mask: one bit set for every lane that is NOT selected by the mask.
__ knotql($ktmp2$$KRegister, $mask$$KRegister);
|
||||
// Preload the result returned when the carry branch is taken.
__ mov64($dst$$Register, -1L);
|
||||
// ktmp1 = per-lane (src1 == src2), computed under `mask`.
__ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
|
||||
// Per the Intel SDM, KORTEST sets CF when the OR of its operands is all ones,
// which here means every lane is either unselected or equal.
__ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
|
||||
__ jccb(Assembler::carrySet, DONE);
|
||||
// Otherwise: dst = tzcnt(~ktmp1), the position of the lowest clear "equal" bit.
__ kmovql($dst$$Register, $ktmp1$$KRegister);
|
||||
__ notq($dst$$Register);
|
||||
__ tzcntq($dst$$Register, $dst$$Register);
|
||||
__ bind(DONE);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
|
||||
instruct vmasked_load64(vec dst, memory mem, kReg mask) %{
|
||||
match(Set dst (LoadVectorMasked mem mask));
|
||||
format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
|
||||
|
@ -789,6 +789,7 @@ bool InstructForm::captures_bottom_type(FormDict &globals) const {
|
||||
!strcmp(_matrule->_rChild->_opType,"ShenandoahCompareAndExchangeN") ||
|
||||
#endif
|
||||
!strcmp(_matrule->_rChild->_opType,"StrInflatedCopy") ||
|
||||
!strcmp(_matrule->_rChild->_opType,"VectorCmpMasked")||
|
||||
!strcmp(_matrule->_rChild->_opType,"VectorMaskGen")||
|
||||
!strcmp(_matrule->_rChild->_opType,"CompareAndExchangeP") ||
|
||||
!strcmp(_matrule->_rChild->_opType,"CompareAndExchangeN"))) return true;
|
||||
|
@ -738,7 +738,7 @@ bool ArrayCopyNode::modifies(intptr_t offset_lo, intptr_t offset_hi, PhaseTransf
|
||||
|
||||
// As an optimization, choose optimum vector size for copy length known at compile time.
|
||||
int ArrayCopyNode::get_partial_inline_vector_lane_count(BasicType type, int const_len) {
|
||||
int lane_count = ArrayCopyPartialInlineSize/type2aelembytes(type);
|
||||
int lane_count = ArrayOperationPartialInlineSize/type2aelembytes(type);
|
||||
if (const_len > 0) {
|
||||
int size_in_bytes = const_len * type2aelembytes(type);
|
||||
if (size_in_bytes <= 16)
|
||||
|
@ -82,9 +82,10 @@
|
||||
"actual size could be less depending on elements type") \
|
||||
range(0, max_jint) \
|
||||
\
|
||||
product(intx, ArrayCopyPartialInlineSize, -1, DIAGNOSTIC, \
|
||||
"Partial inline size used for array copy acceleration.") \
|
||||
range(-1, 64) \
|
||||
/* Diagnostic flag; code in this change only enables partial inlining when the value is > 0. */ \
product(intx, ArrayOperationPartialInlineSize, 0, DIAGNOSTIC, \
|
||||
"Partial inline size used for small array operations " \
|
||||
"(e.g. copy,cmp) acceleration.") \
|
||||
range(0, 64) \
|
||||
\
|
||||
product(bool, AlignVector, true, \
|
||||
"Perform vector store/load alignment in loop") \
|
||||
|
@ -78,6 +78,11 @@ class CastIINode: public ConstraintCastNode {
|
||||
: ConstraintCastNode(n, t, carry_dependency), _range_check_dependency(range_check_dependency) {
|
||||
init_class_id(Class_CastII);
|
||||
}
|
||||
// Constructor variant that also takes a control node.
//   ctrl                   - stored as required input 0 of the new node
//   n, t, carry_dependency - forwarded to the ConstraintCastNode base constructor
//   range_check_dependency - recorded in _range_check_dependency
CastIINode(Node* ctrl, Node* n, const Type* t, bool carry_dependency = false, bool range_check_dependency = false)
|
||||
: ConstraintCastNode(n, t, carry_dependency), _range_check_dependency(range_check_dependency) {
|
||||
init_class_id(Class_CastII);
|
||||
// Attach the supplied control node as input 0.
init_req(0, ctrl);
|
||||
}
|
||||
virtual int Opcode() const;
|
||||
virtual uint ideal_reg() const { return Op_RegI; }
|
||||
virtual Node* Identity(PhaseGVN* phase);
|
||||
@ -103,6 +108,11 @@ class CastIINode: public ConstraintCastNode {
|
||||
|
||||
class CastLLNode: public ConstraintCastNode {
|
||||
public:
|
||||
// Constructor variant that also takes a control node.
//   ctrl                   - stored as required input 0 of the new node
//   n, t, carry_dependency - forwarded to the ConstraintCastNode base constructor
CastLLNode(Node* ctrl, Node* n, const Type* t, bool carry_dependency = false)
|
||||
: ConstraintCastNode(n, t, carry_dependency) {
|
||||
init_class_id(Class_CastLL);
|
||||
// Attach the supplied control node as input 0.
init_req(0, ctrl);
|
||||
}
|
||||
CastLLNode(Node* n, const Type* t, bool carry_dependency = false)
|
||||
: ConstraintCastNode(n, t, carry_dependency){
|
||||
init_class_id(Class_CastLL);
|
||||
|
@ -417,6 +417,7 @@ macro(StoreVector)
|
||||
macro(StoreVectorScatter)
|
||||
macro(LoadVectorMasked)
|
||||
macro(StoreVectorMasked)
|
||||
macro(VectorCmpMasked)
|
||||
macro(VectorMaskGen)
|
||||
macro(VectorMaskOp)
|
||||
macro(VectorMaskTrueCount)
|
||||
|
@ -3410,6 +3410,7 @@ void Compile::final_graph_reshaping_main_switch(Node* n, Final_Reshape_Counts& f
|
||||
case Op_StoreVector:
|
||||
case Op_LoadVectorGather:
|
||||
case Op_StoreVectorScatter:
|
||||
case Op_VectorCmpMasked:
|
||||
case Op_VectorMaskGen:
|
||||
case Op_LoadVectorMasked:
|
||||
case Op_StoreVectorMasked:
|
||||
|
@ -5236,46 +5236,134 @@ bool LibraryCallKit::inline_bigIntegerShift(bool isRightShift) {
|
||||
|
||||
//-------------inline_vectorizedMismatch------------------------------
|
||||
bool LibraryCallKit::inline_vectorizedMismatch() {
|
||||
assert(UseVectorizedMismatchIntrinsic, "not implementated on this platform");
|
||||
assert(UseVectorizedMismatchIntrinsic, "not implemented on this platform");
|
||||
|
||||
address stubAddr = StubRoutines::vectorizedMismatch();
|
||||
if (stubAddr == NULL) {
|
||||
return false; // Intrinsic's stub is not implemented on this platform
|
||||
}
|
||||
const char* stubName = "vectorizedMismatch";
|
||||
int size_l = callee()->signature()->size();
|
||||
assert(callee()->signature()->size() == 8, "vectorizedMismatch has 6 parameters");
|
||||
Node* obja = argument(0); // Object
|
||||
Node* aoffset = argument(1); // long
|
||||
Node* objb = argument(3); // Object
|
||||
Node* boffset = argument(4); // long
|
||||
Node* length = argument(6); // int
|
||||
Node* scale = argument(7); // int
|
||||
|
||||
Node* obja = argument(0);
|
||||
Node* aoffset = argument(1);
|
||||
Node* objb = argument(3);
|
||||
Node* boffset = argument(4);
|
||||
Node* length = argument(6);
|
||||
Node* scale = argument(7);
|
||||
|
||||
const Type* a_type = obja->Value(&_gvn);
|
||||
const Type* b_type = objb->Value(&_gvn);
|
||||
const TypeAryPtr* top_a = a_type->isa_aryptr();
|
||||
const TypeAryPtr* top_b = b_type->isa_aryptr();
|
||||
if (top_a == NULL || top_a->klass() == NULL ||
|
||||
top_b == NULL || top_b->klass() == NULL) {
|
||||
// failed array check
|
||||
return false;
|
||||
const TypeAryPtr* obja_t = _gvn.type(obja)->isa_aryptr();
|
||||
const TypeAryPtr* objb_t = _gvn.type(objb)->isa_aryptr();
|
||||
if (obja_t == NULL || obja_t->klass() == NULL ||
|
||||
objb_t == NULL || objb_t->klass() == NULL ||
|
||||
scale == top()) {
|
||||
return false; // failed input validation
|
||||
}
|
||||
|
||||
Node* call;
|
||||
jvms()->set_should_reexecute(true);
|
||||
|
||||
Node* obja_adr = make_unsafe_address(obja, aoffset);
|
||||
Node* objb_adr = make_unsafe_address(objb, boffset);
|
||||
|
||||
call = make_runtime_call(RC_LEAF,
|
||||
OptoRuntime::vectorizedMismatch_Type(),
|
||||
stubAddr, stubName, TypePtr::BOTTOM,
|
||||
obja_adr, objb_adr, length, scale);
|
||||
// Partial inlining handling for inputs smaller than ArrayOperationPartialInlineSize bytes in size.
|
||||
//
|
||||
// inline_limit = ArrayOperationPartialInlineSize / element_size;
|
||||
// if (length <= inline_limit) {
|
||||
// inline_path:
|
||||
// vmask = VectorMaskGen length
|
||||
// vload1 = LoadVectorMasked obja, vmask
|
||||
// vload2 = LoadVectorMasked objb, vmask
|
||||
// result1 = VectorCmpMasked vload1, vload2, vmask
|
||||
// } else {
|
||||
// call_stub_path:
|
||||
// result2 = call vectorizedMismatch_stub(obja, objb, length, scale)
|
||||
// }
|
||||
// exit_block:
|
||||
// return Phi(result1, result2);
|
||||
//
|
||||
enum { inline_path = 1, // input is small enough to process it all at once
|
||||
stub_path = 2, // input is too large; call into the VM
|
||||
PATH_LIMIT = 3
|
||||
};
|
||||
|
||||
Node* exit_block = new RegionNode(PATH_LIMIT);
|
||||
Node* result_phi = new PhiNode(exit_block, TypeInt::INT);
|
||||
Node* memory_phi = new PhiNode(exit_block, Type::MEMORY, TypePtr::BOTTOM);
|
||||
|
||||
Node* call_stub_path = control();
|
||||
|
||||
BasicType elem_bt = T_ILLEGAL;
|
||||
|
||||
const TypeInt* scale_t = _gvn.type(scale)->is_int();
|
||||
if (scale_t->is_con()) {
|
||||
switch (scale_t->get_con()) {
|
||||
case 0: elem_bt = T_BYTE; break;
|
||||
case 1: elem_bt = T_SHORT; break;
|
||||
case 2: elem_bt = T_INT; break;
|
||||
case 3: elem_bt = T_LONG; break;
|
||||
|
||||
default: elem_bt = T_ILLEGAL; break; // not supported
|
||||
}
|
||||
}
|
||||
|
||||
int inline_limit = 0;
|
||||
bool do_partial_inline = false;
|
||||
|
||||
if (elem_bt != T_ILLEGAL && ArrayOperationPartialInlineSize > 0) {
|
||||
inline_limit = ArrayOperationPartialInlineSize / type2aelembytes(elem_bt);
|
||||
do_partial_inline = inline_limit >= 16;
|
||||
}
|
||||
|
||||
if (do_partial_inline) {
|
||||
assert(elem_bt != T_ILLEGAL, "sanity");
|
||||
|
||||
const TypeVect* vt = TypeVect::make(elem_bt, inline_limit);
|
||||
|
||||
if (Matcher::match_rule_supported_vector(Op_VectorMaskGen, inline_limit, elem_bt) &&
|
||||
Matcher::match_rule_supported_vector(Op_LoadVectorMasked, inline_limit, elem_bt) &&
|
||||
Matcher::match_rule_supported_vector(Op_VectorCmpMasked, inline_limit, elem_bt)) {
|
||||
|
||||
Node* cmp_length = _gvn.transform(new CmpINode(length, intcon(inline_limit)));
|
||||
Node* bol_gt = _gvn.transform(new BoolNode(cmp_length, BoolTest::gt));
|
||||
|
||||
call_stub_path = generate_guard(bol_gt, NULL, PROB_MIN);
|
||||
|
||||
if (!stopped()) {
|
||||
Node* casted_length = _gvn.transform(new CastIINode(control(), length, TypeInt::make(0, inline_limit, Type::WidenMin)));
|
||||
|
||||
const TypePtr* obja_adr_t = _gvn.type(obja_adr)->isa_ptr();
|
||||
const TypePtr* objb_adr_t = _gvn.type(objb_adr)->isa_ptr();
|
||||
Node* obja_adr_mem = memory(C->get_alias_index(obja_adr_t));
|
||||
Node* objb_adr_mem = memory(C->get_alias_index(objb_adr_t));
|
||||
|
||||
Node* vmask = _gvn.transform(new VectorMaskGenNode(ConvI2X(casted_length), TypeVect::VECTMASK, elem_bt));
|
||||
Node* vload_obja = _gvn.transform(new LoadVectorMaskedNode(control(), obja_adr_mem, obja_adr, obja_adr_t, vt, vmask));
|
||||
Node* vload_objb = _gvn.transform(new LoadVectorMaskedNode(control(), objb_adr_mem, objb_adr, objb_adr_t, vt, vmask));
|
||||
Node* result = _gvn.transform(new VectorCmpMaskedNode(vload_obja, vload_objb, vmask, TypeInt::INT));
|
||||
|
||||
exit_block->init_req(inline_path, control());
|
||||
memory_phi->init_req(inline_path, map()->memory());
|
||||
result_phi->init_req(inline_path, result);
|
||||
|
||||
C->set_max_vector_size(MAX2((uint)ArrayOperationPartialInlineSize, C->max_vector_size()));
|
||||
clear_upper_avx();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (call_stub_path != NULL) {
|
||||
set_control(call_stub_path);
|
||||
|
||||
Node* call = make_runtime_call(RC_LEAF,
|
||||
OptoRuntime::vectorizedMismatch_Type(),
|
||||
StubRoutines::vectorizedMismatch(), "vectorizedMismatch", TypePtr::BOTTOM,
|
||||
obja_adr, objb_adr, length, scale);
|
||||
|
||||
exit_block->init_req(stub_path, control());
|
||||
memory_phi->init_req(stub_path, map()->memory());
|
||||
result_phi->init_req(stub_path, _gvn.transform(new ProjNode(call, TypeFunc::Parms)));
|
||||
}
|
||||
|
||||
exit_block = _gvn.transform(exit_block);
|
||||
memory_phi = _gvn.transform(memory_phi);
|
||||
result_phi = _gvn.transform(result_phi);
|
||||
|
||||
set_control(exit_block);
|
||||
set_all_memory(memory_phi);
|
||||
set_result(result_phi);
|
||||
|
||||
Node* result = _gvn.transform(new ProjNode(call, TypeFunc::Parms));
|
||||
set_result(result);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -32,6 +32,7 @@
|
||||
#include "opto/graphKit.hpp"
|
||||
#include "opto/macro.hpp"
|
||||
#include "opto/runtime.hpp"
|
||||
#include "opto/castnode.hpp"
|
||||
#include "runtime/stubRoutines.hpp"
|
||||
#include "utilities/align.hpp"
|
||||
#include "utilities/powerOfTwo.hpp"
|
||||
@ -174,8 +175,8 @@ void PhaseMacroExpand::generate_limit_guard(Node** ctrl, Node* offset, Node* sub
|
||||
|
||||
//
|
||||
// Partial in-lining handling for smaller conjoint/disjoint array copies having
|
||||
// length(in bytes) less than ArrayCopyPartialInlineSize.
|
||||
// if (length <= ArrayCopyPartialInlineSize) {
|
||||
// length(in bytes) less than ArrayOperationPartialInlineSize.
|
||||
// if (length <= ArrayOperationPartialInlineSize) {
|
||||
// partial_inlining_block:
|
||||
// mask = Mask_Gen
|
||||
// vload = LoadVectorMasked src , mask
|
||||
@ -216,24 +217,27 @@ void PhaseMacroExpand::generate_partial_inlining_block(Node** ctrl, MergeMemNode
|
||||
// Return if copy length is greater than partial inline size limit or
|
||||
// target does not support masked loads/stores.
|
||||
int lane_count = ArrayCopyNode::get_partial_inline_vector_lane_count(type, const_len);
|
||||
if ( const_len > ArrayCopyPartialInlineSize ||
|
||||
if ( const_len > ArrayOperationPartialInlineSize ||
|
||||
!Matcher::match_rule_supported_vector(Op_LoadVectorMasked, lane_count, type) ||
|
||||
!Matcher::match_rule_supported_vector(Op_StoreVectorMasked, lane_count, type) ||
|
||||
!Matcher::match_rule_supported_vector(Op_VectorMaskGen, lane_count, type)) {
|
||||
return;
|
||||
}
|
||||
|
||||
int inline_limit = ArrayOperationPartialInlineSize / type2aelembytes(type);
|
||||
Node* casted_length = new CastLLNode(*ctrl, length, TypeLong::make(0, inline_limit, Type::WidenMin));
|
||||
transform_later(casted_length);
|
||||
Node* copy_bytes = new LShiftXNode(length, intcon(shift));
|
||||
transform_later(copy_bytes);
|
||||
|
||||
Node* cmp_le = new CmpULNode(copy_bytes, longcon(ArrayCopyPartialInlineSize));
|
||||
Node* cmp_le = new CmpULNode(copy_bytes, longcon(ArrayOperationPartialInlineSize));
|
||||
transform_later(cmp_le);
|
||||
Node* bol_le = new BoolNode(cmp_le, BoolTest::le);
|
||||
transform_later(bol_le);
|
||||
inline_block = generate_guard(ctrl, bol_le, NULL, PROB_FAIR);
|
||||
stub_block = *ctrl;
|
||||
|
||||
Node* mask_gen = new VectorMaskGenNode(length, TypeVect::VECTMASK, Type::get_const_basic_type(type));
|
||||
Node* mask_gen = new VectorMaskGenNode(casted_length, TypeVect::VECTMASK, type);
|
||||
transform_later(mask_gen);
|
||||
|
||||
unsigned vec_size = lane_count * type2aelembytes(type);
|
||||
@ -1187,7 +1191,7 @@ bool PhaseMacroExpand::generate_unchecked_arraycopy(Node** ctrl, MergeMemNode**
|
||||
|
||||
Node* result_memory = NULL;
|
||||
RegionNode* exit_block = NULL;
|
||||
if (ArrayCopyPartialInlineSize > 0 && is_subword_type(basic_elem_type) &&
|
||||
if (ArrayOperationPartialInlineSize > 0 && is_subword_type(basic_elem_type) &&
|
||||
Matcher::vector_width_in_bytes(basic_elem_type) >= 16) {
|
||||
generate_partial_inlining_block(ctrl, mem, adr_type, &exit_block, &result_memory,
|
||||
copy_length, src_start, dest_start, basic_elem_type);
|
||||
|
@ -2227,6 +2227,7 @@ bool Matcher::find_shared_visit(MStack& mstack, Node* n, uint opcode, bool& mem_
|
||||
case Op_FmaVF:
|
||||
case Op_MacroLogicV:
|
||||
case Op_LoadVectorMasked:
|
||||
case Op_VectorCmpMasked:
|
||||
set_shared(n); // Force result into register (it will be anyways)
|
||||
break;
|
||||
case Op_ConP: { // Convert pointers above the centerline to NUL
|
||||
@ -2320,6 +2321,12 @@ void Matcher::find_shared_post_visit(Node* n, uint opcode) {
|
||||
n->del_req(3);
|
||||
break;
|
||||
}
|
||||
case Op_VectorCmpMasked: {
|
||||
Node* pair1 = new BinaryNode(n->in(2), n->in(3));
|
||||
n->set_req(2, pair1);
|
||||
n->del_req(3);
|
||||
break;
|
||||
}
|
||||
case Op_MacroLogicV: {
|
||||
Node* pair1 = new BinaryNode(n->in(1), n->in(2));
|
||||
Node* pair2 = new BinaryNode(n->in(3), n->in(4));
|
||||
|
@ -721,35 +721,39 @@ StoreVectorNode* StoreVectorNode::make(int opc, Node* ctl, Node* mem,
|
||||
}
|
||||
|
||||
Node* LoadVectorMaskedNode::Ideal(PhaseGVN* phase, bool can_reshape) {
|
||||
Node* mask_len = in(3)->in(1);
|
||||
const TypeLong* ty = phase->type(mask_len)->isa_long();
|
||||
if (ty && ty->is_con()) {
|
||||
BasicType mask_bt = ((VectorMaskGenNode*)in(3))->get_elem_type()->array_element_basic_type();
|
||||
uint load_sz = type2aelembytes(mask_bt) * ty->get_con();
|
||||
if ( load_sz == 32 || load_sz == 64) {
|
||||
assert(load_sz == 32 || MaxVectorSize > 32, "Unexpected load size");
|
||||
Node* ctr = in(MemNode::Control);
|
||||
Node* mem = in(MemNode::Memory);
|
||||
Node* adr = in(MemNode::Address);
|
||||
return phase->transform(new LoadVectorNode(ctr, mem, adr, adr_type(), vect_type()));
|
||||
if (!in(3)->is_top() && in(3)->Opcode() == Op_VectorMaskGen) {
|
||||
Node* mask_len = in(3)->in(1);
|
||||
const TypeLong* ty = phase->type(mask_len)->isa_long();
|
||||
if (ty && ty->is_con()) {
|
||||
BasicType mask_bt = ((VectorMaskGenNode*)in(3))->get_elem_type();
|
||||
uint load_sz = type2aelembytes(mask_bt) * ty->get_con();
|
||||
if ( load_sz == 32 || load_sz == 64) {
|
||||
assert(load_sz == 32 || MaxVectorSize > 32, "Unexpected load size");
|
||||
Node* ctr = in(MemNode::Control);
|
||||
Node* mem = in(MemNode::Memory);
|
||||
Node* adr = in(MemNode::Address);
|
||||
return phase->transform(new LoadVectorNode(ctr, mem, adr, adr_type(), vect_type()));
|
||||
}
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
Node* StoreVectorMaskedNode::Ideal(PhaseGVN* phase, bool can_reshape) {
|
||||
Node* mask_len = in(4)->in(1);
|
||||
const TypeLong* ty = phase->type(mask_len)->isa_long();
|
||||
if (ty && ty->is_con()) {
|
||||
BasicType mask_bt = ((VectorMaskGenNode*)in(4))->get_elem_type()->array_element_basic_type();
|
||||
uint load_sz = type2aelembytes(mask_bt) * ty->get_con();
|
||||
if ( load_sz == 32 || load_sz == 64) {
|
||||
assert(load_sz == 32 || MaxVectorSize > 32, "Unexpected store size");
|
||||
Node* ctr = in(MemNode::Control);
|
||||
Node* mem = in(MemNode::Memory);
|
||||
Node* adr = in(MemNode::Address);
|
||||
Node* val = in(MemNode::ValueIn);
|
||||
return phase->transform(new StoreVectorNode(ctr, mem, adr, adr_type(), val));
|
||||
if (!in(4)->is_top() && in(4)->Opcode() == Op_VectorMaskGen) {
|
||||
Node* mask_len = in(4)->in(1);
|
||||
const TypeLong* ty = phase->type(mask_len)->isa_long();
|
||||
if (ty && ty->is_con()) {
|
||||
BasicType mask_bt = ((VectorMaskGenNode*)in(4))->get_elem_type();
|
||||
uint load_sz = type2aelembytes(mask_bt) * ty->get_con();
|
||||
if ( load_sz == 32 || load_sz == 64) {
|
||||
assert(load_sz == 32 || MaxVectorSize > 32, "Unexpected store size");
|
||||
Node* ctr = in(MemNode::Control);
|
||||
Node* mem = in(MemNode::Memory);
|
||||
Node* adr = in(MemNode::Address);
|
||||
Node* val = in(MemNode::ValueIn);
|
||||
return phase->transform(new StoreVectorNode(ctr, mem, adr, adr_type(), val));
|
||||
}
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
|
@ -800,6 +800,8 @@ class StoreVectorNode : public StoreNode {
|
||||
idx == MemNode::ValueIn + 1; }
|
||||
};
|
||||
|
||||
//------------------------------StoreVectorMaskedNode--------------------------------
|
||||
// Store Vector to memory under the influence of a predicate register(mask).
|
||||
class StoreVectorMaskedNode : public StoreVectorNode {
|
||||
public:
|
||||
StoreVectorMaskedNode(Node* c, Node* mem, Node* dst, Node* src, const TypePtr* at, Node* mask)
|
||||
@ -818,6 +820,8 @@ class StoreVectorMaskedNode : public StoreVectorNode {
|
||||
Node* Ideal(PhaseGVN* phase, bool can_reshape);
|
||||
};
|
||||
|
||||
//------------------------------LoadVectorMaskedNode--------------------------------
|
||||
// Load Vector from memory under the influence of a predicate register(mask).
|
||||
class LoadVectorMaskedNode : public LoadVectorNode {
|
||||
public:
|
||||
LoadVectorMaskedNode(Node* c, Node* mem, Node* src, const TypePtr* at, const TypeVect* vt, Node* mask)
|
||||
@ -836,21 +840,36 @@ class LoadVectorMaskedNode : public LoadVectorNode {
|
||||
Node* Ideal(PhaseGVN* phase, bool can_reshape);
|
||||
};
|
||||
|
||||
|
||||
//------------------------------VectorCmpMaskedNode--------------------------------
|
||||
// Vector Comparison under the influence of a predicate register(mask).
|
||||
class VectorCmpMaskedNode : public TypeNode {
|
||||
public:
|
||||
// Builds a node with four input slots and result type `ty`.
//   in(1) = src1, in(2) = src2 : the two vector operands to compare
//   in(3) = mask               : the predicate (mask) operand
// Slot 0 is not set by this constructor.
VectorCmpMaskedNode(Node* src1, Node* src2, Node* mask, const Type* ty): TypeNode(ty, 4) {
|
||||
init_req(1, src1);
|
||||
init_req(2, src2);
|
||||
init_req(3, mask);
|
||||
}
|
||||
|
||||
// Opcode accessor; only declared here, defined elsewhere.
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
|
||||
class VectorMaskGenNode : public TypeNode {
|
||||
public:
|
||||
VectorMaskGenNode(Node* length, const Type* ty, const Type* ety): TypeNode(ty, 2), _elemType(ety) {
|
||||
VectorMaskGenNode(Node* length, const Type* ty, BasicType ety): TypeNode(ty, 2), _elemType(ety) {
|
||||
init_req(1, length);
|
||||
}
|
||||
|
||||
virtual int Opcode() const;
|
||||
const Type* get_elem_type() { return _elemType;}
|
||||
BasicType get_elem_type() { return _elemType;}
|
||||
virtual uint size_of() const { return sizeof(VectorMaskGenNode); }
|
||||
virtual uint ideal_reg() const {
|
||||
return Op_RegVectMask;
|
||||
}
|
||||
|
||||
private:
|
||||
const Type* _elemType;
|
||||
BasicType _elemType;
|
||||
};
|
||||
|
||||
class VectorMaskOpNode : public TypeNode {
|
||||
|
@ -30,25 +30,25 @@ import java.util.Random;
|
||||
* @summary Optimize arrayCopy using AVX-512 masked instructions.
|
||||
*
|
||||
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions
|
||||
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayCopyPartialInlineSize=0 -XX:MaxVectorSize=32 -XX:+UnlockDiagnosticVMOptions
|
||||
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayOperationPartialInlineSize=0 -XX:MaxVectorSize=32 -XX:+UnlockDiagnosticVMOptions
|
||||
* compiler.arraycopy.TestArrayCopyConjoint
|
||||
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions
|
||||
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayCopyPartialInlineSize=0 -XX:MaxVectorSize=64
|
||||
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayOperationPartialInlineSize=0 -XX:MaxVectorSize=64
|
||||
* compiler.arraycopy.TestArrayCopyConjoint
|
||||
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions
|
||||
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayCopyPartialInlineSize=32 -XX:+UnlockDiagnosticVMOptions -XX:MaxVectorSize=32 -XX:+UnlockDiagnosticVMOption
|
||||
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayOperationPartialInlineSize=32 -XX:+UnlockDiagnosticVMOptions -XX:MaxVectorSize=32 -XX:+UnlockDiagnosticVMOption
|
||||
* compiler.arraycopy.TestArrayCopyConjoint
|
||||
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions
|
||||
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayCopyPartialInlineSize=32 -XX:+UnlockDiagnosticVMOptions -XX:MaxVectorSize=64
|
||||
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayOperationPartialInlineSize=32 -XX:+UnlockDiagnosticVMOptions -XX:MaxVectorSize=64
|
||||
* compiler.arraycopy.TestArrayCopyConjoint
|
||||
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions
|
||||
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayCopyPartialInlineSize=64 -XX:MaxVectorSize=64
|
||||
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayOperationPartialInlineSize=64 -XX:MaxVectorSize=64
|
||||
* compiler.arraycopy.TestArrayCopyConjoint
|
||||
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions
|
||||
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayCopyPartialInlineSize=32 -XX:+UnlockDiagnosticVMOptions -XX:MaxVectorSize=32 -XX:+UnlockDiagnosticVMOption -XX:ArrayCopyLoadStoreMaxElem=16
|
||||
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayOperationPartialInlineSize=32 -XX:+UnlockDiagnosticVMOptions -XX:MaxVectorSize=32 -XX:+UnlockDiagnosticVMOption -XX:ArrayCopyLoadStoreMaxElem=16
|
||||
* compiler.arraycopy.TestArrayCopyConjoint
|
||||
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions
|
||||
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayCopyPartialInlineSize=64 -XX:MaxVectorSize=64 -XX:ArrayCopyLoadStoreMaxElem=16
|
||||
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayOperationPartialInlineSize=64 -XX:MaxVectorSize=64 -XX:ArrayCopyLoadStoreMaxElem=16
|
||||
* compiler.arraycopy.TestArrayCopyConjoint
|
||||
*
|
||||
*/
|
||||
|
@ -30,25 +30,25 @@ import java.util.Random;
|
||||
* @summary Optimize arrayCopy using AVX-512 masked instructions.
|
||||
*
|
||||
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions
|
||||
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayCopyPartialInlineSize=0 -XX:MaxVectorSize=32 -XX:+UnlockDiagnosticVMOptions
|
||||
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayOperationPartialInlineSize=0 -XX:MaxVectorSize=32 -XX:+UnlockDiagnosticVMOptions
|
||||
* compiler.arraycopy.TestArrayCopyDisjoint
|
||||
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions
|
||||
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayCopyPartialInlineSize=0 -XX:MaxVectorSize=64
|
||||
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayOperationPartialInlineSize=0 -XX:MaxVectorSize=64
|
||||
* compiler.arraycopy.TestArrayCopyDisjoint
|
||||
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions
|
||||
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayCopyPartialInlineSize=32 -XX:+UnlockDiagnosticVMOptions -XX:MaxVectorSize=32 -XX:+UnlockDiagnosticVMOption
|
||||
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayOperationPartialInlineSize=32 -XX:+UnlockDiagnosticVMOptions -XX:MaxVectorSize=32 -XX:+UnlockDiagnosticVMOptions
|
||||
* compiler.arraycopy.TestArrayCopyDisjoint
|
||||
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions
|
||||
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayCopyPartialInlineSize=32 -XX:+UnlockDiagnosticVMOptions -XX:MaxVectorSize=64
|
||||
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayOperationPartialInlineSize=32 -XX:+UnlockDiagnosticVMOptions -XX:MaxVectorSize=64
|
||||
* compiler.arraycopy.TestArrayCopyDisjoint
|
||||
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions
|
||||
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayCopyPartialInlineSize=64 -XX:MaxVectorSize=64
|
||||
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayOperationPartialInlineSize=64 -XX:MaxVectorSize=64
|
||||
* compiler.arraycopy.TestArrayCopyDisjoint
|
||||
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions
|
||||
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayCopyPartialInlineSize=32 -XX:+UnlockDiagnosticVMOptions -XX:MaxVectorSize=32 -XX:+UnlockDiagnosticVMOption -XX:ArrayCopyLoadStoreMaxElem=16
|
||||
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayOperationPartialInlineSize=32 -XX:+UnlockDiagnosticVMOptions -XX:MaxVectorSize=32 -XX:+UnlockDiagnosticVMOptions -XX:ArrayCopyLoadStoreMaxElem=16
|
||||
* compiler.arraycopy.TestArrayCopyDisjoint
|
||||
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch -XX:+IgnoreUnrecognizedVMOptions
|
||||
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayCopyPartialInlineSize=64 -XX:MaxVectorSize=64 -XX:ArrayCopyLoadStoreMaxElem=16
|
||||
* -XX:UseAVX=3 -XX:+UnlockDiagnosticVMOptions -XX:ArrayOperationPartialInlineSize=64 -XX:MaxVectorSize=64 -XX:ArrayCopyLoadStoreMaxElem=16
|
||||
* compiler.arraycopy.TestArrayCopyDisjoint
|
||||
*
|
||||
*/
|
||||
|
@ -0,0 +1,288 @@
|
||||
/*
|
||||
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package compiler.intrinsics;
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @requires vm.opt.final.UseVectorizedMismatchIntrinsic == true
|
||||
* @modules java.base/jdk.internal.misc
|
||||
* java.base/jdk.internal.util
|
||||
*
|
||||
* @run main/othervm -XX:CompileCommand=quiet -XX:CompileCommand=compileonly,*::test*
|
||||
* -Xbatch -XX:-TieredCompilation
|
||||
* -XX:UseAVX=3
|
||||
* compiler.intrinsics.VectorizedMismatchTest
|
||||
*
|
||||
* @run main/othervm -XX:CompileCommand=quiet -XX:CompileCommand=compileonly,*::test*
|
||||
* -Xbatch -XX:-TieredCompilation
|
||||
* -XX:UseAVX=3 -XX:AVX3Threshold=0
|
||||
* compiler.intrinsics.VectorizedMismatchTest
|
||||
*/
|
||||
|
||||
import jdk.internal.misc.Unsafe;
|
||||
import jdk.internal.util.ArraysSupport;
|
||||
|
||||
public class VectorizedMismatchTest {
|
||||
private boolean[] boolean_a = new boolean[128];
|
||||
private boolean[] boolean_b = new boolean[128];
|
||||
|
||||
int testBooleanConstantLength(int length) {
|
||||
boolean[] obja = boolean_a;
|
||||
boolean[] objb = boolean_b;
|
||||
long offset = Unsafe.ARRAY_BOOLEAN_BASE_OFFSET;
|
||||
int scale = ArraysSupport.LOG2_ARRAY_BOOLEAN_INDEX_SCALE;
|
||||
return ArraysSupport.vectorizedMismatch(obja, offset, objb, offset, length, scale);
|
||||
}
|
||||
|
||||
int testBooleanConstantLength0() { return testBooleanConstantLength(0); }
|
||||
int testBooleanConstantLength1() { return testBooleanConstantLength(1); }
|
||||
int testBooleanConstantLength64() { return testBooleanConstantLength(64); }
|
||||
int testBooleanConstantLength128() { return testBooleanConstantLength(128); }
|
||||
|
||||
/* ==================================================================================== */
|
||||
|
||||
private byte[] byte_a = new byte[128];
|
||||
private byte[] byte_b = new byte[128];
|
||||
|
||||
int testByteConstantLength(int length) {
|
||||
byte[] obja = byte_a;
|
||||
byte[] objb = byte_b;
|
||||
long offset = Unsafe.ARRAY_BYTE_BASE_OFFSET;
|
||||
int scale = ArraysSupport.LOG2_ARRAY_BYTE_INDEX_SCALE;
|
||||
return ArraysSupport.vectorizedMismatch(obja, offset, objb, offset, length, scale);
|
||||
}
|
||||
|
||||
int testByteConstantLength0() { return testByteConstantLength(0); }
|
||||
int testByteConstantLength1() { return testByteConstantLength(1); }
|
||||
int testByteConstantLength64() { return testByteConstantLength(64); }
|
||||
int testByteConstantLength128() { return testByteConstantLength(128); }
|
||||
|
||||
/* ==================================================================================== */
|
||||
|
||||
private short[] short_a = new short[64];
|
||||
private short[] short_b = new short[64];
|
||||
|
||||
int testShortConstantLength(int length) {
|
||||
short[] obja = short_a;
|
||||
short[] objb = short_b;
|
||||
long offset = Unsafe.ARRAY_SHORT_BASE_OFFSET;
|
||||
int scale = ArraysSupport.LOG2_ARRAY_SHORT_INDEX_SCALE;
|
||||
return ArraysSupport.vectorizedMismatch(obja, offset, objb, offset, length, scale);
|
||||
}
|
||||
|
||||
int testShortConstantLength0() { return testShortConstantLength(0); }
|
||||
int testShortConstantLength1() { return testShortConstantLength(1); }
|
||||
int testShortConstantLength32() { return testShortConstantLength(32); }
|
||||
int testShortConstantLength64() { return testShortConstantLength(64); }
|
||||
|
||||
/* ==================================================================================== */
|
||||
|
||||
private char[] char_a = new char[64];
|
||||
private char[] char_b = new char[64];
|
||||
|
||||
int testCharConstantLength(int length) {
|
||||
char[] obja = char_a;
|
||||
char[] objb = char_b;
|
||||
long offset = Unsafe.ARRAY_CHAR_BASE_OFFSET;
|
||||
int scale = ArraysSupport.LOG2_ARRAY_CHAR_INDEX_SCALE;
|
||||
return ArraysSupport.vectorizedMismatch(obja, offset, objb, offset, length, scale);
|
||||
}
|
||||
|
||||
int testCharConstantLength0() { return testCharConstantLength(0); }
|
||||
int testCharConstantLength1() { return testCharConstantLength(1); }
|
||||
int testCharConstantLength32() { return testCharConstantLength(32); }
|
||||
int testCharConstantLength64() { return testCharConstantLength(64); }
|
||||
|
||||
/* ==================================================================================== */
|
||||
|
||||
private int[] int_a = new int[32];
|
||||
private int[] int_b = new int[32];
|
||||
|
||||
int testIntConstantLength(int length) {
|
||||
int[] obja = int_a;
|
||||
int[] objb = int_b;
|
||||
long offset = Unsafe.ARRAY_INT_BASE_OFFSET;
|
||||
int scale = ArraysSupport.LOG2_ARRAY_INT_INDEX_SCALE;
|
||||
return ArraysSupport.vectorizedMismatch(obja, offset, objb, offset, length, scale);
|
||||
}
|
||||
|
||||
int testIntConstantLength0() { return testIntConstantLength(0); }
|
||||
int testIntConstantLength1() { return testIntConstantLength(1); }
|
||||
int testIntConstantLength16() { return testIntConstantLength(16); }
|
||||
int testIntConstantLength32() { return testIntConstantLength(32); }
|
||||
|
||||
/* ==================================================================================== */
|
||||
|
||||
private float[] float_a = new float[32];
|
||||
private float[] float_b = new float[32];
|
||||
|
||||
int testFloatConstantLength(int length) {
|
||||
float[] obja = float_a;
|
||||
float[] objb = float_b;
|
||||
long offset = Unsafe.ARRAY_FLOAT_BASE_OFFSET;
|
||||
int scale = ArraysSupport.LOG2_ARRAY_FLOAT_INDEX_SCALE;
|
||||
return ArraysSupport.vectorizedMismatch(obja, offset, objb, offset, length, scale);
|
||||
}
|
||||
|
||||
int testFloatConstantLength0() { return testFloatConstantLength(0); }
|
||||
int testFloatConstantLength1() { return testFloatConstantLength(1); }
|
||||
int testFloatConstantLength16() { return testFloatConstantLength(16); }
|
||||
int testFloatConstantLength32() { return testFloatConstantLength(32); }
|
||||
|
||||
/* ==================================================================================== */
|
||||
|
||||
private long[] long_a = new long[16];
|
||||
private long[] long_b = new long[16];
|
||||
|
||||
int testLongConstantLength(int length) {
|
||||
long[] obja = long_a;
|
||||
long[] objb = long_b;
|
||||
long offset = Unsafe.ARRAY_LONG_BASE_OFFSET;
|
||||
int scale = ArraysSupport.LOG2_ARRAY_LONG_INDEX_SCALE;
|
||||
return ArraysSupport.vectorizedMismatch(obja, offset, objb, offset, length, scale);
|
||||
}
|
||||
|
||||
int testLongConstantLength0() { return testLongConstantLength(0); }
|
||||
int testLongConstantLength1() { return testLongConstantLength(1); }
|
||||
int testLongConstantLength8() { return testLongConstantLength(8); }
|
||||
int testLongConstantLength16() { return testLongConstantLength(16); }
|
||||
|
||||
/* ==================================================================================== */
|
||||
|
||||
private double[] double_a = new double[16];
|
||||
private double[] double_b = new double[16];
|
||||
|
||||
int testDoubleConstantLength(int length) {
|
||||
double[] obja = double_a;
|
||||
double[] objb = double_b;
|
||||
long offset = Unsafe.ARRAY_DOUBLE_BASE_OFFSET;
|
||||
int scale = ArraysSupport.LOG2_ARRAY_DOUBLE_INDEX_SCALE;
|
||||
return ArraysSupport.vectorizedMismatch(obja, offset, objb, offset, length, scale);
|
||||
}
|
||||
|
||||
int testDoubleConstantLength0() { return testDoubleConstantLength(0); }
|
||||
int testDoubleConstantLength1() { return testDoubleConstantLength(1); }
|
||||
int testDoubleConstantLength8() { return testDoubleConstantLength(8); }
|
||||
int testDoubleConstantLength16() { return testDoubleConstantLength(16); }
|
||||
|
||||
/* ==================================================================================== */
|
||||
|
||||
static class ClassInitTest {
|
||||
static final int LENGTH = 64;
|
||||
static final int RESULT;
|
||||
static {
|
||||
byte[] arr1 = new byte[LENGTH];
|
||||
byte[] arr2 = new byte[LENGTH];
|
||||
for (int i = 0; i < 20_000; i++) {
|
||||
test(arr1, arr2);
|
||||
}
|
||||
RESULT = test(arr1, arr2);
|
||||
}
|
||||
|
||||
static int test(byte[] obja, byte[] objb) {
|
||||
long offset = Unsafe.ARRAY_BYTE_BASE_OFFSET;
|
||||
int scale = ArraysSupport.LOG2_ARRAY_BYTE_INDEX_SCALE;
|
||||
return ArraysSupport.vectorizedMismatch(obja, offset, objb, offset, LENGTH, scale); // LENGTH is not considered a constant
|
||||
}
|
||||
}
|
||||
|
||||
int testConstantBeingInitialized() {
|
||||
return ClassInitTest.RESULT; // trigger class initialization
|
||||
}
|
||||
|
||||
/* ==================================================================================== */
|
||||
|
||||
int testLoopUnswitch(int length) {
|
||||
long offset = Unsafe.ARRAY_BYTE_BASE_OFFSET;
|
||||
int scale = ArraysSupport.LOG2_ARRAY_BYTE_INDEX_SCALE;
|
||||
|
||||
int acc = 0;
|
||||
for (int i = 0; i < 32; i++) {
|
||||
acc += ArraysSupport.vectorizedMismatch(byte_a, offset, byte_b, offset, length, scale);
|
||||
}
|
||||
return acc;
|
||||
}
|
||||
|
||||
int testLoopHoist(int length, int stride) {
|
||||
long offset = Unsafe.ARRAY_BYTE_BASE_OFFSET;
|
||||
int scale = ArraysSupport.LOG2_ARRAY_BYTE_INDEX_SCALE;
|
||||
|
||||
int acc = 0;
|
||||
|
||||
for (int i = 0; i < 32; i += stride) {
|
||||
acc += ArraysSupport.vectorizedMismatch(byte_a, offset, byte_b, offset, length, scale);
|
||||
}
|
||||
return acc;
|
||||
}
|
||||
|
||||
/* ==================================================================================== */
|
||||
|
||||
public static void main(String[] args) {
|
||||
VectorizedMismatchTest t = new VectorizedMismatchTest();
|
||||
for (int i = 0; i < 20_000; i++) {
|
||||
t.testBooleanConstantLength0();
|
||||
t.testBooleanConstantLength1();
|
||||
t.testBooleanConstantLength64();
|
||||
t.testBooleanConstantLength128();
|
||||
|
||||
t.testByteConstantLength0();
|
||||
t.testByteConstantLength1();
|
||||
t.testByteConstantLength64();
|
||||
t.testByteConstantLength128();
|
||||
|
||||
t.testShortConstantLength0();
|
||||
t.testShortConstantLength1();
|
||||
t.testShortConstantLength32();
|
||||
t.testShortConstantLength64();
|
||||
|
||||
t.testCharConstantLength0();
|
||||
t.testCharConstantLength1();
|
||||
t.testCharConstantLength32();
|
||||
t.testCharConstantLength64();
|
||||
|
||||
t.testIntConstantLength0();
|
||||
t.testIntConstantLength1();
|
||||
t.testIntConstantLength16();
|
||||
t.testIntConstantLength32();
|
||||
|
||||
t.testFloatConstantLength0();
|
||||
t.testFloatConstantLength1();
|
||||
t.testFloatConstantLength16();
|
||||
t.testFloatConstantLength32();
|
||||
|
||||
t.testLongConstantLength0();
|
||||
t.testLongConstantLength1();
|
||||
t.testLongConstantLength8();
|
||||
t.testLongConstantLength16();
|
||||
|
||||
t.testDoubleConstantLength0();
|
||||
t.testDoubleConstantLength1();
|
||||
t.testDoubleConstantLength8();
|
||||
t.testDoubleConstantLength16();
|
||||
|
||||
t.testLoopUnswitch(32);
|
||||
t.testLoopHoist(128, 2);
|
||||
}
|
||||
}
|
||||
}
|
@ -311,6 +311,7 @@ public class VMProps implements Callable<Map<String, String>> {
|
||||
vmOptFinalFlag(map, "ClassUnloading");
|
||||
vmOptFinalFlag(map, "ClassUnloadingWithConcurrentMark");
|
||||
vmOptFinalFlag(map, "UseCompressedOops");
|
||||
vmOptFinalFlag(map, "UseVectorizedMismatchIntrinsic");
|
||||
vmOptFinalFlag(map, "EnableJVMCI");
|
||||
vmOptFinalFlag(map, "EliminateAllocations");
|
||||
vmOptFinalFlag(map, "UseVtableBasedCHA");
|
||||
|
@ -0,0 +1,139 @@
|
||||
/*
|
||||
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
package org.openjdk.bench.java.util;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import org.openjdk.jmh.annotations.Benchmark;
|
||||
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||
import org.openjdk.jmh.annotations.Measurement;
|
||||
import org.openjdk.jmh.annotations.Mode;
|
||||
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||
import org.openjdk.jmh.annotations.Param;
|
||||
import org.openjdk.jmh.annotations.Scope;
|
||||
import org.openjdk.jmh.annotations.Setup;
|
||||
import org.openjdk.jmh.annotations.State;
|
||||
import org.openjdk.jmh.annotations.Warmup;
|
||||
|
||||
@BenchmarkMode(Mode.Throughput)
|
||||
@OutputTimeUnit(TimeUnit.MILLISECONDS)
|
||||
@State(Scope.Thread)
|
||||
public class ArraysMismatchPartialInlining {
|
||||
|
||||
@Param({"3", "4", "5", "6", "7", "15", "31", "63", "95", "800"})
|
||||
private static int size;
|
||||
|
||||
byte [] barray1;
|
||||
char [] carray1;
|
||||
short [] sarray1;
|
||||
int [] iarray1;
|
||||
long [] larray1;
|
||||
float [] farray1;
|
||||
double [] darray1;
|
||||
|
||||
byte [] barray2;
|
||||
char [] carray2;
|
||||
short [] sarray2;
|
||||
int [] iarray2;
|
||||
long [] larray2;
|
||||
float [] farray2;
|
||||
double [] darray2;
|
||||
|
||||
@Setup
|
||||
public void setup() {
|
||||
barray1 = new byte[size];
|
||||
carray1 = new char[size];
|
||||
sarray1 = new short[size];
|
||||
iarray1 = new int[size];
|
||||
larray1 = new long[size];
|
||||
farray1 = new float[size];
|
||||
darray1 = new double[size];
|
||||
|
||||
barray2 = new byte[size];
|
||||
carray2 = new char[size];
|
||||
sarray2 = new short[size];
|
||||
iarray2 = new int[size];
|
||||
larray2 = new long[size];
|
||||
farray2 = new float[size];
|
||||
darray2 = new double[size];
|
||||
|
||||
Arrays.fill(barray1 , (byte)0xF);
|
||||
Arrays.fill(carray1 , (char)0xFF);
|
||||
Arrays.fill(sarray1 , (short)0xFF);
|
||||
Arrays.fill(iarray1 , -1);
|
||||
Arrays.fill(larray1 , -1L);
|
||||
Arrays.fill(farray1 , -1.0f);
|
||||
Arrays.fill(darray1, -1.0);
|
||||
|
||||
Arrays.fill(barray2 , (byte)0xF);
|
||||
Arrays.fill(carray2 , (char)0xFF);
|
||||
Arrays.fill(sarray2 , (short)0xFF);
|
||||
Arrays.fill(iarray2 , -1);
|
||||
Arrays.fill(larray2 , -1L);
|
||||
Arrays.fill(farray2 , -1.0F);
|
||||
Arrays.fill(darray2, -1.0);
|
||||
|
||||
barray2[size-1] = (byte)1;
|
||||
carray2[size-1] = (char)1;
|
||||
sarray2[size-1] = (short)1;
|
||||
iarray2[size-1] = 1;
|
||||
larray2[size-1] = 1L;
|
||||
farray2[size-1] = 1.0f;
|
||||
darray2[size-1] = 1.0;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public int testByteMatch() {
|
||||
return Arrays.mismatch(barray1, barray2);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public int testCharMatch() {
|
||||
return Arrays.mismatch(carray1, carray2);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public int testShortMatch() {
|
||||
return Arrays.mismatch(sarray1, sarray2);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public int testIntMatch() {
|
||||
return Arrays.mismatch(iarray1, iarray2);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public int testLongMatch() {
|
||||
return Arrays.mismatch(larray1, larray2);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public int testFloatMatch() {
|
||||
return Arrays.mismatch(farray1, farray2);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public int testDoubleMatch() {
|
||||
return Arrays.mismatch(darray1, darray2);
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user