8252848: Optimize small primitive arrayCopy operations through partial inlining using AVX-512 masked instructions
Reviewed-by: neliasso, kvn
parent 66943fefa7
commit 0d91f0a1df

@@ -2706,34 +2706,18 @@ void Assembler::evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge
emit_operand(dst, src);
}

void Assembler::evmovdqu(XMMRegister dst, KRegister mask, Address src, int vector_len, int type) {
assert(VM_Version::supports_avx512vlbw(), "");
assert(type == T_BYTE || type == T_SHORT || type == T_CHAR || type == T_INT || type == T_LONG, "");
InstructionMark im(this);
bool wide = type == T_SHORT || type == T_CHAR || type == T_LONG;
int prefix = (type == T_BYTE || type == T_SHORT || type == T_CHAR) ? VEX_SIMD_F2 : VEX_SIMD_F3;
InstructionAttr attributes(vector_len, /* vex_w */ wide, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
emit_operand(dst, src);
}

void Assembler::evmovdqu(Address dst, KRegister mask, XMMRegister src, int vector_len, int type) {
void Assembler::evmovdqub(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_avx512vlbw(), "");
assert(src != xnoreg, "sanity");
assert(type == T_BYTE || type == T_SHORT || type == T_CHAR || type == T_INT || type == T_LONG, "");
InstructionMark im(this);
bool wide = type == T_SHORT || type == T_CHAR || type == T_LONG;
int prefix = (type == T_BYTE || type == T_SHORT || type == T_CHAR) ? VEX_SIMD_F2 : VEX_SIMD_F3;
InstructionAttr attributes(vector_len, /* vex_w */ wide, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.reset_is_clear_context();
attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
if (merge) {
attributes.reset_is_clear_context();
}
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x7F);
emit_operand(src, dst);
}

@@ -1549,6 +1549,7 @@ private:
void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len);
void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len);
void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
void evmovdqub(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len);
void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len);

@@ -1566,10 +1567,6 @@ private:
void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);

// Generic move instructions.
void evmovdqu(Address dst, KRegister mask, XMMRegister src, int vector_len, int type);
void evmovdqu(XMMRegister dst, KRegister mask, Address src, int vector_len, int type);

// Move lower 64bit to high 64bit in 128bit register
void movlhps(XMMRegister dst, XMMRegister src);

@@ -1891,6 +1891,20 @@ void C2_MacroAssembler::reduce8L(int opcode, Register dst, Register src1, XMMReg
reduce_operation_256(T_LONG, opcode, vtmp2, vtmp2, src2);
reduce4L(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
}

void C2_MacroAssembler::genmask(Register dst, Register len, Register temp) {
if (ArrayCopyPartialInlineSize <= 32) {
mov64(dst, 1);
shlxq(dst, dst, len);
decq(dst);
} else {
mov64(dst, -1);
movq(temp, len);
negptr(temp);
addptr(temp, 64);
shrxq(dst, dst, temp);
}
}
#endif // _LP64

void C2_MacroAssembler::reduce2F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp) {
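
Both genmask branches above produce the same value: a 64-bit integer with the low len bits set, which the caller then moves into an AVX-512 opmask register. A minimal C++ sketch of that arithmetic (illustrative only, not part of the patch; the helper name is made up):

#include <cstdint>

// Mask with the low 'len' bits set, for 1 <= len <= 64.
uint64_t partial_copy_mask(unsigned len, bool at_most_32_lanes) {
  if (at_most_32_lanes) {
    return (uint64_t(1) << len) - 1;   // mov64(dst, 1); shlxq; decq
  }
  return ~uint64_t(0) >> (64 - len);   // mov64(dst, -1); shrxq by (64 - len)
}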

@@ -1937,6 +1951,15 @@ void C2_MacroAssembler::reduce8D(int opcode, XMMRegister dst, XMMRegister src, X
reduce4D(opcode, dst, vtmp1, vtmp1, vtmp2);
}

void C2_MacroAssembler::evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, int vector_len) {
MacroAssembler::evmovdqu(type, kmask, dst, src, vector_len);
}

void C2_MacroAssembler::evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, int vector_len) {
MacroAssembler::evmovdqu(type, kmask, dst, src, vector_len);
}

void C2_MacroAssembler::reduceFloatMinMax(int opcode, int vlen, bool is_dst_valid,
XMMRegister dst, XMMRegister src,
XMMRegister tmp, XMMRegister atmp, XMMRegister btmp,

@@ -120,6 +120,9 @@ public:
void evgather(BasicType typ, XMMRegister dst, KRegister mask, Register base, XMMRegister idx, int vector_len);
void evscatter(BasicType typ, Register base, XMMRegister idx, KRegister mask, XMMRegister src, int vector_len);

void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, int vector_len);
void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, int vector_len);

// extract
void extract(BasicType typ, Register dst, XMMRegister src, int idx);
XMMRegister get_lane(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex);

@@ -139,6 +142,7 @@ public:
void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
#ifdef _LP64
void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void genmask(Register dst, Register len, Register temp);
#endif // _LP64

// dst = reduce(op, src2) using vtmp as temps

@@ -8000,6 +8000,56 @@ void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len
bind(done);
}

void MacroAssembler::evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, int vector_len) {
switch(type) {
case T_BYTE:
case T_BOOLEAN:
evmovdqub(dst, kmask, src, false, vector_len);
break;
case T_CHAR:
case T_SHORT:
evmovdquw(dst, kmask, src, false, vector_len);
break;
case T_INT:
case T_FLOAT:
evmovdqul(dst, kmask, src, false, vector_len);
break;
case T_LONG:
case T_DOUBLE:
evmovdquq(dst, kmask, src, false, vector_len);
break;
default:
fatal("Unexpected type argument %s", type2name(type));
break;
}
}

void MacroAssembler::evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, int vector_len) {
switch(type) {
case T_BYTE:
case T_BOOLEAN:
evmovdqub(dst, kmask, src, true, vector_len);
break;
case T_CHAR:
case T_SHORT:
evmovdquw(dst, kmask, src, true, vector_len);
break;
case T_INT:
case T_FLOAT:
evmovdqul(dst, kmask, src, true, vector_len);
break;
case T_LONG:
case T_DOUBLE:
evmovdquq(dst, kmask, src, true, vector_len);
break;
default:
fatal("Unexpected type argument %s", type2name(type));
break;
}
}

#ifdef _LP64
void MacroAssembler::convert_f2i(Register dst, XMMRegister src) {
Label done;

@@ -1094,10 +1094,14 @@ public:
void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);

// AVX512 Unaligned
void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, int vector_len);
void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, int vector_len);

void evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); }
void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); }
void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); }
void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); }
void evmovdqub(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); }
void evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);

void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquw(dst, src, merge, vector_len); }

@@ -200,8 +200,8 @@ void MacroAssembler::copy64_masked_avx(Register dst, Register src, XMMRegister x
mov64(temp, -1);
shrxq(temp, temp, length);
kmovql(mask, temp);
evmovdqu(xmm, mask, Address(src, index, scale, offset), Assembler::AVX_512bit, type[shift]);
evmovdqu(Address(dst, index, scale, offset), mask, xmm, Assembler::AVX_512bit, type[shift]);
evmovdqu(type[shift], mask, xmm, Address(src, index, scale, offset), Assembler::AVX_512bit);
evmovdqu(type[shift], mask, Address(dst, index, scale, offset), xmm, Assembler::AVX_512bit);
}
}

@@ -216,8 +216,8 @@ void MacroAssembler::copy32_masked_avx(Register dst, Register src, XMMRegister x
shlxq(temp, temp, length);
decq(temp);
kmovql(mask, temp);
evmovdqu(xmm, mask, Address(src, index, scale, offset), Assembler::AVX_256bit, type[shift]);
evmovdqu(Address(dst, index, scale, offset), mask, xmm, Assembler::AVX_256bit, type[shift]);
evmovdqu(type[shift], mask, xmm, Address(src, index, scale, offset), Assembler::AVX_256bit);
evmovdqu(type[shift], mask, Address(dst, index, scale, offset), xmm, Assembler::AVX_256bit);
}

@@ -1362,6 +1362,7 @@ void VM_Version::get_processor_features() {
MaxLoopPad = 11;
}
#endif // COMPILER2

if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
}

@@ -1399,6 +1400,38 @@ void VM_Version::get_processor_features() {
if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
}
#ifdef COMPILER2
if (UseAVX > 2) {
if (FLAG_IS_DEFAULT(ArrayCopyPartialInlineSize) ||
(!FLAG_IS_DEFAULT(ArrayCopyPartialInlineSize) &&
ArrayCopyPartialInlineSize != 0 &&
ArrayCopyPartialInlineSize != 32 &&
ArrayCopyPartialInlineSize != 16 &&
ArrayCopyPartialInlineSize != 64)) {
int inline_size = 0;
if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
inline_size = 64;
} else if (MaxVectorSize >= 32) {
inline_size = 32;
} else if (MaxVectorSize >= 16) {
inline_size = 16;
}
if(!FLAG_IS_DEFAULT(ArrayCopyPartialInlineSize)) {
warning("Setting ArrayCopyPartialInlineSize as %d", inline_size);
}
ArrayCopyPartialInlineSize = inline_size;
}

if (ArrayCopyPartialInlineSize > MaxVectorSize) {
ArrayCopyPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
if (ArrayCopyPartialInlineSize) {
warning("Setting ArrayCopyPartialInlineSize as MaxVectorSize" INTX_FORMAT ")", MaxVectorSize);
} else {
warning("Setting ArrayCopyPartialInlineSize as " INTX_FORMAT, ArrayCopyPartialInlineSize);
}
}
}
#endif
}

#ifdef _LP64
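
Note: ArrayCopyPartialInlineSize is declared as a DIAGNOSTIC flag (see the c2_globals.hpp hunk further down), so overriding the auto-selected value for experiments requires unlocking diagnostic options first, for example -XX:+UnlockDiagnosticVMOptions -XX:ArrayCopyPartialInlineSize=32. The selection logic above then rewrites any value other than 0, 16, 32 or 64 to a size matching the current MaxVectorSize, and clamps values larger than MaxVectorSize.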

@@ -1521,6 +1521,13 @@ const bool Matcher::match_rule_supported(int opcode) {
return false;
}
break;
case Op_VectorMaskGen:
case Op_LoadVectorMasked:
case Op_StoreVectorMasked:
if (UseAVX < 3) {
return false;
}
break;
#ifndef _LP64
case Op_AddReductionVF:
case Op_AddReductionVD:

@@ -1594,6 +1601,16 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
return false;
}
break;
case Op_VectorMaskGen:
case Op_LoadVectorMasked:
case Op_StoreVectorMasked:
if (!VM_Version::supports_avx512bw()) {
return false;
}
if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
return false;
}
break;
case Op_CMoveVD:
if (vlen != 4) {
return false; // implementation limitation (only vcmov4D_reg is present)

@@ -7894,3 +7911,50 @@ instruct vprorate(vec dst, vec src, vec shift) %{
ins_pipe( pipe_slow );
%}

#ifdef _LP64
// ---------------------------------- Masked Block Copy ------------------------------------

instruct vmasked_load64(vec dst, memory mem, rRegL mask) %{
match(Set dst (LoadVectorMasked mem mask));
format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
ins_encode %{
BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
int vector_len = vector_length_encoding(this);
__ kmovql(k2, $mask$$Register);
__ evmovdqu(elmType, k2, $dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}

instruct vmask_gen(rRegL dst, rRegL len, rRegL tempLen) %{
match(Set dst (VectorMaskGen len));
effect(TEMP_DEF dst, TEMP tempLen);
format %{ "vector_mask_gen $len \t! vector mask generator" %}
ins_encode %{
__ genmask($dst$$Register, $len$$Register, $tempLen$$Register);
%}
ins_pipe( pipe_slow );
%}

instruct vmask_gen_imm(rRegL dst, immL len) %{
match(Set dst (VectorMaskGen len));
format %{ "vector_mask_gen $len \t! vector mask generator" %}
ins_encode %{
__ mov64($dst$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant)));
%}
ins_pipe( pipe_slow );
%}

instruct vmasked_store64(memory mem, vec src, rRegL mask) %{
match(Set mem (StoreVectorMasked mem (Binary src mask)));
format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
ins_encode %{
const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
int vector_len = vector_length_encoding(src_node);
__ kmovql(k2, $mask$$Register);
__ evmovdqu(elmType, k2, $mem$$Address, $src$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
#endif // _LP64

@@ -269,6 +269,7 @@ Form::DataType Form::is_load_from_memory(const char *opType) const {
if( strcmp(opType,"LoadS")==0 ) return Form::idealS;
if( strcmp(opType,"LoadVector")==0 ) return Form::idealV;
if( strcmp(opType,"LoadVectorGather")==0 ) return Form::idealV;
if( strcmp(opType,"LoadVectorMasked")==0 ) return Form::idealV;
assert( strcmp(opType,"Load") != 0, "Must type Loads" );
return Form::none;
}

@@ -286,6 +287,7 @@ Form::DataType Form::is_store_to_memory(const char *opType) const {
if( strcmp(opType,"StoreNKlass")==0) return Form::idealNKlass;
if( strcmp(opType,"StoreVector")==0 ) return Form::idealV;
if( strcmp(opType,"StoreVectorScatter")==0 ) return Form::idealV;
if( strcmp(opType,"StoreVectorMasked")==0 ) return Form::idealV;
assert( strcmp(opType,"Store") != 0, "Must type Stores" );
return Form::none;
}

@@ -781,6 +781,7 @@ bool InstructForm::captures_bottom_type(FormDict &globals) const {
!strcmp(_matrule->_rChild->_opType,"ShenandoahCompareAndExchangeP") ||
!strcmp(_matrule->_rChild->_opType,"ShenandoahCompareAndExchangeN") ||
#endif
!strcmp(_matrule->_rChild->_opType,"VectorMaskGen")||
!strcmp(_matrule->_rChild->_opType,"CompareAndExchangeP") ||
!strcmp(_matrule->_rChild->_opType,"CompareAndExchangeN"))) return true;
else if ( is_ideal_load() == Form::idealP ) return true;

@@ -3489,7 +3490,7 @@ int MatchNode::needs_ideal_memory_edge(FormDict &globals) const {
"StoreB","StoreC","Store" ,"StoreFP",
"LoadI", "LoadL", "LoadP" ,"LoadN", "LoadD" ,"LoadF" ,
"LoadB" , "LoadUB", "LoadUS" ,"LoadS" ,"Load" ,
"StoreVector", "LoadVector", "LoadVectorGather", "StoreVectorScatter",
"StoreVector", "LoadVector", "LoadVectorGather", "StoreVectorScatter", "LoadVectorMasked", "StoreVectorMasked",
"LoadRange", "LoadKlass", "LoadNKlass", "LoadL_unaligned", "LoadD_unaligned",
"LoadPLocked",
"StorePConditional", "StoreIConditional", "StoreLConditional",

@@ -4181,7 +4182,7 @@ bool MatchRule::is_vector() const {
"VectorRearrange","VectorLoadShuffle", "VectorLoadConst",
"VectorCastB2X", "VectorCastS2X", "VectorCastI2X",
"VectorCastL2X", "VectorCastF2X", "VectorCastD2X",
"VectorMaskWrapper", "VectorMaskCmp", "VectorReinterpret",
"VectorMaskWrapper", "VectorMaskCmp", "VectorReinterpret","LoadVectorMasked","StoreVectorMasked",
"FmaVD", "FmaVF","PopCountVI",
// Next are not supported currently.
"PackB","PackS","PackI","PackL","PackF","PackD","Pack2L","Pack2D",

@@ -684,6 +684,8 @@ bool ArrayCopyNode::may_modify(const TypeOopPtr *t_oop, MemBarNode* mb, PhaseTra
assert(c == mb->in(0) || (ac != NULL && ac->is_clonebasic() && !use_ReduceInitialCardMarks), "only for clone");
#endif
return true;
} else if (mb->trailing_partial_array_copy()) {
return true;
}

return false;

@@ -730,3 +732,16 @@ bool ArrayCopyNode::modifies(intptr_t offset_lo, intptr_t offset_hi, PhaseTransf
}
return false;
}

// As an optimization, choose optimum vector size for copy length known at compile time.
int ArrayCopyNode::get_partial_inline_vector_lane_count(BasicType type, int const_len) {
int lane_count = ArrayCopyPartialInlineSize/type2aelembytes(type);
if (const_len > 0) {
int size_in_bytes = const_len * type2aelembytes(type);
if (size_in_bytes <= 16)
lane_count = 16/type2aelembytes(type);
else if (size_in_bytes > 16 && size_in_bytes <= 32)
lane_count = 32/type2aelembytes(type);
}
return lane_count;
}
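
A quick worked example of the lane-count selection above, written as a standalone C++ snippet (illustrative only; it hard-codes T_BYTE, element size 1, and an assumed ArrayCopyPartialInlineSize of 64):

#include <cassert>

// Mirrors get_partial_inline_vector_lane_count for byte arrays with
// ArrayCopyPartialInlineSize == 64; const_len <= 0 means "length unknown".
static int byte_lane_count(int const_len) {
  int lane_count = 64;                  // ArrayCopyPartialInlineSize / 1
  if (const_len > 0) {
    if (const_len <= 16)      lane_count = 16;
    else if (const_len <= 32) lane_count = 32;
  }
  return lane_count;
}

int main() {
  assert(byte_lane_count(-1) == 64);    // unknown length: use the full configured width
  assert(byte_lane_count(10) == 16);    // constant 10-byte copy fits a 16-byte vector
  assert(byte_lane_count(20) == 32);    // constant 20-byte copy fits a 32-byte vector
  assert(byte_lane_count(48) == 64);    // larger constants keep the configured width
  return 0;
}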

@@ -180,6 +180,9 @@ public:
bool has_negative_length_guard() const { return _has_negative_length_guard; }

static bool may_modify(const TypeOopPtr *t_oop, MemBarNode* mb, PhaseTransform *phase, ArrayCopyNode*& ac);

static int get_partial_inline_vector_lane_count(BasicType type, int const_len);

bool modifies(intptr_t offset_lo, intptr_t offset_hi, PhaseTransform* phase, bool must_modify) const;

#ifndef PRODUCT

@@ -80,6 +80,10 @@
"actual size could be less depending on elements type") \
range(0, max_jint) \
\
product(intx, ArrayCopyPartialInlineSize, -1, DIAGNOSTIC, \
"Partial inline size used for array copy acceleration.") \
range(-1, 64) \
\
product(bool, AlignVector, true, \
"Perform vector store/load alignment in loop") \
\

@@ -407,6 +407,9 @@ macro(LoadVector)
macro(LoadVectorGather)
macro(StoreVector)
macro(StoreVectorScatter)
macro(LoadVectorMasked)
macro(StoreVectorMasked)
macro(VectorMaskGen)
macro(Pack)
macro(PackB)
macro(PackS)

@@ -3355,6 +3355,9 @@ void Compile::final_graph_reshaping_main_switch(Node* n, Final_Reshape_Counts& f
case Op_StoreVector:
case Op_LoadVectorGather:
case Op_StoreVectorScatter:
case Op_VectorMaskGen:
case Op_LoadVectorMasked:
case Op_StoreVectorMasked:
break;

case Op_AddReductionVI:

@@ -687,6 +687,7 @@ void PhaseCFG::adjust_register_pressure(Node* n, Block* block, intptr_t* recalc_
case Op_StoreN:
case Op_StoreVector:
case Op_StoreVectorScatter:
case Op_StoreVectorMasked:
case Op_StoreNKlass:
for (uint k = 1; k < m->req(); k++) {
Node *in = m->in(k);

@@ -243,8 +243,10 @@ static Node *scan_mem_chain(Node *mem, int alias_idx, int offset, Node *start_me
} else if (in->is_MemBar()) {
ArrayCopyNode* ac = NULL;
if (ArrayCopyNode::may_modify(tinst, in->as_MemBar(), phase, ac)) {
assert(ac != NULL && ac->is_clonebasic(), "Only basic clone is a non escaping clone");
return ac;
if (ac != NULL) {
assert(ac->is_clonebasic(), "Only basic clone is a non escaping clone");
return ac;
}
}
mem = in->in(TypeFunc::Memory);
} else {

@@ -126,6 +126,11 @@ private:
// helper methods modeled after LibraryCallKit for array copy
Node* generate_guard(Node** ctrl, Node* test, RegionNode* region, float true_prob);
Node* generate_slow_guard(Node** ctrl, Node* test, RegionNode* region);

void generate_partial_inlining_block(Node** ctrl, MergeMemNode** mem, const TypePtr* adr_type,
RegionNode** exit_block, Node** result_memory, Node* length,
Node* src_start, Node* dst_start, BasicType type);

void generate_negative_guard(Node** ctrl, Node* index, RegionNode* region);
void generate_limit_guard(Node** ctrl, Node* offset, Node* subseq_length, Node* array_length, RegionNode* region);

@@ -174,7 +179,7 @@ private:
Node* src, Node* src_offset,
Node* dest, Node* dest_offset,
Node* copy_length, bool dest_uninitialized);
void generate_unchecked_arraycopy(Node** ctrl, MergeMemNode** mem,
bool generate_unchecked_arraycopy(Node** ctrl, MergeMemNode** mem,
const TypePtr* adr_type,
BasicType basic_elem_type,
bool disjoint_bases,

@@ -27,6 +27,7 @@
#include "opto/arraycopynode.hpp"
#include "oops/objArrayKlass.hpp"
#include "opto/convertnode.hpp"
#include "opto/vectornode.hpp"
#include "opto/graphKit.hpp"
#include "opto/macro.hpp"
#include "opto/runtime.hpp"

@@ -169,6 +170,98 @@ void PhaseMacroExpand::generate_limit_guard(Node** ctrl, Node* offset, Node* sub
generate_guard(ctrl, bol_lt, region, PROB_MIN);
}

//
// Partial in-lining handling for smaller conjoint/disjoint array copies having
// length(in bytes) less than ArrayCopyPartialInlineSize.
//  if (length <= ArrayCopyPartialInlineSize) {
//    partial_inlining_block:
//      mask = Mask_Gen
//      vload = LoadVectorMasked src , mask
//      StoreVectorMasked dst, mask, vload
//  } else {
//    stub_block:
//      callstub array_copy
//  }
//  exit_block:
//    Phi = label partial_inlining_block:mem , label stub_block:mem (filled by caller)
//    mem = MergeMem (Phi)
//    control = stub_block
//
// Exit_block and associated phi(memory) are partially initialized for partial_in-lining_block
// edges. Remaining edges for exit_block coming from stub_block are connected by the caller
// post stub nodes creation.
//

void PhaseMacroExpand::generate_partial_inlining_block(Node** ctrl, MergeMemNode** mem, const TypePtr* adr_type,
RegionNode** exit_block, Node** result_memory, Node* length,
Node* src_start, Node* dst_start, BasicType type) {
const TypePtr *src_adr_type = _igvn.type(src_start)->isa_ptr();
Node* inline_block = NULL;
Node* stub_block = NULL;

int const_len = -1;
const TypeInt* lty = NULL;
uint shift = exact_log2(type2aelembytes(type));
if (length->Opcode() == Op_ConvI2L) {
lty = _igvn.type(length->in(1))->isa_int();
} else {
lty = _igvn.type(length)->isa_int();
}
if (lty && lty->is_con()) {
const_len = lty->get_con() << shift;
}

// Return if copy length is greater than partial inline size limit or
// target does not supports masked load/stores.
int lane_count = ArrayCopyNode::get_partial_inline_vector_lane_count(type, const_len);
if ( const_len > ArrayCopyPartialInlineSize ||
!Matcher::match_rule_supported_vector(Op_LoadVectorMasked, lane_count, type) ||
!Matcher::match_rule_supported_vector(Op_StoreVectorMasked, lane_count, type) ||
!Matcher::match_rule_supported_vector(Op_VectorMaskGen, lane_count, type)) {
return;
}

Node* copy_bytes = new LShiftXNode(length, intcon(shift));
transform_later(copy_bytes);

Node* cmp_le = new CmpULNode(copy_bytes, longcon(ArrayCopyPartialInlineSize));
transform_later(cmp_le);
Node* bol_le = new BoolNode(cmp_le, BoolTest::le);
transform_later(bol_le);
inline_block = generate_guard(ctrl, bol_le, NULL, PROB_FAIR);
stub_block = *ctrl;

Node* mask_gen = new VectorMaskGenNode(length, TypeLong::LONG, Type::get_const_basic_type(type));
transform_later(mask_gen);

unsigned vec_size = lane_count * type2aelembytes(type);
if (C->max_vector_size() < vec_size) {
C->set_max_vector_size(vec_size);
}

const TypeVect * vt = TypeVect::make(type, lane_count);
Node* mm = (*mem)->memory_at(C->get_alias_index(src_adr_type));
Node* masked_load = new LoadVectorMaskedNode(inline_block, mm, src_start,
src_adr_type, vt, mask_gen);
transform_later(masked_load);

mm = (*mem)->memory_at(C->get_alias_index(adr_type));
Node* masked_store = new StoreVectorMaskedNode(inline_block, mm, dst_start,
masked_load, adr_type, mask_gen);
transform_later(masked_store);

// Convergence region for inline_block and stub_block.
*exit_block = new RegionNode(3);
transform_later(*exit_block);
(*exit_block)->init_req(1, inline_block);
*result_memory = new PhiNode(*exit_block, Type::MEMORY, adr_type);
transform_later(*result_memory);
(*result_memory)->init_req(1, masked_store);

*ctrl = stub_block;
}

Node* PhaseMacroExpand::generate_nonpositive_guard(Node** ctrl, Node* index, bool never_negative) {
if ((*ctrl)->is_top()) return NULL;
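
For readers less familiar with masked vector memory operations, here is a scalar C++ model of what the VectorMaskGen / LoadVectorMasked / StoreVectorMasked sequence built above does for a byte copy, assuming a 64-byte vector (a sketch, not HotSpot code; the real fast path is only taken when the copy length in bytes is at most ArrayCopyPartialInlineSize):

#include <cstdint>
#include <cstddef>

// One 64-byte-wide "transfer" in which only the lanes enabled by the mask move
// data; disabled lanes neither read src nor write dst (and do not fault on hardware).
void masked_byte_copy_model(uint8_t* dst, const uint8_t* src, size_t len) {
  uint64_t mask = (len >= 64) ? ~uint64_t(0) : ((uint64_t(1) << len) - 1); // VectorMaskGen
  for (size_t lane = 0; lane < 64; lane++) {
    if (mask & (uint64_t(1) << lane)) {
      dst[lane] = src[lane];             // LoadVectorMasked + StoreVectorMasked
    }
  }
}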

@@ -559,16 +652,17 @@ Node* PhaseMacroExpand::generate_arraycopy(ArrayCopyNode *ac, AllocateArrayNode*
}
}

bool is_partial_array_copy = false;
if (!(*ctrl)->is_top()) {
// Generate the fast path, if possible.
Node* local_ctrl = *ctrl;
MergeMemNode* local_mem = MergeMemNode::make(mem);
transform_later(local_mem);

generate_unchecked_arraycopy(&local_ctrl, &local_mem,
adr_type, copy_type, disjoint_bases,
src, src_offset, dest, dest_offset,
ConvI2X(copy_length), dest_uninitialized);
is_partial_array_copy = generate_unchecked_arraycopy(&local_ctrl, &local_mem,
adr_type, copy_type, disjoint_bases,
src, src_offset, dest, dest_offset,
ConvI2X(copy_length), dest_uninitialized);

// Present the results of the fast call.
result_region->init_req(fast_path, local_ctrl);

@@ -715,13 +809,19 @@ Node* PhaseMacroExpand::generate_arraycopy(ArrayCopyNode *ac, AllocateArrayNode*
insert_mem_bar(ctrl, &out_mem, Op_MemBarCPUOrder);
}

if (is_partial_array_copy) {
assert((*ctrl)->is_Proj(), "MemBar control projection");
assert((*ctrl)->in(0)->isa_MemBar(), "MemBar node");
(*ctrl)->in(0)->isa_MemBar()->set_trailing_partial_array_copy();
}

_igvn.replace_node(_memproj_fallthrough, out_mem);
_igvn.replace_node(_ioproj_fallthrough, *io);
_igvn.replace_node(_fallthroughcatchproj, *ctrl);

#ifdef ASSERT
const TypeOopPtr* dest_t = _igvn.type(dest)->is_oopptr();
if (dest_t->is_known_instance()) {
if (dest_t->is_known_instance() && !is_partial_array_copy) {
ArrayCopyNode* ac = NULL;
assert(ArrayCopyNode::may_modify(dest_t, (*ctrl)->in(0)->as_MemBar(), &_igvn, ac), "dependency on arraycopy lost");
assert(ac == NULL, "no arraycopy anymore");

@@ -1053,14 +1153,14 @@ Node* PhaseMacroExpand::generate_generic_arraycopy(Node** ctrl, MergeMemNode** m
}

// Helper function; generates the fast out-of-line call to an arraycopy stub.
void PhaseMacroExpand::generate_unchecked_arraycopy(Node** ctrl, MergeMemNode** mem,
bool PhaseMacroExpand::generate_unchecked_arraycopy(Node** ctrl, MergeMemNode** mem,
const TypePtr* adr_type,
BasicType basic_elem_type,
bool disjoint_bases,
Node* src, Node* src_offset,
Node* dest, Node* dest_offset,
Node* copy_length, bool dest_uninitialized) {
if ((*ctrl)->is_top()) return;
if ((*ctrl)->is_top()) return false;

Node* src_start = src;
Node* dest_start = dest;

@@ -1075,11 +1175,39 @@ void PhaseMacroExpand::generate_unchecked_arraycopy(Node** ctrl, MergeMemNode**
basictype2arraycopy(basic_elem_type, src_offset, dest_offset,
disjoint_bases, copyfunc_name, dest_uninitialized);

Node* result_memory = NULL;
RegionNode* exit_block = NULL;
if (ArrayCopyPartialInlineSize > 0 && is_subword_type(basic_elem_type) &&
Matcher::vector_width_in_bytes(basic_elem_type) >= 16) {
generate_partial_inlining_block(ctrl, mem, adr_type, &exit_block, &result_memory,
copy_length, src_start, dest_start, basic_elem_type);
}

const TypeFunc* call_type = OptoRuntime::fast_arraycopy_Type();
Node* call = make_leaf_call(*ctrl, *mem, call_type, copyfunc_addr, copyfunc_name, adr_type,
src_start, dest_start, copy_length XTOP);

finish_arraycopy_call(call, ctrl, mem, adr_type);

// Connecting remaining edges for exit_block coming from stub_block.
if (exit_block) {
exit_block->init_req(2, *ctrl);

// Memory edge corresponding to stub_region.
result_memory->init_req(2, *mem);

uint alias_idx = C->get_alias_index(adr_type);
if (alias_idx != Compile::AliasIdxBot) {
*mem = MergeMemNode::make(*mem);
(*mem)->set_memory_at(alias_idx, result_memory);
} else {
*mem = MergeMemNode::make(result_memory);
}
transform_later(*mem);
*ctrl = exit_block;
return true;
}
return false;
}

void PhaseMacroExpand::expand_arraycopy_node(ArrayCopyNode *ac) {

@@ -2213,6 +2213,7 @@ bool Matcher::find_shared_visit(MStack& mstack, Node* n, uint opcode, bool& mem_
case Op_FmaVD:
case Op_FmaVF:
case Op_MacroLogicV:
case Op_LoadVectorMasked:
set_shared(n); // Force result into register (it will be anyways)
break;
case Op_ConP: { // Convert pointers above the centerline to NUL

@@ -2315,6 +2316,12 @@ void Matcher::find_shared_post_visit(Node* n, uint opcode) {
n->del_req(3);
break;
}
case Op_StoreVectorMasked: {
Node* pair = new BinaryNode(n->in(3), n->in(4));
n->set_req(3, pair);
n->del_req(4);
break;
}
case Op_LoopLimit: {
Node* pair1 = new BinaryNode(n->in(1), n->in(2));
n->set_req(1, pair1);

@@ -1190,7 +1190,8 @@ class MemBarNode: public MultiNode {
TrailingStore,
LeadingStore,
TrailingLoadStore,
LeadingLoadStore
LeadingLoadStore,
TrailingPartialArrayCopy
} _kind;

#ifdef ASSERT

@@ -1227,6 +1228,8 @@ public:
bool trailing() const { return _kind == TrailingLoad || _kind == TrailingStore || _kind == TrailingLoadStore; }
bool leading() const { return _kind == LeadingStore || _kind == LeadingLoadStore; }
bool standalone() const { return _kind == Standalone; }
void set_trailing_partial_array_copy() { _kind = TrailingPartialArrayCopy; }
bool trailing_partial_array_copy() const { return _kind == TrailingPartialArrayCopy; }

static void set_store_pair(MemBarNode* leading, MemBarNode* trailing);
static void set_load_store_pair(MemBarNode* leading, MemBarNode* trailing);

@@ -157,6 +157,8 @@ class TypeNode;
class UnlockNode;
class VectorNode;
class LoadVectorNode;
class LoadVectorMaskedNode;
class StoreVectorMaskedNode;
class LoadVectorGatherNode;
class StoreVectorNode;
class StoreVectorScatterNode;

@@ -692,13 +694,15 @@ public:
DEFINE_CLASS_ID(Parm, Proj, 4)
DEFINE_CLASS_ID(MachProj, Proj, 5)

DEFINE_CLASS_ID(Mem, Node, 4)
DEFINE_CLASS_ID(Load, Mem, 0)
DEFINE_CLASS_ID(Mem, Node, 4)
DEFINE_CLASS_ID(Load, Mem, 0)
DEFINE_CLASS_ID(LoadVector, Load, 0)
DEFINE_CLASS_ID(LoadVectorGather, LoadVector, 0)
DEFINE_CLASS_ID(LoadVectorMasked, LoadVector, 1)
DEFINE_CLASS_ID(Store, Mem, 1)
DEFINE_CLASS_ID(StoreVector, Store, 0)
DEFINE_CLASS_ID(StoreVectorScatter, StoreVector, 0)
DEFINE_CLASS_ID(StoreVectorMasked, StoreVector, 1)
DEFINE_CLASS_ID(LoadStore, Mem, 2)
DEFINE_CLASS_ID(LoadStoreConditional, LoadStore, 0)
DEFINE_CLASS_ID(CompareAndSwap, LoadStoreConditional, 0)

@@ -759,6 +759,41 @@ StoreVectorNode* StoreVectorNode::make(int opc, Node* ctl, Node* mem,
return new StoreVectorNode(ctl, mem, adr, atyp, val);
}

Node* LoadVectorMaskedNode::Ideal(PhaseGVN* phase, bool can_reshape) {
Node* mask_len = in(3)->in(1);
const TypeLong* ty = phase->type(mask_len)->isa_long();
if (ty && ty->is_con()) {
BasicType mask_bt = ((VectorMaskGenNode*)in(3))->get_elem_type()->array_element_basic_type();
uint load_sz = type2aelembytes(mask_bt) * ty->get_con();
if ( load_sz == 32 || load_sz == 64) {
assert(load_sz == 32 || MaxVectorSize > 32, "Unexpected load size");
Node* ctr = in(MemNode::Control);
Node* mem = in(MemNode::Memory);
Node* adr = in(MemNode::Address);
return phase->transform(new LoadVectorNode(ctr, mem, adr, adr_type(), vect_type()));
}
}
return NULL;
}

Node* StoreVectorMaskedNode::Ideal(PhaseGVN* phase, bool can_reshape) {
Node* mask_len = in(4)->in(1);
const TypeLong* ty = phase->type(mask_len)->isa_long();
if (ty && ty->is_con()) {
BasicType mask_bt = ((VectorMaskGenNode*)in(4))->get_elem_type()->array_element_basic_type();
uint load_sz = type2aelembytes(mask_bt) * ty->get_con();
if ( load_sz == 32 || load_sz == 64) {
assert(load_sz == 32 || MaxVectorSize > 32, "Unexpected store size");
Node* ctr = in(MemNode::Control);
Node* mem = in(MemNode::Memory);
Node* adr = in(MemNode::Address);
Node* val = in(MemNode::ValueIn);
return phase->transform(new StoreVectorNode(ctr, mem, adr, adr_type(), val));
}
}
return NULL;
}

int ExtractNode::opcode(BasicType bt) {
switch (bt) {
case T_BOOLEAN: return Op_ExtractUB;
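
The two Ideal() routines above drop the mask when it is statically known to cover the whole vector: for example, a byte-element mask with a constant length of 64 gives load_sz == 64, so the LoadVectorMasked degenerates into an ordinary LoadVectorNode (and the store likewise), letting the regular unmasked vector instructions be selected.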

@@ -778,6 +778,56 @@ class StoreVectorNode : public StoreNode {
idx == MemNode::ValueIn + 1; }
};

class StoreVectorMaskedNode : public StoreVectorNode {
public:
StoreVectorMaskedNode(Node* c, Node* mem, Node* dst, Node* src, const TypePtr* at, Node* mask)
: StoreVectorNode(c, mem, dst, at, src) {
assert(mask->bottom_type()->is_long(), "sanity");
init_class_id(Class_StoreVector);
set_mismatched_access();
add_req(mask);
}

virtual int Opcode() const;

virtual uint match_edge(uint idx) const {
return idx > 1;
}
Node* Ideal(PhaseGVN* phase, bool can_reshape);
};

class LoadVectorMaskedNode : public LoadVectorNode {
public:
LoadVectorMaskedNode(Node* c, Node* mem, Node* src, const TypePtr* at, const TypeVect* vt, Node* mask)
: LoadVectorNode(c, mem, src, at, vt) {
assert(mask->bottom_type()->is_long(), "sanity");
init_class_id(Class_LoadVector);
set_mismatched_access();
add_req(mask);
}

virtual int Opcode() const;

virtual uint match_edge(uint idx) const {
return idx > 1;
}
Node* Ideal(PhaseGVN* phase, bool can_reshape);
};

class VectorMaskGenNode : public TypeNode {
public:
VectorMaskGenNode(Node* length, const Type* ty, const Type* ety): TypeNode(ty, 2), _elemType(ety) {
init_req(1, length);
}

virtual int Opcode() const;
const Type* get_elem_type() { return _elemType;}
virtual uint size_of() const { return sizeof(VectorMaskGenNode); }

private:
const Type* _elemType;
};

//=========================Promote_Scalar_to_Vector============================

//------------------------------ReplicateBNode---------------------------------