6940726: Use BIS instruction for allocation prefetch on Sparc
Use BIS instruction for allocation prefetch on Sparc. Reviewed-by: twisti
This commit is contained in:
parent
629d139cac
commit
f6934fd3b7
@ -471,6 +471,9 @@ extern bool can_branch_register( Node *bol, Node *cmp );
|
||||
source %{
|
||||
#define __ _masm.
|
||||
|
||||
// Block initializing store
|
||||
#define ASI_BLK_INIT_QUAD_LDD_P 0xE2
|
||||
|
||||
// tertiary op of a LoadP or StoreP encoding
|
||||
#define REGP_OP true
|
||||
|
||||
@ -6147,6 +6150,7 @@ instruct prefetchr( memory mem ) %{
|
||||
%}
|
||||
|
||||
instruct prefetchw( memory mem ) %{
|
||||
predicate(AllocatePrefetchStyle != 3 );
|
||||
match( PrefetchWrite mem );
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
@ -6156,6 +6160,23 @@ instruct prefetchw( memory mem ) %{
|
||||
ins_pipe(iload_mem);
|
||||
%}
|
||||
|
||||
// Use BIS instruction to prefetch.
// Matches PrefetchWrite when AllocatePrefetchStyle == 3 and emits an STXA of
// G0 with the block-initializing-store ASI (ASI_BLK_INIT_QUAD_LDD_P) in place
// of a prefetch instruction.
|
||||
instruct prefetchw_bis( memory mem ) %{
|
||||
// Selected only for allocation prefetch style 3.
predicate(AllocatePrefetchStyle == 3);
|
||||
match( PrefetchWrite mem );
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
format %{ "STXA G0,$mem\t! // Block initializing store" %}
|
||||
ins_encode %{
|
||||
// Base register and constant displacement of the matched memory operand.
Register base = as_Register($mem$$base);
|
||||
int disp = $mem$$disp;
|
||||
// NOTE(review): the displacement value itself is never added. Any non-zero
// displacement advances 'base' IN PLACE by AllocatePrefetchStepSize, so the
// operand's base register is modified by this instruction. Presumably this
// relies on consecutive prefetches sharing one base and being one step apart
// - confirm against the code that generates the PrefetchWrite nodes.
if (disp != 0) {
|
||||
__ add(base, AllocatePrefetchStepSize, base);
|
||||
}
|
||||
// Store G0 through 'base' using the block-init ASI; the second G0 is
// presumably the (zero) index register - TODO confirm stxa operand order.
__ stxa(G0, base, G0, ASI_BLK_INIT_QUAD_LDD_P);
|
||||
%}
|
||||
// Scheduled on the store pipeline class (this is a store, not a load).
ins_pipe(istore_mem_reg);
|
||||
%}
|
||||
|
||||
//----------Store Instructions-------------------------------------------------
|
||||
// Store Byte
|
||||
|
@ -86,9 +86,19 @@ void VM_Version::initialize() {
|
||||
if (FLAG_IS_DEFAULT(InteriorEntryAlignment)) {
|
||||
FLAG_SET_DEFAULT(InteriorEntryAlignment, 4);
|
||||
}
|
||||
if (is_niagara1_plus() && FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
|
||||
// Use smaller prefetch distance on N2
|
||||
FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256);
|
||||
if (is_niagara1_plus()) {
|
||||
if (AllocatePrefetchStyle > 0 && FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
|
||||
// Use BIS instruction for allocation prefetch.
|
||||
FLAG_SET_DEFAULT(AllocatePrefetchStyle, 3);
|
||||
if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
|
||||
// Use smaller prefetch distance on N2 with BIS
|
||||
FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64);
|
||||
}
|
||||
}
|
||||
if (AllocatePrefetchStyle != 3 && FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
|
||||
// Use different prefetch distance without BIS
|
||||
FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (FLAG_IS_DEFAULT(OptoLoopAlignment)) {
|
||||
|
@ -111,7 +111,22 @@ public:
|
||||
|
||||
// Allocate size HeapWords. The memory is NOT initialized to zero.
|
||||
inline HeapWord* allocate(size_t size);
|
||||
static size_t alignment_reserve() { return align_object_size(typeArrayOopDesc::header_size(T_INT)); }
|
||||
|
||||
// Reserve space at the end of TLAB
|
||||
static size_t end_reserve() {
|
||||
int reserve_size = typeArrayOopDesc::header_size(T_INT);
|
||||
if (AllocatePrefetchStyle == 3) {
|
||||
// BIS is used to prefetch - we need a space for it.
|
||||
// +1 for rounding up to next cache line +1 to be safe
|
||||
int lines = AllocatePrefetchLines + 2;
|
||||
int step_size = AllocatePrefetchStepSize;
|
||||
int distance = AllocatePrefetchDistance;
|
||||
int prefetch_end = (distance + step_size*lines)/(int)HeapWordSize;
|
||||
reserve_size = MAX2(reserve_size, prefetch_end);
|
||||
}
|
||||
return reserve_size;
|
||||
}
|
||||
// end_reserve() passed through align_object_size().
static size_t alignment_reserve() {
  const size_t tail_reserve = end_reserve();
  return align_object_size(tail_reserve);
}
|
||||
// alignment_reserve() scaled by HeapWordSize.
static size_t alignment_reserve_in_bytes() {
  const size_t reserve = alignment_reserve();
  return reserve * HeapWordSize;
}
|
||||
|
||||
// Return tlab size or remaining space in eden such that the
|
||||
|
@ -1487,11 +1487,11 @@ Node* PhaseMacroExpand::prefetch_allocation(Node* i_o, Node*& needgc_false,
|
||||
Node*& contended_phi_rawmem,
|
||||
Node* old_eden_top, Node* new_eden_top,
|
||||
Node* length) {
|
||||
enum { fall_in_path = 1, pf_path = 2 };
|
||||
if( UseTLAB && AllocatePrefetchStyle == 2 ) {
|
||||
// Generate prefetch allocation with watermark check.
|
||||
// As an allocation hits the watermark, we will prefetch starting
|
||||
// at a "distance" away from watermark.
|
||||
enum { fall_in_path = 1, pf_path = 2 };
|
||||
|
||||
Node *pf_region = new (C, 3) RegionNode(3);
|
||||
Node *pf_phi_rawmem = new (C, 3) PhiNode( pf_region, Type::MEMORY,
|
||||
@ -1570,6 +1570,45 @@ Node* PhaseMacroExpand::prefetch_allocation(Node* i_o, Node*& needgc_false,
|
||||
needgc_false = pf_region;
|
||||
contended_phi_rawmem = pf_phi_rawmem;
|
||||
i_o = pf_phi_abio;
|
||||
} else if( UseTLAB && AllocatePrefetchStyle == 3 ) {
|
||||
// Insert a prefetch for each allocation only on the fast-path
|
||||
Node *pf_region = new (C, 3) RegionNode(3);
|
||||
Node *pf_phi_rawmem = new (C, 3) PhiNode( pf_region, Type::MEMORY,
|
||||
TypeRawPtr::BOTTOM );
|
||||
|
||||
// Generate several prefetch instructions only for arrays.
|
||||
uint lines = (length != NULL) ? AllocatePrefetchLines : 1;
|
||||
uint step_size = AllocatePrefetchStepSize;
|
||||
uint distance = AllocatePrefetchDistance;
|
||||
|
||||
// Next cache address.
|
||||
Node *cache_adr = new (C, 4) AddPNode(old_eden_top, old_eden_top,
|
||||
_igvn.MakeConX(distance));
|
||||
transform_later(cache_adr);
|
||||
cache_adr = new (C, 2) CastP2XNode(needgc_false, cache_adr);
|
||||
transform_later(cache_adr);
|
||||
Node* mask = _igvn.MakeConX(~(intptr_t)(step_size-1));
|
||||
cache_adr = new (C, 3) AndXNode(cache_adr, mask);
|
||||
transform_later(cache_adr);
|
||||
cache_adr = new (C, 2) CastX2PNode(cache_adr);
|
||||
transform_later(cache_adr);
|
||||
|
||||
// Prefetch
|
||||
Node *prefetch = new (C, 3) PrefetchWriteNode( contended_phi_rawmem, cache_adr );
|
||||
prefetch->set_req(0, needgc_false);
|
||||
transform_later(prefetch);
|
||||
contended_phi_rawmem = prefetch;
|
||||
Node *prefetch_adr;
|
||||
distance = step_size;
|
||||
for ( uint i = 1; i < lines; i++ ) {
|
||||
prefetch_adr = new (C, 4) AddPNode( cache_adr, cache_adr,
|
||||
_igvn.MakeConX(distance) );
|
||||
transform_later(prefetch_adr);
|
||||
prefetch = new (C, 3) PrefetchWriteNode( contended_phi_rawmem, prefetch_adr );
|
||||
transform_later(prefetch);
|
||||
distance += step_size;
|
||||
contended_phi_rawmem = prefetch;
|
||||
}
|
||||
} else if( AllocatePrefetchStyle > 0 ) {
|
||||
// Insert a prefetch for each allocation only on the fast-path
|
||||
Node *prefetch_adr;
|
||||
|
@ -1244,5 +1244,5 @@ public:
|
||||
virtual int Opcode() const;
|
||||
virtual uint ideal_reg() const { return NotAMachineReg; }
|
||||
virtual uint match_edge(uint idx) const { return idx==2; }
|
||||
virtual const Type *bottom_type() const { return Type::ABIO; }
|
||||
virtual const Type *bottom_type() const { return ( AllocatePrefetchStyle == 3 ) ? Type::MEMORY : Type::ABIO; }
|
||||
};
|
||||
|
@ -2708,7 +2708,8 @@ class CommandLineFlags {
|
||||
product(intx, AllocatePrefetchStyle, 1, \
|
||||
"0 = no prefetch, " \
|
||||
"1 = prefetch instructions for each allocation, " \
|
||||
"2 = use TLAB watermark to gate allocation prefetch") \
|
||||
"2 = use TLAB watermark to gate allocation prefetch, " \
|
||||
"3 = use BIS instruction on Sparc for allocation prefetch") \
|
||||
\
|
||||
product(intx, AllocatePrefetchDistance, -1, \
|
||||
"Distance to prefetch ahead of allocation pointer") \
|
||||
|
Loading…
Reference in New Issue
Block a user