6940726: Use BIS instruction for allocation prefetch on Sparc

Use the BIS (block initializing store) instruction for allocation prefetch on SPARC.

Reviewed-by: twisti
This commit is contained in:
Vladimir Kozlov 2010-04-07 12:39:27 -07:00
parent 629d139cac
commit f6934fd3b7
6 changed files with 93 additions and 7 deletions

View File

@ -471,6 +471,9 @@ extern bool can_branch_register( Node *bol, Node *cmp );
source %{ source %{
#define __ _masm. #define __ _masm.
// Block initializing store.
// Value passed as the last argument of stxa() by the prefetchw_bis instruct.
// NOTE(review): presumably the SPARC address space identifier (ASI) of the
// same name -- verify 0xE2 against the architecture manual.
#define ASI_BLK_INIT_QUAD_LDD_P 0xE2
// tertiary op of a LoadP or StoreP encoding // tertiary op of a LoadP or StoreP encoding
#define REGP_OP true #define REGP_OP true
@ -6147,6 +6150,7 @@ instruct prefetchr( memory mem ) %{
%} %}
instruct prefetchw( memory mem ) %{ instruct prefetchw( memory mem ) %{
predicate(AllocatePrefetchStyle != 3 );
match( PrefetchWrite mem ); match( PrefetchWrite mem );
ins_cost(MEMORY_REF_COST); ins_cost(MEMORY_REF_COST);
@ -6156,6 +6160,23 @@ instruct prefetchw( memory mem ) %{
ins_pipe(iload_mem); ins_pipe(iload_mem);
%} %}
// Use BIS instruction to prefetch.
// Matches PrefetchWrite only when AllocatePrefetchStyle == 3; the plain
// prefetchw rule carries the complementary predicate
// (AllocatePrefetchStyle != 3), so exactly one of the two rules applies.
instruct prefetchw_bis( memory mem ) %{
predicate(AllocatePrefetchStyle == 3);
match( PrefetchWrite mem );
ins_cost(MEMORY_REF_COST);
format %{ "STXA G0,$mem\t! // Block initializing store" %}
ins_encode %{
// Base register and displacement of the matched memory operand.
Register base = as_Register($mem$$base);
int disp = $mem$$disp;
// NOTE(review): a non-zero displacement is not added as-is. Instead the
// base register is used as both source and destination of the add, i.e. it
// is advanced by AllocatePrefetchStepSize and overwritten. This looks like
// it relies on the style-3 expansion in PhaseMacroExpand::prefetch_allocation
// emitting prefetches at successive step_size offsets from one base address,
// in order -- confirm, and confirm that clobbering the operand's base
// register is intended.
if (disp != 0) {
__ add(base, AllocatePrefetchStepSize, base);
}
// Emit the store of G0 through the block-initializing ASI; the index
// register is G0, so no displacement is encoded in the instruction itself.
__ stxa(G0, base, G0, ASI_BLK_INIT_QUAD_LDD_P);
%}
ins_pipe(istore_mem_reg);
%}
//----------Store Instructions------------------------------------------------- //----------Store Instructions-------------------------------------------------
// Store Byte // Store Byte

View File

@ -86,10 +86,20 @@ void VM_Version::initialize() {
if (FLAG_IS_DEFAULT(InteriorEntryAlignment)) { if (FLAG_IS_DEFAULT(InteriorEntryAlignment)) {
FLAG_SET_DEFAULT(InteriorEntryAlignment, 4); FLAG_SET_DEFAULT(InteriorEntryAlignment, 4);
} }
if (is_niagara1_plus() && FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { if (is_niagara1_plus()) {
// Use smaller prefetch distance on N2 if (AllocatePrefetchStyle > 0 && FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
// Use BIS instruction for allocation prefetch.
FLAG_SET_DEFAULT(AllocatePrefetchStyle, 3);
if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
// Use smaller prefetch distance on N2 with BIS
FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64);
}
}
if (AllocatePrefetchStyle != 3 && FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
// Use different prefetch distance without BIS
FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256); FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256);
} }
}
#endif #endif
if (FLAG_IS_DEFAULT(OptoLoopAlignment)) { if (FLAG_IS_DEFAULT(OptoLoopAlignment)) {
FLAG_SET_DEFAULT(OptoLoopAlignment, 4); FLAG_SET_DEFAULT(OptoLoopAlignment, 4);

View File

@ -111,7 +111,22 @@ public:
// Allocate size HeapWords. The memory is NOT initialized to zero. // Allocate size HeapWords. The memory is NOT initialized to zero.
inline HeapWord* allocate(size_t size); inline HeapWord* allocate(size_t size);
static size_t alignment_reserve() { return align_object_size(typeArrayOopDesc::header_size(T_INT)); }
// Number of HeapWords to keep unused at the end of a TLAB.
static size_t end_reserve() {
  // Baseline reserve: the header size of an int typeArray.
  const int header_words = typeArrayOopDesc::header_size(T_INT);
  if (AllocatePrefetchStyle != 3) {
    return header_words;
  }
  // Style 3 prefetches with BIS, so space has to be left for it:
  // the configured number of lines, +1 for rounding up to the next
  // cache line, +1 more as a safety margin.
  const int line_count = AllocatePrefetchLines + 2;
  const int line_bytes = AllocatePrefetchStepSize;
  const int lead_bytes = AllocatePrefetchDistance;
  const int prefetch_words = (lead_bytes + line_bytes * line_count) / (int)HeapWordSize;
  // Never reserve less than the baseline.
  return MAX2(header_words, prefetch_words);
}
static size_t alignment_reserve() { return align_object_size(end_reserve()); }
static size_t alignment_reserve_in_bytes() { return alignment_reserve() * HeapWordSize; } static size_t alignment_reserve_in_bytes() { return alignment_reserve() * HeapWordSize; }
// Return tlab size or remaining space in eden such that the // Return tlab size or remaining space in eden such that the

View File

@ -1487,11 +1487,11 @@ Node* PhaseMacroExpand::prefetch_allocation(Node* i_o, Node*& needgc_false,
Node*& contended_phi_rawmem, Node*& contended_phi_rawmem,
Node* old_eden_top, Node* new_eden_top, Node* old_eden_top, Node* new_eden_top,
Node* length) { Node* length) {
enum { fall_in_path = 1, pf_path = 2 };
if( UseTLAB && AllocatePrefetchStyle == 2 ) { if( UseTLAB && AllocatePrefetchStyle == 2 ) {
// Generate prefetch allocation with watermark check. // Generate prefetch allocation with watermark check.
// As an allocation hits the watermark, we will prefetch starting // As an allocation hits the watermark, we will prefetch starting
// at a "distance" away from watermark. // at a "distance" away from watermark.
enum { fall_in_path = 1, pf_path = 2 };
Node *pf_region = new (C, 3) RegionNode(3); Node *pf_region = new (C, 3) RegionNode(3);
Node *pf_phi_rawmem = new (C, 3) PhiNode( pf_region, Type::MEMORY, Node *pf_phi_rawmem = new (C, 3) PhiNode( pf_region, Type::MEMORY,
@ -1570,6 +1570,45 @@ Node* PhaseMacroExpand::prefetch_allocation(Node* i_o, Node*& needgc_false,
needgc_false = pf_region; needgc_false = pf_region;
contended_phi_rawmem = pf_phi_rawmem; contended_phi_rawmem = pf_phi_rawmem;
i_o = pf_phi_abio; i_o = pf_phi_abio;
} else if( UseTLAB && AllocatePrefetchStyle == 3 ) {
// Insert a prefetch for each allocation only on the fast-path
Node *pf_region = new (C, 3) RegionNode(3);
Node *pf_phi_rawmem = new (C, 3) PhiNode( pf_region, Type::MEMORY,
TypeRawPtr::BOTTOM );
// Generate several prefetch instructions only for arrays.
uint lines = (length != NULL) ? AllocatePrefetchLines : 1;
uint step_size = AllocatePrefetchStepSize;
uint distance = AllocatePrefetchDistance;
// Next cache address.
Node *cache_adr = new (C, 4) AddPNode(old_eden_top, old_eden_top,
_igvn.MakeConX(distance));
transform_later(cache_adr);
cache_adr = new (C, 2) CastP2XNode(needgc_false, cache_adr);
transform_later(cache_adr);
Node* mask = _igvn.MakeConX(~(intptr_t)(step_size-1));
cache_adr = new (C, 3) AndXNode(cache_adr, mask);
transform_later(cache_adr);
cache_adr = new (C, 2) CastX2PNode(cache_adr);
transform_later(cache_adr);
// Prefetch
Node *prefetch = new (C, 3) PrefetchWriteNode( contended_phi_rawmem, cache_adr );
prefetch->set_req(0, needgc_false);
transform_later(prefetch);
contended_phi_rawmem = prefetch;
Node *prefetch_adr;
distance = step_size;
for ( uint i = 1; i < lines; i++ ) {
prefetch_adr = new (C, 4) AddPNode( cache_adr, cache_adr,
_igvn.MakeConX(distance) );
transform_later(prefetch_adr);
prefetch = new (C, 3) PrefetchWriteNode( contended_phi_rawmem, prefetch_adr );
transform_later(prefetch);
distance += step_size;
contended_phi_rawmem = prefetch;
}
} else if( AllocatePrefetchStyle > 0 ) { } else if( AllocatePrefetchStyle > 0 ) {
// Insert a prefetch for each allocation only on the fast-path // Insert a prefetch for each allocation only on the fast-path
Node *prefetch_adr; Node *prefetch_adr;

View File

@ -1244,5 +1244,5 @@ public:
virtual int Opcode() const; virtual int Opcode() const;
virtual uint ideal_reg() const { return NotAMachineReg; } virtual uint ideal_reg() const { return NotAMachineReg; }
virtual uint match_edge(uint idx) const { return idx==2; } virtual uint match_edge(uint idx) const { return idx==2; }
virtual const Type *bottom_type() const { return Type::ABIO; } virtual const Type *bottom_type() const { return ( AllocatePrefetchStyle == 3 ) ? Type::MEMORY : Type::ABIO; }
}; };

View File

@ -2708,7 +2708,8 @@ class CommandLineFlags {
product(intx, AllocatePrefetchStyle, 1, \ product(intx, AllocatePrefetchStyle, 1, \
"0 = no prefetch, " \ "0 = no prefetch, " \
"1 = prefetch instructions for each allocation, " \ "1 = prefetch instructions for each allocation, " \
"2 = use TLAB watermark to gate allocation prefetch") \ "2 = use TLAB watermark to gate allocation prefetch, " \
"3 = use BIS instruction on Sparc for allocation prefetch") \
\ \
product(intx, AllocatePrefetchDistance, -1, \ product(intx, AllocatePrefetchDistance, -1, \
"Distance to prefetch ahead of allocation pointer") \ "Distance to prefetch ahead of allocation pointer") \