6940726: Use BIS instruction for allocation prefetch on Sparc
Use BIS instruction for allocation prefetch on Sparc. Reviewed-by: twisti
This commit is contained in:
parent
629d139cac
commit
f6934fd3b7
@ -471,6 +471,9 @@ extern bool can_branch_register( Node *bol, Node *cmp );
|
|||||||
source %{
|
source %{
|
||||||
#define __ _masm.
|
#define __ _masm.
|
||||||
|
|
||||||
|
// Block initializing store
|
||||||
|
#define ASI_BLK_INIT_QUAD_LDD_P 0xE2
|
||||||
|
|
||||||
// tertiary op of a LoadP or StoreP encoding
|
// tertiary op of a LoadP or StoreP encoding
|
||||||
#define REGP_OP true
|
#define REGP_OP true
|
||||||
|
|
||||||
@ -6147,6 +6150,7 @@ instruct prefetchr( memory mem ) %{
|
|||||||
%}
|
%}
|
||||||
|
|
||||||
instruct prefetchw( memory mem ) %{
|
instruct prefetchw( memory mem ) %{
|
||||||
|
predicate(AllocatePrefetchStyle != 3 );
|
||||||
match( PrefetchWrite mem );
|
match( PrefetchWrite mem );
|
||||||
ins_cost(MEMORY_REF_COST);
|
ins_cost(MEMORY_REF_COST);
|
||||||
|
|
||||||
@ -6156,6 +6160,23 @@ instruct prefetchw( memory mem ) %{
|
|||||||
ins_pipe(iload_mem);
|
ins_pipe(iload_mem);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
// Use BIS instruction to prefetch.
// Matches PrefetchWrite when AllocatePrefetchStyle == 3 and emits an STXA of
// G0 with the block-initializing-store ASI (ASI_BLK_INIT_QUAD_LDD_P) instead
// of a prefetch instruction.
// NOTE(review): the encoding below reads only the base register and the
// displacement of $mem; an index register, if the memory operand carries one,
// is not consulted -- confirm which operand forms can reach this rule.
|
||||||
|
instruct prefetchw_bis( memory mem ) %{
|
||||||
|
predicate(AllocatePrefetchStyle == 3); // rule is only eligible in style 3
|
||||||
|
match( PrefetchWrite mem );
|
||||||
|
ins_cost(MEMORY_REF_COST);
|
||||||
|
|
||||||
|
format %{ "STXA G0,$mem\t! // Block initializing store" %}
|
||||||
|
ins_encode %{
|
||||||
|
Register base = as_Register($mem$$base); // base register of the memory operand
|
||||||
|
||||||
|
int disp = $mem$$disp; // used only as a zero / non-zero flag below
|
||||||
|
if (disp != 0) {
|
||||||
|
__ add(base, AllocatePrefetchStepSize, base); // NOTE(review): adds the step size, not disp, and appears to write the sum back into the operand's own base register -- confirm this matches the addresses the caller generates and that clobbering base is intended
|
||||||
|
}
|
||||||
|
__ stxa(G0, base, G0, ASI_BLK_INIT_QUAD_LDD_P); // store G0 via the block-init ASI; presumably addressed as [base + G0] -- verify against the assembler signature
|
||||||
|
%}
|
||||||
|
ins_pipe(istore_mem_reg);
|
||||||
|
%}
|
||||||
|
|
||||||
//----------Store Instructions-------------------------------------------------
|
//----------Store Instructions-------------------------------------------------
|
||||||
// Store Byte
|
// Store Byte
|
||||||
|
@ -86,9 +86,19 @@ void VM_Version::initialize() {
|
|||||||
if (FLAG_IS_DEFAULT(InteriorEntryAlignment)) {
|
if (FLAG_IS_DEFAULT(InteriorEntryAlignment)) {
|
||||||
FLAG_SET_DEFAULT(InteriorEntryAlignment, 4);
|
FLAG_SET_DEFAULT(InteriorEntryAlignment, 4);
|
||||||
}
|
}
|
||||||
if (is_niagara1_plus() && FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
|
if (is_niagara1_plus()) {
|
||||||
// Use smaller prefetch distance on N2
|
if (AllocatePrefetchStyle > 0 && FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
|
||||||
FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256);
|
// Use BIS instruction for allocation prefetch.
|
||||||
|
FLAG_SET_DEFAULT(AllocatePrefetchStyle, 3);
|
||||||
|
if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
|
||||||
|
// Use smaller prefetch distance on N2 with BIS
|
||||||
|
FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (AllocatePrefetchStyle != 3 && FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
|
||||||
|
// Use different prefetch distance without BIS
|
||||||
|
FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
if (FLAG_IS_DEFAULT(OptoLoopAlignment)) {
|
if (FLAG_IS_DEFAULT(OptoLoopAlignment)) {
|
||||||
|
@ -111,7 +111,22 @@ public:
|
|||||||
|
|
||||||
// Allocate size HeapWords. The memory is NOT initialized to zero.
|
// Allocate size HeapWords. The memory is NOT initialized to zero.
|
||||||
inline HeapWord* allocate(size_t size);
|
inline HeapWord* allocate(size_t size);
|
||||||
static size_t alignment_reserve() { return align_object_size(typeArrayOopDesc::header_size(T_INT)); }
|
|
||||||
|
// Reserve space at the end of TLAB
|
||||||
|
static size_t end_reserve() {
|
||||||
|
int reserve_size = typeArrayOopDesc::header_size(T_INT);
|
||||||
|
if (AllocatePrefetchStyle == 3) {
|
||||||
|
// BIS is used to prefetch - we need a space for it.
|
||||||
|
// +1 for rounding up to next cache line +1 to be safe
|
||||||
|
int lines = AllocatePrefetchLines + 2;
|
||||||
|
int step_size = AllocatePrefetchStepSize;
|
||||||
|
int distance = AllocatePrefetchDistance;
|
||||||
|
int prefetch_end = (distance + step_size*lines)/(int)HeapWordSize;
|
||||||
|
reserve_size = MAX2(reserve_size, prefetch_end);
|
||||||
|
}
|
||||||
|
return reserve_size;
|
||||||
|
}
|
||||||
|
// Reserve size in HeapWords: end_reserve() passed through align_object_size().
static size_t alignment_reserve() {
  const size_t reserve_words = end_reserve();
  return align_object_size(reserve_words);
}
|
||||||
static size_t alignment_reserve_in_bytes() { return alignment_reserve() * HeapWordSize; }
|
static size_t alignment_reserve_in_bytes() { return alignment_reserve() * HeapWordSize; }
|
||||||
|
|
||||||
// Return tlab size or remaining space in eden such that the
|
// Return tlab size or remaining space in eden such that the
|
||||||
|
@ -1487,11 +1487,11 @@ Node* PhaseMacroExpand::prefetch_allocation(Node* i_o, Node*& needgc_false,
|
|||||||
Node*& contended_phi_rawmem,
|
Node*& contended_phi_rawmem,
|
||||||
Node* old_eden_top, Node* new_eden_top,
|
Node* old_eden_top, Node* new_eden_top,
|
||||||
Node* length) {
|
Node* length) {
|
||||||
|
enum { fall_in_path = 1, pf_path = 2 };
|
||||||
if( UseTLAB && AllocatePrefetchStyle == 2 ) {
|
if( UseTLAB && AllocatePrefetchStyle == 2 ) {
|
||||||
// Generate prefetch allocation with watermark check.
|
// Generate prefetch allocation with watermark check.
|
||||||
// As an allocation hits the watermark, we will prefetch starting
|
// As an allocation hits the watermark, we will prefetch starting
|
||||||
// at a "distance" away from watermark.
|
// at a "distance" away from watermark.
|
||||||
enum { fall_in_path = 1, pf_path = 2 };
|
|
||||||
|
|
||||||
Node *pf_region = new (C, 3) RegionNode(3);
|
Node *pf_region = new (C, 3) RegionNode(3);
|
||||||
Node *pf_phi_rawmem = new (C, 3) PhiNode( pf_region, Type::MEMORY,
|
Node *pf_phi_rawmem = new (C, 3) PhiNode( pf_region, Type::MEMORY,
|
||||||
@ -1570,6 +1570,45 @@ Node* PhaseMacroExpand::prefetch_allocation(Node* i_o, Node*& needgc_false,
|
|||||||
needgc_false = pf_region;
|
needgc_false = pf_region;
|
||||||
contended_phi_rawmem = pf_phi_rawmem;
|
contended_phi_rawmem = pf_phi_rawmem;
|
||||||
i_o = pf_phi_abio;
|
i_o = pf_phi_abio;
|
||||||
|
} else if( UseTLAB && AllocatePrefetchStyle == 3 ) {
|
||||||
|
// Insert a prefetch for each allocation only on the fast-path
|
||||||
|
Node *pf_region = new (C, 3) RegionNode(3);
|
||||||
|
Node *pf_phi_rawmem = new (C, 3) PhiNode( pf_region, Type::MEMORY,
|
||||||
|
TypeRawPtr::BOTTOM );
|
||||||
|
|
||||||
|
// Generate several prefetch instructions only for arrays.
|
||||||
|
uint lines = (length != NULL) ? AllocatePrefetchLines : 1;
|
||||||
|
uint step_size = AllocatePrefetchStepSize;
|
||||||
|
uint distance = AllocatePrefetchDistance;
|
||||||
|
|
||||||
|
// Next cache address.
|
||||||
|
Node *cache_adr = new (C, 4) AddPNode(old_eden_top, old_eden_top,
|
||||||
|
_igvn.MakeConX(distance));
|
||||||
|
transform_later(cache_adr);
|
||||||
|
cache_adr = new (C, 2) CastP2XNode(needgc_false, cache_adr);
|
||||||
|
transform_later(cache_adr);
|
||||||
|
Node* mask = _igvn.MakeConX(~(intptr_t)(step_size-1));
|
||||||
|
cache_adr = new (C, 3) AndXNode(cache_adr, mask);
|
||||||
|
transform_later(cache_adr);
|
||||||
|
cache_adr = new (C, 2) CastX2PNode(cache_adr);
|
||||||
|
transform_later(cache_adr);
|
||||||
|
|
||||||
|
// Prefetch
|
||||||
|
Node *prefetch = new (C, 3) PrefetchWriteNode( contended_phi_rawmem, cache_adr );
|
||||||
|
prefetch->set_req(0, needgc_false);
|
||||||
|
transform_later(prefetch);
|
||||||
|
contended_phi_rawmem = prefetch;
|
||||||
|
Node *prefetch_adr;
|
||||||
|
distance = step_size;
|
||||||
|
for ( uint i = 1; i < lines; i++ ) {
|
||||||
|
prefetch_adr = new (C, 4) AddPNode( cache_adr, cache_adr,
|
||||||
|
_igvn.MakeConX(distance) );
|
||||||
|
transform_later(prefetch_adr);
|
||||||
|
prefetch = new (C, 3) PrefetchWriteNode( contended_phi_rawmem, prefetch_adr );
|
||||||
|
transform_later(prefetch);
|
||||||
|
distance += step_size;
|
||||||
|
contended_phi_rawmem = prefetch;
|
||||||
|
}
|
||||||
} else if( AllocatePrefetchStyle > 0 ) {
|
} else if( AllocatePrefetchStyle > 0 ) {
|
||||||
// Insert a prefetch for each allocation only on the fast-path
|
// Insert a prefetch for each allocation only on the fast-path
|
||||||
Node *prefetch_adr;
|
Node *prefetch_adr;
|
||||||
|
@ -1244,5 +1244,5 @@ public:
|
|||||||
virtual int Opcode() const;
|
virtual int Opcode() const;
|
||||||
virtual uint ideal_reg() const { return NotAMachineReg; }
|
virtual uint ideal_reg() const { return NotAMachineReg; }
|
||||||
virtual uint match_edge(uint idx) const { return idx==2; }
|
virtual uint match_edge(uint idx) const { return idx==2; }
|
||||||
virtual const Type *bottom_type() const { return Type::ABIO; }
|
virtual const Type *bottom_type() const { return ( AllocatePrefetchStyle == 3 ) ? Type::MEMORY : Type::ABIO; }
|
||||||
};
|
};
|
||||||
|
@ -2708,7 +2708,8 @@ class CommandLineFlags {
|
|||||||
product(intx, AllocatePrefetchStyle, 1, \
|
product(intx, AllocatePrefetchStyle, 1, \
|
||||||
"0 = no prefetch, " \
|
"0 = no prefetch, " \
|
||||||
"1 = prefetch instructions for each allocation, " \
|
"1 = prefetch instructions for each allocation, " \
|
||||||
"2 = use TLAB watermark to gate allocation prefetch") \
|
"2 = use TLAB watermark to gate allocation prefetch, " \
|
||||||
|
"3 = use BIS instruction on Sparc for allocation prefetch") \
|
||||||
\
|
\
|
||||||
product(intx, AllocatePrefetchDistance, -1, \
|
product(intx, AllocatePrefetchDistance, -1, \
|
||||||
"Distance to prefetch ahead of allocation pointer") \
|
"Distance to prefetch ahead of allocation pointer") \
|
||||||
|
Loading…
Reference in New Issue
Block a user