7059037: Use BIS for zeroing on T4
Use BIS (block initializing stores) to zero newly allocated large (2 KB and larger) objects and arrays. Reviewed-by: never, twisti, ysr
This commit is contained in:
parent
19f7fb98b8
commit
6446205688
@ -4973,3 +4973,65 @@ void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
|
|||||||
// Caller should set it:
|
// Caller should set it:
|
||||||
// add(G0, 1, result); // equals
|
// add(G0, 1, result); // equals
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Use BIS for zeroing (count is in bytes).
|
||||||
|
void MacroAssembler::bis_zeroing(Register to, Register count, Register temp, Label& Ldone) {
|
||||||
|
assert(UseBlockZeroing && VM_Version::has_block_zeroing(), "only works with BIS zeroing");
|
||||||
|
Register end = count;
|
||||||
|
int cache_line_size = VM_Version::prefetch_data_size();
|
||||||
|
// Minimum count when BIS zeroing can be used since
|
||||||
|
// it needs membar which is expensive.
|
||||||
|
int block_zero_size = MAX2(cache_line_size*3, (int)BlockZeroingLowLimit);
|
||||||
|
|
||||||
|
Label small_loop;
|
||||||
|
// Check if count is negative (dead code) or zero.
|
||||||
|
// Note, count uses 64bit in 64 bit VM.
|
||||||
|
cmp_and_brx_short(count, 0, Assembler::lessEqual, Assembler::pn, Ldone);
|
||||||
|
|
||||||
|
// Use BIS zeroing only for big arrays since it requires membar.
|
||||||
|
if (Assembler::is_simm13(block_zero_size)) { // < 4096
|
||||||
|
cmp(count, block_zero_size);
|
||||||
|
} else {
|
||||||
|
set(block_zero_size, temp);
|
||||||
|
cmp(count, temp);
|
||||||
|
}
|
||||||
|
br(Assembler::lessUnsigned, false, Assembler::pt, small_loop);
|
||||||
|
delayed()->add(to, count, end);
|
||||||
|
|
||||||
|
// Note: size is >= three (32 bytes) cache lines.
|
||||||
|
|
||||||
|
// Clean the beginning of space up to next cache line.
|
||||||
|
for (int offs = 0; offs < cache_line_size; offs += 8) {
|
||||||
|
stx(G0, to, offs);
|
||||||
|
}
|
||||||
|
|
||||||
|
// align to next cache line
|
||||||
|
add(to, cache_line_size, to);
|
||||||
|
and3(to, -cache_line_size, to);
|
||||||
|
|
||||||
|
// Note: size left >= two (32 bytes) cache lines.
|
||||||
|
|
||||||
|
// BIS should not be used to zero tail (64 bytes)
|
||||||
|
// to avoid zeroing a header of the following object.
|
||||||
|
sub(end, (cache_line_size*2)-8, end);
|
||||||
|
|
||||||
|
Label bis_loop;
|
||||||
|
bind(bis_loop);
|
||||||
|
stxa(G0, to, G0, Assembler::ASI_ST_BLKINIT_PRIMARY);
|
||||||
|
add(to, cache_line_size, to);
|
||||||
|
cmp_and_brx_short(to, end, Assembler::lessUnsigned, Assembler::pt, bis_loop);
|
||||||
|
|
||||||
|
// BIS needs membar.
|
||||||
|
membar(Assembler::StoreLoad);
|
||||||
|
|
||||||
|
add(end, (cache_line_size*2)-8, end); // restore end
|
||||||
|
cmp_and_brx_short(to, end, Assembler::greaterEqualUnsigned, Assembler::pn, Ldone);
|
||||||
|
|
||||||
|
// Clean the tail.
|
||||||
|
bind(small_loop);
|
||||||
|
stx(G0, to, 0);
|
||||||
|
add(to, 8, to);
|
||||||
|
cmp_and_brx_short(to, end, Assembler::lessUnsigned, Assembler::pt, small_loop);
|
||||||
|
nop(); // Separate short branches
|
||||||
|
}
|
||||||
|
|
||||||
|
@ -885,8 +885,9 @@ class Assembler : public AbstractAssembler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
enum ASIs { // page 72, v9
|
enum ASIs { // page 72, v9
|
||||||
ASI_PRIMARY = 0x80,
|
ASI_PRIMARY = 0x80,
|
||||||
ASI_PRIMARY_LITTLE = 0x88,
|
ASI_PRIMARY_NOFAULT = 0x82,
|
||||||
|
ASI_PRIMARY_LITTLE = 0x88,
|
||||||
// Block initializing store
|
// Block initializing store
|
||||||
ASI_ST_BLKINIT_PRIMARY = 0xE2,
|
ASI_ST_BLKINIT_PRIMARY = 0xE2,
|
||||||
// Most-Recently-Used (MRU) BIS variant
|
// Most-Recently-Used (MRU) BIS variant
|
||||||
@ -1786,9 +1787,12 @@ public:
|
|||||||
rs1(s) |
|
rs1(s) |
|
||||||
op3(wrreg_op3) |
|
op3(wrreg_op3) |
|
||||||
u_field(2, 29, 25) |
|
u_field(2, 29, 25) |
|
||||||
u_field(1, 13, 13) |
|
immed(true) |
|
||||||
simm(simm13a, 13)); }
|
simm(simm13a, 13)); }
|
||||||
inline void wrasi( Register d) { v9_only(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(3, 29, 25)); }
|
inline void wrasi(Register d) { v9_only(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(3, 29, 25)); }
|
||||||
|
// wrasi(d, imm) stores (d xor imm) to asi
|
||||||
|
inline void wrasi(Register d, int simm13a) { v9_only(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) |
|
||||||
|
u_field(3, 29, 25) | immed(true) | simm(simm13a, 13)); }
|
||||||
inline void wrfprs( Register d) { v9_only(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(6, 29, 25)); }
|
inline void wrfprs( Register d) { v9_only(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(6, 29, 25)); }
|
||||||
|
|
||||||
|
|
||||||
@ -2631,6 +2635,8 @@ public:
|
|||||||
void char_arrays_equals(Register ary1, Register ary2,
|
void char_arrays_equals(Register ary1, Register ary2,
|
||||||
Register limit, Register result,
|
Register limit, Register result,
|
||||||
Register chr1, Register chr2, Label& Ldone);
|
Register chr1, Register chr2, Label& Ldone);
|
||||||
|
// Use BIS for zeroing
|
||||||
|
void bis_zeroing(Register to, Register count, Register temp, Label& Ldone);
|
||||||
|
|
||||||
#undef VIRTUAL
|
#undef VIRTUAL
|
||||||
|
|
||||||
|
@ -156,9 +156,16 @@ static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) {
|
|||||||
#endif // _LP64
|
#endif // _LP64
|
||||||
}
|
}
|
||||||
|
|
||||||
|
typedef void (*_zero_Fn)(HeapWord* to, size_t count);
|
||||||
|
|
||||||
static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) {
|
static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) {
|
||||||
assert(MinObjAlignmentInBytes >= BytesPerLong, "need alternate implementation");
|
assert(MinObjAlignmentInBytes >= BytesPerLong, "need alternate implementation");
|
||||||
|
|
||||||
|
if (value == 0 && UseBlockZeroing &&
|
||||||
|
(count > (BlockZeroingLowLimit >> LogHeapWordSize))) {
|
||||||
|
// Call it only when block zeroing is used
|
||||||
|
((_zero_Fn)StubRoutines::zero_aligned_words())(tohw, count);
|
||||||
|
} else {
|
||||||
julong* to = (julong*)tohw;
|
julong* to = (julong*)tohw;
|
||||||
julong v = ((julong)value << 32) | value;
|
julong v = ((julong)value << 32) | value;
|
||||||
// If count is odd, odd will be equal to 1 on 32-bit platform
|
// If count is odd, odd will be equal to 1 on 32-bit platform
|
||||||
@ -176,6 +183,7 @@ static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value)
|
|||||||
*((juint*)to) = value;
|
*((juint*)to) = value;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void pd_fill_to_bytes(void* to, size_t count, jubyte value) {
|
static void pd_fill_to_bytes(void* to, size_t count, jubyte value) {
|
||||||
|
@ -460,6 +460,8 @@ source_hpp %{
|
|||||||
// Must be visible to the DFA in dfa_sparc.cpp
|
// Must be visible to the DFA in dfa_sparc.cpp
|
||||||
extern bool can_branch_register( Node *bol, Node *cmp );
|
extern bool can_branch_register( Node *bol, Node *cmp );
|
||||||
|
|
||||||
|
extern bool use_block_zeroing(Node* count);
|
||||||
|
|
||||||
// Macros to extract hi & lo halves from a long pair.
|
// Macros to extract hi & lo halves from a long pair.
|
||||||
// G0 is not part of any long pair, so assert on that.
|
// G0 is not part of any long pair, so assert on that.
|
||||||
// Prevents accidentally using G1 instead of G0.
|
// Prevents accidentally using G1 instead of G0.
|
||||||
@ -521,6 +523,12 @@ bool can_branch_register( Node *bol, Node *cmp ) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Decide whether a ClearArray with the given count node should be matched by
// the BIS-based zeroing instructions.
bool use_block_zeroing(Node* count) {
  if (!UseBlockZeroing) {
    return false;
  }
  // BlockZeroingLowLimit is passed as the fallback value for the constant
  // lookup, so a count that is not a constant passes the test below, as does
  // a constant count that is >= BlockZeroingLowLimit.
  return count->find_intptr_t_con(BlockZeroingLowLimit) >= BlockZeroingLowLimit;
}
|
||||||
|
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
|
|
||||||
// REQUIRED FUNCTIONALITY
|
// REQUIRED FUNCTIONALITY
|
||||||
@ -2810,25 +2818,6 @@ enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
|
|||||||
__ float_cmp( $primary, -1, Fsrc1, Fsrc2, Rdst);
|
__ float_cmp( $primary, -1, Fsrc1, Fsrc2, Rdst);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
// Compiler ensures base is doubleword aligned and cnt is count of doublewords
|
|
||||||
enc_class enc_Clear_Array(iRegX cnt, iRegP base, iRegX temp) %{
|
|
||||||
MacroAssembler _masm(&cbuf);
|
|
||||||
Register nof_bytes_arg = reg_to_register_object($cnt$$reg);
|
|
||||||
Register nof_bytes_tmp = reg_to_register_object($temp$$reg);
|
|
||||||
Register base_pointer_arg = reg_to_register_object($base$$reg);
|
|
||||||
|
|
||||||
Label loop;
|
|
||||||
__ mov(nof_bytes_arg, nof_bytes_tmp);
|
|
||||||
|
|
||||||
// Loop and clear, walking backwards through the array.
|
|
||||||
// nof_bytes_tmp (if >0) is always the number of bytes to zero
|
|
||||||
__ bind(loop);
|
|
||||||
__ deccc(nof_bytes_tmp, 8);
|
|
||||||
__ br(Assembler::greaterEqual, true, Assembler::pt, loop);
|
|
||||||
__ delayed()-> stx(G0, base_pointer_arg, nof_bytes_tmp);
|
|
||||||
// %%%% this mini-loop must not cross a cache boundary!
|
|
||||||
%}
|
|
||||||
|
|
||||||
|
|
||||||
enc_class enc_String_Compare(o0RegP str1, o1RegP str2, g3RegI cnt1, g4RegI cnt2, notemp_iRegI result) %{
|
enc_class enc_String_Compare(o0RegP str1, o1RegP str2, g3RegI cnt1, g4RegI cnt2, notemp_iRegI result) %{
|
||||||
Label Ldone, Lloop;
|
Label Ldone, Lloop;
|
||||||
@ -10257,9 +10246,9 @@ instruct cmpFastUnlock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, o
|
|||||||
ins_pipe(long_memory_op);
|
ins_pipe(long_memory_op);
|
||||||
%}
|
%}
|
||||||
|
|
||||||
// Count and Base registers are fixed because the allocator cannot
|
// The encodings are generic.
|
||||||
// kill unknown registers. The encodings are generic.
|
|
||||||
instruct clear_array(iRegX cnt, iRegP base, iRegX temp, Universe dummy, flagsReg ccr) %{
|
instruct clear_array(iRegX cnt, iRegP base, iRegX temp, Universe dummy, flagsReg ccr) %{
|
||||||
|
predicate(!use_block_zeroing(n->in(2)) );
|
||||||
match(Set dummy (ClearArray cnt base));
|
match(Set dummy (ClearArray cnt base));
|
||||||
effect(TEMP temp, KILL ccr);
|
effect(TEMP temp, KILL ccr);
|
||||||
ins_cost(300);
|
ins_cost(300);
|
||||||
@ -10267,7 +10256,71 @@ instruct clear_array(iRegX cnt, iRegP base, iRegX temp, Universe dummy, flagsReg
|
|||||||
"loop: SUBcc $temp,8,$temp\t! Count down a dword of bytes\n"
|
"loop: SUBcc $temp,8,$temp\t! Count down a dword of bytes\n"
|
||||||
" BRge loop\t\t! Clearing loop\n"
|
" BRge loop\t\t! Clearing loop\n"
|
||||||
" STX G0,[$base+$temp]\t! delay slot" %}
|
" STX G0,[$base+$temp]\t! delay slot" %}
|
||||||
ins_encode( enc_Clear_Array(cnt, base, temp) );
|
|
||||||
|
ins_encode %{
|
||||||
|
// Compiler ensures base is doubleword aligned and cnt is count of doublewords
|
||||||
|
Register nof_bytes_arg = $cnt$$Register;
|
||||||
|
Register nof_bytes_tmp = $temp$$Register;
|
||||||
|
Register base_pointer_arg = $base$$Register;
|
||||||
|
|
||||||
|
Label loop;
|
||||||
|
__ mov(nof_bytes_arg, nof_bytes_tmp);
|
||||||
|
|
||||||
|
// Loop and clear, walking backwards through the array.
|
||||||
|
// nof_bytes_tmp (if >0) is always the number of bytes to zero
|
||||||
|
__ bind(loop);
|
||||||
|
__ deccc(nof_bytes_tmp, 8);
|
||||||
|
__ br(Assembler::greaterEqual, true, Assembler::pt, loop);
|
||||||
|
__ delayed()-> stx(G0, base_pointer_arg, nof_bytes_tmp);
|
||||||
|
// %%%% this mini-loop must not cross a cache boundary!
|
||||||
|
%}
|
||||||
|
ins_pipe(long_memory_op);
|
||||||
|
%}
|
||||||
|
|
||||||
|
// ClearArray using BIS zeroing, for the case where BlockZeroingLowLimit fits
// in a 13-bit signed immediate. No scratch register is allocated here and G0
// is handed to bis_zeroing() as 'temp', so this rule must not match when the
// limit has to be materialized in a register; clear_array_bis_2 covers that
// case. The two predicates are complementary so only one rule ever applies.
instruct clear_array_bis(g1RegX cnt, o0RegP base, Universe dummy, flagsReg ccr) %{
  predicate(use_block_zeroing(n->in(2)) && Assembler::is_simm13((int)BlockZeroingLowLimit));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, KILL ccr);
  ins_cost(300);
  format %{ "CLEAR [$base, $cnt]\t! ClearArray" %}

  ins_encode %{

    assert(MinObjAlignmentInBytes >= BytesPerLong, "need alternate implementation");
    Register to = $base$$Register;
    Register count = $cnt$$Register;

    Label Ldone;
    __ nop(); // Separate short branches
    // Use BIS for zeroing (temp is not used because the limit is simm13).
    __ bis_zeroing(to, count, G0, Ldone);
    __ bind(Ldone);

  %}
  ins_pipe(long_memory_op);
%}
|
||||||
|
|
||||||
|
// ClearArray using BIS zeroing, for the case where BlockZeroingLowLimit does
// not fit in a 13-bit signed immediate. A TEMP register is reserved so that
// bis_zeroing() can load the limit into it before comparing.
instruct clear_array_bis_2(g1RegX cnt, o0RegP base, iRegX tmp, Universe dummy, flagsReg ccr) %{
  predicate(use_block_zeroing(n->in(2)) && !Assembler::is_simm13((int)BlockZeroingLowLimit));
  match(Set dummy (ClearArray cnt base));
  effect(TEMP tmp, USE_KILL cnt, USE_KILL base, KILL ccr);
  ins_cost(300);
  format %{ "CLEAR [$base, $cnt]\t! ClearArray" %}

  ins_encode %{
    assert(MinObjAlignmentInBytes >= BytesPerLong, "need alternate implementation");

    Label done;
    __ nop(); // Separate short branches
    // base and cnt are both consumed (USE_KILL) by the zeroing sequence;
    // tmp is the scratch register for the limit.
    __ bis_zeroing($base$$Register, $cnt$$Register, $tmp$$Register, done);
    __ bind(done);
  %}
  ins_pipe(long_memory_op);
%}
|
||||||
|
|
||||||
|
@ -3069,6 +3069,34 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
return start;
|
return start;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// Generate stub for heap zeroing.
|
||||||
|
// "to" address is aligned to jlong (8 bytes).
|
||||||
|
//
|
||||||
|
// Arguments for generated stub:
|
||||||
|
// to: O0
|
||||||
|
// count: O1 treated as signed (count of HeapWord)
|
||||||
|
// count could be 0
|
||||||
|
//
|
||||||
|
address generate_zero_aligned_words(const char* name) {
|
||||||
|
__ align(CodeEntryAlignment);
|
||||||
|
StubCodeMark mark(this, "StubRoutines", name);
|
||||||
|
address start = __ pc();
|
||||||
|
|
||||||
|
const Register to = O0; // source array address
|
||||||
|
const Register count = O1; // HeapWords count
|
||||||
|
const Register temp = O2; // scratch
|
||||||
|
|
||||||
|
Label Ldone;
|
||||||
|
__ sllx(count, LogHeapWordSize, count); // to bytes count
|
||||||
|
// Use BIS for zeroing
|
||||||
|
__ bis_zeroing(to, count, temp, Ldone);
|
||||||
|
__ bind(Ldone);
|
||||||
|
__ retl();
|
||||||
|
__ delayed()->nop();
|
||||||
|
return start;
|
||||||
|
}
|
||||||
|
|
||||||
void generate_arraycopy_stubs() {
|
void generate_arraycopy_stubs() {
|
||||||
address entry;
|
address entry;
|
||||||
address entry_jbyte_arraycopy;
|
address entry_jbyte_arraycopy;
|
||||||
@ -3195,6 +3223,10 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
|
StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
|
||||||
StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
|
StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
|
||||||
StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
|
StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
|
||||||
|
|
||||||
|
if (UseBlockZeroing) {
|
||||||
|
StubRoutines::_zero_aligned_words = generate_zero_aligned_words("zero_aligned_words");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void generate_initial() {
|
void generate_initial() {
|
||||||
|
@ -3374,7 +3374,7 @@ void TemplateTable::_new() {
|
|||||||
|
|
||||||
if(UseTLAB) {
|
if(UseTLAB) {
|
||||||
Register RoldTopValue = RallocatedObject;
|
Register RoldTopValue = RallocatedObject;
|
||||||
Register RtopAddr = G3_scratch, RtlabWasteLimitValue = G3_scratch;
|
Register RtlabWasteLimitValue = G3_scratch;
|
||||||
Register RnewTopValue = G1_scratch;
|
Register RnewTopValue = G1_scratch;
|
||||||
Register RendValue = Rscratch;
|
Register RendValue = Rscratch;
|
||||||
Register RfreeValue = RnewTopValue;
|
Register RfreeValue = RnewTopValue;
|
||||||
@ -3455,7 +3455,11 @@ void TemplateTable::_new() {
|
|||||||
__ delayed()->add(RallocatedObject, sizeof(oopDesc), G3_scratch);
|
__ delayed()->add(RallocatedObject, sizeof(oopDesc), G3_scratch);
|
||||||
|
|
||||||
// initialize remaining object fields
|
// initialize remaining object fields
|
||||||
{ Label loop;
|
if (UseBlockZeroing) {
|
||||||
|
// Use BIS for zeroing
|
||||||
|
__ bis_zeroing(G3_scratch, Roffset, G1_scratch, initialize_header);
|
||||||
|
} else {
|
||||||
|
Label loop;
|
||||||
__ subcc(Roffset, wordSize, Roffset);
|
__ subcc(Roffset, wordSize, Roffset);
|
||||||
__ bind(loop);
|
__ bind(loop);
|
||||||
//__ subcc(Roffset, wordSize, Roffset); // executed above loop or in delay slot
|
//__ subcc(Roffset, wordSize, Roffset); // executed above loop or in delay slot
|
||||||
|
@ -170,6 +170,16 @@ void VM_Version::initialize() {
|
|||||||
FLAG_SET_DEFAULT(UseCBCond, false);
|
FLAG_SET_DEFAULT(UseCBCond, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
assert(BlockZeroingLowLimit > 0, "invalid value");
|
||||||
|
if (has_block_zeroing()) {
|
||||||
|
if (FLAG_IS_DEFAULT(UseBlockZeroing)) {
|
||||||
|
FLAG_SET_DEFAULT(UseBlockZeroing, true);
|
||||||
|
}
|
||||||
|
} else if (UseBlockZeroing) {
|
||||||
|
warning("BIS zeroing instructions are not available on this CPU");
|
||||||
|
FLAG_SET_DEFAULT(UseBlockZeroing, false);
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef COMPILER2
|
#ifdef COMPILER2
|
||||||
// T4 and newer Sparc cpus have fast RDPC.
|
// T4 and newer Sparc cpus have fast RDPC.
|
||||||
if (has_fast_rdpc() && FLAG_IS_DEFAULT(UseRDPCForConstantTableBase)) {
|
if (has_fast_rdpc() && FLAG_IS_DEFAULT(UseRDPCForConstantTableBase)) {
|
||||||
|
@ -135,8 +135,8 @@ public:
|
|||||||
// T4 and newer Sparc have fast RDPC instruction.
|
// T4 and newer Sparc have fast RDPC instruction.
|
||||||
static bool has_fast_rdpc() { return is_T4(); }
|
static bool has_fast_rdpc() { return is_T4(); }
|
||||||
|
|
||||||
// T4 and newer Sparc have Most-Recently-Used (MRU) BIS.
|
// On T4 and newer Sparc BIS to the beginning of cache line always zeros it.
|
||||||
static bool has_mru_blk_init() { return has_blk_init() && is_T4(); }
|
static bool has_block_zeroing() { return has_blk_init() && is_T4(); }
|
||||||
|
|
||||||
static const char* cpu_features() { return _features_str; }
|
static const char* cpu_features() { return _features_str; }
|
||||||
|
|
||||||
|
@ -157,8 +157,14 @@ HeapWord* CollectedHeap::allocate_from_tlab_slow(Thread* thread, size_t size) {
|
|||||||
// ..and clear it.
|
// ..and clear it.
|
||||||
Copy::zero_to_words(obj, new_tlab_size);
|
Copy::zero_to_words(obj, new_tlab_size);
|
||||||
} else {
|
} else {
|
||||||
// ...and clear just the allocated object.
|
// ...and zap just allocated object.
|
||||||
Copy::zero_to_words(obj, size);
|
#ifdef ASSERT
|
||||||
|
// Skip mangling the space corresponding to the object header to
|
||||||
|
// ensure that the returned space is not considered parsable by
|
||||||
|
// any concurrent GC thread.
|
||||||
|
size_t hdr_size = oopDesc::header_size();
|
||||||
|
Copy::fill_to_words(obj + hdr_size, new_tlab_size - hdr_size, badHeapWordVal);
|
||||||
|
#endif // ASSERT
|
||||||
}
|
}
|
||||||
thread->tlab().fill(obj, obj + size, new_tlab_size);
|
thread->tlab().fill(obj, obj + size, new_tlab_size);
|
||||||
return obj;
|
return obj;
|
||||||
|
@ -287,7 +287,10 @@ oop CollectedHeap::permanent_obj_allocate_no_klass_install(KlassHandle klass,
|
|||||||
assert(size >= 0, "int won't convert to size_t");
|
assert(size >= 0, "int won't convert to size_t");
|
||||||
HeapWord* obj = common_permanent_mem_allocate_init(size, CHECK_NULL);
|
HeapWord* obj = common_permanent_mem_allocate_init(size, CHECK_NULL);
|
||||||
post_allocation_setup_no_klass_install(klass, obj, size);
|
post_allocation_setup_no_klass_install(klass, obj, size);
|
||||||
NOT_PRODUCT(Universe::heap()->check_for_bad_heap_word_value(obj, size));
|
#ifndef PRODUCT
|
||||||
|
const size_t hs = oopDesc::header_size();
|
||||||
|
Universe::heap()->check_for_bad_heap_word_value(obj+hs, size-hs);
|
||||||
|
#endif
|
||||||
return (oop)obj;
|
return (oop)obj;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -63,8 +63,10 @@ constantPoolCacheOop constantPoolCacheKlass::allocate(int length,
|
|||||||
// CollectedHeap::permanent_obj_allocate(klass, size, CHECK_NULL);
|
// CollectedHeap::permanent_obj_allocate(klass, size, CHECK_NULL);
|
||||||
|
|
||||||
oop obj = CollectedHeap::permanent_obj_allocate_no_klass_install(klass, size, CHECK_NULL);
|
oop obj = CollectedHeap::permanent_obj_allocate_no_klass_install(klass, size, CHECK_NULL);
|
||||||
NOT_PRODUCT(Universe::heap()->check_for_bad_heap_word_value((HeapWord*) obj,
|
#ifndef PRODUCT
|
||||||
size));
|
const size_t hs = oopDesc::header_size();
|
||||||
|
Universe::heap()->check_for_bad_heap_word_value(((HeapWord*) obj)+hs, size-hs);
|
||||||
|
#endif
|
||||||
constantPoolCacheOop cache = (constantPoolCacheOop) obj;
|
constantPoolCacheOop cache = (constantPoolCacheOop) obj;
|
||||||
assert(!UseConcMarkSweepGC || obj->klass_or_null() == NULL,
|
assert(!UseConcMarkSweepGC || obj->klass_or_null() == NULL,
|
||||||
"klass should be NULL here when using CMS");
|
"klass should be NULL here when using CMS");
|
||||||
|
@ -1979,6 +1979,12 @@ class CommandLineFlags {
|
|||||||
product(bool, TLABStats, true, \
|
product(bool, TLABStats, true, \
|
||||||
"Print various TLAB related information") \
|
"Print various TLAB related information") \
|
||||||
\
|
\
|
||||||
|
product(bool, UseBlockZeroing, false, \
|
||||||
|
"Use special cpu instructions for block zeroing") \
|
||||||
|
\
|
||||||
|
product(intx, BlockZeroingLowLimit, 2048, \
|
||||||
|
"Minimum size in bytes when block zeroing will be used") \
|
||||||
|
\
|
||||||
product(bool, PrintRevisitStats, false, \
|
product(bool, PrintRevisitStats, false, \
|
||||||
"Print revisit (klass and MDO) stack related information") \
|
"Print revisit (klass and MDO) stack related information") \
|
||||||
\
|
\
|
||||||
|
@ -108,6 +108,7 @@ address StubRoutines::_arrayof_jlong_disjoint_arraycopy = CAST_FROM_FN_PTR(addr
|
|||||||
address StubRoutines::_arrayof_oop_disjoint_arraycopy = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_oop_copy);
|
address StubRoutines::_arrayof_oop_disjoint_arraycopy = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_oop_copy);
|
||||||
address StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_oop_copy_uninit);
|
address StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_oop_copy_uninit);
|
||||||
|
|
||||||
|
address StubRoutines::_zero_aligned_words = CAST_FROM_FN_PTR(address, Copy::zero_to_words);
|
||||||
|
|
||||||
address StubRoutines::_checkcast_arraycopy = NULL;
|
address StubRoutines::_checkcast_arraycopy = NULL;
|
||||||
address StubRoutines::_checkcast_arraycopy_uninit = NULL;
|
address StubRoutines::_checkcast_arraycopy_uninit = NULL;
|
||||||
|
@ -199,6 +199,9 @@ class StubRoutines: AllStatic {
|
|||||||
static address _arrayof_jshort_fill;
|
static address _arrayof_jshort_fill;
|
||||||
static address _arrayof_jint_fill;
|
static address _arrayof_jint_fill;
|
||||||
|
|
||||||
|
// zero heap space aligned to jlong (8 bytes)
|
||||||
|
static address _zero_aligned_words;
|
||||||
|
|
||||||
// These are versions of the java.lang.Math methods which perform
|
// These are versions of the java.lang.Math methods which perform
|
||||||
// the same operations as the intrinsic version. They are used for
|
// the same operations as the intrinsic version. They are used for
|
||||||
// constant folding in the compiler to ensure equivalence. If the
|
// constant folding in the compiler to ensure equivalence. If the
|
||||||
@ -332,6 +335,7 @@ class StubRoutines: AllStatic {
|
|||||||
|
|
||||||
static address select_fill_function(BasicType t, bool aligned, const char* &name);
|
static address select_fill_function(BasicType t, bool aligned, const char* &name);
|
||||||
|
|
||||||
|
static address zero_aligned_words() { return _zero_aligned_words; }
|
||||||
|
|
||||||
static double intrinsic_log(double d) {
|
static double intrinsic_log(double d) {
|
||||||
assert(_intrinsic_log != NULL, "must be defined");
|
assert(_intrinsic_log != NULL, "must be defined");
|
||||||
|
Loading…
Reference in New Issue
Block a user