8270947: AArch64: C1: use zero_words to initialize all objects
Reviewed-by: ngasson, adinn
This commit is contained in:
parent
cd7e30ef84
commit
6c68ce2d39
@ -14983,12 +14983,12 @@ instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlag
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
|
||||
|
||||
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
|
||||
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
|
||||
%{
|
||||
predicate((uint64_t)n->in(2)->get_long()
|
||||
< (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
|
||||
match(Set dummy (ClearArray cnt base));
|
||||
effect(USE_KILL base);
|
||||
effect(TEMP temp, USE_KILL base, KILL cr);
|
||||
|
||||
ins_cost(4 * INSN_COST);
|
||||
format %{ "ClearArray $cnt, $base" %}
|
||||
|
@ -1127,8 +1127,8 @@ void LIRGenerator::do_NewInstance(NewInstance* x) {
|
||||
CodeEmitInfo* info = state_for(x, x->state());
|
||||
LIR_Opr reg = result_register_for(x->type());
|
||||
new_instance(reg, x->klass(), x->is_unresolved(),
|
||||
FrameMap::r2_oop_opr,
|
||||
FrameMap::r5_oop_opr,
|
||||
FrameMap::r10_oop_opr,
|
||||
FrameMap::r11_oop_opr,
|
||||
FrameMap::r4_oop_opr,
|
||||
LIR_OprFact::illegalOpr,
|
||||
FrameMap::r3_metadata_opr, info);
|
||||
@ -1143,8 +1143,8 @@ void LIRGenerator::do_NewTypeArray(NewTypeArray* x) {
|
||||
length.load_item_force(FrameMap::r19_opr);
|
||||
|
||||
LIR_Opr reg = result_register_for(x->type());
|
||||
LIR_Opr tmp1 = FrameMap::r2_oop_opr;
|
||||
LIR_Opr tmp2 = FrameMap::r4_oop_opr;
|
||||
LIR_Opr tmp1 = FrameMap::r10_oop_opr;
|
||||
LIR_Opr tmp2 = FrameMap::r11_oop_opr;
|
||||
LIR_Opr tmp3 = FrameMap::r5_oop_opr;
|
||||
LIR_Opr tmp4 = reg;
|
||||
LIR_Opr klass_reg = FrameMap::r3_metadata_opr;
|
||||
@ -1172,8 +1172,8 @@ void LIRGenerator::do_NewObjectArray(NewObjectArray* x) {
|
||||
CodeEmitInfo* info = state_for(x, x->state());
|
||||
|
||||
LIR_Opr reg = result_register_for(x->type());
|
||||
LIR_Opr tmp1 = FrameMap::r2_oop_opr;
|
||||
LIR_Opr tmp2 = FrameMap::r4_oop_opr;
|
||||
LIR_Opr tmp1 = FrameMap::r10_oop_opr;
|
||||
LIR_Opr tmp2 = FrameMap::r11_oop_opr;
|
||||
LIR_Opr tmp3 = FrameMap::r5_oop_opr;
|
||||
LIR_Opr tmp4 = reg;
|
||||
LIR_Opr klass_reg = FrameMap::r3_metadata_opr;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
||||
* Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -180,20 +180,24 @@ void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register
|
||||
}
|
||||
|
||||
// preserves obj, destroys len_in_bytes
|
||||
void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1) {
|
||||
//
|
||||
// Scratch registers: t1 = r10, t2 = r11
|
||||
//
|
||||
void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1, Register t2) {
|
||||
assert(hdr_size_in_bytes >= 0, "header size must be positive or 0");
|
||||
assert(t1 == r10 && t2 == r11, "must be");
|
||||
|
||||
Label done;
|
||||
|
||||
// len_in_bytes is positive and ptr sized
|
||||
subs(len_in_bytes, len_in_bytes, hdr_size_in_bytes);
|
||||
br(Assembler::EQ, done);
|
||||
|
||||
// Preserve obj
|
||||
if (hdr_size_in_bytes)
|
||||
add(obj, obj, hdr_size_in_bytes);
|
||||
zero_memory(obj, len_in_bytes, t1);
|
||||
if (hdr_size_in_bytes)
|
||||
sub(obj, obj, hdr_size_in_bytes);
|
||||
// zero_words() takes ptr in r10 and count in words in r11
|
||||
mov(rscratch1, len_in_bytes);
|
||||
lea(t1, Address(obj, hdr_size_in_bytes));
|
||||
lsr(t2, rscratch1, LogBytesPerWord);
|
||||
zero_words(t1, t2);
|
||||
|
||||
bind(done);
|
||||
}
|
||||
@ -208,6 +212,7 @@ void C1_MacroAssembler::allocate_object(Register obj, Register t1, Register t2,
|
||||
initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2, UseTLAB);
|
||||
}
|
||||
|
||||
// Scratch registers: t1 = r10, t2 = r11
|
||||
void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2, bool is_tlab_allocated) {
|
||||
assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0,
|
||||
"con_size_in_bytes is not multiple of alignment");
|
||||
@ -218,45 +223,13 @@ void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register
|
||||
if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) {
|
||||
// clear rest of allocated space
|
||||
const Register index = t2;
|
||||
const int threshold = 16 * BytesPerWord; // approximate break even point for code size (see comments below)
|
||||
if (var_size_in_bytes != noreg) {
|
||||
mov(index, var_size_in_bytes);
|
||||
initialize_body(obj, index, hdr_size_in_bytes, t1);
|
||||
} else if (con_size_in_bytes <= threshold) {
|
||||
// use explicit null stores
|
||||
int i = hdr_size_in_bytes;
|
||||
if (i < con_size_in_bytes && (con_size_in_bytes % (2 * BytesPerWord))) {
|
||||
str(zr, Address(obj, i));
|
||||
i += BytesPerWord;
|
||||
}
|
||||
for (; i < con_size_in_bytes; i += 2 * BytesPerWord)
|
||||
stp(zr, zr, Address(obj, i));
|
||||
initialize_body(obj, index, hdr_size_in_bytes, t1, t2);
|
||||
} else if (con_size_in_bytes > hdr_size_in_bytes) {
|
||||
block_comment("zero memory");
|
||||
// use loop to null out the fields
|
||||
|
||||
int words = (con_size_in_bytes - hdr_size_in_bytes) / BytesPerWord;
|
||||
mov(index, words / 8);
|
||||
|
||||
const int unroll = 8; // Number of str(zr) instructions we'll unroll
|
||||
int remainder = words % unroll;
|
||||
lea(rscratch1, Address(obj, hdr_size_in_bytes + remainder * BytesPerWord));
|
||||
|
||||
Label entry_point, loop;
|
||||
b(entry_point);
|
||||
|
||||
bind(loop);
|
||||
sub(index, index, 1);
|
||||
for (int i = -unroll; i < 0; i++) {
|
||||
if (-i == remainder)
|
||||
bind(entry_point);
|
||||
str(zr, Address(rscratch1, i * wordSize));
|
||||
}
|
||||
if (remainder == 0)
|
||||
bind(entry_point);
|
||||
add(rscratch1, rscratch1, unroll * wordSize);
|
||||
cbnz(index, loop);
|
||||
|
||||
con_size_in_bytes -= hdr_size_in_bytes;
|
||||
lea(t1, Address(obj, hdr_size_in_bytes));
|
||||
zero_words(t1, con_size_in_bytes / BytesPerWord);
|
||||
}
|
||||
}
|
||||
|
||||
@ -291,8 +264,7 @@ void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1,
|
||||
initialize_header(obj, klass, len, t1, t2);
|
||||
|
||||
// clear rest of allocated space
|
||||
const Register len_zero = len;
|
||||
initialize_body(obj, arr_size, header_size * BytesPerWord, len_zero);
|
||||
initialize_body(obj, arr_size, header_size * BytesPerWord, t1, t2);
|
||||
|
||||
membar(StoreStore);
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
|
||||
* Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -48,7 +48,7 @@ using MacroAssembler::null_check;
|
||||
);
|
||||
|
||||
void initialize_header(Register obj, Register klass, Register len, Register t1, Register t2);
|
||||
void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1);
|
||||
void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1, Register t2);
|
||||
|
||||
void float_cmp(bool is_float, int unordered_result,
|
||||
FloatRegister f0, FloatRegister f1,
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
||||
* Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -656,9 +656,9 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
|
||||
if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) &&
|
||||
!UseTLAB && Universe::heap()->supports_inline_contig_alloc()) {
|
||||
Label slow_path;
|
||||
Register obj_size = r2;
|
||||
Register t1 = r19;
|
||||
Register t2 = r4;
|
||||
Register obj_size = r19;
|
||||
Register t1 = r10;
|
||||
Register t2 = r11;
|
||||
assert_different_registers(klass, obj, obj_size, t1, t2);
|
||||
|
||||
__ stp(r19, zr, Address(__ pre(sp, -2 * wordSize)));
|
||||
@ -769,9 +769,9 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
|
||||
// allocations.
|
||||
// Otherwise, just go to the slow path.
|
||||
if (!UseTLAB && Universe::heap()->supports_inline_contig_alloc()) {
|
||||
Register arr_size = r4;
|
||||
Register t1 = r2;
|
||||
Register t2 = r5;
|
||||
Register arr_size = r5;
|
||||
Register t1 = r10;
|
||||
Register t2 = r11;
|
||||
Label slow_path;
|
||||
assert_different_registers(length, klass, obj, arr_size, t1, t2);
|
||||
|
||||
@ -801,7 +801,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
|
||||
__ andr(t1, t1, Klass::_lh_header_size_mask);
|
||||
__ sub(arr_size, arr_size, t1); // body length
|
||||
__ add(t1, t1, obj); // body start
|
||||
__ initialize_body(t1, arr_size, 0, t2);
|
||||
__ initialize_body(t1, arr_size, 0, t1, t2);
|
||||
__ membar(Assembler::StoreStore);
|
||||
__ verify_oop(obj);
|
||||
|
||||
|
@ -4088,68 +4088,6 @@ void MacroAssembler::eden_allocate(Register obj,
|
||||
bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case);
|
||||
}
|
||||
|
||||
// Zero words; len is in bytes
|
||||
// Destroys all registers except addr
|
||||
// len must be a nonzero multiple of wordSize
|
||||
void MacroAssembler::zero_memory(Register addr, Register len, Register t1) {
|
||||
assert_different_registers(addr, len, t1, rscratch1, rscratch2);
|
||||
|
||||
#ifdef ASSERT
|
||||
{ Label L;
|
||||
tst(len, BytesPerWord - 1);
|
||||
br(Assembler::EQ, L);
|
||||
stop("len is not a multiple of BytesPerWord");
|
||||
bind(L);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef PRODUCT
|
||||
block_comment("zero memory");
|
||||
#endif
|
||||
|
||||
Label loop;
|
||||
Label entry;
|
||||
|
||||
// Algorithm:
|
||||
//
|
||||
// scratch1 = cnt & 7;
|
||||
// cnt -= scratch1;
|
||||
// p += scratch1;
|
||||
// switch (scratch1) {
|
||||
// do {
|
||||
// cnt -= 8;
|
||||
// p[-8] = 0;
|
||||
// case 7:
|
||||
// p[-7] = 0;
|
||||
// case 6:
|
||||
// p[-6] = 0;
|
||||
// // ...
|
||||
// case 1:
|
||||
// p[-1] = 0;
|
||||
// case 0:
|
||||
// p += 8;
|
||||
// } while (cnt);
|
||||
// }
|
||||
|
||||
const int unroll = 8; // Number of str(zr) instructions we'll unroll
|
||||
|
||||
lsr(len, len, LogBytesPerWord);
|
||||
andr(rscratch1, len, unroll - 1); // rscratch1 = cnt % unroll
|
||||
sub(len, len, rscratch1); // cnt -= cnt % unroll
|
||||
// t1 always points to the end of the region we're about to zero
|
||||
add(t1, addr, rscratch1, Assembler::LSL, LogBytesPerWord);
|
||||
adr(rscratch2, entry);
|
||||
sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2);
|
||||
br(rscratch2);
|
||||
bind(loop);
|
||||
sub(len, len, unroll);
|
||||
for (int i = -unroll; i < 0; i++)
|
||||
Assembler::str(zr, Address(t1, i * wordSize));
|
||||
bind(entry);
|
||||
add(t1, t1, unroll * wordSize);
|
||||
cbnz(len, loop);
|
||||
}
|
||||
|
||||
void MacroAssembler::verify_tlab() {
|
||||
#ifdef ASSERT
|
||||
if (UseTLAB && VerifyOops) {
|
||||
@ -4668,10 +4606,11 @@ void MacroAssembler::string_equals(Register a1, Register a2,
|
||||
// handle anything smaller than this ourselves in zero_words().
|
||||
const int MacroAssembler::zero_words_block_size = 8;
|
||||
|
||||
// zero_words() is used by C2 ClearArray patterns. It is as small as
|
||||
// possible, handling small word counts locally and delegating
|
||||
// anything larger to the zero_blocks stub. It is expanded many times
|
||||
// in compiled code, so it is important to keep it short.
|
||||
// zero_words() is used by C2 ClearArray patterns and by
|
||||
// C1_MacroAssembler. It is as small as possible, handling small word
|
||||
// counts locally and delegating anything larger to the zero_blocks
|
||||
// stub. It is expanded many times in compiled code, so it is
|
||||
// important to keep it short.
|
||||
|
||||
// ptr: Address of a buffer to be zeroed.
|
||||
// cnt: Count in HeapWords.
|
||||
@ -4680,32 +4619,46 @@ const int MacroAssembler::zero_words_block_size = 8;
|
||||
address MacroAssembler::zero_words(Register ptr, Register cnt)
|
||||
{
|
||||
assert(is_power_of_2(zero_words_block_size), "adjust this");
|
||||
assert(ptr == r10 && cnt == r11, "mismatch in register usage");
|
||||
|
||||
BLOCK_COMMENT("zero_words {");
|
||||
cmp(cnt, (u1)zero_words_block_size);
|
||||
assert(ptr == r10 && cnt == r11, "mismatch in register usage");
|
||||
RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::aarch64::zero_blocks());
|
||||
assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated");
|
||||
|
||||
subs(rscratch1, cnt, zero_words_block_size);
|
||||
Label around;
|
||||
br(LO, around);
|
||||
{
|
||||
RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::aarch64::zero_blocks());
|
||||
assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated");
|
||||
if (StubRoutines::aarch64::complete()) {
|
||||
// Make sure this is a C2 compilation. C1 allocates space only for
|
||||
// trampoline stubs generated by Call LIR ops, and in any case it
|
||||
// makes sense for a C1 compilation task to proceed as quickly as
|
||||
// possible.
|
||||
CompileTask* task;
|
||||
if (StubRoutines::aarch64::complete()
|
||||
&& Thread::current()->is_Compiler_thread()
|
||||
&& (task = ciEnv::current()->task())
|
||||
&& is_c2_compile(task->comp_level())) {
|
||||
address tpc = trampoline_call(zero_blocks);
|
||||
if (tpc == NULL) {
|
||||
DEBUG_ONLY(reset_labels(around));
|
||||
postcond(pc() == badAddress);
|
||||
assert(false, "failed to allocate space for trampoline");
|
||||
return NULL;
|
||||
}
|
||||
} else {
|
||||
bl(zero_blocks);
|
||||
far_call(zero_blocks);
|
||||
}
|
||||
}
|
||||
bind(around);
|
||||
|
||||
// We have a few words left to do. zero_blocks has adjusted r10 and r11
|
||||
// for us.
|
||||
for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) {
|
||||
Label l;
|
||||
tbz(cnt, exact_log2(i), l);
|
||||
for (int j = 0; j < i; j += 2) {
|
||||
stp(zr, zr, post(ptr, 16));
|
||||
stp(zr, zr, post(ptr, 2 * BytesPerWord));
|
||||
}
|
||||
bind(l);
|
||||
}
|
||||
@ -4715,46 +4668,56 @@ address MacroAssembler::zero_words(Register ptr, Register cnt)
|
||||
str(zr, Address(ptr));
|
||||
bind(l);
|
||||
}
|
||||
|
||||
BLOCK_COMMENT("} zero_words");
|
||||
postcond(pc() != badAddress);
|
||||
return pc();
|
||||
}
|
||||
|
||||
// base: Address of a buffer to be zeroed, 8-byte aligned.
|
||||
// cnt: Immediate count in HeapWords.
|
||||
#define SmallArraySize (18 * BytesPerLong)
|
||||
//
|
||||
// r10, r11, rscratch1, and rscratch2 are clobbered.
|
||||
void MacroAssembler::zero_words(Register base, uint64_t cnt)
|
||||
{
|
||||
BLOCK_COMMENT("zero_words {");
|
||||
int i = cnt & 1; // store any odd word to start
|
||||
if (i) str(zr, Address(base));
|
||||
|
||||
if (cnt <= SmallArraySize / BytesPerLong) {
|
||||
guarantee(zero_words_block_size < BlockZeroingLowLimit,
|
||||
"increase BlockZeroingLowLimit");
|
||||
if (cnt <= (uint64_t)BlockZeroingLowLimit / BytesPerWord) {
|
||||
#ifndef PRODUCT
|
||||
{
|
||||
char buf[64];
|
||||
snprintf(buf, sizeof buf, "zero_words (count = %" PRIu64 ") {", cnt);
|
||||
BLOCK_COMMENT(buf);
|
||||
}
|
||||
#endif
|
||||
if (cnt >= 16) {
|
||||
uint64_t loops = cnt/16;
|
||||
if (loops > 1) {
|
||||
mov(rscratch2, loops - 1);
|
||||
}
|
||||
{
|
||||
Label loop;
|
||||
bind(loop);
|
||||
for (int i = 0; i < 16; i += 2) {
|
||||
stp(zr, zr, Address(base, i * BytesPerWord));
|
||||
}
|
||||
add(base, base, 16 * BytesPerWord);
|
||||
if (loops > 1) {
|
||||
subs(rscratch2, rscratch2, 1);
|
||||
br(GE, loop);
|
||||
}
|
||||
}
|
||||
}
|
||||
cnt %= 16;
|
||||
int i = cnt & 1; // store any odd word to start
|
||||
if (i) str(zr, Address(base));
|
||||
for (; i < (int)cnt; i += 2) {
|
||||
stp(zr, zr, Address(base, i * wordSize));
|
||||
}
|
||||
BLOCK_COMMENT("} zero_words");
|
||||
} else {
|
||||
const int unroll = 4; // Number of stp(zr, zr) instructions we'll unroll
|
||||
int remainder = cnt % (2 * unroll);
|
||||
for (; i < remainder; i += 2) {
|
||||
stp(zr, zr, Address(base, i * wordSize));
|
||||
}
|
||||
Label loop;
|
||||
Register cnt_reg = rscratch1;
|
||||
Register loop_base = rscratch2;
|
||||
cnt = cnt - remainder;
|
||||
mov(cnt_reg, cnt);
|
||||
// adjust base and prebias by -2 * wordSize so we can pre-increment
|
||||
add(loop_base, base, (remainder - 2) * wordSize);
|
||||
bind(loop);
|
||||
sub(cnt_reg, cnt_reg, 2 * unroll);
|
||||
for (i = 1; i < unroll; i++) {
|
||||
stp(zr, zr, Address(loop_base, 2 * i * wordSize));
|
||||
}
|
||||
stp(zr, zr, Address(pre(loop_base, 2 * unroll * wordSize)));
|
||||
cbnz(cnt_reg, loop);
|
||||
mov(r10, base); mov(r11, cnt);
|
||||
zero_words(r10, r11);
|
||||
}
|
||||
BLOCK_COMMENT("} zero_words");
|
||||
}
|
||||
|
||||
// Zero blocks of memory by using DC ZVA.
|
||||
|
@ -888,7 +888,6 @@ public:
|
||||
Register t2, // temp register
|
||||
Label& slow_case // continuation point if fast allocation fails
|
||||
);
|
||||
void zero_memory(Register addr, Register len, Register t1);
|
||||
void verify_tlab();
|
||||
|
||||
// interface method calling
|
||||
|
28732
test/micro/org/openjdk/bench/vm/gc/RawAllocationRate.java
Normal file
28732
test/micro/org/openjdk/bench/vm/gc/RawAllocationRate.java
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user