commit e30875c59b
Author: J. Duke
Date: 2017-07-05 16:43:09 +02:00
323 changed files with 42344 additions and 4041 deletions

View File

@ -12,3 +12,4 @@ bb1ef4ee3d2c8cbf43a37d372325a7952be590b9 jdk7-b33
143c1abedb7d3095eff0f9ee5fec9bf48e3490fc jdk7-b35
4b4f5fea8d7d0743f0c30d91fcd9bf9d96e5d2ad jdk7-b36
744554f5a3290e11c71cd2ddb1aff49e431f9ed0 jdk7-b37
cc47a76899ed33a2c513cb688348244c9b5a1288 jdk7-b38

View File

@ -12,3 +12,4 @@ ef6af34d75a7b44e77083f1d4ee47631fa09d3b4 jdk7-b31
3867c4d14a5bfdbb37c97b4874ccb0ee5343111c jdk7-b35
0723891eb8d1c27e67c54163af0b4cea05a4e036 jdk7-b36
59d5848bdedebe91cc2753acce78911bcb4a66db jdk7-b37
08be802754b0296c91a7713b6d85a015dbcd5349 jdk7-b38

View File

@ -12,3 +12,4 @@ b727c32788a906c04839516ae7443a085185a300 jdk7-b32
5fa96a5a7e76da7c8dad12486293a0456c2c116c jdk7-b35
e91159f921a58af3698e6479ea1fc5818da66d09 jdk7-b36
9ee9cf798b59e7d51f8c0a686959f313867a55d6 jdk7-b37
d9bc824aa078573829bb66572af847e26e1bd12e jdk7-b38

View File

@ -1,4 +1,4 @@
#
#
# Copyright 2006-2008 Sun Microsystems, Inc. All Rights Reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
@ -19,7 +19,7 @@
# Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
# CA 95054 USA or visit www.sun.com if you need additional information or
# have any questions.
#
#
#
# This file format must remain compatible with both

View File

@ -35,7 +35,7 @@ HOTSPOT_VM_COPYRIGHT=Copyright 2008
HS_MAJOR_VER=14
HS_MINOR_VER=0
HS_BUILD_NUMBER=05
HS_BUILD_NUMBER=06
JDK_MAJOR_VER=1
JDK_MINOR_VER=7

View File

@ -64,6 +64,7 @@ Include_DBs/GC = $(VM)/includeDB_gc \
$(VM)/gc_implementation/includeDB_gc_parallelScavenge \
$(VM)/gc_implementation/includeDB_gc_concurrentMarkSweep \
$(VM)/gc_implementation/includeDB_gc_parNew \
$(VM)/gc_implementation/includeDB_gc_g1 \
$(VM)/gc_implementation/includeDB_gc_serial \
$(VM)/gc_implementation/includeDB_gc_shared

View File

@ -54,6 +54,7 @@ Include_DBs/GC = $(VM)/includeDB_gc \
$(VM)/gc_implementation/includeDB_gc_parallelScavenge \
$(VM)/gc_implementation/includeDB_gc_concurrentMarkSweep \
$(VM)/gc_implementation/includeDB_gc_parNew \
$(VM)/gc_implementation/includeDB_gc_g1 \
$(VM)/gc_implementation/includeDB_gc_serial \
$(VM)/gc_implementation/includeDB_gc_shared

View File

@ -50,7 +50,8 @@ IncludeDBs_gc= $(WorkSpace)/src/share/vm/includeDB_gc_parallel \
$(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge \
$(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_shared \
$(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_parNew \
$(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep
$(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep \
$(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_g1
IncludeDBs_core=$(IncludeDBs_base) $(IncludeDBs_gc) \
$(WorkSpace)/src/share/vm/includeDB_features

View File

@ -64,6 +64,7 @@ MakeDepsIncludesPRIVATE=\
-relativeInclude src\share\vm\gc_implementation\shared \
-relativeInclude src\share\vm\gc_implementation\parNew \
-relativeInclude src\share\vm\gc_implementation\concurrentMarkSweep \
-relativeInclude src\share\vm\gc_implementation\g1 \
-relativeInclude src\share\vm\gc_interface \
-relativeInclude src\share\vm\asm \
-relativeInclude src\share\vm\memory \
@ -115,6 +116,7 @@ MakeDepsIDEOptions=\
-additionalFile includeDB_gc_parallel \
-additionalFile includeDB_gc_parallelScavenge \
-additionalFile includeDB_gc_concurrentMarkSweep \
-additionalFile includeDB_gc_g1 \
-additionalFile includeDB_gc_parNew \
-additionalFile includeDB_gc_shared \
-additionalFile includeDB_gc_serial \

View File

@ -117,6 +117,7 @@ CPP_INCLUDE_DIRS=\
/I "$(WorkSpace)\src\share\vm\gc_implementation\shared"\
/I "$(WorkSpace)\src\share\vm\gc_implementation\parNew"\
/I "$(WorkSpace)\src\share\vm\gc_implementation\concurrentMarkSweep"\
/I "$(WorkSpace)\src\share\vm\gc_implementation\g1"\
/I "$(WorkSpace)\src\share\vm\gc_interface"\
/I "$(WorkSpace)\src\share\vm\asm" \
/I "$(WorkSpace)\src\share\vm\memory" \
@ -146,6 +147,7 @@ VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/parallelScavenge
VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/shared
VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/parNew
VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/concurrentMarkSweep
VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/g1
VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_interface
VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/asm
VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/memory
@ -222,6 +224,9 @@ bytecodeInterpreterWithChecks.obj: ..\generated\jvmtifiles\bytecodeInterpreterWi
{$(WorkSpace)\src\share\vm\gc_implementation\concurrentMarkSweep}.cpp.obj::
$(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
{$(WorkSpace)\src\share\vm\gc_implementation\g1}.cpp.obj::
$(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
{$(WorkSpace)\src\share\vm\gc_interface}.cpp.obj::
$(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<

View File

@ -130,6 +130,20 @@ int AbstractAssembler::code_fill_byte() {
return 0x00; // illegal instruction 0x00000000
}
Assembler::Condition Assembler::reg_cond_to_cc_cond(Assembler::RCondition in) {
switch (in) {
case rc_z: return equal;
case rc_lez: return lessEqual;
case rc_lz: return less;
case rc_nz: return notEqual;
case rc_gz: return greater;
case rc_gez: return greaterEqual;
default:
ShouldNotReachHere();
}
return equal;
}
// Generate a bunch 'o stuff (including v9's
#ifndef PRODUCT
void Assembler::test_v9() {
@ -1213,31 +1227,19 @@ void MacroAssembler::set_vm_result(Register oop_result) {
}
void MacroAssembler::store_check(Register tmp, Register obj) {
// Use two shifts to clear out those low order two bits! (Cannot opt. into 1.)
/* $$$ This stuff needs to go into one of the BarrierSet generator
functions. (The particular barrier sets will have to be friends of
MacroAssembler, I guess.) */
BarrierSet* bs = Universe::heap()->barrier_set();
assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
CardTableModRefBS* ct = (CardTableModRefBS*)bs;
assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
void MacroAssembler::card_table_write(jbyte* byte_map_base,
Register tmp, Register obj) {
#ifdef _LP64
srlx(obj, CardTableModRefBS::card_shift, obj);
#else
srl(obj, CardTableModRefBS::card_shift, obj);
#endif
assert( tmp != obj, "need separate temp reg");
Address rs(tmp, (address)ct->byte_map_base);
Address rs(tmp, (address)byte_map_base);
load_address(rs);
stb(G0, rs.base(), obj);
}
void MacroAssembler::store_check(Register tmp, Register obj, Register offset) {
store_check(tmp, obj);
}
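
For readers unfamiliar with card marking, here is a minimal stand-alone C++ sketch of what the srlx/srl + stb sequence in card_table_write computes. The card size and table layout are assumptions for illustration, not HotSpot's actual values.

#include <cstdint>
#include <cstdio>
#include <cstring>

const int     kCardShift = 9;        // assumption: 512-byte cards
unsigned char card_table[1 << 16];   // assumption: toy card table covering a small heap

// Equivalent of the shift-and-store-byte sequence above: index the byte map
// by (address >> card_shift) and store 0, which HotSpot uses to mean "dirty".
void card_table_write(uintptr_t heap_addr) {
  card_table[heap_addr >> kCardShift] = 0;
}

int main() {
  std::memset(card_table, 0xff, sizeof(card_table));   // 0xff == clean
  card_table_write(0x12345);
  std::printf("card %lu is now dirty\n", (unsigned long)(0x12345 >> kCardShift));
  return 0;
}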
// %%% Note: The following six instructions have been moved,
// unchanged, from assembler_sparc.inline.hpp.
// They will be refactored at a later date.
@ -1663,11 +1665,21 @@ void MacroAssembler::_verify_oop(Register reg, const char* msg, const char * fil
if (reg == G0) return; // always NULL, which is always an oop
char buffer[16];
char buffer[64];
#ifdef COMPILER1
if (CommentedAssembly) {
snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
block_comment(buffer);
}
#endif
int len = strlen(file) + strlen(msg) + 1 + 4;
sprintf(buffer, "%d", line);
int len = strlen(file) + strlen(msg) + 1 + 4 + strlen(buffer);
len += strlen(buffer);
sprintf(buffer, " at offset %d ", offset());
len += strlen(buffer);
char * real_msg = new char[len];
sprintf(real_msg, "%s (%s:%d)", msg, file, line);
sprintf(real_msg, "%s%s(%s:%d)", msg, buffer, file, line);
// Call indirectly to solve generation ordering problem
Address a(O7, (address)StubRoutines::verify_oop_subroutine_entry_address());
@ -2059,6 +2071,27 @@ void MacroAssembler::br_notnull( Register s1, bool a, Predict p, Label& L ) {
#endif
}
void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p,
Register s1, address d,
relocInfo::relocType rt ) {
if (VM_Version::v9_instructions_work()) {
bpr(rc, a, p, s1, d, rt);
} else {
tst(s1);
br(reg_cond_to_cc_cond(rc), a, p, d, rt);
}
}
void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p,
Register s1, Label& L ) {
if (VM_Version::v9_instructions_work()) {
bpr(rc, a, p, s1, L);
} else {
tst(s1);
br(reg_cond_to_cc_cond(rc), a, p, L);
}
}
// instruction sequences factored across compiler & interpreter
@ -3241,68 +3274,74 @@ void MacroAssembler::eden_allocate(
assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size");
assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment");
// get eden boundaries
// note: we need both top & top_addr!
const Register top_addr = t1;
const Register end = t2;
CollectedHeap* ch = Universe::heap();
set((intx)ch->top_addr(), top_addr);
intx delta = (intx)ch->end_addr() - (intx)ch->top_addr();
ld_ptr(top_addr, delta, end);
ld_ptr(top_addr, 0, obj);
// try to allocate
Label retry;
bind(retry);
#ifdef ASSERT
// make sure eden top is properly aligned
{
Label L;
btst(MinObjAlignmentInBytesMask, obj);
br(Assembler::zero, false, Assembler::pt, L);
if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
// No allocation in the shared eden.
br(Assembler::always, false, Assembler::pt, slow_case);
delayed()->nop();
stop("eden top is not properly aligned");
bind(L);
}
#endif // ASSERT
const Register free = end;
sub(end, obj, free); // compute amount of free space
if (var_size_in_bytes->is_valid()) {
// size is unknown at compile time
cmp(free, var_size_in_bytes);
br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
delayed()->add(obj, var_size_in_bytes, end);
} else {
// size is known at compile time
cmp(free, con_size_in_bytes);
br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
delayed()->add(obj, con_size_in_bytes, end);
}
// Compare obj with the value at top_addr; if still equal, swap the value of
// end with the value at top_addr. If not equal, read the value at top_addr
// into end.
casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
// if someone beat us on the allocation, try again, otherwise continue
cmp(obj, end);
brx(Assembler::notEqual, false, Assembler::pn, retry);
delayed()->mov(end, obj); // nop if successfull since obj == end
// get eden boundaries
// note: we need both top & top_addr!
const Register top_addr = t1;
const Register end = t2;
CollectedHeap* ch = Universe::heap();
set((intx)ch->top_addr(), top_addr);
intx delta = (intx)ch->end_addr() - (intx)ch->top_addr();
ld_ptr(top_addr, delta, end);
ld_ptr(top_addr, 0, obj);
// try to allocate
Label retry;
bind(retry);
#ifdef ASSERT
// make sure eden top is properly aligned
{
Label L;
btst(MinObjAlignmentInBytesMask, obj);
br(Assembler::zero, false, Assembler::pt, L);
delayed()->nop();
stop("eden top is not properly aligned");
bind(L);
}
#endif // ASSERT
const Register free = end;
sub(end, obj, free); // compute amount of free space
if (var_size_in_bytes->is_valid()) {
// size is unknown at compile time
cmp(free, var_size_in_bytes);
br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
delayed()->add(obj, var_size_in_bytes, end);
} else {
// size is known at compile time
cmp(free, con_size_in_bytes);
br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
delayed()->add(obj, con_size_in_bytes, end);
}
// Compare obj with the value at top_addr; if still equal, swap the value of
// end with the value at top_addr. If not equal, read the value at top_addr
// into end.
casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
// if someone beat us on the allocation, try again, otherwise continue
cmp(obj, end);
brx(Assembler::notEqual, false, Assembler::pn, retry);
delayed()->mov(end, obj); // nop if successfull since obj == end
#ifdef ASSERT
// make sure eden top is properly aligned
{
Label L;
const Register top_addr = t1;
// make sure eden top is properly aligned
{
Label L;
const Register top_addr = t1;
set((intx)ch->top_addr(), top_addr);
ld_ptr(top_addr, 0, top_addr);
btst(MinObjAlignmentInBytesMask, top_addr);
br(Assembler::zero, false, Assembler::pt, L);
delayed()->nop();
stop("eden top is not properly aligned");
bind(L);
}
set((intx)ch->top_addr(), top_addr);
ld_ptr(top_addr, 0, top_addr);
btst(MinObjAlignmentInBytesMask, top_addr);
br(Assembler::zero, false, Assembler::pt, L);
delayed()->nop();
stop("eden top is not properly aligned");
bind(L);
}
#endif // ASSERT
}
}
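
Conceptually, the retry loop above is a lock-free bump-pointer allocation over the shared eden. A hedged C++ sketch under assumed types follows; the real code uses casx_under_lock and branches to slow_case instead of returning null.

#include <atomic>
#include <cstddef>

// Toy eden: 'top' and 'end' stand in for CollectedHeap::top_addr()/end_addr().
struct Eden {
  std::atomic<char*> top;
  char*              end;
};

// Returns a pointer to 'size' bytes or nullptr to signal the slow case,
// mirroring the load / free-space check / compare-and-swap retry above.
char* eden_allocate(Eden& eden, std::size_t size) {
  for (;;) {
    char*          obj  = eden.top.load(std::memory_order_relaxed);
    std::ptrdiff_t free = eden.end - obj;                 // amount of free space
    if (free < static_cast<std::ptrdiff_t>(size)) return nullptr;   // not enough space: slow case
    char* new_top = obj + size;
    // If another thread beat us to the allocation, retry; otherwise we own [obj, new_top).
    if (eden.top.compare_exchange_weak(obj, new_top)) return obj;
  }
}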
@ -3554,6 +3593,468 @@ void MacroAssembler::bang_stack_size(Register Rsize, Register Rtsp,
}
}
///////////////////////////////////////////////////////////////////////////////////
#ifndef SERIALGC
static uint num_stores = 0;
static uint num_null_pre_stores = 0;
static void count_null_pre_vals(void* pre_val) {
num_stores++;
if (pre_val == NULL) num_null_pre_stores++;
if ((num_stores % 1000000) == 0) {
tty->print_cr(UINT32_FORMAT " stores, " UINT32_FORMAT " (%5.2f%%) with null pre-vals.",
num_stores, num_null_pre_stores,
100.0*(float)num_null_pre_stores/(float)num_stores);
}
}
static address satb_log_enqueue_with_frame = 0;
static u_char* satb_log_enqueue_with_frame_end = 0;
static address satb_log_enqueue_frameless = 0;
static u_char* satb_log_enqueue_frameless_end = 0;
static int EnqueueCodeSize = 128 DEBUG_ONLY( + 256); // Instructions?
// The calls to this don't work. We'd need to do a fair amount of work to
// make it work.
static void check_index(int ind) {
assert(0 <= ind && ind <= 64*K && ((ind % oopSize) == 0),
"Invariants.")
}
static void generate_satb_log_enqueue(bool with_frame) {
BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize);
CodeBuffer buf(bb->instructions_begin(), bb->instructions_size());
MacroAssembler masm(&buf);
address start = masm.pc();
Register pre_val;
Label refill, restart;
if (with_frame) {
masm.save_frame(0);
pre_val = I0; // Was O0 before the save.
} else {
pre_val = O0;
}
int satb_q_index_byte_offset =
in_bytes(JavaThread::satb_mark_queue_offset() +
PtrQueue::byte_offset_of_index());
int satb_q_buf_byte_offset =
in_bytes(JavaThread::satb_mark_queue_offset() +
PtrQueue::byte_offset_of_buf());
assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) &&
in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t),
"check sizes in assembly below");
masm.bind(restart);
masm.ld_ptr(G2_thread, satb_q_index_byte_offset, L0);
masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill);
// If the branch is taken, no harm in executing this in the delay slot.
masm.delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
masm.sub(L0, oopSize, L0);
masm.st_ptr(pre_val, L1, L0); // [_buf + index] := I0
if (!with_frame) {
// Use return-from-leaf
masm.retl();
masm.delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset);
} else {
// Not delayed.
masm.st_ptr(L0, G2_thread, satb_q_index_byte_offset);
}
if (with_frame) {
masm.ret();
masm.delayed()->restore();
}
masm.bind(refill);
address handle_zero =
CAST_FROM_FN_PTR(address,
&SATBMarkQueueSet::handle_zero_index_for_thread);
// This should be rare enough that we can afford to save all the
// scratch registers that the calling context might be using.
masm.mov(G1_scratch, L0);
masm.mov(G3_scratch, L1);
masm.mov(G4, L2);
// We need the value of O0 above (for the write into the buffer), so we
// save and restore it.
masm.mov(O0, L3);
// Since the call will overwrite O7, we save and restore that, as well.
masm.mov(O7, L4);
masm.call_VM_leaf(L5, handle_zero, G2_thread);
masm.mov(L0, G1_scratch);
masm.mov(L1, G3_scratch);
masm.mov(L2, G4);
masm.mov(L3, O0);
masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart);
masm.delayed()->mov(L4, O7);
if (with_frame) {
satb_log_enqueue_with_frame = start;
satb_log_enqueue_with_frame_end = masm.pc();
} else {
satb_log_enqueue_frameless = start;
satb_log_enqueue_frameless_end = masm.pc();
}
}
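
Stripped of the register shuffling, the generated enqueue stub implements the following queue push. This is a hedged C++ model; the struct and refill function are assumptions standing in for PtrQueue and SATBMarkQueueSet::handle_zero_index_for_thread.

#include <cstddef>
#include <cstdlib>

// Assumed thread-local SATB queue: 'index' is a byte offset counting down to 0,
// matching PtrQueue::byte_offset_of_index() / byte_offset_of_buf().
struct SatbQueue {
  std::size_t index;
  std::size_t capacity_bytes;
  void**      buf;
};

// Toy stand-in for handing a full buffer to the collector and installing a new one.
static void handle_zero_index(SatbQueue& q) {
  q.buf   = static_cast<void**>(std::malloc(q.capacity_bytes));
  q.index = q.capacity_bytes;
}

// What the with-frame and frameless stubs both do for one pre-value.
void satb_log_enqueue(SatbQueue& q, void* pre_val) {
  if (q.index == 0) handle_zero_index(q);     // the "refill" / "restart" path
  q.index -= sizeof(void*);                   // sub(L0, oopSize, L0)
  q.buf[q.index / sizeof(void*)] = pre_val;   // st_ptr(pre_val, L1, L0)
}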
static inline void generate_satb_log_enqueue_if_necessary(bool with_frame) {
if (with_frame) {
if (satb_log_enqueue_with_frame == 0) {
generate_satb_log_enqueue(with_frame);
assert(satb_log_enqueue_with_frame != 0, "postcondition.");
if (G1SATBPrintStubs) {
tty->print_cr("Generated with-frame satb enqueue:");
Disassembler::decode((u_char*)satb_log_enqueue_with_frame,
satb_log_enqueue_with_frame_end,
tty);
}
}
} else {
if (satb_log_enqueue_frameless == 0) {
generate_satb_log_enqueue(with_frame);
assert(satb_log_enqueue_frameless != 0, "postcondition.");
if (G1SATBPrintStubs) {
tty->print_cr("Generated frameless satb enqueue:");
Disassembler::decode((u_char*)satb_log_enqueue_frameless,
satb_log_enqueue_frameless_end,
tty);
}
}
}
}
void MacroAssembler::g1_write_barrier_pre(Register obj, Register index, int offset, Register tmp, bool preserve_o_regs) {
assert(offset == 0 || index == noreg, "choose one");
if (G1DisablePreBarrier) return;
// satb_log_barrier(tmp, obj, offset, preserve_o_regs);
Label filtered;
// satb_log_barrier_work0(tmp, filtered);
if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
ld(G2,
in_bytes(JavaThread::satb_mark_queue_offset() +
PtrQueue::byte_offset_of_active()),
tmp);
} else {
guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1,
"Assumption");
ldsb(G2,
in_bytes(JavaThread::satb_mark_queue_offset() +
PtrQueue::byte_offset_of_active()),
tmp);
}
// Check on whether to annul.
br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
delayed() -> nop();
// satb_log_barrier_work1(tmp, offset);
if (index == noreg) {
if (Assembler::is_simm13(offset)) {
ld_ptr(obj, offset, tmp);
} else {
set(offset, tmp);
ld_ptr(obj, tmp, tmp);
}
} else {
ld_ptr(obj, index, tmp);
}
// satb_log_barrier_work2(obj, tmp, offset);
// satb_log_barrier_work3(tmp, filtered, preserve_o_regs);
const Register pre_val = tmp;
if (G1SATBBarrierPrintNullPreVals) {
save_frame(0);
mov(pre_val, O0);
// Save G-regs that target may use.
mov(G1, L1);
mov(G2, L2);
mov(G3, L3);
mov(G4, L4);
mov(G5, L5);
call(CAST_FROM_FN_PTR(address, &count_null_pre_vals));
delayed()->nop();
// Restore G-regs that target may have used.
mov(L1, G1);
mov(L2, G2);
mov(L3, G3);
mov(L4, G4);
mov(L5, G5);
restore(G0, G0, G0);
}
// Check on whether to annul.
br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered);
delayed() -> nop();
// OK, it's not filtered, so we'll need to call enqueue. In the normal
// case, pre_val will be a scratch G-reg, but there's some cases in which
// it's an O-reg. In the first case, do a normal call. In the latter,
// do a save here and call the frameless version.
guarantee(pre_val->is_global() || pre_val->is_out(),
"Or we need to think harder.");
if (pre_val->is_global() && !preserve_o_regs) {
generate_satb_log_enqueue_if_necessary(true); // with frame.
call(satb_log_enqueue_with_frame);
delayed()->mov(pre_val, O0);
} else {
generate_satb_log_enqueue_if_necessary(false); // with frameless.
save_frame(0);
call(satb_log_enqueue_frameless);
delayed()->mov(pre_val->after_save(), O0);
restore();
}
bind(filtered);
}
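
Filtering aside, the pre-barrier above reduces to a few checks before the enqueue. A hedged C++ sketch, reusing the queue shape from the earlier sketch extended with the active flag; the field access stands in for the offset/index addressing.

#include <cstddef>

struct SatbQueue { std::size_t index; std::size_t capacity_bytes; void** buf; bool active; };

void satb_log_enqueue(SatbQueue& q, void* pre_val);   // as sketched above

// Conceptual shape of MacroAssembler::g1_write_barrier_pre: do nothing unless
// SATB marking is active, load the field's previous value, skip null
// pre-values, and otherwise log the old value for the concurrent marker.
void g1_write_barrier_pre(SatbQueue& q, void** field_addr) {
  if (!q.active) return;            // PtrQueue "active" check -> filtered
  void* pre_val = *field_addr;      // ld_ptr(obj, offset/index, tmp)
  if (pre_val == nullptr) return;   // null pre-values need no logging
  satb_log_enqueue(q, pre_val);     // call the generated enqueue stub
}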
static jint num_ct_writes = 0;
static jint num_ct_writes_filtered_in_hr = 0;
static jint num_ct_writes_filtered_null = 0;
static jint num_ct_writes_filtered_pop = 0;
static G1CollectedHeap* g1 = NULL;
static Thread* count_ct_writes(void* filter_val, void* new_val) {
Atomic::inc(&num_ct_writes);
if (filter_val == NULL) {
Atomic::inc(&num_ct_writes_filtered_in_hr);
} else if (new_val == NULL) {
Atomic::inc(&num_ct_writes_filtered_null);
} else {
if (g1 == NULL) {
g1 = G1CollectedHeap::heap();
}
if ((HeapWord*)new_val < g1->popular_object_boundary()) {
Atomic::inc(&num_ct_writes_filtered_pop);
}
}
if ((num_ct_writes % 1000000) == 0) {
jint num_ct_writes_filtered =
num_ct_writes_filtered_in_hr +
num_ct_writes_filtered_null +
num_ct_writes_filtered_pop;
tty->print_cr("%d potential CT writes: %5.2f%% filtered\n"
" (%5.2f%% intra-HR, %5.2f%% null, %5.2f%% popular).",
num_ct_writes,
100.0*(float)num_ct_writes_filtered/(float)num_ct_writes,
100.0*(float)num_ct_writes_filtered_in_hr/
(float)num_ct_writes,
100.0*(float)num_ct_writes_filtered_null/
(float)num_ct_writes,
100.0*(float)num_ct_writes_filtered_pop/
(float)num_ct_writes);
}
return Thread::current();
}
static address dirty_card_log_enqueue = 0;
static u_char* dirty_card_log_enqueue_end = 0;
// This gets to assume that o0 contains the object address.
static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) {
BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2);
CodeBuffer buf(bb->instructions_begin(), bb->instructions_size());
MacroAssembler masm(&buf);
address start = masm.pc();
Label not_already_dirty, restart, refill;
#ifdef _LP64
masm.srlx(O0, CardTableModRefBS::card_shift, O0);
#else
masm.srl(O0, CardTableModRefBS::card_shift, O0);
#endif
Address rs(O1, (address)byte_map_base);
masm.load_address(rs); // O1 := <card table base>
masm.ldub(O0, O1, O2); // O2 := [O0 + O1]
masm.br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt,
O2, not_already_dirty);
// Get O1 + O2 into a reg by itself -- useful in the take-the-branch
// case, harmless if not.
masm.delayed()->add(O0, O1, O3);
// We didn't take the branch, so we're already dirty: return.
// Use return-from-leaf
masm.retl();
masm.delayed()->nop();
// Not dirty.
masm.bind(not_already_dirty);
// First, dirty it.
masm.stb(G0, O3, G0); // [cardPtr] := 0 (i.e., dirty).
int dirty_card_q_index_byte_offset =
in_bytes(JavaThread::dirty_card_queue_offset() +
PtrQueue::byte_offset_of_index());
int dirty_card_q_buf_byte_offset =
in_bytes(JavaThread::dirty_card_queue_offset() +
PtrQueue::byte_offset_of_buf());
masm.bind(restart);
masm.ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0);
masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn,
L0, refill);
// If the branch is taken, no harm in executing this in the delay slot.
masm.delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1);
masm.sub(L0, oopSize, L0);
masm.st_ptr(O3, L1, L0); // [_buf + index] := I0
// Use return-from-leaf
masm.retl();
masm.delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset);
masm.bind(refill);
address handle_zero =
CAST_FROM_FN_PTR(address,
&DirtyCardQueueSet::handle_zero_index_for_thread);
// This should be rare enough that we can afford to save all the
// scratch registers that the calling context might be using.
masm.mov(G1_scratch, L3);
masm.mov(G3_scratch, L5);
// We need the value of O3 above (for the write into the buffer), so we
// save and restore it.
masm.mov(O3, L6);
// Since the call will overwrite O7, we save and restore that, as well.
masm.mov(O7, L4);
masm.call_VM_leaf(L7_thread_cache, handle_zero, G2_thread);
masm.mov(L3, G1_scratch);
masm.mov(L5, G3_scratch);
masm.mov(L6, O3);
masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart);
masm.delayed()->mov(L4, O7);
dirty_card_log_enqueue = start;
dirty_card_log_enqueue_end = masm.pc();
// XXX Should have a guarantee here about not going off the end!
// Does it already do so? Do an experiment...
}
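
The dirty-card stub combines the card computation with a queue push; in C++ terms it looks like the following hedged sketch (types, card size, and the refill helper are assumptions, mirroring the earlier sketches).

#include <cstddef>
#include <cstdint>
#include <cstdlib>

const int kCardShift = 9;   // assumption: 512-byte cards

// Assumed thread-local dirty-card queue, same index/buf shape as the SATB queue.
struct DirtyCardQueue {
  std::size_t index;
  std::size_t capacity_bytes;
  void**      buf;
};

static void handle_zero_index(DirtyCardQueue& q) {   // toy refill
  q.buf   = static_cast<void**>(std::malloc(q.capacity_bytes));
  q.index = q.capacity_bytes;
}

// Conceptual shape of the generated dirty_card_enqueue stub above.
void dirty_card_log_enqueue(DirtyCardQueue& q,
                            unsigned char* byte_map_base,
                            uintptr_t store_addr) {
  unsigned char* card = byte_map_base + (store_addr >> kCardShift);
  if (*card == 0) return;                    // already dirty: return-from-leaf
  *card = 0;                                 // dirty it: stb(G0, O3, G0)
  if (q.index == 0) handle_zero_index(q);    // refill a full buffer first
  q.index -= sizeof(void*);
  q.buf[q.index / sizeof(void*)] = card;     // [_buf + index] := card address
}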
static inline void
generate_dirty_card_log_enqueue_if_necessary(jbyte* byte_map_base) {
if (dirty_card_log_enqueue == 0) {
generate_dirty_card_log_enqueue(byte_map_base);
assert(dirty_card_log_enqueue != 0, "postcondition.");
if (G1SATBPrintStubs) {
tty->print_cr("Generated dirty_card enqueue:");
Disassembler::decode((u_char*)dirty_card_log_enqueue,
dirty_card_log_enqueue_end,
tty);
}
}
}
void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val, Register tmp) {
Label filtered;
MacroAssembler* post_filter_masm = this;
if (new_val == G0) return;
if (G1DisablePostBarrier) return;
G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set();
assert(bs->kind() == BarrierSet::G1SATBCT ||
bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier");
if (G1RSBarrierRegionFilter) {
xor3(store_addr, new_val, tmp);
#ifdef _LP64
srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
#else
srl(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
#endif
if (G1PrintCTFilterStats) {
guarantee(tmp->is_global(), "Or stats won't work...");
// This is a sleazy hack: I'm temporarily hijacking G2, which I
// promise to restore.
mov(new_val, G2);
save_frame(0);
mov(tmp, O0);
mov(G2, O1);
// Save G-regs that target may use.
mov(G1, L1);
mov(G2, L2);
mov(G3, L3);
mov(G4, L4);
mov(G5, L5);
call(CAST_FROM_FN_PTR(address, &count_ct_writes));
delayed()->nop();
mov(O0, G2);
// Restore G-regs that target may have used.
mov(L1, G1);
mov(L3, G3);
mov(L4, G4);
mov(L5, G5);
restore(G0, G0, G0);
}
// XXX Should I predict this taken or not? Does it matter?
br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
delayed()->nop();
}
// Now we decide how to generate the card table write. If we're
// enqueueing, we call out to a generated function. Otherwise, we do it
// inline here.
if (G1RSBarrierUseQueue) {
// If the "store_addr" register is an "in" or "local" register, move it to
// a scratch reg so we can pass it as an argument.
bool use_scr = !(store_addr->is_global() || store_addr->is_out());
// Pick a scratch register different from "tmp".
Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
// Make sure we use up the delay slot!
if (use_scr) {
post_filter_masm->mov(store_addr, scr);
} else {
post_filter_masm->nop();
}
generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base);
save_frame(0);
call(dirty_card_log_enqueue);
if (use_scr) {
delayed()->mov(scr, O0);
} else {
delayed()->mov(store_addr->after_save(), O0);
}
restore();
} else {
#ifdef _LP64
post_filter_masm->srlx(store_addr, CardTableModRefBS::card_shift, store_addr);
#else
post_filter_masm->srl(store_addr, CardTableModRefBS::card_shift, store_addr);
#endif
assert( tmp != store_addr, "need separate temp reg");
Address rs(tmp, (address)bs->byte_map_base);
load_address(rs);
stb(G0, rs.base(), store_addr);
}
bind(filtered);
}
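
Behind the stats plumbing, the post-barrier applies two cheap filters and then hands the card off. A hedged C++ sketch; the region size is an assumption standing in for HeapRegion::LogOfHRGrainBytes.

#include <cstdint>
#include <cstdio>

const int kLogOfHRGrainBytes = 20;   // assumption: 1 MB heap regions

// Trivial stand-in for the enqueue stub / inline card write chosen above.
static void dirty_card_for(uintptr_t store_addr) {
  std::printf("would dirty the card covering %#lx\n", (unsigned long)store_addr);
}

// Conceptual shape of MacroAssembler::g1_write_barrier_post.
void g1_write_barrier_post(uintptr_t store_addr, uintptr_t new_val) {
  if (new_val == 0) return;                                  // storing NULL: no barrier
  if (((store_addr ^ new_val) >> kLogOfHRGrainBytes) == 0)   // same region: filtered out
    return;
  dirty_card_for(store_addr);   // queue the card (G1RSBarrierUseQueue) or mark it inline
}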
#endif // SERIALGC
///////////////////////////////////////////////////////////////////////////////////
void MacroAssembler::card_write_barrier_post(Register store_addr, Register new_val, Register tmp) {
// If we're writing constant NULL, we can skip the write barrier.
if (new_val == G0) return;
CardTableModRefBS* bs = (CardTableModRefBS*) Universe::heap()->barrier_set();
assert(bs->kind() == BarrierSet::CardTableModRef ||
bs->kind() == BarrierSet::CardTableExtension, "wrong barrier");
card_table_write(bs->byte_map_base, tmp, store_addr);
}
void MacroAssembler::load_klass(Register src_oop, Register klass) {
// The number of bytes in this code is used by
// MachCallDynamicJavaNode::ret_addr_offset()

View File

@ -1439,7 +1439,11 @@ public:
// pp 214
void save( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | rs2(s2) ); }
void save( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
void save( Register s1, int simm13a, Register d ) {
// make sure frame is at least large enough for the register save area
assert(-simm13a >= 16 * wordSize, "frame too small");
emit_long( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) );
}
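
For context, the new assert enforces the SPARC register window's save area: a frame must leave room to spill the 16 in/local registers, one word each. A small worked check under the 64-bit word-size assumption:

#include <cassert>

const int wordSize = 8;   // assumption: 64-bit (_LP64) build

// Mirrors the new check in save(): the (negative) stack adjustment must leave
// at least 16 words for %l0-%l7 and %i0-%i7.
void check_save_frame(int simm13a) {
  assert(-simm13a >= 16 * wordSize && "frame too small");
}
// e.g. check_save_frame(-176) passes, while check_save_frame(-64) would trip the assert.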
void restore( Register s1 = G0, Register s2 = G0, Register d = G0 ) { emit_long( op(arith_op) | rd(d) | op3(restore_op3) | rs1(s1) | rs2(s2) ); }
void restore( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(restore_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
@ -1594,6 +1598,11 @@ public:
inline void wrasi( Register d) { v9_only(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(3, 29, 25)); }
inline void wrfprs( Register d) { v9_only(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(6, 29, 25)); }
// For a given register condition, return the appropriate condition code
// Condition (the one you would use to get the same effect after "tst" on
// the target register.)
Assembler::Condition reg_cond_to_cc_cond(RCondition in);
// Creation
Assembler(CodeBuffer* code) : AbstractAssembler(code) {
@ -1630,6 +1639,8 @@ class RegistersForDebugging : public StackObj {
// restore global registers in case C code disturbed them
static void restore_registers(MacroAssembler* a, Register r);
};
@ -1722,6 +1733,12 @@ class MacroAssembler: public Assembler {
void br_null ( Register s1, bool a, Predict p, Label& L );
void br_notnull( Register s1, bool a, Predict p, Label& L );
// These versions will do the most efficient thing on v8 and v9. Perhaps
// this is what the routine above was meant to do, but it didn't (and
// didn't cover both target address kinds.)
void br_on_reg_cond( RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt = relocInfo::none );
void br_on_reg_cond( RCondition c, bool a, Predict p, Register s1, Label& L);
inline void bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
inline void bp( Condition c, bool a, CC cc, Predict p, Label& L );
@ -2056,9 +2073,23 @@ class MacroAssembler: public Assembler {
#endif // ASSERT
public:
// Stores
void store_check(Register tmp, Register obj); // store check for obj - register is destroyed afterwards
void store_check(Register tmp, Register obj, Register offset); // store check for obj - register is destroyed afterwards
// Write to card table for - register is destroyed afterwards.
void card_table_write(jbyte* byte_map_base, Register tmp, Register obj);
void card_write_barrier_post(Register store_addr, Register new_val, Register tmp);
#ifndef SERIALGC
// Array store and offset
void g1_write_barrier_pre(Register obj, Register index, int offset, Register tmp, bool preserve_o_regs);
void g1_write_barrier_post(Register store_addr, Register new_val, Register tmp);
// May do filtering, depending on the boolean arguments.
void g1_card_table_write(jbyte* byte_map_base,
Register tmp, Register obj, Register new_val,
bool region_filter, bool null_filter);
#endif // SERIALGC
// pushes double TOS element of FPU stack on CPU stack; pops from FPU stack
void push_fTOS();

View File

@ -404,4 +404,55 @@ void ArrayCopyStub::emit_code(LIR_Assembler* ce) {
}
///////////////////////////////////////////////////////////////////////////////////
#ifndef SERIALGC
void G1PreBarrierStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
assert(pre_val()->is_register(), "Precondition.");
Register pre_val_reg = pre_val()->as_register();
ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false);
__ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt,
pre_val_reg, _continuation);
__ delayed()->nop();
__ call(Runtime1::entry_for(Runtime1::Runtime1::g1_pre_barrier_slow_id));
__ delayed()->mov(pre_val_reg, G4);
__ br(Assembler::always, false, Assembler::pt, _continuation);
__ delayed()->nop();
}
jbyte* G1PostBarrierStub::_byte_map_base = NULL;
jbyte* G1PostBarrierStub::byte_map_base_slow() {
BarrierSet* bs = Universe::heap()->barrier_set();
assert(bs->is_a(BarrierSet::G1SATBCTLogging),
"Must be if we're using this.");
return ((G1SATBCardTableModRefBS*)bs)->byte_map_base;
}
void G1PostBarrierStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
assert(addr()->is_register(), "Precondition.");
assert(new_val()->is_register(), "Precondition.");
Register addr_reg = addr()->as_pointer_register();
Register new_val_reg = new_val()->as_register();
__ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt,
new_val_reg, _continuation);
__ delayed()->nop();
__ call(Runtime1::entry_for(Runtime1::Runtime1::g1_post_barrier_slow_id));
__ delayed()->mov(addr_reg, G4);
__ br(Assembler::always, false, Assembler::pt, _continuation);
__ delayed()->nop();
}
#endif // SERIALGC
///////////////////////////////////////////////////////////////////////////////////
#undef __

View File

@ -2093,7 +2093,11 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
// the known type isn't loaded since the code sanity checks
// in debug mode and the type isn't required when we know the exact type
// also check that the type is an array type.
if (op->expected_type() == NULL) {
// We also, for now, always call the stub if the barrier set requires a
// write_ref_pre barrier (which the stub does, but none of the optimized
// cases currently does).
if (op->expected_type() == NULL ||
Universe::heap()->barrier_set()->has_write_ref_pre_barrier()) {
__ mov(src, O0);
__ mov(src_pos, O1);
__ mov(dst, O2);

View File

@ -365,6 +365,10 @@ void LIRGenerator::do_StoreIndexed(StoreIndexed* x) {
__ store_check(value.result(), array.result(), tmp1, tmp2, tmp3, store_check_info);
}
if (obj_store) {
// Needs GC write barriers.
pre_barrier(LIR_OprFact::address(array_addr), false, NULL);
}
__ move(value.result(), array_addr, null_check_info);
if (obj_store) {
// Is this precise?
@ -663,6 +667,10 @@ void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) {
__ add(obj.result(), offset.result(), addr);
if (type == objectType) { // Write-barrier needed for Object fields.
pre_barrier(obj.result(), false, NULL);
}
if (type == objectType)
__ cas_obj(addr, cmp.result(), val.result(), t1, t2);
else if (type == intType)
@ -677,7 +685,11 @@ void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) {
LIR_Opr result = rlock_result(x);
__ cmove(lir_cond_equal, LIR_OprFact::intConst(1), LIR_OprFact::intConst(0), result);
if (type == objectType) { // Write-barrier needed for Object fields.
#ifdef PRECISE_CARDMARK
post_barrier(addr, val.result());
#else
post_barrier(obj.result(), val.result());
#endif // PRECISE_CARDMARK
}
}
@ -1154,6 +1166,10 @@ void LIRGenerator::put_Object_unsafe(LIR_Opr src, LIR_Opr offset, LIR_Opr data,
addr = new LIR_Address(base_op, index_op, type);
}
if (is_obj) {
pre_barrier(LIR_OprFact::address(addr), false, NULL);
// _bs->c1_write_barrier_pre(this, LIR_OprFact::address(addr));
}
__ move(data, addr);
if (is_obj) {
// This address is precise

View File

@ -832,6 +832,163 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
#ifndef SERIALGC
case g1_pre_barrier_slow_id:
{ // G4: previous value of memory
BarrierSet* bs = Universe::heap()->barrier_set();
if (bs->kind() != BarrierSet::G1SATBCTLogging) {
__ save_frame(0);
__ set((int)id, O1);
__ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), I0);
__ should_not_reach_here();
break;
}
__ set_info("g1_pre_barrier_slow_id", dont_gc_arguments);
Register pre_val = G4;
Register tmp = G1_scratch;
Register tmp2 = G3_scratch;
Label refill, restart;
bool with_frame = false; // I don't know if we can do with-frame.
int satb_q_index_byte_offset =
in_bytes(JavaThread::satb_mark_queue_offset() +
PtrQueue::byte_offset_of_index());
int satb_q_buf_byte_offset =
in_bytes(JavaThread::satb_mark_queue_offset() +
PtrQueue::byte_offset_of_buf());
__ bind(restart);
__ ld_ptr(G2_thread, satb_q_index_byte_offset, tmp);
__ br_on_reg_cond(Assembler::rc_z, /*annul*/false,
Assembler::pn, tmp, refill);
// If the branch is taken, no harm in executing this in the delay slot.
__ delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, tmp2);
__ sub(tmp, oopSize, tmp);
__ st_ptr(pre_val, tmp2, tmp); // [_buf + index] := <address_of_card>
// Use return-from-leaf
__ retl();
__ delayed()->st_ptr(tmp, G2_thread, satb_q_index_byte_offset);
__ bind(refill);
__ save_frame(0);
__ mov(pre_val, L0);
__ mov(tmp, L1);
__ mov(tmp2, L2);
__ call_VM_leaf(L7_thread_cache,
CAST_FROM_FN_PTR(address,
SATBMarkQueueSet::handle_zero_index_for_thread),
G2_thread);
__ mov(L0, pre_val);
__ mov(L1, tmp);
__ mov(L2, tmp2);
__ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
__ delayed()->restore();
}
break;
case g1_post_barrier_slow_id:
{
BarrierSet* bs = Universe::heap()->barrier_set();
if (bs->kind() != BarrierSet::G1SATBCTLogging) {
__ save_frame(0);
__ set((int)id, O1);
__ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), I0);
__ should_not_reach_here();
break;
}
__ set_info("g1_post_barrier_slow_id", dont_gc_arguments);
Register addr = G4;
Register cardtable = G5;
Register tmp = G1_scratch;
Register tmp2 = G3_scratch;
jbyte* byte_map_base = ((CardTableModRefBS*)bs)->byte_map_base;
Label not_already_dirty, restart, refill;
#ifdef _LP64
__ srlx(addr, CardTableModRefBS::card_shift, addr);
#else
__ srl(addr, CardTableModRefBS::card_shift, addr);
#endif
Address rs(cardtable, (address)byte_map_base);
__ load_address(rs); // cardtable := <card table base>
__ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable]
__ br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt,
tmp, not_already_dirty);
// Get cardtable + tmp into a reg by itself -- useful in the take-the-branch
// case, harmless if not.
__ delayed()->add(addr, cardtable, tmp2);
// We didn't take the branch, so we're already dirty: return.
// Use return-from-leaf
__ retl();
__ delayed()->nop();
// Not dirty.
__ bind(not_already_dirty);
// First, dirty it.
__ stb(G0, tmp2, 0); // [cardPtr] := 0 (i.e., dirty).
Register tmp3 = cardtable;
Register tmp4 = tmp;
// these registers are now dead
addr = cardtable = tmp = noreg;
int dirty_card_q_index_byte_offset =
in_bytes(JavaThread::dirty_card_queue_offset() +
PtrQueue::byte_offset_of_index());
int dirty_card_q_buf_byte_offset =
in_bytes(JavaThread::dirty_card_queue_offset() +
PtrQueue::byte_offset_of_buf());
__ bind(restart);
__ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, tmp3);
__ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn,
tmp3, refill);
// If the branch is taken, no harm in executing this in the delay slot.
__ delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, tmp4);
__ sub(tmp3, oopSize, tmp3);
__ st_ptr(tmp2, tmp4, tmp3); // [_buf + index] := <address_of_card>
// Use return-from-leaf
__ retl();
__ delayed()->st_ptr(tmp3, G2_thread, dirty_card_q_index_byte_offset);
__ bind(refill);
__ save_frame(0);
__ mov(tmp2, L0);
__ mov(tmp3, L1);
__ mov(tmp4, L2);
__ call_VM_leaf(L7_thread_cache,
CAST_FROM_FN_PTR(address,
DirtyCardQueueSet::handle_zero_index_for_thread),
G2_thread);
__ mov(L0, tmp2);
__ mov(L1, tmp3);
__ mov(L2, tmp4);
__ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
__ delayed()->restore();
}
break;
#endif // !SERIALGC
default:
{ __ set_info("unimplemented entry", dont_gc_arguments);
__ save_frame(0);

View File

@ -1110,30 +1110,31 @@ class StubGenerator: public StubCodeGenerator {
// The input registers are overwritten.
//
void gen_write_ref_array_pre_barrier(Register addr, Register count) {
#if 0 // G1 only
BarrierSet* bs = Universe::heap()->barrier_set();
if (bs->has_write_ref_pre_barrier()) {
assert(bs->has_write_ref_array_pre_opt(),
"Else unsupported barrier set.");
assert(addr->is_global() && count->is_global(),
"If not, then we have to fix this code to handle more "
"general cases.");
// Get some new fresh output registers.
__ save_frame(0);
// Save the necessary global regs... will be used after.
__ mov(addr, L0);
__ mov(count, L1);
__ mov(addr, O0);
if (addr->is_global()) {
__ mov(addr, L0);
}
if (count->is_global()) {
__ mov(count, L1);
}
__ mov(addr->after_save(), O0);
// Get the count into O1
__ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));
__ delayed()->mov(count, O1);
__ mov(L0, addr);
__ mov(L1, count);
__ delayed()->mov(count->after_save(), O1);
if (addr->is_global()) {
__ mov(L0, addr);
}
if (count->is_global()) {
__ mov(L1, count);
}
__ restore();
}
#endif // 0
}
//
// Generate post-write barrier for array.
@ -1150,22 +1151,17 @@ class StubGenerator: public StubCodeGenerator {
BarrierSet* bs = Universe::heap()->barrier_set();
switch (bs->kind()) {
#if 0 // G1 - only
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
{
assert(addr->is_global() && count->is_global(),
"If not, then we have to fix this code to handle more "
"general cases.");
// Get some new fresh output registers.
__ save_frame(0);
__ mov(addr, O0);
__ mov(addr->after_save(), O0);
__ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post));
__ delayed()->mov(count, O1);
__ delayed()->mov(count->after_save(), O1);
__ restore();
}
break;
#endif // 0 G1 - only
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableExtension:
{
@ -2412,8 +2408,7 @@ class StubGenerator: public StubCodeGenerator {
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
gen_write_ref_array_pre_barrier(G1, G5);
gen_write_ref_array_pre_barrier(O1, O2);
#ifdef ASSERT
// We sometimes save a frame (see partial_subtype_check below).

View File

@ -28,6 +28,79 @@
#ifndef CC_INTERP
#define __ _masm->
// Misc helpers
// Do an oop store like *(base + index + offset) = val
// index can be noreg,
static void do_oop_store(InterpreterMacroAssembler* _masm,
Register base,
Register index,
int offset,
Register val,
Register tmp,
BarrierSet::Name barrier,
bool precise) {
assert(tmp != val && tmp != base && tmp != index, "register collision");
assert(index == noreg || offset == 0, "only one offset");
switch (barrier) {
#ifndef SERIALGC
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
{
__ g1_write_barrier_pre( base, index, offset, tmp, /*preserve_o_regs*/true);
if (index == noreg ) {
assert(Assembler::is_simm13(offset), "fix this code");
__ store_heap_oop(val, base, offset);
} else {
__ store_heap_oop(val, base, index);
}
// No need for post barrier if storing NULL
if (val != G0) {
if (precise) {
if (index == noreg) {
__ add(base, offset, base);
} else {
__ add(base, index, base);
}
}
__ g1_write_barrier_post(base, val, tmp);
}
}
break;
#endif // SERIALGC
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableExtension:
{
if (index == noreg ) {
assert(Assembler::is_simm13(offset), "fix this code");
__ store_heap_oop(val, base, offset);
} else {
__ store_heap_oop(val, base, index);
}
// No need for post barrier if storing NULL
if (val != G0) {
if (precise) {
if (index == noreg) {
__ add(base, offset, base);
} else {
__ add(base, index, base);
}
}
__ card_write_barrier_post(base, val, tmp);
}
}
break;
case BarrierSet::ModRef:
case BarrierSet::Other:
ShouldNotReachHere();
break;
default :
ShouldNotReachHere();
}
}
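
The helper above centralizes the interpreter's oop stores. Stripped to control flow it looks like the following hedged C++ sketch; the free functions are assumptions standing in for the MacroAssembler barrier emitters, and the real 'precise' flag additionally picks the exact element address for the card mark.

enum class Barrier { G1SATBCT, CardTableModRef };

// Stand-ins (assumptions) for the emitters used above.
static void g1_pre_barrier(void** /*field*/)               { /* log previous value (SATB) */ }
static void g1_post_barrier(void** /*field*/, void* /*v*/) { /* enqueue / dirty the card  */ }
static void card_mark(void** /*field*/)                    { /* dirty the card inline     */ }
static void heap_store(void** field, void* val)            { *field = val; }

// Conceptual dispatch of do_oop_store: G1 brackets the store with a pre-barrier
// and, for non-null values, a post-barrier; the card-table kinds only dirty the
// card after a non-null store.
void do_oop_store(Barrier kind, void** field, void* val) {
  switch (kind) {
    case Barrier::G1SATBCT:
      g1_pre_barrier(field);
      heap_store(field, val);
      if (val != nullptr) g1_post_barrier(field, val);
      break;
    case Barrier::CardTableModRef:
      heap_store(field, val);
      if (val != nullptr) card_mark(field);
      break;
  }
}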
//----------------------------------------------------------------------------------------------------
// Platform-dependent initialization
@ -758,6 +831,8 @@ void TemplateTable::aastore() {
// O4: array element klass
// O5: value klass
// Address element(O1, 0, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
// Generate a fast subtype check. Branch to store_ok if no
// failure. Throw if failure.
__ gen_subtype_check( O5, O4, G3_scratch, G4_scratch, G1_scratch, store_ok );
@ -767,18 +842,14 @@ void TemplateTable::aastore() {
// Store is OK.
__ bind(store_ok);
__ store_heap_oop(Otos_i, O1, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
// Quote from rememberedSet.hpp: For objArrays, the precise card
// corresponding to the pointer store is dirtied so we don't need to
// scavenge the entire array.
Address element(O1, 0, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
__ add(element, O1); // address the element precisely
__ store_check(G3_scratch, O1);
do_oop_store(_masm, O1, noreg, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Otos_i, G3_scratch, _bs->kind(), true);
__ ba(false,done);
__ delayed()->inc(Lesp, 3* Interpreter::stackElementSize()); // adj sp (pops array, index and value)
__ bind(is_null);
__ store_heap_oop(Otos_i, element);
do_oop_store(_masm, O1, noreg, arrayOopDesc::base_offset_in_bytes(T_OBJECT), G0, G4_scratch, _bs->kind(), true);
__ profile_null_seen(G3_scratch);
__ inc(Lesp, 3* Interpreter::stackElementSize()); // adj sp (pops array, index and value)
__ bind(done);
@ -2449,8 +2520,9 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static) {
// atos
__ pop_ptr();
__ verify_oop(Otos_i);
__ store_heap_oop(Otos_i, Rclass, Roffset);
__ store_check(G1_scratch, Rclass, Roffset);
do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false);
__ ba(false, checkVolatile);
__ delayed()->tst(Lscratch);
@ -2491,8 +2563,9 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static) {
__ pop_ptr();
pop_and_check_object(Rclass);
__ verify_oop(Otos_i);
__ store_heap_oop(Otos_i, Rclass, Roffset);
__ store_check(G1_scratch, Rclass, Roffset);
do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false);
patch_bytecode(Bytecodes::_fast_aputfield, G3_scratch, G4_scratch);
__ ba(false, checkVolatile);
__ delayed()->tst(Lscratch);
@ -2646,8 +2719,7 @@ void TemplateTable::fast_storefield(TosState state) {
__ stf(FloatRegisterImpl::D, Ftos_d, Rclass, Roffset);
break;
case Bytecodes::_fast_aputfield:
__ store_heap_oop(Otos_i, Rclass, Roffset);
__ store_check(G1_scratch, Rclass, Roffset);
do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false);
break;
default:
ShouldNotReachHere();

View File

@ -1575,6 +1575,35 @@ void Assembler::movdqa(Address dst, XMMRegister src) {
emit_operand(src, dst);
}
void Assembler::movdqu(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
emit_byte(0xF3);
prefix(src, dst);
emit_byte(0x0F);
emit_byte(0x6F);
emit_operand(dst, src);
}
void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_byte(0xF3);
int encode = prefixq_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
emit_byte(0x6F);
emit_byte(0xC0 | encode);
}
void Assembler::movdqu(Address dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
emit_byte(0xF3);
prefix(dst, src);
emit_byte(0x0F);
emit_byte(0x7F);
emit_operand(src, dst);
}
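
The three movdqu overloads added above encode SSE2's unaligned 128-bit move (F3 0F 6F / F3 0F 7F). A hedged intrinsic-level example of the same instruction pair in a simple copy loop; the function name and loop shape are illustrative, not a HotSpot stub.

#include <emmintrin.h>   // SSE2: _mm_loadu_si128 / _mm_storeu_si128 compile to movdqu
#include <cstddef>

// Copy 'bytes' (assumed to be a multiple of 16) without any alignment requirement.
void copy_unaligned_16(const void* src, void* dst, std::size_t bytes) {
  const __m128i* s = static_cast<const __m128i*>(src);
  __m128i*       d = static_cast<__m128i*>(dst);
  for (std::size_t i = 0; i < bytes / 16; ++i) {
    __m128i v = _mm_loadu_si128(s + i);   // movdqu xmm, m128
    _mm_storeu_si128(d + i, v);           // movdqu m128, xmm
  }
}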
// Uses zero extension on 64bit
void Assembler::movl(Register dst, int32_t imm32) {
@ -5935,26 +5964,30 @@ void MacroAssembler::eden_allocate(Register obj,
Label& slow_case) {
assert(obj == rax, "obj must be in rax, for cmpxchg");
assert_different_registers(obj, var_size_in_bytes, t1);
Register end = t1;
Label retry;
bind(retry);
ExternalAddress heap_top((address) Universe::heap()->top_addr());
movptr(obj, heap_top);
if (var_size_in_bytes == noreg) {
lea(end, Address(obj, con_size_in_bytes));
if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
jmp(slow_case);
} else {
lea(end, Address(obj, var_size_in_bytes, Address::times_1));
Register end = t1;
Label retry;
bind(retry);
ExternalAddress heap_top((address) Universe::heap()->top_addr());
movptr(obj, heap_top);
if (var_size_in_bytes == noreg) {
lea(end, Address(obj, con_size_in_bytes));
} else {
lea(end, Address(obj, var_size_in_bytes, Address::times_1));
}
// if end < obj then we wrapped around => object too long => slow case
cmpptr(end, obj);
jcc(Assembler::below, slow_case);
cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
jcc(Assembler::above, slow_case);
// Compare obj with the top addr, and if still equal, store the new top addr in
// end at the address of the top addr pointer. Sets ZF if was equal, and clears
// it otherwise. Use lock prefix for atomicity on MPs.
locked_cmpxchgptr(end, heap_top);
jcc(Assembler::notEqual, retry);
}
// if end < obj then we wrapped around => object too long => slow case
cmpptr(end, obj);
jcc(Assembler::below, slow_case);
cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
jcc(Assembler::above, slow_case);
// Compare obj with the top addr, and if still equal, store the new top addr in
// end at the address of the top addr pointer. Sets ZF if was equal, and clears
// it otherwise. Use lock prefix for atomicity on MPs.
locked_cmpxchgptr(end, heap_top);
jcc(Assembler::notEqual, retry);
}
void MacroAssembler::enter() {
@ -6491,6 +6524,179 @@ void MacroAssembler::sign_extend_short(Register reg) {
}
}
//////////////////////////////////////////////////////////////////////////////////
#ifndef SERIALGC
void MacroAssembler::g1_write_barrier_pre(Register obj,
#ifndef _LP64
Register thread,
#endif
Register tmp,
Register tmp2,
bool tosca_live) {
LP64_ONLY(Register thread = r15_thread;)
Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
PtrQueue::byte_offset_of_active()));
Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
PtrQueue::byte_offset_of_index()));
Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
PtrQueue::byte_offset_of_buf()));
Label done;
Label runtime;
// if (!marking_in_progress) goto done;
if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
cmpl(in_progress, 0);
} else {
assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
cmpb(in_progress, 0);
}
jcc(Assembler::equal, done);
// if (x.f == NULL) goto done;
cmpptr(Address(obj, 0), NULL_WORD);
jcc(Assembler::equal, done);
// Can we store original value in the thread's buffer?
LP64_ONLY(movslq(tmp, index);)
movptr(tmp2, Address(obj, 0));
#ifdef _LP64
cmpq(tmp, 0);
#else
cmpl(index, 0);
#endif
jcc(Assembler::equal, runtime);
#ifdef _LP64
subq(tmp, wordSize);
movl(index, tmp);
addq(tmp, buffer);
#else
subl(index, wordSize);
movl(tmp, buffer);
addl(tmp, index);
#endif
movptr(Address(tmp, 0), tmp2);
jmp(done);
bind(runtime);
// save the live input values
if(tosca_live) push(rax);
push(obj);
#ifdef _LP64
movq(c_rarg0, Address(obj, 0));
call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), c_rarg0, r15_thread);
#else
push(thread);
call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread);
pop(thread);
#endif
pop(obj);
if(tosca_live) pop(rax);
bind(done);
}
void MacroAssembler::g1_write_barrier_post(Register store_addr,
Register new_val,
#ifndef _LP64
Register thread,
#endif
Register tmp,
Register tmp2) {
LP64_ONLY(Register thread = r15_thread;)
Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
PtrQueue::byte_offset_of_index()));
Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
PtrQueue::byte_offset_of_buf()));
BarrierSet* bs = Universe::heap()->barrier_set();
CardTableModRefBS* ct = (CardTableModRefBS*)bs;
Label done;
Label runtime;
// Does store cross heap regions?
movptr(tmp, store_addr);
xorptr(tmp, new_val);
shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
jcc(Assembler::equal, done);
// crosses regions, storing NULL?
cmpptr(new_val, (int32_t) NULL_WORD);
jcc(Assembler::equal, done);
// storing region crossing non-NULL, is card already dirty?
ExternalAddress cardtable((address) ct->byte_map_base);
assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
#ifdef _LP64
const Register card_addr = tmp;
movq(card_addr, store_addr);
shrq(card_addr, CardTableModRefBS::card_shift);
lea(tmp2, cardtable);
// get the address of the card
addq(card_addr, tmp2);
#else
const Register card_index = tmp;
movl(card_index, store_addr);
shrl(card_index, CardTableModRefBS::card_shift);
Address index(noreg, card_index, Address::times_1);
const Register card_addr = tmp;
lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
#endif
cmpb(Address(card_addr, 0), 0);
jcc(Assembler::equal, done);
// storing a region crossing, non-NULL oop, card is clean.
// dirty card and log.
movb(Address(card_addr, 0), 0);
cmpl(queue_index, 0);
jcc(Assembler::equal, runtime);
subl(queue_index, wordSize);
movptr(tmp2, buffer);
#ifdef _LP64
movslq(rscratch1, queue_index);
addq(tmp2, rscratch1);
movq(Address(tmp2, 0), card_addr);
#else
addl(tmp2, queue_index);
movl(Address(tmp2, 0), card_index);
#endif
jmp(done);
bind(runtime);
// save the live input values
push(store_addr);
push(new_val);
#ifdef _LP64
call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
#else
push(thread);
call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
pop(thread);
#endif
pop(new_val);
pop(store_addr);
bind(done);
}
#endif // SERIALGC
//////////////////////////////////////////////////////////////////////////////////
void MacroAssembler::store_check(Register obj) {
// Does a store check for the oop in register obj. The content of
// register obj is destroyed afterwards.

View File

@ -227,9 +227,11 @@ class Address VALUE_OBJ_CLASS_SPEC {
#endif // ASSERT
// accessors
bool uses(Register reg) const {
return _base == reg || _index == reg;
}
bool uses(Register reg) const { return _base == reg || _index == reg; }
Register base() const { return _base; }
Register index() const { return _index; }
ScaleFactor scale() const { return _scale; }
int disp() const { return _disp; }
// Convert the raw encoding form into the form expected by the constructor for
// Address. An index of 4 (rsp) corresponds to having no index, so convert
@ -1053,6 +1055,11 @@ private:
void movdqa(XMMRegister dst, Address src);
void movdqa(XMMRegister dst, XMMRegister src);
// Move Unaligned Double Quadword
void movdqu(Address dst, XMMRegister src);
void movdqu(XMMRegister dst, Address src);
void movdqu(XMMRegister dst, XMMRegister src);
void movl(Register dst, int32_t imm32);
void movl(Address dst, int32_t imm32);
void movl(Register dst, Register src);
@ -1310,7 +1317,8 @@ private:
// on arguments should also go in here.
class MacroAssembler: public Assembler {
friend class LIR_Assembler;
friend class LIR_Assembler;
friend class Runtime1; // as_Address()
protected:
Address as_Address(AddressLiteral adr);
@ -1453,6 +1461,7 @@ class MacroAssembler: public Assembler {
// The pointer will be loaded into the thread register.
void get_thread(Register thread);
// Support for VM calls
//
// It is imperative that all calls into the VM are handled via the call_VM macros.
@ -1527,6 +1536,22 @@ class MacroAssembler: public Assembler {
void store_check(Register obj); // store check for obj - register is destroyed afterwards
void store_check(Register obj, Address dst); // same as above, dst is exact store location (reg. is destroyed)
void g1_write_barrier_pre(Register obj,
#ifndef _LP64
Register thread,
#endif
Register tmp,
Register tmp2,
bool tosca_live);
void g1_write_barrier_post(Register store_addr,
Register new_val,
#ifndef _LP64
Register thread,
#endif
Register tmp,
Register tmp2);
// split store_check(Register obj) to enhance instruction interleaving
void store_check_part_1(Register obj);
void store_check_part_2(Register obj);

View File

@ -456,5 +456,50 @@ void ArrayCopyStub::emit_code(LIR_Assembler* ce) {
__ jmp(_continuation);
}
/////////////////////////////////////////////////////////////////////////////
#ifndef SERIALGC
void G1PreBarrierStub::emit_code(LIR_Assembler* ce) {
// At this point we know that marking is in progress
__ bind(_entry);
assert(pre_val()->is_register(), "Precondition.");
Register pre_val_reg = pre_val()->as_register();
ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false);
__ cmpptr(pre_val_reg, (int32_t) NULL_WORD);
__ jcc(Assembler::equal, _continuation);
ce->store_parameter(pre_val()->as_register(), 0);
__ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id)));
__ jmp(_continuation);
}
jbyte* G1PostBarrierStub::_byte_map_base = NULL;
jbyte* G1PostBarrierStub::byte_map_base_slow() {
BarrierSet* bs = Universe::heap()->barrier_set();
assert(bs->is_a(BarrierSet::G1SATBCTLogging),
"Must be if we're using this.");
return ((G1SATBCardTableModRefBS*)bs)->byte_map_base;
}
void G1PostBarrierStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
assert(addr()->is_register(), "Precondition.");
assert(new_val()->is_register(), "Precondition.");
Register new_val_reg = new_val()->as_register();
__ cmpptr(new_val_reg, (int32_t) NULL_WORD);
__ jcc(Assembler::equal, _continuation);
ce->store_parameter(addr()->as_register(), 0);
__ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_post_barrier_slow_id)));
__ jmp(_continuation);
}
#endif // SERIALGC
/////////////////////////////////////////////////////////////////////////////
#undef __

View File

@ -302,6 +302,8 @@ void LIRGenerator::do_StoreIndexed(StoreIndexed* x) {
}
if (obj_store) {
// Needs GC write barriers.
pre_barrier(LIR_OprFact::address(array_addr), false, NULL);
__ move(value.result(), array_addr, null_check_info);
// Seems to be a precise
post_barrier(LIR_OprFact::address(array_addr), value.result());
@ -756,7 +758,10 @@ void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) {
__ move(obj.result(), addr);
__ add(addr, offset.result(), addr);
if (type == objectType) { // Write-barrier needed for Object fields.
// Do the pre-write barrier, if any.
pre_barrier(addr, false, NULL);
}
LIR_Opr ill = LIR_OprFact::illegalOpr; // for convenience
if (type == objectType)
@ -1286,6 +1291,8 @@ void LIRGenerator::put_Object_unsafe(LIR_Opr src, LIR_Opr offset, LIR_Opr data,
LIR_Address* addr = new LIR_Address(src, offset, type);
bool is_obj = (type == T_ARRAY || type == T_OBJECT);
if (is_obj) {
// Do the pre-write barrier, if any.
pre_barrier(LIR_OprFact::address(addr), false, NULL);
__ move(data, addr);
assert(src->is_register(), "must be register");
// Seems to be a precise address

View File

@ -1583,6 +1583,166 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
#ifndef SERIALGC
case g1_pre_barrier_slow_id:
{
StubFrame f(sasm, "g1_pre_barrier", dont_gc_arguments);
// arg0 : previous value of memory
BarrierSet* bs = Universe::heap()->barrier_set();
if (bs->kind() != BarrierSet::G1SATBCTLogging) {
__ movptr(rax, (int)id);
__ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), rax);
__ should_not_reach_here();
break;
}
__ push(rax);
__ push(rdx);
const Register pre_val = rax;
const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
const Register tmp = rdx;
NOT_LP64(__ get_thread(thread);)
Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
PtrQueue::byte_offset_of_active()));
Address queue_index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
PtrQueue::byte_offset_of_index()));
Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
PtrQueue::byte_offset_of_buf()));
Label done;
Label runtime;
// Can we store original value in the thread's buffer?
LP64_ONLY(__ movslq(tmp, queue_index);)
#ifdef _LP64
__ cmpq(tmp, 0);
#else
__ cmpl(queue_index, 0);
#endif
__ jcc(Assembler::equal, runtime);
#ifdef _LP64
__ subq(tmp, wordSize);
__ movl(queue_index, tmp);
__ addq(tmp, buffer);
#else
__ subl(queue_index, wordSize);
__ movl(tmp, buffer);
__ addl(tmp, queue_index);
#endif
// prev_val (rax)
f.load_argument(0, pre_val);
__ movptr(Address(tmp, 0), pre_val);
__ jmp(done);
__ bind(runtime);
// load the pre-value
__ push(rcx);
f.load_argument(0, rcx);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), rcx, thread);
__ pop(rcx);
__ bind(done);
__ pop(rdx);
__ pop(rax);
}
break;
case g1_post_barrier_slow_id:
{
StubFrame f(sasm, "g1_post_barrier", dont_gc_arguments);
// arg0: store_address
Address store_addr(rbp, 2*BytesPerWord);
BarrierSet* bs = Universe::heap()->barrier_set();
CardTableModRefBS* ct = (CardTableModRefBS*)bs;
Label done;
Label runtime;
// At this point we know new_value is non-NULL and that it crosses regions.
// Must check to see if card is already dirty
const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
PtrQueue::byte_offset_of_index()));
Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
PtrQueue::byte_offset_of_buf()));
__ push(rax);
__ push(rdx);
NOT_LP64(__ get_thread(thread);)
ExternalAddress cardtable((address)ct->byte_map_base);
assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
const Register card_addr = rdx;
#ifdef _LP64
const Register tmp = rscratch1;
f.load_argument(0, card_addr);
__ shrq(card_addr, CardTableModRefBS::card_shift);
__ lea(tmp, cardtable);
// get the address of the card
__ addq(card_addr, tmp);
#else
const Register card_index = rdx;
f.load_argument(0, card_index);
__ shrl(card_index, CardTableModRefBS::card_shift);
Address index(noreg, card_index, Address::times_1);
__ leal(card_addr, __ as_Address(ArrayAddress(cardtable, index)));
#endif
__ cmpb(Address(card_addr, 0), 0);
__ jcc(Assembler::equal, done);
// storing region crossing non-NULL, card is clean.
// dirty card and log.
__ movb(Address(card_addr, 0), 0);
__ cmpl(queue_index, 0);
__ jcc(Assembler::equal, runtime);
__ subl(queue_index, wordSize);
const Register buffer_addr = rbx;
__ push(rbx);
__ movptr(buffer_addr, buffer);
#ifdef _LP64
__ movslq(rscratch1, queue_index);
__ addptr(buffer_addr, rscratch1);
#else
__ addptr(buffer_addr, queue_index);
#endif
__ movptr(Address(buffer_addr, 0), card_addr);
__ pop(rbx);
__ jmp(done);
__ bind(runtime);
NOT_LP64(__ push(rcx);)
__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
NOT_LP64(__ pop(rcx);)
__ bind(done);
__ pop(rdx);
__ pop(rax);
}
break;
#endif // !SERIALGC
default:
{ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments);
__ movptr(rax, (int)id);
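For orientation: the g1_pre_barrier_slow stub above is the assembly form of the usual SATB enqueue pattern, and the g1_post_barrier_slow stub applies the same buffer-plus-index pattern to the dirty-card queue after dirtying a card. Below is a minimal standalone C++ sketch of that fast-path/slow-path split; SatbQueueSketch, its field names, and flush_to_runtime are illustrative only, not HotSpot's PtrQueue/SharedRuntime API (the inline MacroAssembler barrier also tests the "active" flag, which the sketch includes).

#include <cstddef>

// Illustrative stand-in for a thread-local SATB queue (not HotSpot's PtrQueue).
struct SatbQueueSketch {
  bool   active;    // mirrors PtrQueue::byte_offset_of_active()
  size_t index;     // byte index into the buffer; 0 means "buffer is full"
  void** buffer;    // mirrors PtrQueue::byte_offset_of_buf()
};

// Hypothetical slow path standing in for SharedRuntime::g1_wb_pre: in the VM this
// hands the full buffer to the collector and resets the index; here it is a stub.
inline void flush_to_runtime(void* /*pre_val*/, SatbQueueSketch* /*q*/) {}

// Same control flow as the barrier code: nothing to do if marking is inactive or
// the previous value is NULL; fast enqueue while the buffer has room; otherwise
// fall into the runtime call.
inline void g1_pre_barrier_sketch(void* pre_val, SatbQueueSketch* q) {
  if (!q->active || pre_val == nullptr) return;
  if (q->index == 0) {                             // cmp queue_index, 0; je runtime
    flush_to_runtime(pre_val, q);
    return;
  }
  q->index -= sizeof(void*);                       // sub queue_index, wordSize
  q->buffer[q->index / sizeof(void*)] = pre_val;   // movptr [buffer + index], pre_val
}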

View File

@ -44,8 +44,13 @@ void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point,
// Note: No need to save/restore bcp & locals (r13 & r14) pointer
// since these are callee saved registers and no blocking/
// GC can happen in leaf calls.
// Further Note: DO NOT save/restore bcp/locals. If a caller has
// already saved them so that it can use esi/edi as temporaries
// then a save/restore here will DESTROY the copy the caller
// saved! There used to be a save_bcp() that only happened in
// the ASSERT path (no restore_bcp), which caused bizarre failures
// when the JVM was built with ASSERTs.
#ifdef ASSERT
save_bcp();
{
Label L;
cmpptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD);
@ -58,24 +63,9 @@ void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point,
// super call
MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);
// interpreter specific
#ifdef ASSERT
{
Label L;
cmpptr(r13, Address(rbp, frame::interpreter_frame_bcx_offset * wordSize));
jcc(Assembler::equal, L);
stop("InterpreterMacroAssembler::call_VM_leaf_base:"
" r13 not callee saved?");
bind(L);
}
{
Label L;
cmpptr(r14, Address(rbp, frame::interpreter_frame_locals_offset * wordSize));
jcc(Assembler::equal, L);
stop("InterpreterMacroAssembler::call_VM_leaf_base:"
" r14 not callee saved?");
bind(L);
}
#endif
// Used to ASSERT that r13/r14 were equal to the frame's bcp/locals,
// but since they may not have been saved (and we don't want to
// save them here; see the note above) the assert is invalid.
}
void InterpreterMacroAssembler::call_VM_base(Register oop_result,

View File

@ -712,7 +712,6 @@ class StubGenerator: public StubCodeGenerator {
// end - element count
void gen_write_ref_array_pre_barrier(Register start, Register count) {
assert_different_registers(start, count);
#if 0 // G1 only
BarrierSet* bs = Universe::heap()->barrier_set();
switch (bs->kind()) {
case BarrierSet::G1SATBCT:
@ -721,8 +720,8 @@ class StubGenerator: public StubCodeGenerator {
__ pusha(); // push registers
__ push(count);
__ push(start);
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));
__ addl(esp, wordSize * 2);
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre)));
__ addptr(rsp, 2*wordSize);
__ popa();
}
break;
@ -734,7 +733,6 @@ class StubGenerator: public StubCodeGenerator {
ShouldNotReachHere();
}
#endif // 0 - G1 only
}
@ -750,20 +748,18 @@ class StubGenerator: public StubCodeGenerator {
BarrierSet* bs = Universe::heap()->barrier_set();
assert_different_registers(start, count);
switch (bs->kind()) {
#if 0 // G1 only
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
{
__ pusha(); // push registers
__ push(count);
__ push(start);
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post));
__ addl(esp, wordSize * 2);
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post)));
__ addptr(rsp, 2*wordSize);
__ popa();
}
break;
#endif // 0 G1 only
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableExtension:
@ -795,6 +791,69 @@ class StubGenerator: public StubCodeGenerator {
}
}
// Copy 64-byte chunks
//
// Inputs:
// from - source array address
// to_from - destination array address - from
// qword_count - 8-bytes element count, negative
//
void xmm_copy_forward(Register from, Register to_from, Register qword_count) {
assert( UseSSE >= 2, "supported cpu only" );
Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
// Copy 64-byte chunks
__ jmpb(L_copy_64_bytes);
__ align(16);
__ BIND(L_copy_64_bytes_loop);
if(UseUnalignedLoadStores) {
__ movdqu(xmm0, Address(from, 0));
__ movdqu(Address(from, to_from, Address::times_1, 0), xmm0);
__ movdqu(xmm1, Address(from, 16));
__ movdqu(Address(from, to_from, Address::times_1, 16), xmm1);
__ movdqu(xmm2, Address(from, 32));
__ movdqu(Address(from, to_from, Address::times_1, 32), xmm2);
__ movdqu(xmm3, Address(from, 48));
__ movdqu(Address(from, to_from, Address::times_1, 48), xmm3);
} else {
__ movq(xmm0, Address(from, 0));
__ movq(Address(from, to_from, Address::times_1, 0), xmm0);
__ movq(xmm1, Address(from, 8));
__ movq(Address(from, to_from, Address::times_1, 8), xmm1);
__ movq(xmm2, Address(from, 16));
__ movq(Address(from, to_from, Address::times_1, 16), xmm2);
__ movq(xmm3, Address(from, 24));
__ movq(Address(from, to_from, Address::times_1, 24), xmm3);
__ movq(xmm4, Address(from, 32));
__ movq(Address(from, to_from, Address::times_1, 32), xmm4);
__ movq(xmm5, Address(from, 40));
__ movq(Address(from, to_from, Address::times_1, 40), xmm5);
__ movq(xmm6, Address(from, 48));
__ movq(Address(from, to_from, Address::times_1, 48), xmm6);
__ movq(xmm7, Address(from, 56));
__ movq(Address(from, to_from, Address::times_1, 56), xmm7);
}
__ addl(from, 64);
__ BIND(L_copy_64_bytes);
__ subl(qword_count, 8);
__ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);
__ addl(qword_count, 8);
__ jccb(Assembler::zero, L_exit);
//
// length is too short, just copy qwords
//
__ BIND(L_copy_8_bytes);
__ movq(xmm0, Address(from, 0));
__ movq(Address(from, to_from, Address::times_1), xmm0);
__ addl(from, 8);
__ decrement(qword_count);
__ jcc(Assembler::greater, L_copy_8_bytes);
__ BIND(L_exit);
}
// Copy 64-byte chunks
//
// Inputs:
@ -803,6 +862,7 @@ class StubGenerator: public StubCodeGenerator {
// qword_count - 8-bytes element count, negative
//
void mmx_copy_forward(Register from, Register to_from, Register qword_count) {
assert( VM_Version::supports_mmx(), "supported cpu only" );
Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
// Copy 64-byte chunks
__ jmpb(L_copy_64_bytes);
@ -880,7 +940,7 @@ class StubGenerator: public StubCodeGenerator {
__ subptr(to, from); // to --> to_from
__ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
__ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
if (!aligned && (t == T_BYTE || t == T_SHORT)) {
if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
// align source address at 4 bytes address boundary
if (t == T_BYTE) {
// One byte misalignment happens only for byte arrays
@ -910,20 +970,26 @@ class StubGenerator: public StubCodeGenerator {
__ mov(count, rax); // restore 'count'
__ jmpb(L_copy_2_bytes); // all dwords were copied
} else {
// align to 8 bytes, we know we are 4 byte aligned to start
__ testptr(from, 4);
__ jccb(Assembler::zero, L_copy_64_bytes);
__ movl(rax, Address(from, 0));
__ movl(Address(from, to_from, Address::times_1, 0), rax);
__ addptr(from, 4);
__ subl(count, 1<<shift);
if (!UseUnalignedLoadStores) {
// align to 8 bytes, we know we are 4 byte aligned to start
__ testptr(from, 4);
__ jccb(Assembler::zero, L_copy_64_bytes);
__ movl(rax, Address(from, 0));
__ movl(Address(from, to_from, Address::times_1, 0), rax);
__ addptr(from, 4);
__ subl(count, 1<<shift);
}
__ BIND(L_copy_64_bytes);
__ mov(rax, count);
__ shrl(rax, shift+1); // 8 bytes chunk count
//
// Copy 8-byte chunks through MMX registers, 8 per iteration of the loop
//
mmx_copy_forward(from, to_from, rax);
if (UseXMMForArrayCopy) {
xmm_copy_forward(from, to_from, rax);
} else {
mmx_copy_forward(from, to_from, rax);
}
}
// copy tailing dword
__ BIND(L_copy_4_bytes);
@ -1073,13 +1139,20 @@ class StubGenerator: public StubCodeGenerator {
__ align(16);
// Move 8 bytes
__ BIND(L_copy_8_bytes_loop);
__ movq(mmx0, Address(from, count, sf, 0));
__ movq(Address(to, count, sf, 0), mmx0);
if (UseXMMForArrayCopy) {
__ movq(xmm0, Address(from, count, sf, 0));
__ movq(Address(to, count, sf, 0), xmm0);
} else {
__ movq(mmx0, Address(from, count, sf, 0));
__ movq(Address(to, count, sf, 0), mmx0);
}
__ BIND(L_copy_8_bytes);
__ subl(count, 2<<shift);
__ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
__ addl(count, 2<<shift);
__ emms();
if (!UseXMMForArrayCopy) {
__ emms();
}
}
__ BIND(L_copy_4_bytes);
// copy prefix qword
@ -1147,7 +1220,11 @@ class StubGenerator: public StubCodeGenerator {
__ subptr(to, from); // to --> to_from
if (VM_Version::supports_mmx()) {
mmx_copy_forward(from, to_from, count);
if (UseXMMForArrayCopy) {
xmm_copy_forward(from, to_from, count);
} else {
mmx_copy_forward(from, to_from, count);
}
} else {
__ jmpb(L_copy_8_bytes);
__ align(16);
@ -1200,8 +1277,13 @@ class StubGenerator: public StubCodeGenerator {
__ align(16);
__ BIND(L_copy_8_bytes_loop);
if (VM_Version::supports_mmx()) {
__ movq(mmx0, Address(from, count, Address::times_8));
__ movq(Address(to, count, Address::times_8), mmx0);
if (UseXMMForArrayCopy) {
__ movq(xmm0, Address(from, count, Address::times_8));
__ movq(Address(to, count, Address::times_8), xmm0);
} else {
__ movq(mmx0, Address(from, count, Address::times_8));
__ movq(Address(to, count, Address::times_8), mmx0);
}
} else {
__ fild_d(Address(from, count, Address::times_8));
__ fistp_d(Address(to, count, Address::times_8));
@ -1210,7 +1292,7 @@ class StubGenerator: public StubCodeGenerator {
__ decrement(count);
__ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
if (VM_Version::supports_mmx()) {
if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) {
__ emms();
}
inc_copy_counter_np(T_LONG);
@ -1378,9 +1460,9 @@ class StubGenerator: public StubCodeGenerator {
Address elem_klass_addr(elem, oopDesc::klass_offset_in_bytes());
// Copy from low to high addresses, indexed from the end of each array.
gen_write_ref_array_pre_barrier(to, count);
__ lea(end_from, end_from_addr);
__ lea(end_to, end_to_addr);
gen_write_ref_array_pre_barrier(to, count);
assert(length == count, ""); // else fix next line:
__ negptr(count); // negate and test the length
__ jccb(Assembler::notZero, L_load_element);
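The UseXMMForArrayCopy / UseUnalignedLoadStores changes above pick between 8-byte movq moves and unaligned 16-byte movdqu moves when copying 64-byte chunks. A rough standalone C++ sketch of that choice using SSE2 intrinsics follows; it is an illustration only, not the stub generator (the stub's non-XMM path uses MMX registers, while the sketch uses SSE2 for both paths), and it assumes an x86 toolchain providing <emmintrin.h>.

#include <emmintrin.h>  // SSE2 intrinsics: _mm_loadu_si128, _mm_storel_epi64, ...
#include <cstddef>
#include <cstdint>

// Copy 'bytes' (a multiple of 8) forward, mirroring the stub's main loop:
// either unaligned 16-byte loads/stores (movdqu) or 8-byte moves (movq).
void copy_forward_sketch(const uint8_t* from, uint8_t* to, size_t bytes,
                         bool use_unaligned_load_stores) {
  size_t i = 0;
  for (; i + 64 <= bytes; i += 64) {               // 64-byte chunks
    if (use_unaligned_load_stores) {
      for (size_t off = 0; off < 64; off += 16) {
        __m128i v = _mm_loadu_si128(reinterpret_cast<const __m128i*>(from + i + off));
        _mm_storeu_si128(reinterpret_cast<__m128i*>(to + i + off), v);
      }
    } else {
      for (size_t off = 0; off < 64; off += 8) {
        __m128i v = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(from + i + off));
        _mm_storel_epi64(reinterpret_cast<__m128i*>(to + i + off), v);
      }
    }
  }
  for (; i + 8 <= bytes; i += 8) {                 // tail: leftover 8-byte words
    __m128i v = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(from + i));
    _mm_storel_epi64(reinterpret_cast<__m128i*>(to + i), v);
  }
}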

View File

@ -1153,18 +1153,26 @@ class StubGenerator: public StubCodeGenerator {
// Destroy no registers!
//
void gen_write_ref_array_pre_barrier(Register addr, Register count) {
#if 0 // G1 - only
assert_different_registers(addr, c_rarg1);
assert_different_registers(count, c_rarg0);
BarrierSet* bs = Universe::heap()->barrier_set();
switch (bs->kind()) {
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
{
__ pusha(); // push registers
__ movptr(c_rarg0, addr);
__ movptr(c_rarg1, count);
__ call(RuntimeAddress(BarrierSet::static_write_ref_array_pre));
if (count == c_rarg0) {
if (addr == c_rarg1) {
// exactly backwards!!
__ xchgptr(c_rarg1, c_rarg0);
} else {
__ movptr(c_rarg1, count);
__ movptr(c_rarg0, addr);
}
} else {
__ movptr(c_rarg0, addr);
__ movptr(c_rarg1, count);
}
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre)));
__ popa();
}
break;
@ -1172,11 +1180,10 @@ class StubGenerator: public StubCodeGenerator {
case BarrierSet::CardTableExtension:
case BarrierSet::ModRef:
break;
default :
default:
ShouldNotReachHere();
}
#endif // 0 G1 - only
}
//
@ -1193,7 +1200,6 @@ class StubGenerator: public StubCodeGenerator {
assert_different_registers(start, end, scratch);
BarrierSet* bs = Universe::heap()->barrier_set();
switch (bs->kind()) {
#if 0 // G1 - only
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
@ -1206,11 +1212,10 @@ class StubGenerator: public StubCodeGenerator {
__ shrptr(scratch, LogBytesPerWord);
__ mov(c_rarg0, start);
__ mov(c_rarg1, scratch);
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post));
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post)));
__ popa();
}
break;
#endif // 0 G1 - only
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableExtension:
{
@ -1239,8 +1244,13 @@ class StubGenerator: public StubCodeGenerator {
__ decrement(count);
__ jcc(Assembler::greaterEqual, L_loop);
}
}
}
break;
default:
ShouldNotReachHere();
}
}
// Copy big chunks forward
//
@ -1259,14 +1269,22 @@ class StubGenerator: public StubCodeGenerator {
Label L_loop;
__ align(16);
__ BIND(L_loop);
__ movq(to, Address(end_from, qword_count, Address::times_8, -24));
__ movq(Address(end_to, qword_count, Address::times_8, -24), to);
__ movq(to, Address(end_from, qword_count, Address::times_8, -16));
__ movq(Address(end_to, qword_count, Address::times_8, -16), to);
__ movq(to, Address(end_from, qword_count, Address::times_8, - 8));
__ movq(Address(end_to, qword_count, Address::times_8, - 8), to);
__ movq(to, Address(end_from, qword_count, Address::times_8, - 0));
__ movq(Address(end_to, qword_count, Address::times_8, - 0), to);
if(UseUnalignedLoadStores) {
__ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
__ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
__ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8));
__ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1);
} else {
__ movq(to, Address(end_from, qword_count, Address::times_8, -24));
__ movq(Address(end_to, qword_count, Address::times_8, -24), to);
__ movq(to, Address(end_from, qword_count, Address::times_8, -16));
__ movq(Address(end_to, qword_count, Address::times_8, -16), to);
__ movq(to, Address(end_from, qword_count, Address::times_8, - 8));
__ movq(Address(end_to, qword_count, Address::times_8, - 8), to);
__ movq(to, Address(end_from, qword_count, Address::times_8, - 0));
__ movq(Address(end_to, qword_count, Address::times_8, - 0), to);
}
__ BIND(L_copy_32_bytes);
__ addptr(qword_count, 4);
__ jcc(Assembler::lessEqual, L_loop);
@ -1292,14 +1310,22 @@ class StubGenerator: public StubCodeGenerator {
Label L_loop;
__ align(16);
__ BIND(L_loop);
__ movq(to, Address(from, qword_count, Address::times_8, 24));
__ movq(Address(dest, qword_count, Address::times_8, 24), to);
__ movq(to, Address(from, qword_count, Address::times_8, 16));
__ movq(Address(dest, qword_count, Address::times_8, 16), to);
__ movq(to, Address(from, qword_count, Address::times_8, 8));
__ movq(Address(dest, qword_count, Address::times_8, 8), to);
__ movq(to, Address(from, qword_count, Address::times_8, 0));
__ movq(Address(dest, qword_count, Address::times_8, 0), to);
if(UseUnalignedLoadStores) {
__ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16));
__ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0);
__ movdqu(xmm1, Address(from, qword_count, Address::times_8, 0));
__ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm1);
} else {
__ movq(to, Address(from, qword_count, Address::times_8, 24));
__ movq(Address(dest, qword_count, Address::times_8, 24), to);
__ movq(to, Address(from, qword_count, Address::times_8, 16));
__ movq(Address(dest, qword_count, Address::times_8, 16), to);
__ movq(to, Address(from, qword_count, Address::times_8, 8));
__ movq(Address(dest, qword_count, Address::times_8, 8), to);
__ movq(to, Address(from, qword_count, Address::times_8, 0));
__ movq(Address(dest, qword_count, Address::times_8, 0), to);
}
__ BIND(L_copy_32_bytes);
__ subptr(qword_count, 4);
__ jcc(Assembler::greaterEqual, L_loop);
@ -2282,7 +2308,7 @@ class StubGenerator: public StubCodeGenerator {
// and report their number to the caller.
assert_different_registers(rax, r14_length, count, to, end_to, rcx);
__ lea(end_to, to_element_addr);
gen_write_ref_array_post_barrier(to, end_to, rcx);
gen_write_ref_array_post_barrier(to, end_to, rscratch1);
__ movptr(rax, r14_length); // original oops
__ addptr(rax, count); // K = (original - remaining) oops
__ notptr(rax); // report (-1^K) to caller
@ -2291,7 +2317,7 @@ class StubGenerator: public StubCodeGenerator {
// Come here on success only.
__ BIND(L_do_card_marks);
__ addptr(end_to, -wordSize); // make an inclusive end pointer
gen_write_ref_array_post_barrier(to, end_to, rcx);
gen_write_ref_array_post_barrier(to, end_to, rscratch1);
__ xorptr(rax, rax); // return 0 on success
// Common exit point (success or failure).

View File

@ -107,6 +107,78 @@ static Assembler::Condition j_not(TemplateTable::Condition cc) {
//----------------------------------------------------------------------------------------------------
// Miscellaneous helper routines
// Store an oop (or NULL) at the address described by obj.
// If val == noreg this means store a NULL
static void do_oop_store(InterpreterMacroAssembler* _masm,
Address obj,
Register val,
BarrierSet::Name barrier,
bool precise) {
assert(val == noreg || val == rax, "parameter is just for looks");
switch (barrier) {
#ifndef SERIALGC
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
{
// flatten object address if needed
// We do it regardless of precise because we need the registers
if (obj.index() == noreg && obj.disp() == 0) {
if (obj.base() != rdx) {
__ movl(rdx, obj.base());
}
} else {
__ leal(rdx, obj);
}
__ get_thread(rcx);
__ save_bcp();
__ g1_write_barrier_pre(rdx, rcx, rsi, rbx, val != noreg);
// Do the actual store
// noreg means NULL
if (val == noreg) {
__ movl(Address(rdx, 0), NULL_WORD);
// No post barrier for NULL
} else {
__ movl(Address(rdx, 0), val);
__ g1_write_barrier_post(rdx, rax, rcx, rbx, rsi);
}
__ restore_bcp();
}
break;
#endif // SERIALGC
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableExtension:
{
if (val == noreg) {
__ movl(obj, NULL_WORD);
} else {
__ movl(obj, val);
// flatten object address if needed
if (!precise || (obj.index() == noreg && obj.disp() == 0)) {
__ store_check(obj.base());
} else {
__ leal(rdx, obj);
__ store_check(rdx);
}
}
}
break;
case BarrierSet::ModRef:
case BarrierSet::Other:
if (val == noreg) {
__ movl(obj, NULL_WORD);
} else {
__ movl(obj, val);
}
break;
default :
ShouldNotReachHere();
}
}
Address TemplateTable::at_bcp(int offset) {
assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
return Address(rsi, offset);
@ -876,6 +948,8 @@ void TemplateTable::aastore() {
__ movptr(rax, at_tos()); // Value
__ movl(rcx, at_tos_p1()); // Index
__ movptr(rdx, at_tos_p2()); // Array
Address element_address(rdx, rcx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
index_check_without_pop(rdx, rcx); // kills rbx,
// do array store check - check for NULL value first
__ testptr(rax, rax);
@ -887,7 +961,7 @@ void TemplateTable::aastore() {
__ movptr(rax, Address(rdx, oopDesc::klass_offset_in_bytes()));
__ movptr(rax, Address(rax, sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes()));
// Compress array+index*wordSize+12 into a single register. Frees ECX.
__ lea(rdx, Address(rdx, rcx, Address::times_ptr, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
__ lea(rdx, element_address);
// Generate subtype check. Blows ECX. Resets EDI to locals.
// Superklass in EAX. Subklass in EBX.
@ -899,15 +973,20 @@ void TemplateTable::aastore() {
// Come here on success
__ bind(ok_is_subtype);
__ movptr(rax, at_rsp()); // Value
__ movptr(Address(rdx, 0), rax);
__ store_check(rdx);
__ jmpb(done);
// Get the value to store
__ movptr(rax, at_rsp());
// and store it with appropriate barrier
do_oop_store(_masm, Address(rdx, 0), rax, _bs->kind(), true);
__ jmp(done);
// Have a NULL in EAX, EDX=array, ECX=index. Store NULL at ary[idx]
__ bind(is_null);
__ profile_null_seen(rbx);
__ movptr(Address(rdx, rcx, Address::times_ptr, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), rax);
// Store NULL (noreg means NULL to do_oop_store)
do_oop_store(_masm, element_address, noreg, _bs->kind(), true);
// Pop stack arguments
__ bind(done);
@ -1515,7 +1594,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
// compute return address as bci in rax,
__ lea(rax, at_bcp((is_wide ? 5 : 3) - in_bytes(constMethodOopDesc::codes_offset())));
__ subptr(rax, Address(rcx, methodOopDesc::const_offset()));
// Adjust the bcp in ESI by the displacement in EDX
// Adjust the bcp in RSI by the displacement in EDX
__ addptr(rsi, rdx);
// Push return address
__ push_i(rax);
@ -1526,7 +1605,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
// Normal (non-jsr) branch handling
// Adjust the bcp in ESI by the displacement in EDX
// Adjust the bcp in RSI by the displacement in EDX
__ addptr(rsi, rdx);
assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters");
@ -2439,11 +2518,12 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static) {
__ pop(atos);
if (!is_static) pop_and_check_object(obj);
__ movptr(lo, rax );
__ store_check(obj, lo); // Need to mark card
do_oop_store(_masm, lo, rax, _bs->kind(), false);
if (!is_static) {
patch_bytecode(Bytecodes::_fast_aputfield, rcx, rbx);
}
__ jmp(Done);
__ bind(notObj);
@ -2664,7 +2744,10 @@ void TemplateTable::fast_storefield(TosState state) {
break;
case Bytecodes::_fast_fputfield: __ fstp_s(lo); break;
case Bytecodes::_fast_dputfield: __ fstp_d(lo); break;
case Bytecodes::_fast_aputfield: __ movptr(lo, rax); __ store_check(rcx, lo); break;
case Bytecodes::_fast_aputfield: {
do_oop_store(_masm, lo, rax, _bs->kind(), false);
break;
}
default:
ShouldNotReachHere();
}
@ -2672,7 +2755,8 @@ void TemplateTable::fast_storefield(TosState state) {
Label done;
volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad |
Assembler::StoreStore));
__ jmpb(done);
// Barriers are so large that short branch doesn't reach!
__ jmp(done);
// Same code as above, but don't need rdx to test for volatile.
__ bind(notVolatile);
@ -2694,7 +2778,10 @@ void TemplateTable::fast_storefield(TosState state) {
break;
case Bytecodes::_fast_fputfield: __ fstp_s(lo); break;
case Bytecodes::_fast_dputfield: __ fstp_d(lo); break;
case Bytecodes::_fast_aputfield: __ movptr(lo, rax); __ store_check(rcx, lo); break;
case Bytecodes::_fast_aputfield: {
do_oop_store(_masm, lo, rax, _bs->kind(), false);
break;
}
default:
ShouldNotReachHere();
}
@ -3054,8 +3141,6 @@ void TemplateTable::_new() {
Label initialize_object; // including clearing the fields
Label allocate_shared;
ExternalAddress heap_top((address)Universe::heap()->top_addr());
__ get_cpool_and_tags(rcx, rax);
// get instanceKlass
__ movptr(rcx, Address(rcx, rdx, Address::times_ptr, sizeof(constantPoolOopDesc)));
@ -3112,6 +3197,8 @@ void TemplateTable::_new() {
if (allow_shared_alloc) {
__ bind(allocate_shared);
ExternalAddress heap_top((address)Universe::heap()->top_addr());
Label retry;
__ bind(retry);
__ movptr(rax, heap_top);

View File

@ -115,6 +115,69 @@ static Assembler::Condition j_not(TemplateTable::Condition cc) {
// Miscellaneous helper routines
// Store an oop (or NULL) at the address described by obj.
// If val == noreg this means store a NULL
static void do_oop_store(InterpreterMacroAssembler* _masm,
Address obj,
Register val,
BarrierSet::Name barrier,
bool precise) {
assert(val == noreg || val == rax, "parameter is just for looks");
switch (barrier) {
#ifndef SERIALGC
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
{
// flatten object address if needed
if (obj.index() == noreg && obj.disp() == 0) {
if (obj.base() != rdx) {
__ movq(rdx, obj.base());
}
} else {
__ leaq(rdx, obj);
}
__ g1_write_barrier_pre(rdx, r8, rbx, val != noreg);
if (val == noreg) {
__ store_heap_oop(Address(rdx, 0), NULL_WORD);
} else {
__ store_heap_oop(Address(rdx, 0), val);
__ g1_write_barrier_post(rdx, val, r8, rbx);
}
}
break;
#endif // SERIALGC
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableExtension:
{
if (val == noreg) {
__ store_heap_oop(obj, NULL_WORD);
} else {
__ store_heap_oop(obj, val);
// flatten object address if needed
if (!precise || (obj.index() == noreg && obj.disp() == 0)) {
__ store_check(obj.base());
} else {
__ leaq(rdx, obj);
__ store_check(rdx);
}
}
}
break;
case BarrierSet::ModRef:
case BarrierSet::Other:
if (val == noreg) {
__ store_heap_oop(obj, NULL_WORD);
} else {
__ store_heap_oop(obj, val);
}
break;
default :
ShouldNotReachHere();
}
}
Address TemplateTable::at_bcp(int offset) {
assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
@ -560,8 +623,8 @@ void TemplateTable::aaload() {
// rdx: array
index_check(rdx, rax); // kills rbx
__ load_heap_oop(rax, Address(rdx, rax,
UseCompressedOops ? Address::times_4 : Address::times_8,
arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
UseCompressedOops ? Address::times_4 : Address::times_8,
arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
}
void TemplateTable::baload() {
@ -866,6 +929,11 @@ void TemplateTable::aastore() {
__ movptr(rax, at_tos()); // value
__ movl(rcx, at_tos_p1()); // index
__ movptr(rdx, at_tos_p2()); // array
Address element_address(rdx, rcx,
UseCompressedOops? Address::times_4 : Address::times_8,
arrayOopDesc::base_offset_in_bytes(T_OBJECT));
index_check(rdx, rcx); // kills rbx
// do array store check - check for NULL value first
__ testptr(rax, rax);
@ -879,9 +947,7 @@ void TemplateTable::aastore() {
sizeof(oopDesc) +
objArrayKlass::element_klass_offset_in_bytes()));
// Compress array + index*oopSize + 12 into a single register. Frees rcx.
__ lea(rdx, Address(rdx, rcx,
UseCompressedOops ? Address::times_4 : Address::times_8,
arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
__ lea(rdx, element_address);
// Generate subtype check. Blows rcx, rdi
// Superklass in rax. Subklass in rbx.
@ -893,18 +959,19 @@ void TemplateTable::aastore() {
// Come here on success
__ bind(ok_is_subtype);
__ movptr(rax, at_tos()); // Value
__ store_heap_oop(Address(rdx, 0), rax);
__ store_check(rdx);
// Get the value we will store
__ movptr(rax, at_tos());
// Now store using the appropriate barrier
do_oop_store(_masm, Address(rdx, 0), rax, _bs->kind(), true);
__ jmp(done);
// Have a NULL in rax, rdx=array, ecx=index. Store NULL at ary[idx]
__ bind(is_null);
__ profile_null_seen(rbx);
__ store_heap_oop(Address(rdx, rcx,
UseCompressedOops ? Address::times_4 : Address::times_8,
arrayOopDesc::base_offset_in_bytes(T_OBJECT)),
rax);
// Store a NULL
do_oop_store(_masm, element_address, noreg, _bs->kind(), true);
// Pop stack arguments
__ bind(done);
@ -2396,8 +2463,10 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static) {
// atos
__ pop(atos);
if (!is_static) pop_and_check_object(obj);
__ store_heap_oop(field, rax);
__ store_check(obj, field); // Need to mark card
// Store into the field
do_oop_store(_masm, field, rax, _bs->kind(), false);
if (!is_static) {
patch_bytecode(Bytecodes::_fast_aputfield, bc, rbx);
}
@ -2584,8 +2653,7 @@ void TemplateTable::fast_storefield(TosState state) {
// access field
switch (bytecode()) {
case Bytecodes::_fast_aputfield:
__ store_heap_oop(field, rax);
__ store_check(rcx, field);
do_oop_store(_masm, field, rax, _bs->kind(), false);
break;
case Bytecodes::_fast_lputfield:
__ movq(field, rax);
@ -3044,8 +3112,6 @@ void TemplateTable::_new() {
Label initialize_header;
Label initialize_object; // including clearing the fields
Label allocate_shared;
ExternalAddress top((address)Universe::heap()->top_addr());
ExternalAddress end((address)Universe::heap()->end_addr());
__ get_cpool_and_tags(rsi, rax);
// get instanceKlass
@ -3106,6 +3172,9 @@ void TemplateTable::_new() {
if (allow_shared_alloc) {
__ bind(allocate_shared);
ExternalAddress top((address)Universe::heap()->top_addr());
ExternalAddress end((address)Universe::heap()->end_addr());
const Register RtopAddr = rscratch1;
const Register RendAddr = rscratch2;

View File

@ -242,9 +242,11 @@ void VM_Version::get_processor_features() {
_supports_cx8 = supports_cmpxchg8();
// if the OS doesn't support SSE, we can't use this feature even if the HW does
if( !os::supports_sse())
_cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4|CPU_SSE4A);
if (UseSSE < 4)
_cpuFeatures &= ~CPU_SSE4;
_cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2);
if (UseSSE < 4) {
_cpuFeatures &= ~CPU_SSE4_1;
_cpuFeatures &= ~CPU_SSE4_2;
}
if (UseSSE < 3) {
_cpuFeatures &= ~CPU_SSE3;
_cpuFeatures &= ~CPU_SSSE3;
@ -261,7 +263,7 @@ void VM_Version::get_processor_features() {
}
char buf[256];
jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
cores_per_cpu(), threads_per_core(),
cpu_family(), _model, _stepping,
(supports_cmov() ? ", cmov" : ""),
@ -272,7 +274,8 @@ void VM_Version::get_processor_features() {
(supports_sse2() ? ", sse2" : ""),
(supports_sse3() ? ", sse3" : ""),
(supports_ssse3()? ", ssse3": ""),
(supports_sse4() ? ", sse4" : ""),
(supports_sse4_1() ? ", sse4.1" : ""),
(supports_sse4_2() ? ", sse4.2" : ""),
(supports_mmx_ext() ? ", mmxext" : ""),
(supports_3dnow() ? ", 3dnow" : ""),
(supports_3dnow2() ? ", 3dnowext" : ""),
@ -285,7 +288,7 @@ void VM_Version::get_processor_features() {
// older Pentiums which do not support it.
if( UseSSE > 4 ) UseSSE=4;
if( UseSSE < 0 ) UseSSE=0;
if( !supports_sse4() ) // Drop to 3 if no SSE4 support
if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support
UseSSE = MIN2((intx)3,UseSSE);
if( !supports_sse3() ) // Drop to 2 if no SSE3 support
UseSSE = MIN2((intx)2,UseSSE);
@ -375,6 +378,14 @@ void VM_Version::get_processor_features() {
MaxLoopPad = 11;
}
#endif // COMPILER2
if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) {
UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
}
if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus
if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) {
UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
}
}
}
}
@ -413,7 +424,7 @@ void VM_Version::get_processor_features() {
#ifndef PRODUCT
if (PrintMiscellaneous && Verbose) {
tty->print_cr("Logical CPUs per package: %u",
tty->print_cr("Logical CPUs per core: %u",
logical_processors_per_package());
tty->print_cr("UseSSE=%d",UseSSE);
tty->print("Allocation: ");

View File

@ -68,9 +68,9 @@ public:
cmpxchg16: 1,
: 4,
dca : 1,
: 4,
popcnt : 1,
: 8;
sse4_1 : 1,
sse4_2 : 1,
: 11;
} bits;
};
@ -177,8 +177,9 @@ protected:
CPU_SSE2 = (1 << 7),
CPU_SSE3 = (1 << 8), // sse3 comes from cpuid 1 (ECX)
CPU_SSSE3= (1 << 9),
CPU_SSE4 = (1 <<10),
CPU_SSE4A= (1 <<11)
CPU_SSE4A= (1 <<10),
CPU_SSE4_1 = (1 << 11),
CPU_SSE4_2 = (1 << 12)
} cpuFeatureFlags;
// cpuid information block. All info derived from executing cpuid with
@ -240,22 +241,14 @@ protected:
static CpuidInfo _cpuid_info;
// Extractors and predicates
static bool is_extended_cpu_family() {
const uint32_t Extended_Cpu_Family = 0xf;
return _cpuid_info.std_cpuid1_rax.bits.family == Extended_Cpu_Family;
}
static uint32_t extended_cpu_family() {
uint32_t result = _cpuid_info.std_cpuid1_rax.bits.family;
if (is_extended_cpu_family()) {
result += _cpuid_info.std_cpuid1_rax.bits.ext_family;
}
result += _cpuid_info.std_cpuid1_rax.bits.ext_family;
return result;
}
static uint32_t extended_cpu_model() {
uint32_t result = _cpuid_info.std_cpuid1_rax.bits.model;
if (is_extended_cpu_family()) {
result |= _cpuid_info.std_cpuid1_rax.bits.ext_model << 4;
}
result |= _cpuid_info.std_cpuid1_rax.bits.ext_model << 4;
return result;
}
static uint32_t cpu_stepping() {
@ -293,6 +286,10 @@ protected:
result |= CPU_SSSE3;
if (is_amd() && _cpuid_info.ext_cpuid1_rcx.bits.sse4a != 0)
result |= CPU_SSE4A;
if (_cpuid_info.std_cpuid1_rcx.bits.sse4_1 != 0)
result |= CPU_SSE4_1;
if (_cpuid_info.std_cpuid1_rcx.bits.sse4_2 != 0)
result |= CPU_SSE4_2;
return result;
}
@ -380,7 +377,8 @@ public:
static bool supports_sse2() { return (_cpuFeatures & CPU_SSE2) != 0; }
static bool supports_sse3() { return (_cpuFeatures & CPU_SSE3) != 0; }
static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; }
static bool supports_sse4() { return (_cpuFeatures & CPU_SSE4) != 0; }
static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; }
static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; }
//
// AMD features
//

View File

@ -186,8 +186,10 @@ void VM_Version::get_processor_features() {
if (!VM_Version::supports_sse2()) {
vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
}
if (UseSSE < 4)
_cpuFeatures &= ~CPU_SSE4;
if (UseSSE < 4) {
_cpuFeatures &= ~CPU_SSE4_1;
_cpuFeatures &= ~CPU_SSE4_2;
}
if (UseSSE < 3) {
_cpuFeatures &= ~CPU_SSE3;
_cpuFeatures &= ~CPU_SSSE3;
@ -204,7 +206,7 @@ void VM_Version::get_processor_features() {
}
char buf[256];
jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
cores_per_cpu(), threads_per_core(),
cpu_family(), _model, _stepping,
(supports_cmov() ? ", cmov" : ""),
@ -215,7 +217,8 @@ void VM_Version::get_processor_features() {
(supports_sse2() ? ", sse2" : ""),
(supports_sse3() ? ", sse3" : ""),
(supports_ssse3()? ", ssse3": ""),
(supports_sse4() ? ", sse4" : ""),
(supports_sse4_1() ? ", sse4.1" : ""),
(supports_sse4_2() ? ", sse4.2" : ""),
(supports_mmx_ext() ? ", mmxext" : ""),
(supports_3dnow() ? ", 3dnow" : ""),
(supports_3dnow2() ? ", 3dnowext" : ""),
@ -228,7 +231,7 @@ void VM_Version::get_processor_features() {
// older Pentiums which do not support it.
if( UseSSE > 4 ) UseSSE=4;
if( UseSSE < 0 ) UseSSE=0;
if( !supports_sse4() ) // Drop to 3 if no SSE4 support
if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support
UseSSE = MIN2((intx)3,UseSSE);
if( !supports_sse3() ) // Drop to 2 if no SSE3 support
UseSSE = MIN2((intx)2,UseSSE);
@ -314,6 +317,14 @@ void VM_Version::get_processor_features() {
MaxLoopPad = 11;
}
#endif // COMPILER2
if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) {
UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
}
if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus
if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) {
UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
}
}
}
}
@ -355,7 +366,7 @@ void VM_Version::get_processor_features() {
#ifndef PRODUCT
if (PrintMiscellaneous && Verbose) {
tty->print_cr("Logical CPUs per package: %u",
tty->print_cr("Logical CPUs per core: %u",
logical_processors_per_package());
tty->print_cr("UseSSE=%d",UseSSE);
tty->print("Allocation: ");

View File

@ -68,9 +68,9 @@ public:
cmpxchg16: 1,
: 4,
dca : 1,
: 4,
popcnt : 1,
: 8;
sse4_1 : 1,
sse4_2 : 1,
: 11;
} bits;
};
@ -177,8 +177,9 @@ protected:
CPU_SSE2 = (1 << 7),
CPU_SSE3 = (1 << 8),
CPU_SSSE3= (1 << 9),
CPU_SSE4 = (1 <<10),
CPU_SSE4A= (1 <<11)
CPU_SSE4A= (1 <<10),
CPU_SSE4_1 = (1 << 11),
CPU_SSE4_2 = (1 << 12)
} cpuFeatureFlags;
// cpuid information block. All info derived from executing cpuid with
@ -240,22 +241,14 @@ protected:
static CpuidInfo _cpuid_info;
// Extractors and predicates
static bool is_extended_cpu_family() {
const uint32_t Extended_Cpu_Family = 0xf;
return _cpuid_info.std_cpuid1_eax.bits.family == Extended_Cpu_Family;
}
static uint32_t extended_cpu_family() {
uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family;
if (is_extended_cpu_family()) {
result += _cpuid_info.std_cpuid1_eax.bits.ext_family;
}
result += _cpuid_info.std_cpuid1_eax.bits.ext_family;
return result;
}
static uint32_t extended_cpu_model() {
uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model;
if (is_extended_cpu_family()) {
result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4;
}
result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4;
return result;
}
static uint32_t cpu_stepping() {
@ -293,6 +286,10 @@ protected:
result |= CPU_SSSE3;
if (is_amd() && _cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
result |= CPU_SSE4A;
if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0)
result |= CPU_SSE4_1;
if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
result |= CPU_SSE4_2;
return result;
}
@ -380,7 +377,8 @@ public:
static bool supports_sse2() { return (_cpuFeatures & CPU_SSE2) != 0; }
static bool supports_sse3() { return (_cpuFeatures & CPU_SSE3) != 0; }
static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; }
static bool supports_sse4() { return (_cpuFeatures & CPU_SSE4) != 0; }
static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; }
static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; }
//
// AMD features
//
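The split of CPU_SSE4 into CPU_SSE4_1 and CPU_SSE4_2 above corresponds to CPUID leaf 1, ECX bits 19 and 20 (SSE3 is bit 0, SSSE3 bit 9). The following standalone C++ sketch reads those bits directly; it assumes a GCC/Clang x86 toolchain with <cpuid.h>, whereas HotSpot executes cpuid through its own generated stub.

#include <cpuid.h>   // GCC/Clang helper for the cpuid instruction (assumed available)
#include <cstdio>

int main() {
  unsigned eax = 0, ebx = 0, ecx = 0, edx = 0;
  if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {   // standard leaf 1
    std::printf("cpuid leaf 1 not supported\n");
    return 1;
  }
  bool sse3   = (ecx >> 0)  & 1;   // std_cpuid1 ECX bit 0
  bool ssse3  = (ecx >> 9)  & 1;   // ECX bit 9
  bool sse4_1 = (ecx >> 19) & 1;   // ECX bit 19 -> CPU_SSE4_1
  bool sse4_2 = (ecx >> 20) & 1;   // ECX bit 20 -> CPU_SSE4_2
  std::printf("sse3=%d ssse3=%d sse4.1=%d sse4.2=%d\n", sse3, ssse3, sse4_1, sse4_2);
  return 0;
}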

View File

@ -4810,6 +4810,16 @@ operand immL0() %{
interface(CONST_INTER);
%}
// Long Immediate -1
operand immL_M1() %{
predicate( n->get_long() == -1L );
match(ConL);
op_cost(0);
format %{ %}
interface(CONST_INTER);
%}
// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
@ -8621,6 +8631,18 @@ instruct xorI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
ins_pipe( ialu_reg_reg );
%}
// Xor Register with Immediate -1
instruct xorI_eReg_im1(eRegI dst, immI_M1 imm) %{
match(Set dst (XorI dst imm));
size(2);
format %{ "NOT $dst" %}
ins_encode %{
__ notl($dst$$Register);
%}
ins_pipe( ialu_reg );
%}
// Xor Register with Immediate
instruct xorI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
match(Set dst (XorI dst src));
@ -8938,6 +8960,18 @@ instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
ins_pipe( ialu_reg_reg_long );
%}
// Xor Long Register with Immediate -1
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
match(Set dst (XorL dst imm));
format %{ "NOT $dst.lo\n\t"
"NOT $dst.hi" %}
ins_encode %{
__ notl($dst$$Register);
__ notl(HIGH_FROM_LOW($dst$$Register));
%}
ins_pipe( ialu_reg_long );
%}
// Xor Long Register with Immediate
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
match(Set dst (XorL dst src));

View File

@ -9309,6 +9309,17 @@ instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
ins_pipe(ialu_reg_reg);
%}
// Xor Register with Immediate -1
instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
match(Set dst (XorI dst imm));
format %{ "not $dst" %}
ins_encode %{
__ notl($dst$$Register);
%}
ins_pipe(ialu_reg);
%}
// Xor Register with Immediate
instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
@ -9529,6 +9540,17 @@ instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
ins_pipe(ialu_reg_reg);
%}
// Xor Register with Immediate -1
instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
match(Set dst (XorL dst imm));
format %{ "notq $dst" %}
ins_encode %{
__ notq($dst$$Register);
%}
ins_pipe(ialu_reg);
%}
// Xor Register with Immediate
instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
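The new xorI_eReg_im1 / xorL_rReg_im1 rules above (in both the 32-bit and 64-bit .ad files) let the matcher emit a plain not instruction for an XOR with the immediate -1, relying on the identity x ^ -1 == ~x. A trivial standalone C++ check of that identity, for illustration only:

#include <cassert>
#include <cstdint>

int main() {
  const int64_t vals[] = { 0, 1, -1, 0x12345678abcdLL, INT64_MIN };
  for (int64_t x : vals) {
    assert((x ^ -1) == ~x);                    // the 64-bit form (notq $dst)
    const int32_t xi = static_cast<int32_t>(x);
    assert((xi ^ -1) == ~xi);                  // the 32-bit form (not/notl $dst)
  }
  return 0;
}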

View File

@ -1110,7 +1110,7 @@ static jstring getPlatformEncoding(JNIEnv *env) {
if (propname) {
jclass cls;
jmethodID mid;
NULL_CHECK0 (cls = (*env)->FindClass(env, "java/lang/System"));
NULL_CHECK0 (cls = FindBootStrapClass(env, "java/lang/System"));
NULL_CHECK0 (mid = (*env)->GetStaticMethodID(
env, cls,
"getProperty",
@ -1125,7 +1125,7 @@ static jstring getPlatformEncoding(JNIEnv *env) {
static jboolean isEncodingSupported(JNIEnv *env, jstring enc) {
jclass cls;
jmethodID mid;
NULL_CHECK0 (cls = (*env)->FindClass(env, "java/nio/charset/Charset"));
NULL_CHECK0 (cls = FindBootStrapClass(env, "java/nio/charset/Charset"));
NULL_CHECK0 (mid = (*env)->GetStaticMethodID(
env, cls,
"isSupported",
@ -1161,7 +1161,7 @@ NewPlatformString(JNIEnv *env, char *s)
#else
if (isEncodingSupported(env, enc) == JNI_TRUE) {
#endif
NULL_CHECK0(cls = (*env)->FindClass(env, "java/lang/String"));
NULL_CHECK0(cls = FindBootStrapClass(env, "java/lang/String"));
NULL_CHECK0(mid = (*env)->GetMethodID(env, cls, "<init>",
"([BLjava/lang/String;)V"));
str = (*env)->NewObject(env, cls, mid, ary, enc);
@ -1172,7 +1172,7 @@ NewPlatformString(JNIEnv *env, char *s)
the encoding name, in which the StringCoding class will
pickup the iso-8859-1 as the fallback converter for us.
*/
NULL_CHECK0(cls = (*env)->FindClass(env, "java/lang/String"));
NULL_CHECK0(cls = FindBootStrapClass(env, "java/lang/String"));
NULL_CHECK0(mid = (*env)->GetMethodID(env, cls, "<init>",
"([B)V"));
str = (*env)->NewObject(env, cls, mid, ary);
@ -1195,7 +1195,7 @@ NewPlatformStringArray(JNIEnv *env, char **strv, int strc)
jarray ary;
int i;
NULL_CHECK0(cls = (*env)->FindClass(env, "java/lang/String"));
NULL_CHECK0(cls = FindBootStrapClass(env, "java/lang/String"));
NULL_CHECK0(ary = (*env)->NewObjectArray(env, strc, cls, 0));
for (i = 0; i < strc; i++) {
jstring str = NewPlatformString(env, *strv++);
@ -1224,6 +1224,7 @@ LoadClass(JNIEnv *env, char *name)
c = *t++;
*s++ = (c == '.') ? '/' : c;
} while (c != '\0');
// use the application class loader for main-class
cls = (*env)->FindClass(env, buf);
free(buf);
@ -1250,7 +1251,7 @@ GetMainClassName(JNIEnv *env, char *jarname)
jobject jar, man, attr;
jstring str, result = 0;
NULL_CHECK0(cls = (*env)->FindClass(env, "java/util/jar/JarFile"));
NULL_CHECK0(cls = FindBootStrapClass(env, "java/util/jar/JarFile"));
NULL_CHECK0(mid = (*env)->GetMethodID(env, cls, "<init>",
"(Ljava/lang/String;)V"));
NULL_CHECK0(str = NewPlatformString(env, jarname));
@ -1471,7 +1472,7 @@ PrintJavaVersion(JNIEnv *env)
jclass ver;
jmethodID print;
NULL_CHECK(ver = (*env)->FindClass(env, "sun/misc/Version"));
NULL_CHECK(ver = FindBootStrapClass(env, "sun/misc/Version"));
NULL_CHECK(print = (*env)->GetStaticMethodID(env, ver, "print", "()V"));
(*env)->CallStaticVoidMethod(env, ver, print);

View File

@ -100,5 +100,15 @@ void* MemAlloc(size_t size);
* Make launcher spit debug output.
*/
extern jboolean _launcher_debug;
/*
* This allows for finding classes from the VM's bootstrap class loader
* directly. FindClass uses the application class loader internally, which
* causes unnecessary searching of the classpath for the required classes.
*/
typedef jclass (JNICALL FindClassFromBootLoader_t(JNIEnv *env,
const char *name,
jboolean throwError));
jclass FindBootStrapClass(JNIEnv *env, const char *classname);
#endif /* _JAVA_H_ */

View File

@ -1826,3 +1826,23 @@ UnsetEnv(char *name)
{
return(borrowed_unsetenv(name));
}
/*
* The implementation for finding classes from the bootstrap
* class loader; refer to java.h.
*/
static FindClassFromBootLoader_t *findBootClass = NULL;
jclass
FindBootStrapClass(JNIEnv *env, const char* classname)
{
if (findBootClass == NULL) {
findBootClass = (FindClassFromBootLoader_t *)dlsym(RTLD_DEFAULT,
"JVM_FindClassFromBootLoader");
if (findBootClass == NULL) {
fprintf(stderr, "Error: could not load method JVM_FindClassFromBootLoader");
return NULL;
}
}
return findBootClass(env, classname, JNI_FALSE);
}

View File

@ -38,5 +38,6 @@
// platforms, but they may have different default values on other platforms.
//
define_pd_global(bool, UseLargePages, false);
define_pd_global(bool, UseLargePagesIndividualAllocation, false);
define_pd_global(bool, UseOSErrorReporting, false);
define_pd_global(bool, UseThreadPriorities, true) ;

View File

@ -1261,6 +1261,17 @@ jlong os::elapsed_frequency() {
return (1000 * 1000);
}
// For now, we say that linux does not support vtime. I have no idea
// whether it can actually be made to (DLD, 9/13/05).
bool os::supports_vtime() { return false; }
bool os::enable_vtime() { return false; }
bool os::vtime_enabled() { return false; }
double os::elapsedVTime() {
// better than nothing, but not much
return elapsedTime();
}
jlong os::javaTimeMillis() {
timeval time;
int status = gettimeofday(&time, NULL);

View File

@ -1110,7 +1110,7 @@ static jstring getPlatformEncoding(JNIEnv *env) {
if (propname) {
jclass cls;
jmethodID mid;
NULL_CHECK0 (cls = (*env)->FindClass(env, "java/lang/System"));
NULL_CHECK0 (cls = FindBootStrapClass(env, "java/lang/System"));
NULL_CHECK0 (mid = (*env)->GetStaticMethodID(
env, cls,
"getProperty",
@ -1125,7 +1125,7 @@ static jstring getPlatformEncoding(JNIEnv *env) {
static jboolean isEncodingSupported(JNIEnv *env, jstring enc) {
jclass cls;
jmethodID mid;
NULL_CHECK0 (cls = (*env)->FindClass(env, "java/nio/charset/Charset"));
NULL_CHECK0 (cls = FindBootStrapClass(env, "java/nio/charset/Charset"));
NULL_CHECK0 (mid = (*env)->GetStaticMethodID(
env, cls,
"isSupported",
@ -1161,7 +1161,7 @@ NewPlatformString(JNIEnv *env, char *s)
#else
if (isEncodingSupported(env, enc) == JNI_TRUE) {
#endif
NULL_CHECK0(cls = (*env)->FindClass(env, "java/lang/String"));
NULL_CHECK0(cls = FindBootStrapClass(env, "java/lang/String"));
NULL_CHECK0(mid = (*env)->GetMethodID(env, cls, "<init>",
"([BLjava/lang/String;)V"));
str = (*env)->NewObject(env, cls, mid, ary, enc);
@ -1172,7 +1172,7 @@ NewPlatformString(JNIEnv *env, char *s)
the encoding name, in which the StringCoding class will
pickup the iso-8859-1 as the fallback converter for us.
*/
NULL_CHECK0(cls = (*env)->FindClass(env, "java/lang/String"));
NULL_CHECK0(cls = FindBootStrapClass(env, "java/lang/String"));
NULL_CHECK0(mid = (*env)->GetMethodID(env, cls, "<init>",
"([B)V"));
str = (*env)->NewObject(env, cls, mid, ary);
@ -1195,7 +1195,7 @@ NewPlatformStringArray(JNIEnv *env, char **strv, int strc)
jarray ary;
int i;
NULL_CHECK0(cls = (*env)->FindClass(env, "java/lang/String"));
NULL_CHECK0(cls = FindBootStrapClass(env, "java/lang/String"));
NULL_CHECK0(ary = (*env)->NewObjectArray(env, strc, cls, 0));
for (i = 0; i < strc; i++) {
jstring str = NewPlatformString(env, *strv++);
@ -1224,6 +1224,7 @@ LoadClass(JNIEnv *env, char *name)
c = *t++;
*s++ = (c == '.') ? '/' : c;
} while (c != '\0');
// use the application class loader for the main-class
cls = (*env)->FindClass(env, buf);
free(buf);
@ -1250,7 +1251,7 @@ GetMainClassName(JNIEnv *env, char *jarname)
jobject jar, man, attr;
jstring str, result = 0;
NULL_CHECK0(cls = (*env)->FindClass(env, "java/util/jar/JarFile"));
NULL_CHECK0(cls = FindBootStrapClass(env, "java/util/jar/JarFile"));
NULL_CHECK0(mid = (*env)->GetMethodID(env, cls, "<init>",
"(Ljava/lang/String;)V"));
NULL_CHECK0(str = NewPlatformString(env, jarname));
@ -1471,7 +1472,7 @@ PrintJavaVersion(JNIEnv *env)
jclass ver;
jmethodID print;
NULL_CHECK(ver = (*env)->FindClass(env, "sun/misc/Version"));
NULL_CHECK(ver = FindBootStrapClass(env, "sun/misc/Version"));
NULL_CHECK(print = (*env)->GetStaticMethodID(env, ver, "print", "()V"));
(*env)->CallStaticVoidMethod(env, ver, print);

View File

@ -101,4 +101,15 @@ void* MemAlloc(size_t size);
*/
extern jboolean _launcher_debug;
/*
* This allows for finding classes from the VM's bootstrap class loader
* directly. FindClass uses the application class loader internally, which
* causes unnecessary searching of the classpath for the required classes.
*/
typedef jclass (JNICALL FindClassFromBootLoader_t(JNIEnv *env,
const char *name,
jboolean throwError));
jclass FindBootStrapClass(JNIEnv *env, const char *classname);
#endif /* _JAVA_H_ */

View File

@ -1826,3 +1826,24 @@ UnsetEnv(char *name)
{
return(borrowed_unsetenv(name));
}
/*
* The implementation for finding classes from the bootstrap
* class loader; refer to java.h.
*/
static FindClassFromBootLoader_t *findBootClass = NULL;
jclass
FindBootStrapClass(JNIEnv *env, const char* classname)
{
if (findBootClass == NULL) {
findBootClass = (FindClassFromBootLoader_t *)dlsym(RTLD_DEFAULT,
"JVM_FindClassFromBootLoader");
if (findBootClass == NULL) {
fprintf(stderr, "Error: could not load method JVM_FindClassFromBootLoader");
return NULL;
}
}
return findBootClass(env, classname, JNI_FALSE);
}

View File

@ -44,5 +44,6 @@
// platforms, but they may have different default values on other platforms.
//
define_pd_global(bool, UseLargePages, true);
define_pd_global(bool, UseLargePagesIndividualAllocation, false);
define_pd_global(bool, UseOSErrorReporting, false);
define_pd_global(bool, UseThreadPriorities, false);

View File

@ -462,16 +462,14 @@ int os::active_processor_count() {
int online_cpus = sysconf(_SC_NPROCESSORS_ONLN);
pid_t pid = getpid();
psetid_t pset = PS_NONE;
// Are we running in a processor set?
// Are we running in a processor set or is there any processor set around?
if (pset_bind(PS_QUERY, P_PID, pid, &pset) == 0) {
if (pset != PS_NONE) {
uint_t pset_cpus;
// Query number of cpus in processor set
if (pset_info(pset, NULL, &pset_cpus, NULL) == 0) {
assert(pset_cpus > 0 && pset_cpus <= online_cpus, "sanity check");
_processors_online = pset_cpus;
return pset_cpus;
}
uint_t pset_cpus;
// Query the number of cpus available to us.
if (pset_info(pset, NULL, &pset_cpus, NULL) == 0) {
assert(pset_cpus > 0 && pset_cpus <= online_cpus, "sanity check");
_processors_online = pset_cpus;
return pset_cpus;
}
}
// Otherwise return number of online cpus
@ -1691,6 +1689,40 @@ bool os::getTimesSecs(double* process_real_time,
}
}
bool os::supports_vtime() { return true; }
bool os::enable_vtime() {
int fd = open("/proc/self/ctl", O_WRONLY);
if (fd == -1)
return false;
long cmd[] = { PCSET, PR_MSACCT };
int res = write(fd, cmd, sizeof(long) * 2);
close(fd);
if (res != sizeof(long) * 2)
return false;
return true;
}
bool os::vtime_enabled() {
int fd = open("/proc/self/status", O_RDONLY);
if (fd == -1)
return false;
pstatus_t status;
int res = read(fd, (void*) &status, sizeof(pstatus_t));
close(fd);
if (res != sizeof(pstatus_t))
return false;
return status.pr_flags & PR_MSACCT;
}
double os::elapsedVTime() {
return (double)gethrvtime() / (double)hrtime_hz;
}
// Used internally for comparisons only
// getTimeMillis guaranteed to not move backwards on Solaris
jlong getTimeMillis() {
@ -2688,7 +2720,7 @@ size_t os::numa_get_leaf_groups(int *ids, size_t size) {
return bottom;
}
// Detect the topology change. Typically happens during CPU pluggin-unplugging.
// Detect the topology change. Typically happens during CPU plugging-unplugging.
bool os::numa_topology_changed() {
int is_stale = Solaris::lgrp_cookie_stale(Solaris::lgrp_cookie());
if (is_stale != -1 && is_stale) {

View File

@ -37,5 +37,6 @@
// platforms, but they may have different default values on other platforms.
//
define_pd_global(bool, UseLargePages, false);
define_pd_global(bool, UseLargePagesIndividualAllocation, true);
define_pd_global(bool, UseOSErrorReporting, false); // for now.
define_pd_global(bool, UseThreadPriorities, true) ;

View File

@ -737,6 +737,17 @@ FILETIME java_to_windows_time(jlong l) {
return result;
}
// For now, we say that Windows does not support vtime. I have no idea
// whether it can actually be made to (DLD, 9/13/05).
bool os::supports_vtime() { return false; }
bool os::enable_vtime() { return false; }
bool os::vtime_enabled() { return false; }
double os::elapsedVTime() {
// better than nothing, but not much
return elapsedTime();
}
jlong os::javaTimeMillis() {
if (UseFakeTimers) {
return fake_time++;
@ -2582,9 +2593,104 @@ bool os::can_execute_large_page_memory() {
}
char* os::reserve_memory_special(size_t bytes) {
DWORD flag = MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES;
char * res = (char *)VirtualAlloc(NULL, bytes, flag, PAGE_EXECUTE_READWRITE);
return res;
if (UseLargePagesIndividualAllocation) {
if (TracePageSizes && Verbose) {
tty->print_cr("Reserving large pages individually.");
}
char * p_buf;
// First reserve enough address space in advance, since we want to be
// able to break a single contiguous virtual address range into multiple
// large page commits, but WS2003 does not allow reserving large page space,
// so we just use 4K pages for the reserve; this gives us a legal contiguous
// address space. Then we deallocate that reservation and re-allocate
// using large pages.
const size_t size_of_reserve = bytes + _large_page_size;
if (bytes > size_of_reserve) {
// Overflowed.
warning("Individually allocated large pages failed, "
"use -XX:-UseLargePagesIndividualAllocation to turn off");
return NULL;
}
p_buf = (char *) VirtualAlloc(NULL,
size_of_reserve, // size of Reserve
MEM_RESERVE,
PAGE_EXECUTE_READWRITE);
// If reservation failed, return NULL
if (p_buf == NULL) return NULL;
release_memory(p_buf, bytes + _large_page_size);
// round up to page boundary. If the size_of_reserve did not
// overflow and the reservation did not fail, this align up
// should not overflow.
p_buf = (char *) align_size_up((size_t)p_buf, _large_page_size);
// now go through and allocate one page at a time until all bytes are
// allocated
size_t bytes_remaining = align_size_up(bytes, _large_page_size);
// An overflow of align_size_up() would have been caught above
// in the calculation of size_of_reserve.
char * next_alloc_addr = p_buf;
#ifdef ASSERT
// Variable for the failure injection
long ran_num = os::random();
size_t fail_after = ran_num % bytes;
#endif
while (bytes_remaining) {
size_t bytes_to_rq = MIN2(bytes_remaining, _large_page_size);
// Note allocate and commit
char * p_new;
#ifdef ASSERT
bool inject_error = LargePagesIndividualAllocationInjectError &&
(bytes_remaining <= fail_after);
#else
const bool inject_error = false;
#endif
if (inject_error) {
p_new = NULL;
} else {
p_new = (char *) VirtualAlloc(next_alloc_addr,
bytes_to_rq,
MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES,
PAGE_EXECUTE_READWRITE);
}
if (p_new == NULL) {
// Free any allocated pages
if (next_alloc_addr > p_buf) {
// Some memory was committed so release it.
size_t bytes_to_release = bytes - bytes_remaining;
release_memory(p_buf, bytes_to_release);
}
#ifdef ASSERT
if (UseLargePagesIndividualAllocation &&
LargePagesIndividualAllocationInjectError) {
if (TracePageSizes && Verbose) {
tty->print_cr("Reserving large pages individually failed.");
}
}
#endif
return NULL;
}
bytes_remaining -= bytes_to_rq;
next_alloc_addr += bytes_to_rq;
}
return p_buf;
} else {
// normal policy just allocate it all at once
DWORD flag = MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES;
char * res = (char *)VirtualAlloc(NULL,
bytes,
flag,
PAGE_EXECUTE_READWRITE);
return res;
}
}
bool os::release_memory_special(char* base, size_t bytes) {
@ -2972,6 +3078,7 @@ size_t os::win32::_default_stack_size = 0;
volatile intx os::win32::_os_thread_count = 0;
bool os::win32::_is_nt = false;
bool os::win32::_is_windows_2003 = false;
void os::win32::initialize_system_info() {
@ -2994,7 +3101,15 @@ void os::win32::initialize_system_info() {
GetVersionEx(&oi);
switch(oi.dwPlatformId) {
case VER_PLATFORM_WIN32_WINDOWS: _is_nt = false; break;
case VER_PLATFORM_WIN32_NT: _is_nt = true; break;
case VER_PLATFORM_WIN32_NT:
_is_nt = true;
{
int os_vers = oi.dwMajorVersion * 1000 + oi.dwMinorVersion;
if (os_vers == 5002) {
_is_windows_2003 = true;
}
}
break;
default: fatal("Unknown platform");
}
@ -3092,9 +3207,13 @@ void os::init(void) {
NoYieldsInMicrolock = true;
}
#endif
// This may be overridden later when argument processing is done.
FLAG_SET_ERGO(bool, UseLargePagesIndividualAllocation,
os::win32::is_windows_2003());
// Initialize main_process and main_thread
main_process = GetCurrentProcess(); // Remember main_process is a pseudo handle
if (!DuplicateHandle(main_process, GetCurrentThread(), main_process,
if (!DuplicateHandle(main_process, GetCurrentThread(), main_process,
&main_thread, THREAD_ALL_ACCESS, false, 0)) {
fatal("DuplicateHandle failed\n");
}

View File

@ -34,6 +34,7 @@ class win32 {
static julong _physical_memory;
static size_t _default_stack_size;
static bool _is_nt;
static bool _is_windows_2003;
public:
// Windows-specific interface:
@ -60,6 +61,9 @@ class win32 {
// Tells whether the platform is NT or Windows 95
static bool is_nt() { return _is_nt; }
// Tells whether the platform is Windows 2003
static bool is_windows_2003() { return _is_windows_2003; }
// Returns the byte size of a virtual memory page
static int vm_page_size() { return _vm_page_size; }

View File

@ -3768,6 +3768,10 @@ bool MatchRule::is_chain_rule(FormDict &globals) const {
int MatchRule::is_ideal_copy() const {
if( _rChild ) {
const char *opType = _rChild->_opType;
#if 1
if( strcmp(opType,"CastIP")==0 )
return 1;
#else
if( strcmp(opType,"CastII")==0 )
return 1;
// Do not treat *CastPP this way, because it
@ -3787,6 +3791,7 @@ int MatchRule::is_ideal_copy() const {
// return 1;
//if( strcmp(opType,"CastP2X")==0 )
// return 1;
#endif
}
if( is_chain_rule(_AD.globalNames()) &&
_lChild && strncmp(_lChild->_opType,"stackSlot",9)==0 )

View File

@ -249,8 +249,6 @@ void AbstractAssembler::block_comment(const char* comment) {
bool MacroAssembler::needs_explicit_null_check(intptr_t offset) {
// Exception handler checks the nmethod's implicit null checks table
// only when this method returns false.
#ifndef SPARC
// Sparc does not have based addressing
if (UseCompressedOops) {
// The first page after heap_base is unmapped and
// the 'offset' is equal to [heap_base + offset] for
@ -261,7 +259,6 @@ bool MacroAssembler::needs_explicit_null_check(intptr_t offset) {
offset = (intptr_t)(pointer_delta((void*)offset, (void*)heap_base, 1));
}
}
#endif // SPARC
return offset < 0 || os::vm_page_size() <= offset;
}

View File

@ -482,3 +482,81 @@ class ArrayCopyStub: public CodeStub {
virtual void print_name(outputStream* out) const { out->print("ArrayCopyStub"); }
#endif // PRODUCT
};
//////////////////////////////////////////////////////////////////////////////////////////
#ifndef SERIALGC
// Code stubs for Garbage-First barriers.
class G1PreBarrierStub: public CodeStub {
private:
LIR_Opr _addr;
LIR_Opr _pre_val;
LIR_PatchCode _patch_code;
CodeEmitInfo* _info;
public:
// pre_val (a temporary register) must be a register;
// addr (the address of the field to be read) must be a LIR_Address
G1PreBarrierStub(LIR_Opr addr, LIR_Opr pre_val, LIR_PatchCode patch_code, CodeEmitInfo* info) :
_addr(addr), _pre_val(pre_val), _patch_code(patch_code), _info(info)
{
assert(_pre_val->is_register(), "should be temporary register");
assert(_addr->is_address(), "should be the address of the field");
}
LIR_Opr addr() const { return _addr; }
LIR_Opr pre_val() const { return _pre_val; }
LIR_PatchCode patch_code() const { return _patch_code; }
CodeEmitInfo* info() const { return _info; }
virtual void emit_code(LIR_Assembler* e);
virtual void visit(LIR_OpVisitState* visitor) {
// don't pass in the code emit info since it's processed in the fast
// path
if (_info != NULL)
visitor->do_slow_case(_info);
else
visitor->do_slow_case();
visitor->do_input(_addr);
visitor->do_temp(_pre_val);
}
#ifndef PRODUCT
virtual void print_name(outputStream* out) const { out->print("G1PreBarrierStub"); }
#endif // PRODUCT
};
class G1PostBarrierStub: public CodeStub {
private:
LIR_Opr _addr;
LIR_Opr _new_val;
static jbyte* _byte_map_base;
static jbyte* byte_map_base_slow();
static jbyte* byte_map_base() {
if (_byte_map_base == NULL) {
_byte_map_base = byte_map_base_slow();
}
return _byte_map_base;
}
public:
// addr (the address of the object head) and new_val must be registers.
G1PostBarrierStub(LIR_Opr addr, LIR_Opr new_val): _addr(addr), _new_val(new_val) { }
LIR_Opr addr() const { return _addr; }
LIR_Opr new_val() const { return _new_val; }
virtual void emit_code(LIR_Assembler* e);
virtual void visit(LIR_OpVisitState* visitor) {
// don't pass in the code emit info since it's processed in the fast path
visitor->do_slow_case();
visitor->do_input(_addr);
visitor->do_input(_new_val);
}
#ifndef PRODUCT
virtual void print_name(outputStream* out) const { out->print("G1PostBarrierStub"); }
#endif // PRODUCT
};
#endif // SERIALGC
//////////////////////////////////////////////////////////////////////////////////////////

View File

@ -74,6 +74,7 @@ void LIR_Assembler::patching_epilog(PatchingStub* patch, LIR_PatchCode patch_cod
LIR_Assembler::LIR_Assembler(Compilation* c):
_compilation(c)
, _masm(c->masm())
, _bs(Universe::heap()->barrier_set())
, _frame_map(c->frame_map())
, _current_block(NULL)
, _pending_non_safepoint(NULL)

View File

@ -24,11 +24,13 @@
class Compilation;
class ScopeValue;
class BarrierSet;
class LIR_Assembler: public CompilationResourceObj {
private:
C1_MacroAssembler* _masm;
CodeStubList* _slow_case_stubs;
BarrierSet* _bs;
Compilation* _compilation;
FrameMap* _frame_map;

View File

@ -285,16 +285,7 @@ jlong LIRItem::get_jlong_constant() const {
void LIRGenerator::init() {
BarrierSet* bs = Universe::heap()->barrier_set();
assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
CardTableModRefBS* ct = (CardTableModRefBS*)bs;
assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
#ifdef _LP64
_card_table_base = new LIR_Const((jlong)ct->byte_map_base);
#else
_card_table_base = new LIR_Const((jint)ct->byte_map_base);
#endif
_bs = Universe::heap()->barrier_set();
}
@ -1239,8 +1230,37 @@ LIR_Opr LIRGenerator::load_constant(LIR_Const* c) {
// Various barriers
void LIRGenerator::pre_barrier(LIR_Opr addr_opr, bool patch, CodeEmitInfo* info) {
// Do the pre-write barrier, if any.
switch (_bs->kind()) {
#ifndef SERIALGC
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
G1SATBCardTableModRef_pre_barrier(addr_opr, patch, info);
break;
#endif // SERIALGC
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableExtension:
// No pre barriers
break;
case BarrierSet::ModRef:
case BarrierSet::Other:
// No pre barriers
break;
default :
ShouldNotReachHere();
}
}
void LIRGenerator::post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val) {
switch (Universe::heap()->barrier_set()->kind()) {
switch (_bs->kind()) {
#ifndef SERIALGC
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
G1SATBCardTableModRef_post_barrier(addr, new_val);
break;
#endif // SERIALGC
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableExtension:
CardTableModRef_post_barrier(addr, new_val);
@ -1254,11 +1274,120 @@ void LIRGenerator::post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val) {
}
}
////////////////////////////////////////////////////////////////////////
#ifndef SERIALGC
void LIRGenerator::G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, bool patch, CodeEmitInfo* info) {
if (G1DisablePreBarrier) return;
// First we test whether marking is in progress.
BasicType flag_type;
if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
flag_type = T_INT;
} else {
guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1,
"Assumption");
flag_type = T_BYTE;
}
LIR_Opr thrd = getThreadPointer();
LIR_Address* mark_active_flag_addr =
new LIR_Address(thrd,
in_bytes(JavaThread::satb_mark_queue_offset() +
PtrQueue::byte_offset_of_active()),
flag_type);
// Read the marking-in-progress flag.
LIR_Opr flag_val = new_register(T_INT);
__ load(mark_active_flag_addr, flag_val);
LabelObj* start_store = new LabelObj();
LIR_PatchCode pre_val_patch_code =
patch ? lir_patch_normal : lir_patch_none;
LIR_Opr pre_val = new_register(T_OBJECT);
__ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0));
if (!addr_opr->is_address()) {
assert(addr_opr->is_register(), "must be");
addr_opr = LIR_OprFact::address(new LIR_Address(addr_opr, 0, T_OBJECT));
}
CodeStub* slow = new G1PreBarrierStub(addr_opr, pre_val, pre_val_patch_code,
info);
__ branch(lir_cond_notEqual, T_INT, slow);
__ branch_destination(slow->continuation());
}
void LIRGenerator::G1SATBCardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val) {
if (G1DisablePostBarrier) return;
// If the "new_val" is a constant NULL, no barrier is necessary.
if (new_val->is_constant() &&
new_val->as_constant_ptr()->as_jobject() == NULL) return;
if (!new_val->is_register()) {
LIR_Opr new_val_reg = new_pointer_register();
if (new_val->is_constant()) {
__ move(new_val, new_val_reg);
} else {
__ leal(new_val, new_val_reg);
}
new_val = new_val_reg;
}
assert(new_val->is_register(), "must be a register at this point");
if (addr->is_address()) {
LIR_Address* address = addr->as_address_ptr();
LIR_Opr ptr = new_pointer_register();
if (!address->index()->is_valid() && address->disp() == 0) {
__ move(address->base(), ptr);
} else {
assert(address->disp() != max_jint, "lea doesn't support patched addresses!");
__ leal(addr, ptr);
}
addr = ptr;
}
assert(addr->is_register(), "must be a register at this point");
LIR_Opr xor_res = new_pointer_register();
LIR_Opr xor_shift_res = new_pointer_register();
if (TwoOperandLIRForm ) {
__ move(addr, xor_res);
__ logical_xor(xor_res, new_val, xor_res);
__ move(xor_res, xor_shift_res);
__ unsigned_shift_right(xor_shift_res,
LIR_OprFact::intConst(HeapRegion::LogOfHRGrainBytes),
xor_shift_res,
LIR_OprDesc::illegalOpr());
} else {
__ logical_xor(addr, new_val, xor_res);
__ unsigned_shift_right(xor_res,
LIR_OprFact::intConst(HeapRegion::LogOfHRGrainBytes),
xor_shift_res,
LIR_OprDesc::illegalOpr());
}
if (!new_val->is_register()) {
LIR_Opr new_val_reg = new_pointer_register();
__ leal(new_val, new_val_reg);
new_val = new_val_reg;
}
assert(new_val->is_register(), "must be a register at this point");
__ cmp(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD));
CodeStub* slow = new G1PostBarrierStub(addr, new_val);
__ branch(lir_cond_notEqual, T_INT, slow);
__ branch_destination(slow->continuation());
}
#endif // SERIALGC
////////////////////////////////////////////////////////////////////////
void LIRGenerator::CardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val) {
BarrierSet* bs = Universe::heap()->barrier_set();
assert(sizeof(*((CardTableModRefBS*)bs)->byte_map_base) == sizeof(jbyte), "adjust this code");
LIR_Const* card_table_base = new LIR_Const(((CardTableModRefBS*)bs)->byte_map_base);
assert(sizeof(*((CardTableModRefBS*)_bs)->byte_map_base) == sizeof(jbyte), "adjust this code");
LIR_Const* card_table_base = new LIR_Const(((CardTableModRefBS*)_bs)->byte_map_base);
if (addr->is_address()) {
LIR_Address* address = addr->as_address_ptr();
LIR_Opr ptr = new_register(T_OBJECT);
@ -1388,6 +1517,13 @@ void LIRGenerator::do_StoreField(StoreField* x) {
__ membar_release();
}
if (is_oop) {
// Do the pre-write barrier, if any.
pre_barrier(LIR_OprFact::address(address),
needs_patching,
(info ? new CodeEmitInfo(info) : NULL));
}
if (is_volatile) {
assert(!needs_patching && x->is_loaded(),
"how do we know it's volatile if it's not loaded");
@ -1398,7 +1534,12 @@ void LIRGenerator::do_StoreField(StoreField* x) {
}
if (is_oop) {
#ifdef PRECISE_CARDMARK
// Precise cardmarks don't work
post_barrier(LIR_OprFact::address(address), value.result());
#else
post_barrier(object.result(), value.result());
#endif // PRECISE_CARDMARK
}
if (is_volatile && os::is_MP()) {

View File

@ -145,6 +145,7 @@ class PhiResolver: public CompilationResourceObj {
// only the classes below belong in the same file
class LIRGenerator: public InstructionVisitor, public BlockClosure {
private:
Compilation* _compilation;
ciMethod* _method; // method that we are compiling
@ -154,6 +155,7 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure {
Values _instruction_for_operand;
BitMap2D _vreg_flags; // flags which can be set on a per-vreg basis
LIR_List* _lir;
BarrierSet* _bs;
LIRGenerator* gen() {
return this;
@ -174,8 +176,6 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure {
LIR_OprList _reg_for_constants;
Values _unpinned_constants;
LIR_Const* _card_table_base;
friend class PhiResolver;
// unified bailout support
@ -196,8 +196,6 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure {
LIR_Opr load_constant(Constant* x);
LIR_Opr load_constant(LIR_Const* constant);
LIR_Const* card_table_base() const { return _card_table_base; }
void set_result(Value x, LIR_Opr opr) {
assert(opr->is_valid(), "must set to valid value");
assert(x->operand()->is_illegal(), "operand should never change");
@ -253,12 +251,17 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure {
// generic interface
void pre_barrier(LIR_Opr addr_opr, bool patch, CodeEmitInfo* info);
void post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val);
// specific implementations
// pre barriers
void G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, bool patch, CodeEmitInfo* info);
// post barriers
void G1SATBCardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val);
void CardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val);

View File

@ -168,6 +168,8 @@ void Runtime1::generate_blob_for(StubID id) {
switch (id) {
// These stubs don't need to have an oopmap
case dtrace_object_alloc_id:
case g1_pre_barrier_slow_id:
case g1_post_barrier_slow_id:
case slow_subtype_check_id:
case fpu2long_stub_id:
case unwind_exception_id:

View File

@ -56,6 +56,8 @@ class StubAssembler;
stub(access_field_patching) \
stub(load_klass_patching) \
stub(jvmti_exception_throw) \
stub(g1_pre_barrier_slow) \
stub(g1_post_barrier_slow) \
stub(fpu2long_stub) \
stub(counter_overflow) \
last_entry(number_of_ids)

View File

@ -213,9 +213,6 @@
develop(bool, UseFastLocking, true, \
"Use fast inlined locking code") \
\
product(bool, FastTLABRefill, true, \
"Use fast TLAB refill code") \
\
develop(bool, UseSlowPath, false, \
"For debugging: test slow cases by always using them") \
\

View File

@ -49,7 +49,7 @@ bool ciMethodBlocks::is_block_start(int bci) {
// first half. Returns the range beginning at bci.
ciBlock *ciMethodBlocks::split_block_at(int bci) {
ciBlock *former_block = block_containing(bci);
ciBlock *new_block = new(_arena) ciBlock(_method, _num_blocks++, this, former_block->start_bci());
ciBlock *new_block = new(_arena) ciBlock(_method, _num_blocks++, former_block->start_bci());
_blocks->append(new_block);
assert(former_block != NULL, "must not be NULL");
new_block->set_limit_bci(bci);
@ -83,7 +83,7 @@ ciBlock *ciMethodBlocks::make_block_at(int bci) {
if (cb == NULL ) {
// This is our first time visiting this bytecode. Create
// a fresh block and assign it this starting point.
ciBlock *nb = new(_arena) ciBlock(_method, _num_blocks++, this, bci);
ciBlock *nb = new(_arena) ciBlock(_method, _num_blocks++, bci);
_blocks->append(nb);
_bci_to_block[bci] = nb;
return nb;
@ -98,6 +98,11 @@ ciBlock *ciMethodBlocks::make_block_at(int bci) {
}
}
ciBlock *ciMethodBlocks::make_dummy_block() {
ciBlock *dum = new(_arena) ciBlock(_method, -1, 0);
return dum;
}
void ciMethodBlocks::do_analysis() {
ciBytecodeStream s(_method);
ciBlock *cur_block = block_containing(0);
@ -253,7 +258,7 @@ ciMethodBlocks::ciMethodBlocks(Arena *arena, ciMethod *meth): _method(meth),
Copy::zero_to_words((HeapWord*) _bci_to_block, b2bsize / sizeof(HeapWord));
// create initial block covering the entire method
ciBlock *b = new(arena) ciBlock(_method, _num_blocks++, this, 0);
ciBlock *b = new(arena) ciBlock(_method, _num_blocks++, 0);
_blocks->append(b);
_bci_to_block[0] = b;
@ -334,7 +339,7 @@ void ciMethodBlocks::dump() {
#endif
ciBlock::ciBlock(ciMethod *method, int index, ciMethodBlocks *mb, int start_bci) :
ciBlock::ciBlock(ciMethod *method, int index, int start_bci) :
#ifndef PRODUCT
_method(method),
#endif

View File

@ -48,6 +48,8 @@ public:
int num_blocks() { return _num_blocks;}
void clear_processed();
ciBlock *make_dummy_block(); // a block not associated with a bci
#ifndef PRODUCT
void dump();
#endif
@ -81,7 +83,7 @@ public:
fall_through_bci = -1
};
ciBlock(ciMethod *method, int index, ciMethodBlocks *mb, int start_bci);
ciBlock(ciMethod *method, int index, int start_bci);
int start_bci() const { return _start_bci; }
int limit_bci() const { return _limit_bci; }
int control_bci() const { return _control_bci; }
@ -94,7 +96,6 @@ public:
int ex_limit_bci() const { return _ex_limit_bci; }
bool contains(int bci) const { return start_bci() <= bci && bci < limit_bci(); }
// flag handling
bool processed() const { return (_flags & Processed) != 0; }
bool is_handler() const { return (_flags & Handler) != 0; }

File diff suppressed because it is too large

View File

@ -34,11 +34,13 @@ private:
int _max_locals;
int _max_stack;
int _code_size;
bool _has_irreducible_entry;
const char* _failure_reason;
public:
class StateVector;
class Loop;
class Block;
// Build a type flow analyzer
@ -55,6 +57,7 @@ public:
int max_stack() const { return _max_stack; }
int max_cells() const { return _max_locals + _max_stack; }
int code_size() const { return _code_size; }
bool has_irreducible_entry() const { return _has_irreducible_entry; }
// Represents information about an "active" jsr call. This
// class represents a call to the routine at some entry address
@ -125,6 +128,19 @@ public:
void print_on(outputStream* st) const PRODUCT_RETURN;
};
class LocalSet VALUE_OBJ_CLASS_SPEC {
private:
enum Constants { max = 63 };
uint64_t _bits;
public:
LocalSet() : _bits(0) {}
void add(uint32_t i) { if (i < (uint32_t)max) _bits |= (1LL << i); }
void add(LocalSet* ls) { _bits |= ls->_bits; }
bool test(uint32_t i) const { return i < (uint32_t)max ? (_bits>>i)&1U : true; }
void clear() { _bits = 0; }
void print_on(outputStream* st, int limit) const PRODUCT_RETURN;
};
// Used as a combined index for locals and temps
enum Cell {
Cell_0, Cell_max = INT_MAX
@ -142,6 +158,8 @@ public:
int _trap_bci;
int _trap_index;
LocalSet _def_locals; // For entire block
static ciType* type_meet_internal(ciType* t1, ciType* t2, ciTypeFlow* analyzer);
public:
@ -181,6 +199,9 @@ public:
int monitor_count() const { return _monitor_count; }
void set_monitor_count(int mc) { _monitor_count = mc; }
LocalSet* def_locals() { return &_def_locals; }
const LocalSet* def_locals() const { return &_def_locals; }
static Cell start_cell() { return (Cell)0; }
static Cell next_cell(Cell c) { return (Cell)(((int)c) + 1); }
Cell limit_cell() const {
@ -250,6 +271,10 @@ public:
return type->basic_type() == T_DOUBLE;
}
void store_to_local(int lnum) {
_def_locals.add((uint) lnum);
}
void push_translate(ciType* type);
void push_int() {
@ -358,6 +383,7 @@ public:
"must be reference type or return address");
overwrite_local_double_long(index);
set_type_at(local(index), type);
store_to_local(index);
}
void load_local_double(int index) {
@ -376,6 +402,8 @@ public:
overwrite_local_double_long(index);
set_type_at(local(index), type);
set_type_at(local(index+1), type2);
store_to_local(index);
store_to_local(index+1);
}
void load_local_float(int index) {
@ -388,6 +416,7 @@ public:
assert(is_float(type), "must be float type");
overwrite_local_double_long(index);
set_type_at(local(index), type);
store_to_local(index);
}
void load_local_int(int index) {
@ -400,6 +429,7 @@ public:
assert(is_int(type), "must be int type");
overwrite_local_double_long(index);
set_type_at(local(index), type);
store_to_local(index);
}
void load_local_long(int index) {
@ -418,6 +448,8 @@ public:
overwrite_local_double_long(index);
set_type_at(local(index), type);
set_type_at(local(index+1), type2);
store_to_local(index);
store_to_local(index+1);
}
// Stop interpretation of this path with a trap.
@ -450,13 +482,31 @@ public:
};
// Parameter for "find_block" calls:
// Describes the difference between a public and private copy.
// Describes the difference between a public and backedge copy.
enum CreateOption {
create_public_copy,
create_private_copy,
create_backedge_copy,
no_create
};
// Successor iterator
class SuccIter : public StackObj {
private:
Block* _pred;
int _index;
Block* _succ;
public:
SuccIter() : _pred(NULL), _index(-1), _succ(NULL) {}
SuccIter(Block* pred) : _pred(pred), _index(-1), _succ(NULL) { next(); }
int index() { return _index; }
Block* pred() { return _pred; } // Return predecessor
bool done() { return _index < 0; } // Finished?
Block* succ() { return _succ; } // Return current successor
void next(); // Advance
void set_succ(Block* succ); // Update current successor
bool is_normal_ctrl() { return index() < _pred->successors()->length(); }
};
// A basic block
class Block : public ResourceObj {
private:
@ -470,15 +520,24 @@ public:
int _trap_bci;
int _trap_index;
// A reasonable approximation to pre-order, provided to the client.
// pre_order, assigned at first visit. Used as block ID and "visited" tag
int _pre_order;
// Has this block been cloned for some special purpose?
bool _private_copy;
// A post-order, used to compute the reverse post order (RPO) provided to the client
int _post_order; // used to compute rpo
// Has this block been cloned for a loop backedge?
bool _backedge_copy;
// A pointer used for our internal work list
Block* _next;
bool _on_work_list;
Block* _next;
bool _on_work_list; // on the work list
Block* _rpo_next; // Reverse post order list
// Loop info
Loop* _loop; // nearest loop
bool _irreducible_entry; // entry to irreducible loop
bool _exception_entry; // entry to exception handler
ciBlock* ciblock() const { return _ciblock; }
StateVector* state() const { return _state; }
@ -504,10 +563,11 @@ public:
int start() const { return _ciblock->start_bci(); }
int limit() const { return _ciblock->limit_bci(); }
int control() const { return _ciblock->control_bci(); }
JsrSet* jsrs() const { return _jsrs; }
bool is_private_copy() const { return _private_copy; }
void set_private_copy(bool z);
int private_copy_count() const { return outer()->private_copy_count(ciblock()->index(), _jsrs); }
bool is_backedge_copy() const { return _backedge_copy; }
void set_backedge_copy(bool z);
int backedge_copy_count() const { return outer()->backedge_copy_count(ciblock()->index(), _jsrs); }
// access to entry state
int stack_size() const { return _state->stack_size(); }
@ -515,6 +575,20 @@ public:
ciType* local_type_at(int i) const { return _state->local_type_at(i); }
ciType* stack_type_at(int i) const { return _state->stack_type_at(i); }
// Data flow on locals
bool is_invariant_local(uint v) const {
assert(is_loop_head(), "only loop heads");
// Find outermost loop with same loop head
Loop* lp = loop();
while (lp->parent() != NULL) {
if (lp->parent()->head() != lp->head()) break;
lp = lp->parent();
}
return !lp->def_locals()->test(v);
}
LocalSet* def_locals() { return _state->def_locals(); }
const LocalSet* def_locals() const { return _state->def_locals(); }
// Get the successors for this Block.
GrowableArray<Block*>* successors(ciBytecodeStream* str,
StateVector* state,
@ -524,13 +598,6 @@ public:
return _successors;
}
// Helper function for "successors" when making private copies of
// loop heads for C2.
Block * clone_loop_head(ciTypeFlow* analyzer,
int branch_bci,
Block* target,
JsrSet* jsrs);
// Get the exceptional successors for this Block.
GrowableArray<Block*>* exceptions() {
if (_exceptions == NULL) {
@ -584,17 +651,126 @@ public:
bool is_on_work_list() const { return _on_work_list; }
bool has_pre_order() const { return _pre_order >= 0; }
void set_pre_order(int po) { assert(!has_pre_order() && po >= 0, ""); _pre_order = po; }
void set_pre_order(int po) { assert(!has_pre_order(), ""); _pre_order = po; }
int pre_order() const { assert(has_pre_order(), ""); return _pre_order; }
void set_next_pre_order() { set_pre_order(outer()->inc_next_pre_order()); }
bool is_start() const { return _pre_order == outer()->start_block_num(); }
// A ranking used in determining order within the work list.
bool is_simpler_than(Block* other);
// Reverse post order
void df_init();
bool has_post_order() const { return _post_order >= 0; }
void set_post_order(int po) { assert(!has_post_order() && po >= 0, ""); _post_order = po; }
void reset_post_order(int o){ _post_order = o; }
int post_order() const { assert(has_post_order(), ""); return _post_order; }
bool has_rpo() const { return has_post_order() && outer()->have_block_count(); }
int rpo() const { assert(has_rpo(), ""); return outer()->block_count() - post_order() - 1; }
void set_rpo_next(Block* b) { _rpo_next = b; }
Block* rpo_next() { return _rpo_next; }
// Loops
Loop* loop() const { return _loop; }
void set_loop(Loop* lp) { _loop = lp; }
bool is_loop_head() const { return _loop && _loop->head() == this; }
void set_irreducible_entry(bool c) { _irreducible_entry = c; }
bool is_irreducible_entry() const { return _irreducible_entry; }
bool is_visited() const { return has_pre_order(); }
bool is_post_visited() const { return has_post_order(); }
bool is_clonable_exit(Loop* lp);
Block* looping_succ(Loop* lp); // Successor inside of loop
bool is_single_entry_loop_head() const {
if (!is_loop_head()) return false;
for (Loop* lp = loop(); lp != NULL && lp->head() == this; lp = lp->parent())
if (lp->is_irreducible()) return false;
return true;
}
void print_value_on(outputStream* st) const PRODUCT_RETURN;
void print_on(outputStream* st) const PRODUCT_RETURN;
};
// Loop
class Loop : public ResourceObj {
private:
Loop* _parent;
Loop* _sibling; // List of siblings, null terminated
Loop* _child; // Head of child list threaded thru sibling pointer
Block* _head; // Head of loop
Block* _tail; // Tail of loop
bool _irreducible;
LocalSet _def_locals;
public:
Loop(Block* head, Block* tail) :
_head(head), _tail(tail),
_parent(NULL), _sibling(NULL), _child(NULL),
_irreducible(false), _def_locals() {}
Loop* parent() const { return _parent; }
Loop* sibling() const { return _sibling; }
Loop* child() const { return _child; }
Block* head() const { return _head; }
Block* tail() const { return _tail; }
void set_parent(Loop* p) { _parent = p; }
void set_sibling(Loop* s) { _sibling = s; }
void set_child(Loop* c) { _child = c; }
void set_head(Block* hd) { _head = hd; }
void set_tail(Block* tl) { _tail = tl; }
int depth() const; // nesting depth
// Returns true if lp is a nested loop or us.
bool contains(Loop* lp) const;
bool contains(Block* blk) const { return contains(blk->loop()); }
// Data flow on locals
LocalSet* def_locals() { return &_def_locals; }
const LocalSet* def_locals() const { return &_def_locals; }
// Merge the branch lp into this branch, sorting on the loop head
// pre_orders. Returns the new branch.
Loop* sorted_merge(Loop* lp);
// Mark non-single entry to loop
void set_irreducible(Block* entry) {
_irreducible = true;
entry->set_irreducible_entry(true);
}
bool is_irreducible() const { return _irreducible; }
bool is_root() const { return _tail->pre_order() == max_jint; }
void print(outputStream* st = tty, int indent = 0) const PRODUCT_RETURN;
};
// Postorder iteration over the loop tree.
class PostorderLoops : public StackObj {
private:
Loop* _root;
Loop* _current;
public:
PostorderLoops(Loop* root) : _root(root), _current(root) {
while (_current->child() != NULL) {
_current = _current->child();
}
}
bool done() { return _current == NULL; } // Finished iterating?
void next(); // Advance to next loop
Loop* current() { return _current; } // Return current loop.
};
// Preorder iteration over the loop tree.
class PreorderLoops : public StackObj {
private:
Loop* _root;
Loop* _current;
public:
PreorderLoops(Loop* root) : _root(root), _current(root) {}
bool done() { return _current == NULL; } // Finished iterating?
void next(); // Advance to next loop
Loop* current() { return _current; } // Return current loop.
};
// Standard indexes of successors, for various bytecodes.
enum {
FALL_THROUGH = 0, // normal control
@ -619,6 +795,12 @@ private:
// Tells if a given instruction is able to generate an exception edge.
bool can_trap(ciBytecodeStream& str);
// Clone the loop heads. Returns true if any cloning occurred.
bool clone_loop_heads(Loop* lp, StateVector* temp_vector, JsrSet* temp_set);
// Clone lp's head and replace tail's successors with clone.
Block* clone_loop_head(Loop* lp, StateVector* temp_vector, JsrSet* temp_set);
public:
// Return the block beginning at bci which has a JsrSet compatible
// with jsrs.
@ -627,8 +809,8 @@ public:
// block factory
Block* get_block_for(int ciBlockIndex, JsrSet* jsrs, CreateOption option = create_public_copy);
// How many of the blocks have the private_copy bit set?
int private_copy_count(int ciBlockIndex, JsrSet* jsrs) const;
// How many of the blocks have the backedge_copy bit set?
int backedge_copy_count(int ciBlockIndex, JsrSet* jsrs) const;
// Return an existing block containing bci which has a JsrSet compatible
// with jsrs, or NULL if there is none.
@ -651,11 +833,18 @@ public:
return _block_map[po]; }
Block* start_block() const { return pre_order_at(start_block_num()); }
int start_block_num() const { return 0; }
Block* rpo_at(int rpo) const { assert(0 <= rpo && rpo < block_count(), "out of bounds");
return _block_map[rpo]; }
int next_pre_order() { return _next_pre_order; }
int inc_next_pre_order() { return _next_pre_order++; }
private:
// A work list used during flow analysis.
Block* _work_list;
// List of blocks in reverse post order
Block* _rpo_list;
// Next Block::_pre_order. After mapping, doubles as block_count.
int _next_pre_order;
@ -668,6 +857,15 @@ private:
// Add a basic block to our work list.
void add_to_work_list(Block* block);
// Prepend a basic block to rpo list.
void prepend_to_rpo_list(Block* blk) {
blk->set_rpo_next(_rpo_list);
_rpo_list = blk;
}
// Root of the loop tree
Loop* _loop_tree_root;
// State used for make_jsr_record
int _jsr_count;
GrowableArray<JsrRecord*>* _jsr_records;
@ -677,6 +875,9 @@ public:
// does not already exist.
JsrRecord* make_jsr_record(int entry_address, int return_address);
void set_loop_tree_root(Loop* ltr) { _loop_tree_root = ltr; }
Loop* loop_tree_root() { return _loop_tree_root; }
private:
// Get the initial state for start_bci:
const StateVector* get_start_state();
@ -703,6 +904,15 @@ private:
// necessary.
void flow_types();
// Perform the depth first type flow analysis. Helper for flow_types.
void df_flow_types(Block* start,
bool do_flow,
StateVector* temp_vector,
JsrSet* temp_set);
// Incrementally build loop tree.
void build_loop_tree(Block* blk);
// Create the block map, which indexes blocks in pre_order.
void map_blocks();
@ -711,4 +921,6 @@ public:
void do_flow();
void print_on(outputStream* st) const PRODUCT_RETURN;
void rpo_print_on(outputStream* st) const PRODUCT_RETURN;
};
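The LocalSet introduced above is a fixed-width def set: it tracks stores to at most 63 local slots exactly and deliberately reports every higher-numbered slot as defined, so is_invariant_local() stays conservative. Below is a standalone copy of those three operations, just to make the saturation visible (illustrative only, not part of the patch).

// Standalone copy of the LocalSet idea above (uint64_t bit set over locals 0..62;
// anything >= 63 is conservatively treated as defined).
#include <cstdint>
#include <cstdio>

struct LocalSet {
  enum { max = 63 };
  uint64_t _bits;
  LocalSet() : _bits(0) {}
  void add(uint32_t i)        { if (i < (uint32_t) max) _bits |= (1ULL << i); }
  void add(const LocalSet* o) { _bits |= o->_bits; }
  bool test(uint32_t i) const { return i < (uint32_t) max ? ((_bits >> i) & 1U) != 0 : true; }
};

int main() {
  LocalSet defs;
  defs.add(2);                                            // local #2 is stored to in the loop
  std::printf("local 2 defined:  %d\n", defs.test(2));    // 1
  std::printf("local 5 defined:  %d\n", defs.test(5));    // 0 -> loop-invariant candidate
  std::printf("local 70 defined: %d\n", defs.test(70));   // 1, conservatively
  return 0;
}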

View File

@ -1350,11 +1350,7 @@ bool nmethod::can_unload(BoolObjectClosure* is_alive,
return false;
}
}
if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {
// Cannot do this test if verification of the UseParallelOldGC
// code using the PSMarkSweep code is being done.
assert(unloading_occurred, "Inconsistency in unloading");
}
assert(unloading_occurred, "Inconsistency in unloading");
make_unloaded(is_alive, obj);
return true;
}

View File

@ -76,8 +76,9 @@ class BitCounter: public BitMapClosure {
BitCounter() : _count(0) {}
// Callback when bit in map is set
virtual void do_bit(size_t offset) {
virtual bool do_bit(size_t offset) {
_count++;
return true;
}
int count() {
@ -467,7 +468,7 @@ MethodLivenessResult MethodLiveness::get_liveness_at(int entry_bci) {
bci = 0;
}
MethodLivenessResult answer(NULL,0);
MethodLivenessResult answer((uintptr_t*)NULL,0);
if (_block_count > 0) {
if (TimeLivenessAnalysis) _time_total.start();
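The do_bit signature change above (void to bool) runs through every BitMapClosure in this changeset; judging from the closures here, which all return true, the return value tells the bitmap walk whether to keep going, so a closure can cut the iteration short. Here is a self-contained sketch of that protocol with a simplified stand-in for the bitmap iterator (the real BitMap::iterate is not shown in this diff).

// Sketch of the do_bit protocol after this change: returning true means
// "keep iterating", returning false aborts the walk (simplified stand-ins).
#include <cstdio>
#include <vector>

struct BitMapClosureSketch {                 // stand-in for BitMapClosure
  virtual bool do_bit(size_t offset) = 0;
  virtual ~BitMapClosureSketch() {}
};

// Apply the closure to every set bit; stop early if the closure asks to.
static bool iterate(const std::vector<bool>& bits, BitMapClosureSketch* cl) {
  for (size_t i = 0; i < bits.size(); ++i) {
    if (bits[i] && !cl->do_bit(i)) return false;   // aborted by the closure
  }
  return true;                                     // walked the whole map
}

struct FirstSetBit : public BitMapClosureSketch {  // stops after the first hit
  size_t found;
  FirstSetBit() : found((size_t) -1) {}
  virtual bool do_bit(size_t offset) { found = offset; return false; }
};

int main() {
  std::vector<bool> bm(16, false);
  bm[3] = true;
  bm[9] = true;
  FirstSetBit f;
  bool completed = iterate(bm, &f);
  std::printf("completed=%d, first set bit=%u\n", completed, (unsigned) f.found);
  return 0;
}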

View File

@ -29,7 +29,7 @@ class MethodLivenessResult : public BitMap {
bool _is_valid;
public:
MethodLivenessResult(uintptr_t* map, idx_t size_in_bits)
MethodLivenessResult(BitMap::bm_word_t* map, idx_t size_in_bits)
: BitMap(map, size_in_bits)
, _is_valid(false)
{}

View File

@ -790,7 +790,7 @@ CompactibleFreeListSpace::object_iterate_careful_m(MemRegion mr,
}
HeapWord* CompactibleFreeListSpace::block_start(const void* p) const {
HeapWord* CompactibleFreeListSpace::block_start_const(const void* p) const {
NOT_PRODUCT(verify_objects_initialized());
return _bt.block_start(p);
}
@ -2286,9 +2286,9 @@ void CompactibleFreeListSpace::verifyIndexedFreeLists() const {
}
void CompactibleFreeListSpace::verifyIndexedFreeList(size_t size) const {
guarantee(size % 2 == 0, "Odd slots should be empty");
for (FreeChunk* fc = _indexedFreeList[size].head(); fc != NULL;
fc = fc->next()) {
FreeChunk* fc = _indexedFreeList[size].head();
guarantee((size % 2 == 0) || fc == NULL, "Odd slots should be empty");
for (; fc != NULL; fc = fc->next()) {
guarantee(fc->size() == size, "Size inconsistency");
guarantee(fc->isFree(), "!free?");
guarantee(fc->next() == NULL || fc->next()->prev() == fc, "Broken list");
@ -2790,10 +2790,11 @@ initialize_sequential_subtasks_for_rescan(int n_threads) {
assert(n_threads > 0, "Unexpected n_threads argument");
const size_t task_size = rescan_task_size();
size_t n_tasks = (used_region().word_size() + task_size - 1)/task_size;
assert((used_region().start() + (n_tasks - 1)*task_size <
used_region().end()) &&
(used_region().start() + n_tasks*task_size >=
used_region().end()), "n_task calculation incorrect");
assert((n_tasks == 0) == used_region().is_empty(), "n_tasks incorrect");
assert(n_tasks == 0 ||
((used_region().start() + (n_tasks - 1)*task_size < used_region().end()) &&
(used_region().start() + n_tasks*task_size >= used_region().end())),
"n_tasks calculation incorrect");
SequentialSubTasksDone* pst = conc_par_seq_tasks();
assert(!pst->valid(), "Clobbering existing data?");
pst->set_par_threads(n_threads);
@ -2833,7 +2834,7 @@ initialize_sequential_subtasks_for_marking(int n_threads,
assert(n_tasks == 0 ||
((span.start() + (n_tasks - 1)*task_size < span.end()) &&
(span.start() + n_tasks*task_size >= span.end())),
"n_task calculation incorrect");
"n_tasks calculation incorrect");
SequentialSubTasksDone* pst = conc_par_seq_tasks();
assert(!pst->valid(), "Clobbering existing data?");
pst->set_par_threads(n_threads);
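The tightened asserts above spell out the invariant of this task partitioning: with n_tasks computed as the ceiling of used words over task size, n_tasks is zero exactly when the region is empty, the last task starts strictly inside the used region, and the tasks together cover it. A few standalone lines that check that arithmetic (sample values only):

// Check the partitioning invariant asserted above: for W >= 0 and T > 0,
// n = (W + T - 1) / T satisfies (n == 0) == (W == 0), and if n > 0 then
// (n - 1) * T < W <= n * T.
#include <cassert>
#include <cstdio>

int main() {
  const size_t task_sizes[] = {1, 7, 64, 1000};
  for (size_t T : task_sizes) {
    for (size_t W = 0; W <= 10 * T; ++W) {
      size_t n = (W + T - 1) / T;               // ceiling division
      assert((n == 0) == (W == 0));
      if (n > 0) {
        assert((n - 1) * T < W);                // last task starts inside the region
        assert(W <= n * T);                     // tasks cover the whole region
      }
    }
  }
  std::printf("partitioning invariant holds for the sampled values\n");
  return 0;
}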

View File

@ -502,7 +502,7 @@ class CompactibleFreeListSpace: public CompactibleSpace {
void blk_iterate(BlkClosure* cl);
void blk_iterate_careful(BlkClosureCareful* cl);
HeapWord* block_start(const void* p) const;
HeapWord* block_start_const(const void* p) const;
HeapWord* block_start_careful(const void* p) const;
size_t block_size(const HeapWord* p) const;
size_t block_size_no_stall(HeapWord* p, const CMSCollector* c) const;

View File

@ -2761,13 +2761,14 @@ class VerifyMarkedClosure: public BitMapClosure {
public:
VerifyMarkedClosure(CMSBitMap* bm): _marks(bm), _failed(false) {}
void do_bit(size_t offset) {
bool do_bit(size_t offset) {
HeapWord* addr = _marks->offsetToHeapWord(offset);
if (!_marks->isMarked(addr)) {
oop(addr)->print();
gclog_or_tty->print_cr(" ("INTPTR_FORMAT" should have been marked)", addr);
_failed = true;
}
return true;
}
bool failed() { return _failed; }
@ -3650,6 +3651,7 @@ class CMSConcMarkingTask: public YieldingFlexibleGangTask {
CompactibleFreeListSpace* _cms_space;
CompactibleFreeListSpace* _perm_space;
HeapWord* _global_finger;
HeapWord* _restart_addr;
// Exposed here for yielding support
Mutex* const _bit_map_lock;
@ -3680,7 +3682,7 @@ class CMSConcMarkingTask: public YieldingFlexibleGangTask {
_term.set_task(this);
assert(_cms_space->bottom() < _perm_space->bottom(),
"Finger incorrectly initialized below");
_global_finger = _cms_space->bottom();
_restart_addr = _global_finger = _cms_space->bottom();
}
@ -3698,6 +3700,10 @@ class CMSConcMarkingTask: public YieldingFlexibleGangTask {
bool result() { return _result; }
void reset(HeapWord* ra) {
assert(_global_finger >= _cms_space->end(), "Postcondition of ::work(i)");
assert(_global_finger >= _perm_space->end(), "Postcondition of ::work(i)");
assert(ra < _perm_space->end(), "ra too large");
_restart_addr = _global_finger = ra;
_term.reset_for_reuse();
}
@ -3842,16 +3848,24 @@ void CMSConcMarkingTask::do_scan_and_mark(int i, CompactibleFreeListSpace* sp) {
int n_tasks = pst->n_tasks();
// We allow that there may be no tasks to do here because
// we are restarting after a stack overflow.
assert(pst->valid() || n_tasks == 0, "Uninitializd use?");
assert(pst->valid() || n_tasks == 0, "Uninitialized use?");
int nth_task = 0;
HeapWord* start = sp->bottom();
HeapWord* aligned_start = sp->bottom();
if (sp->used_region().contains(_restart_addr)) {
// Align down to a card boundary for the start of 0th task
// for this space.
aligned_start =
(HeapWord*)align_size_down((uintptr_t)_restart_addr,
CardTableModRefBS::card_size);
}
size_t chunk_size = sp->marking_task_size();
while (!pst->is_task_claimed(/* reference */ nth_task)) {
// Having claimed the nth task in this space,
// compute the chunk that it corresponds to:
MemRegion span = MemRegion(start + nth_task*chunk_size,
start + (nth_task+1)*chunk_size);
MemRegion span = MemRegion(aligned_start + nth_task*chunk_size,
aligned_start + (nth_task+1)*chunk_size);
// Try and bump the global finger via a CAS;
// note that we need to do the global finger bump
// _before_ taking the intersection below, because
@ -3866,26 +3880,40 @@ void CMSConcMarkingTask::do_scan_and_mark(int i, CompactibleFreeListSpace* sp) {
// beyond the "top" address of the space.
span = span.intersection(sp->used_region());
if (!span.is_empty()) { // Non-null task
// We want to skip the first object because
// the protocol is to scan any object in its entirety
// that _starts_ in this span; a fortiori, any
// object starting in an earlier span is scanned
// as part of an earlier claimed task.
// Below we use the "careful" version of block_start
// so we do not try to navigate uninitialized objects.
HeapWord* prev_obj = sp->block_start_careful(span.start());
// Below we use a variant of block_size that uses the
// Printezis bits to avoid waiting for allocated
// objects to become initialized/parsable.
while (prev_obj < span.start()) {
size_t sz = sp->block_size_no_stall(prev_obj, _collector);
if (sz > 0) {
prev_obj += sz;
HeapWord* prev_obj;
assert(!span.contains(_restart_addr) || nth_task == 0,
"Inconsistency");
if (nth_task == 0) {
// For the 0th task, we'll not need to compute a block_start.
if (span.contains(_restart_addr)) {
// In the case of a restart because of stack overflow,
// we might additionally skip a chunk prefix.
prev_obj = _restart_addr;
} else {
// In this case we may end up doing a bit of redundant
// scanning, but that appears unavoidable, short of
// locking the free list locks; see bug 6324141.
break;
prev_obj = span.start();
}
} else {
// We want to skip the first object because
// the protocol is to scan any object in its entirety
// that _starts_ in this span; a fortiori, any
// object starting in an earlier span is scanned
// as part of an earlier claimed task.
// Below we use the "careful" version of block_start
// so we do not try to navigate uninitialized objects.
prev_obj = sp->block_start_careful(span.start());
// Below we use a variant of block_size that uses the
// Printezis bits to avoid waiting for allocated
// objects to become initialized/parsable.
while (prev_obj < span.start()) {
size_t sz = sp->block_size_no_stall(prev_obj, _collector);
if (sz > 0) {
prev_obj += sz;
} else {
// In this case we may end up doing a bit of redundant
// scanning, but that appears unavoidable, short of
// locking the free list locks; see bug 6324141.
break;
}
}
}
if (prev_obj < span.end()) {
@ -3938,12 +3966,14 @@ class Par_ConcMarkingClosure: public OopClosure {
void handle_stack_overflow(HeapWord* lost);
};
// Grey object rescan during work stealing phase --
// the salient assumption here is that stolen oops must
// always be initialized, so we do not need to check for
// uninitialized objects before scanning here.
// Grey object scanning during work stealing phase --
// the salient assumption here is that any references
// that are in these stolen objects being scanned must
// already have been initialized (else they would not have
// been published), so we do not need to check for
// uninitialized objects before pushing here.
void Par_ConcMarkingClosure::do_oop(oop obj) {
assert(obj->is_oop_or_null(), "expected an oop or NULL");
assert(obj->is_oop_or_null(true), "expected an oop or NULL");
HeapWord* addr = (HeapWord*)obj;
// Check if oop points into the CMS generation
// and is not marked
@ -4001,7 +4031,7 @@ void Par_ConcMarkingClosure::trim_queue(size_t max) {
// in CMSCollector's _restart_address.
void Par_ConcMarkingClosure::handle_stack_overflow(HeapWord* lost) {
// We need to do this under a mutex to prevent other
// workers from interfering with the expansion below.
// workers from interfering with the work done below.
MutexLockerEx ml(_overflow_stack->par_lock(),
Mutex::_no_safepoint_check_flag);
// Remember the least grey address discarded
@ -4640,8 +4670,11 @@ size_t CMSCollector::preclean_card_table(ConcurrentMarkSweepGeneration* gen,
startTimer();
sample_eden();
// Get and clear dirty region from card table
dirtyRegion = _ct->ct_bs()->dirty_card_range_after_preclean(
MemRegion(nextAddr, endAddr));
dirtyRegion = _ct->ct_bs()->dirty_card_range_after_reset(
MemRegion(nextAddr, endAddr),
true,
CardTableModRefBS::precleaned_card_val());
assert(dirtyRegion.start() >= nextAddr,
"returned region inconsistent?");
}
@ -5409,8 +5442,8 @@ void CMSCollector::do_remark_non_parallel() {
&mrias_cl);
{
TraceTime t("grey object rescan", PrintGCDetails, false, gclog_or_tty);
// Iterate over the dirty cards, marking them precleaned, and
// setting the corresponding bits in the mod union table.
// Iterate over the dirty cards, setting the corresponding bits in the
// mod union table.
{
ModUnionClosure modUnionClosure(&_modUnionTable);
_ct->ct_bs()->dirty_card_iterate(
@ -6182,7 +6215,7 @@ HeapWord* CMSCollector::next_card_start_after_block(HeapWord* addr) const {
// bit vector itself. That is done by a separate call CMSBitMap::allocate()
// further below.
CMSBitMap::CMSBitMap(int shifter, int mutex_rank, const char* mutex_name):
_bm(NULL,0),
_bm(),
_shifter(shifter),
_lock(mutex_rank >= 0 ? new Mutex(mutex_rank, mutex_name, true) : NULL)
{
@ -6207,7 +6240,7 @@ bool CMSBitMap::allocate(MemRegion mr) {
}
assert(_virtual_space.committed_size() == brs.size(),
"didn't reserve backing store for all of CMS bit map?");
_bm.set_map((uintptr_t*)_virtual_space.low());
_bm.set_map((BitMap::bm_word_t*)_virtual_space.low());
assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
_bmWordSize, "inconsistency in bit map sizing");
_bm.set_size(_bmWordSize >> _shifter);
@ -6554,7 +6587,7 @@ void Par_MarkRefsIntoAndScanClosure::do_oop(oop obj) {
if (obj != NULL) {
// Ignore mark word because this could be an already marked oop
// that may be chained at the end of the overflow list.
assert(obj->is_oop(), "expected an oop");
assert(obj->is_oop(true), "expected an oop");
HeapWord* addr = (HeapWord*)obj;
if (_span.contains(addr) &&
!_bit_map->isMarked(addr)) {
@ -6845,10 +6878,10 @@ void MarkFromRootsClosure::reset(HeapWord* addr) {
// Should revisit to see if this should be restructured for
// greater efficiency.
void MarkFromRootsClosure::do_bit(size_t offset) {
bool MarkFromRootsClosure::do_bit(size_t offset) {
if (_skipBits > 0) {
_skipBits--;
return;
return true;
}
// convert offset into a HeapWord*
HeapWord* addr = _bitMap->startWord() + offset;
@ -6886,10 +6919,11 @@ void MarkFromRootsClosure::do_bit(size_t offset) {
} // ...else the setting of klass will dirty the card anyway.
}
DEBUG_ONLY(})
return;
return true;
}
}
scanOopsInOop(addr);
return true;
}
// We take a break if we've been at this for a while,
@ -7023,10 +7057,10 @@ Par_MarkFromRootsClosure::Par_MarkFromRootsClosure(CMSConcMarkingTask* task,
// Should revisit to see if this should be restructured for
// greater efficiency.
void Par_MarkFromRootsClosure::do_bit(size_t offset) {
bool Par_MarkFromRootsClosure::do_bit(size_t offset) {
if (_skip_bits > 0) {
_skip_bits--;
return;
return true;
}
// convert offset into a HeapWord*
HeapWord* addr = _bit_map->startWord() + offset;
@ -7041,10 +7075,11 @@ void Par_MarkFromRootsClosure::do_bit(size_t offset) {
if (p->klass_or_null() == NULL || !p->is_parsable()) {
// in the case of Clean-on-Enter optimization, redirty card
// and avoid clearing card by increasing the threshold.
return;
return true;
}
}
scan_oops_in_oop(addr);
return true;
}
void Par_MarkFromRootsClosure::scan_oops_in_oop(HeapWord* ptr) {
@ -7167,7 +7202,7 @@ void MarkFromRootsVerifyClosure::reset(HeapWord* addr) {
// Should revisit to see if this should be restructured for
// greater efficiency.
void MarkFromRootsVerifyClosure::do_bit(size_t offset) {
bool MarkFromRootsVerifyClosure::do_bit(size_t offset) {
// convert offset into a HeapWord*
HeapWord* addr = _verification_bm->startWord() + offset;
assert(_verification_bm->endWord() && addr < _verification_bm->endWord(),
@ -7195,6 +7230,7 @@ void MarkFromRootsVerifyClosure::do_bit(size_t offset) {
new_oop->oop_iterate(&_pam_verify_closure);
}
assert(_mark_stack->isEmpty(), "tautology, emphasizing post-condition");
return true;
}
PushAndMarkVerifyClosure::PushAndMarkVerifyClosure(
@ -7289,6 +7325,8 @@ Par_PushOrMarkClosure::Par_PushOrMarkClosure(CMSCollector* collector,
_should_remember_klasses(collector->should_unload_classes())
{ }
// Assumes thread-safe access by callers, who are
// responsible for mutual exclusion.
void CMSCollector::lower_restart_addr(HeapWord* low) {
assert(_span.contains(low), "Out of bounds addr");
if (_restart_addr == NULL) {
@ -7314,7 +7352,7 @@ void PushOrMarkClosure::handle_stack_overflow(HeapWord* lost) {
// in CMSCollector's _restart_address.
void Par_PushOrMarkClosure::handle_stack_overflow(HeapWord* lost) {
// We need to do this under a mutex to prevent other
// workers from interfering with the expansion below.
// workers from interfering with the work done below.
MutexLockerEx ml(_overflow_stack->par_lock(),
Mutex::_no_safepoint_check_flag);
// Remember the least grey address discarded
@ -7438,8 +7476,12 @@ PushAndMarkClosure::PushAndMarkClosure(CMSCollector* collector,
// Grey object rescan during pre-cleaning and second checkpoint phases --
// the non-parallel version (the parallel version appears further below.)
void PushAndMarkClosure::do_oop(oop obj) {
// If _concurrent_precleaning, ignore mark word verification
assert(obj->is_oop_or_null(_concurrent_precleaning),
// Ignore mark word verification. If during concurrent precleaning,
// the object monitor may be locked. If during the checkpoint
// phases, the object may already have been reached by a different
// path and may be at the end of the global overflow list (so
// the mark word may be NULL).
assert(obj->is_oop_or_null(true /* ignore mark word */),
"expected an oop or NULL");
HeapWord* addr = (HeapWord*)obj;
// Check if oop points into the CMS generation

View File

@ -1327,7 +1327,7 @@ class MarkFromRootsClosure: public BitMapClosure {
CMSMarkStack* markStack,
CMSMarkStack* revisitStack,
bool should_yield, bool verifying = false);
void do_bit(size_t offset);
bool do_bit(size_t offset);
void reset(HeapWord* addr);
inline void do_yield_check();
@ -1363,7 +1363,7 @@ class Par_MarkFromRootsClosure: public BitMapClosure {
CMSMarkStack* overflow_stack,
CMSMarkStack* revisit_stack,
bool should_yield);
void do_bit(size_t offset);
bool do_bit(size_t offset);
inline void do_yield_check();
private:
@ -1411,7 +1411,7 @@ class MarkFromRootsVerifyClosure: public BitMapClosure {
CMSBitMap* verification_bm,
CMSBitMap* cms_bm,
CMSMarkStack* mark_stack);
void do_bit(size_t offset);
bool do_bit(size_t offset);
void reset(HeapWord* addr);
};
@ -1420,8 +1420,9 @@ class MarkFromRootsVerifyClosure: public BitMapClosure {
// "empty" (i.e. the bit vector doesn't have any 1-bits).
class FalseBitMapClosure: public BitMapClosure {
public:
void do_bit(size_t offset) {
bool do_bit(size_t offset) {
guarantee(false, "Should not have a 1 bit");
return true;
}
};

View File

@ -0,0 +1,195 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
// A BufferingOops closure tries to separate out the cost of finding roots
// from the cost of applying closures to them. It maintains an array of
// ref-containing locations. Until the array is full, applying the closure
// to an oop* merely records that location in the array. Since this
// closure app cost is small, an elapsed timer can approximately attribute
// all of this cost to the cost of finding the roots. When the array fills
// up, the wrapped closure is applied to all elements, keeping track of
// the elapsed time of this process, and leaving the array empty.
// The caller must be sure to call "done" to process any unprocessed
// buffered entries.
class Generation;
class HeapRegion;
class BufferingOopClosure: public OopClosure {
protected:
enum PrivateConstants {
BufferLength = 1024
};
oop *_buffer[BufferLength];
oop **_buffer_top;
oop **_buffer_curr;
OopClosure *_oc;
double _closure_app_seconds;
void process_buffer () {
double start = os::elapsedTime();
for (oop **curr = _buffer; curr < _buffer_curr; ++curr) {
_oc->do_oop(*curr);
}
_buffer_curr = _buffer;
_closure_app_seconds += (os::elapsedTime() - start);
}
public:
virtual void do_oop(narrowOop* p) {
guarantee(false, "NYI");
}
virtual void do_oop(oop *p) {
if (_buffer_curr == _buffer_top) {
process_buffer();
}
*_buffer_curr = p;
++_buffer_curr;
}
void done () {
if (_buffer_curr > _buffer) {
process_buffer();
}
}
double closure_app_seconds () {
return _closure_app_seconds;
}
BufferingOopClosure (OopClosure *oc) :
_oc(oc),
_buffer_curr(_buffer), _buffer_top(_buffer + BufferLength),
_closure_app_seconds(0.0) { }
};
class BufferingOopsInGenClosure: public OopsInGenClosure {
BufferingOopClosure _boc;
OopsInGenClosure* _oc;
public:
BufferingOopsInGenClosure(OopsInGenClosure *oc) :
_boc(oc), _oc(oc) {}
virtual void do_oop(narrowOop* p) {
guarantee(false, "NYI");
}
virtual void do_oop(oop* p) {
assert(generation()->is_in_reserved(p), "Must be in!");
_boc.do_oop(p);
}
void done() {
_boc.done();
}
double closure_app_seconds () {
return _boc.closure_app_seconds();
}
void set_generation(Generation* gen) {
OopsInGenClosure::set_generation(gen);
_oc->set_generation(gen);
}
void reset_generation() {
// Make sure we finish the current work with the current generation.
_boc.done();
OopsInGenClosure::reset_generation();
_oc->reset_generation();
}
};
class BufferingOopsInHeapRegionClosure: public OopsInHeapRegionClosure {
private:
enum PrivateConstants {
BufferLength = 1024
};
oop *_buffer[BufferLength];
oop **_buffer_top;
oop **_buffer_curr;
HeapRegion *_hr_buffer[BufferLength];
HeapRegion **_hr_curr;
OopsInHeapRegionClosure *_oc;
double _closure_app_seconds;
void process_buffer () {
assert((_hr_curr - _hr_buffer) == (_buffer_curr - _buffer),
"the two lengths should be the same");
double start = os::elapsedTime();
HeapRegion **hr_curr = _hr_buffer;
HeapRegion *hr_prev = NULL;
for (oop **curr = _buffer; curr < _buffer_curr; ++curr) {
HeapRegion *region = *hr_curr;
if (region != hr_prev) {
_oc->set_region(region);
hr_prev = region;
}
_oc->do_oop(*curr);
++hr_curr;
}
_buffer_curr = _buffer;
_hr_curr = _hr_buffer;
_closure_app_seconds += (os::elapsedTime() - start);
}
public:
virtual void do_oop(narrowOop *p) {
guarantee(false, "NYI");
}
virtual void do_oop(oop *p) {
if (_buffer_curr == _buffer_top) {
assert(_hr_curr > _hr_buffer, "_hr_curr should be consistent with _buffer_curr");
process_buffer();
}
*_buffer_curr = p;
++_buffer_curr;
*_hr_curr = _from;
++_hr_curr;
}
void done () {
if (_buffer_curr > _buffer) {
assert(_hr_curr > _hr_buffer, "_hr_curr should be consistent with _buffer_curr");
process_buffer();
}
}
double closure_app_seconds () {
return _closure_app_seconds;
}
BufferingOopsInHeapRegionClosure (OopsInHeapRegionClosure *oc) :
_oc(oc),
_buffer_curr(_buffer), _buffer_top(_buffer + BufferLength),
_hr_curr(_hr_buffer),
_closure_app_seconds(0.0) { }
};
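Callers of these buffering closures wrap their real closure, run root scanning against the wrapper, then call done() and read closure_app_seconds() to see how much of the elapsed time went into closure application rather than root finding. The sketch below is a simplified standalone model of the same pattern with stand-in types (SlotClosure, CountingClosure); the real classes operate on oop* and HeapRegion and time themselves with os::elapsedTime().

// Standalone model of the buffering pattern above: cache pointers until the
// buffer fills, then apply the real closure to the whole batch, timing only
// the batch application (simplified types; not the HotSpot classes).
#include <chrono>
#include <cstdio>

struct SlotClosure {                       // stand-in for OopClosure
  virtual void do_slot(int** p) = 0;
  virtual ~SlotClosure() {}
};

class BufferingSlotClosure : public SlotClosure {
  enum { BufferLength = 4 };               // tiny buffer so the flush is visible
  int** _buffer[BufferLength];
  int*** _curr;
  int*** _top;
  SlotClosure* _oc;
  double _app_seconds;

  void process_buffer() {
    std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
    for (int*** c = _buffer; c < _curr; ++c) _oc->do_slot(*c);
    _curr = _buffer;
    _app_seconds += std::chrono::duration<double>(
        std::chrono::steady_clock::now() - start).count();
  }

public:
  explicit BufferingSlotClosure(SlotClosure* oc)
    : _curr(_buffer), _top(_buffer + BufferLength), _oc(oc), _app_seconds(0.0) {}
  virtual void do_slot(int** p) {
    if (_curr == _top) process_buffer();   // flush before accepting more
    *_curr++ = p;
  }
  void done() { if (_curr > _buffer) process_buffer(); }
  double closure_app_seconds() const { return _app_seconds; }
};

struct CountingClosure : public SlotClosure {  // the "real" closure being wrapped
  int visited;
  CountingClosure() : visited(0) {}
  virtual void do_slot(int** p) { (void) p; ++visited; }
};

int main() {
  int a = 0, b = 1, c = 2;
  int* roots[] = { &a, &b, &c, &a, &b, &c, &a };
  CountingClosure cc;
  BufferingSlotClosure buf(&cc);
  for (int*& r : roots) buf.do_slot(&r);   // "root scanning" phase
  buf.done();                              // flush the tail of the buffer
  std::printf("visited=%d app_seconds=%f\n", cc.visited, buf.closure_app_seconds());
  return 0;
}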

View File

@ -0,0 +1,409 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
# include "incls/_precompiled.incl"
# include "incls/_collectionSetChooser.cpp.incl"
CSetChooserCache::CSetChooserCache() {
for (int i = 0; i < CacheLength; ++i)
_cache[i] = NULL;
clear();
}
void CSetChooserCache::clear() {
_occupancy = 0;
_first = 0;
for (int i = 0; i < CacheLength; ++i) {
HeapRegion *hr = _cache[i];
if (hr != NULL)
hr->set_sort_index(-1);
_cache[i] = NULL;
}
}
#ifndef PRODUCT
bool CSetChooserCache::verify() {
int index = _first;
HeapRegion *prev = NULL;
for (int i = 0; i < _occupancy; ++i) {
guarantee(_cache[index] != NULL, "cache entry should not be empty");
HeapRegion *hr = _cache[index];
guarantee(!hr->is_young(), "should not be young!");
if (prev != NULL) {
guarantee(prev->gc_efficiency() >= hr->gc_efficiency(),
"cache should be correctly ordered");
}
guarantee(hr->sort_index() == get_sort_index(index),
"sort index should be correct");
index = trim_index(index + 1);
prev = hr;
}
for (int i = 0; i < (CacheLength - _occupancy); ++i) {
guarantee(_cache[index] == NULL, "cache entry should be empty");
index = trim_index(index + 1);
}
guarantee(index == _first, "we should have reached where we started from");
return true;
}
#endif // PRODUCT
void CSetChooserCache::insert(HeapRegion *hr) {
assert(!is_full(), "cache should not be full");
hr->calc_gc_efficiency();
int empty_index;
if (_occupancy == 0) {
empty_index = _first;
} else {
empty_index = trim_index(_first + _occupancy);
assert(_cache[empty_index] == NULL, "last slot should be empty");
int last_index = trim_index(empty_index - 1);
HeapRegion *last = _cache[last_index];
assert(last != NULL, "as the cache is not empty, last should not be NULL");
while (empty_index != _first &&
last->gc_efficiency() < hr->gc_efficiency()) {
_cache[empty_index] = last;
last->set_sort_index(get_sort_index(empty_index));
empty_index = last_index;
last_index = trim_index(last_index - 1);
last = _cache[last_index];
}
}
_cache[empty_index] = hr;
hr->set_sort_index(get_sort_index(empty_index));
++_occupancy;
assert(verify(), "cache should be consistent");
}
HeapRegion *CSetChooserCache::remove_first() {
if (_occupancy > 0) {
assert(_cache[_first] != NULL, "cache should have at least one region");
HeapRegion *ret = _cache[_first];
_cache[_first] = NULL;
ret->set_sort_index(-1);
--_occupancy;
_first = trim_index(_first + 1);
assert(verify(), "cache should be consistent");
return ret;
} else {
return NULL;
}
}
// This is a bit expensive... but we expect that it should not be called
// too often.
void CSetChooserCache::remove(HeapRegion *hr) {
assert(_occupancy > 0, "cache should not be empty");
assert(hr->sort_index() < -1, "should already be in the cache");
int index = get_index(hr->sort_index());
assert(_cache[index] == hr, "index should be correct");
int next_index = trim_index(index + 1);
int last_index = trim_index(_first + _occupancy - 1);
while (index != last_index) {
assert(_cache[next_index] != NULL, "should not be null");
_cache[index] = _cache[next_index];
_cache[index]->set_sort_index(get_sort_index(index));
index = next_index;
next_index = trim_index(next_index+1);
}
assert(index == last_index, "should have reached the last one");
_cache[index] = NULL;
hr->set_sort_index(-1);
--_occupancy;
assert(verify(), "cache should be consistent");
}
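// Comparator for sorting the marked-region array: non-NULL regions come
// before NULL ones, and among non-NULL regions higher GC efficiency sorts
// earlier (i.e. descending order of gc_efficiency()).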
static inline int orderRegions(HeapRegion* hr1, HeapRegion* hr2) {
if (hr1 == NULL) {
if (hr2 == NULL) return 0;
else return 1;
} else if (hr2 == NULL) {
return -1;
}
if (hr2->gc_efficiency() < hr1->gc_efficiency()) return -1;
else if (hr1->gc_efficiency() < hr2->gc_efficiency()) return 1;
else return 0;
}
static int orderRegions(HeapRegion** hr1p, HeapRegion** hr2p) {
return orderRegions(*hr1p, *hr2p);
}
CollectionSetChooser::CollectionSetChooser() :
// The line below is the worst bit of C++ hackery I've ever written
// (Detlefs, 11/23). You should think of it as equivalent to
// "_regions(100, true)": initialize the growable array and inform it
// that it should allocate its elem array(s) on the C heap. The first
// argument, however, is actually a comma expression (new-expr, 100).
// The purpose of the new_expr is to inform the growable array that it
// is *already* allocated on the C heap: it uses the placement syntax to
// keep it from actually doing any allocation.
_markedRegions((ResourceObj::operator new (sizeof(GrowableArray<HeapRegion*>),
(void*)&_markedRegions,
ResourceObj::C_HEAP),
100),
true),
_curMarkedIndex(0),
_numMarkedRegions(0),
_unmarked_age_1_returned_as_new(false),
_first_par_unreserved_idx(0)
{}
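The comma-expression trick above is dense, so here is a minimal standalone sketch of the same idiom using hypothetical types (Widget standing in for GrowableArray, Owner for the chooser); in the real code the placement allocation call's side effect is to record, at the member's own address, that the array's element storage lives on the C heap.

#include <cstddef>
#include <cstdio>

struct Widget {
  int _initial_capacity;
  // Class-specific placement allocation function: nothing is allocated, but
  // its body runs before the constructor (the real code records the
  // allocation type here).
  void* operator new(std::size_t /*size*/, void* where) {
    std::printf("placement operator new runs first\n");
    return where;
  }
  void operator delete(void* /*p*/, void* /*where*/) {}
  explicit Widget(int initial_capacity)
    : _initial_capacity(initial_capacity) {}
};

struct Owner {
  Widget _w;
  // The first operand of the comma expression calls the placement allocation
  // function on _w's own storage (for its side effect only); the whole
  // expression then evaluates to 100, the ordinary constructor argument.
  Owner() : _w((Widget::operator new(sizeof(Widget), (void*)&_w), 100)) {}
};

int main() {
  Owner o;
  std::printf("capacity = %d\n", o._w._initial_capacity);
  return 0;
}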
#ifndef PRODUCT
bool CollectionSetChooser::verify() {
int index = 0;
guarantee(_curMarkedIndex <= _numMarkedRegions,
"_curMarkedIndex should be within bounds");
while (index < _curMarkedIndex) {
guarantee(_markedRegions.at(index++) == NULL,
"all entries before _curMarkedIndex should be NULL");
}
HeapRegion *prev = NULL;
while (index < _numMarkedRegions) {
HeapRegion *curr = _markedRegions.at(index++);
if (curr != NULL) {
int si = curr->sort_index();
guarantee(!curr->is_young(), "should not be young!");
guarantee(si > -1 && si == (index-1), "sort index invariant");
if (prev != NULL) {
guarantee(orderRegions(prev, curr) != 1, "regions should be sorted");
}
prev = curr;
}
}
return _cache.verify();
}
#endif
bool
CollectionSetChooser::addRegionToCache() {
assert(!_cache.is_full(), "cache should not be full");
HeapRegion *hr = NULL;
while (hr == NULL && _curMarkedIndex < _numMarkedRegions) {
hr = _markedRegions.at(_curMarkedIndex++);
}
if (hr == NULL)
return false;
assert(!hr->is_young(), "should not be young!");
assert(hr->sort_index() == _curMarkedIndex-1, "sort_index invariant");
_markedRegions.at_put(hr->sort_index(), NULL);
_cache.insert(hr);
assert(!_cache.is_empty(), "cache should not be empty");
assert(verify(), "cache should be consistent");
return true;
}
void
CollectionSetChooser::fillCache() {
while (!_cache.is_full() && addRegionToCache()) {
}
}
void
CollectionSetChooser::sortMarkedHeapRegions() {
guarantee(_cache.is_empty(), "cache should be empty");
// First trim any unused portion of the top in the parallel case.
if (_first_par_unreserved_idx > 0) {
if (G1PrintParCleanupStats) {
gclog_or_tty->print(" Truncating _markedRegions from %d to %d.\n",
_markedRegions.length(), _first_par_unreserved_idx);
}
assert(_first_par_unreserved_idx <= _markedRegions.length(),
"Or we didn't reserved enough length");
_markedRegions.trunc_to(_first_par_unreserved_idx);
}
_markedRegions.sort(orderRegions);
assert(_numMarkedRegions <= _markedRegions.length(), "Requirement");
assert(_numMarkedRegions == 0
|| _markedRegions.at(_numMarkedRegions-1) != NULL,
"Testing _numMarkedRegions");
assert(_numMarkedRegions == _markedRegions.length()
|| _markedRegions.at(_numMarkedRegions) == NULL,
"Testing _numMarkedRegions");
if (G1PrintParCleanupStats) {
gclog_or_tty->print_cr(" Sorted %d marked regions.", _numMarkedRegions);
}
for (int i = 0; i < _numMarkedRegions; i++) {
assert(_markedRegions.at(i) != NULL, "Should be true by sorting!");
_markedRegions.at(i)->set_sort_index(i);
if (G1PrintRegionLivenessInfo > 0) {
if (i == 0) gclog_or_tty->print_cr("Sorted marked regions:");
if (i < G1PrintRegionLivenessInfo ||
(_numMarkedRegions-i) < G1PrintRegionLivenessInfo) {
HeapRegion* hr = _markedRegions.at(i);
size_t u = hr->used();
gclog_or_tty->print_cr(" Region %d: %d used, %d max live, %5.2f%%.",
i, u, hr->max_live_bytes(),
100.0*(float)hr->max_live_bytes()/(float)u);
}
}
}
if (G1PolicyVerbose > 1)
printSortedHeapRegions();
assert(verify(), "should now be sorted");
}
void
printHeapRegion(HeapRegion *hr) {
if (hr->isHumongous())
gclog_or_tty->print("H: ");
if (hr->in_collection_set())
gclog_or_tty->print("CS: ");
if (hr->popular())
gclog_or_tty->print("pop: ");
gclog_or_tty->print_cr("Region " PTR_FORMAT " (%s%s) "
"[" PTR_FORMAT ", " PTR_FORMAT"] "
"Used: " SIZE_FORMAT "K, garbage: " SIZE_FORMAT "K.",
hr, hr->is_young() ? "Y " : " ",
hr->is_marked()? "M1" : "M0",
hr->bottom(), hr->end(),
hr->used()/K, hr->garbage_bytes()/K);
}
void
CollectionSetChooser::addMarkedHeapRegion(HeapRegion* hr) {
assert(!hr->isHumongous(),
"Humongous regions shouldn't be added to the collection set");
assert(!hr->is_young(), "should not be young!");
_markedRegions.append(hr);
_numMarkedRegions++;
hr->calc_gc_efficiency();
}
void
CollectionSetChooser::
prepareForAddMarkedHeapRegionsPar(size_t n_regions, size_t chunkSize) {
_first_par_unreserved_idx = 0;
size_t max_waste = ParallelGCThreads * chunkSize;
// it should be aligned with respect to chunkSize
size_t aligned_n_regions =
(n_regions + (chunkSize - 1)) / chunkSize * chunkSize;
assert( aligned_n_regions % chunkSize == 0, "should be aligned" );
_markedRegions.at_put_grow((int)(aligned_n_regions + max_waste - 1), NULL);
}
jint
CollectionSetChooser::getParMarkedHeapRegionChunk(jint n_regions) {
jint res = Atomic::add(n_regions, &_first_par_unreserved_idx);
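// Atomic::add returns the updated index, so this thread has claimed the
// half-open range [res - n_regions, res).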
assert(_markedRegions.length() > res + n_regions - 1,
"Should already have been expanded");
return res - n_regions;
}
void
CollectionSetChooser::setMarkedHeapRegion(jint index, HeapRegion* hr) {
assert(_markedRegions.at(index) == NULL, "precondition");
assert(!hr->is_young(), "should not be young!");
_markedRegions.at_put(index, hr);
hr->calc_gc_efficiency();
}
void
CollectionSetChooser::incNumMarkedHeapRegions(jint inc_by) {
(void)Atomic::add(inc_by, &_numMarkedRegions);
}
void
CollectionSetChooser::clearMarkedHeapRegions(){
for (int i = 0; i < _markedRegions.length(); i++) {
HeapRegion* r = _markedRegions.at(i);
if (r != NULL) r->set_sort_index(-1);
}
_markedRegions.clear();
_curMarkedIndex = 0;
_numMarkedRegions = 0;
_cache.clear();
};
void
CollectionSetChooser::updateAfterFullCollection() {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
clearMarkedHeapRegions();
}
void
CollectionSetChooser::printSortedHeapRegions() {
gclog_or_tty->print_cr("Printing %d Heap Regions sorted by amount of known garbage",
_numMarkedRegions);
for (int i = 0; i < _markedRegions.length(); i++) {
printHeapRegion(_markedRegions.at(i));
}
gclog_or_tty->print_cr("Done sorted heap region print");
}
void CollectionSetChooser::removeRegion(HeapRegion *hr) {
int si = hr->sort_index();
assert(si == -1 || hr->is_marked(), "Sort index not valid.");
if (si > -1) {
assert(_markedRegions.at(si) == hr, "Sort index not valid." );
_markedRegions.at_put(si, NULL);
} else if (si < -1) {
assert(_cache.region_in_cache(hr), "should be in the cache");
_cache.remove(hr);
assert(hr->sort_index() == -1, "sort index invariant");
}
hr->set_sort_index(-1);
}
// if time_remaining < 0.0, then this method should try to return
// a region, whether it fits within the remaining time or not
HeapRegion*
CollectionSetChooser::getNextMarkedRegion(double time_remaining,
double avg_prediction) {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
G1CollectorPolicy* g1p = g1h->g1_policy();
fillCache();
if (_cache.is_empty()) {
assert(_curMarkedIndex == _numMarkedRegions,
"if cache is empty, list should also be empty");
return NULL;
}
HeapRegion *hr = _cache.get_first();
assert(hr != NULL, "if cache not empty, first entry should be non-null");
double predicted_time = g1h->predict_region_elapsed_time_ms(hr, false);
if (g1p->adaptive_young_list_length()) {
if (time_remaining - predicted_time < 0.0) {
g1h->check_if_region_is_too_expensive(predicted_time);
return NULL;
}
} else {
if (predicted_time > 2.0 * avg_prediction) {
return NULL;
}
}
HeapRegion *hr2 = _cache.remove_first();
assert(hr == hr2, "cache contents should not have changed");
return hr;
}

View File

@@ -0,0 +1,138 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
// We need to sort heap regions by collection desirability.
class CSetChooserCache {
private:
enum {
CacheLength = 16
} PrivateConstants;
HeapRegion* _cache[CacheLength];
int _occupancy; // number of regions in the cache
int _first; // "first" region in the cache
// adding CacheLength to deal with negative values
inline int trim_index(int index) {
return (index + CacheLength) % CacheLength;
}
inline int get_sort_index(int index) {
return -index-2;
}
inline int get_index(int sort_index) {
return -sort_index-2;
}
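// For example (following the mapping above): cache slot 0 is published as
// sort index -2, slot 1 as -3, and so on; get_index() inverts this, while
// -1 is reserved to mean 'not in the cache and not in the marked array'.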
public:
CSetChooserCache(void);
inline int occupancy(void) { return _occupancy; }
inline bool is_full() { return _occupancy == CacheLength; }
inline bool is_empty() { return _occupancy == 0; }
void clear(void);
void insert(HeapRegion *hr);
HeapRegion *remove_first(void);
void remove (HeapRegion *hr);
inline HeapRegion *get_first(void) {
return _cache[_first];
}
#ifndef PRODUCT
bool verify (void);
bool region_in_cache(HeapRegion *hr) {
int sort_index = hr->sort_index();
if (sort_index < -1) {
int index = get_index(sort_index);
guarantee(index < CacheLength, "should be within bounds");
return _cache[index] == hr;
} else
return 0;
}
#endif // PRODUCT
};
class CollectionSetChooser: public CHeapObj {
GrowableArray<HeapRegion*> _markedRegions;
int _curMarkedIndex;
int _numMarkedRegions;
CSetChooserCache _cache;
// True iff the last collection pause ran out of new "age 0" regions, and
// returned an "age 1" region.
bool _unmarked_age_1_returned_as_new;
jint _first_par_unreserved_idx;
public:
HeapRegion* getNextMarkedRegion(double time_so_far, double avg_prediction);
CollectionSetChooser();
void printSortedHeapRegions();
void sortMarkedHeapRegions();
void fillCache();
bool addRegionToCache(void);
void addMarkedHeapRegion(HeapRegion *hr);
// Must be called before calls to getParMarkedHeapRegionChunk.
// "n_regions" is the number of regions, "chunkSize" the chunk size.
void prepareForAddMarkedHeapRegionsPar(size_t n_regions, size_t chunkSize);
// Returns the first index in a contiguous chunk of "n_regions" indexes
// that the calling thread has reserved. These must be set by the
// calling thread using "setMarkedHeapRegion" (to NULL if necessary).
jint getParMarkedHeapRegionChunk(jint n_regions);
// Set the marked array entry at index to hr. Careful to claim the index
// first if in parallel.
void setMarkedHeapRegion(jint index, HeapRegion* hr);
// Atomically increment the number of claimed regions by "inc_by".
void incNumMarkedHeapRegions(jint inc_by);
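// A typical parallel usage (a sketch implied by the comments above, not
// lifted from a caller): each worker claims a chunk with
// getParMarkedHeapRegionChunk(chunkSize), fills the claimed indexes via
// setMarkedHeapRegion (NULL for unused slots), and finally calls
// incNumMarkedHeapRegions with the number of regions it actually added.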
void clearMarkedHeapRegions();
void updateAfterFullCollection();
// Ensure that "hr" is not a member of the marked region array or the cache
void removeRegion(HeapRegion* hr);
bool unmarked_age_1_returned_as_new() { return _unmarked_age_1_returned_as_new; }
// Returns true if the used portion of "_markedRegions" is properly
// sorted, otherwise asserts false.
#ifndef PRODUCT
bool verify(void);
bool regionProperlyOrdered(HeapRegion* r) {
int si = r->sort_index();
return (si == -1) ||
(si > -1 && _markedRegions.at(si) == r) ||
(si < -1 && _cache.region_in_cache(r));
}
#endif
};

View File

@@ -0,0 +1,355 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
#include "incls/_precompiled.incl"
#include "incls/_concurrentG1Refine.cpp.incl"
bool ConcurrentG1Refine::_enabled = false;
ConcurrentG1Refine::ConcurrentG1Refine() :
_pya(PYA_continue), _last_pya(PYA_continue),
_last_cards_during(), _first_traversal(false),
_card_counts(NULL), _cur_card_count_histo(NULL), _cum_card_count_histo(NULL),
_hot_cache(NULL),
_def_use_cache(false), _use_cache(false),
_n_periods(0), _total_cards(0), _total_travs(0)
{
if (G1ConcRefine) {
_cg1rThread = new ConcurrentG1RefineThread(this);
assert(cg1rThread() != NULL, "Conc refine should have been created");
assert(cg1rThread()->cg1r() == this,
"Conc refine thread should refer to this");
} else {
_cg1rThread = NULL;
}
}
void ConcurrentG1Refine::init() {
if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
_n_card_counts =
(unsigned) (g1h->g1_reserved_obj_bytes() >> CardTableModRefBS::card_shift);
_card_counts = NEW_C_HEAP_ARRAY(unsigned char, _n_card_counts);
for (size_t i = 0; i < _n_card_counts; i++) _card_counts[i] = 0;
ModRefBarrierSet* bs = g1h->mr_bs();
guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition");
CardTableModRefBS* ctbs = (CardTableModRefBS*)bs;
_ct_bot = ctbs->byte_for_const(g1h->reserved_region().start());
if (G1ConcRSCountTraversals) {
_cur_card_count_histo = NEW_C_HEAP_ARRAY(unsigned, 256);
_cum_card_count_histo = NEW_C_HEAP_ARRAY(unsigned, 256);
for (int i = 0; i < 256; i++) {
_cur_card_count_histo[i] = 0;
_cum_card_count_histo[i] = 0;
}
}
}
if (G1ConcRSLogCacheSize > 0) {
_def_use_cache = true;
_use_cache = true;
_hot_cache_size = (1 << G1ConcRSLogCacheSize);
_hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size);
_n_hot = 0;
_hot_cache_idx = 0;
}
}
ConcurrentG1Refine::~ConcurrentG1Refine() {
if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) {
assert(_card_counts != NULL, "Logic");
FREE_C_HEAP_ARRAY(unsigned char, _card_counts);
assert(_cur_card_count_histo != NULL, "Logic");
FREE_C_HEAP_ARRAY(unsigned, _cur_card_count_histo);
assert(_cum_card_count_histo != NULL, "Logic");
FREE_C_HEAP_ARRAY(unsigned, _cum_card_count_histo);
}
if (G1ConcRSLogCacheSize > 0) {
assert(_hot_cache != NULL, "Logic");
FREE_C_HEAP_ARRAY(jbyte*, _hot_cache);
}
}
bool ConcurrentG1Refine::refine() {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
unsigned cards_before = g1h->g1_rem_set()->conc_refine_cards();
clear_hot_cache(); // Any previous values in this are now invalid.
g1h->g1_rem_set()->concurrentRefinementPass(this);
_traversals++;
unsigned cards_after = g1h->g1_rem_set()->conc_refine_cards();
unsigned cards_during = cards_after-cards_before;
// If this is the first traversal in the current enabling
// and we did some cards, or if the number of cards found is decreasing
// sufficiently quickly, then keep going. Otherwise, sleep a while.
bool res =
(_first_traversal && cards_during > 0)
||
(!_first_traversal && cards_during * 3 < _last_cards_during * 2);
_last_cards_during = cards_during;
_first_traversal = false;
return res;
}
void ConcurrentG1Refine::enable() {
MutexLocker x(G1ConcRefine_mon);
if (!_enabled) {
_enabled = true;
_first_traversal = true; _last_cards_during = 0;
G1ConcRefine_mon->notify_all();
}
}
unsigned ConcurrentG1Refine::disable() {
MutexLocker x(G1ConcRefine_mon);
if (_enabled) {
_enabled = false;
return _traversals;
} else {
return 0;
}
}
void ConcurrentG1Refine::wait_for_ConcurrentG1Refine_enabled() {
G1ConcRefine_mon->lock();
while (!_enabled) {
G1ConcRefine_mon->wait(Mutex::_no_safepoint_check_flag);
}
G1ConcRefine_mon->unlock();
_traversals = 0;
};
void ConcurrentG1Refine::set_pya_restart() {
// If we're using the log-based RS barrier, the above will cause
// in-progress traversals of completed log buffers to quit early; we will
// also abandon all other buffers.
if (G1RSBarrierUseQueue) {
DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
dcqs.abandon_logs();
if (_cg1rThread->do_traversal()) {
_pya = PYA_restart;
} else {
_cg1rThread->set_do_traversal(true);
// Reset the post-yield actions.
_pya = PYA_continue;
_last_pya = PYA_continue;
}
} else {
_pya = PYA_restart;
}
}
void ConcurrentG1Refine::set_pya_cancel() {
_pya = PYA_cancel;
}
PostYieldAction ConcurrentG1Refine::get_pya() {
if (_pya != PYA_continue) {
jint val = _pya;
while (true) {
jint val_read = Atomic::cmpxchg(PYA_continue, &_pya, val);
if (val_read == val) {
PostYieldAction res = (PostYieldAction)val;
assert(res != PYA_continue, "Only the refine thread should reset.");
_last_pya = res;
return res;
} else {
val = val_read;
}
}
}
// QQQ WELL WHAT DO WE RETURN HERE???
// make up something!
return PYA_continue;
}
PostYieldAction ConcurrentG1Refine::get_last_pya() {
PostYieldAction res = _last_pya;
_last_pya = PYA_continue;
return res;
}
bool ConcurrentG1Refine::do_traversal() {
return _cg1rThread->do_traversal();
}
int ConcurrentG1Refine::add_card_count(jbyte* card_ptr) {
  size_t card_num = (card_ptr - _ct_bot);
  guarantee(0 <= card_num && card_num < _n_card_counts, "Bounds");
  unsigned char cnt = _card_counts[card_num];
  if (cnt < 255) _card_counts[card_num]++;
  // Note: this is the count as it was before the increment above.
  return cnt;
}
jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr) {
int count = add_card_count(card_ptr);
// Count previously unvisited cards.
if (count == 0) _total_cards++;
// We'll assume a traversal unless we store it in the cache.
if (count < G1ConcRSHotCardLimit) {
_total_travs++;
return card_ptr;
}
// Otherwise, it's hot.
jbyte* res = NULL;
MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag);
if (_n_hot == _hot_cache_size) {
_total_travs++;
res = _hot_cache[_hot_cache_idx];
_n_hot--;
}
// Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx.
_hot_cache[_hot_cache_idx] = card_ptr;
_hot_cache_idx++;
if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0;
_n_hot++;
return res;
}
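As a reading aid, here is a hedged standalone miniature of the eviction behaviour implemented above (an assumed simplification, not HotSpot code): a fixed-size ring where the slot about to be overwritten holds the oldest entry, which is handed back to the caller once the ring is full.

#include <cstdio>

struct MiniHotCache {
  static const int Size = 4;
  const char* _slots[Size];
  int _idx;       // next slot to write
  int _occupied;  // number of live entries

  MiniHotCache() : _idx(0), _occupied(0) {
    for (int i = 0; i < Size; i++) _slots[i] = 0;
  }

  // Mirrors the tail of cache_insert: returns NULL if the entry was simply
  // stored, otherwise returns the evicted (oldest) entry.
  const char* insert(const char* card) {
    const char* evicted = 0;
    if (_occupied == Size) {
      evicted = _slots[_idx];  // the next write position holds the oldest entry
      _occupied--;
    }
    _slots[_idx] = card;
    _idx = (_idx + 1) % Size;
    _occupied++;
    return evicted;
  }
};

int main() {
  MiniHotCache cache;
  const char* cards[] = { "c0", "c1", "c2", "c3", "c4", "c5" };
  for (int i = 0; i < 6; i++) {
    const char* evicted = cache.insert(cards[i]);
    if (evicted != 0) std::printf("evicted %s\n", evicted);  // prints c0, then c1
  }
  return 0;
}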
void ConcurrentG1Refine::clean_up_cache(int worker_i, G1RemSet* g1rs) {
assert(!use_cache(), "cache should be disabled");
int start_ind = _hot_cache_idx-1;
for (int i = 0; i < _n_hot; i++) {
int ind = start_ind - i;
if (ind < 0) ind = ind + _hot_cache_size;
jbyte* entry = _hot_cache[ind];
if (entry != NULL) {
g1rs->concurrentRefineOneCard(entry, worker_i);
}
}
_n_hot = 0;
_hot_cache_idx = 0;
}
void ConcurrentG1Refine::clear_and_record_card_counts() {
if (G1ConcRSLogCacheSize == 0 && !G1ConcRSCountTraversals) return;
_n_periods++;
if (G1ConcRSCountTraversals) {
for (size_t i = 0; i < _n_card_counts; i++) {
unsigned char bucket = _card_counts[i];
_cur_card_count_histo[bucket]++;
_card_counts[i] = 0;
}
gclog_or_tty->print_cr("Card counts:");
for (int i = 0; i < 256; i++) {
if (_cur_card_count_histo[i] > 0) {
gclog_or_tty->print_cr(" %3d: %9d", i, _cur_card_count_histo[i]);
_cum_card_count_histo[i] += _cur_card_count_histo[i];
_cur_card_count_histo[i] = 0;
}
}
} else {
assert(G1ConcRSLogCacheSize > 0, "Logic");
Copy::fill_to_words((HeapWord*)(&_card_counts[0]),
_n_card_counts / HeapWordSize);
}
}
void
ConcurrentG1Refine::
print_card_count_histo_range(unsigned* histo, int from, int to,
float& cum_card_pct,
float& cum_travs_pct) {
unsigned cards = 0;
unsigned travs = 0;
guarantee(to <= 256, "Precondition");
for (int i = from; i < to-1; i++) {
cards += histo[i];
travs += histo[i] * i;
}
if (to == 256) {
unsigned histo_card_sum = 0;
unsigned histo_trav_sum = 0;
for (int i = 1; i < 255; i++) {
histo_trav_sum += histo[i] * i;
}
cards += histo[255];
// correct traversals for the last one.
unsigned travs_255 = (unsigned) (_total_travs - histo_trav_sum);
travs += travs_255;
} else {
cards += histo[to-1];
travs += histo[to-1] * (to-1);
}
float fperiods = (float)_n_periods;
float f_tot_cards = (float)_total_cards/fperiods;
float f_tot_travs = (float)_total_travs/fperiods;
if (cards > 0) {
float fcards = (float)cards/fperiods;
float ftravs = (float)travs/fperiods;
if (to == 256) {
gclog_or_tty->print(" %4d- %10.2f%10.2f", from, fcards, ftravs);
} else {
gclog_or_tty->print(" %4d-%4d %10.2f%10.2f", from, to-1, fcards, ftravs);
}
float pct_cards = fcards*100.0/f_tot_cards;
cum_card_pct += pct_cards;
float pct_travs = ftravs*100.0/f_tot_travs;
cum_travs_pct += pct_travs;
gclog_or_tty->print_cr("%10.2f%10.2f%10.2f%10.2f",
pct_cards, cum_card_pct,
pct_travs, cum_travs_pct);
}
}
void ConcurrentG1Refine::print_final_card_counts() {
if (!G1ConcRSCountTraversals) return;
gclog_or_tty->print_cr("Did %d total traversals of %d distinct cards.",
_total_travs, _total_cards);
float fperiods = (float)_n_periods;
gclog_or_tty->print_cr(" This is an average of %8.2f traversals, %8.2f cards, "
"per collection.", (float)_total_travs/fperiods,
(float)_total_cards/fperiods);
gclog_or_tty->print_cr(" This is an average of %8.2f traversals/distinct "
"dirty card.\n",
_total_cards > 0 ?
(float)_total_travs/(float)_total_cards : 0.0);
gclog_or_tty->print_cr("Histogram:\n\n%10s %10s%10s%10s%10s%10s%10s",
"range", "# cards", "# travs", "% cards", "(cum)",
"% travs", "(cum)");
gclog_or_tty->print_cr("------------------------------------------------------------"
"-------------");
float cum_cards_pct = 0.0;
float cum_travs_pct = 0.0;
for (int i = 1; i < 10; i++) {
print_card_count_histo_range(_cum_card_count_histo, i, i+1,
cum_cards_pct, cum_travs_pct);
}
for (int i = 10; i < 100; i += 10) {
print_card_count_histo_range(_cum_card_count_histo, i, i+10,
cum_cards_pct, cum_travs_pct);
}
print_card_count_histo_range(_cum_card_count_histo, 100, 150,
cum_cards_pct, cum_travs_pct);
print_card_count_histo_range(_cum_card_count_histo, 150, 200,
cum_cards_pct, cum_travs_pct);
print_card_count_histo_range(_cum_card_count_histo, 200, 255,
cum_cards_pct, cum_travs_pct);
print_card_count_histo_range(_cum_card_count_histo, 255, 256,
cum_cards_pct, cum_travs_pct);
}

View File

@@ -0,0 +1,132 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
// Forward decl
class ConcurrentG1RefineThread;
class G1RemSet;
// What to do after a yield:
enum PostYieldAction {
PYA_continue, // Continue the traversal
PYA_restart, // Restart
PYA_cancel // It's been completed by somebody else: cancel.
};
class ConcurrentG1Refine {
ConcurrentG1RefineThread* _cg1rThread;
volatile jint _pya;
PostYieldAction _last_pya;
static bool _enabled; // Protected by G1ConcRefine_mon.
unsigned _traversals;
unsigned _first_traversal;
// Number of cards processed during the last refinement traversal.
unsigned _last_cards_during;
// The cache for card refinement.
bool _use_cache;
bool _def_use_cache;
size_t _n_periods;
size_t _total_cards;
size_t _total_travs;
unsigned char* _card_counts;
unsigned _n_card_counts;
const jbyte* _ct_bot;
unsigned* _cur_card_count_histo;
unsigned* _cum_card_count_histo;
jbyte** _hot_cache;
int _hot_cache_size;
int _n_hot;
int _hot_cache_idx;
// Returns the count this card had before this call incremented it.
int add_card_count(jbyte* card_ptr);
void print_card_count_histo_range(unsigned* histo, int from, int to,
float& cum_card_pct,
float& cum_travs_pct);
public:
ConcurrentG1Refine();
~ConcurrentG1Refine();
void init(); // Accomplish some initialization that has to wait.
// Enables concurrent refinement, waking up the thread if necessary.
void enable();
// Returns the number of traversals performed since this refiner was enabled.
unsigned disable();
// Requires G1ConcRefine_mon to be held.
bool enabled() { return _enabled; }
// Returns only when G1 concurrent refinement has been enabled.
void wait_for_ConcurrentG1Refine_enabled();
// Do one concurrent refinement pass over the card table. Returns "true"
// if heuristics determine that another pass should be done immediately.
bool refine();
// Indicate that an in-progress refinement pass should start over.
void set_pya_restart();
// Indicate that an in-progress refinement pass should quit.
void set_pya_cancel();
// Get the appropriate post-yield action. Also sets last_pya.
PostYieldAction get_pya();
// The last PYA read by "get_pya".
PostYieldAction get_last_pya();
bool do_traversal();
ConcurrentG1RefineThread* cg1rThread() { return _cg1rThread; }
// If the card's count is still below the hot threshold, returns the card
// itself so the caller can refine it immediately. Otherwise the card is
// stored in the hot cache: returns NULL if a slot was free, or the evicted
// (older) entry if the insertion displaced one.
jbyte* cache_insert(jbyte* card_ptr);
// Process the cached entries.
void clean_up_cache(int worker_i, G1RemSet* g1rs);
// Discard entries in the hot cache.
void clear_hot_cache() {
_hot_cache_idx = 0; _n_hot = 0;
}
bool hot_cache_is_empty() { return _n_hot == 0; }
bool use_cache() { return _use_cache; }
void set_use_cache(bool b) {
if (b) _use_cache = _def_use_cache;
else _use_cache = false;
}
void clear_and_record_card_counts();
void print_final_card_counts();
};

View File

@@ -0,0 +1,246 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
#include "incls/_precompiled.incl"
#include "incls/_concurrentG1RefineThread.cpp.incl"
// ======= Concurrent G1 Refinement Thread ========
// The G1 refinement thread is created when the G1 garbage collector is used
ConcurrentG1RefineThread::
ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r) :
ConcurrentGCThread(),
_cg1r(cg1r),
_started(false),
_in_progress(false),
_do_traversal(false),
_vtime_accum(0.0),
_co_tracker(G1CRGroup),
_interval_ms(5.0)
{
create_and_start();
}
const long timeout = 200; // ms.
void ConcurrentG1RefineThread::traversalBasedRefinement() {
_cg1r->wait_for_ConcurrentG1Refine_enabled();
MutexLocker x(G1ConcRefine_mon);
while (_cg1r->enabled()) {
MutexUnlocker ux(G1ConcRefine_mon);
ResourceMark rm;
HandleMark hm;
if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine starting pass");
_sts.join();
bool no_sleep = _cg1r->refine();
_sts.leave();
if (!no_sleep) {
MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
// We do this only for the timeout; we don't expect this to be signalled.
CGC_lock->wait(Mutex::_no_safepoint_check_flag, timeout);
}
}
}
void ConcurrentG1RefineThread::queueBasedRefinement() {
DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
// Wait for completed log buffers to exist.
{
MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
while (!_do_traversal && !dcqs.process_completed_buffers() &&
!_should_terminate) {
DirtyCardQ_CBL_mon->wait(Mutex::_no_safepoint_check_flag);
}
}
if (_should_terminate) {
return;
}
// Now we take them off (this doesn't hold locks while it applies the
// closures). (If we did a full collection, then we'll do a full
// traversal.)
_sts.join();
if (_do_traversal) {
(void)_cg1r->refine();
switch (_cg1r->get_last_pya()) {
case PYA_cancel: case PYA_continue:
// Continue was caught and handled inside "refine". If it's still
// "continue" when we get here, we're done.
_do_traversal = false;
break;
case PYA_restart:
assert(_do_traversal, "Because of Full GC.");
break;
}
} else {
int n_logs = 0;
int lower_limit = 0;
double start_vtime_sec; // only used when G1SmoothConcRefine is on
int prev_buffer_num; // only used when G1SmoothConcRefine is on
if (G1SmoothConcRefine) {
lower_limit = 0;
start_vtime_sec = os::elapsedVTime();
prev_buffer_num = (int) dcqs.completed_buffers_num();
} else {
lower_limit = DCQBarrierProcessCompletedThreshold / 4; // For now.
}
while (dcqs.apply_closure_to_completed_buffer(0, lower_limit)) {
double end_vtime_sec;
double elapsed_vtime_sec;
int elapsed_vtime_ms;
int curr_buffer_num;
if (G1SmoothConcRefine) {
end_vtime_sec = os::elapsedVTime();
elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
elapsed_vtime_ms = (int) (elapsed_vtime_sec * 1000.0);
curr_buffer_num = (int) dcqs.completed_buffers_num();
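// Buffers piling up (or above the threshold) means refinement is falling
// behind, so sleep less between buffers; a shrinking backlog means we can
// afford to sleep longer.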
if (curr_buffer_num > prev_buffer_num ||
curr_buffer_num > DCQBarrierProcessCompletedThreshold) {
decreaseInterval(elapsed_vtime_ms);
} else if (curr_buffer_num < prev_buffer_num) {
increaseInterval(elapsed_vtime_ms);
}
}
sample_young_list_rs_lengths();
_co_tracker.update(false);
if (G1SmoothConcRefine) {
start_vtime_sec = os::elapsedVTime();
prev_buffer_num = curr_buffer_num;
_sts.leave();
os::sleep(Thread::current(), (jlong) _interval_ms, false);
_sts.join();
}
n_logs++;
}
// Make sure we harvest the PYA, if any.
(void)_cg1r->get_pya();
}
_sts.leave();
}
void ConcurrentG1RefineThread::sample_young_list_rs_lengths() {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
G1CollectorPolicy* g1p = g1h->g1_policy();
if (g1p->adaptive_young_list_length()) {
int regions_visited = 0;
g1h->young_list_rs_length_sampling_init();
while (g1h->young_list_rs_length_sampling_more()) {
g1h->young_list_rs_length_sampling_next();
++regions_visited;
// we try to yield every time we visit 10 regions
if (regions_visited == 10) {
if (_sts.should_yield()) {
_sts.yield("G1 refine");
// we just abandon the iteration
break;
}
regions_visited = 0;
}
}
g1p->check_prediction_validity();
}
}
void ConcurrentG1RefineThread::run() {
initialize_in_thread();
_vtime_start = os::elapsedVTime();
wait_for_universe_init();
_co_tracker.enable();
_co_tracker.start();
while (!_should_terminate) {
// wait until started is set.
if (G1RSBarrierUseQueue) {
queueBasedRefinement();
} else {
traversalBasedRefinement();
}
_sts.join();
_co_tracker.update();
_sts.leave();
if (os::supports_vtime()) {
_vtime_accum = (os::elapsedVTime() - _vtime_start);
} else {
_vtime_accum = 0.0;
}
}
_sts.join();
_co_tracker.update(true);
_sts.leave();
assert(_should_terminate, "just checking");
terminate();
}
void ConcurrentG1RefineThread::yield() {
if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine-yield");
_sts.yield("G1 refine");
if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine-yield-end");
}
void ConcurrentG1RefineThread::stop() {
// it is ok to take late safepoints here, if needed
{
MutexLockerEx mu(Terminator_lock);
_should_terminate = true;
}
{
MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
DirtyCardQ_CBL_mon->notify_all();
}
{
MutexLockerEx mu(Terminator_lock);
while (!_has_terminated) {
Terminator_lock->wait();
}
}
if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine-stop");
}
void ConcurrentG1RefineThread::print() {
gclog_or_tty->print("\"Concurrent G1 Refinement Thread\" ");
Thread::print();
gclog_or_tty->cr();
}
void ConcurrentG1RefineThread::set_do_traversal(bool b) {
_do_traversal = b;
}

View File

@@ -0,0 +1,104 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
// Forward Decl.
class ConcurrentG1Refine;
// The G1 Concurrent Refinement Thread (could be several in the future).
class ConcurrentG1RefineThread: public ConcurrentGCThread {
friend class VMStructs;
friend class G1CollectedHeap;
double _vtime_start; // Initial virtual time.
double _vtime_accum; // Accumulated virtual time.
public:
virtual void run();
private:
ConcurrentG1Refine* _cg1r;
bool _started;
bool _in_progress;
volatile bool _restart;
COTracker _co_tracker;
double _interval_ms;
bool _do_traversal;
void decreaseInterval(int processing_time_ms) {
double min_interval_ms = (double) processing_time_ms;
_interval_ms = 0.8 * _interval_ms;
if (_interval_ms < min_interval_ms)
_interval_ms = min_interval_ms;
}
void increaseInterval(int processing_time_ms) {
double max_interval_ms = 9.0 * (double) processing_time_ms;
_interval_ms = 1.1 * _interval_ms;
if (max_interval_ms > 0 && _interval_ms > max_interval_ms)
_interval_ms = max_interval_ms;
}
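// For the two adjusters above: starting from the 5.0 ms initial interval set
// in the constructor, three consecutive decreases give
// 5.0 -> 4.0 -> 3.2 -> 2.56 ms, never dropping below the time the last batch
// actually took to process.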
void sleepBeforeNextCycle();
void traversalBasedRefinement();
void queueBasedRefinement();
// For use by G1CollectedHeap, which is a friend.
static SuspendibleThreadSet* sts() { return &_sts; }
public:
// Constructor
ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r);
// Printing
void print();
// Total virtual time so far.
double vtime_accum() { return _vtime_accum; }
ConcurrentG1Refine* cg1r() { return _cg1r; }
void set_started() { _started = true; }
void clear_started() { _started = false; }
bool started() { return _started; }
void set_in_progress() { _in_progress = true; }
void clear_in_progress() { _in_progress = false; }
bool in_progress() { return _in_progress; }
void set_do_traversal(bool b);
bool do_traversal() { return _do_traversal; }
void sample_young_list_rs_lengths();
// Yield for GC
void yield();
// shutdown
static void stop();
};

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,336 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
#include "incls/_precompiled.incl"
#include "incls/_concurrentMarkThread.cpp.incl"
// ======= Concurrent Mark Thread ========
// The CM thread is created when the G1 garbage collector is used
SurrogateLockerThread*
ConcurrentMarkThread::_slt = NULL;
ConcurrentMarkThread::ConcurrentMarkThread(ConcurrentMark* cm) :
ConcurrentGCThread(),
_cm(cm),
_started(false),
_in_progress(false),
_vtime_accum(0.0),
_vtime_mark_accum(0.0),
_vtime_count_accum(0.0)
{
create_and_start();
}
class CMCheckpointRootsInitialClosure: public VoidClosure {
ConcurrentMark* _cm;
public:
CMCheckpointRootsInitialClosure(ConcurrentMark* cm) :
_cm(cm) {}
void do_void(){
_cm->checkpointRootsInitial();
}
};
class CMCheckpointRootsFinalClosure: public VoidClosure {
ConcurrentMark* _cm;
public:
CMCheckpointRootsFinalClosure(ConcurrentMark* cm) :
_cm(cm) {}
void do_void(){
_cm->checkpointRootsFinal(false); // !clear_all_soft_refs
}
};
class CMCleanUp: public VoidClosure {
ConcurrentMark* _cm;
public:
CMCleanUp(ConcurrentMark* cm) :
_cm(cm) {}
void do_void(){
_cm->cleanup();
}
};
void ConcurrentMarkThread::run() {
initialize_in_thread();
_vtime_start = os::elapsedVTime();
wait_for_universe_init();
G1CollectedHeap* g1 = G1CollectedHeap::heap();
G1CollectorPolicy* g1_policy = g1->g1_policy();
G1MMUTracker *mmu_tracker = g1_policy->mmu_tracker();
Thread *current_thread = Thread::current();
while (!_should_terminate) {
// wait until started is set.
sleepBeforeNextCycle();
{
ResourceMark rm;
HandleMark hm;
double cycle_start = os::elapsedVTime();
double mark_start_sec = os::elapsedTime();
char verbose_str[128];
if (PrintGC) {
gclog_or_tty->date_stamp(PrintGCDateStamps);
gclog_or_tty->stamp(PrintGCTimeStamps);
gclog_or_tty->print_cr("[GC concurrent-mark-start]");
}
if (!g1_policy->in_young_gc_mode()) {
// this ensures the flag is not set if we bail out of the marking
// cycle; normally the flag is cleared immediately after cleanup
g1->set_marking_complete();
if (g1_policy->adaptive_young_list_length()) {
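// Delay the start of the initial-mark pause so that, given its predicted
// duration, the MMU goal tracked by mmu_tracker is still met.
// (Assumption: when_ms() returns how long to wait before a pause of the
// predicted length may start.)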
double now = os::elapsedTime();
double init_prediction_ms = g1_policy->predict_init_time_ms();
jlong sleep_time_ms = mmu_tracker->when_ms(now, init_prediction_ms);
os::sleep(current_thread, sleep_time_ms, false);
}
// We don't have to skip here if we've been asked to restart, because
// in the worst case we just enqueue a new VM operation to start a
// marking. Note that the init operation resets has_aborted()
CMCheckpointRootsInitialClosure init_cl(_cm);
strcpy(verbose_str, "GC initial-mark");
VM_CGC_Operation op(&init_cl, verbose_str);
VMThread::execute(&op);
}
int iter = 0;
do {
iter++;
if (!cm()->has_aborted()) {
_cm->markFromRoots();
} else {
if (TraceConcurrentMark)
gclog_or_tty->print_cr("CM-skip-mark-from-roots");
}
double mark_end_time = os::elapsedVTime();
double mark_end_sec = os::elapsedTime();
_vtime_mark_accum += (mark_end_time - cycle_start);
if (!cm()->has_aborted()) {
if (g1_policy->adaptive_young_list_length()) {
double now = os::elapsedTime();
double remark_prediction_ms = g1_policy->predict_remark_time_ms();
jlong sleep_time_ms = mmu_tracker->when_ms(now, remark_prediction_ms);
os::sleep(current_thread, sleep_time_ms, false);
}
if (PrintGC) {
gclog_or_tty->date_stamp(PrintGCDateStamps);
gclog_or_tty->stamp(PrintGCTimeStamps);
gclog_or_tty->print_cr("[GC concurrent-mark-end, %1.7lf sec]",
mark_end_sec - mark_start_sec);
}
CMCheckpointRootsFinalClosure final_cl(_cm);
sprintf(verbose_str, "GC remark");
VM_CGC_Operation op(&final_cl, verbose_str);
VMThread::execute(&op);
} else {
if (TraceConcurrentMark)
gclog_or_tty->print_cr("CM-skip-remark");
}
if (cm()->restart_for_overflow() &&
G1TraceMarkStackOverflow) {
gclog_or_tty->print_cr("Restarting conc marking because of MS overflow "
"in remark (restart #%d).", iter);
}
if (cm()->restart_for_overflow()) {
if (PrintGC) {
gclog_or_tty->date_stamp(PrintGCDateStamps);
gclog_or_tty->stamp(PrintGCTimeStamps);
gclog_or_tty->print_cr("[GC concurrent-mark-restart-for-overflow]");
}
}
} while (cm()->restart_for_overflow());
double counting_start_time = os::elapsedVTime();
// YSR: These look dubious (i.e. redundant) !!! FIX ME
slt()->manipulatePLL(SurrogateLockerThread::acquirePLL);
slt()->manipulatePLL(SurrogateLockerThread::releaseAndNotifyPLL);
if (!cm()->has_aborted()) {
double count_start_sec = os::elapsedTime();
if (PrintGC) {
gclog_or_tty->date_stamp(PrintGCDateStamps);
gclog_or_tty->stamp(PrintGCTimeStamps);
gclog_or_tty->print_cr("[GC concurrent-count-start]");
}
_sts.join();
_cm->calcDesiredRegions();
_sts.leave();
if (!cm()->has_aborted()) {
double count_end_sec = os::elapsedTime();
if (PrintGC) {
gclog_or_tty->date_stamp(PrintGCDateStamps);
gclog_or_tty->stamp(PrintGCTimeStamps);
gclog_or_tty->print_cr("[GC concurrent-count-end, %1.7lf]",
count_end_sec - count_start_sec);
}
}
} else {
if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-skip-end-game");
}
double end_time = os::elapsedVTime();
_vtime_count_accum += (end_time - counting_start_time);
// Update the total virtual time before doing this, since it will try
// to measure it to get the vtime for this marking. We purposely
// neglect the presumably-short "completeCleanup" phase here.
_vtime_accum = (end_time - _vtime_start);
if (!cm()->has_aborted()) {
if (g1_policy->adaptive_young_list_length()) {
double now = os::elapsedTime();
double cleanup_prediction_ms = g1_policy->predict_cleanup_time_ms();
jlong sleep_time_ms = mmu_tracker->when_ms(now, cleanup_prediction_ms);
os::sleep(current_thread, sleep_time_ms, false);
}
CMCleanUp cl_cl(_cm);
sprintf(verbose_str, "GC cleanup");
VM_CGC_Operation op(&cl_cl, verbose_str);
VMThread::execute(&op);
} else {
if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-skip-cleanup");
G1CollectedHeap::heap()->set_marking_complete();
}
if (!cm()->has_aborted()) {
double cleanup_start_sec = os::elapsedTime();
if (PrintGC) {
gclog_or_tty->date_stamp(PrintGCDateStamps);
gclog_or_tty->stamp(PrintGCTimeStamps);
gclog_or_tty->print_cr("[GC concurrent-cleanup-start]");
}
// Now do the remainder of the cleanup operation.
_sts.join();
_cm->completeCleanup();
if (!cm()->has_aborted()) {
g1_policy->record_concurrent_mark_cleanup_completed();
double cleanup_end_sec = os::elapsedTime();
if (PrintGC) {
gclog_or_tty->date_stamp(PrintGCDateStamps);
gclog_or_tty->stamp(PrintGCTimeStamps);
gclog_or_tty->print_cr("[GC concurrent-cleanup-end, %1.7lf]",
cleanup_end_sec - cleanup_start_sec);
}
}
_sts.leave();
}
// We're done: no more unclean regions coming.
G1CollectedHeap::heap()->set_unclean_regions_coming(false);
if (cm()->has_aborted()) {
if (PrintGC) {
gclog_or_tty->date_stamp(PrintGCDateStamps);
gclog_or_tty->stamp(PrintGCTimeStamps);
gclog_or_tty->print_cr("[GC concurrent-mark-abort]");
}
}
_sts.join();
_cm->disable_co_trackers();
_sts.leave();
// we now want to allow clearing of the marking bitmap to be
// suspended by a collection pause.
_sts.join();
_cm->clearNextBitmap();
_sts.leave();
}
}
assert(_should_terminate, "just checking");
terminate();
}
void ConcurrentMarkThread::yield() {
if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-yield");
_sts.yield("Concurrent Mark");
if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-yield-end");
}
void ConcurrentMarkThread::stop() {
// it is ok to take late safepoints here, if needed
MutexLockerEx mu(Terminator_lock);
_should_terminate = true;
while (!_has_terminated) {
Terminator_lock->wait();
}
if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-stop");
}
void ConcurrentMarkThread::print() {
gclog_or_tty->print("\"Concurrent Mark GC Thread\" ");
Thread::print();
gclog_or_tty->cr();
}
void ConcurrentMarkThread::sleepBeforeNextCycle() {
clear_in_progress();
// We join here because we don't want to do the "shouldConcurrentMark()"
// below while the world is otherwise stopped.
MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
while (!started()) {
if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-sleeping");
CGC_lock->wait(Mutex::_no_safepoint_check_flag);
}
set_in_progress();
clear_started();
if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-starting");
return;
}
// Note: this method, although exported by the ConcurrentMarkThread,
// which is a non-JavaThread, can only be called by a JavaThread.
// Currently this is done at vm creation time (post-vm-init) by the
// main/Primordial (Java)Thread.
// XXX Consider changing this in the future to allow the CM thread
// itself to create this thread?
void ConcurrentMarkThread::makeSurrogateLockerThread(TRAPS) {
assert(_slt == NULL, "SLT already created");
_slt = SurrogateLockerThread::make(THREAD);
}

View File

@@ -0,0 +1,84 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
// The Concurrent Mark GC Thread (could be several in the future).
// This is copied from the Concurrent Mark Sweep GC Thread
// Still under construction.
class ConcurrentMark;
class ConcurrentMarkThread: public ConcurrentGCThread {
friend class VMStructs;
double _vtime_start; // Initial virtual time.
double _vtime_accum; // Accumulated virtual time.
double _vtime_mark_accum;
double _vtime_count_accum;
public:
virtual void run();
private:
ConcurrentMark* _cm;
bool _started;
bool _in_progress;
void sleepBeforeNextCycle();
static SurrogateLockerThread* _slt;
public:
// Constructor
ConcurrentMarkThread(ConcurrentMark* cm);
static void makeSurrogateLockerThread(TRAPS);
static SurrogateLockerThread* slt() { return _slt; }
// Printing
void print();
// Total virtual time so far.
double vtime_accum();
// Marking virtual time so far
double vtime_mark_accum();
// Counting virtual time so far.
double vtime_count_accum() { return _vtime_count_accum; }
ConcurrentMark* cm() { return _cm; }
void set_started() { _started = true; }
void clear_started() { _started = false; }
bool started() { return _started; }
void set_in_progress() { _in_progress = true; }
void clear_in_progress() { _in_progress = false; }
bool in_progress() { return _in_progress; }
// Yield for GC
void yield();
// shutdown
static void stop();
};

View File

@@ -0,0 +1,33 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
// Total virtual time so far.
inline double ConcurrentMarkThread::vtime_accum() {
return _vtime_accum + _cm->all_task_accum_vtime();
}
// Marking virtual time so far
inline double ConcurrentMarkThread::vtime_mark_accum() {
return _vtime_mark_accum + _cm->all_task_accum_vtime();
}

View File

@@ -0,0 +1,191 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
#include "incls/_precompiled.incl"
#include "incls/_concurrentZFThread.cpp.incl"
// ======= Concurrent Zero-Fill Thread ========
// The ZF thread is created when the G1 garbage collector is used
int ConcurrentZFThread::_region_allocs = 0;
int ConcurrentZFThread::_sync_zfs = 0;
int ConcurrentZFThread::_zf_waits = 0;
int ConcurrentZFThread::_regions_filled = 0;
ConcurrentZFThread::ConcurrentZFThread() :
ConcurrentGCThread(),
_co_tracker(G1ZFGroup)
{
create_and_start();
}
void ConcurrentZFThread::wait_for_ZF_completed(HeapRegion* hr) {
assert(ZF_mon->owned_by_self(), "Precondition.");
note_zf_wait();
while (hr->zero_fill_state() == HeapRegion::ZeroFilling) {
ZF_mon->wait(Mutex::_no_safepoint_check_flag);
}
}
void ConcurrentZFThread::processHeapRegion(HeapRegion* hr) {
assert(!Universe::heap()->is_gc_active(),
"This should not happen during GC.");
assert(hr != NULL, "Precondition");
// These are unlocked reads, but if this test is successful, then no
// other thread will attempt this zero filling. Only a GC thread can
// modify the ZF state of a region whose state is zero-filling, and this
// should only happen while the ZF thread is locking out GC.
if (hr->zero_fill_state() == HeapRegion::ZeroFilling
&& hr->zero_filler() == Thread::current()) {
assert(hr->top() == hr->bottom(), "better be empty!");
assert(!hr->isHumongous(), "Only free regions on unclean list.");
Copy::fill_to_words(hr->bottom(), hr->capacity()/HeapWordSize);
note_region_filled();
}
}
void ConcurrentZFThread::run() {
initialize_in_thread();
Thread* thr_self = Thread::current();
_vtime_start = os::elapsedVTime();
wait_for_universe_init();
_co_tracker.enable();
_co_tracker.start();
G1CollectedHeap* g1 = G1CollectedHeap::heap();
_sts.join();
while (!_should_terminate) {
_sts.leave();
{
MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
// This local variable will hold a region being zero-filled. This
// region will be on neither the unclean nor the zero-filled lists, and
// will not be available for allocation; thus, an allocation might fail
// and cause a full GC because of this, but this is a price we are
// willing to pay. (In future, we might want to make the fact
// that there's a region being zero-filled apparent to the G1 heap,
// which could then wait for it in this extreme case...)
HeapRegion* to_fill;
while (!g1->should_zf()
|| (to_fill = g1->pop_unclean_region_list_locked()) == NULL)
ZF_mon->wait(Mutex::_no_safepoint_check_flag);
while (to_fill->zero_fill_state() == HeapRegion::ZeroFilling)
ZF_mon->wait(Mutex::_no_safepoint_check_flag);
// So now to_fill is non-NULL and is not ZeroFilling. It might be
// Allocated or ZeroFilled. (The latter could happen if this thread
// starts the zero-filling of a region, but a GC intervenes and
// pushes new regions needing on the front of the filling on the
// front of the list.)
switch (to_fill->zero_fill_state()) {
case HeapRegion::Allocated:
to_fill = NULL;
break;
case HeapRegion::NotZeroFilled:
to_fill->set_zero_fill_in_progress(thr_self);
ZF_mon->unlock();
_sts.join();
processHeapRegion(to_fill);
_sts.leave();
ZF_mon->lock_without_safepoint_check();
if (to_fill->zero_fill_state() == HeapRegion::ZeroFilling
&& to_fill->zero_filler() == thr_self) {
to_fill->set_zero_fill_complete();
(void)g1->put_free_region_on_list_locked(to_fill);
}
break;
case HeapRegion::ZeroFilled:
(void)g1->put_free_region_on_list_locked(to_fill);
break;
case HeapRegion::ZeroFilling:
ShouldNotReachHere();
break;
}
}
_vtime_accum = (os::elapsedVTime() - _vtime_start);
_sts.join();
_co_tracker.update();
}
_co_tracker.update(false);
_sts.leave();
assert(_should_terminate, "just checking");
terminate();
}
bool ConcurrentZFThread::offer_yield() {
if (_sts.should_yield()) {
_sts.yield("Concurrent ZF");
return true;
} else {
return false;
}
}
void ConcurrentZFThread::stop() {
// it is ok to take late safepoints here, if needed
MutexLockerEx mu(Terminator_lock);
_should_terminate = true;
while (!_has_terminated) {
Terminator_lock->wait();
}
}
void ConcurrentZFThread::print() {
gclog_or_tty->print("\"Concurrent ZF Thread\" ");
Thread::print();
gclog_or_tty->cr();
}
double ConcurrentZFThread::_vtime_accum;
void ConcurrentZFThread::print_summary_info() {
gclog_or_tty->print("\nConcurrent Zero-Filling:\n");
gclog_or_tty->print(" Filled %d regions, used %5.2fs.\n",
_regions_filled,
vtime_accum());
gclog_or_tty->print(" Of %d region allocs, %d (%5.2f%%) required sync ZF,\n",
_region_allocs, _sync_zfs,
(_region_allocs > 0 ?
(float)_sync_zfs/(float)_region_allocs*100.0 :
0.0));
gclog_or_tty->print(" and %d (%5.2f%%) required a ZF wait.\n",
_zf_waits,
(_region_allocs > 0 ?
(float)_zf_waits/(float)_region_allocs*100.0 :
0.0));
}
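// Editor's note: an illustrative, standalone sketch only -- not part of this
// change. It restates the hand-off pattern ConcurrentZFThread::run() implements
// above: a worker sleeps on a monitor until an unclean region is available,
// zero-fills it outside the lock, and republishes it as free under the lock.
// The Region type, the two queues and all names below are hypothetical
// stand-ins; the real code uses VM mutexes, safepoint checks and the G1 region
// lists rather than std:: primitives.
#include <condition_variable>
#include <cstring>
#include <deque>
#include <mutex>
#include <vector>

struct Region {
  std::vector<char> words;              // stand-in for the region's storage
  bool filled;
  explicit Region(size_t n) : words(n), filled(false) {}
};

static std::mutex zf_mon;               // plays the role of ZF_mon
static std::condition_variable zf_cv;
static std::deque<Region*> unclean;     // regions still needing zero-filling
static std::deque<Region*> free_list;   // regions ready to hand out
static bool should_terminate = false;

static void zero_fill_worker() {
  std::unique_lock<std::mutex> lock(zf_mon);
  while (!should_terminate) {
    // Sleep until there is work, or until we are asked to stop.
    zf_cv.wait(lock, [] { return should_terminate || !unclean.empty(); });
    if (should_terminate) break;
    Region* r = unclean.front();
    unclean.pop_front();
    lock.unlock();                      // fill outside the lock, like processHeapRegion()
    std::memset(r->words.data(), 0, r->words.size());
    lock.lock();
    r->filled = true;                   // analogous to set_zero_fill_complete()
    free_list.push_back(r);             // analogous to put_free_region_on_list_locked()
  }
}
// A caller would run zero_fill_worker() on a dedicated thread and notify zf_cv
// after pushing regions onto "unclean" (or after setting should_terminate).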

View File

@ -0,0 +1,85 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
// The Concurrent ZF Thread. Performs concurrent zero-filling.
class ConcurrentZFThread: public ConcurrentGCThread {
friend class VMStructs;
friend class ZeroFillRegionClosure;
private:
// Zero fill the heap region.
void processHeapRegion(HeapRegion* r);
// Stats
// Allocation (protected by heap lock).
static int _region_allocs; // Number of regions allocated.
static int _sync_zfs; // Number of region allocs that required a synchronous zero-fill.
static int _zf_waits; // Number of region allocs that had to wait for concurrent zero-fill completion.
// Number of regions the ZF thread fills.
static int _regions_filled;
COTracker _co_tracker;
double _vtime_start; // Initial virtual time.
// These are static because the "print_summary_info" method is, and
// it currently assumes there is only one ZF thread. We'll change when
// we need to.
static double _vtime_accum; // Accumulated virtual time.
static double vtime_accum() { return _vtime_accum; }
// Offer yield for GC. Returns true if yield occurred.
bool offer_yield();
public:
// Constructor
ConcurrentZFThread();
// Main loop.
virtual void run();
// Printing
void print();
// Waits until "r" has been zero-filled. Requires caller to hold the
// ZF_mon.
static void wait_for_ZF_completed(HeapRegion* r);
// Get or clear the current unclean region. Should be done
// while holding the ZF_needed_mon lock.
// Shutdown the ZF thread.
static void stop();
// Stats
static void note_region_alloc() {_region_allocs++; }
static void note_sync_zfs() { _sync_zfs++; }
static void note_zf_wait() { _zf_waits++; }
static void note_region_filled() { _regions_filled++; }
static void print_summary_info();
};

View File

@ -0,0 +1,308 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
# include "incls/_precompiled.incl"
# include "incls/_dirtyCardQueue.cpp.incl"
bool DirtyCardQueue::apply_closure(CardTableEntryClosure* cl,
bool consume,
size_t worker_i) {
bool res = true;
if (_buf != NULL) {
res = apply_closure_to_buffer(cl, _buf, _index, _sz,
consume,
(int) worker_i);
if (res && consume) _index = _sz;
}
return res;
}
bool DirtyCardQueue::apply_closure_to_buffer(CardTableEntryClosure* cl,
void** buf,
size_t index, size_t sz,
bool consume,
int worker_i) {
if (cl == NULL) return true;
for (size_t i = index; i < sz; i += oopSize) {
int ind = byte_index_to_index((int)i);
jbyte* card_ptr = (jbyte*)buf[ind];
if (card_ptr != NULL) {
// Set the entry to null, so we don't do it again (via the test
// above) if we reconsider this buffer.
if (consume) buf[ind] = NULL;
if (!cl->do_card_ptr(card_ptr, worker_i)) return false;
}
}
return true;
}
#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER
DirtyCardQueueSet::DirtyCardQueueSet() :
PtrQueueSet(true /*notify_when_complete*/),
_closure(NULL),
_shared_dirty_card_queue(this, true /*perm*/),
_free_ids(NULL),
_processed_buffers_mut(0), _processed_buffers_rs_thread(0)
{
_all_active = true;
}
size_t DirtyCardQueueSet::num_par_ids() {
return MAX2(ParallelGCThreads, (size_t)2);
}
void DirtyCardQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock,
int max_completed_queue,
Mutex* lock) {
PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue);
set_buffer_size(DCQBarrierQueueBufferSize);
set_process_completed_threshold(DCQBarrierProcessCompletedThreshold);
_shared_dirty_card_queue.set_lock(lock);
_free_ids = new FreeIdSet((int) num_par_ids(), _cbl_mon);
bool b = _free_ids->claim_perm_id(0);
guarantee(b, "Must reserve id zero for concurrent refinement thread.");
}
void DirtyCardQueueSet::handle_zero_index_for_thread(JavaThread* t) {
t->dirty_card_queue().handle_zero_index();
}
void DirtyCardQueueSet::set_closure(CardTableEntryClosure* closure) {
_closure = closure;
}
void DirtyCardQueueSet::iterate_closure_all_threads(bool consume,
size_t worker_i) {
assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
for(JavaThread* t = Threads::first(); t; t = t->next()) {
bool b = t->dirty_card_queue().apply_closure(_closure, consume);
guarantee(b, "Should not be interrupted.");
}
bool b = shared_dirty_card_queue()->apply_closure(_closure,
consume,
worker_i);
guarantee(b, "Should not be interrupted.");
}
bool DirtyCardQueueSet::mut_process_buffer(void** buf) {
// Used to determine if we had already claimed a par_id
// before entering this method.
bool already_claimed = false;
// We grab the current JavaThread.
JavaThread* thread = JavaThread::current();
// We get the number of any par_id that this thread
// might have already claimed.
int worker_i = thread->get_claimed_par_id();
// If worker_i is not -1 then the thread has already claimed
// a par_id. We make note of it using the already_claimed value
if (worker_i != -1) {
already_claimed = true;
} else {
// Otherwise we need to claim a par id
worker_i = _free_ids->claim_par_id();
// And store the par_id value in the thread
thread->set_claimed_par_id(worker_i);
}
bool b = false;
if (worker_i != -1) {
b = DirtyCardQueue::apply_closure_to_buffer(_closure, buf, 0,
_sz, true, worker_i);
if (b) Atomic::inc(&_processed_buffers_mut);
// If we had not claimed an id before entering the method
// then we must release the id.
if (!already_claimed) {
// we release the id
_free_ids->release_par_id(worker_i);
// and set the claimed_id in the thread to -1
thread->set_claimed_par_id(-1);
}
}
return b;
}
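// Editor's note: an illustrative, standalone sketch only -- not part of this
// change. It restates the claim/release discipline mut_process_buffer() follows
// above: a thread reuses a worker id it already holds, and releases an id only
// when it was claimed in this call. IdPool, claimed_id and
// process_buffer_with_id are hypothetical stand-ins for FreeIdSet, the thread's
// claimed_par_id field and the buffer-processing call.
#include <mutex>
#include <vector>

class IdPool {
  std::mutex _m;
  std::vector<bool> _in_use;
public:
  explicit IdPool(int n) : _in_use(n, false) {}
  int claim() {                              // returns -1 if no id is free
    std::lock_guard<std::mutex> g(_m);
    for (int i = 0; i < (int)_in_use.size(); i++)
      if (!_in_use[i]) { _in_use[i] = true; return i; }
    return -1;
  }
  void release(int i) {
    std::lock_guard<std::mutex> g(_m);
    _in_use[i] = false;
  }
};

static thread_local int claimed_id = -1;     // -1 means "no id claimed yet"

static bool process_buffer_with_id(IdPool& ids, bool (*work)(int worker_i)) {
  bool already_claimed = (claimed_id != -1); // note whether we walked in with an id
  if (!already_claimed) claimed_id = ids.claim();
  if (claimed_id == -1) return false;        // no id available; caller must cope
  bool ok = work(claimed_id);
  if (!already_claimed) {                    // only release what we claimed here
    ids.release(claimed_id);
    claimed_id = -1;
  }
  return ok;
}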
DirtyCardQueueSet::CompletedBufferNode*
DirtyCardQueueSet::get_completed_buffer_lock(int stop_at) {
CompletedBufferNode* nd = NULL;
MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
if ((int)_n_completed_buffers <= stop_at) {
_process_completed = false;
return NULL;
}
if (_completed_buffers_head != NULL) {
nd = _completed_buffers_head;
_completed_buffers_head = nd->next;
if (_completed_buffers_head == NULL)
_completed_buffers_tail = NULL;
_n_completed_buffers--;
}
debug_only(assert_completed_buffer_list_len_correct_locked());
return nd;
}
// We only do this in contexts where there is no concurrent enqueueing.
DirtyCardQueueSet::CompletedBufferNode*
DirtyCardQueueSet::get_completed_buffer_CAS() {
CompletedBufferNode* nd = _completed_buffers_head;
while (nd != NULL) {
CompletedBufferNode* next = nd->next;
CompletedBufferNode* result =
(CompletedBufferNode*)Atomic::cmpxchg_ptr(next,
&_completed_buffers_head,
nd);
if (result == nd) {
return result;
} else {
nd = _completed_buffers_head;
}
}
assert(_completed_buffers_head == NULL, "Loop post");
_completed_buffers_tail = NULL;
return NULL;
}
bool DirtyCardQueueSet::
apply_closure_to_completed_buffer_helper(int worker_i,
CompletedBufferNode* nd) {
if (nd != NULL) {
bool b =
DirtyCardQueue::apply_closure_to_buffer(_closure, nd->buf,
nd->index, _sz,
true, worker_i);
void** buf = nd->buf;
size_t index = nd->index;
delete nd;
if (b) {
deallocate_buffer(buf);
return true; // In normal case, go on to next buffer.
} else {
enqueue_complete_buffer(buf, index, true);
return false;
}
} else {
return false;
}
}
bool DirtyCardQueueSet::apply_closure_to_completed_buffer(int worker_i,
int stop_at,
bool with_CAS)
{
CompletedBufferNode* nd = NULL;
if (with_CAS) {
guarantee(stop_at == 0, "Precondition");
nd = get_completed_buffer_CAS();
} else {
nd = get_completed_buffer_lock(stop_at);
}
bool res = apply_closure_to_completed_buffer_helper(worker_i, nd);
if (res) _processed_buffers_rs_thread++;
return res;
}
void DirtyCardQueueSet::apply_closure_to_all_completed_buffers() {
CompletedBufferNode* nd = _completed_buffers_head;
while (nd != NULL) {
bool b =
DirtyCardQueue::apply_closure_to_buffer(_closure, nd->buf, 0, _sz,
false);
guarantee(b, "Should not stop early.");
nd = nd->next;
}
}
void DirtyCardQueueSet::abandon_logs() {
assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
CompletedBufferNode* buffers_to_delete = NULL;
{
MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
while (_completed_buffers_head != NULL) {
CompletedBufferNode* nd = _completed_buffers_head;
_completed_buffers_head = nd->next;
nd->next = buffers_to_delete;
buffers_to_delete = nd;
}
_n_completed_buffers = 0;
_completed_buffers_tail = NULL;
debug_only(assert_completed_buffer_list_len_correct_locked());
}
while (buffers_to_delete != NULL) {
CompletedBufferNode* nd = buffers_to_delete;
buffers_to_delete = nd->next;
deallocate_buffer(nd->buf);
delete nd;
}
// Since abandon is done only at safepoints, we can safely manipulate
// these queues.
for (JavaThread* t = Threads::first(); t; t = t->next()) {
t->dirty_card_queue().reset();
}
shared_dirty_card_queue()->reset();
}
void DirtyCardQueueSet::concatenate_logs() {
// Iterate over all the threads, if we find a partial log add it to
// the global list of logs. Temporarily turn off the limit on the number
// of outstanding buffers.
int save_max_completed_queue = _max_completed_queue;
_max_completed_queue = max_jint;
assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
for (JavaThread* t = Threads::first(); t; t = t->next()) {
DirtyCardQueue& dcq = t->dirty_card_queue();
if (dcq.size() != 0) {
void **buf = t->dirty_card_queue().get_buf();
// We must NULL out the unused entries, then enqueue.
for (size_t i = 0; i < t->dirty_card_queue().get_index(); i += oopSize) {
buf[PtrQueue::byte_index_to_index((int)i)] = NULL;
}
enqueue_complete_buffer(dcq.get_buf(), dcq.get_index());
dcq.reinitialize();
}
}
if (_shared_dirty_card_queue.size() != 0) {
enqueue_complete_buffer(_shared_dirty_card_queue.get_buf(),
_shared_dirty_card_queue.get_index());
_shared_dirty_card_queue.reinitialize();
}
// Restore the completed buffer queue limit.
_max_completed_queue = save_max_completed_queue;
}
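// Editor's note: an illustrative, standalone sketch only -- not part of this
// change. It restates the buffer walk apply_closure_to_buffer() performs above:
// positions are byte offsets that get converted to array slots, consumed entries
// are nulled so a later re-scan skips them, and the walk stops early when the
// closure returns false. CardClosure and kElemBytes are hypothetical
// simplifications of CardTableEntryClosure and oopSize.
#include <cstddef>

struct CardClosure {
  virtual bool do_card(unsigned char* card, int worker) = 0;
  virtual ~CardClosure() {}
};

static const size_t kElemBytes = sizeof(void*);      // stand-in for oopSize

static bool apply_to_buffer(CardClosure* cl, void** buf,
                            size_t byte_index, size_t byte_size,
                            bool consume, int worker) {
  if (cl == NULL) return true;
  for (size_t i = byte_index; i < byte_size; i += kElemBytes) {
    size_t slot = i / kElemBytes;                    // byte offset -> array index
    unsigned char* card = (unsigned char*)buf[slot];
    if (card == NULL) continue;                      // already consumed earlier
    if (consume) buf[slot] = NULL;                   // never process it twice
    if (!cl->do_card(card, worker)) return false;    // closure asked to stop early
  }
  return true;
}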

View File

@ -0,0 +1,152 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
class FreeIdSet;
// A closure class for processing card table entries. Note that we don't
// require these closure objects to be stack-allocated.
class CardTableEntryClosure: public CHeapObj {
public:
// Process the card whose card table entry is "card_ptr". If returns
// "false", terminate the iteration early.
virtual bool do_card_ptr(jbyte* card_ptr, int worker_i = 0) = 0;
};
// A PtrQueue whose elements are pointers to dirty card table entries.
class DirtyCardQueue: public PtrQueue {
public:
DirtyCardQueue(PtrQueueSet* qset_, bool perm = false) :
PtrQueue(qset_, perm)
{
// Dirty card queues are always active.
_active = true;
}
// Apply the closure to all elements, and reset the index to make the
// buffer empty. If a closure application returns "false", return
// "false" immediately, halting the iteration. If "consume" is true,
// deletes processed entries from logs.
bool apply_closure(CardTableEntryClosure* cl,
bool consume = true,
size_t worker_i = 0);
// Apply the closure to all elements of "buf", down to "index"
// (inclusive.) If returns "false", then a closure application returned
// "false", and we return immediately. If "consume" is true, entries are
// set to NULL as they are processed, so they will not be processed again
// later.
static bool apply_closure_to_buffer(CardTableEntryClosure* cl,
void** buf, size_t index, size_t sz,
bool consume = true,
int worker_i = 0);
void **get_buf() { return _buf;}
void set_buf(void **buf) {_buf = buf;}
size_t get_index() { return _index;}
void reinitialize() { _buf = 0; _sz = 0; _index = 0;}
};
class DirtyCardQueueSet: public PtrQueueSet {
CardTableEntryClosure* _closure;
DirtyCardQueue _shared_dirty_card_queue;
// Override.
bool mut_process_buffer(void** buf);
// Protected by the _cbl_mon.
FreeIdSet* _free_ids;
// The number of completed buffers processed by mutator and rs thread,
// respectively.
jint _processed_buffers_mut;
jint _processed_buffers_rs_thread;
public:
DirtyCardQueueSet();
void initialize(Monitor* cbl_mon, Mutex* fl_lock,
int max_completed_queue = 0,
Mutex* lock = NULL);
// The number of parallel ids that can be claimed to allow collector or
// mutator threads to do card-processing work.
static size_t num_par_ids();
static void handle_zero_index_for_thread(JavaThread* t);
// Register "blk" as "the closure" for all queues. Only one such closure
// is allowed. The "apply_closure_to_completed_buffer" method will apply
// this closure to a completed buffer, and "iterate_closure_all_threads"
// applies it to partially-filled buffers (the latter should only be done
// with the world stopped).
void set_closure(CardTableEntryClosure* closure);
// If there is a registered closure for buffers, apply it to all entries
// in all currently-active buffers. This should only be applied at a
// safepoint. (Currently must not be called in parallel; this should
// change in the future.) If "consume" is true, processed entries are
// discarded.
void iterate_closure_all_threads(bool consume = true,
size_t worker_i = 0);
// If there exists some completed buffer, pop it, then apply the
// registered closure to all its elements, nulling out those elements
// processed. If all elements are processed, returns "true". If no
// completed buffers exist, returns false. If a completed buffer exists,
// but is only partially completed before a "yield" happens, the
// partially completed buffer (with its processed elements set to NULL)
// is returned to the completed buffer set, and this call returns false.
bool apply_closure_to_completed_buffer(int worker_i = 0,
int stop_at = 0,
bool with_CAS = false);
bool apply_closure_to_completed_buffer_helper(int worker_i,
CompletedBufferNode* nd);
CompletedBufferNode* get_completed_buffer_CAS();
CompletedBufferNode* get_completed_buffer_lock(int stop_at);
// Applies the current closure to all completed buffers,
// non-consumptively.
void apply_closure_to_all_completed_buffers();
DirtyCardQueue* shared_dirty_card_queue() {
return &_shared_dirty_card_queue;
}
// If a full collection is happening, reset partial logs, and ignore
// completed ones: the full collection will make them all irrelevant.
void abandon_logs();
// If any threads have partial logs, add them to the global list of logs.
void concatenate_logs();
void clear_n_completed_buffers() { _n_completed_buffers = 0;}
jint processed_buffers_mut() {
return _processed_buffers_mut;
}
jint processed_buffers_rs_thread() {
return _processed_buffers_rs_thread;
}
};

View File

@ -0,0 +1,628 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
#include "incls/_precompiled.incl"
#include "incls/_g1BlockOffsetTable.cpp.incl"
//////////////////////////////////////////////////////////////////////
// G1BlockOffsetSharedArray
//////////////////////////////////////////////////////////////////////
G1BlockOffsetSharedArray::G1BlockOffsetSharedArray(MemRegion reserved,
size_t init_word_size) :
_reserved(reserved), _end(NULL)
{
size_t size = compute_size(reserved.word_size());
ReservedSpace rs(ReservedSpace::allocation_align_size_up(size));
if (!rs.is_reserved()) {
vm_exit_during_initialization("Could not reserve enough space for heap offset array");
}
if (!_vs.initialize(rs, 0)) {
vm_exit_during_initialization("Could not reserve enough space for heap offset array");
}
_offset_array = (u_char*)_vs.low_boundary();
resize(init_word_size);
if (TraceBlockOffsetTable) {
gclog_or_tty->print_cr("G1BlockOffsetSharedArray::G1BlockOffsetSharedArray: ");
gclog_or_tty->print_cr(" "
" rs.base(): " INTPTR_FORMAT
" rs.size(): " INTPTR_FORMAT
" rs end(): " INTPTR_FORMAT,
rs.base(), rs.size(), rs.base() + rs.size());
gclog_or_tty->print_cr(" "
" _vs.low_boundary(): " INTPTR_FORMAT
" _vs.high_boundary(): " INTPTR_FORMAT,
_vs.low_boundary(),
_vs.high_boundary());
}
}
void G1BlockOffsetSharedArray::resize(size_t new_word_size) {
assert(new_word_size <= _reserved.word_size(), "Resize larger than reserved");
size_t new_size = compute_size(new_word_size);
size_t old_size = _vs.committed_size();
size_t delta;
char* high = _vs.high();
_end = _reserved.start() + new_word_size;
if (new_size > old_size) {
delta = ReservedSpace::page_align_size_up(new_size - old_size);
assert(delta > 0, "just checking");
if (!_vs.expand_by(delta)) {
// Do better than this for Merlin
vm_exit_out_of_memory(delta, "offset table expansion");
}
assert(_vs.high() == high + delta, "invalid expansion");
// Initialization of the contents is left to the
// G1BlockOffsetArray that uses it.
} else {
delta = ReservedSpace::page_align_size_down(old_size - new_size);
if (delta == 0) return;
_vs.shrink_by(delta);
assert(_vs.high() == high - delta, "invalid expansion");
}
}
bool G1BlockOffsetSharedArray::is_card_boundary(HeapWord* p) const {
assert(p >= _reserved.start(), "just checking");
size_t delta = pointer_delta(p, _reserved.start());
return (delta & right_n_bits(LogN_words)) == (size_t)NoBits;
}
//////////////////////////////////////////////////////////////////////
// G1BlockOffsetArray
//////////////////////////////////////////////////////////////////////
G1BlockOffsetArray::G1BlockOffsetArray(G1BlockOffsetSharedArray* array,
MemRegion mr, bool init_to_zero) :
G1BlockOffsetTable(mr.start(), mr.end()),
_unallocated_block(_bottom),
_array(array), _csp(NULL),
_init_to_zero(init_to_zero) {
assert(_bottom <= _end, "arguments out of order");
if (!_init_to_zero) {
// initialize cards to point back to mr.start()
set_remainder_to_point_to_start(mr.start() + N_words, mr.end());
_array->set_offset_array(0, 0); // set first card to 0
}
}
void G1BlockOffsetArray::set_space(Space* sp) {
_sp = sp;
_csp = sp->toContiguousSpace();
}
// The arguments follow the normal convention of denoting
// a right-open interval: [start, end)
void
G1BlockOffsetArray:: set_remainder_to_point_to_start(HeapWord* start, HeapWord* end) {
if (start >= end) {
// The start address is equal to the end address (or to
// the right of the end address) so there are no cards
// that need to be updated.
return;
}
// Write the backskip value for each region.
//
// offset
// card 2nd 3rd
// | +- 1st | |
// v v v v
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-
// |x|0|0|0|0|0|0|0|1|1|1|1|1|1| ... |1|1|1|1|2|2|2|2|2|2| ...
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-
// 11 19 75
// 12
//
// offset card is the card that points to the start of an object
// x - offset value of offset card
// 1st - start of first logarithmic region
// 0 corresponds to logarithmic value N_words + 0 and 2**(3 * 0) = 1
// 2nd - start of second logarithmic region
// 1 corresponds to logarithmic value N_words + 1 and 2**(3 * 1) = 8
// 3rd - start of third logarithmic region
// 2 corresponds to logarithmic value N_words + 2 and 2**(3 * 2) = 64
//
// integer below the block offset entry is an example of
// the index of the entry
//
// Given an address,
// Find the index for the address
// Find the block offset table entry
// Convert the entry to a back skip
// (e.g., with today's encoding, offset = 0x81 =>
// back skip = 2**(3*(0x81 - N_words)) = 2**3 = 8)
// Move back N (e.g., 8) entries and repeat with the
// value of the new entry
//
size_t start_card = _array->index_for(start);
size_t end_card = _array->index_for(end-1);
assert(start ==_array->address_for_index(start_card), "Precondition");
assert(end ==_array->address_for_index(end_card)+N_words, "Precondition");
set_remainder_to_point_to_start_incl(start_card, end_card); // closed interval
}
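// Editor's note: an illustrative, standalone sketch only -- not part of this
// change. It spells out the backskip decoding the comment above describes, using
// the base of 2**3 = 8 stated there: an entry e <= N_words is a direct word
// offset, while e = N_words + i means "skip back 2**(3*i) cards and look again".
// Both helpers are hypothetical; the real ones are BlockOffsetArray's
// power_to_cards_back() and entry_to_cards_back().
static size_t example_power_to_cards_back(int i) {
  return (size_t)1 << (3 * i);                 // 1, 8, 64, 512, ... cards
}
static size_t example_entry_to_cards_back(unsigned char entry, size_t n_words) {
  // Worked example from the comment above: with N_words == 0x80 (the 32-bit
  // value), entry 0x81 gives i == 1 and therefore a back skip of 2**3 == 8 cards.
  return example_power_to_cards_back((int)entry - (int)n_words);
}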
// Unlike the normal convention in this code, the argument here denotes
// a closed, inclusive interval: [start_card, end_card], cf set_remainder_to_point_to_start()
// above.
void
G1BlockOffsetArray::set_remainder_to_point_to_start_incl(size_t start_card, size_t end_card) {
if (start_card > end_card) {
return;
}
assert(start_card > _array->index_for(_bottom), "Cannot be first card");
assert(_array->offset_array(start_card-1) <= N_words,
"Offset card has an unexpected value");
size_t start_card_for_region = start_card;
u_char offset = max_jubyte;
for (int i = 0; i < BlockOffsetArray::N_powers; i++) {
// -1 so that the card with the actual offset is counted. Another -1
// so that the reach ends in this region and not at the start
// of the next.
size_t reach = start_card - 1 + (BlockOffsetArray::power_to_cards_back(i+1) - 1);
offset = N_words + i;
if (reach >= end_card) {
_array->set_offset_array(start_card_for_region, end_card, offset);
start_card_for_region = reach + 1;
break;
}
_array->set_offset_array(start_card_for_region, reach, offset);
start_card_for_region = reach + 1;
}
assert(start_card_for_region > end_card, "Sanity check");
DEBUG_ONLY(check_all_cards(start_card, end_card);)
}
// The block [blk_start, blk_end) has been allocated;
// adjust the block offset table to represent this information;
// right-open interval: [blk_start, blk_end)
void
G1BlockOffsetArray::alloc_block(HeapWord* blk_start, HeapWord* blk_end) {
mark_block(blk_start, blk_end);
allocated(blk_start, blk_end);
}
// Adjust BOT to show that a previously whole block has been split
// into two.
void G1BlockOffsetArray::split_block(HeapWord* blk, size_t blk_size,
size_t left_blk_size) {
// Verify that the BOT shows [blk, blk + blk_size) to be one block.
verify_single_block(blk, blk_size);
// Update the BOT to indicate that [blk + left_blk_size, blk + blk_size)
// is one single block.
mark_block(blk + left_blk_size, blk + blk_size);
}
// Action_mark - update the BOT for the block [blk_start, blk_end).
// Current typical use is for splitting a block.
// Action_single - update the BOT for an allocation.
// Action_check - BOT verification.
void G1BlockOffsetArray::do_block_internal(HeapWord* blk_start,
HeapWord* blk_end,
Action action) {
assert(Universe::heap()->is_in_reserved(blk_start),
"reference must be into the heap");
assert(Universe::heap()->is_in_reserved(blk_end-1),
"limit must be within the heap");
// This is optimized to make the test fast, assuming we only rarely
// cross boundaries.
uintptr_t end_ui = (uintptr_t)(blk_end - 1);
uintptr_t start_ui = (uintptr_t)blk_start;
// Calculate the last card boundary preceding end of blk
intptr_t boundary_before_end = (intptr_t)end_ui;
clear_bits(boundary_before_end, right_n_bits(LogN));
if (start_ui <= (uintptr_t)boundary_before_end) {
// blk starts at or crosses a boundary
// Calculate index of card on which blk begins
size_t start_index = _array->index_for(blk_start);
// Index of card on which blk ends
size_t end_index = _array->index_for(blk_end - 1);
// Start address of card on which blk begins
HeapWord* boundary = _array->address_for_index(start_index);
assert(boundary <= blk_start, "blk should start at or after boundary");
if (blk_start != boundary) {
// blk starts strictly after boundary
// adjust card boundary and start_index forward to next card
boundary += N_words;
start_index++;
}
assert(start_index <= end_index, "monotonicity of index_for()");
assert(boundary <= (HeapWord*)boundary_before_end, "tautology");
switch (action) {
case Action_mark: {
if (init_to_zero()) {
_array->set_offset_array(start_index, boundary, blk_start);
break;
} // Else fall through to the next case
}
case Action_single: {
_array->set_offset_array(start_index, boundary, blk_start);
// We have finished marking the "offset card". We need to now
// mark the subsequent cards that this blk spans.
if (start_index < end_index) {
HeapWord* rem_st = _array->address_for_index(start_index) + N_words;
HeapWord* rem_end = _array->address_for_index(end_index) + N_words;
set_remainder_to_point_to_start(rem_st, rem_end);
}
break;
}
case Action_check: {
_array->check_offset_array(start_index, boundary, blk_start);
// We have finished checking the "offset card". We need to now
// check the subsequent cards that this blk spans.
check_all_cards(start_index + 1, end_index);
break;
}
default:
ShouldNotReachHere();
}
}
}
// The card-interval [start_card, end_card] is a closed interval; this
// is an expensive check -- use with care and only under protection of
// suitable flag.
void G1BlockOffsetArray::check_all_cards(size_t start_card, size_t end_card) const {
if (end_card < start_card) {
return;
}
guarantee(_array->offset_array(start_card) == N_words, "Wrong value in second card");
for (size_t c = start_card + 1; c <= end_card; c++ /* yeah! */) {
u_char entry = _array->offset_array(c);
if (c - start_card > BlockOffsetArray::power_to_cards_back(1)) {
guarantee(entry > N_words, "Should be in logarithmic region");
}
size_t backskip = BlockOffsetArray::entry_to_cards_back(entry);
size_t landing_card = c - backskip;
guarantee(landing_card >= (start_card - 1), "Inv");
if (landing_card >= start_card) {
guarantee(_array->offset_array(landing_card) <= entry, "monotonicity");
} else {
guarantee(landing_card == start_card - 1, "Tautology");
guarantee(_array->offset_array(landing_card) <= N_words, "Offset value");
}
}
}
// The range [blk_start, blk_end) represents a single contiguous block
// of storage; modify the block offset table to represent this
// information; Right-open interval: [blk_start, blk_end)
// NOTE: this method does _not_ adjust _unallocated_block.
void
G1BlockOffsetArray::single_block(HeapWord* blk_start, HeapWord* blk_end) {
do_block_internal(blk_start, blk_end, Action_single);
}
// Mark the BOT such that if [blk_start, blk_end) straddles a card
// boundary, the card following the first such boundary is marked
// with the appropriate offset.
// NOTE: this method does _not_ adjust _unallocated_block or
// any cards subsequent to the first one.
void
G1BlockOffsetArray::mark_block(HeapWord* blk_start, HeapWord* blk_end) {
do_block_internal(blk_start, blk_end, Action_mark);
}
void G1BlockOffsetArray::join_blocks(HeapWord* blk1, HeapWord* blk2) {
HeapWord* blk1_start = Universe::heap()->block_start(blk1);
HeapWord* blk2_start = Universe::heap()->block_start(blk2);
assert(blk1 == blk1_start && blk2 == blk2_start,
"Must be block starts.");
assert(blk1 + _sp->block_size(blk1) == blk2, "Must be contiguous.");
size_t blk1_start_index = _array->index_for(blk1);
size_t blk2_start_index = _array->index_for(blk2);
assert(blk1_start_index <= blk2_start_index, "sanity");
HeapWord* blk2_card_start = _array->address_for_index(blk2_start_index);
if (blk2 == blk2_card_start) {
// blk2 starts a card. Does blk1 start on the previous card, or further
// back?
assert(blk1_start_index < blk2_start_index, "must be lower card.");
if (blk1_start_index + 1 == blk2_start_index) {
// previous card; new value for blk2 card is size of blk1.
_array->set_offset_array(blk2_start_index, (u_char) _sp->block_size(blk1));
} else {
// Earlier card; go back a card.
_array->set_offset_array(blk2_start_index, N_words);
}
} else {
// blk2 does not start a card. Does it cross a card? If not, nothing
// to do.
size_t blk2_end_index =
_array->index_for(blk2 + _sp->block_size(blk2) - 1);
assert(blk2_end_index >= blk2_start_index, "sanity");
if (blk2_end_index > blk2_start_index) {
// Yes, it crosses a card. The value for the next card must change.
if (blk1_start_index + 1 == blk2_start_index) {
// previous card; new value for second blk2 card is size of blk1.
_array->set_offset_array(blk2_start_index + 1,
(u_char) _sp->block_size(blk1));
} else {
// Earlier card; go back a card.
_array->set_offset_array(blk2_start_index + 1, N_words);
}
}
}
}
HeapWord* G1BlockOffsetArray::block_start_unsafe(const void* addr) {
assert(_bottom <= addr && addr < _end,
"addr must be covered by this Array");
// Must read this exactly once because it can be modified by parallel
// allocation.
HeapWord* ub = _unallocated_block;
if (BlockOffsetArrayUseUnallocatedBlock && addr >= ub) {
assert(ub < _end, "tautology (see above)");
return ub;
}
// Otherwise, find the block start using the table.
HeapWord* q = block_at_or_preceding(addr, false, 0);
return forward_to_block_containing_addr(q, addr);
}
// This duplicates a little code from the above: unavoidable.
HeapWord*
G1BlockOffsetArray::block_start_unsafe_const(const void* addr) const {
assert(_bottom <= addr && addr < _end,
"addr must be covered by this Array");
// Must read this exactly once because it can be modified by parallel
// allocation.
HeapWord* ub = _unallocated_block;
if (BlockOffsetArrayUseUnallocatedBlock && addr >= ub) {
assert(ub < _end, "tautology (see above)");
return ub;
}
// Otherwise, find the block start using the table.
HeapWord* q = block_at_or_preceding(addr, false, 0);
HeapWord* n = q + _sp->block_size(q);
return forward_to_block_containing_addr_const(q, n, addr);
}
HeapWord*
G1BlockOffsetArray::forward_to_block_containing_addr_slow(HeapWord* q,
HeapWord* n,
const void* addr) {
// We're not in the normal case. We need to handle an important subcase
// here: LAB allocation. An allocation previously recorded in the
// offset table was actually a lab allocation, and was divided into
// several objects subsequently. Fix this situation as we answer the
// query, by updating entries as we cross them.
// If the first object's end, n, is at the card boundary, start refining
// with the corresponding card (the value of the entry will basically be
// set to 0). If the object crosses the boundary, start from the next card.
size_t next_index = _array->index_for(n) + !_array->is_card_boundary(n);
HeapWord* next_boundary = _array->address_for_index(next_index);
if (csp() != NULL) {
if (addr >= csp()->top()) return csp()->top();
while (next_boundary < addr) {
while (n <= next_boundary) {
q = n;
oop obj = oop(q);
if (obj->klass() == NULL) return q;
n += obj->size();
}
assert(q <= next_boundary && n > next_boundary, "Consequence of loop");
// [q, n) is the block that crosses the boundary.
alloc_block_work2(&next_boundary, &next_index, q, n);
}
} else {
while (next_boundary < addr) {
while (n <= next_boundary) {
q = n;
oop obj = oop(q);
if (obj->klass() == NULL) return q;
n += _sp->block_size(q);
}
assert(q <= next_boundary && n > next_boundary, "Consequence of loop");
// [q, n) is the block that crosses the boundary.
alloc_block_work2(&next_boundary, &next_index, q, n);
}
}
return forward_to_block_containing_addr_const(q, n, addr);
}
HeapWord* G1BlockOffsetArray::block_start_careful(const void* addr) const {
assert(_array->offset_array(0) == 0, "objects can't cross covered areas");
assert(_bottom <= addr && addr < _end,
"addr must be covered by this Array");
// Must read this exactly once because it can be modified by parallel
// allocation.
HeapWord* ub = _unallocated_block;
if (BlockOffsetArrayUseUnallocatedBlock && addr >= ub) {
assert(ub < _end, "tautology (see above)");
return ub;
}
// Otherwise, find the block start using the table, but taking
// care (cf block_start_unsafe() above) not to parse any objects/blocks
// on the cards themselves.
size_t index = _array->index_for(addr);
assert(_array->address_for_index(index) == addr,
"arg should be start of card");
HeapWord* q = (HeapWord*)addr;
uint offset;
do {
offset = _array->offset_array(index--);
q -= offset;
} while (offset == N_words);
assert(q <= addr, "block start should be to left of arg");
return q;
}
// Note that the committed size of the covered space may have changed,
// so the table size might also wish to change.
void G1BlockOffsetArray::resize(size_t new_word_size) {
HeapWord* new_end = _bottom + new_word_size;
if (_end < new_end && !init_to_zero()) {
// verify that the old and new boundaries are also card boundaries
assert(_array->is_card_boundary(_end),
"_end not a card boundary");
assert(_array->is_card_boundary(new_end),
"new _end would not be a card boundary");
// set all the newly added cards
_array->set_offset_array(_end, new_end, N_words);
}
_end = new_end; // update _end
}
void G1BlockOffsetArray::set_region(MemRegion mr) {
_bottom = mr.start();
_end = mr.end();
}
//
// threshold_
// | _index_
// v v
// +-------+-------+-------+-------+-------+
// | i-1 | i | i+1 | i+2 | i+3 |
// +-------+-------+-------+-------+-------+
// ( ^ ]
// block-start
//
void G1BlockOffsetArray::alloc_block_work2(HeapWord** threshold_, size_t* index_,
HeapWord* blk_start, HeapWord* blk_end) {
// For efficiency, do copy-in/copy-out.
HeapWord* threshold = *threshold_;
size_t index = *index_;
assert(blk_start != NULL && blk_end > blk_start,
"phantom block");
assert(blk_end > threshold, "should be past threshold");
assert(blk_start <= threshold, "blk_start should be at or before threshold");
assert(pointer_delta(threshold, blk_start) <= N_words,
"offset should be <= BlockOffsetSharedArray::N");
assert(Universe::heap()->is_in_reserved(blk_start),
"reference must be into the heap");
assert(Universe::heap()->is_in_reserved(blk_end-1),
"limit must be within the heap");
assert(threshold == _array->_reserved.start() + index*N_words,
"index must agree with threshold");
DEBUG_ONLY(size_t orig_index = index;)
// Mark the card that holds the offset into the block. Note
// that _next_offset_index and _next_offset_threshold are not
// updated until the end of this method.
_array->set_offset_array(index, threshold, blk_start);
// We need to now mark the subsequent cards that this blk spans.
// Index of card on which blk ends.
size_t end_index = _array->index_for(blk_end - 1);
// Are there more cards left to be updated?
if (index + 1 <= end_index) {
HeapWord* rem_st = _array->address_for_index(index + 1);
// Calculate rem_end this way because end_index
// may be the last valid index in the covered region.
HeapWord* rem_end = _array->address_for_index(end_index) + N_words;
set_remainder_to_point_to_start(rem_st, rem_end);
}
index = end_index + 1;
// Calculate threshold_ this way because end_index
// may be the last valid index in the covered region.
threshold = _array->address_for_index(end_index) + N_words;
assert(threshold >= blk_end, "Incorrect offset threshold");
// index_ and threshold_ updated here.
*threshold_ = threshold;
*index_ = index;
#ifdef ASSERT
// The offset can be 0 if the block starts on a boundary. That
// is checked by an assertion above.
size_t start_index = _array->index_for(blk_start);
HeapWord* boundary = _array->address_for_index(start_index);
assert((_array->offset_array(orig_index) == 0 &&
blk_start == boundary) ||
(_array->offset_array(orig_index) > 0 &&
_array->offset_array(orig_index) <= N_words),
"offset array should have been set");
for (size_t j = orig_index + 1; j <= end_index; j++) {
assert(_array->offset_array(j) > 0 &&
_array->offset_array(j) <=
(u_char) (N_words+BlockOffsetArray::N_powers-1),
"offset array should have been set");
}
#endif
}
//////////////////////////////////////////////////////////////////////
// G1BlockOffsetArrayContigSpace
//////////////////////////////////////////////////////////////////////
HeapWord*
G1BlockOffsetArrayContigSpace::block_start_unsafe(const void* addr) {
assert(_bottom <= addr && addr < _end,
"addr must be covered by this Array");
HeapWord* q = block_at_or_preceding(addr, true, _next_offset_index-1);
return forward_to_block_containing_addr(q, addr);
}
HeapWord*
G1BlockOffsetArrayContigSpace::
block_start_unsafe_const(const void* addr) const {
assert(_bottom <= addr && addr < _end,
"addr must be covered by this Array");
HeapWord* q = block_at_or_preceding(addr, true, _next_offset_index-1);
HeapWord* n = q + _sp->block_size(q);
return forward_to_block_containing_addr_const(q, n, addr);
}
G1BlockOffsetArrayContigSpace::
G1BlockOffsetArrayContigSpace(G1BlockOffsetSharedArray* array,
MemRegion mr) :
G1BlockOffsetArray(array, mr, true)
{
_next_offset_threshold = NULL;
_next_offset_index = 0;
}
HeapWord* G1BlockOffsetArrayContigSpace::initialize_threshold() {
assert(!Universe::heap()->is_in_reserved(_array->_offset_array),
"just checking");
_next_offset_index = _array->index_for(_bottom);
_next_offset_index++;
_next_offset_threshold =
_array->address_for_index(_next_offset_index);
return _next_offset_threshold;
}
void G1BlockOffsetArrayContigSpace::zero_bottom_entry() {
assert(!Universe::heap()->is_in_reserved(_array->_offset_array),
"just checking");
size_t bottom_index = _array->index_for(_bottom);
assert(_array->address_for_index(bottom_index) == _bottom,
"Precondition of call");
_array->set_offset_array(bottom_index, 0);
}
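// Editor's note: an illustrative, standalone sketch only -- not part of this
// change. It models the lookup this file implements: one entry per card records
// how far back the block covering the card's first word starts, alloc_block()
// fills the entries a block spans, and block_start() backs up by the recorded
// offset and then walks forward block by block. The encoding here is a plain
// word offset (no logarithmic backskip) and block sizes live in each block's
// first word; both are deliberate simplifications, and every name is hypothetical.
#include <cassert>
#include <cstddef>
#include <vector>

class ToyBlockOffsetTable {
  static const size_t kLogNWords = 6;            // 64 words per card
  static const size_t kNWords = (size_t)1 << kLogNWords;
  std::vector<size_t> _heap;                     // word 0 of each block holds its size
  std::vector<size_t> _offset;                   // per-card backwards offset, in words
public:
  explicit ToyBlockOffsetTable(size_t heap_words)
    : _heap(heap_words, 0), _offset(heap_words / kNWords + 1, 0) {}

  // Record a block [start, start + size) and update every card whose first
  // word lies inside it (the analogue of alloc_block()).
  void alloc_block(size_t start, size_t size) {
    _heap[start] = size;
    for (size_t card = start / kNWords + 1; card * kNWords < start + size; card++) {
      _offset[card] = card * kNWords - start;
    }
  }

  // The analogue of block_start(): back up from the card's first word by the
  // recorded offset, then walk forward block by block until "addr" is covered.
  size_t block_start(size_t addr) const {
    size_t card = addr / kNWords;
    size_t q = card * kNWords - _offset[card];
    assert(_heap[q] > 0 && "addr must fall inside an allocated block");
    while (q + _heap[q] <= addr) {
      q += _heap[q];
      assert(_heap[q] > 0 && "walked past the allocated area");
    }
    return q;
  }
};
// E.g. on a 1024-word table, after alloc_block(0, 100) and alloc_block(100, 200),
// block_start(250) backs up from word 192 (card 3) by 92 words and returns 100.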

View File

@ -0,0 +1,487 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
// The CollectedHeap type requires subtypes to implement a method
// "block_start". For some subtypes, notably generational
// systems using card-table-based write barriers, the efficiency of this
// operation may be important. Implementations of the "BlockOffsetArray"
// class may be useful in providing such efficient implementations.
//
// While generally mirroring the structure of the BOT for GenCollectedHeap,
// the following types are tailored more towards G1's uses; these should,
// however, be merged back into a common BOT to avoid code duplication
// and reduce maintenance overhead.
//
// G1BlockOffsetTable (abstract)
// -- G1BlockOffsetArray (uses G1BlockOffsetSharedArray)
// -- G1BlockOffsetArrayContigSpace
//
// A main impediment to the consolidation of this code might be the
// effect of making some of the block_start*() calls non-const as
// below. Whether that might adversely affect performance optimizations
// that compilers might normally perform in the case of non-G1
// collectors needs to be carefully investigated prior to any such
// consolidation.
// Forward declarations
class ContiguousSpace;
class G1BlockOffsetSharedArray;
class G1BlockOffsetTable VALUE_OBJ_CLASS_SPEC {
friend class VMStructs;
protected:
// These members describe the region covered by the table.
// The space this table is covering.
HeapWord* _bottom; // == reserved.start
HeapWord* _end; // End of currently allocated region.
public:
// Initialize the table to cover the given space.
// The contents of the initial table are undefined.
G1BlockOffsetTable(HeapWord* bottom, HeapWord* end) :
_bottom(bottom), _end(end)
{
assert(_bottom <= _end, "arguments out of order");
}
// Note that the committed size of the covered space may have changed,
// so the table size might also wish to change.
virtual void resize(size_t new_word_size) = 0;
virtual void set_bottom(HeapWord* new_bottom) {
assert(new_bottom <= _end, "new_bottom > _end");
_bottom = new_bottom;
resize(pointer_delta(_end, _bottom));
}
// Requires "addr" to be contained by a block, and returns the address of
// the start of that block. (May have side effects, namely updating of
// shared array entries that "point" too far backwards. This can occur,
// for example, when LAB allocation is used in a space covered by the
// table.)
virtual HeapWord* block_start_unsafe(const void* addr) = 0;
// Same as above, but does not have any of the possible side effects
// discussed above.
virtual HeapWord* block_start_unsafe_const(const void* addr) const = 0;
// Returns the address of the start of the block containing "addr", or
// else "null" if it is covered by no block. (May have side effects,
// namely updating of shared array entries that "point" too far
// backwards. This can occur, for example, when lab allocation is used
// in a space covered by the table.)
inline HeapWord* block_start(const void* addr);
// Same as above, but does not have any of the possible side effects
// discussed above.
inline HeapWord* block_start_const(const void* addr) const;
};
// This implementation of "G1BlockOffsetTable" divides the covered region
// into "N"-word subregions (where "N" = 2^"LogN". An array with an entry
// for each such subregion indicates how far back one must go to find the
// start of the chunk that includes the first word of the subregion.
//
// Each BlockOffsetArray is owned by a Space. However, the actual array
// may be shared by several BlockOffsetArrays; this is useful
// when a single resizable area (such as a generation) is divided up into
// several spaces in which contiguous allocation takes place,
// as happens, for example, in G1 or in the train generation.
// Here is the shared array type.
class G1BlockOffsetSharedArray: public CHeapObj {
friend class G1BlockOffsetArray;
friend class G1BlockOffsetArrayContigSpace;
friend class VMStructs;
private:
// The reserved region covered by the shared array.
MemRegion _reserved;
// End of the current committed region.
HeapWord* _end;
// Array for keeping offsets for retrieving object start fast given an
// address.
VirtualSpace _vs;
u_char* _offset_array; // byte array keeping backwards offsets
// Bounds checking accessors:
// For performance these have to devolve to array accesses in product builds.
u_char offset_array(size_t index) const {
assert(index < _vs.committed_size(), "index out of range");
return _offset_array[index];
}
void set_offset_array(size_t index, u_char offset) {
assert(index < _vs.committed_size(), "index out of range");
assert(offset <= N_words, "offset too large");
_offset_array[index] = offset;
}
void set_offset_array(size_t index, HeapWord* high, HeapWord* low) {
assert(index < _vs.committed_size(), "index out of range");
assert(high >= low, "addresses out of order");
assert(pointer_delta(high, low) <= N_words, "offset too large");
_offset_array[index] = (u_char) pointer_delta(high, low);
}
void set_offset_array(HeapWord* left, HeapWord* right, u_char offset) {
assert(index_for(right - 1) < _vs.committed_size(),
"right address out of range");
assert(left < right, "Heap addresses out of order");
size_t num_cards = pointer_delta(right, left) >> LogN_words;
memset(&_offset_array[index_for(left)], offset, num_cards);
}
void set_offset_array(size_t left, size_t right, u_char offset) {
assert(right < _vs.committed_size(), "right address out of range");
assert(left <= right, "indexes out of order");
size_t num_cards = right - left + 1;
memset(&_offset_array[left], offset, num_cards);
}
void check_offset_array(size_t index, HeapWord* high, HeapWord* low) const {
assert(index < _vs.committed_size(), "index out of range");
assert(high >= low, "addresses out of order");
assert(pointer_delta(high, low) <= N_words, "offset too large");
assert(_offset_array[index] == pointer_delta(high, low),
"Wrong offset");
}
bool is_card_boundary(HeapWord* p) const;
// Return the number of slots needed for an offset array
// that covers mem_region_words words.
// We always add an extra slot because if an object
// ends on a card boundary we put a 0 in the next
// offset array slot, so we want that slot always
// to be reserved.
size_t compute_size(size_t mem_region_words) {
size_t number_of_slots = (mem_region_words / N_words) + 1;
return ReservedSpace::page_align_size_up(number_of_slots);
}
public:
enum SomePublicConstants {
LogN = 9,
LogN_words = LogN - LogHeapWordSize,
N_bytes = 1 << LogN,
N_words = 1 << LogN_words
};
// Initialize the table to cover from "base" to (at least)
// "base + init_word_size". In the future, the table may be expanded
// (see "resize" below) up to the size of "_reserved" (which must be at
// least "init_word_size".) The contents of the initial table are
// undefined; it is the responsibility of the constituent
// G1BlockOffsetTable(s) to initialize cards.
G1BlockOffsetSharedArray(MemRegion reserved, size_t init_word_size);
// Notes a change in the committed size of the region covered by the
// table. The "new_word_size" may not be larger than the size of the
// reserved region this table covers.
void resize(size_t new_word_size);
void set_bottom(HeapWord* new_bottom);
// Updates all the BlockOffsetArray's sharing this shared array to
// reflect the current "top"'s of their spaces.
void update_offset_arrays();
// Return the appropriate index into "_offset_array" for "p".
inline size_t index_for(const void* p) const;
// Return the address indicating the start of the region corresponding to
// "index" in "_offset_array".
inline HeapWord* address_for_index(size_t index) const;
};
// And here is the G1BlockOffsetTable subtype that uses the array.
class G1BlockOffsetArray: public G1BlockOffsetTable {
friend class G1BlockOffsetSharedArray;
friend class G1BlockOffsetArrayContigSpace;
friend class VMStructs;
private:
enum SomePrivateConstants {
N_words = G1BlockOffsetSharedArray::N_words,
LogN = G1BlockOffsetSharedArray::LogN
};
// The following enums are used by do_block_internal
enum Action {
Action_single, // BOT records a single block (see single_block())
Action_mark, // BOT marks the start of a block (see mark_block())
Action_check // Check that BOT records block correctly
// (see verify_single_block()).
};
// This is the array, which can be shared by several BlockOffsetArray's
// servicing different spaces.
G1BlockOffsetSharedArray* _array;
// The space that owns this subregion.
Space* _sp;
// If "_sp" is a contiguous space, the field below is the view of "_sp"
// as a contiguous space, else NULL.
ContiguousSpace* _csp;
// If true, array entries are initialized to 0; otherwise, they are
// initialized to point backwards to the beginning of the covered region.
bool _init_to_zero;
// The portion [_unallocated_block, _sp.end()) of the space that
// is a single block known not to contain any objects.
// NOTE: See BlockOffsetArrayUseUnallocatedBlock flag.
HeapWord* _unallocated_block;
// Sets the entries
// corresponding to the cards starting at "start" and ending at "end"
// to point back to the card before "start": the interval [start, end)
// is right-open.
void set_remainder_to_point_to_start(HeapWord* start, HeapWord* end);
// Same as above, except that the args here are a card _index_ interval
// that is closed: [start_index, end_index]
void set_remainder_to_point_to_start_incl(size_t start, size_t end);
// A helper function for BOT adjustment/verification work
void do_block_internal(HeapWord* blk_start, HeapWord* blk_end, Action action);
protected:
ContiguousSpace* csp() const { return _csp; }
// Returns the address of a block whose start is at most "addr".
// If "has_max_index" is true, "assumes "max_index" is the last valid one
// in the array.
inline HeapWord* block_at_or_preceding(const void* addr,
bool has_max_index,
size_t max_index) const;
// "q" is a block boundary that is <= "addr"; "n" is the address of the
// next block (or the end of the space.) Return the address of the
// beginning of the block that contains "addr". Does so without side
// effects (see, e.g., spec of block_start.)
inline HeapWord*
forward_to_block_containing_addr_const(HeapWord* q, HeapWord* n,
const void* addr) const;
// "q" is a block boundary that is <= "addr"; return the address of the
// beginning of the block that contains "addr". May have side effects
// on "this", by updating imprecise entries.
inline HeapWord* forward_to_block_containing_addr(HeapWord* q,
const void* addr);
// "q" is a block boundary that is <= "addr"; "n" is the address of the
// next block (or the end of the space.) Return the address of the
// beginning of the block that contains "addr". May have side effects
// on "this", by updating imprecise entries.
HeapWord* forward_to_block_containing_addr_slow(HeapWord* q,
HeapWord* n,
const void* addr);
// Requires that "*threshold_" be the first array entry boundary at or
// above "blk_start", and that "*index_" be the corresponding array
// index. If the block starts at or crosses "*threshold_", records
// "blk_start" as the appropriate block start for the array index
// starting at "*threshold_", and for any other indices crossed by the
// block. Updates "*threshold_" and "*index_" to correspond to the first
// index after the block end.
void alloc_block_work2(HeapWord** threshold_, size_t* index_,
HeapWord* blk_start, HeapWord* blk_end);
public:
// The space may not have its bottom and top set yet, which is why the
// region is passed as a parameter. If "init_to_zero" is true, the
// elements of the array are initialized to zero. Otherwise, they are
// initialized to point backwards to the beginning.
G1BlockOffsetArray(G1BlockOffsetSharedArray* array, MemRegion mr,
bool init_to_zero);
// Note: this ought to be part of the constructor, but that would require
// "this" to be passed as a parameter to a member constructor for
// the containing concrete subtype of Space.
// This would be legal C++, but MS VC++ doesn't allow it.
void set_space(Space* sp);
// Resets the covered region to the given "mr".
void set_region(MemRegion mr);
// Resets the covered region to one with the same _bottom as before but
// the "new_word_size".
void resize(size_t new_word_size);
// These must be guaranteed to work properly (i.e., do nothing)
// when "blk_start" ("blk" for second version) is "NULL".
virtual void alloc_block(HeapWord* blk_start, HeapWord* blk_end);
virtual void alloc_block(HeapWord* blk, size_t size) {
alloc_block(blk, blk + size);
}
// The following methods are useful and optimized for a
// general, non-contiguous space.
// The given arguments are required to be the starts of adjacent ("blk1"
// before "blk2") well-formed blocks covered by "this". After this call,
// they should be considered to form one block.
virtual void join_blocks(HeapWord* blk1, HeapWord* blk2);
// Given a block [blk_start, blk_start + full_blk_size), and
// a left_blk_size < full_blk_size, adjust the BOT to show two
// blocks [blk_start, blk_start + left_blk_size) and
// [blk_start + left_blk_size, blk_start + full_blk_size).
// It is assumed (and verified in the non-product VM) that the
// BOT was correct for the original block.
void split_block(HeapWord* blk_start, size_t full_blk_size,
size_t left_blk_size);
// Adjust the BOT to show that it has a single block in the
// range [blk_start, blk_start + size). All necessary BOT
// cards are adjusted, but _unallocated_block isn't.
void single_block(HeapWord* blk_start, HeapWord* blk_end);
void single_block(HeapWord* blk, size_t size) {
single_block(blk, blk + size);
}
// Adjust BOT to show that it has a block in the range
// [blk_start, blk_start + size). Only the first card
// of BOT is touched. It is assumed (and verified in the
// non-product VM) that the remaining cards of the block
// are correct.
void mark_block(HeapWord* blk_start, HeapWord* blk_end);
void mark_block(HeapWord* blk, size_t size) {
mark_block(blk, blk + size);
}
// Adjust _unallocated_block to indicate that a particular
// block has been newly allocated or freed. It is assumed (and
// verified in the non-product VM) that the BOT is correct for
// the given block.
inline void allocated(HeapWord* blk_start, HeapWord* blk_end) {
// Verify that the BOT shows [blk, blk + blk_size) to be one block.
verify_single_block(blk_start, blk_end);
if (BlockOffsetArrayUseUnallocatedBlock) {
_unallocated_block = MAX2(_unallocated_block, blk_end);
}
}
inline void allocated(HeapWord* blk, size_t size) {
allocated(blk, blk + size);
}
inline void freed(HeapWord* blk_start, HeapWord* blk_end);
inline void freed(HeapWord* blk, size_t size);
virtual HeapWord* block_start_unsafe(const void* addr);
virtual HeapWord* block_start_unsafe_const(const void* addr) const;
// Requires "addr" to be the start of a card and returns the
// start of the block that contains the given address.
HeapWord* block_start_careful(const void* addr) const;
// If true, initialize array slots with no allocated blocks to zero.
// Otherwise, make them point back to the front.
bool init_to_zero() { return _init_to_zero; }
// Verification & debugging - ensure that the offset table reflects the fact
// that the block [blk_start, blk_end) or [blk, blk + size) is a
// single block of storage. NOTE: can't const this because of
// call to non-const do_block_internal() below.
inline void verify_single_block(HeapWord* blk_start, HeapWord* blk_end) {
if (VerifyBlockOffsetArray) {
do_block_internal(blk_start, blk_end, Action_check);
}
}
inline void verify_single_block(HeapWord* blk, size_t size) {
verify_single_block(blk, blk + size);
}
// Verify that the given block is before _unallocated_block
inline void verify_not_unallocated(HeapWord* blk_start,
HeapWord* blk_end) const {
if (BlockOffsetArrayUseUnallocatedBlock) {
assert(blk_start < blk_end, "Block inconsistency?");
assert(blk_end <= _unallocated_block, "_unallocated_block problem");
}
}
inline void verify_not_unallocated(HeapWord* blk, size_t size) const {
verify_not_unallocated(blk, blk + size);
}
void check_all_cards(size_t left_card, size_t right_card) const;
};
// A subtype of BlockOffsetArray that takes advantage of the fact
// that its underlying space is a ContiguousSpace, so that its "active"
// region can be more efficiently tracked (than for a non-contiguous space).
class G1BlockOffsetArrayContigSpace: public G1BlockOffsetArray {
friend class VMStructs;
// allocation boundary at which offset array must be updated
HeapWord* _next_offset_threshold;
size_t _next_offset_index; // index corresponding to that boundary
// Work function to be called when allocation start crosses the next
// threshold in the contig space.
void alloc_block_work1(HeapWord* blk_start, HeapWord* blk_end) {
alloc_block_work2(&_next_offset_threshold, &_next_offset_index,
blk_start, blk_end);
}
public:
G1BlockOffsetArrayContigSpace(G1BlockOffsetSharedArray* array, MemRegion mr);
// Initialize the threshold to reflect the first boundary after the
// bottom of the covered region.
HeapWord* initialize_threshold();
// Zero out the entry for _bottom (offset will be zero).
void zero_bottom_entry();
// Return the next threshold, the point at which the table should be
// updated.
HeapWord* threshold() const { return _next_offset_threshold; }
// These must be guaranteed to work properly (i.e., do nothing)
// when "blk_start" ("blk" for second version) is "NULL". In this
// implementation, that's true because NULL is represented as 0, and thus
// never exceeds the "_next_offset_threshold".
void alloc_block(HeapWord* blk_start, HeapWord* blk_end) {
if (blk_end > _next_offset_threshold)
alloc_block_work1(blk_start, blk_end);
}
void alloc_block(HeapWord* blk, size_t size) {
alloc_block(blk, blk+size);
}
HeapWord* block_start_unsafe(const void* addr);
HeapWord* block_start_unsafe_const(const void* addr) const;
};

View File

@ -0,0 +1,153 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
inline HeapWord* G1BlockOffsetTable::block_start(const void* addr) {
if (addr >= _bottom && addr < _end) {
return block_start_unsafe(addr);
} else {
return NULL;
}
}
inline HeapWord*
G1BlockOffsetTable::block_start_const(const void* addr) const {
if (addr >= _bottom && addr < _end) {
return block_start_unsafe_const(addr);
} else {
return NULL;
}
}
inline size_t G1BlockOffsetSharedArray::index_for(const void* p) const {
char* pc = (char*)p;
assert(pc >= (char*)_reserved.start() &&
pc < (char*)_reserved.end(),
"p not in range.");
size_t delta = pointer_delta(pc, _reserved.start(), sizeof(char));
size_t result = delta >> LogN;
assert(result < _vs.committed_size(), "bad index from address");
return result;
}
inline HeapWord*
G1BlockOffsetSharedArray::address_for_index(size_t index) const {
assert(index < _vs.committed_size(), "bad index");
HeapWord* result = _reserved.start() + (index << LogN_words);
assert(result >= _reserved.start() && result < _reserved.end(),
"bad address from index");
return result;
}
inline HeapWord*
G1BlockOffsetArray::block_at_or_preceding(const void* addr,
bool has_max_index,
size_t max_index) const {
assert(_array->offset_array(0) == 0, "objects can't cross covered areas");
size_t index = _array->index_for(addr);
// We must make sure that the offset table entry we use is valid. If
// "addr" is past the end, start at the last known one and go forward.
if (has_max_index) {
index = MIN2(index, max_index);
}
HeapWord* q = _array->address_for_index(index);
uint offset = _array->offset_array(index); // Extend u_char to uint.
while (offset >= N_words) {
// The excess of the offset from N_words indicates a power of Base
// to go back by.
size_t n_cards_back = BlockOffsetArray::entry_to_cards_back(offset);
q -= (N_words * n_cards_back);
assert(q >= _sp->bottom(), "Went below bottom!");
index -= n_cards_back;
offset = _array->offset_array(index);
}
assert(offset < N_words, "offset too large");
q -= offset;
return q;
}
inline HeapWord*
G1BlockOffsetArray::
forward_to_block_containing_addr_const(HeapWord* q, HeapWord* n,
const void* addr) const {
if (csp() != NULL) {
if (addr >= csp()->top()) return csp()->top();
while (n <= addr) {
q = n;
oop obj = oop(q);
if (obj->klass() == NULL) return q;
n += obj->size();
}
} else {
while (n <= addr) {
q = n;
oop obj = oop(q);
if (obj->klass() == NULL) return q;
n += _sp->block_size(q);
}
}
assert(q <= n, "wrong order for q and addr");
assert(addr < n, "wrong order for addr and n");
return q;
}
inline HeapWord*
G1BlockOffsetArray::forward_to_block_containing_addr(HeapWord* q,
const void* addr) {
if (oop(q)->klass() == NULL) return q;
HeapWord* n = q + _sp->block_size(q);
// In the normal case, where the query "addr" is a card boundary, and the
// offset table chunks are the same size as cards, the block starting at
// "q" will contain addr, so the test below will fail, and we'll fall
// through quickly.
if (n <= addr) {
q = forward_to_block_containing_addr_slow(q, n, addr);
}
assert(q <= addr, "wrong order for current and arg");
return q;
}
//////////////////////////////////////////////////////////////////////////
// G1BlockOffsetArray inlines
//////////////////////////////////////////////////////////////////////////
inline void G1BlockOffsetArray::freed(HeapWord* blk_start, HeapWord* blk_end) {
// Verify that the BOT shows [blk_start, blk_end) to be one block.
verify_single_block(blk_start, blk_end);
// adjust _unallocated_block upward or downward
// as appropriate
if (BlockOffsetArrayUseUnallocatedBlock) {
assert(_unallocated_block <= _end,
"Inconsistent value for _unallocated_block");
if (blk_end >= _unallocated_block && blk_start <= _unallocated_block) {
// CMS-specific note: a block abutting _unallocated_block to
// its left is being freed, a new block is being added or
// we are resetting following a compaction
_unallocated_block = blk_start;
}
}
}
inline void G1BlockOffsetArray::freed(HeapWord* blk, size_t size) {
freed(blk, blk + size);
}
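The card-indexed lookup used by block_at_or_preceding() above is easier to see in a simplified standalone model. The sketch below is illustrative only (not HotSpot code, and it omits the logarithmic back-skip encoding used for entries of N_words or more): each card simply records how many words before its boundary the enclosing block starts; addresses inside a block's first card still need the forward walk shown in forward_to_block_containing_addr().
#include <cassert>
#include <cstddef>
#include <vector>

// Simplified block-offset-table model: the heap is an array of words divided
// into cards of CARD_WORDS words each; entry[c] holds how many words before
// the start of card c the block covering that boundary begins.
struct ToyBOT {
  static const size_t CARD_WORDS = 8;
  std::vector<size_t> entry;                       // one entry per card

  explicit ToyBOT(size_t heap_words) : entry(heap_words / CARD_WORDS, 0) {}

  // Record a block [blk_start, blk_start + blk_words).
  void record_block(size_t blk_start, size_t blk_words) {
    size_t blk_end = blk_start + blk_words;
    // For every card boundary inside the block, remember the distance back
    // to blk_start (the real table compresses large distances).
    for (size_t c = blk_start / CARD_WORDS + 1; c * CARD_WORDS < blk_end; ++c) {
      entry[c] = c * CARD_WORDS - blk_start;
    }
  }

  // Start of the block covering the boundary of the card containing addr;
  // for addresses in a block's first card this returns an earlier block and
  // the caller must walk forward, as the real code does.
  size_t block_start(size_t addr) const {
    size_t c = addr / CARD_WORDS;
    return c * CARD_WORDS - entry[c];
  }
};

int main() {
  ToyBOT bot(64);
  bot.record_block(3, 30);              // block occupies words [3, 33)
  assert(bot.block_start(20) == 3);     // word 20 lies inside that block
  assert(bot.block_start(30) == 3);
  return 0;
}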

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,91 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
// Inline functions for G1CollectedHeap
inline HeapRegion*
G1CollectedHeap::heap_region_containing(const void* addr) const {
HeapRegion* hr = _hrs->addr_to_region(addr);
// hr can be null if addr in perm_gen
if (hr != NULL && hr->continuesHumongous()) {
hr = hr->humongous_start_region();
}
return hr;
}
inline HeapRegion*
G1CollectedHeap::heap_region_containing_raw(const void* addr) const {
HeapRegion* res = _hrs->addr_to_region(addr);
assert(res != NULL, "addr outside of heap?");
return res;
}
inline bool G1CollectedHeap::obj_in_cs(oop obj) {
HeapRegion* r = _hrs->addr_to_region(obj);
return r != NULL && r->in_collection_set();
}
inline HeapWord* G1CollectedHeap::attempt_allocation(size_t word_size,
bool permit_collection_pause) {
HeapWord* res = NULL;
assert( SafepointSynchronize::is_at_safepoint() ||
Heap_lock->owned_by_self(), "pre-condition of the call" );
if (_cur_alloc_region != NULL) {
// If this allocation causes a region to become non empty,
// then we need to update our free_regions count.
if (_cur_alloc_region->is_empty()) {
res = _cur_alloc_region->allocate(word_size);
if (res != NULL)
_free_regions--;
} else {
res = _cur_alloc_region->allocate(word_size);
}
}
if (res != NULL) {
if (!SafepointSynchronize::is_at_safepoint()) {
assert( Heap_lock->owned_by_self(), "invariant" );
Heap_lock->unlock();
}
return res;
}
// attempt_allocation_slow will also unlock the heap lock when appropriate.
return attempt_allocation_slow(word_size, permit_collection_pause);
}
inline RefToScanQueue* G1CollectedHeap::task_queue(int i) {
return _task_queues->queue(i);
}
inline bool G1CollectedHeap::isMarkedPrev(oop obj) const {
return _cm->prevMarkBitMap()->isMarked((HeapWord *)obj);
}
inline bool G1CollectedHeap::isMarkedNext(oop obj) const {
return _cm->nextMarkBitMap()->isMarked((HeapWord *)obj);
}
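attempt_allocation() above ultimately relies on the current allocation region handing out memory by bumping a top pointer. A minimal standalone sketch of that idea (illustrative only, not the actual HeapRegion::allocate() implementation):
#include <cstddef>

// Bump-pointer allocation over a fixed block of words: return NULL when the
// request does not fit, otherwise advance the top pointer.
class ToyRegion {
  size_t* _bottom;
  size_t* _top;
  size_t* _end;
public:
  ToyRegion(size_t* mem, size_t words)
      : _bottom(mem), _top(mem), _end(mem + words) {}

  bool is_empty() const { return _top == _bottom; }

  size_t* allocate(size_t word_size) {
    if (_top + word_size > _end) return NULL;   // does not fit in this region
    size_t* result = _top;
    _top += word_size;                          // bump the pointer
    return result;
  }
};

int main() {
  size_t backing[1024];
  ToyRegion r(backing, 1024);
  size_t* p = r.allocate(16);      // succeeds
  size_t* q = r.allocate(2048);    // too large for the region: NULL
  return (p != NULL && q == NULL) ? 0 : 1;
}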

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,187 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
#include "incls/_precompiled.incl"
#include "incls/_g1MMUTracker.cpp.incl"
#define _DISABLE_MMU 0
// comparing doubles directly is unreliable, so tolerate a small margin of error
#define SMALL_MARGIN 0.0000001
#define is_double_leq_0(_value) ( (_value) < SMALL_MARGIN )
#define is_double_leq(_val1, _val2) is_double_leq_0((_val1) - (_val2))
#define is_double_geq(_val1, _val2) is_double_leq_0((_val2) - (_val1))
/***** ALL TIMES ARE IN SECS!!!!!!! *****/
G1MMUTracker::G1MMUTracker(double time_slice, double max_gc_time) :
_time_slice(time_slice),
_max_gc_time(max_gc_time),
_conc_overhead_time_sec(0.0) { }
void
G1MMUTracker::update_conc_overhead(double conc_overhead) {
double conc_overhead_time_sec = _time_slice * conc_overhead;
if (conc_overhead_time_sec > 0.9 * _max_gc_time) {
// We are screwed, as we only seem to have <10% of the soft
// real-time goal available for pauses. Let's admit defeat and
// allow something more generous as a pause target.
conc_overhead_time_sec = 0.75 * _max_gc_time;
}
_conc_overhead_time_sec = conc_overhead_time_sec;
}
G1MMUTrackerQueue::G1MMUTrackerQueue(double time_slice, double max_gc_time) :
G1MMUTracker(time_slice, max_gc_time),
_head_index(0),
_tail_index(trim_index(_head_index+1)),
_no_entries(0) { }
void G1MMUTrackerQueue::remove_expired_entries(double current_time) {
double limit = current_time - _time_slice;
while (_no_entries > 0) {
if (is_double_geq(limit, _array[_tail_index].end_time())) {
_tail_index = trim_index(_tail_index + 1);
--_no_entries;
} else
return;
}
guarantee(_no_entries == 0, "should have no entries in the array");
}
double G1MMUTrackerQueue::calculate_gc_time(double current_time) {
double gc_time = 0.0;
double limit = current_time - _time_slice;
for (int i = 0; i < _no_entries; ++i) {
int index = trim_index(_tail_index + i);
G1MMUTrackerQueueElem *elem = &_array[index];
if (elem->end_time() > limit) {
if (elem->start_time() > limit)
gc_time += elem->duration();
else
gc_time += elem->end_time() - limit;
}
}
return gc_time;
}
void G1MMUTrackerQueue::add_pause(double start, double end, bool gc_thread) {
double longest_allowed = longest_pause_internal(start);
if (longest_allowed < 0.0)
longest_allowed = 0.0;
double duration = end - start;
remove_expired_entries(end);
if (_no_entries == QueueLength) {
// OK, right now when we fill up we bomb out
// there are a few ways of dealing with this "gracefully"
// increase the array size (:-)
// remove the oldest entry (this might allow more GC time for
// the time slice than what's allowed)
// consolidate the two entries with the minimum gap between them
// (this might allow less GC time than what's allowed)
guarantee(0, "array full, currently we can't recover");
}
_head_index = trim_index(_head_index + 1);
++_no_entries;
_array[_head_index] = G1MMUTrackerQueueElem(start, end);
}
// basically the _internal call does not remove expired entries
// this is for trying things out in the future and a couple
// of other places (debugging)
double G1MMUTrackerQueue::longest_pause(double current_time) {
if (_DISABLE_MMU)
return _max_gc_time;
MutexLockerEx x(MMUTracker_lock, Mutex::_no_safepoint_check_flag);
remove_expired_entries(current_time);
return longest_pause_internal(current_time);
}
double G1MMUTrackerQueue::longest_pause_internal(double current_time) {
double target_time = _max_gc_time;
while( 1 ) {
double gc_time =
calculate_gc_time(current_time + target_time) + _conc_overhead_time_sec;
double diff = target_time + gc_time - _max_gc_time;
if (!is_double_leq_0(diff)) {
target_time -= diff;
if (is_double_leq_0(target_time)) {
target_time = -1.0;
break;
}
} else {
break;
}
}
return target_time;
}
// basically the _internal call does not remove expired entries
// this is for trying things out in the future and a couple
// of other places (debugging)
double G1MMUTrackerQueue::when_sec(double current_time, double pause_time) {
if (_DISABLE_MMU)
return 0.0;
MutexLockerEx x(MMUTracker_lock, Mutex::_no_safepoint_check_flag);
remove_expired_entries(current_time);
return when_internal(current_time, pause_time);
}
double G1MMUTrackerQueue::when_internal(double current_time,
double pause_time) {
// if the pause is over the maximum, just assume that it's the maximum
double adjusted_pause_time =
(pause_time > max_gc_time()) ? max_gc_time() : pause_time;
double earliest_end = current_time + adjusted_pause_time;
double limit = earliest_end - _time_slice;
double gc_time = calculate_gc_time(earliest_end);
double diff = gc_time + adjusted_pause_time - max_gc_time();
if (is_double_leq_0(diff))
return 0.0;
int index = _tail_index;
while ( 1 ) {
G1MMUTrackerQueueElem *elem = &_array[index];
if (elem->end_time() > limit) {
if (elem->start_time() > limit)
diff -= elem->duration();
else
diff -= elem->end_time() - limit;
if (is_double_leq_0(diff))
return elem->end_time() + diff + _time_slice - adjusted_pause_time - current_time;
}
index = trim_index(index+1);
guarantee(index != trim_index(_head_index + 1), "should not go past head");
}
}
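The bookkeeping above enforces a soft real-time invariant: within any window of _time_slice seconds, total GC time must not exceed _max_gc_time. A standalone sketch of the window calculation (illustrative only; it omits the circular queue, the concurrent-overhead adjustment, and the iteration in longest_pause_internal() that accounts for the window shifting as the pause itself runs):
#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <vector>

struct Pause {
  double start, end;
  Pause(double s, double e) : start(s), end(e) {}
};

// GC time that falls inside the window [now - slice, now].
double gc_time_in_window(const std::vector<Pause>& pauses,
                         double now, double slice) {
  double limit = now - slice;
  double gc_time = 0.0;
  for (size_t i = 0; i < pauses.size(); ++i) {
    if (pauses[i].end > limit) {
      gc_time += pauses[i].end - std::max(pauses[i].start, limit);
    }
  }
  return gc_time;
}

int main() {
  // Time slice of 20 ms with at most 5 ms of GC allowed per slice.
  double slice = 0.020, max_gc = 0.005, now = 0.100;
  std::vector<Pause> pauses;
  pauses.push_back(Pause(0.095, 0.098));                 // a 3 ms pause
  double used  = gc_time_in_window(pauses, now, slice);  // 3 ms
  double slack = std::max(0.0, max_gc - used);           // 2 ms budget left
  std::printf("GC in window: %.3f s, pause budget now: %.3f s\n", used, slack);
  return 0;
}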

View File

@ -0,0 +1,130 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
// Keeps track of the GC work and decides when it is OK to do GC work
// and for how long so that the MMU invariants are maintained.
/***** ALL TIMES ARE IN SECS!!!!!!! *****/
// this is the "interface"
class G1MMUTracker {
protected:
double _time_slice;
double _max_gc_time; // this is per time slice
double _conc_overhead_time_sec;
public:
G1MMUTracker(double time_slice, double max_gc_time);
void update_conc_overhead(double conc_overhead);
virtual void add_pause(double start, double end, bool gc_thread) = 0;
virtual double longest_pause(double current_time) = 0;
virtual double when_sec(double current_time, double pause_time) = 0;
double max_gc_time() {
return _max_gc_time - _conc_overhead_time_sec;
}
inline bool now_max_gc(double current_time) {
return when_sec(current_time, max_gc_time()) < 0.00001;
}
inline double when_max_gc_sec(double current_time) {
return when_sec(current_time, max_gc_time());
}
inline jlong when_max_gc_ms(double current_time) {
double when = when_max_gc_sec(current_time);
return (jlong) (when * 1000.0);
}
inline jlong when_ms(double current_time, double pause_time) {
double when = when_sec(current_time, pause_time);
return (jlong) (when * 1000.0);
}
};
class G1MMUTrackerQueueElem {
private:
double _start_time;
double _end_time;
public:
inline double start_time() { return _start_time; }
inline double end_time() { return _end_time; }
inline double duration() { return _end_time - _start_time; }
G1MMUTrackerQueueElem() {
_start_time = 0.0;
_end_time = 0.0;
}
G1MMUTrackerQueueElem(double start_time, double end_time) {
_start_time = start_time;
_end_time = end_time;
}
};
// this is an implementation of the MMUTracker using a (fixed-size) queue
// that keeps track of all the recent pause times
class G1MMUTrackerQueue: public G1MMUTracker {
private:
enum PrivateConstants {
QueueLength = 64
};
// The array keeps track of all the pauses that fall within a time
// slice (the last time slice during which pauses took place).
// The data structure implemented is a circular queue.
// Head "points" to the most recent addition, tail to the oldest one.
// The array is of fixed size and I don't think we'll need more than
// two or three entries with the current behaviour of G1 pauses.
// If the array is full, an easy fix is to look for the pauses with
// the shortest gap between them and consolidate them.
G1MMUTrackerQueueElem _array[QueueLength];
int _head_index;
int _tail_index;
int _no_entries;
inline int trim_index(int index) {
return (index + QueueLength) % QueueLength;
}
void remove_expired_entries(double current_time);
double calculate_gc_time(double current_time);
double longest_pause_internal(double current_time);
double when_internal(double current_time, double pause_time);
public:
G1MMUTrackerQueue(double time_slice, double max_gc_time);
virtual void add_pause(double start, double end, bool gc_thread);
virtual double longest_pause(double current_time);
virtual double when_sec(double current_time, double pause_time);
};

View File

@ -0,0 +1,385 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
#include "incls/_precompiled.incl"
#include "incls/_g1MarkSweep.cpp.incl"
class HeapRegion;
void G1MarkSweep::invoke_at_safepoint(ReferenceProcessor* rp,
bool clear_all_softrefs) {
assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
// hook up weak ref data so it can be used during Mark-Sweep
assert(GenMarkSweep::ref_processor() == NULL, "no stomping");
GenMarkSweep::_ref_processor = rp;
assert(rp != NULL, "should be non-NULL");
// When collecting the permanent generation methodOops may be moving,
// so we either have to flush all bcp data or convert it into bci.
CodeCache::gc_prologue();
Threads::gc_prologue();
// Increment the invocation count for the permanent generation, since it is
// implicitly collected whenever we do a full mark sweep collection.
SharedHeap* sh = SharedHeap::heap();
sh->perm_gen()->stat_record()->invocations++;
bool marked_for_unloading = false;
allocate_stacks();
// We should save the marks of the currently locked biased monitors.
// The marking doesn't preserve the marks of biased objects.
BiasedLocking::preserve_marks();
mark_sweep_phase1(marked_for_unloading, clear_all_softrefs);
if (G1VerifyConcMark) {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
g1h->checkConcurrentMark();
}
mark_sweep_phase2();
// Don't add any more derived pointers during phase3
COMPILER2_PRESENT(DerivedPointerTable::set_active(false));
mark_sweep_phase3();
mark_sweep_phase4();
GenMarkSweep::restore_marks();
BiasedLocking::restore_marks();
GenMarkSweep::deallocate_stacks();
// We must invalidate the perm-gen rs, so that it gets rebuilt.
GenRemSet* rs = sh->rem_set();
rs->invalidate(sh->perm_gen()->used_region(), true /*whole_heap*/);
// "free at last gc" is calculated from these.
// CHF: cheating for now!!!
// Universe::set_heap_capacity_at_last_gc(Universe::heap()->capacity());
// Universe::set_heap_used_at_last_gc(Universe::heap()->used());
Threads::gc_epilogue();
CodeCache::gc_epilogue();
// refs processing: clean slate
GenMarkSweep::_ref_processor = NULL;
}
void G1MarkSweep::allocate_stacks() {
GenMarkSweep::_preserved_count_max = 0;
GenMarkSweep::_preserved_marks = NULL;
GenMarkSweep::_preserved_count = 0;
GenMarkSweep::_preserved_mark_stack = NULL;
GenMarkSweep::_preserved_oop_stack = NULL;
GenMarkSweep::_marking_stack =
new (ResourceObj::C_HEAP) GrowableArray<oop>(4000, true);
size_t size = SystemDictionary::number_of_classes() * 2;
GenMarkSweep::_revisit_klass_stack =
new (ResourceObj::C_HEAP) GrowableArray<Klass*>((int)size, true);
}
void G1MarkSweep::mark_sweep_phase1(bool& marked_for_unloading,
bool clear_all_softrefs) {
// Recursively traverse all live objects and mark them
EventMark m("1 mark object");
TraceTime tm("phase 1", PrintGC && Verbose, true, gclog_or_tty);
GenMarkSweep::trace(" 1");
SharedHeap* sh = SharedHeap::heap();
sh->process_strong_roots(true, // Collecting permanent generation.
SharedHeap::SO_SystemClasses,
&GenMarkSweep::follow_root_closure,
&GenMarkSweep::follow_root_closure);
// Process reference objects found during marking
ReferencePolicy *soft_ref_policy;
if (clear_all_softrefs) {
soft_ref_policy = new AlwaysClearPolicy();
} else {
#ifdef COMPILER2
soft_ref_policy = new LRUMaxHeapPolicy();
#else
soft_ref_policy = new LRUCurrentHeapPolicy();
#endif
}
assert(soft_ref_policy != NULL,"No soft reference policy");
GenMarkSweep::ref_processor()->process_discovered_references(
soft_ref_policy,
&GenMarkSweep::is_alive,
&GenMarkSweep::keep_alive,
&GenMarkSweep::follow_stack_closure,
NULL);
// Follow system dictionary roots and unload classes
bool purged_class = SystemDictionary::do_unloading(&GenMarkSweep::is_alive);
assert(GenMarkSweep::_marking_stack->is_empty(),
"stack should be empty by now");
// Follow code cache roots (has to be done after system dictionary,
// assumes all live klasses are marked)
CodeCache::do_unloading(&GenMarkSweep::is_alive,
&GenMarkSweep::keep_alive,
purged_class);
GenMarkSweep::follow_stack();
// Update subklass/sibling/implementor links of live klasses
GenMarkSweep::follow_weak_klass_links();
assert(GenMarkSweep::_marking_stack->is_empty(),
"stack should be empty by now");
// Visit symbol and interned string tables and delete unmarked oops
SymbolTable::unlink(&GenMarkSweep::is_alive);
StringTable::unlink(&GenMarkSweep::is_alive);
assert(GenMarkSweep::_marking_stack->is_empty(),
"stack should be empty by now");
}
class G1PrepareCompactClosure: public HeapRegionClosure {
ModRefBarrierSet* _mrbs;
CompactPoint _cp;
bool _popular_only;
void free_humongous_region(HeapRegion* hr) {
HeapWord* bot = hr->bottom();
HeapWord* end = hr->end();
assert(hr->startsHumongous(),
"Only the start of a humongous region should be freed.");
G1CollectedHeap::heap()->free_region(hr);
hr->prepare_for_compaction(&_cp);
// Also clear the part of the card table that will be unused after
// compaction.
_mrbs->clear(MemRegion(hr->compaction_top(), hr->end()));
}
public:
G1PrepareCompactClosure(CompactibleSpace* cs, bool popular_only) :
_cp(NULL, cs, cs->initialize_threshold()),
_mrbs(G1CollectedHeap::heap()->mr_bs()),
_popular_only(popular_only)
{}
bool doHeapRegion(HeapRegion* hr) {
if (_popular_only && !hr->popular())
return true; // terminate early
else if (!_popular_only && hr->popular())
return false; // skip this one.
if (hr->isHumongous()) {
if (hr->startsHumongous()) {
oop obj = oop(hr->bottom());
if (obj->is_gc_marked()) {
obj->forward_to(obj);
} else {
free_humongous_region(hr);
}
} else {
assert(hr->continuesHumongous(), "Invalid humongous.");
}
} else {
hr->prepare_for_compaction(&_cp);
// Also clear the part of the card table that will be unused after
// compaction.
_mrbs->clear(MemRegion(hr->compaction_top(), hr->end()));
}
return false;
}
};
// Stolen verbatim from g1CollectedHeap.cpp
class FindFirstRegionClosure: public HeapRegionClosure {
HeapRegion* _a_region;
bool _find_popular;
public:
FindFirstRegionClosure(bool find_popular) :
_a_region(NULL), _find_popular(find_popular) {}
bool doHeapRegion(HeapRegion* r) {
if (r->popular() == _find_popular) {
_a_region = r;
return true;
} else {
return false;
}
}
HeapRegion* result() { return _a_region; }
};
void G1MarkSweep::mark_sweep_phase2() {
// Now all live objects are marked, compute the new object addresses.
// It is imperative that we traverse perm_gen LAST. If dead space is
// allowed, a range of dead objects may get overwritten by a dead int
// array. If perm_gen is not traversed last a klassOop may get
// overwritten. This is fine since it is dead, but if the class has dead
// instances we have to skip them, and in order to find their size we
// need the klassOop!
//
// It is not required that we traverse spaces in the same order in
// phase2, phase3 and phase4, but the ValidateMarkSweep live oops
// tracking expects us to do so. See comment under phase4.
G1CollectedHeap* g1h = G1CollectedHeap::heap();
Generation* pg = g1h->perm_gen();
EventMark m("2 compute new addresses");
TraceTime tm("phase 2", PrintGC && Verbose, true, gclog_or_tty);
GenMarkSweep::trace("2");
// First we compact the popular regions.
if (G1NumPopularRegions > 0) {
CompactibleSpace* sp = g1h->first_compactible_space();
FindFirstRegionClosure cl(true /*find_popular*/);
g1h->heap_region_iterate(&cl);
HeapRegion *r = cl.result();
assert(r->popular(), "should have found a popular region.");
assert(r == sp, "first popular heap region should "
"== first compactible space");
G1PrepareCompactClosure blk(sp, true/*popular_only*/);
g1h->heap_region_iterate(&blk);
}
// Now we do the regular regions.
FindFirstRegionClosure cl(false /*find_popular*/);
g1h->heap_region_iterate(&cl);
HeapRegion *r = cl.result();
assert(!r->popular(), "should have found a non-popular region.");
CompactibleSpace* sp = r;
if (r->isHumongous() && oop(r->bottom())->is_gc_marked()) {
sp = r->next_compaction_space();
}
G1PrepareCompactClosure blk(sp, false/*popular_only*/);
g1h->heap_region_iterate(&blk);
CompactPoint perm_cp(pg, NULL, NULL);
pg->prepare_for_compaction(&perm_cp);
}
class G1AdjustPointersClosure: public HeapRegionClosure {
public:
bool doHeapRegion(HeapRegion* r) {
if (r->isHumongous()) {
if (r->startsHumongous()) {
// We must adjust the pointers on the single H object.
oop obj = oop(r->bottom());
debug_only(GenMarkSweep::track_interior_pointers(obj));
// point all the oops to the new location
obj->adjust_pointers();
debug_only(GenMarkSweep::check_interior_pointers());
}
} else {
// This really ought to be "as_CompactibleSpace"...
r->adjust_pointers();
}
return false;
}
};
void G1MarkSweep::mark_sweep_phase3() {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
Generation* pg = g1h->perm_gen();
// Adjust the pointers to reflect the new locations
EventMark m("3 adjust pointers");
TraceTime tm("phase 3", PrintGC && Verbose, true, gclog_or_tty);
GenMarkSweep::trace("3");
SharedHeap* sh = SharedHeap::heap();
sh->process_strong_roots(true, // Collecting permanent generation.
SharedHeap::SO_AllClasses,
&GenMarkSweep::adjust_root_pointer_closure,
&GenMarkSweep::adjust_pointer_closure);
g1h->ref_processor()->weak_oops_do(&GenMarkSweep::adjust_root_pointer_closure);
// Now adjust pointers in remaining weak roots. (All of which should
// have been cleared if they pointed to non-surviving objects.)
g1h->g1_process_weak_roots(&GenMarkSweep::adjust_root_pointer_closure,
&GenMarkSweep::adjust_pointer_closure);
GenMarkSweep::adjust_marks();
G1AdjustPointersClosure blk;
g1h->heap_region_iterate(&blk);
pg->adjust_pointers();
}
class G1SpaceCompactClosure: public HeapRegionClosure {
public:
G1SpaceCompactClosure() {}
bool doHeapRegion(HeapRegion* hr) {
if (hr->isHumongous()) {
if (hr->startsHumongous()) {
oop obj = oop(hr->bottom());
if (obj->is_gc_marked()) {
obj->init_mark();
} else {
assert(hr->is_empty(), "Should have been cleared in phase 2.");
}
hr->reset_during_compaction();
}
} else {
hr->compact();
}
return false;
}
};
void G1MarkSweep::mark_sweep_phase4() {
// All pointers are now adjusted, move objects accordingly
// It is imperative that we traverse perm_gen first in phase4. All
// classes must be allocated earlier than their instances, and traversing
// perm_gen first makes sure that all klassOops have moved to their new
// location before any instance does a dispatch through its klass!
// The ValidateMarkSweep live oops tracking expects us to traverse spaces
// in the same order in phase2, phase3 and phase4. We don't quite do that
// here (perm_gen first rather than last), so we tell the validate code
// to use a higher index (saved from phase2) when verifying perm_gen.
G1CollectedHeap* g1h = G1CollectedHeap::heap();
Generation* pg = g1h->perm_gen();
EventMark m("4 compact heap");
TraceTime tm("phase 4", PrintGC && Verbose, true, gclog_or_tty);
GenMarkSweep::trace("4");
pg->compact();
G1SpaceCompactClosure blk;
g1h->heap_region_iterate(&blk);
}
// Local Variables: ***
// c-indentation-style: gnu ***
// End: ***

View File

@ -0,0 +1,57 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
class ReferenceProcessor;
// G1MarkSweep takes care of global mark-compact garbage collection for a
// G1CollectedHeap using a four-phase pointer forwarding algorithm. All
// generations are assumed to support marking; those that can also support
// compaction.
//
// Class unloading will only occur when a full gc is invoked.
class G1MarkSweep : AllStatic {
friend class VM_G1MarkSweep;
friend class Scavenge;
public:
static void invoke_at_safepoint(ReferenceProcessor* rp,
bool clear_all_softrefs);
private:
// Mark live objects
static void mark_sweep_phase1(bool& marked_for_deopt,
bool clear_all_softrefs);
// Calculate new addresses
static void mark_sweep_phase2();
// Update pointers
static void mark_sweep_phase3();
// Move objects to new positions
static void mark_sweep_phase4();
static void allocate_stacks();
};
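Phases 2 and 4 are classic sliding compaction: compute a forwarding address for every live object, then move the objects. A simplified standalone sketch of those two phases over a toy heap (illustrative only; the real collector also adjusts interior pointers in phase 3):
#include <cassert>
#include <cstddef>
#include <vector>

// Toy heap: a sequence of objects, each with a start address (word index),
// a size in words, a mark bit, and a forwarding address.
struct ToyObj {
  size_t addr;
  size_t size;
  bool   marked;
  size_t forwardee;
};

// "Phase 2": slide live objects toward address 0, recording forwardees.
void compute_new_addresses(std::vector<ToyObj>& heap) {
  size_t compact_top = 0;
  for (size_t i = 0; i < heap.size(); ++i) {
    if (heap[i].marked) {
      heap[i].forwardee = compact_top;
      compact_top += heap[i].size;
    }
  }
}

// "Phase 4": move each live object to its forwarding address.
void compact(std::vector<ToyObj>& heap) {
  for (size_t i = 0; i < heap.size(); ++i) {
    if (heap[i].marked) heap[i].addr = heap[i].forwardee;
  }
}

int main() {
  std::vector<ToyObj> heap;
  ToyObj a = {0, 4, true, 0}, b = {4, 6, false, 0}, c = {10, 2, true, 0};
  heap.push_back(a); heap.push_back(b); heap.push_back(c);
  compute_new_addresses(heap);   // live objects slide left over dead ones
  compact(heap);
  assert(heap[0].addr == 0);     // "a" stays put
  assert(heap[2].addr == 4);     // "c" slides into the gap left by "b"
  return 0;
}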

View File

@ -0,0 +1,202 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
class HeapRegion;
class G1CollectedHeap;
class G1RemSet;
class HRInto_G1RemSet;
class G1RemSet;
class ConcurrentMark;
class DirtyCardToOopClosure;
class CMBitMap;
class CMMarkStack;
class G1ParScanThreadState;
// A class that scans oops in a given heap region (much as OopsInGenClosure
// scans oops in a generation.)
class OopsInHeapRegionClosure: public OopsInGenClosure {
protected:
HeapRegion* _from;
public:
virtual void set_region(HeapRegion* from) { _from = from; }
};
class G1ScanAndBalanceClosure : public OopClosure {
G1CollectedHeap* _g1;
static int _nq;
public:
G1ScanAndBalanceClosure(G1CollectedHeap* g1) : _g1(g1) { }
inline void do_oop_nv(oop* p);
inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
virtual void do_oop(oop* p);
virtual void do_oop(narrowOop* p) { guarantee(false, "NYI"); }
};
class G1ParClosureSuper : public OopsInHeapRegionClosure {
protected:
G1CollectedHeap* _g1;
G1RemSet* _g1_rem;
ConcurrentMark* _cm;
G1ParScanThreadState* _par_scan_state;
public:
G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state);
bool apply_to_weak_ref_discovered_field() { return true; }
};
class G1ParScanClosure : public G1ParClosureSuper {
public:
G1ParScanClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
G1ParClosureSuper(g1, par_scan_state) { }
void do_oop_nv(oop* p); // should be made inline
inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
virtual void do_oop(oop* p) { do_oop_nv(p); }
virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
};
#define G1_PARTIAL_ARRAY_MASK 1
class G1ParScanPartialArrayClosure : public G1ParClosureSuper {
G1ParScanClosure _scanner;
template <class T> void process_array_chunk(oop obj, int start, int end);
public:
G1ParScanPartialArrayClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
G1ParClosureSuper(g1, par_scan_state), _scanner(g1, par_scan_state) { }
void do_oop_nv(oop* p);
void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
virtual void do_oop(oop* p) { do_oop_nv(p); }
virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
};
class G1ParCopyHelper : public G1ParClosureSuper {
G1ParScanClosure *_scanner;
protected:
void mark_forwardee(oop* p);
oop copy_to_survivor_space(oop obj);
public:
G1ParCopyHelper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state,
G1ParScanClosure *scanner) :
G1ParClosureSuper(g1, par_scan_state), _scanner(scanner) { }
};
template<bool do_gen_barrier, G1Barrier barrier, bool do_mark_forwardee>
class G1ParCopyClosure : public G1ParCopyHelper {
G1ParScanClosure _scanner;
void do_oop_work(oop* p);
void do_oop_work(narrowOop* p) { guarantee(false, "NYI"); }
public:
G1ParCopyClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
_scanner(g1, par_scan_state), G1ParCopyHelper(g1, par_scan_state, &_scanner) { }
inline void do_oop_nv(oop* p) {
do_oop_work(p);
if (do_mark_forwardee)
mark_forwardee(p);
}
inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
virtual void do_oop(oop* p) { do_oop_nv(p); }
virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
};
typedef G1ParCopyClosure<false, G1BarrierNone, false> G1ParScanExtRootClosure;
typedef G1ParCopyClosure<true, G1BarrierNone, false> G1ParScanPermClosure;
typedef G1ParCopyClosure<false, G1BarrierNone, true> G1ParScanAndMarkExtRootClosure;
typedef G1ParCopyClosure<true, G1BarrierNone, true> G1ParScanAndMarkPermClosure;
typedef G1ParCopyClosure<false, G1BarrierRS, false> G1ParScanHeapRSClosure;
typedef G1ParCopyClosure<false, G1BarrierRS, true> G1ParScanAndMarkHeapRSClosure;
typedef G1ParCopyClosure<false, G1BarrierEvac, false> G1ParScanHeapEvacClosure;
class FilterIntoCSClosure: public OopClosure {
G1CollectedHeap* _g1;
OopClosure* _oc;
DirtyCardToOopClosure* _dcto_cl;
public:
FilterIntoCSClosure( DirtyCardToOopClosure* dcto_cl,
G1CollectedHeap* g1, OopClosure* oc) :
_dcto_cl(dcto_cl), _g1(g1), _oc(oc)
{}
inline void do_oop_nv(oop* p);
inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
virtual void do_oop(oop* p);
virtual void do_oop(narrowOop* p) { guarantee(false, "NYI"); }
bool apply_to_weak_ref_discovered_field() { return true; }
bool do_header() { return false; }
};
class FilterInHeapRegionAndIntoCSClosure : public OopsInHeapRegionClosure {
G1CollectedHeap* _g1;
OopsInHeapRegionClosure* _oc;
public:
FilterInHeapRegionAndIntoCSClosure(G1CollectedHeap* g1,
OopsInHeapRegionClosure* oc) :
_g1(g1), _oc(oc)
{}
inline void do_oop_nv(oop* p);
inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
virtual void do_oop(oop* p);
virtual void do_oop(narrowOop* p) { guarantee(false, "NYI"); }
bool apply_to_weak_ref_discovered_field() { return true; }
bool do_header() { return false; }
void set_region(HeapRegion* from) {
_oc->set_region(from);
}
};
class FilterAndMarkInHeapRegionAndIntoCSClosure : public OopsInHeapRegionClosure {
G1CollectedHeap* _g1;
ConcurrentMark* _cm;
OopsInHeapRegionClosure* _oc;
public:
FilterAndMarkInHeapRegionAndIntoCSClosure(G1CollectedHeap* g1,
OopsInHeapRegionClosure* oc,
ConcurrentMark* cm)
: _g1(g1), _oc(oc), _cm(cm) { }
inline void do_oop_nv(oop* p);
inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
virtual void do_oop(oop* p);
virtual void do_oop(narrowOop* p) { guarantee(false, "NYI"); }
bool apply_to_weak_ref_discovered_field() { return true; }
bool do_header() { return false; }
void set_region(HeapRegion* from) {
_oc->set_region(from);
}
};
class FilterOutOfRegionClosure: public OopClosure {
HeapWord* _r_bottom;
HeapWord* _r_end;
OopClosure* _oc;
int _out_of_region;
public:
FilterOutOfRegionClosure(HeapRegion* r, OopClosure* oc);
inline void do_oop_nv(oop* p);
inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
virtual void do_oop(oop* p);
virtual void do_oop(narrowOop* p) { guarantee(false, "NYI"); }
bool apply_to_weak_ref_discovered_field() { return true; }
bool do_header() { return false; }
int out_of_region() { return _out_of_region; }
};
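The filtering closures above share one pattern: wrap another closure and forward only the references that satisfy a predicate (pointing into the collection set, or outside a given region). A minimal standalone sketch of that wrapping, with a generic predicate standing in for the heap queries (illustrative only):
#include <cstdio>

// A minimal OopClosure-style interface: apply() is invoked on each slot.
struct ToyClosure {
  virtual ~ToyClosure() {}
  virtual void apply(int* slot) = 0;
};

struct PrintClosure : public ToyClosure {
  virtual void apply(int* slot) { std::printf("visiting %d\n", *slot); }
};

// Filter wrapper: forwards a slot to the wrapped closure only if the value
// satisfies the predicate, much as FilterIntoCSClosure forwards only
// references that point into the collection set.
struct FilterClosure : public ToyClosure {
  ToyClosure* _inner;
  bool (*_pred)(int);
  FilterClosure(ToyClosure* inner, bool (*pred)(int))
      : _inner(inner), _pred(pred) {}
  virtual void apply(int* slot) {
    if (_pred(*slot)) _inner->apply(slot);
  }
};

static bool is_even(int v) { return (v % 2) == 0; }

int main() {
  int slots[] = {1, 2, 3, 4};
  PrintClosure print;
  FilterClosure filter(&print, &is_even);
  for (int i = 0; i < 4; ++i) filter.apply(&slots[i]);   // visits 2 and 4
  return 0;
}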

View File

@ -0,0 +1,112 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
/*
* This really ought to be an inline function, but apparently the C++
* compiler sometimes sees fit to ignore inline declarations. Sigh.
*/
// This must be ifdef'ed because the counting it controls is in a
// perf-critical inner loop.
#define FILTERINTOCSCLOSURE_DOHISTOGRAMCOUNT 0
inline void FilterIntoCSClosure::do_oop_nv(oop* p) {
oop obj = *p;
if (obj != NULL && _g1->obj_in_cs(obj)) {
_oc->do_oop(p);
#if FILTERINTOCSCLOSURE_DOHISTOGRAMCOUNT
_dcto_cl->incr_count();
#endif
}
}
inline void FilterIntoCSClosure::do_oop(oop* p)
{
do_oop_nv(p);
}
#define FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT 0
inline void FilterOutOfRegionClosure::do_oop_nv(oop* p) {
oop obj = *p;
HeapWord* obj_hw = (HeapWord*)obj;
if (obj_hw != NULL && (obj_hw < _r_bottom || obj_hw >= _r_end)) {
_oc->do_oop(p);
#if FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT
_out_of_region++;
#endif
}
}
inline void FilterOutOfRegionClosure::do_oop(oop* p)
{
do_oop_nv(p);
}
inline void FilterInHeapRegionAndIntoCSClosure::do_oop_nv(oop* p) {
oop obj = *p;
if (obj != NULL && _g1->obj_in_cs(obj))
_oc->do_oop(p);
}
inline void FilterInHeapRegionAndIntoCSClosure::do_oop(oop* p)
{
do_oop_nv(p);
}
inline void FilterAndMarkInHeapRegionAndIntoCSClosure::do_oop_nv(oop* p) {
oop obj = *p;
if (obj != NULL) {
HeapRegion* hr = _g1->heap_region_containing((HeapWord*) obj);
if (hr != NULL) {
if (hr->in_collection_set())
_oc->do_oop(p);
else if (!hr->is_young())
_cm->grayRoot(obj);
}
}
}
inline void FilterAndMarkInHeapRegionAndIntoCSClosure::do_oop(oop* p)
{
do_oop_nv(p);
}
inline void G1ScanAndBalanceClosure::do_oop_nv(oop* p) {
RefToScanQueue* q;
if (ParallelGCThreads > 0) {
// Deal the work out equally.
_nq = (_nq + 1) % ParallelGCThreads;
q = _g1->task_queue(_nq);
} else {
q = _g1->task_queue(0);
}
bool nooverflow = q->push(p);
guarantee(nooverflow, "Overflow during popularity region processing");
}
inline void G1ScanAndBalanceClosure::do_oop(oop* p) {
do_oop_nv(p);
}
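G1ScanAndBalanceClosure::do_oop_nv() above deals references out across the per-worker task queues in round-robin fashion. A standalone sketch of that balancing step, with plain vectors standing in for RefToScanQueue (illustrative only):
#include <cstddef>
#include <vector>

// Deal work items across n queues round-robin, as do_oop_nv() does with
// _nq = (_nq + 1) % ParallelGCThreads.
void balance(const std::vector<int>& work,
             std::vector< std::vector<int> >& queues) {
  size_t next = 0;
  for (size_t i = 0; i < work.size(); ++i) {
    queues[next].push_back(work[i]);
    next = (next + 1) % queues.size();
  }
}

int main() {
  std::vector<int> work;
  for (int i = 0; i < 10; ++i) work.push_back(i);
  std::vector< std::vector<int> > queues(4);
  balance(work, queues);   // queue 0 gets 0,4,8; queue 1 gets 1,5,9; ...
  return 0;
}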

File diff suppressed because it is too large

View File

@ -0,0 +1,216 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
// A G1RemSet provides ways of iterating over pointers into a selected
// collection set.
class G1CollectedHeap;
class CardTableModRefBarrierSet;
class HRInto_G1RemSet;
class ConcurrentG1Refine;
class G1RemSet {
protected:
G1CollectedHeap* _g1;
unsigned _conc_refine_traversals;
unsigned _conc_refine_cards;
size_t n_workers();
public:
G1RemSet(G1CollectedHeap* g1) :
_g1(g1), _conc_refine_traversals(0), _conc_refine_cards(0)
{}
// Invoke "blk->do_oop" on all pointers into the CS in object in regions
// outside the CS (having invoked "blk->set_region" to set the "from"
// region correctly beforehand.) The "worker_i" param is for the
// parallel case where the number of the worker thread calling this
// function can be helpful in partitioning the work to be done. It
// should be the same as the "i" passed to the calling thread's
// work(i) function. In the sequential case this param will be ignored.
virtual void oops_into_collection_set_do(OopsInHeapRegionClosure* blk,
int worker_i) = 0;
// Prepare for and cleanup after an oops_into_collection_set_do
// call. Must call each of these once before and after (in sequential
// code) any threads call oops into collection set do. (This offers an
// opportunity for sequential setup and teardown of structures needed by a
// parallel iteration over the CS's RS.)
virtual void prepare_for_oops_into_collection_set_do() = 0;
virtual void cleanup_after_oops_into_collection_set_do() = 0;
// If "this" is of the given subtype, return "this", else "NULL".
virtual HRInto_G1RemSet* as_HRInto_G1RemSet() { return NULL; }
// Record, if necessary, the fact that *p (where "p" is in region "from")
// has changed to its new value.
virtual void write_ref(HeapRegion* from, oop* p) = 0;
virtual void par_write_ref(HeapRegion* from, oop* p, int tid) = 0;
// Requires "region_bm" and "card_bm" to be bitmaps with 1 bit per region
// or card, respectively, such that a region or card with a corresponding
// 0 bit contains no part of any live object. Eliminates any remembered
// set entries that correspond to dead heap ranges.
virtual void scrub(BitMap* region_bm, BitMap* card_bm) = 0;
// Like the above, but assumes it is called in parallel: "worker_num" is the
// parallel thread id of the current thread, and "claim_val" is the
// value that should be used to claim heap regions.
virtual void scrub_par(BitMap* region_bm, BitMap* card_bm,
int worker_num, int claim_val) = 0;
// Do any "refinement" activity that might be appropriate to the given
// G1RemSet. If "refinement" has iterateive "passes", do one pass.
// If "t" is non-NULL, it is the thread performing the refinement.
// Default implementation does nothing.
virtual void concurrentRefinementPass(ConcurrentG1Refine* cg1r) {}
// Refine the card corresponding to "card_ptr". If "sts" is non-NULL,
// join and leave around parts that must be atomic wrt GC. (NULL means
// being done at a safepoint.)
virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {}
unsigned conc_refine_cards() { return _conc_refine_cards; }
// Print any relevant summary info.
virtual void print_summary_info() {}
// Prepare remembered set for verification.
virtual void prepare_for_verify() {};
};
// The simplest possible G1RemSet: iterates over all objects in non-CS
// regions, searching for pointers into the CS.
class StupidG1RemSet: public G1RemSet {
public:
StupidG1RemSet(G1CollectedHeap* g1) : G1RemSet(g1) {}
void oops_into_collection_set_do(OopsInHeapRegionClosure* blk,
int worker_i);
void prepare_for_oops_into_collection_set_do() {}
void cleanup_after_oops_into_collection_set_do() {}
// Nothing is necessary in the version below.
void write_ref(HeapRegion* from, oop* p) {}
void par_write_ref(HeapRegion* from, oop* p, int tid) {}
void scrub(BitMap* region_bm, BitMap* card_bm) {}
void scrub_par(BitMap* region_bm, BitMap* card_bm,
int worker_num, int claim_val) {}
};
// A G1RemSet in which each heap region has a rem set that records the
// external heap references into it. Uses a mod ref bs to track updates,
// so that they can be used to update the individual region remsets.
class HRInto_G1RemSet: public G1RemSet {
protected:
enum SomePrivateConstants {
UpdateRStoMergeSync = 0,
MergeRStoDoDirtySync = 1,
DoDirtySync = 2,
LastSync = 3,
SeqTask = 0,
NumSeqTasks = 1
};
CardTableModRefBS* _ct_bs;
SubTasksDone* _seq_task;
G1CollectorPolicy* _g1p;
ConcurrentG1Refine* _cg1r;
size_t* _cards_scanned;
size_t _total_cards_scanned;
// _par_traversal_in_progress is "true" iff a parallel traversal is in
// progress. If so, then cards added to remembered sets should also have
// their references into the collection summarized in "_new_refs".
bool _par_traversal_in_progress;
void set_par_traversal(bool b);
GrowableArray<oop*>** _new_refs;
public:
// This is called to reset dual hash tables after the gc pause
// is finished and the initial hash table is no longer being
// scanned.
void cleanupHRRS();
HRInto_G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs);
~HRInto_G1RemSet();
void oops_into_collection_set_do(OopsInHeapRegionClosure* blk,
int worker_i);
void prepare_for_oops_into_collection_set_do();
void cleanup_after_oops_into_collection_set_do();
void scanRS(OopsInHeapRegionClosure* oc, int worker_i);
void scanNewRefsRS(OopsInHeapRegionClosure* oc, int worker_i);
void updateRS(int worker_i);
HeapRegion* calculateStartRegion(int i);
HRInto_G1RemSet* as_HRInto_G1RemSet() { return this; }
CardTableModRefBS* ct_bs() { return _ct_bs; }
size_t cardsScanned() { return _total_cards_scanned; }
// Record, if necessary, the fact that *p (where "p" is in region "from",
// which is required to be non-NULL) has changed to a new non-NULL value.
inline void write_ref(HeapRegion* from, oop* p);
// The "_nv" version is the same; it exists just so that it is not virtual.
inline void write_ref_nv(HeapRegion* from, oop* p);
inline bool self_forwarded(oop obj);
inline void par_write_ref(HeapRegion* from, oop* p, int tid);
void scrub(BitMap* region_bm, BitMap* card_bm);
void scrub_par(BitMap* region_bm, BitMap* card_bm,
int worker_num, int claim_val);
virtual void concurrentRefinementPass(ConcurrentG1Refine* t);
virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i);
virtual void print_summary_info();
virtual void prepare_for_verify();
};
#define G1_REM_SET_LOGGING 0
class CountNonCleanMemRegionClosure: public MemRegionClosure {
G1CollectedHeap* _g1;
int _n;
HeapWord* _start_first;
public:
CountNonCleanMemRegionClosure(G1CollectedHeap* g1) :
_g1(g1), _n(0), _start_first(NULL)
{}
void do_MemRegion(MemRegion mr);
int n() { return _n; };
HeapWord* start_first() { return _start_first; }
};
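A remembered set in this design records, per region, which cards elsewhere in the heap may hold references into that region, so that evacuation only has to scan those cards. A simplified standalone model of that bookkeeping (illustrative only; it ignores concurrency and the card table itself):
#include <cstddef>
#include <set>
#include <vector>

// One remembered set per region: the set of card indices (anywhere in the
// heap) that may contain a reference into that region.
class ToyRemSet {
  std::vector< std::set<size_t> > _cards_into_region;
public:
  explicit ToyRemSet(size_t num_regions) : _cards_into_region(num_regions) {}

  // Called from the write barrier: a reference stored in card "from_card"
  // now points into region "to_region".
  void record(size_t from_card, size_t to_region) {
    _cards_into_region[to_region].insert(from_card);
  }

  // At collection time only the recorded cards need scanning to find
  // references into the chosen region.
  const std::set<size_t>& cards_to_scan(size_t region) const {
    return _cards_into_region[region];
  }
};

int main() {
  ToyRemSet rs(16);
  rs.record(42, 3);            // card 42 holds a reference into region 3
  rs.record(77, 3);
  return rs.cards_to_scan(3).size() == 2 ? 0 : 1;
}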

Some files were not shown because too many files have changed in this diff