diff --git a/.hgtags-top-repo b/.hgtags-top-repo index 97e4ff1601b..65dc4a6d39c 100644 --- a/.hgtags-top-repo +++ b/.hgtags-top-repo @@ -12,3 +12,4 @@ bb1ef4ee3d2c8cbf43a37d372325a7952be590b9 jdk7-b33 143c1abedb7d3095eff0f9ee5fec9bf48e3490fc jdk7-b35 4b4f5fea8d7d0743f0c30d91fcd9bf9d96e5d2ad jdk7-b36 744554f5a3290e11c71cd2ddb1aff49e431f9ed0 jdk7-b37 +cc47a76899ed33a2c513cb688348244c9b5a1288 jdk7-b38 diff --git a/corba/.hgtags b/corba/.hgtags index 296a27ee08e..baa91282f79 100644 --- a/corba/.hgtags +++ b/corba/.hgtags @@ -12,3 +12,4 @@ ef6af34d75a7b44e77083f1d4ee47631fa09d3b4 jdk7-b31 3867c4d14a5bfdbb37c97b4874ccb0ee5343111c jdk7-b35 0723891eb8d1c27e67c54163af0b4cea05a4e036 jdk7-b36 59d5848bdedebe91cc2753acce78911bcb4a66db jdk7-b37 +08be802754b0296c91a7713b6d85a015dbcd5349 jdk7-b38 diff --git a/hotspot/.hgtags b/hotspot/.hgtags index 7cf7d998708..15a4678d894 100644 --- a/hotspot/.hgtags +++ b/hotspot/.hgtags @@ -12,3 +12,4 @@ b727c32788a906c04839516ae7443a085185a300 jdk7-b32 5fa96a5a7e76da7c8dad12486293a0456c2c116c jdk7-b35 e91159f921a58af3698e6479ea1fc5818da66d09 jdk7-b36 9ee9cf798b59e7d51f8c0a686959f313867a55d6 jdk7-b37 +d9bc824aa078573829bb66572af847e26e1bd12e jdk7-b38 diff --git a/hotspot/make/hotspot_distro b/hotspot/make/hotspot_distro index ab698d37c4e..51bad9748eb 100644 --- a/hotspot/make/hotspot_distro +++ b/hotspot/make/hotspot_distro @@ -1,4 +1,4 @@ -# +# # Copyright 2006-2008 Sun Microsystems, Inc. All Rights Reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # @@ -19,7 +19,7 @@ # Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, # CA 95054 USA or visit www.sun.com if you need additional information or # have any questions. 
-# +# # # This file format must remain compatible with both diff --git a/hotspot/make/hotspot_version b/hotspot/make/hotspot_version index 061bd9ebc32..efc6eb5223d 100644 --- a/hotspot/make/hotspot_version +++ b/hotspot/make/hotspot_version @@ -35,7 +35,7 @@ HOTSPOT_VM_COPYRIGHT=Copyright 2008 HS_MAJOR_VER=14 HS_MINOR_VER=0 -HS_BUILD_NUMBER=05 +HS_BUILD_NUMBER=06 JDK_MAJOR_VER=1 JDK_MINOR_VER=7 diff --git a/hotspot/make/linux/makefiles/top.make b/hotspot/make/linux/makefiles/top.make index 2a7579febd7..46362ca16fa 100644 --- a/hotspot/make/linux/makefiles/top.make +++ b/hotspot/make/linux/makefiles/top.make @@ -64,6 +64,7 @@ Include_DBs/GC = $(VM)/includeDB_gc \ $(VM)/gc_implementation/includeDB_gc_parallelScavenge \ $(VM)/gc_implementation/includeDB_gc_concurrentMarkSweep \ $(VM)/gc_implementation/includeDB_gc_parNew \ + $(VM)/gc_implementation/includeDB_gc_g1 \ $(VM)/gc_implementation/includeDB_gc_serial \ $(VM)/gc_implementation/includeDB_gc_shared diff --git a/hotspot/make/solaris/makefiles/top.make b/hotspot/make/solaris/makefiles/top.make index bd8a42238d1..4b235d40520 100644 --- a/hotspot/make/solaris/makefiles/top.make +++ b/hotspot/make/solaris/makefiles/top.make @@ -54,6 +54,7 @@ Include_DBs/GC = $(VM)/includeDB_gc \ $(VM)/gc_implementation/includeDB_gc_parallelScavenge \ $(VM)/gc_implementation/includeDB_gc_concurrentMarkSweep \ $(VM)/gc_implementation/includeDB_gc_parNew \ + $(VM)/gc_implementation/includeDB_gc_g1 \ $(VM)/gc_implementation/includeDB_gc_serial \ $(VM)/gc_implementation/includeDB_gc_shared diff --git a/hotspot/make/windows/makefiles/generated.make b/hotspot/make/windows/makefiles/generated.make index 3d2a0001d00..3a99300f02d 100644 --- a/hotspot/make/windows/makefiles/generated.make +++ b/hotspot/make/windows/makefiles/generated.make @@ -50,7 +50,8 @@ IncludeDBs_gc= $(WorkSpace)/src/share/vm/includeDB_gc_parallel \ $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge \ 
$(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_shared \ $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_parNew \ - $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep + $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep \ + $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_g1 IncludeDBs_core=$(IncludeDBs_base) $(IncludeDBs_gc) \ $(WorkSpace)/src/share/vm/includeDB_features diff --git a/hotspot/make/windows/makefiles/makedeps.make b/hotspot/make/windows/makefiles/makedeps.make index a64407bfa5f..8bfe00737ab 100644 --- a/hotspot/make/windows/makefiles/makedeps.make +++ b/hotspot/make/windows/makefiles/makedeps.make @@ -64,6 +64,7 @@ MakeDepsIncludesPRIVATE=\ -relativeInclude src\share\vm\gc_implementation\shared \ -relativeInclude src\share\vm\gc_implementation\parNew \ -relativeInclude src\share\vm\gc_implementation\concurrentMarkSweep \ + -relativeInclude src\share\vm\gc_implementation\g1 \ -relativeInclude src\share\vm\gc_interface \ -relativeInclude src\share\vm\asm \ -relativeInclude src\share\vm\memory \ @@ -115,6 +116,7 @@ MakeDepsIDEOptions=\ -additionalFile includeDB_gc_parallel \ -additionalFile includeDB_gc_parallelScavenge \ -additionalFile includeDB_gc_concurrentMarkSweep \ + -additionalFile includeDB_gc_g1 \ -additionalFile includeDB_gc_parNew \ -additionalFile includeDB_gc_shared \ -additionalFile includeDB_gc_serial \ diff --git a/hotspot/make/windows/makefiles/vm.make b/hotspot/make/windows/makefiles/vm.make index e7321de1332..151c280a4fc 100644 --- a/hotspot/make/windows/makefiles/vm.make +++ b/hotspot/make/windows/makefiles/vm.make @@ -117,6 +117,7 @@ CPP_INCLUDE_DIRS=\ /I "$(WorkSpace)\src\share\vm\gc_implementation\shared"\ /I "$(WorkSpace)\src\share\vm\gc_implementation\parNew"\ /I "$(WorkSpace)\src\share\vm\gc_implementation\concurrentMarkSweep"\ + /I "$(WorkSpace)\src\share\vm\gc_implementation\g1"\ /I "$(WorkSpace)\src\share\vm\gc_interface"\ /I 
"$(WorkSpace)\src\share\vm\asm" \ /I "$(WorkSpace)\src\share\vm\memory" \ @@ -146,6 +147,7 @@ VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/parallelScavenge VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/shared VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/parNew VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/concurrentMarkSweep +VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/g1 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_interface VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/asm VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/memory @@ -222,6 +224,9 @@ bytecodeInterpreterWithChecks.obj: ..\generated\jvmtifiles\bytecodeInterpreterWi {$(WorkSpace)\src\share\vm\gc_implementation\concurrentMarkSweep}.cpp.obj:: $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $< +{$(WorkSpace)\src\share\vm\gc_implementation\g1}.cpp.obj:: + $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $< + {$(WorkSpace)\src\share\vm\gc_interface}.cpp.obj:: $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $< diff --git a/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp b/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp index c7dce335619..998f39508e7 100644 --- a/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp @@ -130,6 +130,20 @@ int AbstractAssembler::code_fill_byte() { return 0x00; // illegal instruction 0x00000000 } +Assembler::Condition Assembler::reg_cond_to_cc_cond(Assembler::RCondition in) { + switch (in) { + case rc_z: return equal; + case rc_lez: return lessEqual; + case rc_lz: return less; + case rc_nz: return notEqual; + case rc_gz: return greater; + case rc_gez: return greaterEqual; + default: + ShouldNotReachHere(); + } + return equal; +} + // Generate a bunch 'o stuff (including v9's #ifndef PRODUCT void Assembler::test_v9() { @@ -1213,31 +1227,19 @@ void MacroAssembler::set_vm_result(Register oop_result) { } -void MacroAssembler::store_check(Register tmp, Register obj) { - // Use two shifts to 
clear out those low order two bits! (Cannot opt. into 1.) - - /* $$$ This stuff needs to go into one of the BarrierSet generator - functions. (The particular barrier sets will have to be friends of - MacroAssembler, I guess.) */ - BarrierSet* bs = Universe::heap()->barrier_set(); - assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); - CardTableModRefBS* ct = (CardTableModRefBS*)bs; - assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); +void MacroAssembler::card_table_write(jbyte* byte_map_base, + Register tmp, Register obj) { #ifdef _LP64 srlx(obj, CardTableModRefBS::card_shift, obj); #else srl(obj, CardTableModRefBS::card_shift, obj); #endif assert( tmp != obj, "need separate temp reg"); - Address rs(tmp, (address)ct->byte_map_base); + Address rs(tmp, (address)byte_map_base); load_address(rs); stb(G0, rs.base(), obj); } -void MacroAssembler::store_check(Register tmp, Register obj, Register offset) { - store_check(tmp, obj); -} - // %%% Note: The following six instructions have been moved, // unchanged, from assembler_sparc.inline.hpp. // They will be refactored at a later date. 
@@ -1663,11 +1665,21 @@ void MacroAssembler::_verify_oop(Register reg, const char* msg, const char * fil if (reg == G0) return; // always NULL, which is always an oop - char buffer[16]; + char buffer[64]; +#ifdef COMPILER1 + if (CommentedAssembly) { + snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset()); + block_comment(buffer); + } +#endif + + int len = strlen(file) + strlen(msg) + 1 + 4; sprintf(buffer, "%d", line); - int len = strlen(file) + strlen(msg) + 1 + 4 + strlen(buffer); + len += strlen(buffer); + sprintf(buffer, " at offset %d ", offset()); + len += strlen(buffer); char * real_msg = new char[len]; - sprintf(real_msg, "%s (%s:%d)", msg, file, line); + sprintf(real_msg, "%s%s(%s:%d)", msg, buffer, file, line); // Call indirectly to solve generation ordering problem Address a(O7, (address)StubRoutines::verify_oop_subroutine_entry_address()); @@ -2059,6 +2071,27 @@ void MacroAssembler::br_notnull( Register s1, bool a, Predict p, Label& L ) { #endif } +void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p, + Register s1, address d, + relocInfo::relocType rt ) { + if (VM_Version::v9_instructions_work()) { + bpr(rc, a, p, s1, d, rt); + } else { + tst(s1); + br(reg_cond_to_cc_cond(rc), a, p, d, rt); + } +} + +void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p, + Register s1, Label& L ) { + if (VM_Version::v9_instructions_work()) { + bpr(rc, a, p, s1, L); + } else { + tst(s1); + br(reg_cond_to_cc_cond(rc), a, p, L); + } +} + // instruction sequences factored across compiler & interpreter @@ -3241,68 +3274,74 @@ void MacroAssembler::eden_allocate( assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size"); assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment"); - // get eden boundaries - // note: we need both top & top_addr! 
- const Register top_addr = t1; - const Register end = t2; - - CollectedHeap* ch = Universe::heap(); - set((intx)ch->top_addr(), top_addr); - intx delta = (intx)ch->end_addr() - (intx)ch->top_addr(); - ld_ptr(top_addr, delta, end); - ld_ptr(top_addr, 0, obj); - - // try to allocate - Label retry; - bind(retry); -#ifdef ASSERT - // make sure eden top is properly aligned - { - Label L; - btst(MinObjAlignmentInBytesMask, obj); - br(Assembler::zero, false, Assembler::pt, L); + if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { + // No allocation in the shared eden. + br(Assembler::always, false, Assembler::pt, slow_case); delayed()->nop(); - stop("eden top is not properly aligned"); - bind(L); - } -#endif // ASSERT - const Register free = end; - sub(end, obj, free); // compute amount of free space - if (var_size_in_bytes->is_valid()) { - // size is unknown at compile time - cmp(free, var_size_in_bytes); - br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case - delayed()->add(obj, var_size_in_bytes, end); } else { - // size is known at compile time - cmp(free, con_size_in_bytes); - br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case - delayed()->add(obj, con_size_in_bytes, end); - } - // Compare obj with the value at top_addr; if still equal, swap the value of - // end with the value at top_addr. If not equal, read the value at top_addr - // into end. - casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); - // if someone beat us on the allocation, try again, otherwise continue - cmp(obj, end); - brx(Assembler::notEqual, false, Assembler::pn, retry); - delayed()->mov(end, obj); // nop if successfull since obj == end + // get eden boundaries + // note: we need both top & top_addr! 
+ const Register top_addr = t1; + const Register end = t2; + + CollectedHeap* ch = Universe::heap(); + set((intx)ch->top_addr(), top_addr); + intx delta = (intx)ch->end_addr() - (intx)ch->top_addr(); + ld_ptr(top_addr, delta, end); + ld_ptr(top_addr, 0, obj); + + // try to allocate + Label retry; + bind(retry); +#ifdef ASSERT + // make sure eden top is properly aligned + { + Label L; + btst(MinObjAlignmentInBytesMask, obj); + br(Assembler::zero, false, Assembler::pt, L); + delayed()->nop(); + stop("eden top is not properly aligned"); + bind(L); + } +#endif // ASSERT + const Register free = end; + sub(end, obj, free); // compute amount of free space + if (var_size_in_bytes->is_valid()) { + // size is unknown at compile time + cmp(free, var_size_in_bytes); + br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case + delayed()->add(obj, var_size_in_bytes, end); + } else { + // size is known at compile time + cmp(free, con_size_in_bytes); + br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case + delayed()->add(obj, con_size_in_bytes, end); + } + // Compare obj with the value at top_addr; if still equal, swap the value of + // end with the value at top_addr. If not equal, read the value at top_addr + // into end. 
+ casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); + // if someone beat us on the allocation, try again, otherwise continue + cmp(obj, end); + brx(Assembler::notEqual, false, Assembler::pn, retry); + delayed()->mov(end, obj); // nop if successfull since obj == end #ifdef ASSERT - // make sure eden top is properly aligned - { - Label L; - const Register top_addr = t1; + // make sure eden top is properly aligned + { + Label L; + const Register top_addr = t1; - set((intx)ch->top_addr(), top_addr); - ld_ptr(top_addr, 0, top_addr); - btst(MinObjAlignmentInBytesMask, top_addr); - br(Assembler::zero, false, Assembler::pt, L); - delayed()->nop(); - stop("eden top is not properly aligned"); - bind(L); - } + set((intx)ch->top_addr(), top_addr); + ld_ptr(top_addr, 0, top_addr); + btst(MinObjAlignmentInBytesMask, top_addr); + br(Assembler::zero, false, Assembler::pt, L); + delayed()->nop(); + stop("eden top is not properly aligned"); + bind(L); + } #endif // ASSERT + } } @@ -3554,6 +3593,468 @@ void MacroAssembler::bang_stack_size(Register Rsize, Register Rtsp, } } +/////////////////////////////////////////////////////////////////////////////////// +#ifndef SERIALGC + +static uint num_stores = 0; +static uint num_null_pre_stores = 0; + +static void count_null_pre_vals(void* pre_val) { + num_stores++; + if (pre_val == NULL) num_null_pre_stores++; + if ((num_stores % 1000000) == 0) { + tty->print_cr(UINT32_FORMAT " stores, " UINT32_FORMAT " (%5.2f%%) with null pre-vals.", + num_stores, num_null_pre_stores, + 100.0*(float)num_null_pre_stores/(float)num_stores); + } +} + +static address satb_log_enqueue_with_frame = 0; +static u_char* satb_log_enqueue_with_frame_end = 0; + +static address satb_log_enqueue_frameless = 0; +static u_char* satb_log_enqueue_frameless_end = 0; + +static int EnqueueCodeSize = 128 DEBUG_ONLY( + 256); // Instructions? + +// The calls to this don't work. 
We'd need to do a fair amount of work to +// make it work. +static void check_index(int ind) { + assert(0 <= ind && ind <= 64*K && ((ind % oopSize) == 0), + "Invariants.") +} + +static void generate_satb_log_enqueue(bool with_frame) { + BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize); + CodeBuffer buf(bb->instructions_begin(), bb->instructions_size()); + MacroAssembler masm(&buf); + address start = masm.pc(); + Register pre_val; + + Label refill, restart; + if (with_frame) { + masm.save_frame(0); + pre_val = I0; // Was O0 before the save. + } else { + pre_val = O0; + } + int satb_q_index_byte_offset = + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_index()); + int satb_q_buf_byte_offset = + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_buf()); + assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) && + in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t), + "check sizes in assembly below"); + + masm.bind(restart); + masm.ld_ptr(G2_thread, satb_q_index_byte_offset, L0); + + masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill); + // If the branch is taken, no harm in executing this in the delay slot. + masm.delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1); + masm.sub(L0, oopSize, L0); + + masm.st_ptr(pre_val, L1, L0); // [_buf + index] := I0 + if (!with_frame) { + // Use return-from-leaf + masm.retl(); + masm.delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset); + } else { + // Not delayed. + masm.st_ptr(L0, G2_thread, satb_q_index_byte_offset); + } + if (with_frame) { + masm.ret(); + masm.delayed()->restore(); + } + masm.bind(refill); + + address handle_zero = + CAST_FROM_FN_PTR(address, + &SATBMarkQueueSet::handle_zero_index_for_thread); + // This should be rare enough that we can afford to save all the + // scratch registers that the calling context might be using. 
+ masm.mov(G1_scratch, L0); + masm.mov(G3_scratch, L1); + masm.mov(G4, L2); + // We need the value of O0 above (for the write into the buffer), so we + // save and restore it. + masm.mov(O0, L3); + // Since the call will overwrite O7, we save and restore that, as well. + masm.mov(O7, L4); + masm.call_VM_leaf(L5, handle_zero, G2_thread); + masm.mov(L0, G1_scratch); + masm.mov(L1, G3_scratch); + masm.mov(L2, G4); + masm.mov(L3, O0); + masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart); + masm.delayed()->mov(L4, O7); + + if (with_frame) { + satb_log_enqueue_with_frame = start; + satb_log_enqueue_with_frame_end = masm.pc(); + } else { + satb_log_enqueue_frameless = start; + satb_log_enqueue_frameless_end = masm.pc(); + } +} + +static inline void generate_satb_log_enqueue_if_necessary(bool with_frame) { + if (with_frame) { + if (satb_log_enqueue_with_frame == 0) { + generate_satb_log_enqueue(with_frame); + assert(satb_log_enqueue_with_frame != 0, "postcondition."); + if (G1SATBPrintStubs) { + tty->print_cr("Generated with-frame satb enqueue:"); + Disassembler::decode((u_char*)satb_log_enqueue_with_frame, + satb_log_enqueue_with_frame_end, + tty); + } + } + } else { + if (satb_log_enqueue_frameless == 0) { + generate_satb_log_enqueue(with_frame); + assert(satb_log_enqueue_frameless != 0, "postcondition."); + if (G1SATBPrintStubs) { + tty->print_cr("Generated frameless satb enqueue:"); + Disassembler::decode((u_char*)satb_log_enqueue_frameless, + satb_log_enqueue_frameless_end, + tty); + } + } + } +} + +void MacroAssembler::g1_write_barrier_pre(Register obj, Register index, int offset, Register tmp, bool preserve_o_regs) { + assert(offset == 0 || index == noreg, "choose one"); + + if (G1DisablePreBarrier) return; + // satb_log_barrier(tmp, obj, offset, preserve_o_regs); + Label filtered; + // satb_log_barrier_work0(tmp, filtered); + if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { + ld(G2, + in_bytes(JavaThread::satb_mark_queue_offset() + + 
PtrQueue::byte_offset_of_active()), + tmp); + } else { + guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1, + "Assumption"); + ldsb(G2, + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active()), + tmp); + } + // Check on whether to annul. + br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered); + delayed() -> nop(); + + // satb_log_barrier_work1(tmp, offset); + if (index == noreg) { + if (Assembler::is_simm13(offset)) { + ld_ptr(obj, offset, tmp); + } else { + set(offset, tmp); + ld_ptr(obj, tmp, tmp); + } + } else { + ld_ptr(obj, index, tmp); + } + + // satb_log_barrier_work2(obj, tmp, offset); + + // satb_log_barrier_work3(tmp, filtered, preserve_o_regs); + + const Register pre_val = tmp; + + if (G1SATBBarrierPrintNullPreVals) { + save_frame(0); + mov(pre_val, O0); + // Save G-regs that target may use. + mov(G1, L1); + mov(G2, L2); + mov(G3, L3); + mov(G4, L4); + mov(G5, L5); + call(CAST_FROM_FN_PTR(address, &count_null_pre_vals)); + delayed()->nop(); + // Restore G-regs that target may have used. + mov(L1, G1); + mov(L2, G2); + mov(L3, G3); + mov(L4, G4); + mov(L5, G5); + restore(G0, G0, G0); + } + + // Check on whether to annul. + br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered); + delayed() -> nop(); + + // OK, it's not filtered, so we'll need to call enqueue. In the normal + // case, pre_val will be a scratch G-reg, but there's some cases in which + // it's an O-reg. In the first case, do a normal call. In the latter, + // do a save here and call the frameless version. + + guarantee(pre_val->is_global() || pre_val->is_out(), + "Or we need to think harder."); + if (pre_val->is_global() && !preserve_o_regs) { + generate_satb_log_enqueue_if_necessary(true); // with frame. + call(satb_log_enqueue_with_frame); + delayed()->mov(pre_val, O0); + } else { + generate_satb_log_enqueue_if_necessary(false); // with frameless. 
+ save_frame(0); + call(satb_log_enqueue_frameless); + delayed()->mov(pre_val->after_save(), O0); + restore(); + } + + bind(filtered); +} + +static jint num_ct_writes = 0; +static jint num_ct_writes_filtered_in_hr = 0; +static jint num_ct_writes_filtered_null = 0; +static jint num_ct_writes_filtered_pop = 0; +static G1CollectedHeap* g1 = NULL; + +static Thread* count_ct_writes(void* filter_val, void* new_val) { + Atomic::inc(&num_ct_writes); + if (filter_val == NULL) { + Atomic::inc(&num_ct_writes_filtered_in_hr); + } else if (new_val == NULL) { + Atomic::inc(&num_ct_writes_filtered_null); + } else { + if (g1 == NULL) { + g1 = G1CollectedHeap::heap(); + } + if ((HeapWord*)new_val < g1->popular_object_boundary()) { + Atomic::inc(&num_ct_writes_filtered_pop); + } + } + if ((num_ct_writes % 1000000) == 0) { + jint num_ct_writes_filtered = + num_ct_writes_filtered_in_hr + + num_ct_writes_filtered_null + + num_ct_writes_filtered_pop; + + tty->print_cr("%d potential CT writes: %5.2f%% filtered\n" + " (%5.2f%% intra-HR, %5.2f%% null, %5.2f%% popular).", + num_ct_writes, + 100.0*(float)num_ct_writes_filtered/(float)num_ct_writes, + 100.0*(float)num_ct_writes_filtered_in_hr/ + (float)num_ct_writes, + 100.0*(float)num_ct_writes_filtered_null/ + (float)num_ct_writes, + 100.0*(float)num_ct_writes_filtered_pop/ + (float)num_ct_writes); + } + return Thread::current(); +} + +static address dirty_card_log_enqueue = 0; +static u_char* dirty_card_log_enqueue_end = 0; + +// This gets to assume that o0 contains the object address. 
+static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) { + BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2); + CodeBuffer buf(bb->instructions_begin(), bb->instructions_size()); + MacroAssembler masm(&buf); + address start = masm.pc(); + + Label not_already_dirty, restart, refill; + +#ifdef _LP64 + masm.srlx(O0, CardTableModRefBS::card_shift, O0); +#else + masm.srl(O0, CardTableModRefBS::card_shift, O0); +#endif + Address rs(O1, (address)byte_map_base); + masm.load_address(rs); // O1 := + masm.ldub(O0, O1, O2); // O2 := [O0 + O1] + + masm.br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt, + O2, not_already_dirty); + // Get O1 + O2 into a reg by itself -- useful in the take-the-branch + // case, harmless if not. + masm.delayed()->add(O0, O1, O3); + + // We didn't take the branch, so we're already dirty: return. + // Use return-from-leaf + masm.retl(); + masm.delayed()->nop(); + + // Not dirty. + masm.bind(not_already_dirty); + // First, dirty it. + masm.stb(G0, O3, G0); // [cardPtr] := 0 (i.e., dirty). + int dirty_card_q_index_byte_offset = + in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index()); + int dirty_card_q_buf_byte_offset = + in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf()); + masm.bind(restart); + masm.ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0); + + masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, + L0, refill); + // If the branch is taken, no harm in executing this in the delay slot. 
+ masm.delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1); + masm.sub(L0, oopSize, L0); + + masm.st_ptr(O3, L1, L0); // [_buf + index] := I0 + // Use return-from-leaf + masm.retl(); + masm.delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset); + + masm.bind(refill); + address handle_zero = + CAST_FROM_FN_PTR(address, + &DirtyCardQueueSet::handle_zero_index_for_thread); + // This should be rare enough that we can afford to save all the + // scratch registers that the calling context might be using. + masm.mov(G1_scratch, L3); + masm.mov(G3_scratch, L5); + // We need the value of O3 above (for the write into the buffer), so we + // save and restore it. + masm.mov(O3, L6); + // Since the call will overwrite O7, we save and restore that, as well. + masm.mov(O7, L4); + + masm.call_VM_leaf(L7_thread_cache, handle_zero, G2_thread); + masm.mov(L3, G1_scratch); + masm.mov(L5, G3_scratch); + masm.mov(L6, O3); + masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart); + masm.delayed()->mov(L4, O7); + + dirty_card_log_enqueue = start; + dirty_card_log_enqueue_end = masm.pc(); + // XXX Should have a guarantee here about not going off the end! + // Does it already do so? Do an experiment... 
+} + +static inline void +generate_dirty_card_log_enqueue_if_necessary(jbyte* byte_map_base) { + if (dirty_card_log_enqueue == 0) { + generate_dirty_card_log_enqueue(byte_map_base); + assert(dirty_card_log_enqueue != 0, "postcondition."); + if (G1SATBPrintStubs) { + tty->print_cr("Generated dirty_card enqueue:"); + Disassembler::decode((u_char*)dirty_card_log_enqueue, + dirty_card_log_enqueue_end, + tty); + } + } +} + + +void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val, Register tmp) { + + Label filtered; + MacroAssembler* post_filter_masm = this; + + if (new_val == G0) return; + if (G1DisablePostBarrier) return; + + G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set(); + assert(bs->kind() == BarrierSet::G1SATBCT || + bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier"); + if (G1RSBarrierRegionFilter) { + xor3(store_addr, new_val, tmp); +#ifdef _LP64 + srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp); +#else + srl(tmp, HeapRegion::LogOfHRGrainBytes, tmp); +#endif + if (G1PrintCTFilterStats) { + guarantee(tmp->is_global(), "Or stats won't work..."); + // This is a sleazy hack: I'm temporarily hijacking G2, which I + // promise to restore. + mov(new_val, G2); + save_frame(0); + mov(tmp, O0); + mov(G2, O1); + // Save G-regs that target may use. + mov(G1, L1); + mov(G2, L2); + mov(G3, L3); + mov(G4, L4); + mov(G5, L5); + call(CAST_FROM_FN_PTR(address, &count_ct_writes)); + delayed()->nop(); + mov(O0, G2); + // Restore G-regs that target may have used. + mov(L1, G1); + mov(L3, G3); + mov(L4, G4); + mov(L5, G5); + restore(G0, G0, G0); + } + // XXX Should I predict this taken or not? Does it mattern? + br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered); + delayed()->nop(); + } + + // Now we decide how to generate the card table write. If we're + // enqueueing, we call out to a generated function. Otherwise, we do it + // inline here. 
+ + if (G1RSBarrierUseQueue) { + // If the "store_addr" register is an "in" or "local" register, move it to + // a scratch reg so we can pass it as an argument. + bool use_scr = !(store_addr->is_global() || store_addr->is_out()); + // Pick a scratch register different from "tmp". + Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch); + // Make sure we use up the delay slot! + if (use_scr) { + post_filter_masm->mov(store_addr, scr); + } else { + post_filter_masm->nop(); + } + generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base); + save_frame(0); + call(dirty_card_log_enqueue); + if (use_scr) { + delayed()->mov(scr, O0); + } else { + delayed()->mov(store_addr->after_save(), O0); + } + restore(); + + } else { + +#ifdef _LP64 + post_filter_masm->srlx(store_addr, CardTableModRefBS::card_shift, store_addr); +#else + post_filter_masm->srl(store_addr, CardTableModRefBS::card_shift, store_addr); +#endif + assert( tmp != store_addr, "need separate temp reg"); + Address rs(tmp, (address)bs->byte_map_base); + load_address(rs); + stb(G0, rs.base(), store_addr); + } + + bind(filtered); + +} + +#endif // SERIALGC +/////////////////////////////////////////////////////////////////////////////////// + +void MacroAssembler::card_write_barrier_post(Register store_addr, Register new_val, Register tmp) { + // If we're writing constant NULL, we can skip the write barrier. 
+ if (new_val == G0) return; + CardTableModRefBS* bs = (CardTableModRefBS*) Universe::heap()->barrier_set(); + assert(bs->kind() == BarrierSet::CardTableModRef || + bs->kind() == BarrierSet::CardTableExtension, "wrong barrier"); + card_table_write(bs->byte_map_base, tmp, store_addr); +} + void MacroAssembler::load_klass(Register src_oop, Register klass) { // The number of bytes in this code is used by // MachCallDynamicJavaNode::ret_addr_offset() diff --git a/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp b/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp index e1f7e0636b8..3cea450cc2c 100644 --- a/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp +++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp @@ -1439,7 +1439,11 @@ public: // pp 214 void save( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | rs2(s2) ); } - void save( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); } + void save( Register s1, int simm13a, Register d ) { + // make sure frame is at least large enough for the register save area + assert(-simm13a >= 16 * wordSize, "frame too small"); + emit_long( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); + } void restore( Register s1 = G0, Register s2 = G0, Register d = G0 ) { emit_long( op(arith_op) | rd(d) | op3(restore_op3) | rs1(s1) | rs2(s2) ); } void restore( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(restore_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); } @@ -1594,6 +1598,11 @@ public: inline void wrasi( Register d) { v9_only(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(3, 29, 25)); } inline void wrfprs( Register d) { v9_only(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(6, 29, 25)); } + // For a given register condition, return the appropriate condition code + // Condition (the one you would use to get the same 
effect after "tst" on + // the target register.) + Assembler::Condition reg_cond_to_cc_cond(RCondition in); + // Creation Assembler(CodeBuffer* code) : AbstractAssembler(code) { @@ -1630,6 +1639,8 @@ class RegistersForDebugging : public StackObj { // restore global registers in case C code disturbed them static void restore_registers(MacroAssembler* a, Register r); + + }; @@ -1722,6 +1733,12 @@ class MacroAssembler: public Assembler { void br_null ( Register s1, bool a, Predict p, Label& L ); void br_notnull( Register s1, bool a, Predict p, Label& L ); + // These versions will do the most efficient thing on v8 and v9. Perhaps + // this is what the routine above was meant to do, but it didn't (and + // didn't cover both target address kinds.) + void br_on_reg_cond( RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt = relocInfo::none ); + void br_on_reg_cond( RCondition c, bool a, Predict p, Register s1, Label& L); + inline void bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none ); inline void bp( Condition c, bool a, CC cc, Predict p, Label& L ); @@ -2056,9 +2073,23 @@ class MacroAssembler: public Assembler { #endif // ASSERT public: - // Stores - void store_check(Register tmp, Register obj); // store check for obj - register is destroyed afterwards - void store_check(Register tmp, Register obj, Register offset); // store check for obj - register is destroyed afterwards + + // Write to card table for - register is destroyed afterwards. + void card_table_write(jbyte* byte_map_base, Register tmp, Register obj); + + void card_write_barrier_post(Register store_addr, Register new_val, Register tmp); + +#ifndef SERIALGC + // Array store and offset + void g1_write_barrier_pre(Register obj, Register index, int offset, Register tmp, bool preserve_o_regs); + + void g1_write_barrier_post(Register store_addr, Register new_val, Register tmp); + + // May do filtering, depending on the boolean arguments. 
+ void g1_card_table_write(jbyte* byte_map_base, + Register tmp, Register obj, Register new_val, + bool region_filter, bool null_filter); +#endif // SERIALGC // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack void push_fTOS(); diff --git a/hotspot/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp b/hotspot/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp index b0caddae967..87a411da1d1 100644 --- a/hotspot/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp @@ -404,4 +404,55 @@ void ArrayCopyStub::emit_code(LIR_Assembler* ce) { } +/////////////////////////////////////////////////////////////////////////////////// +#ifndef SERIALGC + +void G1PreBarrierStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + + assert(pre_val()->is_register(), "Precondition."); + + Register pre_val_reg = pre_val()->as_register(); + + ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false); + __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt, + pre_val_reg, _continuation); + __ delayed()->nop(); + + __ call(Runtime1::entry_for(Runtime1::Runtime1::g1_pre_barrier_slow_id)); + __ delayed()->mov(pre_val_reg, G4); + __ br(Assembler::always, false, Assembler::pt, _continuation); + __ delayed()->nop(); + +} + +jbyte* G1PostBarrierStub::_byte_map_base = NULL; + +jbyte* G1PostBarrierStub::byte_map_base_slow() { + BarrierSet* bs = Universe::heap()->barrier_set(); + assert(bs->is_a(BarrierSet::G1SATBCTLogging), + "Must be if we're using this."); + return ((G1SATBCardTableModRefBS*)bs)->byte_map_base; +} + +void G1PostBarrierStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + + assert(addr()->is_register(), "Precondition."); + assert(new_val()->is_register(), "Precondition."); + Register addr_reg = addr()->as_pointer_register(); + Register new_val_reg = new_val()->as_register(); + __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt, + new_val_reg, _continuation); + __ delayed()->nop(); + + __ 
call(Runtime1::entry_for(Runtime1::Runtime1::g1_post_barrier_slow_id)); + __ delayed()->mov(addr_reg, G4); + __ br(Assembler::always, false, Assembler::pt, _continuation); + __ delayed()->nop(); +} + +#endif // SERIALGC +/////////////////////////////////////////////////////////////////////////////////// + #undef __ diff --git a/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp b/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp index f1dc75f1ef4..dabea15a087 100644 --- a/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp @@ -2093,7 +2093,11 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { // the known type isn't loaded since the code sanity checks // in debug mode and the type isn't required when we know the exact type // also check that the type is an array type. - if (op->expected_type() == NULL) { + // We also, for now, always call the stub if the barrier set requires a + // write_ref_pre barrier (which the stub does, but none of the optimized + // cases currently does). + if (op->expected_type() == NULL || + Universe::heap()->barrier_set()->has_write_ref_pre_barrier()) { __ mov(src, O0); __ mov(src_pos, O1); __ mov(dst, O2); diff --git a/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp b/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp index 6d941c36866..239d867a043 100644 --- a/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp @@ -365,6 +365,10 @@ void LIRGenerator::do_StoreIndexed(StoreIndexed* x) { __ store_check(value.result(), array.result(), tmp1, tmp2, tmp3, store_check_info); } + if (obj_store) { + // Needs GC write barriers. + pre_barrier(LIR_OprFact::address(array_addr), false, NULL); + } __ move(value.result(), array_addr, null_check_info); if (obj_store) { // Is this precise? 
@@ -663,6 +667,10 @@ void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) { __ add(obj.result(), offset.result(), addr); + if (type == objectType) { // Write-barrier needed for Object fields. + pre_barrier(obj.result(), false, NULL); + } + if (type == objectType) __ cas_obj(addr, cmp.result(), val.result(), t1, t2); else if (type == intType) @@ -677,7 +685,11 @@ void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) { LIR_Opr result = rlock_result(x); __ cmove(lir_cond_equal, LIR_OprFact::intConst(1), LIR_OprFact::intConst(0), result); if (type == objectType) { // Write-barrier needed for Object fields. +#ifdef PRECISE_CARDMARK + post_barrier(addr, val.result()); +#else post_barrier(obj.result(), val.result()); +#endif // PRECISE_CARDMARK } } @@ -1154,6 +1166,10 @@ void LIRGenerator::put_Object_unsafe(LIR_Opr src, LIR_Opr offset, LIR_Opr data, addr = new LIR_Address(base_op, index_op, type); } + if (is_obj) { + pre_barrier(LIR_OprFact::address(addr), false, NULL); + // _bs->c1_write_barrier_pre(this, LIR_OprFact::address(addr)); + } __ move(data, addr); if (is_obj) { // This address is precise diff --git a/hotspot/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp b/hotspot/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp index 45c63228c64..489e84dd58c 100644 --- a/hotspot/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp @@ -832,6 +832,163 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { } break; +#ifndef SERIALGC + case g1_pre_barrier_slow_id: + { // G4: previous value of memory + BarrierSet* bs = Universe::heap()->barrier_set(); + if (bs->kind() != BarrierSet::G1SATBCTLogging) { + __ save_frame(0); + __ set((int)id, O1); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), I0); + __ should_not_reach_here(); + break; + } + + __ set_info("g1_pre_barrier_slow_id", dont_gc_arguments); + + Register pre_val = G4; + Register tmp = G1_scratch; + Register tmp2 = G3_scratch; + 
+ Label refill, restart; + bool with_frame = false; // I don't know if we can do with-frame. + int satb_q_index_byte_offset = + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_index()); + int satb_q_buf_byte_offset = + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_buf()); + __ bind(restart); + __ ld_ptr(G2_thread, satb_q_index_byte_offset, tmp); + + __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, + Assembler::pn, tmp, refill); + + // If the branch is taken, no harm in executing this in the delay slot. + __ delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, tmp2); + __ sub(tmp, oopSize, tmp); + + __ st_ptr(pre_val, tmp2, tmp); // [_buf + index] := + // Use return-from-leaf + __ retl(); + __ delayed()->st_ptr(tmp, G2_thread, satb_q_index_byte_offset); + + __ bind(refill); + __ save_frame(0); + + __ mov(pre_val, L0); + __ mov(tmp, L1); + __ mov(tmp2, L2); + + __ call_VM_leaf(L7_thread_cache, + CAST_FROM_FN_PTR(address, + SATBMarkQueueSet::handle_zero_index_for_thread), + G2_thread); + + __ mov(L0, pre_val); + __ mov(L1, tmp); + __ mov(L2, tmp2); + + __ br(Assembler::always, /*annul*/false, Assembler::pt, restart); + __ delayed()->restore(); + } + break; + + case g1_post_barrier_slow_id: + { + BarrierSet* bs = Universe::heap()->barrier_set(); + if (bs->kind() != BarrierSet::G1SATBCTLogging) { + __ save_frame(0); + __ set((int)id, O1); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), I0); + __ should_not_reach_here(); + break; + } + + __ set_info("g1_post_barrier_slow_id", dont_gc_arguments); + + Register addr = G4; + Register cardtable = G5; + Register tmp = G1_scratch; + Register tmp2 = G3_scratch; + jbyte* byte_map_base = ((CardTableModRefBS*)bs)->byte_map_base; + + Label not_already_dirty, restart, refill; + +#ifdef _LP64 + __ srlx(addr, CardTableModRefBS::card_shift, addr); +#else + __ srl(addr, CardTableModRefBS::card_shift, addr); +#endif + + Address rs(cardtable, 
(address)byte_map_base); + __ load_address(rs); // cardtable := byte_map_base + __ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable] + + __ br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt, + tmp, not_already_dirty); + // Get cardtable + addr into a reg by itself -- useful in the take-the-branch + // case, harmless if not. + __ delayed()->add(addr, cardtable, tmp2); + + // We didn't take the branch, so we're already dirty: return. + // Use return-from-leaf + __ retl(); + __ delayed()->nop(); + + // Not dirty. + __ bind(not_already_dirty); + // First, dirty it. + __ stb(G0, tmp2, 0); // [cardPtr] := 0 (i.e., dirty). + + Register tmp3 = cardtable; + Register tmp4 = tmp; + + // these registers are now dead + addr = cardtable = tmp = noreg; + + int dirty_card_q_index_byte_offset = + in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index()); + int dirty_card_q_buf_byte_offset = + in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf()); + __ bind(restart); + __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, tmp3); + + __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, + tmp3, refill); + // If the branch is taken, no harm in executing this in the delay slot. 
+ __ delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, tmp4); + __ sub(tmp3, oopSize, tmp3); + + __ st_ptr(tmp2, tmp4, tmp3); // [_buf + index] := tmp2 (the card address) + // Use return-from-leaf + __ retl(); + __ delayed()->st_ptr(tmp3, G2_thread, dirty_card_q_index_byte_offset); + + __ bind(refill); + __ save_frame(0); + + __ mov(tmp2, L0); + __ mov(tmp3, L1); + __ mov(tmp4, L2); + + __ call_VM_leaf(L7_thread_cache, + CAST_FROM_FN_PTR(address, + DirtyCardQueueSet::handle_zero_index_for_thread), + G2_thread); + + __ mov(L0, tmp2); + __ mov(L1, tmp3); + __ mov(L2, tmp4); + + __ br(Assembler::always, /*annul*/false, Assembler::pt, restart); + __ delayed()->restore(); + } + break; +#endif // !SERIALGC + default: { __ set_info("unimplemented entry", dont_gc_arguments); __ save_frame(0); diff --git a/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp b/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp index f21ed2f80ba..a16d332ba6b 100644 --- a/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp @@ -1110,30 +1110,31 @@ class StubGenerator: public StubCodeGenerator { // The input registers are overwritten. // void gen_write_ref_array_pre_barrier(Register addr, Register count) { -#if 0 // G1 only BarrierSet* bs = Universe::heap()->barrier_set(); if (bs->has_write_ref_pre_barrier()) { assert(bs->has_write_ref_array_pre_opt(), "Else unsupported barrier set."); - assert(addr->is_global() && count->is_global(), - "If not, then we have to fix this code to handle more " - "general cases."); - // Get some new fresh output registers. __ save_frame(0); // Save the necessary global regs... will be used after. 
- __ mov(addr, L0); - __ mov(count, L1); - - __ mov(addr, O0); + if (addr->is_global()) { + __ mov(addr, L0); + } + if (count->is_global()) { + __ mov(count, L1); + } + __ mov(addr->after_save(), O0); // Get the count into O1 __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre)); - __ delayed()->mov(count, O1); - __ mov(L0, addr); - __ mov(L1, count); + __ delayed()->mov(count->after_save(), O1); + if (addr->is_global()) { + __ mov(L0, addr); + } + if (count->is_global()) { + __ mov(L1, count); + } __ restore(); } -#endif // 0 } // // Generate post-write barrier for array. @@ -1150,22 +1151,17 @@ class StubGenerator: public StubCodeGenerator { BarrierSet* bs = Universe::heap()->barrier_set(); switch (bs->kind()) { -#if 0 // G1 - only case BarrierSet::G1SATBCT: case BarrierSet::G1SATBCTLogging: { - assert(addr->is_global() && count->is_global(), - "If not, then we have to fix this code to handle more " - "general cases."); // Get some new fresh output registers. __ save_frame(0); - __ mov(addr, O0); + __ mov(addr->after_save(), O0); __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post)); - __ delayed()->mov(count, O1); + __ delayed()->mov(count->after_save(), O1); __ restore(); } break; -#endif // 0 G1 - only case BarrierSet::CardTableModRef: case BarrierSet::CardTableExtension: { @@ -2412,8 +2408,7 @@ class StubGenerator: public StubCodeGenerator { StubCodeMark mark(this, "StubRoutines", name); address start = __ pc(); - gen_write_ref_array_pre_barrier(G1, G5); - + gen_write_ref_array_pre_barrier(O1, O2); #ifdef ASSERT // We sometimes save a frame (see partial_subtype_check below). 
diff --git a/hotspot/src/cpu/sparc/vm/templateTable_sparc.cpp b/hotspot/src/cpu/sparc/vm/templateTable_sparc.cpp index bd852da5636..73de2de94f3 100644 --- a/hotspot/src/cpu/sparc/vm/templateTable_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/templateTable_sparc.cpp @@ -28,6 +28,79 @@ #ifndef CC_INTERP #define __ _masm-> +// Misc helpers + +// Do an oop store like *(base + index + offset) = val +// index can be noreg, +static void do_oop_store(InterpreterMacroAssembler* _masm, + Register base, + Register index, + int offset, + Register val, + Register tmp, + BarrierSet::Name barrier, + bool precise) { + assert(tmp != val && tmp != base && tmp != index, "register collision"); + assert(index == noreg || offset == 0, "only one offset"); + switch (barrier) { +#ifndef SERIALGC + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + { + __ g1_write_barrier_pre( base, index, offset, tmp, /*preserve_o_regs*/true); + if (index == noreg ) { + assert(Assembler::is_simm13(offset), "fix this code"); + __ store_heap_oop(val, base, offset); + } else { + __ store_heap_oop(val, base, index); + } + + // No need for post barrier if storing NULL + if (val != G0) { + if (precise) { + if (index == noreg) { + __ add(base, offset, base); + } else { + __ add(base, index, base); + } + } + __ g1_write_barrier_post(base, val, tmp); + } + } + break; +#endif // SERIALGC + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + { + if (index == noreg ) { + assert(Assembler::is_simm13(offset), "fix this code"); + __ store_heap_oop(val, base, offset); + } else { + __ store_heap_oop(val, base, index); + } + // No need for post barrier if storing NULL + if (val != G0) { + if (precise) { + if (index == noreg) { + __ add(base, offset, base); + } else { + __ add(base, index, base); + } + } + __ card_write_barrier_post(base, val, tmp); + } + } + break; + case BarrierSet::ModRef: + case BarrierSet::Other: + ShouldNotReachHere(); + break; + default : + ShouldNotReachHere(); + + } 
+} + //---------------------------------------------------------------------------------------------------- // Platform-dependent initialization @@ -758,6 +831,8 @@ void TemplateTable::aastore() { // O4: array element klass // O5: value klass + // Address element(O1, 0, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + // Generate a fast subtype check. Branch to store_ok if no // failure. Throw if failure. __ gen_subtype_check( O5, O4, G3_scratch, G4_scratch, G1_scratch, store_ok ); @@ -767,18 +842,14 @@ void TemplateTable::aastore() { // Store is OK. __ bind(store_ok); - __ store_heap_oop(Otos_i, O1, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); - // Quote from rememberedSet.hpp: For objArrays, the precise card - // corresponding to the pointer store is dirtied so we don't need to - // scavenge the entire array. - Address element(O1, 0, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); - __ add(element, O1); // address the element precisely - __ store_check(G3_scratch, O1); + do_oop_store(_masm, O1, noreg, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Otos_i, G3_scratch, _bs->kind(), true); + __ ba(false,done); __ delayed()->inc(Lesp, 3* Interpreter::stackElementSize()); // adj sp (pops array, index and value) __ bind(is_null); - __ store_heap_oop(Otos_i, element); + do_oop_store(_masm, O1, noreg, arrayOopDesc::base_offset_in_bytes(T_OBJECT), G0, G4_scratch, _bs->kind(), true); + __ profile_null_seen(G3_scratch); __ inc(Lesp, 3* Interpreter::stackElementSize()); // adj sp (pops array, index and value) __ bind(done); @@ -2449,8 +2520,9 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static) { // atos __ pop_ptr(); __ verify_oop(Otos_i); - __ store_heap_oop(Otos_i, Rclass, Roffset); - __ store_check(G1_scratch, Rclass, Roffset); + + do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false); + __ ba(false, checkVolatile); __ delayed()->tst(Lscratch); @@ -2491,8 +2563,9 @@ void TemplateTable::putfield_or_static(int byte_no, bool 
is_static) { __ pop_ptr(); pop_and_check_object(Rclass); __ verify_oop(Otos_i); - __ store_heap_oop(Otos_i, Rclass, Roffset); - __ store_check(G1_scratch, Rclass, Roffset); + + do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false); + patch_bytecode(Bytecodes::_fast_aputfield, G3_scratch, G4_scratch); __ ba(false, checkVolatile); __ delayed()->tst(Lscratch); @@ -2646,8 +2719,7 @@ void TemplateTable::fast_storefield(TosState state) { __ stf(FloatRegisterImpl::D, Ftos_d, Rclass, Roffset); break; case Bytecodes::_fast_aputfield: - __ store_heap_oop(Otos_i, Rclass, Roffset); - __ store_check(G1_scratch, Rclass, Roffset); + do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false); break; default: ShouldNotReachHere(); diff --git a/hotspot/src/cpu/x86/vm/assembler_x86.cpp b/hotspot/src/cpu/x86/vm/assembler_x86.cpp index d8bc4948c92..ec208540ad0 100644 --- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp +++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp @@ -1575,6 +1575,35 @@ void Assembler::movdqa(Address dst, XMMRegister src) { emit_operand(src, dst); } +void Assembler::movdqu(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionMark im(this); + emit_byte(0xF3); + prefix(src, dst); + emit_byte(0x0F); + emit_byte(0x6F); + emit_operand(dst, src); +} + +void Assembler::movdqu(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_byte(0xF3); + int encode = prefixq_and_encode(dst->encoding(), src->encoding()); + emit_byte(0x0F); + emit_byte(0x6F); + emit_byte(0xC0 | encode); +} + +void Assembler::movdqu(Address dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionMark im(this); + emit_byte(0xF3); + prefix(dst, src); + emit_byte(0x0F); + emit_byte(0x7F); + emit_operand(src, dst); +} + // Uses zero extension on 64bit void Assembler::movl(Register dst, int32_t imm32) { @@ -5935,26 +5964,30 @@ void 
MacroAssembler::eden_allocate(Register obj, Label& slow_case) { assert(obj == rax, "obj must be in rax, for cmpxchg"); assert_different_registers(obj, var_size_in_bytes, t1); - Register end = t1; - Label retry; - bind(retry); - ExternalAddress heap_top((address) Universe::heap()->top_addr()); - movptr(obj, heap_top); - if (var_size_in_bytes == noreg) { - lea(end, Address(obj, con_size_in_bytes)); + if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { + jmp(slow_case); } else { - lea(end, Address(obj, var_size_in_bytes, Address::times_1)); + Register end = t1; + Label retry; + bind(retry); + ExternalAddress heap_top((address) Universe::heap()->top_addr()); + movptr(obj, heap_top); + if (var_size_in_bytes == noreg) { + lea(end, Address(obj, con_size_in_bytes)); + } else { + lea(end, Address(obj, var_size_in_bytes, Address::times_1)); + } + // if end < obj then we wrapped around => object too long => slow case + cmpptr(end, obj); + jcc(Assembler::below, slow_case); + cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr())); + jcc(Assembler::above, slow_case); + // Compare obj with the top addr, and if still equal, store the new top addr in + // end at the address of the top addr pointer. Sets ZF if was equal, and clears + // it otherwise. Use lock prefix for atomicity on MPs. + locked_cmpxchgptr(end, heap_top); + jcc(Assembler::notEqual, retry); } - // if end < obj then we wrapped around => object too long => slow case - cmpptr(end, obj); - jcc(Assembler::below, slow_case); - cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr())); - jcc(Assembler::above, slow_case); - // Compare obj with the top addr, and if still equal, store the new top addr in - // end at the address of the top addr pointer. Sets ZF if was equal, and clears - // it otherwise. Use lock prefix for atomicity on MPs. 
- locked_cmpxchgptr(end, heap_top); - jcc(Assembler::notEqual, retry); } void MacroAssembler::enter() { @@ -6491,6 +6524,179 @@ void MacroAssembler::sign_extend_short(Register reg) { } } +////////////////////////////////////////////////////////////////////////////////// +#ifndef SERIALGC + +void MacroAssembler::g1_write_barrier_pre(Register obj, +#ifndef _LP64 + Register thread, +#endif + Register tmp, + Register tmp2, + bool tosca_live) { + LP64_ONLY(Register thread = r15_thread;) + Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active())); + + Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + + Label done; + Label runtime; + + // if (!marking_in_progress) goto done; + if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { + cmpl(in_progress, 0); + } else { + assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); + cmpb(in_progress, 0); + } + jcc(Assembler::equal, done); + + // if (x.f == NULL) goto done; + cmpptr(Address(obj, 0), NULL_WORD); + jcc(Assembler::equal, done); + + // Can we store original value in the thread's buffer? 
+ + LP64_ONLY(movslq(tmp, index);) + movptr(tmp2, Address(obj, 0)); +#ifdef _LP64 + cmpq(tmp, 0); +#else + cmpl(index, 0); +#endif + jcc(Assembler::equal, runtime); +#ifdef _LP64 + subq(tmp, wordSize); + movl(index, tmp); + addq(tmp, buffer); +#else + subl(index, wordSize); + movl(tmp, buffer); + addl(tmp, index); +#endif + movptr(Address(tmp, 0), tmp2); + jmp(done); + bind(runtime); + // save the live input values + if(tosca_live) push(rax); + push(obj); +#ifdef _LP64 + movq(c_rarg0, Address(obj, 0)); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), c_rarg0, r15_thread); +#else + push(thread); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread); + pop(thread); +#endif + pop(obj); + if(tosca_live) pop(rax); + bind(done); + +} + +void MacroAssembler::g1_write_barrier_post(Register store_addr, + Register new_val, +#ifndef _LP64 + Register thread, +#endif + Register tmp, + Register tmp2) { + + LP64_ONLY(Register thread = r15_thread;) + Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf())); + BarrierSet* bs = Universe::heap()->barrier_set(); + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + Label done; + Label runtime; + + // Does store cross heap regions? + + movptr(tmp, store_addr); + xorptr(tmp, new_val); + shrptr(tmp, HeapRegion::LogOfHRGrainBytes); + jcc(Assembler::equal, done); + + // crosses regions, storing NULL? + + cmpptr(new_val, (int32_t) NULL_WORD); + jcc(Assembler::equal, done); + + // storing region crossing non-NULL, is card already dirty? 
+ + ExternalAddress cardtable((address) ct->byte_map_base); + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); +#ifdef _LP64 + const Register card_addr = tmp; + + movq(card_addr, store_addr); + shrq(card_addr, CardTableModRefBS::card_shift); + + lea(tmp2, cardtable); + + // get the address of the card + addq(card_addr, tmp2); +#else + const Register card_index = tmp; + + movl(card_index, store_addr); + shrl(card_index, CardTableModRefBS::card_shift); + + Address index(noreg, card_index, Address::times_1); + const Register card_addr = tmp; + lea(card_addr, as_Address(ArrayAddress(cardtable, index))); +#endif + cmpb(Address(card_addr, 0), 0); + jcc(Assembler::equal, done); + + // storing a region crossing, non-NULL oop, card is clean. + // dirty card and log. + + movb(Address(card_addr, 0), 0); + + cmpl(queue_index, 0); + jcc(Assembler::equal, runtime); + subl(queue_index, wordSize); + movptr(tmp2, buffer); +#ifdef _LP64 + movslq(rscratch1, queue_index); + addq(tmp2, rscratch1); + movq(Address(tmp2, 0), card_addr); +#else + addl(tmp2, queue_index); + movl(Address(tmp2, 0), card_index); +#endif + jmp(done); + + bind(runtime); + // save the live input values + push(store_addr); + push(new_val); +#ifdef _LP64 + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread); +#else + push(thread); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread); + pop(thread); +#endif + pop(new_val); + pop(store_addr); + + bind(done); + +} + +#endif // SERIALGC +////////////////////////////////////////////////////////////////////////////////// + + void MacroAssembler::store_check(Register obj) { // Does a store check for the oop in register obj. The content of // register obj is destroyed afterwards. 
diff --git a/hotspot/src/cpu/x86/vm/assembler_x86.hpp b/hotspot/src/cpu/x86/vm/assembler_x86.hpp index d9637ffc9e2..c2b64771803 100644 --- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp +++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp @@ -227,9 +227,11 @@ class Address VALUE_OBJ_CLASS_SPEC { #endif // ASSERT // accessors - bool uses(Register reg) const { - return _base == reg || _index == reg; - } + bool uses(Register reg) const { return _base == reg || _index == reg; } + Register base() const { return _base; } + Register index() const { return _index; } + ScaleFactor scale() const { return _scale; } + int disp() const { return _disp; } // Convert the raw encoding form into the form expected by the constructor for // Address. An index of 4 (rsp) corresponds to having no index, so convert @@ -1053,6 +1055,11 @@ private: void movdqa(XMMRegister dst, Address src); void movdqa(XMMRegister dst, XMMRegister src); + // Move Unaligned Double Quadword + void movdqu(Address dst, XMMRegister src); + void movdqu(XMMRegister dst, Address src); + void movdqu(XMMRegister dst, XMMRegister src); + void movl(Register dst, int32_t imm32); void movl(Address dst, int32_t imm32); void movl(Register dst, Register src); @@ -1310,7 +1317,8 @@ private: // on arguments should also go in here. class MacroAssembler: public Assembler { - friend class LIR_Assembler; + friend class LIR_Assembler; + friend class Runtime1; // as_Address() protected: Address as_Address(AddressLiteral adr); @@ -1453,6 +1461,7 @@ class MacroAssembler: public Assembler { // The pointer will be loaded into the thread register. void get_thread(Register thread); + // Support for VM calls // // It is imperative that all calls into the VM are handled via the call_VM macros. @@ -1527,6 +1536,22 @@ class MacroAssembler: public Assembler { void store_check(Register obj); // store check for obj - register is destroyed afterwards void store_check(Register obj, Address dst); // same as above, dst is exact store location (reg. 
is destroyed) + void g1_write_barrier_pre(Register obj, +#ifndef _LP64 + Register thread, +#endif + Register tmp, + Register tmp2, + bool tosca_live); + void g1_write_barrier_post(Register store_addr, + Register new_val, +#ifndef _LP64 + Register thread, +#endif + Register tmp, + Register tmp2); + + // split store_check(Register obj) to enhance instruction interleaving void store_check_part_1(Register obj); void store_check_part_2(Register obj); diff --git a/hotspot/src/cpu/x86/vm/c1_CodeStubs_x86.cpp b/hotspot/src/cpu/x86/vm/c1_CodeStubs_x86.cpp index 2f394855de0..c513092c574 100644 --- a/hotspot/src/cpu/x86/vm/c1_CodeStubs_x86.cpp +++ b/hotspot/src/cpu/x86/vm/c1_CodeStubs_x86.cpp @@ -456,5 +456,50 @@ void ArrayCopyStub::emit_code(LIR_Assembler* ce) { __ jmp(_continuation); } +///////////////////////////////////////////////////////////////////////////// +#ifndef SERIALGC + +void G1PreBarrierStub::emit_code(LIR_Assembler* ce) { + + // At this point we know that marking is in progress + + __ bind(_entry); + assert(pre_val()->is_register(), "Precondition."); + + Register pre_val_reg = pre_val()->as_register(); + + ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false); + + __ cmpptr(pre_val_reg, (int32_t) NULL_WORD); + __ jcc(Assembler::equal, _continuation); + ce->store_parameter(pre_val()->as_register(), 0); + __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id))); + __ jmp(_continuation); + +} + +jbyte* G1PostBarrierStub::_byte_map_base = NULL; + +jbyte* G1PostBarrierStub::byte_map_base_slow() { + BarrierSet* bs = Universe::heap()->barrier_set(); + assert(bs->is_a(BarrierSet::G1SATBCTLogging), + "Must be if we're using this."); + return ((G1SATBCardTableModRefBS*)bs)->byte_map_base; +} + +void G1PostBarrierStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + assert(addr()->is_register(), "Precondition."); + assert(new_val()->is_register(), "Precondition."); + Register new_val_reg = new_val()->as_register(); + __ 
cmpptr(new_val_reg, (int32_t) NULL_WORD); + __ jcc(Assembler::equal, _continuation); + ce->store_parameter(addr()->as_register(), 0); + __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_post_barrier_slow_id))); + __ jmp(_continuation); +} + +#endif // SERIALGC +///////////////////////////////////////////////////////////////////////////// #undef __ diff --git a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp index b8c29fb09a8..26acffbb321 100644 --- a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp +++ b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp @@ -302,6 +302,8 @@ void LIRGenerator::do_StoreIndexed(StoreIndexed* x) { } if (obj_store) { + // Needs GC write barriers. + pre_barrier(LIR_OprFact::address(array_addr), false, NULL); __ move(value.result(), array_addr, null_check_info); // Seems to be a precise post_barrier(LIR_OprFact::address(array_addr), value.result()); @@ -756,7 +758,10 @@ void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) { __ move(obj.result(), addr); __ add(addr, offset.result(), addr); - + if (type == objectType) { // Write-barrier needed for Object fields. + // Do the pre-write barrier, if any. + pre_barrier(addr, false, NULL); + } LIR_Opr ill = LIR_OprFact::illegalOpr; // for convenience if (type == objectType) @@ -1286,6 +1291,8 @@ void LIRGenerator::put_Object_unsafe(LIR_Opr src, LIR_Opr offset, LIR_Opr data, LIR_Address* addr = new LIR_Address(src, offset, type); bool is_obj = (type == T_ARRAY || type == T_OBJECT); if (is_obj) { + // Do the pre-write barrier, if any. 
+ pre_barrier(LIR_OprFact::address(addr), false, NULL); __ move(data, addr); assert(src->is_register(), "must be register"); // Seems to be a precise address diff --git a/hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp b/hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp index 9f36305a35b..eed2ad68de4 100644 --- a/hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp +++ b/hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp @@ -1583,6 +1583,166 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { } break; +#ifndef SERIALGC + case g1_pre_barrier_slow_id: + { + StubFrame f(sasm, "g1_pre_barrier", dont_gc_arguments); + // arg0 : previous value of memory + + BarrierSet* bs = Universe::heap()->barrier_set(); + if (bs->kind() != BarrierSet::G1SATBCTLogging) { + __ movptr(rax, (int)id); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), rax); + __ should_not_reach_here(); + break; + } + + __ push(rax); + __ push(rdx); + + const Register pre_val = rax; + const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread); + const Register tmp = rdx; + + NOT_LP64(__ get_thread(thread);) + + Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active())); + + Address queue_index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + + Label done; + Label runtime; + + // Can we store original value in the thread's buffer? 
+ + LP64_ONLY(__ movslq(tmp, queue_index);) +#ifdef _LP64 + __ cmpq(tmp, 0); +#else + __ cmpl(queue_index, 0); +#endif + __ jcc(Assembler::equal, runtime); +#ifdef _LP64 + __ subq(tmp, wordSize); + __ movl(queue_index, tmp); + __ addq(tmp, buffer); +#else + __ subl(queue_index, wordSize); + __ movl(tmp, buffer); + __ addl(tmp, queue_index); +#endif + + // prev_val (rax) + f.load_argument(0, pre_val); + __ movptr(Address(tmp, 0), pre_val); + __ jmp(done); + + __ bind(runtime); + // load the pre-value + __ push(rcx); + f.load_argument(0, rcx); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), rcx, thread); + __ pop(rcx); + + __ bind(done); + __ pop(rdx); + __ pop(rax); + } + break; + + case g1_post_barrier_slow_id: + { + StubFrame f(sasm, "g1_post_barrier", dont_gc_arguments); + + + // arg0: store_address + Address store_addr(rbp, 2*BytesPerWord); + + BarrierSet* bs = Universe::heap()->barrier_set(); + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + Label done; + Label runtime; + + // At this point we know new_value is non-NULL and the new_value crosses region. 
+ // Must check to see if card is already dirty + + const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread); + + Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + __ push(rax); + __ push(rdx); + + NOT_LP64(__ get_thread(thread);) + ExternalAddress cardtable((address)ct->byte_map_base); + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + + const Register card_addr = rdx; +#ifdef _LP64 + const Register tmp = rscratch1; + f.load_argument(0, card_addr); + __ shrq(card_addr, CardTableModRefBS::card_shift); + __ lea(tmp, cardtable); + // get the address of the card + __ addq(card_addr, tmp); +#else + const Register card_index = rdx; + f.load_argument(0, card_index); + __ shrl(card_index, CardTableModRefBS::card_shift); + + Address index(noreg, card_index, Address::times_1); + __ leal(card_addr, __ as_Address(ArrayAddress(cardtable, index))); +#endif + + __ cmpb(Address(card_addr, 0), 0); + __ jcc(Assembler::equal, done); + + // storing region crossing non-NULL, card is clean. + // dirty card and log. 
+ + __ movb(Address(card_addr, 0), 0); + + __ cmpl(queue_index, 0); + __ jcc(Assembler::equal, runtime); + __ subl(queue_index, wordSize); + + const Register buffer_addr = rbx; + __ push(rbx); + + __ movptr(buffer_addr, buffer); + +#ifdef _LP64 + __ movslq(rscratch1, queue_index); + __ addptr(buffer_addr, rscratch1); +#else + __ addptr(buffer_addr, queue_index); +#endif + __ movptr(Address(buffer_addr, 0), card_addr); + + __ pop(rbx); + __ jmp(done); + + __ bind(runtime); + NOT_LP64(__ push(rcx);) + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread); + NOT_LP64(__ pop(rcx);) + + __ bind(done); + __ pop(rdx); + __ pop(rax); + + } + break; +#endif // !SERIALGC + default: { StubFrame f(sasm, "unimplemented entry", dont_gc_arguments); __ movptr(rax, (int)id); diff --git a/hotspot/src/cpu/x86/vm/interp_masm_x86_64.cpp b/hotspot/src/cpu/x86/vm/interp_masm_x86_64.cpp index 75ca463ace0..9809649d37e 100644 --- a/hotspot/src/cpu/x86/vm/interp_masm_x86_64.cpp +++ b/hotspot/src/cpu/x86/vm/interp_masm_x86_64.cpp @@ -44,8 +44,13 @@ void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, // Note: No need to save/restore bcp & locals (r13 & r14) pointer // since these are callee saved registers and no blocking/ // GC can happen in leaf calls. + // Further Note: DO NOT save/restore bcp/locals. If a caller has + // already saved them so that it can use esi/edi as temporaries + // then a save/restore here will DESTROY the copy the caller + // saved! There used to be a save_bcp() that only happened in + // the ASSERT path (no restore_bcp). Which caused bizarre failures + // when jvm built with ASSERTs. 
#ifdef ASSERT - save_bcp(); { Label L; cmpptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD); @@ -58,24 +63,9 @@ void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, // super call MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); // interpreter specific -#ifdef ASSERT - { - Label L; - cmpptr(r13, Address(rbp, frame::interpreter_frame_bcx_offset * wordSize)); - jcc(Assembler::equal, L); - stop("InterpreterMacroAssembler::call_VM_leaf_base:" - " r13 not callee saved?"); - bind(L); - } - { - Label L; - cmpptr(r14, Address(rbp, frame::interpreter_frame_locals_offset * wordSize)); - jcc(Assembler::equal, L); - stop("InterpreterMacroAssembler::call_VM_leaf_base:" - " r14 not callee saved?"); - bind(L); - } -#endif + // Used to ASSERT that r13/r14 were equal to frame's bcp/locals + // but since they may not have been saved (and we don't want to + // save them here (see note above)) the assert is invalid. } void InterpreterMacroAssembler::call_VM_base(Register oop_result, diff --git a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp index ad2606e99f3..06435cb0005 100644 --- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp +++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp @@ -712,7 +712,6 @@ class StubGenerator: public StubCodeGenerator { // end - element count void gen_write_ref_array_pre_barrier(Register start, Register count) { assert_different_registers(start, count); -#if 0 // G1 only BarrierSet* bs = Universe::heap()->barrier_set(); switch (bs->kind()) { case BarrierSet::G1SATBCT: @@ -721,8 +720,8 @@ class StubGenerator: public StubCodeGenerator { __ pusha(); // push registers __ push(count); __ push(start); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre)); - __ addl(esp, wordSize * 2); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre))); + __ addptr(rsp, 
2*wordSize); __ popa(); } break; @@ -734,7 +733,6 @@ class StubGenerator: public StubCodeGenerator { ShouldNotReachHere(); } -#endif // 0 - G1 only } @@ -750,20 +748,18 @@ class StubGenerator: public StubCodeGenerator { BarrierSet* bs = Universe::heap()->barrier_set(); assert_different_registers(start, count); switch (bs->kind()) { -#if 0 // G1 only case BarrierSet::G1SATBCT: case BarrierSet::G1SATBCTLogging: { __ pusha(); // push registers __ push(count); __ push(start); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post)); - __ addl(esp, wordSize * 2); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post))); + __ addptr(rsp, 2*wordSize); __ popa(); } break; -#endif // 0 G1 only case BarrierSet::CardTableModRef: case BarrierSet::CardTableExtension: @@ -795,6 +791,69 @@ class StubGenerator: public StubCodeGenerator { } } + + // Copy 64 bytes chunks + // + // Inputs: + // from - source array address + // to_from - destination array address - from + // qword_count - 8-bytes element count, negative + // + void xmm_copy_forward(Register from, Register to_from, Register qword_count) { + assert( UseSSE >= 2, "supported cpu only" ); + Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit; + // Copy 64-byte chunks + __ jmpb(L_copy_64_bytes); + __ align(16); + __ BIND(L_copy_64_bytes_loop); + + if(UseUnalignedLoadStores) { + __ movdqu(xmm0, Address(from, 0)); + __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0); + __ movdqu(xmm1, Address(from, 16)); + __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1); + __ movdqu(xmm2, Address(from, 32)); + __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2); + __ movdqu(xmm3, Address(from, 48)); + __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3); + + } else { + __ movq(xmm0, Address(from, 0)); + __ movq(Address(from, to_from, Address::times_1, 0), xmm0); + __ movq(xmm1, Address(from, 8)); + __ 
movq(Address(from, to_from, Address::times_1, 8), xmm1); + __ movq(xmm2, Address(from, 16)); + __ movq(Address(from, to_from, Address::times_1, 16), xmm2); + __ movq(xmm3, Address(from, 24)); + __ movq(Address(from, to_from, Address::times_1, 24), xmm3); + __ movq(xmm4, Address(from, 32)); + __ movq(Address(from, to_from, Address::times_1, 32), xmm4); + __ movq(xmm5, Address(from, 40)); + __ movq(Address(from, to_from, Address::times_1, 40), xmm5); + __ movq(xmm6, Address(from, 48)); + __ movq(Address(from, to_from, Address::times_1, 48), xmm6); + __ movq(xmm7, Address(from, 56)); + __ movq(Address(from, to_from, Address::times_1, 56), xmm7); + } + + __ addl(from, 64); + __ BIND(L_copy_64_bytes); + __ subl(qword_count, 8); + __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop); + __ addl(qword_count, 8); + __ jccb(Assembler::zero, L_exit); + // + // length is too short, just copy qwords + // + __ BIND(L_copy_8_bytes); + __ movq(xmm0, Address(from, 0)); + __ movq(Address(from, to_from, Address::times_1), xmm0); + __ addl(from, 8); + __ decrement(qword_count); + __ jcc(Assembler::greater, L_copy_8_bytes); + __ BIND(L_exit); + } + // Copy 64 bytes chunks // // Inputs: @@ -803,6 +862,7 @@ class StubGenerator: public StubCodeGenerator { // qword_count - 8-bytes element count, negative // void mmx_copy_forward(Register from, Register to_from, Register qword_count) { + assert( VM_Version::supports_mmx(), "supported cpu only" ); Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit; // Copy 64-byte chunks __ jmpb(L_copy_64_bytes); @@ -880,7 +940,7 @@ class StubGenerator: public StubCodeGenerator { __ subptr(to, from); // to --> to_from __ cmpl(count, 2< to_from if (VM_Version::supports_mmx()) { - mmx_copy_forward(from, to_from, count); + if (UseXMMForArrayCopy) { + xmm_copy_forward(from, to_from, count); + } else { + mmx_copy_forward(from, to_from, count); + } } else { __ jmpb(L_copy_8_bytes); __ align(16); @@ -1200,8 +1277,13 @@ class StubGenerator: public 
StubCodeGenerator { __ align(16); __ BIND(L_copy_8_bytes_loop); if (VM_Version::supports_mmx()) { - __ movq(mmx0, Address(from, count, Address::times_8)); - __ movq(Address(to, count, Address::times_8), mmx0); + if (UseXMMForArrayCopy) { + __ movq(xmm0, Address(from, count, Address::times_8)); + __ movq(Address(to, count, Address::times_8), xmm0); + } else { + __ movq(mmx0, Address(from, count, Address::times_8)); + __ movq(Address(to, count, Address::times_8), mmx0); + } } else { __ fild_d(Address(from, count, Address::times_8)); __ fistp_d(Address(to, count, Address::times_8)); @@ -1210,7 +1292,7 @@ class StubGenerator: public StubCodeGenerator { __ decrement(count); __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop); - if (VM_Version::supports_mmx()) { + if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) { __ emms(); } inc_copy_counter_np(T_LONG); @@ -1378,9 +1460,9 @@ class StubGenerator: public StubCodeGenerator { Address elem_klass_addr(elem, oopDesc::klass_offset_in_bytes()); // Copy from low to high addresses, indexed from the end of each array. + gen_write_ref_array_pre_barrier(to, count); __ lea(end_from, end_from_addr); __ lea(end_to, end_to_addr); - gen_write_ref_array_pre_barrier(to, count); assert(length == count, ""); // else fix next line: __ negptr(count); // negate and test the length __ jccb(Assembler::notZero, L_load_element); diff --git a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp index 6964a1eb048..33f6e88ee6d 100644 --- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp +++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp @@ -1153,18 +1153,26 @@ class StubGenerator: public StubCodeGenerator { // Destroy no registers! 
// void gen_write_ref_array_pre_barrier(Register addr, Register count) { -#if 0 // G1 - only - assert_different_registers(addr, c_rarg1); - assert_different_registers(count, c_rarg0); BarrierSet* bs = Universe::heap()->barrier_set(); switch (bs->kind()) { case BarrierSet::G1SATBCT: case BarrierSet::G1SATBCTLogging: { __ pusha(); // push registers - __ movptr(c_rarg0, addr); - __ movptr(c_rarg1, count); - __ call(RuntimeAddress(BarrierSet::static_write_ref_array_pre)); + if (count == c_rarg0) { + if (addr == c_rarg1) { + // exactly backwards!! + __ xchgptr(c_rarg1, c_rarg0); + } else { + __ movptr(c_rarg1, count); + __ movptr(c_rarg0, addr); + } + + } else { + __ movptr(c_rarg0, addr); + __ movptr(c_rarg1, count); + } + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre))); __ popa(); } break; @@ -1172,11 +1180,10 @@ class StubGenerator: public StubCodeGenerator { case BarrierSet::CardTableExtension: case BarrierSet::ModRef: break; - default : + default: ShouldNotReachHere(); } -#endif // 0 G1 - only } // @@ -1193,7 +1200,6 @@ class StubGenerator: public StubCodeGenerator { assert_different_registers(start, end, scratch); BarrierSet* bs = Universe::heap()->barrier_set(); switch (bs->kind()) { -#if 0 // G1 - only case BarrierSet::G1SATBCT: case BarrierSet::G1SATBCTLogging: @@ -1206,11 +1212,10 @@ class StubGenerator: public StubCodeGenerator { __ shrptr(scratch, LogBytesPerWord); __ mov(c_rarg0, start); __ mov(c_rarg1, scratch); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post)); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post))); __ popa(); } break; -#endif // 0 G1 - only case BarrierSet::CardTableModRef: case BarrierSet::CardTableExtension: { @@ -1239,8 +1244,13 @@ class StubGenerator: public StubCodeGenerator { __ decrement(count); __ jcc(Assembler::greaterEqual, L_loop); } - } - } + break; + default: + ShouldNotReachHere(); + + } + } + // Copy 
big chunks forward // @@ -1259,14 +1269,22 @@ class StubGenerator: public StubCodeGenerator { Label L_loop; __ align(16); __ BIND(L_loop); - __ movq(to, Address(end_from, qword_count, Address::times_8, -24)); - __ movq(Address(end_to, qword_count, Address::times_8, -24), to); - __ movq(to, Address(end_from, qword_count, Address::times_8, -16)); - __ movq(Address(end_to, qword_count, Address::times_8, -16), to); - __ movq(to, Address(end_from, qword_count, Address::times_8, - 8)); - __ movq(Address(end_to, qword_count, Address::times_8, - 8), to); - __ movq(to, Address(end_from, qword_count, Address::times_8, - 0)); - __ movq(Address(end_to, qword_count, Address::times_8, - 0), to); + if(UseUnalignedLoadStores) { + __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24)); + __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0); + __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8)); + __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1); + + } else { + __ movq(to, Address(end_from, qword_count, Address::times_8, -24)); + __ movq(Address(end_to, qword_count, Address::times_8, -24), to); + __ movq(to, Address(end_from, qword_count, Address::times_8, -16)); + __ movq(Address(end_to, qword_count, Address::times_8, -16), to); + __ movq(to, Address(end_from, qword_count, Address::times_8, - 8)); + __ movq(Address(end_to, qword_count, Address::times_8, - 8), to); + __ movq(to, Address(end_from, qword_count, Address::times_8, - 0)); + __ movq(Address(end_to, qword_count, Address::times_8, - 0), to); + } __ BIND(L_copy_32_bytes); __ addptr(qword_count, 4); __ jcc(Assembler::lessEqual, L_loop); @@ -1292,14 +1310,22 @@ class StubGenerator: public StubCodeGenerator { Label L_loop; __ align(16); __ BIND(L_loop); - __ movq(to, Address(from, qword_count, Address::times_8, 24)); - __ movq(Address(dest, qword_count, Address::times_8, 24), to); - __ movq(to, Address(from, qword_count, Address::times_8, 16)); - __ 
movq(Address(dest, qword_count, Address::times_8, 16), to); - __ movq(to, Address(from, qword_count, Address::times_8, 8)); - __ movq(Address(dest, qword_count, Address::times_8, 8), to); - __ movq(to, Address(from, qword_count, Address::times_8, 0)); - __ movq(Address(dest, qword_count, Address::times_8, 0), to); + if(UseUnalignedLoadStores) { + __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16)); + __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0); + __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 0)); + __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm1); + + } else { + __ movq(to, Address(from, qword_count, Address::times_8, 24)); + __ movq(Address(dest, qword_count, Address::times_8, 24), to); + __ movq(to, Address(from, qword_count, Address::times_8, 16)); + __ movq(Address(dest, qword_count, Address::times_8, 16), to); + __ movq(to, Address(from, qword_count, Address::times_8, 8)); + __ movq(Address(dest, qword_count, Address::times_8, 8), to); + __ movq(to, Address(from, qword_count, Address::times_8, 0)); + __ movq(Address(dest, qword_count, Address::times_8, 0), to); + } __ BIND(L_copy_32_bytes); __ subptr(qword_count, 4); __ jcc(Assembler::greaterEqual, L_loop); @@ -2282,7 +2308,7 @@ class StubGenerator: public StubCodeGenerator { // and report their number to the caller. assert_different_registers(rax, r14_length, count, to, end_to, rcx); __ lea(end_to, to_element_addr); - gen_write_ref_array_post_barrier(to, end_to, rcx); + gen_write_ref_array_post_barrier(to, end_to, rscratch1); __ movptr(rax, r14_length); // original oops __ addptr(rax, count); // K = (original - remaining) oops __ notptr(rax); // report (-1^K) to caller @@ -2291,7 +2317,7 @@ class StubGenerator: public StubCodeGenerator { // Come here on success only. 
__ BIND(L_do_card_marks); __ addptr(end_to, -wordSize); // make an inclusive end pointer - gen_write_ref_array_post_barrier(to, end_to, rcx); + gen_write_ref_array_post_barrier(to, end_to, rscratch1); __ xorptr(rax, rax); // return 0 on success // Common exit point (success or failure). diff --git a/hotspot/src/cpu/x86/vm/templateTable_x86_32.cpp b/hotspot/src/cpu/x86/vm/templateTable_x86_32.cpp index 31c6975ec64..5acd696b2b0 100644 --- a/hotspot/src/cpu/x86/vm/templateTable_x86_32.cpp +++ b/hotspot/src/cpu/x86/vm/templateTable_x86_32.cpp @@ -107,6 +107,78 @@ static Assembler::Condition j_not(TemplateTable::Condition cc) { //---------------------------------------------------------------------------------------------------- // Miscelaneous helper routines +// Store an oop (or NULL) at the address described by obj. +// If val == noreg this means store a NULL + +static void do_oop_store(InterpreterMacroAssembler* _masm, + Address obj, + Register val, + BarrierSet::Name barrier, + bool precise) { + assert(val == noreg || val == rax, "parameter is just for looks"); + switch (barrier) { +#ifndef SERIALGC + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + { + // flatten object address if needed + // We do it regardless of precise because we need the registers + if (obj.index() == noreg && obj.disp() == 0) { + if (obj.base() != rdx) { + __ movl(rdx, obj.base()); + } + } else { + __ leal(rdx, obj); + } + __ get_thread(rcx); + __ save_bcp(); + __ g1_write_barrier_pre(rdx, rcx, rsi, rbx, val != noreg); + + // Do the actual store + // noreg means NULL + if (val == noreg) { + __ movl(Address(rdx, 0), NULL_WORD); + // No post barrier for NULL + } else { + __ movl(Address(rdx, 0), val); + __ g1_write_barrier_post(rdx, rax, rcx, rbx, rsi); + } + __ restore_bcp(); + + } + break; +#endif // SERIALGC + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + { + if (val == noreg) { + __ movl(obj, NULL_WORD); + } else { + __ movl(obj, val); + // 
flatten object address if needed + if (!precise || (obj.index() == noreg && obj.disp() == 0)) { + __ store_check(obj.base()); + } else { + __ leal(rdx, obj); + __ store_check(rdx); + } + } + } + break; + case BarrierSet::ModRef: + case BarrierSet::Other: + if (val == noreg) { + __ movl(obj, NULL_WORD); + } else { + __ movl(obj, val); + } + break; + default : + ShouldNotReachHere(); + + } +} + Address TemplateTable::at_bcp(int offset) { assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); return Address(rsi, offset); @@ -876,6 +948,8 @@ void TemplateTable::aastore() { __ movptr(rax, at_tos()); // Value __ movl(rcx, at_tos_p1()); // Index __ movptr(rdx, at_tos_p2()); // Array + + Address element_address(rdx, rcx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); index_check_without_pop(rdx, rcx); // kills rbx, // do array store check - check for NULL value first __ testptr(rax, rax); @@ -887,7 +961,7 @@ void TemplateTable::aastore() { __ movptr(rax, Address(rdx, oopDesc::klass_offset_in_bytes())); __ movptr(rax, Address(rax, sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes())); // Compress array+index*wordSize+12 into a single register. Frees ECX. - __ lea(rdx, Address(rdx, rcx, Address::times_ptr, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); + __ lea(rdx, element_address); // Generate subtype check. Blows ECX. Resets EDI to locals. // Superklass in EAX. Subklass in EBX. @@ -899,15 +973,20 @@ void TemplateTable::aastore() { // Come here on success __ bind(ok_is_subtype); - __ movptr(rax, at_rsp()); // Value - __ movptr(Address(rdx, 0), rax); - __ store_check(rdx); - __ jmpb(done); + + // Get the value to store + __ movptr(rax, at_rsp()); + // and store it with appropriate barrier + do_oop_store(_masm, Address(rdx, 0), rax, _bs->kind(), true); + + __ jmp(done); // Have a NULL in EAX, EDX=array, ECX=index. 
Store NULL at ary[idx] __ bind(is_null); __ profile_null_seen(rbx); - __ movptr(Address(rdx, rcx, Address::times_ptr, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), rax); + + // Store NULL, (noreg means NULL to do_oop_store) + do_oop_store(_masm, element_address, noreg, _bs->kind(), true); // Pop stack arguments __ bind(done); @@ -1515,7 +1594,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) { // compute return address as bci in rax, __ lea(rax, at_bcp((is_wide ? 5 : 3) - in_bytes(constMethodOopDesc::codes_offset()))); __ subptr(rax, Address(rcx, methodOopDesc::const_offset())); - // Adjust the bcp in ESI by the displacement in EDX + // Adjust the bcp in RSI by the displacement in EDX __ addptr(rsi, rdx); // Push return address __ push_i(rax); @@ -1526,7 +1605,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) { // Normal (non-jsr) branch handling - // Adjust the bcp in ESI by the displacement in EDX + // Adjust the bcp in RSI by the displacement in EDX __ addptr(rsi, rdx); assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters"); @@ -2439,11 +2518,12 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static) { __ pop(atos); if (!is_static) pop_and_check_object(obj); - __ movptr(lo, rax ); - __ store_check(obj, lo); // Need to mark card + do_oop_store(_masm, lo, rax, _bs->kind(), false); + if (!is_static) { patch_bytecode(Bytecodes::_fast_aputfield, rcx, rbx); } + __ jmp(Done); __ bind(notObj); @@ -2664,7 +2744,10 @@ void TemplateTable::fast_storefield(TosState state) { break; case Bytecodes::_fast_fputfield: __ fstp_s(lo); break; case Bytecodes::_fast_dputfield: __ fstp_d(lo); break; - case Bytecodes::_fast_aputfield: __ movptr(lo, rax); __ store_check(rcx, lo); break; + case Bytecodes::_fast_aputfield: { + do_oop_store(_masm, lo, rax, _bs->kind(), false); + break; + } default: ShouldNotReachHere(); } @@ -2672,7 +2755,8 @@ void TemplateTable::fast_storefield(TosState state) { Label done; 
volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad | Assembler::StoreStore)); - __ jmpb(done); + // Barriers are so large that short branch doesn't reach! + __ jmp(done); // Same code as above, but don't need rdx to test for volatile. __ bind(notVolatile); @@ -2694,7 +2778,10 @@ void TemplateTable::fast_storefield(TosState state) { break; case Bytecodes::_fast_fputfield: __ fstp_s(lo); break; case Bytecodes::_fast_dputfield: __ fstp_d(lo); break; - case Bytecodes::_fast_aputfield: __ movptr(lo, rax); __ store_check(rcx, lo); break; + case Bytecodes::_fast_aputfield: { + do_oop_store(_masm, lo, rax, _bs->kind(), false); + break; + } default: ShouldNotReachHere(); } @@ -3054,8 +3141,6 @@ void TemplateTable::_new() { Label initialize_object; // including clearing the fields Label allocate_shared; - ExternalAddress heap_top((address)Universe::heap()->top_addr()); - __ get_cpool_and_tags(rcx, rax); // get instanceKlass __ movptr(rcx, Address(rcx, rdx, Address::times_ptr, sizeof(constantPoolOopDesc))); @@ -3112,6 +3197,8 @@ void TemplateTable::_new() { if (allow_shared_alloc) { __ bind(allocate_shared); + ExternalAddress heap_top((address)Universe::heap()->top_addr()); + Label retry; __ bind(retry); __ movptr(rax, heap_top); diff --git a/hotspot/src/cpu/x86/vm/templateTable_x86_64.cpp b/hotspot/src/cpu/x86/vm/templateTable_x86_64.cpp index b239d635b43..e4b4cb96980 100644 --- a/hotspot/src/cpu/x86/vm/templateTable_x86_64.cpp +++ b/hotspot/src/cpu/x86/vm/templateTable_x86_64.cpp @@ -115,6 +115,69 @@ static Assembler::Condition j_not(TemplateTable::Condition cc) { // Miscelaneous helper routines +// Store an oop (or NULL) at the address described by obj. 
+// If val == noreg this means store a NULL + +static void do_oop_store(InterpreterMacroAssembler* _masm, + Address obj, + Register val, + BarrierSet::Name barrier, + bool precise) { + assert(val == noreg || val == rax, "parameter is just for looks"); + switch (barrier) { +#ifndef SERIALGC + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + { + // flatten object address if needed + if (obj.index() == noreg && obj.disp() == 0) { + if (obj.base() != rdx) { + __ movq(rdx, obj.base()); + } + } else { + __ leaq(rdx, obj); + } + __ g1_write_barrier_pre(rdx, r8, rbx, val != noreg); + if (val == noreg) { + __ store_heap_oop(Address(rdx, 0), NULL_WORD); + } else { + __ store_heap_oop(Address(rdx, 0), val); + __ g1_write_barrier_post(rdx, val, r8, rbx); + } + + } + break; +#endif // SERIALGC + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + { + if (val == noreg) { + __ store_heap_oop(obj, NULL_WORD); + } else { + __ store_heap_oop(obj, val); + // flatten object address if needed + if (!precise || (obj.index() == noreg && obj.disp() == 0)) { + __ store_check(obj.base()); + } else { + __ leaq(rdx, obj); + __ store_check(rdx); + } + } + } + break; + case BarrierSet::ModRef: + case BarrierSet::Other: + if (val == noreg) { + __ store_heap_oop(obj, NULL_WORD); + } else { + __ store_heap_oop(obj, val); + } + break; + default : + ShouldNotReachHere(); + + } +} Address TemplateTable::at_bcp(int offset) { assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); @@ -560,8 +623,8 @@ void TemplateTable::aaload() { // rdx: array index_check(rdx, rax); // kills rbx __ load_heap_oop(rax, Address(rdx, rax, - UseCompressedOops ? Address::times_4 : Address::times_8, - arrayOopDesc::base_offset_in_bytes(T_OBJECT))); + UseCompressedOops ? 
Address::times_4 : Address::times_8, + arrayOopDesc::base_offset_in_bytes(T_OBJECT))); } void TemplateTable::baload() { @@ -866,6 +929,11 @@ void TemplateTable::aastore() { __ movptr(rax, at_tos()); // value __ movl(rcx, at_tos_p1()); // index __ movptr(rdx, at_tos_p2()); // array + + Address element_address(rdx, rcx, + UseCompressedOops? Address::times_4 : Address::times_8, + arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + index_check(rdx, rcx); // kills rbx // do array store check - check for NULL value first __ testptr(rax, rax); @@ -879,9 +947,7 @@ void TemplateTable::aastore() { sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes())); // Compress array + index*oopSize + 12 into a single register. Frees rcx. - __ lea(rdx, Address(rdx, rcx, - UseCompressedOops ? Address::times_4 : Address::times_8, - arrayOopDesc::base_offset_in_bytes(T_OBJECT))); + __ lea(rdx, element_address); // Generate subtype check. Blows rcx, rdi // Superklass in rax. Subklass in rbx. @@ -893,18 +959,19 @@ void TemplateTable::aastore() { // Come here on success __ bind(ok_is_subtype); - __ movptr(rax, at_tos()); // Value - __ store_heap_oop(Address(rdx, 0), rax); - __ store_check(rdx); + + // Get the value we will store + __ movptr(rax, at_tos()); + // Now store using the appropriate barrier + do_oop_store(_masm, Address(rdx, 0), rax, _bs->kind(), true); __ jmp(done); // Have a NULL in rax, rdx=array, ecx=index. Store NULL at ary[idx] __ bind(is_null); __ profile_null_seen(rbx); - __ store_heap_oop(Address(rdx, rcx, - UseCompressedOops ? 
Address::times_4 : Address::times_8, - arrayOopDesc::base_offset_in_bytes(T_OBJECT)), - rax); + + // Store a NULL + do_oop_store(_masm, element_address, noreg, _bs->kind(), true); // Pop stack arguments __ bind(done); @@ -2396,8 +2463,10 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static) { // atos __ pop(atos); if (!is_static) pop_and_check_object(obj); - __ store_heap_oop(field, rax); - __ store_check(obj, field); // Need to mark card + + // Store into the field + do_oop_store(_masm, field, rax, _bs->kind(), false); + if (!is_static) { patch_bytecode(Bytecodes::_fast_aputfield, bc, rbx); } @@ -2584,8 +2653,7 @@ void TemplateTable::fast_storefield(TosState state) { // access field switch (bytecode()) { case Bytecodes::_fast_aputfield: - __ store_heap_oop(field, rax); - __ store_check(rcx, field); + do_oop_store(_masm, field, rax, _bs->kind(), false); break; case Bytecodes::_fast_lputfield: __ movq(field, rax); @@ -3044,8 +3112,6 @@ void TemplateTable::_new() { Label initialize_header; Label initialize_object; // including clearing the fields Label allocate_shared; - ExternalAddress top((address)Universe::heap()->top_addr()); - ExternalAddress end((address)Universe::heap()->end_addr()); __ get_cpool_and_tags(rsi, rax); // get instanceKlass @@ -3106,6 +3172,9 @@ void TemplateTable::_new() { if (allow_shared_alloc) { __ bind(allocate_shared); + ExternalAddress top((address)Universe::heap()->top_addr()); + ExternalAddress end((address)Universe::heap()->end_addr()); + const Register RtopAddr = rscratch1; const Register RendAddr = rscratch2; diff --git a/hotspot/src/cpu/x86/vm/vm_version_x86_32.cpp b/hotspot/src/cpu/x86/vm/vm_version_x86_32.cpp index 839a4cdaeda..edd1da4e31f 100644 --- a/hotspot/src/cpu/x86/vm/vm_version_x86_32.cpp +++ b/hotspot/src/cpu/x86/vm/vm_version_x86_32.cpp @@ -242,9 +242,11 @@ void VM_Version::get_processor_features() { _supports_cx8 = supports_cmpxchg8(); // if the OS doesn't support SSE, we can't use this feature even if 
the HW does if( !os::supports_sse()) - _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4|CPU_SSE4A); - if (UseSSE < 4) - _cpuFeatures &= ~CPU_SSE4; + _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2); + if (UseSSE < 4) { + _cpuFeatures &= ~CPU_SSE4_1; + _cpuFeatures &= ~CPU_SSE4_2; + } if (UseSSE < 3) { _cpuFeatures &= ~CPU_SSE3; _cpuFeatures &= ~CPU_SSSE3; @@ -261,7 +263,7 @@ void VM_Version::get_processor_features() { } char buf[256]; - jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", cores_per_cpu(), threads_per_core(), cpu_family(), _model, _stepping, (supports_cmov() ? ", cmov" : ""), @@ -272,7 +274,8 @@ void VM_Version::get_processor_features() { (supports_sse2() ? ", sse2" : ""), (supports_sse3() ? ", sse3" : ""), (supports_ssse3()? ", ssse3": ""), - (supports_sse4() ? ", sse4" : ""), + (supports_sse4_1() ? ", sse4.1" : ""), + (supports_sse4_2() ? ", sse4.2" : ""), (supports_mmx_ext() ? ", mmxext" : ""), (supports_3dnow() ? ", 3dnow" : ""), (supports_3dnow2() ? ", 3dnowext" : ""), @@ -285,7 +288,7 @@ void VM_Version::get_processor_features() { // older Pentiums which do not support it. 
if( UseSSE > 4 ) UseSSE=4; if( UseSSE < 0 ) UseSSE=0; - if( !supports_sse4() ) // Drop to 3 if no SSE4 support + if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support UseSSE = MIN2((intx)3,UseSSE); if( !supports_sse3() ) // Drop to 2 if no SSE3 support UseSSE = MIN2((intx)2,UseSSE); @@ -375,6 +378,14 @@ void VM_Version::get_processor_features() { MaxLoopPad = 11; } #endif // COMPILER2 + if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) { + UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus + } + if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus + if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) { + UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus + } + } } } @@ -413,7 +424,7 @@ void VM_Version::get_processor_features() { #ifndef PRODUCT if (PrintMiscellaneous && Verbose) { - tty->print_cr("Logical CPUs per package: %u", + tty->print_cr("Logical CPUs per core: %u", logical_processors_per_package()); tty->print_cr("UseSSE=%d",UseSSE); tty->print("Allocation: "); diff --git a/hotspot/src/cpu/x86/vm/vm_version_x86_32.hpp b/hotspot/src/cpu/x86/vm/vm_version_x86_32.hpp index 361bfb2eb11..b37b4f4e516 100644 --- a/hotspot/src/cpu/x86/vm/vm_version_x86_32.hpp +++ b/hotspot/src/cpu/x86/vm/vm_version_x86_32.hpp @@ -68,9 +68,9 @@ public: cmpxchg16: 1, : 4, dca : 1, - : 4, - popcnt : 1, - : 8; + sse4_1 : 1, + sse4_2 : 1, + : 11; } bits; }; @@ -177,8 +177,9 @@ protected: CPU_SSE2 = (1 << 7), CPU_SSE3 = (1 << 8), // sse3 comes from cpuid 1 (ECX) CPU_SSSE3= (1 << 9), - CPU_SSE4 = (1 <<10), - CPU_SSE4A= (1 <<11) + CPU_SSE4A= (1 <<10), + CPU_SSE4_1 = (1 << 11), + CPU_SSE4_2 = (1 << 12) } cpuFeatureFlags; // cpuid information block. 
All info derived from executing cpuid with @@ -240,22 +241,14 @@ protected: static CpuidInfo _cpuid_info; // Extractors and predicates - static bool is_extended_cpu_family() { - const uint32_t Extended_Cpu_Family = 0xf; - return _cpuid_info.std_cpuid1_rax.bits.family == Extended_Cpu_Family; - } static uint32_t extended_cpu_family() { uint32_t result = _cpuid_info.std_cpuid1_rax.bits.family; - if (is_extended_cpu_family()) { - result += _cpuid_info.std_cpuid1_rax.bits.ext_family; - } + result += _cpuid_info.std_cpuid1_rax.bits.ext_family; return result; } static uint32_t extended_cpu_model() { uint32_t result = _cpuid_info.std_cpuid1_rax.bits.model; - if (is_extended_cpu_family()) { - result |= _cpuid_info.std_cpuid1_rax.bits.ext_model << 4; - } + result |= _cpuid_info.std_cpuid1_rax.bits.ext_model << 4; return result; } static uint32_t cpu_stepping() { @@ -293,6 +286,10 @@ protected: result |= CPU_SSSE3; if (is_amd() && _cpuid_info.ext_cpuid1_rcx.bits.sse4a != 0) result |= CPU_SSE4A; + if (_cpuid_info.std_cpuid1_rcx.bits.sse4_1 != 0) + result |= CPU_SSE4_1; + if (_cpuid_info.std_cpuid1_rcx.bits.sse4_2 != 0) + result |= CPU_SSE4_2; return result; } @@ -380,7 +377,8 @@ public: static bool supports_sse2() { return (_cpuFeatures & CPU_SSE2) != 0; } static bool supports_sse3() { return (_cpuFeatures & CPU_SSE3) != 0; } static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; } - static bool supports_sse4() { return (_cpuFeatures & CPU_SSE4) != 0; } + static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; } + static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; } // // AMD features // diff --git a/hotspot/src/cpu/x86/vm/vm_version_x86_64.cpp b/hotspot/src/cpu/x86/vm/vm_version_x86_64.cpp index 709d82e6e68..7994aab7a78 100644 --- a/hotspot/src/cpu/x86/vm/vm_version_x86_64.cpp +++ b/hotspot/src/cpu/x86/vm/vm_version_x86_64.cpp @@ -186,8 +186,10 @@ void VM_Version::get_processor_features() { if 
(!VM_Version::supports_sse2()) { vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported"); } - if (UseSSE < 4) - _cpuFeatures &= ~CPU_SSE4; + if (UseSSE < 4) { + _cpuFeatures &= ~CPU_SSE4_1; + _cpuFeatures &= ~CPU_SSE4_2; + } if (UseSSE < 3) { _cpuFeatures &= ~CPU_SSE3; _cpuFeatures &= ~CPU_SSSE3; @@ -204,7 +206,7 @@ void VM_Version::get_processor_features() { } char buf[256]; - jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", cores_per_cpu(), threads_per_core(), cpu_family(), _model, _stepping, (supports_cmov() ? ", cmov" : ""), @@ -215,7 +217,8 @@ void VM_Version::get_processor_features() { (supports_sse2() ? ", sse2" : ""), (supports_sse3() ? ", sse3" : ""), (supports_ssse3()? ", ssse3": ""), - (supports_sse4() ? ", sse4" : ""), + (supports_sse4_1() ? ", sse4.1" : ""), + (supports_sse4_2() ? ", sse4.2" : ""), (supports_mmx_ext() ? ", mmxext" : ""), (supports_3dnow() ? ", 3dnow" : ""), (supports_3dnow2() ? ", 3dnowext" : ""), @@ -228,7 +231,7 @@ void VM_Version::get_processor_features() { // older Pentiums which do not support it. 
if( UseSSE > 4 ) UseSSE=4; if( UseSSE < 0 ) UseSSE=0; - if( !supports_sse4() ) // Drop to 3 if no SSE4 support + if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support UseSSE = MIN2((intx)3,UseSSE); if( !supports_sse3() ) // Drop to 2 if no SSE3 support UseSSE = MIN2((intx)2,UseSSE); @@ -314,6 +317,14 @@ void VM_Version::get_processor_features() { MaxLoopPad = 11; } #endif // COMPILER2 + if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) { + UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus + } + if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus + if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) { + UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus + } + } } } @@ -355,7 +366,7 @@ void VM_Version::get_processor_features() { #ifndef PRODUCT if (PrintMiscellaneous && Verbose) { - tty->print_cr("Logical CPUs per package: %u", + tty->print_cr("Logical CPUs per core: %u", logical_processors_per_package()); tty->print_cr("UseSSE=%d",UseSSE); tty->print("Allocation: "); diff --git a/hotspot/src/cpu/x86/vm/vm_version_x86_64.hpp b/hotspot/src/cpu/x86/vm/vm_version_x86_64.hpp index dc60b370073..37ba2d07558 100644 --- a/hotspot/src/cpu/x86/vm/vm_version_x86_64.hpp +++ b/hotspot/src/cpu/x86/vm/vm_version_x86_64.hpp @@ -68,9 +68,9 @@ public: cmpxchg16: 1, : 4, dca : 1, - : 4, - popcnt : 1, - : 8; + sse4_1 : 1, + sse4_2 : 1, + : 11; } bits; }; @@ -177,8 +177,9 @@ protected: CPU_SSE2 = (1 << 7), CPU_SSE3 = (1 << 8), CPU_SSSE3= (1 << 9), - CPU_SSE4 = (1 <<10), - CPU_SSE4A= (1 <<11) + CPU_SSE4A= (1 <<10), + CPU_SSE4_1 = (1 << 11), + CPU_SSE4_2 = (1 << 12) } cpuFeatureFlags; // cpuid information block. 
All info derived from executing cpuid with @@ -240,22 +241,14 @@ protected: static CpuidInfo _cpuid_info; // Extractors and predicates - static bool is_extended_cpu_family() { - const uint32_t Extended_Cpu_Family = 0xf; - return _cpuid_info.std_cpuid1_eax.bits.family == Extended_Cpu_Family; - } static uint32_t extended_cpu_family() { uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family; - if (is_extended_cpu_family()) { - result += _cpuid_info.std_cpuid1_eax.bits.ext_family; - } + result += _cpuid_info.std_cpuid1_eax.bits.ext_family; return result; } static uint32_t extended_cpu_model() { uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model; - if (is_extended_cpu_family()) { - result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4; - } + result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4; return result; } static uint32_t cpu_stepping() { @@ -293,6 +286,10 @@ protected: result |= CPU_SSSE3; if (is_amd() && _cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0) result |= CPU_SSE4A; + if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0) + result |= CPU_SSE4_1; + if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0) + result |= CPU_SSE4_2; return result; } @@ -380,7 +377,8 @@ public: static bool supports_sse2() { return (_cpuFeatures & CPU_SSE2) != 0; } static bool supports_sse3() { return (_cpuFeatures & CPU_SSE3) != 0; } static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; } - static bool supports_sse4() { return (_cpuFeatures & CPU_SSE4) != 0; } + static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; } + static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; } // // AMD features // diff --git a/hotspot/src/cpu/x86/vm/x86_32.ad b/hotspot/src/cpu/x86/vm/x86_32.ad index a4a4199312d..589051e06bf 100644 --- a/hotspot/src/cpu/x86/vm/x86_32.ad +++ b/hotspot/src/cpu/x86/vm/x86_32.ad @@ -4810,6 +4810,16 @@ operand immL0() %{ interface(CONST_INTER); %} +// Long Immediate zero +operand immL_M1() %{ + predicate( n->get_long() == 
-1L ); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + // Long immediate from 0 to 127. // Used for a shorter form of long mul by 10. operand immL_127() %{ @@ -8621,6 +8631,18 @@ instruct xorI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{ ins_pipe( ialu_reg_reg ); %} +// Xor Register with Immediate -1 +instruct xorI_eReg_im1(eRegI dst, immI_M1 imm) %{ + match(Set dst (XorI dst imm)); + + size(2); + format %{ "NOT $dst" %} + ins_encode %{ + __ notl($dst$$Register); + %} + ins_pipe( ialu_reg ); +%} + // Xor Register with Immediate instruct xorI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{ match(Set dst (XorI dst src)); @@ -8938,6 +8960,18 @@ instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ ins_pipe( ialu_reg_reg_long ); %} +// Xor Long Register with Immediate -1 +instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ + match(Set dst (XorL dst imm)); + format %{ "NOT $dst.lo\n\t" + "NOT $dst.hi" %} + ins_encode %{ + __ notl($dst$$Register); + __ notl(HIGH_FROM_LOW($dst$$Register)); + %} + ins_pipe( ialu_reg_long ); +%} + // Xor Long Register with Immediate instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ match(Set dst (XorL dst src)); diff --git a/hotspot/src/cpu/x86/vm/x86_64.ad b/hotspot/src/cpu/x86/vm/x86_64.ad index 62b46da14b0..4245284593d 100644 --- a/hotspot/src/cpu/x86/vm/x86_64.ad +++ b/hotspot/src/cpu/x86/vm/x86_64.ad @@ -9309,6 +9309,17 @@ instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr) ins_pipe(ialu_reg_reg); %} +// Xor Register with Immediate -1 +instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{ + match(Set dst (XorI dst imm)); + + format %{ "not $dst" %} + ins_encode %{ + __ notl($dst$$Register); + %} + ins_pipe(ialu_reg); +%} + // Xor Register with Immediate instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{ @@ -9529,6 +9540,17 @@ instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr) ins_pipe(ialu_reg_reg); %} +// Xor Register with Immediate -1 +instruct xorL_rReg_im1(rRegL dst, immL_M1 
imm) %{ + match(Set dst (XorL dst imm)); + + format %{ "notq $dst" %} + ins_encode %{ + __ notq($dst$$Register); + %} + ins_pipe(ialu_reg); +%} + // Xor Register with Immediate instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{ diff --git a/hotspot/src/os/linux/launcher/java.c b/hotspot/src/os/linux/launcher/java.c index d538db1d03d..c68bb41449b 100644 --- a/hotspot/src/os/linux/launcher/java.c +++ b/hotspot/src/os/linux/launcher/java.c @@ -1110,7 +1110,7 @@ static jstring getPlatformEncoding(JNIEnv *env) { if (propname) { jclass cls; jmethodID mid; - NULL_CHECK0 (cls = (*env)->FindClass(env, "java/lang/System")); + NULL_CHECK0 (cls = FindBootStrapClass(env, "java/lang/System")); NULL_CHECK0 (mid = (*env)->GetStaticMethodID( env, cls, "getProperty", @@ -1125,7 +1125,7 @@ static jstring getPlatformEncoding(JNIEnv *env) { static jboolean isEncodingSupported(JNIEnv *env, jstring enc) { jclass cls; jmethodID mid; - NULL_CHECK0 (cls = (*env)->FindClass(env, "java/nio/charset/Charset")); + NULL_CHECK0 (cls = FindBootStrapClass(env, "java/nio/charset/Charset")); NULL_CHECK0 (mid = (*env)->GetStaticMethodID( env, cls, "isSupported", @@ -1161,7 +1161,7 @@ NewPlatformString(JNIEnv *env, char *s) #else if (isEncodingSupported(env, enc) == JNI_TRUE) { #endif - NULL_CHECK0(cls = (*env)->FindClass(env, "java/lang/String")); + NULL_CHECK0(cls = FindBootStrapClass(env, "java/lang/String")); NULL_CHECK0(mid = (*env)->GetMethodID(env, cls, "", "([BLjava/lang/String;)V")); str = (*env)->NewObject(env, cls, mid, ary, enc); @@ -1172,7 +1172,7 @@ NewPlatformString(JNIEnv *env, char *s) the encoding name, in which the StringCoding class will pickup the iso-8859-1 as the fallback converter for us. 
*/ - NULL_CHECK0(cls = (*env)->FindClass(env, "java/lang/String")); + NULL_CHECK0(cls = FindBootStrapClass(env, "java/lang/String")); NULL_CHECK0(mid = (*env)->GetMethodID(env, cls, "", "([B)V")); str = (*env)->NewObject(env, cls, mid, ary); @@ -1195,7 +1195,7 @@ NewPlatformStringArray(JNIEnv *env, char **strv, int strc) jarray ary; int i; - NULL_CHECK0(cls = (*env)->FindClass(env, "java/lang/String")); + NULL_CHECK0(cls = FindBootStrapClass(env, "java/lang/String")); NULL_CHECK0(ary = (*env)->NewObjectArray(env, strc, cls, 0)); for (i = 0; i < strc; i++) { jstring str = NewPlatformString(env, *strv++); @@ -1224,6 +1224,7 @@ LoadClass(JNIEnv *env, char *name) c = *t++; *s++ = (c == '.') ? '/' : c; } while (c != '\0'); + // use the application class loader for main-class cls = (*env)->FindClass(env, buf); free(buf); @@ -1250,7 +1251,7 @@ GetMainClassName(JNIEnv *env, char *jarname) jobject jar, man, attr; jstring str, result = 0; - NULL_CHECK0(cls = (*env)->FindClass(env, "java/util/jar/JarFile")); + NULL_CHECK0(cls = FindBootStrapClass(env, "java/util/jar/JarFile")); NULL_CHECK0(mid = (*env)->GetMethodID(env, cls, "", "(Ljava/lang/String;)V")); NULL_CHECK0(str = NewPlatformString(env, jarname)); @@ -1471,7 +1472,7 @@ PrintJavaVersion(JNIEnv *env) jclass ver; jmethodID print; - NULL_CHECK(ver = (*env)->FindClass(env, "sun/misc/Version")); + NULL_CHECK(ver = FindBootStrapClass(env, "sun/misc/Version")); NULL_CHECK(print = (*env)->GetStaticMethodID(env, ver, "print", "()V")); (*env)->CallStaticVoidMethod(env, ver, print); diff --git a/hotspot/src/os/linux/launcher/java.h b/hotspot/src/os/linux/launcher/java.h index c6f43df9c00..9e4a1a6f623 100644 --- a/hotspot/src/os/linux/launcher/java.h +++ b/hotspot/src/os/linux/launcher/java.h @@ -100,5 +100,15 @@ void* MemAlloc(size_t size); * Make launcher spit debug output. 
*/ extern jboolean _launcher_debug; +/* + * This allows for finding classes from the VM's bootstrap class loader + * directly, FindClass uses the application class loader internally, this will + * cause unnecessary searching of the classpath for the required classes. + */ +typedef jclass (JNICALL FindClassFromBootLoader_t(JNIEnv *env, + const char *name, + jboolean throwError)); + +jclass FindBootStrapClass(JNIEnv *env, const char *classname); #endif /* _JAVA_H_ */ diff --git a/hotspot/src/os/linux/launcher/java_md.c b/hotspot/src/os/linux/launcher/java_md.c index 90c6e62c31b..248df18fc4b 100644 --- a/hotspot/src/os/linux/launcher/java_md.c +++ b/hotspot/src/os/linux/launcher/java_md.c @@ -1826,3 +1826,23 @@ UnsetEnv(char *name) { return(borrowed_unsetenv(name)); } +/* + * The implementation for finding classes from the bootstrap + * class loader, refer to java.h + */ +static FindClassFromBootLoader_t *findBootClass = NULL; + +jclass +FindBootStrapClass(JNIEnv *env, const char* classname) +{ + if (findBootClass == NULL) { + findBootClass = (FindClassFromBootLoader_t *)dlsym(RTLD_DEFAULT, + "JVM_FindClassFromBootLoader"); + if (findBootClass == NULL) { + fprintf(stderr, "Error: could load method JVM_FindClassFromBootLoader"); + return NULL; + } + } + return findBootClass(env, classname, JNI_FALSE); +} + diff --git a/hotspot/src/os/linux/vm/globals_linux.hpp b/hotspot/src/os/linux/vm/globals_linux.hpp index e22b84b0f92..9c4482ab927 100644 --- a/hotspot/src/os/linux/vm/globals_linux.hpp +++ b/hotspot/src/os/linux/vm/globals_linux.hpp @@ -38,5 +38,6 @@ // platforms, but they may have different default values on other platforms. 
// define_pd_global(bool, UseLargePages, false); +define_pd_global(bool, UseLargePagesIndividualAllocation, false); define_pd_global(bool, UseOSErrorReporting, false); define_pd_global(bool, UseThreadPriorities, true) ; diff --git a/hotspot/src/os/linux/vm/os_linux.cpp b/hotspot/src/os/linux/vm/os_linux.cpp index 551857525cd..3e93e6d5983 100644 --- a/hotspot/src/os/linux/vm/os_linux.cpp +++ b/hotspot/src/os/linux/vm/os_linux.cpp @@ -1261,6 +1261,17 @@ jlong os::elapsed_frequency() { return (1000 * 1000); } +// For now, we say that linux does not support vtime. I have no idea +// whether it can actually be made to (DLD, 9/13/05). + +bool os::supports_vtime() { return false; } +bool os::enable_vtime() { return false; } +bool os::vtime_enabled() { return false; } +double os::elapsedVTime() { + // better than nothing, but not much + return elapsedTime(); +} + jlong os::javaTimeMillis() { timeval time; int status = gettimeofday(&time, NULL); diff --git a/hotspot/src/os/solaris/launcher/java.c b/hotspot/src/os/solaris/launcher/java.c index cb67cc0f088..17a939bb3a4 100644 --- a/hotspot/src/os/solaris/launcher/java.c +++ b/hotspot/src/os/solaris/launcher/java.c @@ -1110,7 +1110,7 @@ static jstring getPlatformEncoding(JNIEnv *env) { if (propname) { jclass cls; jmethodID mid; - NULL_CHECK0 (cls = (*env)->FindClass(env, "java/lang/System")); + NULL_CHECK0 (cls = FindBootStrapClass(env, "java/lang/System")); NULL_CHECK0 (mid = (*env)->GetStaticMethodID( env, cls, "getProperty", @@ -1125,7 +1125,7 @@ static jstring getPlatformEncoding(JNIEnv *env) { static jboolean isEncodingSupported(JNIEnv *env, jstring enc) { jclass cls; jmethodID mid; - NULL_CHECK0 (cls = (*env)->FindClass(env, "java/nio/charset/Charset")); + NULL_CHECK0 (cls = FindBootStrapClass(env, "java/nio/charset/Charset")); NULL_CHECK0 (mid = (*env)->GetStaticMethodID( env, cls, "isSupported", @@ -1161,7 +1161,7 @@ NewPlatformString(JNIEnv *env, char *s) #else if (isEncodingSupported(env, enc) == JNI_TRUE) { #endif - 
NULL_CHECK0(cls = (*env)->FindClass(env, "java/lang/String")); + NULL_CHECK0(cls = FindBootStrapClass(env, "java/lang/String")); NULL_CHECK0(mid = (*env)->GetMethodID(env, cls, "", "([BLjava/lang/String;)V")); str = (*env)->NewObject(env, cls, mid, ary, enc); @@ -1172,7 +1172,7 @@ NewPlatformString(JNIEnv *env, char *s) the encoding name, in which the StringCoding class will pickup the iso-8859-1 as the fallback converter for us. */ - NULL_CHECK0(cls = (*env)->FindClass(env, "java/lang/String")); + NULL_CHECK0(cls = FindBootStrapClass(env, "java/lang/String")); NULL_CHECK0(mid = (*env)->GetMethodID(env, cls, "", "([B)V")); str = (*env)->NewObject(env, cls, mid, ary); @@ -1195,7 +1195,7 @@ NewPlatformStringArray(JNIEnv *env, char **strv, int strc) jarray ary; int i; - NULL_CHECK0(cls = (*env)->FindClass(env, "java/lang/String")); + NULL_CHECK0(cls = FindBootStrapClass(env, "java/lang/String")); NULL_CHECK0(ary = (*env)->NewObjectArray(env, strc, cls, 0)); for (i = 0; i < strc; i++) { jstring str = NewPlatformString(env, *strv++); @@ -1224,6 +1224,7 @@ LoadClass(JNIEnv *env, char *name) c = *t++; *s++ = (c == '.') ? 
'/' : c; } while (c != '\0'); + // use the application class loader for the main-class cls = (*env)->FindClass(env, buf); free(buf); @@ -1250,7 +1251,7 @@ GetMainClassName(JNIEnv *env, char *jarname) jobject jar, man, attr; jstring str, result = 0; - NULL_CHECK0(cls = (*env)->FindClass(env, "java/util/jar/JarFile")); + NULL_CHECK0(cls = FindBootStrapClass(env, "java/util/jar/JarFile")); NULL_CHECK0(mid = (*env)->GetMethodID(env, cls, "", "(Ljava/lang/String;)V")); NULL_CHECK0(str = NewPlatformString(env, jarname)); @@ -1471,7 +1472,7 @@ PrintJavaVersion(JNIEnv *env) jclass ver; jmethodID print; - NULL_CHECK(ver = (*env)->FindClass(env, "sun/misc/Version")); + NULL_CHECK(ver = FindBootStrapClass(env, "sun/misc/Version")); NULL_CHECK(print = (*env)->GetStaticMethodID(env, ver, "print", "()V")); (*env)->CallStaticVoidMethod(env, ver, print); diff --git a/hotspot/src/os/solaris/launcher/java.h b/hotspot/src/os/solaris/launcher/java.h index 1e3321b7d4c..3bea15527d2 100644 --- a/hotspot/src/os/solaris/launcher/java.h +++ b/hotspot/src/os/solaris/launcher/java.h @@ -101,4 +101,15 @@ void* MemAlloc(size_t size); */ extern jboolean _launcher_debug; +/* + * This allows for finding classes from the VM's bootstrap class loader + * directly, FindClass uses the application class loader internally, this will + * cause unnecessary searching of the classpath for the required classes. 
+ */ +typedef jclass (JNICALL FindClassFromBootLoader_t(JNIEnv *env, + const char *name, + jboolean throwError)); + +jclass FindBootStrapClass(JNIEnv *env, const char *classname); + #endif /* _JAVA_H_ */ diff --git a/hotspot/src/os/solaris/launcher/java_md.c b/hotspot/src/os/solaris/launcher/java_md.c index 09f8b89ea17..b006d24b676 100644 --- a/hotspot/src/os/solaris/launcher/java_md.c +++ b/hotspot/src/os/solaris/launcher/java_md.c @@ -1826,3 +1826,24 @@ UnsetEnv(char *name) { return(borrowed_unsetenv(name)); } + +/* + * The implementation for finding classes from the bootstrap + * class loader, refer to java.h + */ +static FindClassFromBootLoader_t *findBootClass = NULL; + +jclass +FindBootStrapClass(JNIEnv *env, const char* classname) +{ + if (findBootClass == NULL) { + findBootClass = (FindClassFromBootLoader_t *)dlsym(RTLD_DEFAULT, + "JVM_FindClassFromBootLoader"); + if (findBootClass == NULL) { + fprintf(stderr, "Error: could not load method JVM_FindClassFromBootLoader"); + return NULL; + } + } + return findBootClass(env, classname, JNI_FALSE); +} + diff --git a/hotspot/src/os/solaris/vm/globals_solaris.hpp b/hotspot/src/os/solaris/vm/globals_solaris.hpp index 8f00adbe2a3..85fcc8c8752 100644 --- a/hotspot/src/os/solaris/vm/globals_solaris.hpp +++ b/hotspot/src/os/solaris/vm/globals_solaris.hpp @@ -44,5 +44,6 @@ // platforms, but they may have different default values on other platforms. 
// define_pd_global(bool, UseLargePages, true); +define_pd_global(bool, UseLargePagesIndividualAllocation, false); define_pd_global(bool, UseOSErrorReporting, false); define_pd_global(bool, UseThreadPriorities, false); diff --git a/hotspot/src/os/solaris/vm/os_solaris.cpp b/hotspot/src/os/solaris/vm/os_solaris.cpp index 0c533f06246..c4386d85988 100644 --- a/hotspot/src/os/solaris/vm/os_solaris.cpp +++ b/hotspot/src/os/solaris/vm/os_solaris.cpp @@ -462,16 +462,14 @@ int os::active_processor_count() { int online_cpus = sysconf(_SC_NPROCESSORS_ONLN); pid_t pid = getpid(); psetid_t pset = PS_NONE; - // Are we running in a processor set? + // Are we running in a processor set or is there any processor set around? if (pset_bind(PS_QUERY, P_PID, pid, &pset) == 0) { - if (pset != PS_NONE) { - uint_t pset_cpus; - // Query number of cpus in processor set - if (pset_info(pset, NULL, &pset_cpus, NULL) == 0) { - assert(pset_cpus > 0 && pset_cpus <= online_cpus, "sanity check"); - _processors_online = pset_cpus; - return pset_cpus; - } + uint_t pset_cpus; + // Query the number of cpus available to us. 
+ if (pset_info(pset, NULL, &pset_cpus, NULL) == 0) { + assert(pset_cpus > 0 && pset_cpus <= online_cpus, "sanity check"); + _processors_online = pset_cpus; + return pset_cpus; } } // Otherwise return number of online cpus @@ -1691,6 +1689,40 @@ bool os::getTimesSecs(double* process_real_time, } } +bool os::supports_vtime() { return true; } + +bool os::enable_vtime() { + int fd = open("/proc/self/ctl", O_WRONLY); + if (fd == -1) + return false; + + long cmd[] = { PCSET, PR_MSACCT }; + int res = write(fd, cmd, sizeof(long) * 2); + close(fd); + if (res != sizeof(long) * 2) + return false; + + return true; +} + +bool os::vtime_enabled() { + int fd = open("/proc/self/status", O_RDONLY); + if (fd == -1) + return false; + + pstatus_t status; + int res = read(fd, (void*) &status, sizeof(pstatus_t)); + close(fd); + if (res != sizeof(pstatus_t)) + return false; + + return status.pr_flags & PR_MSACCT; +} + +double os::elapsedVTime() { + return (double)gethrvtime() / (double)hrtime_hz; +} + // Used internally for comparisons only // getTimeMillis guaranteed to not move backwards on Solaris jlong getTimeMillis() { @@ -2688,7 +2720,7 @@ size_t os::numa_get_leaf_groups(int *ids, size_t size) { return bottom; } -// Detect the topology change. Typically happens during CPU pluggin-unplugging. +// Detect the topology change. Typically happens during CPU plugging-unplugging. bool os::numa_topology_changed() { int is_stale = Solaris::lgrp_cookie_stale(Solaris::lgrp_cookie()); if (is_stale != -1 && is_stale) { diff --git a/hotspot/src/os/windows/vm/globals_windows.hpp b/hotspot/src/os/windows/vm/globals_windows.hpp index 228c69c84c0..b5b6ef870d3 100644 --- a/hotspot/src/os/windows/vm/globals_windows.hpp +++ b/hotspot/src/os/windows/vm/globals_windows.hpp @@ -37,5 +37,6 @@ // platforms, but they may have different default values on other platforms. 
// define_pd_global(bool, UseLargePages, false); +define_pd_global(bool, UseLargePagesIndividualAllocation, true); define_pd_global(bool, UseOSErrorReporting, false); // for now. define_pd_global(bool, UseThreadPriorities, true) ; diff --git a/hotspot/src/os/windows/vm/os_windows.cpp b/hotspot/src/os/windows/vm/os_windows.cpp index 60e99f67f9d..bc86280bd87 100644 --- a/hotspot/src/os/windows/vm/os_windows.cpp +++ b/hotspot/src/os/windows/vm/os_windows.cpp @@ -737,6 +737,17 @@ FILETIME java_to_windows_time(jlong l) { return result; } +// For now, we say that Windows does not support vtime. I have no idea +// whether it can actually be made to (DLD, 9/13/05). + +bool os::supports_vtime() { return false; } +bool os::enable_vtime() { return false; } +bool os::vtime_enabled() { return false; } +double os::elapsedVTime() { + // better than nothing, but not much + return elapsedTime(); +} + jlong os::javaTimeMillis() { if (UseFakeTimers) { return fake_time++; @@ -2582,9 +2593,104 @@ bool os::can_execute_large_page_memory() { } char* os::reserve_memory_special(size_t bytes) { - DWORD flag = MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES; - char * res = (char *)VirtualAlloc(NULL, bytes, flag, PAGE_EXECUTE_READWRITE); - return res; + + if (UseLargePagesIndividualAllocation) { + if (TracePageSizes && Verbose) { + tty->print_cr("Reserving large pages individually."); + } + char * p_buf; + // first reserve enough address space in advance since we want to be + // able to break a single contiguous virtual address range into multiple + // large page commits but WS2003 does not allow reserving large page space + // so we just use 4K pages for reserve, this gives us a legal contiguous + // address space. then we will deallocate that reservation, and re alloc + // using large pages + const size_t size_of_reserve = bytes + _large_page_size; + if (bytes > size_of_reserve) { + // Overflowed. 
+ warning("Individually allocated large pages failed, " + "use -XX:-UseLargePagesIndividualAllocation to turn off"); + return NULL; + } + p_buf = (char *) VirtualAlloc(NULL, + size_of_reserve, // size of Reserve + MEM_RESERVE, + PAGE_EXECUTE_READWRITE); + // If reservation failed, return NULL + if (p_buf == NULL) return NULL; + + release_memory(p_buf, bytes + _large_page_size); + // round up to page boundary. If the size_of_reserve did not + // overflow and the reservation did not fail, this align up + // should not overflow. + p_buf = (char *) align_size_up((size_t)p_buf, _large_page_size); + + // now go through and allocate one page at a time until all bytes are + // allocated + size_t bytes_remaining = align_size_up(bytes, _large_page_size); + // An overflow of align_size_up() would have been caught above + // in the calculation of size_of_reserve. + char * next_alloc_addr = p_buf; + +#ifdef ASSERT + // Variable for the failure injection + long ran_num = os::random(); + size_t fail_after = ran_num % bytes; +#endif + + while (bytes_remaining) { + size_t bytes_to_rq = MIN2(bytes_remaining, _large_page_size); + // Note allocate and commit + char * p_new; + +#ifdef ASSERT + bool inject_error = LargePagesIndividualAllocationInjectError && + (bytes_remaining <= fail_after); +#else + const bool inject_error = false; +#endif + + if (inject_error) { + p_new = NULL; + } else { + p_new = (char *) VirtualAlloc(next_alloc_addr, + bytes_to_rq, + MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES, + PAGE_EXECUTE_READWRITE); + } + + if (p_new == NULL) { + // Free any allocated pages + if (next_alloc_addr > p_buf) { + // Some memory was committed so release it. 
+ size_t bytes_to_release = bytes - bytes_remaining; + release_memory(p_buf, bytes_to_release); + } +#ifdef ASSERT + if (UseLargePagesIndividualAllocation && + LargePagesIndividualAllocationInjectError) { + if (TracePageSizes && Verbose) { + tty->print_cr("Reserving large pages individually failed."); + } + } +#endif + return NULL; + } + bytes_remaining -= bytes_to_rq; + next_alloc_addr += bytes_to_rq; + } + + return p_buf; + + } else { + // normal policy just allocate it all at once + DWORD flag = MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES; + char * res = (char *)VirtualAlloc(NULL, + bytes, + flag, + PAGE_EXECUTE_READWRITE); + return res; + } } bool os::release_memory_special(char* base, size_t bytes) { @@ -2972,6 +3078,7 @@ size_t os::win32::_default_stack_size = 0; volatile intx os::win32::_os_thread_count = 0; bool os::win32::_is_nt = false; +bool os::win32::_is_windows_2003 = false; void os::win32::initialize_system_info() { @@ -2994,7 +3101,15 @@ void os::win32::initialize_system_info() { GetVersionEx(&oi); switch(oi.dwPlatformId) { case VER_PLATFORM_WIN32_WINDOWS: _is_nt = false; break; - case VER_PLATFORM_WIN32_NT: _is_nt = true; break; + case VER_PLATFORM_WIN32_NT: + _is_nt = true; + { + int os_vers = oi.dwMajorVersion * 1000 + oi.dwMinorVersion; + if (os_vers == 5002) { + _is_windows_2003 = true; + } + } + break; default: fatal("Unknown platform"); } @@ -3092,9 +3207,13 @@ void os::init(void) { NoYieldsInMicrolock = true; } #endif + // This may be overridden later when argument processing is done. 
+ FLAG_SET_ERGO(bool, UseLargePagesIndividualAllocation, + os::win32::is_windows_2003()); + // Initialize main_process and main_thread main_process = GetCurrentProcess(); // Remember main_process is a pseudo handle - if (!DuplicateHandle(main_process, GetCurrentThread(), main_process, + if (!DuplicateHandle(main_process, GetCurrentThread(), main_process, &main_thread, THREAD_ALL_ACCESS, false, 0)) { fatal("DuplicateHandle failed\n"); } diff --git a/hotspot/src/os/windows/vm/os_windows.hpp b/hotspot/src/os/windows/vm/os_windows.hpp index 8a66e3c6374..9b5a0301a08 100644 --- a/hotspot/src/os/windows/vm/os_windows.hpp +++ b/hotspot/src/os/windows/vm/os_windows.hpp @@ -34,6 +34,7 @@ class win32 { static julong _physical_memory; static size_t _default_stack_size; static bool _is_nt; + static bool _is_windows_2003; public: // Windows-specific interface: @@ -60,6 +61,9 @@ class win32 { // Tells whether the platform is NT or Windown95 static bool is_nt() { return _is_nt; } + // Tells whether the platform is Windows 2003 + static bool is_windows_2003() { return _is_windows_2003; } + // Returns the byte size of a virtual memory page static int vm_page_size() { return _vm_page_size; } diff --git a/hotspot/src/share/vm/adlc/formssel.cpp b/hotspot/src/share/vm/adlc/formssel.cpp index b45de088afb..2957ae39fc8 100644 --- a/hotspot/src/share/vm/adlc/formssel.cpp +++ b/hotspot/src/share/vm/adlc/formssel.cpp @@ -3768,6 +3768,10 @@ bool MatchRule::is_chain_rule(FormDict &globals) const { int MatchRule::is_ideal_copy() const { if( _rChild ) { const char *opType = _rChild->_opType; +#if 1 + if( strcmp(opType,"CastIP")==0 ) + return 1; +#else if( strcmp(opType,"CastII")==0 ) return 1; // Do not treat *CastPP this way, because it @@ -3787,6 +3791,7 @@ int MatchRule::is_ideal_copy() const { // return 1; //if( strcmp(opType,"CastP2X")==0 ) // return 1; +#endif } if( is_chain_rule(_AD.globalNames()) && _lChild && strncmp(_lChild->_opType,"stackSlot",9)==0 ) diff --git 
a/hotspot/src/share/vm/asm/assembler.cpp b/hotspot/src/share/vm/asm/assembler.cpp index 34ae81ce1cd..e9d99621b6b 100644 --- a/hotspot/src/share/vm/asm/assembler.cpp +++ b/hotspot/src/share/vm/asm/assembler.cpp @@ -249,8 +249,6 @@ void AbstractAssembler::block_comment(const char* comment) { bool MacroAssembler::needs_explicit_null_check(intptr_t offset) { // Exception handler checks the nmethod's implicit null checks table // only when this method returns false. -#ifndef SPARC - // Sparc does not have based addressing if (UseCompressedOops) { // The first page after heap_base is unmapped and // the 'offset' is equal to [heap_base + offset] for @@ -261,7 +259,6 @@ bool MacroAssembler::needs_explicit_null_check(intptr_t offset) { offset = (intptr_t)(pointer_delta((void*)offset, (void*)heap_base, 1)); } } -#endif // SPARC return offset < 0 || os::vm_page_size() <= offset; } diff --git a/hotspot/src/share/vm/c1/c1_CodeStubs.hpp b/hotspot/src/share/vm/c1/c1_CodeStubs.hpp index 9009e0d8c72..4c47e777b2b 100644 --- a/hotspot/src/share/vm/c1/c1_CodeStubs.hpp +++ b/hotspot/src/share/vm/c1/c1_CodeStubs.hpp @@ -482,3 +482,81 @@ class ArrayCopyStub: public CodeStub { virtual void print_name(outputStream* out) const { out->print("ArrayCopyStub"); } #endif // PRODUCT }; + +////////////////////////////////////////////////////////////////////////////////////////// +#ifndef SERIALGC + +// Code stubs for Garbage-First barriers. 
+class G1PreBarrierStub: public CodeStub { + private: + LIR_Opr _addr; + LIR_Opr _pre_val; + LIR_PatchCode _patch_code; + CodeEmitInfo* _info; + + public: + // pre_val (a temporary register) must be a register; + // addr (the address of the field to be read) must be a LIR_Address + G1PreBarrierStub(LIR_Opr addr, LIR_Opr pre_val, LIR_PatchCode patch_code, CodeEmitInfo* info) : + _addr(addr), _pre_val(pre_val), _patch_code(patch_code), _info(info) + { + assert(_pre_val->is_register(), "should be temporary register"); + assert(_addr->is_address(), "should be the address of the field"); + } + + LIR_Opr addr() const { return _addr; } + LIR_Opr pre_val() const { return _pre_val; } + LIR_PatchCode patch_code() const { return _patch_code; } + CodeEmitInfo* info() const { return _info; } + + virtual void emit_code(LIR_Assembler* e); + virtual void visit(LIR_OpVisitState* visitor) { + // don't pass in the code emit info since it's processed in the fast + // path + if (_info != NULL) + visitor->do_slow_case(_info); + else + visitor->do_slow_case(); + visitor->do_input(_addr); + visitor->do_temp(_pre_val); + } +#ifndef PRODUCT + virtual void print_name(outputStream* out) const { out->print("G1PreBarrierStub"); } +#endif // PRODUCT +}; + +class G1PostBarrierStub: public CodeStub { + private: + LIR_Opr _addr; + LIR_Opr _new_val; + + static jbyte* _byte_map_base; + static jbyte* byte_map_base_slow(); + static jbyte* byte_map_base() { + if (_byte_map_base == NULL) { + _byte_map_base = byte_map_base_slow(); + } + return _byte_map_base; + } + + public: + // addr (the address of the object head) and new_val must be registers. 
+ G1PostBarrierStub(LIR_Opr addr, LIR_Opr new_val): _addr(addr), _new_val(new_val) { } + + LIR_Opr addr() const { return _addr; } + LIR_Opr new_val() const { return _new_val; } + + virtual void emit_code(LIR_Assembler* e); + virtual void visit(LIR_OpVisitState* visitor) { + // don't pass in the code emit info since it's processed in the fast path + visitor->do_slow_case(); + visitor->do_input(_addr); + visitor->do_input(_new_val); + } +#ifndef PRODUCT + virtual void print_name(outputStream* out) const { out->print("G1PostBarrierStub"); } +#endif // PRODUCT +}; + +#endif // SERIALGC +////////////////////////////////////////////////////////////////////////////////////////// diff --git a/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp b/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp index 48f76fd7767..5a46b03e5d8 100644 --- a/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp +++ b/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp @@ -74,6 +74,7 @@ void LIR_Assembler::patching_epilog(PatchingStub* patch, LIR_PatchCode patch_cod LIR_Assembler::LIR_Assembler(Compilation* c): _compilation(c) , _masm(c->masm()) + , _bs(Universe::heap()->barrier_set()) , _frame_map(c->frame_map()) , _current_block(NULL) , _pending_non_safepoint(NULL) diff --git a/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp b/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp index 1cbf564288c..f5de2c73479 100644 --- a/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp +++ b/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp @@ -24,11 +24,13 @@ class Compilation; class ScopeValue; +class BarrierSet; class LIR_Assembler: public CompilationResourceObj { private: C1_MacroAssembler* _masm; CodeStubList* _slow_case_stubs; + BarrierSet* _bs; Compilation* _compilation; FrameMap* _frame_map; diff --git a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp index 209899357d3..16552a7e5d3 100644 --- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp +++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp @@ -285,16 +285,7 @@ jlong 
LIRItem::get_jlong_constant() const { void LIRGenerator::init() { - BarrierSet* bs = Universe::heap()->barrier_set(); - assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); - CardTableModRefBS* ct = (CardTableModRefBS*)bs; - assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); - -#ifdef _LP64 - _card_table_base = new LIR_Const((jlong)ct->byte_map_base); -#else - _card_table_base = new LIR_Const((jint)ct->byte_map_base); -#endif + _bs = Universe::heap()->barrier_set(); } @@ -1239,8 +1230,37 @@ LIR_Opr LIRGenerator::load_constant(LIR_Const* c) { // Various barriers +void LIRGenerator::pre_barrier(LIR_Opr addr_opr, bool patch, CodeEmitInfo* info) { + // Do the pre-write barrier, if any. + switch (_bs->kind()) { +#ifndef SERIALGC + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + G1SATBCardTableModRef_pre_barrier(addr_opr, patch, info); + break; +#endif // SERIALGC + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + // No pre barriers + break; + case BarrierSet::ModRef: + case BarrierSet::Other: + // No pre barriers + break; + default : + ShouldNotReachHere(); + + } +} + void LIRGenerator::post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val) { - switch (Universe::heap()->barrier_set()->kind()) { + switch (_bs->kind()) { +#ifndef SERIALGC + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + G1SATBCardTableModRef_post_barrier(addr, new_val); + break; +#endif // SERIALGC case BarrierSet::CardTableModRef: case BarrierSet::CardTableExtension: CardTableModRef_post_barrier(addr, new_val); @@ -1254,11 +1274,120 @@ void LIRGenerator::post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val) { } } +//////////////////////////////////////////////////////////////////////// +#ifndef SERIALGC + +void LIRGenerator::G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, bool patch, CodeEmitInfo* info) { + if (G1DisablePreBarrier) return; + + // First we test whether marking is in progress. 
+ BasicType flag_type; + if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { + flag_type = T_INT; + } else { + guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1, + "Assumption"); + flag_type = T_BYTE; + } + LIR_Opr thrd = getThreadPointer(); + LIR_Address* mark_active_flag_addr = + new LIR_Address(thrd, + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active()), + flag_type); + // Read the marking-in-progress flag. + LIR_Opr flag_val = new_register(T_INT); + __ load(mark_active_flag_addr, flag_val); + + LabelObj* start_store = new LabelObj(); + + LIR_PatchCode pre_val_patch_code = + patch ? lir_patch_normal : lir_patch_none; + + LIR_Opr pre_val = new_register(T_OBJECT); + + __ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0)); + if (!addr_opr->is_address()) { + assert(addr_opr->is_register(), "must be"); + addr_opr = LIR_OprFact::address(new LIR_Address(addr_opr, 0, T_OBJECT)); + } + CodeStub* slow = new G1PreBarrierStub(addr_opr, pre_val, pre_val_patch_code, + info); + __ branch(lir_cond_notEqual, T_INT, slow); + __ branch_destination(slow->continuation()); +} + +void LIRGenerator::G1SATBCardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val) { + if (G1DisablePostBarrier) return; + + // If the "new_val" is a constant NULL, no barrier is necessary. 
+ if (new_val->is_constant() && + new_val->as_constant_ptr()->as_jobject() == NULL) return; + + if (!new_val->is_register()) { + LIR_Opr new_val_reg = new_pointer_register(); + if (new_val->is_constant()) { + __ move(new_val, new_val_reg); + } else { + __ leal(new_val, new_val_reg); + } + new_val = new_val_reg; + } + assert(new_val->is_register(), "must be a register at this point"); + + if (addr->is_address()) { + LIR_Address* address = addr->as_address_ptr(); + LIR_Opr ptr = new_pointer_register(); + if (!address->index()->is_valid() && address->disp() == 0) { + __ move(address->base(), ptr); + } else { + assert(address->disp() != max_jint, "lea doesn't support patched addresses!"); + __ leal(addr, ptr); + } + addr = ptr; + } + assert(addr->is_register(), "must be a register at this point"); + + LIR_Opr xor_res = new_pointer_register(); + LIR_Opr xor_shift_res = new_pointer_register(); + + if (TwoOperandLIRForm ) { + __ move(addr, xor_res); + __ logical_xor(xor_res, new_val, xor_res); + __ move(xor_res, xor_shift_res); + __ unsigned_shift_right(xor_shift_res, + LIR_OprFact::intConst(HeapRegion::LogOfHRGrainBytes), + xor_shift_res, + LIR_OprDesc::illegalOpr()); + } else { + __ logical_xor(addr, new_val, xor_res); + __ unsigned_shift_right(xor_res, + LIR_OprFact::intConst(HeapRegion::LogOfHRGrainBytes), + xor_shift_res, + LIR_OprDesc::illegalOpr()); + } + + if (!new_val->is_register()) { + LIR_Opr new_val_reg = new_pointer_register(); + __ leal(new_val, new_val_reg); + new_val = new_val_reg; + } + assert(new_val->is_register(), "must be a register at this point"); + + __ cmp(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD)); + + CodeStub* slow = new G1PostBarrierStub(addr, new_val); + __ branch(lir_cond_notEqual, T_INT, slow); + __ branch_destination(slow->continuation()); +} + +#endif // SERIALGC +//////////////////////////////////////////////////////////////////////// + void LIRGenerator::CardTableModRef_post_barrier(LIR_OprDesc* addr, 
LIR_OprDesc* new_val) { - BarrierSet* bs = Universe::heap()->barrier_set(); - assert(sizeof(*((CardTableModRefBS*)bs)->byte_map_base) == sizeof(jbyte), "adjust this code"); - LIR_Const* card_table_base = new LIR_Const(((CardTableModRefBS*)bs)->byte_map_base); + assert(sizeof(*((CardTableModRefBS*)_bs)->byte_map_base) == sizeof(jbyte), "adjust this code"); + LIR_Const* card_table_base = new LIR_Const(((CardTableModRefBS*)_bs)->byte_map_base); if (addr->is_address()) { LIR_Address* address = addr->as_address_ptr(); LIR_Opr ptr = new_register(T_OBJECT); @@ -1388,6 +1517,13 @@ void LIRGenerator::do_StoreField(StoreField* x) { __ membar_release(); } + if (is_oop) { + // Do the pre-write barrier, if any. + pre_barrier(LIR_OprFact::address(address), + needs_patching, + (info ? new CodeEmitInfo(info) : NULL)); + } + if (is_volatile) { assert(!needs_patching && x->is_loaded(), "how do we know it's volatile if it's not loaded"); @@ -1398,7 +1534,12 @@ void LIRGenerator::do_StoreField(StoreField* x) { } if (is_oop) { +#ifdef PRECISE_CARDMARK + // Precise cardmarks don't work + post_barrier(LIR_OprFact::address(address), value.result()); +#else post_barrier(object.result(), value.result()); +#endif // PRECISE_CARDMARK } if (is_volatile && os::is_MP()) { diff --git a/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp b/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp index 1b70887883f..1b8555b2055 100644 --- a/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp +++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp @@ -145,6 +145,7 @@ class PhiResolver: public CompilationResourceObj { // only the classes below belong in the same file class LIRGenerator: public InstructionVisitor, public BlockClosure { + private: Compilation* _compilation; ciMethod* _method; // method that we are compiling @@ -154,6 +155,7 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { Values _instruction_for_operand; BitMap2D _vreg_flags; // flags which can be set on a per-vreg basis LIR_List* _lir; + 
BarrierSet* _bs; LIRGenerator* gen() { return this; @@ -174,8 +176,6 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { LIR_OprList _reg_for_constants; Values _unpinned_constants; - LIR_Const* _card_table_base; - friend class PhiResolver; // unified bailout support @@ -196,8 +196,6 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { LIR_Opr load_constant(Constant* x); LIR_Opr load_constant(LIR_Const* constant); - LIR_Const* card_table_base() const { return _card_table_base; } - void set_result(Value x, LIR_Opr opr) { assert(opr->is_valid(), "must set to valid value"); assert(x->operand()->is_illegal(), "operand should never change"); @@ -253,12 +251,17 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { // generic interface + void pre_barrier(LIR_Opr addr_opr, bool patch, CodeEmitInfo* info); void post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val); // specific implementations + // pre barriers + + void G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, bool patch, CodeEmitInfo* info); // post barriers + void G1SATBCardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val); void CardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val); diff --git a/hotspot/src/share/vm/c1/c1_Runtime1.cpp b/hotspot/src/share/vm/c1/c1_Runtime1.cpp index 4a4765099e5..04750308db3 100644 --- a/hotspot/src/share/vm/c1/c1_Runtime1.cpp +++ b/hotspot/src/share/vm/c1/c1_Runtime1.cpp @@ -168,6 +168,8 @@ void Runtime1::generate_blob_for(StubID id) { switch (id) { // These stubs don't need to have an oopmap case dtrace_object_alloc_id: + case g1_pre_barrier_slow_id: + case g1_post_barrier_slow_id: case slow_subtype_check_id: case fpu2long_stub_id: case unwind_exception_id: diff --git a/hotspot/src/share/vm/c1/c1_Runtime1.hpp b/hotspot/src/share/vm/c1/c1_Runtime1.hpp index df6c03883f3..4380b72ee89 100644 --- a/hotspot/src/share/vm/c1/c1_Runtime1.hpp +++ b/hotspot/src/share/vm/c1/c1_Runtime1.hpp @@ -56,6 +56,8 
@@ class StubAssembler; stub(access_field_patching) \ stub(load_klass_patching) \ stub(jvmti_exception_throw) \ + stub(g1_pre_barrier_slow) \ + stub(g1_post_barrier_slow) \ stub(fpu2long_stub) \ stub(counter_overflow) \ last_entry(number_of_ids) diff --git a/hotspot/src/share/vm/c1/c1_globals.hpp b/hotspot/src/share/vm/c1/c1_globals.hpp index 52a10b72490..87efd1b04da 100644 --- a/hotspot/src/share/vm/c1/c1_globals.hpp +++ b/hotspot/src/share/vm/c1/c1_globals.hpp @@ -213,9 +213,6 @@ develop(bool, UseFastLocking, true, \ "Use fast inlined locking code") \ \ - product(bool, FastTLABRefill, true, \ - "Use fast TLAB refill code") \ - \ develop(bool, UseSlowPath, false, \ "For debugging: test slow cases by always using them") \ \ diff --git a/hotspot/src/share/vm/ci/ciMethodBlocks.cpp b/hotspot/src/share/vm/ci/ciMethodBlocks.cpp index 2810523f4fc..4f7e52559e6 100644 --- a/hotspot/src/share/vm/ci/ciMethodBlocks.cpp +++ b/hotspot/src/share/vm/ci/ciMethodBlocks.cpp @@ -49,7 +49,7 @@ bool ciMethodBlocks::is_block_start(int bci) { // first half. Returns the range beginning at bci. ciBlock *ciMethodBlocks::split_block_at(int bci) { ciBlock *former_block = block_containing(bci); - ciBlock *new_block = new(_arena) ciBlock(_method, _num_blocks++, this, former_block->start_bci()); + ciBlock *new_block = new(_arena) ciBlock(_method, _num_blocks++, former_block->start_bci()); _blocks->append(new_block); assert(former_block != NULL, "must not be NULL"); new_block->set_limit_bci(bci); @@ -83,7 +83,7 @@ ciBlock *ciMethodBlocks::make_block_at(int bci) { if (cb == NULL ) { // This is our first time visiting this bytecode. Create // a fresh block and assign it this starting point. 
- ciBlock *nb = new(_arena) ciBlock(_method, _num_blocks++, this, bci); + ciBlock *nb = new(_arena) ciBlock(_method, _num_blocks++, bci); _blocks->append(nb); _bci_to_block[bci] = nb; return nb; @@ -98,6 +98,11 @@ ciBlock *ciMethodBlocks::make_block_at(int bci) { } } +ciBlock *ciMethodBlocks::make_dummy_block() { + ciBlock *dum = new(_arena) ciBlock(_method, -1, 0); + return dum; +} + void ciMethodBlocks::do_analysis() { ciBytecodeStream s(_method); ciBlock *cur_block = block_containing(0); @@ -253,7 +258,7 @@ ciMethodBlocks::ciMethodBlocks(Arena *arena, ciMethod *meth): _method(meth), Copy::zero_to_words((HeapWord*) _bci_to_block, b2bsize / sizeof(HeapWord)); // create initial block covering the entire method - ciBlock *b = new(arena) ciBlock(_method, _num_blocks++, this, 0); + ciBlock *b = new(arena) ciBlock(_method, _num_blocks++, 0); _blocks->append(b); _bci_to_block[0] = b; @@ -334,7 +339,7 @@ void ciMethodBlocks::dump() { #endif -ciBlock::ciBlock(ciMethod *method, int index, ciMethodBlocks *mb, int start_bci) : +ciBlock::ciBlock(ciMethod *method, int index, int start_bci) : #ifndef PRODUCT _method(method), #endif diff --git a/hotspot/src/share/vm/ci/ciMethodBlocks.hpp b/hotspot/src/share/vm/ci/ciMethodBlocks.hpp index edfdf19b07e..f1a07513d1f 100644 --- a/hotspot/src/share/vm/ci/ciMethodBlocks.hpp +++ b/hotspot/src/share/vm/ci/ciMethodBlocks.hpp @@ -48,6 +48,8 @@ public: int num_blocks() { return _num_blocks;} void clear_processed(); + ciBlock *make_dummy_block(); // a block not associated with a bci + #ifndef PRODUCT void dump(); #endif @@ -81,7 +83,7 @@ public: fall_through_bci = -1 }; - ciBlock(ciMethod *method, int index, ciMethodBlocks *mb, int start_bci); + ciBlock(ciMethod *method, int index, int start_bci); int start_bci() const { return _start_bci; } int limit_bci() const { return _limit_bci; } int control_bci() const { return _control_bci; } @@ -94,7 +96,6 @@ public: int ex_limit_bci() const { return _ex_limit_bci; } bool contains(int bci) const { 
return start_bci() <= bci && bci < limit_bci(); } - // flag handling bool processed() const { return (_flags & Processed) != 0; } bool is_handler() const { return (_flags & Handler) != 0; } diff --git a/hotspot/src/share/vm/ci/ciTypeFlow.cpp b/hotspot/src/share/vm/ci/ciTypeFlow.cpp index df053f35ea4..2054ee5b536 100644 --- a/hotspot/src/share/vm/ci/ciTypeFlow.cpp +++ b/hotspot/src/share/vm/ci/ciTypeFlow.cpp @@ -338,8 +338,10 @@ ciTypeFlow::StateVector::StateVector(ciTypeFlow* analyzer) { } _trap_bci = -1; _trap_index = 0; + _def_locals.clear(); } + // ------------------------------------------------------------------ // ciTypeFlow::get_start_state // @@ -735,7 +737,7 @@ void ciTypeFlow::StateVector::do_multianewarray(ciBytecodeStream* str) { void ciTypeFlow::StateVector::do_new(ciBytecodeStream* str) { bool will_link; ciKlass* klass = str->get_klass(will_link); - if (!will_link) { + if (!will_link || str->is_unresolved_klass()) { trap(str, klass, str->get_klass_index()); } else { push_object(klass); @@ -1268,7 +1270,9 @@ bool ciTypeFlow::StateVector::apply_one_bytecode(ciBytecodeStream* str) { } case Bytecodes::_iinc: { - check_int(local(str->get_index())); + int lnum = str->get_index(); + check_int(local(lnum)); + store_to_local(lnum); break; } case Bytecodes::_iload: load_local_int(str->get_index()); break; @@ -1506,6 +1510,46 @@ void ciTypeFlow::StateVector::print_on(outputStream* st) const { } #endif + +// ------------------------------------------------------------------ +// ciTypeFlow::SuccIter::next +// +void ciTypeFlow::SuccIter::next() { + int succ_ct = _pred->successors()->length(); + int next = _index + 1; + if (next < succ_ct) { + _index = next; + _succ = _pred->successors()->at(next); + return; + } + for (int i = next - succ_ct; i < _pred->exceptions()->length(); i++) { + // Do not compile any code for unloaded exception types. + // Following compiler passes are responsible for doing this also. 
+ ciInstanceKlass* exception_klass = _pred->exc_klasses()->at(i); + if (exception_klass->is_loaded()) { + _index = next; + _succ = _pred->exceptions()->at(i); + return; + } + next++; + } + _index = -1; + _succ = NULL; +} + +// ------------------------------------------------------------------ +// ciTypeFlow::SuccIter::set_succ +// +void ciTypeFlow::SuccIter::set_succ(Block* succ) { + int succ_ct = _pred->successors()->length(); + if (_index < succ_ct) { + _pred->successors()->at_put(_index, succ); + } else { + int idx = _index - succ_ct; + _pred->exceptions()->at_put(idx, succ); + } +} + // ciTypeFlow::Block // // A basic block. @@ -1526,10 +1570,11 @@ ciTypeFlow::Block::Block(ciTypeFlow* outer, _jsrs = new_jsrs; _next = NULL; _on_work_list = false; - _pre_order = -1; assert(!has_pre_order(), ""); - _private_copy = false; + _backedge_copy = false; + _exception_entry = false; _trap_bci = -1; _trap_index = 0; + df_init(); if (CITraceTypeFlow) { tty->print_cr(">> Created new block"); @@ -1541,55 +1586,13 @@ ciTypeFlow::Block::Block(ciTypeFlow* outer, } // ------------------------------------------------------------------ -// ciTypeFlow::Block::clone_loop_head -// -ciTypeFlow::Block* -ciTypeFlow::Block::clone_loop_head(ciTypeFlow* analyzer, - int branch_bci, - ciTypeFlow::Block* target, - ciTypeFlow::JsrSet* jsrs) { - // Loop optimizations are not performed on Tier1 compiles. Do nothing. - if (analyzer->env()->comp_level() < CompLevel_full_optimization) { - return target; - } - - // The current block ends with a branch. - // - // If the target block appears to be the test-clause of a for loop, and - // it is not too large, and it has not yet been cloned, clone it. - // The pre-existing copy becomes the private clone used only by - // the initial iteration of the loop. (We know we are simulating - // the initial iteration right now, since we have never calculated - // successors before for this block.) 
- - if (branch_bci <= start() - && (target->limit() - target->start()) <= CICloneLoopTestLimit - && target->private_copy_count() == 0) { - // Setting the private_copy bit ensures that the target block cannot be - // reached by any other paths, such as fall-in from the loop body. - // The private copy will be accessible only on successor lists - // created up to this point. - target->set_private_copy(true); - if (CITraceTypeFlow) { - tty->print(">> Cloning a test-clause block "); - print_value_on(tty); - tty->cr(); - } - // If the target is the current block, then later on a new copy of the - // target block will be created when its bytecodes are reached by - // an alternate path. (This is the case for loops with the loop - // head at the bci-wise bottom of the loop, as with pre-1.4.2 javac.) - // - // Otherwise, duplicate the target block now and use it immediately. - // (The case for loops with the loop head at the bci-wise top of the - // loop, as with 1.4.2 javac.) - // - // In either case, the new copy of the block will remain public. - if (target != this) { - target = analyzer->block_at(branch_bci, jsrs); - } - } - return target; +// ciTypeFlow::Block::df_init +void ciTypeFlow::Block::df_init() { + _pre_order = -1; assert(!has_pre_order(), ""); + _post_order = -1; assert(!has_post_order(), ""); + _loop = NULL; + _irreducible_entry = false; + _rpo_next = NULL; } // ------------------------------------------------------------------ @@ -1644,7 +1647,6 @@ ciTypeFlow::Block::successors(ciBytecodeStream* str, case Bytecodes::_ifnull: case Bytecodes::_ifnonnull: // Our successors are the branch target and the next bci. 
branch_bci = str->get_dest(); - clone_loop_head(analyzer, branch_bci, this, jsrs); _successors = new (arena) GrowableArray(arena, 2, 0, NULL); assert(_successors->length() == IF_NOT_TAKEN, ""); @@ -1658,14 +1660,7 @@ ciTypeFlow::Block::successors(ciBytecodeStream* str, _successors = new (arena) GrowableArray(arena, 1, 0, NULL); assert(_successors->length() == GOTO_TARGET, ""); - target = analyzer->block_at(branch_bci, jsrs); - // If the target block has not been visited yet, and looks like - // a two-way branch, attempt to clone it if it is a loop head. - if (target->_successors != NULL - && target->_successors->length() == (IF_TAKEN + 1)) { - target = clone_loop_head(analyzer, branch_bci, target, jsrs); - } - _successors->append(target); + _successors->append(analyzer->block_at(branch_bci, jsrs)); break; case Bytecodes::_jsr: @@ -1801,65 +1796,60 @@ void ciTypeFlow::Block::compute_exceptions() { } // ------------------------------------------------------------------ -// ciTypeFlow::Block::is_simpler_than -// -// A relation used to order our work list. We work on a block earlier -// if it has a smaller jsr stack or it occurs earlier in the program -// text. -// -// Note: maybe we should redo this functionality to make blocks -// which correspond to exceptions lower priority. 
-bool ciTypeFlow::Block::is_simpler_than(ciTypeFlow::Block* other) { - if (other == NULL) { - return true; - } else { - int size1 = _jsrs->size(); - int size2 = other->_jsrs->size(); - if (size1 < size2) { - return true; - } else if (size2 < size1) { - return false; - } else { -#if 0 - if (size1 > 0) { - int r1 = _jsrs->record_at(0)->return_address(); - int r2 = _jsrs->record_at(0)->return_address(); - if (r1 < r2) { - return true; - } else if (r2 < r1) { - return false; - } else { - int e1 = _jsrs->record_at(0)->return_address(); - int e2 = _jsrs->record_at(0)->return_address(); - if (e1 < e2) { - return true; - } else if (e2 < e1) { - return false; - } - } - } -#endif - return (start() <= other->start()); - } - } +// ciTypeFlow::Block::set_backedge_copy +// Use this only to make a pre-existing public block into a backedge copy. +void ciTypeFlow::Block::set_backedge_copy(bool z) { + assert(z || (z == is_backedge_copy()), "cannot make a backedge copy public"); + _backedge_copy = z; } // ------------------------------------------------------------------ -// ciTypeFlow::Block::set_private_copy -// Use this only to make a pre-existing public block into a private copy. -void ciTypeFlow::Block::set_private_copy(bool z) { - assert(z || (z == is_private_copy()), "cannot make a private copy public"); - _private_copy = z; +// ciTypeFlow::Block::is_clonable_exit +// +// At most 2 normal successors, one of which continues looping, +// and all exceptional successors must exit. 
+bool ciTypeFlow::Block::is_clonable_exit(ciTypeFlow::Loop* lp) { + int normal_cnt = 0; + int in_loop_cnt = 0; + for (SuccIter iter(this); !iter.done(); iter.next()) { + Block* succ = iter.succ(); + if (iter.is_normal_ctrl()) { + if (++normal_cnt > 2) return false; + if (lp->contains(succ->loop())) { + if (++in_loop_cnt > 1) return false; + } + } else { + if (lp->contains(succ->loop())) return false; + } + } + return in_loop_cnt == 1; +} + +// ------------------------------------------------------------------ +// ciTypeFlow::Block::looping_succ +// +ciTypeFlow::Block* ciTypeFlow::Block::looping_succ(ciTypeFlow::Loop* lp) { + assert(successors()->length() <= 2, "at most 2 normal successors"); + for (SuccIter iter(this); !iter.done(); iter.next()) { + Block* succ = iter.succ(); + if (lp->contains(succ->loop())) { + return succ; + } + } + return NULL; } #ifndef PRODUCT // ------------------------------------------------------------------ // ciTypeFlow::Block::print_value_on void ciTypeFlow::Block::print_value_on(outputStream* st) const { - if (has_pre_order()) st->print("#%-2d ", pre_order()); + if (has_pre_order()) st->print("#%-2d ", pre_order()); + if (has_rpo()) st->print("rpo#%-2d ", rpo()); st->print("[%d - %d)", start(), limit()); + if (is_loop_head()) st->print(" lphd"); + if (is_irreducible_entry()) st->print(" irred"); if (_jsrs->size() > 0) { st->print("/"); _jsrs->print_on(st); } - if (is_private_copy()) st->print("/private_copy"); + if (is_backedge_copy()) st->print("/backedge_copy"); } // ------------------------------------------------------------------ @@ -1871,6 +1861,16 @@ void ciTypeFlow::Block::print_on(outputStream* st) const { st->print_cr(" ==================================================== "); st->print (" "); print_value_on(st); + st->print(" Stored locals: "); def_locals()->print_on(st, outer()->method()->max_locals()); tty->cr(); + if (loop() && loop()->parent() != NULL) { + st->print(" loops:"); + Loop* lp = loop(); + do { + st->print(" 
%d<-%d", lp->head()->pre_order(),lp->tail()->pre_order()); + if (lp->is_irreducible()) st->print("(ir)"); + lp = lp->parent(); + } while (lp->parent() != NULL); + } st->cr(); _state->print_on(st); if (_successors == NULL) { @@ -1907,6 +1907,21 @@ void ciTypeFlow::Block::print_on(outputStream* st) const { } #endif +#ifndef PRODUCT +// ------------------------------------------------------------------ +// ciTypeFlow::LocalSet::print_on +void ciTypeFlow::LocalSet::print_on(outputStream* st, int limit) const { + st->print("{"); + for (int i = 0; i < max; i++) { + if (test(i)) st->print(" %d", i); + } + if (limit > max) { + st->print(" %d..%d ", max, limit); + } + st->print(" }"); +} +#endif + // ciTypeFlow // // This is a pass over the bytecodes which computes the following: @@ -1922,12 +1937,11 @@ ciTypeFlow::ciTypeFlow(ciEnv* env, ciMethod* method, int osr_bci) { _max_locals = method->max_locals(); _max_stack = method->max_stack(); _code_size = method->code_size(); + _has_irreducible_entry = false; _osr_bci = osr_bci; _failure_reason = NULL; assert(start_bci() >= 0 && start_bci() < code_size() , "correct osr_bci argument"); - _work_list = NULL; - _next_pre_order = 0; _ciblock_count = _methodBlocks->num_blocks(); _idx_to_blocklist = NEW_ARENA_ARRAY(arena(), GrowableArray*, _ciblock_count); @@ -1949,12 +1963,6 @@ ciTypeFlow::Block* ciTypeFlow::work_list_next() { _work_list = next_block->next(); next_block->set_next(NULL); next_block->set_on_work_list(false); - if (!next_block->has_pre_order()) { - // Assign "pre_order" as each new block is taken from the work list. - // This number may be used by following phases to order block visits. - assert(!have_block_count(), "must not have mapped blocks yet") - next_block->set_pre_order(_next_pre_order++); - } return next_block; } @@ -1962,30 +1970,37 @@ ciTypeFlow::Block* ciTypeFlow::work_list_next() { // ciTypeFlow::add_to_work_list // // Add a basic block to our work list. 
+// List is sorted by decreasing postorder sort (same as increasing RPO) void ciTypeFlow::add_to_work_list(ciTypeFlow::Block* block) { assert(!block->is_on_work_list(), "must not already be on work list"); if (CITraceTypeFlow) { - tty->print(">> Adding block%s ", block->has_pre_order() ? " (again)" : ""); + tty->print(">> Adding block "); block->print_value_on(tty); tty->print_cr(" to the work list : "); } block->set_on_work_list(true); - if (block->is_simpler_than(_work_list)) { + + // decreasing post order sort + + Block* prev = NULL; + Block* current = _work_list; + int po = block->post_order(); + while (current != NULL) { + if (!current->has_post_order() || po > current->post_order()) + break; + prev = current; + current = current->next(); + } + if (prev == NULL) { block->set_next(_work_list); _work_list = block; } else { - Block *temp = _work_list; - while (!block->is_simpler_than(temp->next())) { - if (CITraceTypeFlow) { - tty->print("."); - } - temp = temp->next(); - } - block->set_next(temp->next()); - temp->set_next(block); + block->set_next(current); + prev->set_next(block); } + if (CITraceTypeFlow) { tty->cr(); } @@ -2008,7 +2023,7 @@ ciTypeFlow::Block* ciTypeFlow::block_at(int bci, ciTypeFlow::JsrSet* jsrs, Creat assert(ciblk->start_bci() == bci, "bad ciBlock boundaries"); Block* block = get_block_for(ciblk->index(), jsrs, option); - assert(block == NULL? (option == no_create): block->is_private_copy() == (option == create_private_copy), "create option consistent with result"); + assert(block == NULL? (option == no_create): block->is_backedge_copy() == (option == create_backedge_copy), "create option consistent with result"); if (CITraceTypeFlow) { if (block != NULL) { @@ -2072,8 +2087,9 @@ void ciTypeFlow::flow_exceptions(GrowableArray* exceptions, } if (block->meet_exception(exception_klass, state)) { - // Block was modified. Add it to the work list. - if (!block->is_on_work_list()) { + // Block was modified and has PO. Add it to the work list. 
+ if (block->has_post_order() && + !block->is_on_work_list()) { add_to_work_list(block); } } @@ -2091,8 +2107,9 @@ void ciTypeFlow::flow_successors(GrowableArray* successors, for (int i = 0; i < len; i++) { Block* block = successors->at(i); if (block->meet(state)) { - // Block was modified. Add it to the work list. - if (!block->is_on_work_list()) { + // Block was modified and has PO. Add it to the work list. + if (block->has_post_order() && + !block->is_on_work_list()) { add_to_work_list(block); } } @@ -2133,6 +2150,111 @@ bool ciTypeFlow::can_trap(ciBytecodeStream& str) { return true; } +// ------------------------------------------------------------------ +// ciTypeFlow::clone_loop_heads +// +// Clone the loop heads +bool ciTypeFlow::clone_loop_heads(Loop* lp, StateVector* temp_vector, JsrSet* temp_set) { + bool rslt = false; + for (PreorderLoops iter(loop_tree_root()); !iter.done(); iter.next()) { + lp = iter.current(); + Block* head = lp->head(); + if (lp == loop_tree_root() || + lp->is_irreducible() || + !head->is_clonable_exit(lp)) + continue; + + // check not already cloned + if (head->backedge_copy_count() != 0) + continue; + + // check _no_ shared head below us + Loop* ch; + for (ch = lp->child(); ch != NULL && ch->head() != head; ch = ch->sibling()); + if (ch != NULL) + continue; + + // Clone head + Block* new_head = head->looping_succ(lp); + Block* clone = clone_loop_head(lp, temp_vector, temp_set); + // Update lp's info + clone->set_loop(lp); + lp->set_head(new_head); + lp->set_tail(clone); + // And move original head into outer loop + head->set_loop(lp->parent()); + + rslt = true; + } + return rslt; +} + +// ------------------------------------------------------------------ +// ciTypeFlow::clone_loop_head +// +// Clone lp's head and replace tail's successors with clone. 
+// +// | +// v +// head <-> body +// | +// v +// exit +// +// new_head +// +// | +// v +// head ----------\ +// | | +// | v +// | clone <-> body +// | | +// | /--/ +// | | +// v v +// exit +// +ciTypeFlow::Block* ciTypeFlow::clone_loop_head(Loop* lp, StateVector* temp_vector, JsrSet* temp_set) { + Block* head = lp->head(); + Block* tail = lp->tail(); + if (CITraceTypeFlow) { + tty->print(">> Requesting clone of loop head "); head->print_value_on(tty); + tty->print(" for predecessor "); tail->print_value_on(tty); + tty->cr(); + } + Block* clone = block_at(head->start(), head->jsrs(), create_backedge_copy); + assert(clone->backedge_copy_count() == 1, "one backedge copy for all back edges"); + + assert(!clone->has_pre_order(), "just created"); + clone->set_next_pre_order(); + + // Insert clone after (orig) tail in reverse post order + clone->set_rpo_next(tail->rpo_next()); + tail->set_rpo_next(clone); + + // tail->head becomes tail->clone + for (SuccIter iter(tail); !iter.done(); iter.next()) { + if (iter.succ() == head) { + iter.set_succ(clone); + break; + } + } + flow_block(tail, temp_vector, temp_set); + if (head == tail) { + // For self-loops, clone->head becomes clone->clone + flow_block(clone, temp_vector, temp_set); + for (SuccIter iter(clone); !iter.done(); iter.next()) { + if (iter.succ() == head) { + iter.set_succ(clone); + break; + } + } + } + flow_block(clone, temp_vector, temp_set); + + return clone; +} // ------------------------------------------------------------------ // ciTypeFlow::flow_block @@ -2159,11 +2281,14 @@ void ciTypeFlow::flow_block(ciTypeFlow::Block* block, // Grab the state from the current block. 
block->copy_state_into(state); + state->def_locals()->clear(); GrowableArray* exceptions = block->exceptions(); GrowableArray* exc_klasses = block->exc_klasses(); bool has_exceptions = exceptions->length() > 0; + bool exceptions_used = false; + ciBytecodeStream str(method()); str.reset_to_bci(start); Bytecodes::Code code; @@ -2172,6 +2297,7 @@ void ciTypeFlow::flow_block(ciTypeFlow::Block* block, // Check for exceptional control flow from this point. if (has_exceptions && can_trap(str)) { flow_exceptions(exceptions, exc_klasses, state); + exceptions_used = true; } // Apply the effects of the current bytecode to our state. bool res = state->apply_one_bytecode(&str); @@ -2189,9 +2315,14 @@ void ciTypeFlow::flow_block(ciTypeFlow::Block* block, block->print_on(tty); } + // Save set of locals defined in this block + block->def_locals()->add(state->def_locals()); + // Record (no) successors. block->successors(&str, state, jsrs); + assert(!has_exceptions || exceptions_used, "Not removing exceptions"); + // Discontinue interpretation of this Block. return; } @@ -2202,6 +2333,7 @@ void ciTypeFlow::flow_block(ciTypeFlow::Block* block, // Check for exceptional control flow from this point. if (has_exceptions && can_trap(str)) { flow_exceptions(exceptions, exc_klasses, state); + exceptions_used = true; } // Fix the JsrSet to reflect effect of the bytecode. @@ -2218,10 +2350,305 @@ void ciTypeFlow::flow_block(ciTypeFlow::Block* block, successors = block->successors(&str, NULL, NULL); } + // Save set of locals defined in this block + block->def_locals()->add(state->def_locals()); + + // Remove untaken exception paths + if (!exceptions_used) + exceptions->clear(); + // Pass our state to successors. flow_successors(successors, state); } +// ------------------------------------------------------------------ +// ciTypeFlow::PostorderLoops::next +// +// Advance to next loop tree using a postorder, left-to-right traversal.
+void ciTypeFlow::PostorderLoops::next() { + assert(!done(), "must not be done."); + if (_current->sibling() != NULL) { + _current = _current->sibling(); + while (_current->child() != NULL) { + _current = _current->child(); + } + } else { + _current = _current->parent(); + } +} + +// ------------------------------------------------------------------ +// ciTypeFlow::PreorderLoops::next +// +// Advance to next loop tree using a preorder, left-to-right traversal. +void ciTypeFlow::PreorderLoops::next() { + assert(!done(), "must not be done."); + if (_current->child() != NULL) { + _current = _current->child(); + } else if (_current->sibling() != NULL) { + _current = _current->sibling(); + } else { + while (_current != _root && _current->sibling() == NULL) { + _current = _current->parent(); + } + if (_current == _root) { + _current = NULL; + assert(done(), "must be done."); + } else { + assert(_current->sibling() != NULL, "must be more to do"); + _current = _current->sibling(); + } + } +} + +// ------------------------------------------------------------------ +// ciTypeFlow::Loop::sorted_merge +// +// Merge the branch lp into this branch, sorting on the loop head +// pre_orders. Returns the leaf of the merged branch. +// Child and sibling pointers will be set up later. +// Sort is (looking from leaf towards the root) +// descending on primary key: loop head's pre_order, and +// ascending on secondary key: loop tail's pre_order.
+ciTypeFlow::Loop* ciTypeFlow::Loop::sorted_merge(Loop* lp) { + Loop* leaf = this; + Loop* prev = NULL; + Loop* current = leaf; + while (lp != NULL) { + int lp_pre_order = lp->head()->pre_order(); + // Find insertion point for "lp" + while (current != NULL) { + if (current == lp) + return leaf; // Already in list + if (current->head()->pre_order() < lp_pre_order) + break; + if (current->head()->pre_order() == lp_pre_order && + current->tail()->pre_order() > lp->tail()->pre_order()) { + break; + } + prev = current; + current = current->parent(); + } + Loop* next_lp = lp->parent(); // Save future list of items to insert + // Insert lp before current + lp->set_parent(current); + if (prev != NULL) { + prev->set_parent(lp); + } else { + leaf = lp; + } + prev = lp; // Inserted item is new prev[ious] + lp = next_lp; // Next item to insert + } + return leaf; +} + +// ------------------------------------------------------------------ +// ciTypeFlow::build_loop_tree +// +// Incrementally build loop tree. +void ciTypeFlow::build_loop_tree(Block* blk) { + assert(!blk->is_post_visited(), "precondition"); + Loop* innermost = NULL; // merge of loop tree branches over all successors + + for (SuccIter iter(blk); !iter.done(); iter.next()) { + Loop* lp = NULL; + Block* succ = iter.succ(); + if (!succ->is_post_visited()) { + // Found backedge since predecessor post visited, but successor is not + assert(succ->pre_order() <= blk->pre_order(), "should be backedge"); + + // Create a LoopNode to mark this loop. + lp = new (arena()) Loop(succ, blk); + if (succ->loop() == NULL) + succ->set_loop(lp); + // succ->loop will be updated to innermost loop on a later call, when blk==succ + + } else { // Nested loop + lp = succ->loop(); + + // If succ is loop head, find outer loop. + while (lp != NULL && lp->head() == succ) { + lp = lp->parent(); + } + if (lp == NULL) { + // Infinite loop, its parent is the root + lp = loop_tree_root(); + } + } + + // Check for irreducible loop.
+ // Successor has already been visited. If the successor's loop head + // has already been post-visited, then this is another entry into the loop. + while (lp->head()->is_post_visited() && lp != loop_tree_root()) { + _has_irreducible_entry = true; + lp->set_irreducible(succ); + if (!succ->is_on_work_list()) { + // Assume irreducible entries need more data flow + add_to_work_list(succ); + } + lp = lp->parent(); + assert(lp != NULL, "nested loop must have parent by now"); + } + + // Merge loop tree branch for all successors. + innermost = innermost == NULL ? lp : innermost->sorted_merge(lp); + + } // end loop + + if (innermost == NULL) { + assert(blk->successors()->length() == 0, "CFG exit"); + blk->set_loop(loop_tree_root()); + } else if (innermost->head() == blk) { + // If loop header, complete the tree pointers + if (blk->loop() != innermost) { +#ifdef ASSERT + assert(blk->loop()->head() == innermost->head(), "same head"); + Loop* dl; + for (dl = innermost; dl != NULL && dl != blk->loop(); dl = dl->parent()); + assert(dl == blk->loop(), "blk->loop() already in innermost list"); +#endif + blk->set_loop(innermost); + } + innermost->def_locals()->add(blk->def_locals()); + Loop* l = innermost; + Loop* p = l->parent(); + while (p && l->head() == blk) { + l->set_sibling(p->child()); // Put self on parent's 'next child' + p->set_child(l); // Make self the first child of parent + p->def_locals()->add(l->def_locals()); + l = p; // Walk up the parent chain + p = l->parent(); + } + } else { + blk->set_loop(innermost); + innermost->def_locals()->add(blk->def_locals()); + } +} + +// ------------------------------------------------------------------ +// ciTypeFlow::Loop::contains +// +// Returns true if lp is this loop, shares its head, or is nested in it.
+bool ciTypeFlow::Loop::contains(ciTypeFlow::Loop* lp) const { + assert(lp != NULL, ""); + if (this == lp || head() == lp->head()) return true; + int depth1 = depth(); + int depth2 = lp->depth(); + if (depth1 > depth2) + return false; + while (depth1 < depth2) { + depth2--; + lp = lp->parent(); + } + return this == lp; +} + +// ------------------------------------------------------------------ +// ciTypeFlow::Loop::depth +// +// Loop depth +int ciTypeFlow::Loop::depth() const { + int dp = 0; + for (Loop* lp = this->parent(); lp != NULL; lp = lp->parent()) + dp++; + return dp; +} + +#ifndef PRODUCT +// ------------------------------------------------------------------ +// ciTypeFlow::Loop::print +void ciTypeFlow::Loop::print(outputStream* st, int indent) const { + for (int i = 0; i < indent; i++) st->print(" "); + st->print("%d<-%d %s", + is_root() ? 0 : this->head()->pre_order(), + is_root() ? 0 : this->tail()->pre_order(), + is_irreducible()?" irr":""); + st->print(" defs: "); + def_locals()->print_on(st, _head->outer()->method()->max_locals()); + st->cr(); + for (Loop* ch = child(); ch != NULL; ch = ch->sibling()) + ch->print(st, indent+2); +} +#endif + +// ------------------------------------------------------------------ +// ciTypeFlow::df_flow_types +// +// Perform the depth first type flow analysis. Helper for flow_types. 
+void ciTypeFlow::df_flow_types(Block* start, + bool do_flow, + StateVector* temp_vector, + JsrSet* temp_set) { + int dft_len = 100; + GrowableArray stk(arena(), dft_len, 0, NULL); + + ciBlock* dummy = _methodBlocks->make_dummy_block(); + JsrSet* root_set = new JsrSet(NULL, 0); + Block* root_head = new (arena()) Block(this, dummy, root_set); + Block* root_tail = new (arena()) Block(this, dummy, root_set); + root_head->set_pre_order(0); + root_head->set_post_order(0); + root_tail->set_pre_order(max_jint); + root_tail->set_post_order(max_jint); + set_loop_tree_root(new (arena()) Loop(root_head, root_tail)); + + stk.push(start); + + _next_pre_order = 0; // initialize pre_order counter + _rpo_list = NULL; + int next_po = 0; // initialize post_order counter + + // Compute RPO and the control flow graph + int size; + while ((size = stk.length()) > 0) { + Block* blk = stk.top(); // Leave node on stack + if (!blk->is_visited()) { + // forward arc in graph + assert (!blk->has_pre_order(), ""); + blk->set_next_pre_order(); + + if (_next_pre_order >= MaxNodeLimit / 2) { + // Too many basic blocks. Bail out. + // This can happen when try/finally constructs are nested to depth N, + // and there is O(2**N) cloning of jsr bodies. See bug 4697245! + // "MaxNodeLimit / 2" is used because probably the parser will + // generate at least twice that many nodes and bail out. + record_failure("too many basic blocks"); + return; + } + if (do_flow) { + flow_block(blk, temp_vector, temp_set); + if (failing()) return; // Watch for bailouts. 
+ } + } else if (!blk->is_post_visited()) { + // cross or back arc + for (SuccIter iter(blk); !iter.done(); iter.next()) { + Block* succ = iter.succ(); + if (!succ->is_visited()) { + stk.push(succ); + } + } + if (stk.length() == size) { + // There were no additional children, post visit node now + stk.pop(); // Remove node from stack + + build_loop_tree(blk); + blk->set_post_order(next_po++); // Assign post order + prepend_to_rpo_list(blk); + assert(blk->is_post_visited(), ""); + + if (blk->is_loop_head() && !blk->is_on_work_list()) { + // Assume loop heads need more data flow + add_to_work_list(blk); + } + } + } else { + stk.pop(); // Remove post-visited node from stack + } + } +} + // ------------------------------------------------------------------ // ciTypeFlow::flow_types // @@ -2233,91 +2660,93 @@ void ciTypeFlow::flow_types() { JsrSet* temp_set = new JsrSet(NULL, 16); // Create the method entry block. - Block* block = block_at(start_bci(), temp_set); - block->set_pre_order(_next_pre_order++); - assert(block->is_start(), "start block must have order #0"); + Block* start = block_at(start_bci(), temp_set); // Load the initial state into it. const StateVector* start_state = get_start_state(); if (failing()) return; - block->meet(start_state); - add_to_work_list(block); + start->meet(start_state); - // Trickle away. - while (!work_list_empty()) { - Block* block = work_list_next(); - flow_block(block, temp_vector, temp_set); + // Depth first visit + df_flow_types(start, true /*do flow*/, temp_vector, temp_set); + if (failing()) return; + assert(_rpo_list == start, "must be start"); - // NodeCountCutoff is the number of nodes at which the parser - // will bail out. Probably if we already have lots of BBs, - // the parser will generate at least twice that many nodes and bail out. - // Therefore, this is a conservatively large limit at which to - // bail out in the pre-parse typeflow pass. - int block_limit = MaxNodeLimit / 2; + // Any loops found? 
+ if (loop_tree_root()->child() != NULL && + env()->comp_level() >= CompLevel_full_optimization) { + // Loop optimizations are not performed on Tier1 compiles. - if (_next_pre_order >= block_limit) { - // Too many basic blocks. Bail out. - // - // This can happen when try/finally constructs are nested to depth N, - // and there is O(2**N) cloning of jsr bodies. See bug 4697245! - record_failure("too many basic blocks"); - return; + bool changed = clone_loop_heads(loop_tree_root(), temp_vector, temp_set); + + // If some loop heads were cloned, recompute postorder and loop tree + if (changed) { + loop_tree_root()->set_child(NULL); + for (Block* blk = _rpo_list; blk != NULL;) { + Block* next = blk->rpo_next(); + blk->df_init(); + blk = next; + } + df_flow_types(start, false /*no flow*/, temp_vector, temp_set); } + } - // Watch for bailouts. - if (failing()) return; + if (CITraceTypeFlow) { + tty->print_cr("\nLoop tree"); + loop_tree_root()->print(); + } + + // Continue flow analysis until fixed point reached + + debug_only(int max_block = _next_pre_order;) + + while (!work_list_empty()) { + Block* blk = work_list_next(); + assert (blk->has_post_order(), "post order assigned above"); + + flow_block(blk, temp_vector, temp_set); + + assert (max_block == _next_pre_order, "no new blocks"); + assert (!failing(), "no more bailouts"); } } // ------------------------------------------------------------------ // ciTypeFlow::map_blocks // -// Create the block map, which indexes blocks in pre_order. +// Create the block map, which indexes blocks in reverse post-order. 
void ciTypeFlow::map_blocks() { assert(_block_map == NULL, "single initialization"); - int pre_order_limit = _next_pre_order; - _block_map = NEW_ARENA_ARRAY(arena(), Block*, pre_order_limit); - assert(pre_order_limit == block_count(), ""); - int po; - for (po = 0; po < pre_order_limit; po++) { - debug_only(_block_map[po] = NULL); + int block_ct = _next_pre_order; + _block_map = NEW_ARENA_ARRAY(arena(), Block*, block_ct); + assert(block_ct == block_count(), ""); + + Block* blk = _rpo_list; + for (int m = 0; m < block_ct; m++) { + int rpo = blk->rpo(); + assert(rpo == m, "should be sequential"); + _block_map[rpo] = blk; + blk = blk->rpo_next(); } - ciMethodBlocks *mblks = _methodBlocks; - ciBlock* current = NULL; - int limit_bci = code_size(); - for (int bci = 0; bci < limit_bci; bci++) { - ciBlock* ciblk = mblks->block_containing(bci); - if (ciblk != NULL && ciblk != current) { - current = ciblk; - int curidx = ciblk->index(); - int block_count = (_idx_to_blocklist[curidx] == NULL) ? 0 : _idx_to_blocklist[curidx]->length(); - for (int i = 0; i < block_count; i++) { - Block* block = _idx_to_blocklist[curidx]->at(i); - if (!block->has_pre_order()) continue; - int po = block->pre_order(); - assert(_block_map[po] == NULL, "unique ref to block"); - assert(0 <= po && po < pre_order_limit, ""); - _block_map[po] = block; - } - } - } - for (po = 0; po < pre_order_limit; po++) { - assert(_block_map[po] != NULL, "must not drop any blocks"); - Block* block = _block_map[po]; + assert(blk == NULL, "should be done"); + + for (int j = 0; j < block_ct; j++) { + assert(_block_map[j] != NULL, "must not drop any blocks"); + Block* block = _block_map[j]; // Remove dead blocks from successor lists: for (int e = 0; e <= 1; e++) { GrowableArray* l = e? 
block->exceptions(): block->successors(); - for (int i = 0; i < l->length(); i++) { - Block* s = l->at(i); - if (!s->has_pre_order()) { + for (int k = 0; k < l->length(); k++) { + Block* s = l->at(k); + if (!s->has_post_order()) { if (CITraceTypeFlow) { tty->print("Removing dead %s successor of #%d: ", (e? "exceptional": "normal"), block->pre_order()); s->print_value_on(tty); tty->cr(); } l->remove(s); - --i; + --k; } } } @@ -2329,7 +2758,7 @@ void ciTypeFlow::map_blocks() { // // Find a block with this ciBlock which has a compatible JsrSet. // If no such block exists, create it, unless the option is no_create. -// If the option is create_private_copy, always create a fresh private copy. +// If the option is create_backedge_copy, always create a fresh backedge copy. ciTypeFlow::Block* ciTypeFlow::get_block_for(int ciBlockIndex, ciTypeFlow::JsrSet* jsrs, CreateOption option) { Arena* a = arena(); GrowableArray* blocks = _idx_to_blocklist[ciBlockIndex]; @@ -2342,11 +2771,11 @@ ciTypeFlow::Block* ciTypeFlow::get_block_for(int ciBlockIndex, ciTypeFlow::JsrSe _idx_to_blocklist[ciBlockIndex] = blocks; } - if (option != create_private_copy) { + if (option != create_backedge_copy) { int len = blocks->length(); for (int i = 0; i < len; i++) { Block* block = blocks->at(i); - if (!block->is_private_copy() && block->is_compatible_with(jsrs)) { + if (!block->is_backedge_copy() && block->is_compatible_with(jsrs)) { return block; } } @@ -2357,15 +2786,15 @@ ciTypeFlow::Block* ciTypeFlow::get_block_for(int ciBlockIndex, ciTypeFlow::JsrSe // We did not find a compatible block. Create one. 
Block* new_block = new (a) Block(this, _methodBlocks->block(ciBlockIndex), jsrs); - if (option == create_private_copy) new_block->set_private_copy(true); + if (option == create_backedge_copy) new_block->set_backedge_copy(true); blocks->append(new_block); return new_block; } // ------------------------------------------------------------------ -// ciTypeFlow::private_copy_count +// ciTypeFlow::backedge_copy_count // -int ciTypeFlow::private_copy_count(int ciBlockIndex, ciTypeFlow::JsrSet* jsrs) const { +int ciTypeFlow::backedge_copy_count(int ciBlockIndex, ciTypeFlow::JsrSet* jsrs) const { GrowableArray* blocks = _idx_to_blocklist[ciBlockIndex]; if (blocks == NULL) { @@ -2376,7 +2805,7 @@ int ciTypeFlow::private_copy_count(int ciBlockIndex, ciTypeFlow::JsrSet* jsrs) c int len = blocks->length(); for (int i = 0; i < len; i++) { Block* block = blocks->at(i); - if (block->is_private_copy() && block->is_compatible_with(jsrs)) { + if (block->is_backedge_copy() && block->is_compatible_with(jsrs)) { count++; } } @@ -2405,10 +2834,12 @@ void ciTypeFlow::do_flow() { if (failing()) { return; } - if (CIPrintTypeFlow || CITraceTypeFlow) { - print_on(tty); - } + map_blocks(); + + if (CIPrintTypeFlow || CITraceTypeFlow) { + rpo_print_on(tty); + } } // ------------------------------------------------------------------ @@ -2466,4 +2897,19 @@ void ciTypeFlow::print_on(outputStream* st) const { st->print_cr("********************************************************"); st->cr(); } + +void ciTypeFlow::rpo_print_on(outputStream* st) const { + st->print_cr("********************************************************"); + st->print ("TypeFlow for "); + method()->name()->print_symbol_on(st); + int limit_bci = code_size(); + st->print_cr(" %d bytes", limit_bci); + for (Block* blk = _rpo_list; blk != NULL; blk = blk->rpo_next()) { + blk->print_on(st); + st->print_cr("--------------------------------------------------------"); + st->cr(); + } + 
st->print_cr("********************************************************"); + st->cr(); +} #endif diff --git a/hotspot/src/share/vm/ci/ciTypeFlow.hpp b/hotspot/src/share/vm/ci/ciTypeFlow.hpp index f095d662b99..4dae26ea512 100644 --- a/hotspot/src/share/vm/ci/ciTypeFlow.hpp +++ b/hotspot/src/share/vm/ci/ciTypeFlow.hpp @@ -34,11 +34,13 @@ private: int _max_locals; int _max_stack; int _code_size; + bool _has_irreducible_entry; const char* _failure_reason; public: class StateVector; + class Loop; class Block; // Build a type flow analyzer @@ -55,6 +57,7 @@ public: int max_stack() const { return _max_stack; } int max_cells() const { return _max_locals + _max_stack; } int code_size() const { return _code_size; } + bool has_irreducible_entry() const { return _has_irreducible_entry; } // Represents information about an "active" jsr call. This // class represents a call to the routine at some entry address @@ -125,6 +128,19 @@ public: void print_on(outputStream* st) const PRODUCT_RETURN; }; + class LocalSet VALUE_OBJ_CLASS_SPEC { + private: + enum Constants { max = 63 }; + uint64_t _bits; + public: + LocalSet() : _bits(0) {} + void add(uint32_t i) { if (i < (uint32_t)max) _bits |= (1LL << i); } + void add(LocalSet* ls) { _bits |= ls->_bits; } + bool test(uint32_t i) const { return i < (uint32_t)max ? 
(_bits>>i)&1U : true; } + void clear() { _bits = 0; } + void print_on(outputStream* st, int limit) const PRODUCT_RETURN; + }; + // Used as a combined index for locals and temps enum Cell { Cell_0, Cell_max = INT_MAX @@ -142,6 +158,8 @@ public: int _trap_bci; int _trap_index; + LocalSet _def_locals; // For entire block + static ciType* type_meet_internal(ciType* t1, ciType* t2, ciTypeFlow* analyzer); public: @@ -181,6 +199,9 @@ public: int monitor_count() const { return _monitor_count; } void set_monitor_count(int mc) { _monitor_count = mc; } + LocalSet* def_locals() { return &_def_locals; } + const LocalSet* def_locals() const { return &_def_locals; } + static Cell start_cell() { return (Cell)0; } static Cell next_cell(Cell c) { return (Cell)(((int)c) + 1); } Cell limit_cell() const { @@ -250,6 +271,10 @@ public: return type->basic_type() == T_DOUBLE; } + void store_to_local(int lnum) { + _def_locals.add((uint) lnum); + } + void push_translate(ciType* type); void push_int() { @@ -358,6 +383,7 @@ public: "must be reference type or return address"); overwrite_local_double_long(index); set_type_at(local(index), type); + store_to_local(index); } void load_local_double(int index) { @@ -376,6 +402,8 @@ public: overwrite_local_double_long(index); set_type_at(local(index), type); set_type_at(local(index+1), type2); + store_to_local(index); + store_to_local(index+1); } void load_local_float(int index) { @@ -388,6 +416,7 @@ public: assert(is_float(type), "must be float type"); overwrite_local_double_long(index); set_type_at(local(index), type); + store_to_local(index); } void load_local_int(int index) { @@ -400,6 +429,7 @@ public: assert(is_int(type), "must be int type"); overwrite_local_double_long(index); set_type_at(local(index), type); + store_to_local(index); } void load_local_long(int index) { @@ -418,6 +448,8 @@ public: overwrite_local_double_long(index); set_type_at(local(index), type); set_type_at(local(index+1), type2); + store_to_local(index); + 
store_to_local(index+1); } // Stop interpretation of this path with a trap. @@ -450,13 +482,31 @@ public: }; // Parameter for "find_block" calls: - // Describes the difference between a public and private copy. + // Describes the difference between a public and backedge copy. enum CreateOption { create_public_copy, - create_private_copy, + create_backedge_copy, no_create }; + // Successor iterator + class SuccIter : public StackObj { + private: + Block* _pred; + int _index; + Block* _succ; + public: + SuccIter() : _pred(NULL), _index(-1), _succ(NULL) {} + SuccIter(Block* pred) : _pred(pred), _index(-1), _succ(NULL) { next(); } + int index() { return _index; } + Block* pred() { return _pred; } // Return predecessor + bool done() { return _index < 0; } // Finished? + Block* succ() { return _succ; } // Return current successor + void next(); // Advance + void set_succ(Block* succ); // Update current successor + bool is_normal_ctrl() { return index() < _pred->successors()->length(); } + }; + // A basic block class Block : public ResourceObj { private: @@ -470,15 +520,24 @@ public: int _trap_bci; int _trap_index; - // A reasonable approximation to pre-order, provided.to the client. + // pre_order, assigned at first visit. Used as block ID and "visited" tag int _pre_order; - // Has this block been cloned for some special purpose? - bool _private_copy; + // A post-order, used to compute the reverse post order (RPO) provided to the client + int _post_order; // used to compute rpo + + // Has this block been cloned for a loop backedge? 
+ bool _backedge_copy; // A pointer used for our internal work list - Block* _next; - bool _on_work_list; + Block* _next; + bool _on_work_list; // on the work list + Block* _rpo_next; // Reverse post order list + + // Loop info + Loop* _loop; // nearest loop + bool _irreducible_entry; // entry to irreducible loop + bool _exception_entry; // entry to exception handler ciBlock* ciblock() const { return _ciblock; } StateVector* state() const { return _state; } @@ -504,10 +563,11 @@ public: int start() const { return _ciblock->start_bci(); } int limit() const { return _ciblock->limit_bci(); } int control() const { return _ciblock->control_bci(); } + JsrSet* jsrs() const { return _jsrs; } - bool is_private_copy() const { return _private_copy; } - void set_private_copy(bool z); - int private_copy_count() const { return outer()->private_copy_count(ciblock()->index(), _jsrs); } + bool is_backedge_copy() const { return _backedge_copy; } + void set_backedge_copy(bool z); + int backedge_copy_count() const { return outer()->backedge_copy_count(ciblock()->index(), _jsrs); } // access to entry state int stack_size() const { return _state->stack_size(); } @@ -515,6 +575,20 @@ public: ciType* local_type_at(int i) const { return _state->local_type_at(i); } ciType* stack_type_at(int i) const { return _state->stack_type_at(i); } + // Data flow on locals + bool is_invariant_local(uint v) const { + assert(is_loop_head(), "only loop heads"); + // Find outermost loop with same loop head + Loop* lp = loop(); + while (lp->parent() != NULL) { + if (lp->parent()->head() != lp->head()) break; + lp = lp->parent(); + } + return !lp->def_locals()->test(v); + } + LocalSet* def_locals() { return _state->def_locals(); } + const LocalSet* def_locals() const { return _state->def_locals(); } + // Get the successors for this Block. 
GrowableArray* successors(ciBytecodeStream* str, StateVector* state, @@ -524,13 +598,6 @@ public: return _successors; } - // Helper function for "successors" when making private copies of - // loop heads for C2. - Block * clone_loop_head(ciTypeFlow* analyzer, - int branch_bci, - Block* target, - JsrSet* jsrs); - // Get the exceptional successors for this Block. GrowableArray* exceptions() { if (_exceptions == NULL) { @@ -584,17 +651,126 @@ public: bool is_on_work_list() const { return _on_work_list; } bool has_pre_order() const { return _pre_order >= 0; } - void set_pre_order(int po) { assert(!has_pre_order() && po >= 0, ""); _pre_order = po; } + void set_pre_order(int po) { assert(!has_pre_order(), ""); _pre_order = po; } int pre_order() const { assert(has_pre_order(), ""); return _pre_order; } + void set_next_pre_order() { set_pre_order(outer()->inc_next_pre_order()); } bool is_start() const { return _pre_order == outer()->start_block_num(); } - // A ranking used in determining order within the work list. 
- bool is_simpler_than(Block* other); + // Reverse post order + void df_init(); + bool has_post_order() const { return _post_order >= 0; } + void set_post_order(int po) { assert(!has_post_order() && po >= 0, ""); _post_order = po; } + void reset_post_order(int o){ _post_order = o; } + int post_order() const { assert(has_post_order(), ""); return _post_order; } + + bool has_rpo() const { return has_post_order() && outer()->have_block_count(); } + int rpo() const { assert(has_rpo(), ""); return outer()->block_count() - post_order() - 1; } + void set_rpo_next(Block* b) { _rpo_next = b; } + Block* rpo_next() { return _rpo_next; } + + // Loops + Loop* loop() const { return _loop; } + void set_loop(Loop* lp) { _loop = lp; } + bool is_loop_head() const { return _loop && _loop->head() == this; } + void set_irreducible_entry(bool c) { _irreducible_entry = c; } + bool is_irreducible_entry() const { return _irreducible_entry; } + bool is_visited() const { return has_pre_order(); } + bool is_post_visited() const { return has_post_order(); } + bool is_clonable_exit(Loop* lp); + Block* looping_succ(Loop* lp); // Successor inside of loop + bool is_single_entry_loop_head() const { + if (!is_loop_head()) return false; + for (Loop* lp = loop(); lp != NULL && lp->head() == this; lp = lp->parent()) + if (lp->is_irreducible()) return false; + return true; + } void print_value_on(outputStream* st) const PRODUCT_RETURN; void print_on(outputStream* st) const PRODUCT_RETURN; }; + // Loop: one node of the loop tree, described by its head and tail blocks + class Loop : public ResourceObj { + private: + Loop* _parent; + Loop* _sibling; // List of siblings, null terminated + Loop* _child; // Head of child list threaded thru sibling pointer + Block* _head; // Head of loop + Block* _tail; // Tail of loop + bool _irreducible; + LocalSet _def_locals; + + public: + Loop(Block* head, Block* tail) : + _parent(NULL), _sibling(NULL), _child(NULL), + _head(head), _tail(tail), + _irreducible(false), _def_locals() {} + + Loop* parent() const { return _parent; } + Loop*
sibling() const { return _sibling; } + Loop* child() const { return _child; } + Block* head() const { return _head; } + Block* tail() const { return _tail; } + void set_parent(Loop* p) { _parent = p; } + void set_sibling(Loop* s) { _sibling = s; } + void set_child(Loop* c) { _child = c; } + void set_head(Block* hd) { _head = hd; } + void set_tail(Block* tl) { _tail = tl; } + + int depth() const; // nesting depth + + // Returns true if lp is a nested loop or us. + bool contains(Loop* lp) const; + bool contains(Block* blk) const { return contains(blk->loop()); } + + // Data flow on locals + LocalSet* def_locals() { return &_def_locals; } + const LocalSet* def_locals() const { return &_def_locals; } + + // Merge the branch lp into this branch, sorting on the loop head + // pre_orders. Returns the leaf of the merged branch. + Loop* sorted_merge(Loop* lp); + + // Mark non-single entry to loop + void set_irreducible(Block* entry) { + _irreducible = true; + entry->set_irreducible_entry(true); + } + bool is_irreducible() const { return _irreducible; } + + bool is_root() const { return _tail->pre_order() == max_jint; } + + void print(outputStream* st = tty, int indent = 0) const PRODUCT_RETURN; + }; + + // Postorder iteration over the loop tree. + class PostorderLoops : public StackObj { + private: + Loop* _root; + Loop* _current; + public: + PostorderLoops(Loop* root) : _root(root), _current(root) { + while (_current->child() != NULL) { + _current = _current->child(); + } + } + bool done() { return _current == NULL; } // Finished iterating? + void next(); // Advance to next loop + Loop* current() { return _current; } // Return current loop. + }; + + // Preorder iteration over the loop tree. + class PreorderLoops : public StackObj { + private: + Loop* _root; + Loop* _current; + public: + PreorderLoops(Loop* root) : _root(root), _current(root) {} + bool done() { return _current == NULL; } // Finished iterating?
+ void next(); // Advance to next loop + Loop* current() { return _current; } // Return current loop. + }; + // Standard indexes of successors, for various bytecodes. enum { FALL_THROUGH = 0, // normal control @@ -619,6 +795,12 @@ private: // Tells if a given instruction is able to generate an exception edge. bool can_trap(ciBytecodeStream& str); + // Clone the loop heads. Returns true if any cloning occurred. + bool clone_loop_heads(Loop* lp, StateVector* temp_vector, JsrSet* temp_set); + + // Clone lp's head and replace tail's successors with clone. + Block* clone_loop_head(Loop* lp, StateVector* temp_vector, JsrSet* temp_set); + public: // Return the block beginning at bci which has a JsrSet compatible // with jsrs. @@ -627,8 +809,8 @@ public: // block factory Block* get_block_for(int ciBlockIndex, JsrSet* jsrs, CreateOption option = create_public_copy); - // How many of the blocks have the private_copy bit set? - int private_copy_count(int ciBlockIndex, JsrSet* jsrs) const; + // How many of the blocks have the backedge_copy bit set? + int backedge_copy_count(int ciBlockIndex, JsrSet* jsrs) const; // Return an existing block containing bci which has a JsrSet compatible // with jsrs, or NULL if there is none. @@ -651,11 +833,18 @@ public: return _block_map[po]; } Block* start_block() const { return pre_order_at(start_block_num()); } int start_block_num() const { return 0; } + Block* rpo_at(int rpo) const { assert(0 <= rpo && rpo < block_count(), "out of bounds"); + return _block_map[rpo]; } + int next_pre_order() { return _next_pre_order; } + int inc_next_pre_order() { return _next_pre_order++; } private: // A work list used during flow analysis. Block* _work_list; + // List of blocks in reverse post order + Block* _rpo_list; + // Next Block::_pre_order. After mapping, doubles as block_count. int _next_pre_order; @@ -668,6 +857,15 @@ private: // Add a basic block to our work list. void add_to_work_list(Block* block); + // Prepend a basic block to rpo list. 
+ void prepend_to_rpo_list(Block* blk) { + blk->set_rpo_next(_rpo_list); + _rpo_list = blk; + } + + // Root of the loop tree + Loop* _loop_tree_root; + // State used for make_jsr_record int _jsr_count; GrowableArray* _jsr_records; @@ -677,6 +875,9 @@ public: // does not already exist. JsrRecord* make_jsr_record(int entry_address, int return_address); + void set_loop_tree_root(Loop* ltr) { _loop_tree_root = ltr; } + Loop* loop_tree_root() { return _loop_tree_root; } + private: // Get the initial state for start_bci: const StateVector* get_start_state(); @@ -703,6 +904,15 @@ private: // necessary. void flow_types(); + // Perform the depth first type flow analysis. Helper for flow_types. + void df_flow_types(Block* start, + bool do_flow, + StateVector* temp_vector, + JsrSet* temp_set); + + // Incrementally build loop tree. + void build_loop_tree(Block* blk); + // Create the block map, which indexes blocks in pre_order. void map_blocks(); @@ -711,4 +921,6 @@ public: void do_flow(); void print_on(outputStream* st) const PRODUCT_RETURN; + + void rpo_print_on(outputStream* st) const PRODUCT_RETURN; }; diff --git a/hotspot/src/share/vm/code/nmethod.cpp b/hotspot/src/share/vm/code/nmethod.cpp index 282a4876262..6baa28690db 100644 --- a/hotspot/src/share/vm/code/nmethod.cpp +++ b/hotspot/src/share/vm/code/nmethod.cpp @@ -1350,11 +1350,7 @@ bool nmethod::can_unload(BoolObjectClosure* is_alive, return false; } } - if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) { - // Cannot do this test if verification of the UseParallelOldGC - // code using the PSMarkSweep code is being done. 
- assert(unloading_occurred, "Inconsistency in unloading"); - } + assert(unloading_occurred, "Inconsistency in unloading"); make_unloaded(is_alive, obj); return true; } diff --git a/hotspot/src/share/vm/compiler/methodLiveness.cpp b/hotspot/src/share/vm/compiler/methodLiveness.cpp index 92d60cfeca8..a9a90a07197 100644 --- a/hotspot/src/share/vm/compiler/methodLiveness.cpp +++ b/hotspot/src/share/vm/compiler/methodLiveness.cpp @@ -76,8 +76,9 @@ class BitCounter: public BitMapClosure { BitCounter() : _count(0) {} // Callback when bit in map is set - virtual void do_bit(size_t offset) { + virtual bool do_bit(size_t offset) { _count++; + return true; } int count() { @@ -467,7 +468,7 @@ MethodLivenessResult MethodLiveness::get_liveness_at(int entry_bci) { bci = 0; } - MethodLivenessResult answer(NULL,0); + MethodLivenessResult answer((uintptr_t*)NULL,0); if (_block_count > 0) { if (TimeLivenessAnalysis) _time_total.start(); diff --git a/hotspot/src/share/vm/compiler/methodLiveness.hpp b/hotspot/src/share/vm/compiler/methodLiveness.hpp index a679c34d007..705a36f762b 100644 --- a/hotspot/src/share/vm/compiler/methodLiveness.hpp +++ b/hotspot/src/share/vm/compiler/methodLiveness.hpp @@ -29,7 +29,7 @@ class MethodLivenessResult : public BitMap { bool _is_valid; public: - MethodLivenessResult(uintptr_t* map, idx_t size_in_bits) + MethodLivenessResult(BitMap::bm_word_t* map, idx_t size_in_bits) : BitMap(map, size_in_bits) , _is_valid(false) {} diff --git a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp index af9a9ecf3dc..4b1fecd7cb3 100644 --- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp @@ -790,7 +790,7 @@ CompactibleFreeListSpace::object_iterate_careful_m(MemRegion mr, } -HeapWord* 
CompactibleFreeListSpace::block_start(const void* p) const { +HeapWord* CompactibleFreeListSpace::block_start_const(const void* p) const { NOT_PRODUCT(verify_objects_initialized()); return _bt.block_start(p); } @@ -2286,9 +2286,9 @@ void CompactibleFreeListSpace::verifyIndexedFreeLists() const { } void CompactibleFreeListSpace::verifyIndexedFreeList(size_t size) const { - guarantee(size % 2 == 0, "Odd slots should be empty"); - for (FreeChunk* fc = _indexedFreeList[size].head(); fc != NULL; - fc = fc->next()) { + FreeChunk* fc = _indexedFreeList[size].head(); + guarantee((size % 2 == 0) || fc == NULL, "Odd slots should be empty"); + for (; fc != NULL; fc = fc->next()) { guarantee(fc->size() == size, "Size inconsistency"); guarantee(fc->isFree(), "!free?"); guarantee(fc->next() == NULL || fc->next()->prev() == fc, "Broken list"); @@ -2790,10 +2790,11 @@ initialize_sequential_subtasks_for_rescan(int n_threads) { assert(n_threads > 0, "Unexpected n_threads argument"); const size_t task_size = rescan_task_size(); size_t n_tasks = (used_region().word_size() + task_size - 1)/task_size; - assert((used_region().start() + (n_tasks - 1)*task_size < - used_region().end()) && - (used_region().start() + n_tasks*task_size >= - used_region().end()), "n_task calculation incorrect"); + assert((n_tasks == 0) == used_region().is_empty(), "n_tasks incorrect"); + assert(n_tasks == 0 || + ((used_region().start() + (n_tasks - 1)*task_size < used_region().end()) && + (used_region().start() + n_tasks*task_size >= used_region().end())), + "n_tasks calculation incorrect"); SequentialSubTasksDone* pst = conc_par_seq_tasks(); assert(!pst->valid(), "Clobbering existing data?"); pst->set_par_threads(n_threads); @@ -2833,7 +2834,7 @@ initialize_sequential_subtasks_for_marking(int n_threads, assert(n_tasks == 0 || ((span.start() + (n_tasks - 1)*task_size < span.end()) && (span.start() + n_tasks*task_size >= span.end())), - "n_task calculation incorrect"); + "n_tasks calculation incorrect"); 
SequentialSubTasksDone* pst = conc_par_seq_tasks(); assert(!pst->valid(), "Clobbering existing data?"); pst->set_par_threads(n_threads); diff --git a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp index 9ac7b03d40d..5306a8f3085 100644 --- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp @@ -502,7 +502,7 @@ class CompactibleFreeListSpace: public CompactibleSpace { void blk_iterate(BlkClosure* cl); void blk_iterate_careful(BlkClosureCareful* cl); - HeapWord* block_start(const void* p) const; + HeapWord* block_start_const(const void* p) const; HeapWord* block_start_careful(const void* p) const; size_t block_size(const HeapWord* p) const; size_t block_size_no_stall(HeapWord* p, const CMSCollector* c) const; diff --git a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp index 697ce756825..db40a525454 100644 --- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp @@ -2761,13 +2761,14 @@ class VerifyMarkedClosure: public BitMapClosure { public: VerifyMarkedClosure(CMSBitMap* bm): _marks(bm), _failed(false) {} - void do_bit(size_t offset) { + bool do_bit(size_t offset) { HeapWord* addr = _marks->offsetToHeapWord(offset); if (!_marks->isMarked(addr)) { oop(addr)->print(); gclog_or_tty->print_cr(" ("INTPTR_FORMAT" should have been marked)", addr); _failed = true; } + return true; } bool failed() { return _failed; } @@ -3650,6 +3651,7 @@ class CMSConcMarkingTask: public YieldingFlexibleGangTask { CompactibleFreeListSpace* _cms_space; 
CompactibleFreeListSpace* _perm_space; HeapWord* _global_finger; + HeapWord* _restart_addr; // Exposed here for yielding support Mutex* const _bit_map_lock; @@ -3680,7 +3682,7 @@ class CMSConcMarkingTask: public YieldingFlexibleGangTask { _term.set_task(this); assert(_cms_space->bottom() < _perm_space->bottom(), "Finger incorrectly initialized below"); - _global_finger = _cms_space->bottom(); + _restart_addr = _global_finger = _cms_space->bottom(); } @@ -3698,6 +3700,10 @@ class CMSConcMarkingTask: public YieldingFlexibleGangTask { bool result() { return _result; } void reset(HeapWord* ra) { + assert(_global_finger >= _cms_space->end(), "Postcondition of ::work(i)"); + assert(_global_finger >= _perm_space->end(), "Postcondition of ::work(i)"); + assert(ra < _perm_space->end(), "ra too large"); + _restart_addr = _global_finger = ra; _term.reset_for_reuse(); } @@ -3842,16 +3848,24 @@ void CMSConcMarkingTask::do_scan_and_mark(int i, CompactibleFreeListSpace* sp) { int n_tasks = pst->n_tasks(); // We allow that there may be no tasks to do here because // we are restarting after a stack overflow. - assert(pst->valid() || n_tasks == 0, "Uninitializd use?"); + assert(pst->valid() || n_tasks == 0, "Uninitialized use?"); int nth_task = 0; - HeapWord* start = sp->bottom(); + HeapWord* aligned_start = sp->bottom(); + if (sp->used_region().contains(_restart_addr)) { + // Align down to a card boundary for the start of 0th task + // for this space. 
+ aligned_start = + (HeapWord*)align_size_down((uintptr_t)_restart_addr, + CardTableModRefBS::card_size); + } + size_t chunk_size = sp->marking_task_size(); while (!pst->is_task_claimed(/* reference */ nth_task)) { // Having claimed the nth task in this space, // compute the chunk that it corresponds to: - MemRegion span = MemRegion(start + nth_task*chunk_size, - start + (nth_task+1)*chunk_size); + MemRegion span = MemRegion(aligned_start + nth_task*chunk_size, + aligned_start + (nth_task+1)*chunk_size); // Try and bump the global finger via a CAS; // note that we need to do the global finger bump // _before_ taking the intersection below, because @@ -3866,26 +3880,40 @@ void CMSConcMarkingTask::do_scan_and_mark(int i, CompactibleFreeListSpace* sp) { // beyond the "top" address of the space. span = span.intersection(sp->used_region()); if (!span.is_empty()) { // Non-null task - // We want to skip the first object because - // the protocol is to scan any object in its entirety - // that _starts_ in this span; a fortiori, any - // object starting in an earlier span is scanned - // as part of an earlier claimed task. - // Below we use the "careful" version of block_start - // so we do not try to navigate uninitialized objects. - HeapWord* prev_obj = sp->block_start_careful(span.start()); - // Below we use a variant of block_size that uses the - // Printezis bits to avoid waiting for allocated - // objects to become initialized/parsable. - while (prev_obj < span.start()) { - size_t sz = sp->block_size_no_stall(prev_obj, _collector); - if (sz > 0) { - prev_obj += sz; + HeapWord* prev_obj; + assert(!span.contains(_restart_addr) || nth_task == 0, + "Inconsistency"); + if (nth_task == 0) { + // For the 0th task, we'll not need to compute a block_start. + if (span.contains(_restart_addr)) { + // In the case of a restart because of stack overflow, + // we might additionally skip a chunk prefix. 
+ prev_obj = _restart_addr; } else { - // In this case we may end up doing a bit of redundant - // scanning, but that appears unavoidable, short of - // locking the free list locks; see bug 6324141. - break; + prev_obj = span.start(); + } + } else { + // We want to skip the first object because + // the protocol is to scan any object in its entirety + // that _starts_ in this span; a fortiori, any + // object starting in an earlier span is scanned + // as part of an earlier claimed task. + // Below we use the "careful" version of block_start + // so we do not try to navigate uninitialized objects. + prev_obj = sp->block_start_careful(span.start()); + // Below we use a variant of block_size that uses the + // Printezis bits to avoid waiting for allocated + // objects to become initialized/parsable. + while (prev_obj < span.start()) { + size_t sz = sp->block_size_no_stall(prev_obj, _collector); + if (sz > 0) { + prev_obj += sz; + } else { + // In this case we may end up doing a bit of redundant + // scanning, but that appears unavoidable, short of + // locking the free list locks; see bug 6324141. + break; + } } } if (prev_obj < span.end()) { @@ -3938,12 +3966,14 @@ class Par_ConcMarkingClosure: public OopClosure { void handle_stack_overflow(HeapWord* lost); }; -// Grey object rescan during work stealing phase -- -// the salient assumption here is that stolen oops must -// always be initialized, so we do not need to check for -// uninitialized objects before scanning here. +// Grey object scanning during work stealing phase -- +// the salient assumption here is that any references +// that are in these stolen objects being scanned must +// already have been initialized (else they would not have +// been published), so we do not need to check for +// uninitialized objects before pushing here. 
void Par_ConcMarkingClosure::do_oop(oop obj) { - assert(obj->is_oop_or_null(), "expected an oop or NULL"); + assert(obj->is_oop_or_null(true), "expected an oop or NULL"); HeapWord* addr = (HeapWord*)obj; // Check if oop points into the CMS generation // and is not marked @@ -4001,7 +4031,7 @@ void Par_ConcMarkingClosure::trim_queue(size_t max) { // in CMSCollector's _restart_address. void Par_ConcMarkingClosure::handle_stack_overflow(HeapWord* lost) { // We need to do this under a mutex to prevent other - // workers from interfering with the expansion below. + // workers from interfering with the work done below. MutexLockerEx ml(_overflow_stack->par_lock(), Mutex::_no_safepoint_check_flag); // Remember the least grey address discarded @@ -4640,8 +4670,11 @@ size_t CMSCollector::preclean_card_table(ConcurrentMarkSweepGeneration* gen, startTimer(); sample_eden(); // Get and clear dirty region from card table - dirtyRegion = _ct->ct_bs()->dirty_card_range_after_preclean( - MemRegion(nextAddr, endAddr)); + dirtyRegion = _ct->ct_bs()->dirty_card_range_after_reset( + MemRegion(nextAddr, endAddr), + true, + CardTableModRefBS::precleaned_card_val()); + assert(dirtyRegion.start() >= nextAddr, "returned region inconsistent?"); } @@ -5409,8 +5442,8 @@ void CMSCollector::do_remark_non_parallel() { &mrias_cl); { TraceTime t("grey object rescan", PrintGCDetails, false, gclog_or_tty); - // Iterate over the dirty cards, marking them precleaned, and - // setting the corresponding bits in the mod union table. + // Iterate over the dirty cards, setting the corresponding bits in the + // mod union table. { ModUnionClosure modUnionClosure(&_modUnionTable); _ct->ct_bs()->dirty_card_iterate( @@ -6182,7 +6215,7 @@ HeapWord* CMSCollector::next_card_start_after_block(HeapWord* addr) const { // bit vector itself. That is done by a separate call CMSBitMap::allocate() // further below. 
CMSBitMap::CMSBitMap(int shifter, int mutex_rank, const char* mutex_name): - _bm(NULL,0), + _bm(), _shifter(shifter), _lock(mutex_rank >= 0 ? new Mutex(mutex_rank, mutex_name, true) : NULL) { @@ -6207,7 +6240,7 @@ bool CMSBitMap::allocate(MemRegion mr) { } assert(_virtual_space.committed_size() == brs.size(), "didn't reserve backing store for all of CMS bit map?"); - _bm.set_map((uintptr_t*)_virtual_space.low()); + _bm.set_map((BitMap::bm_word_t*)_virtual_space.low()); assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >= _bmWordSize, "inconsistency in bit map sizing"); _bm.set_size(_bmWordSize >> _shifter); @@ -6554,7 +6587,7 @@ void Par_MarkRefsIntoAndScanClosure::do_oop(oop obj) { if (obj != NULL) { // Ignore mark word because this could be an already marked oop // that may be chained at the end of the overflow list. - assert(obj->is_oop(), "expected an oop"); + assert(obj->is_oop(true), "expected an oop"); HeapWord* addr = (HeapWord*)obj; if (_span.contains(addr) && !_bit_map->isMarked(addr)) { @@ -6845,10 +6878,10 @@ void MarkFromRootsClosure::reset(HeapWord* addr) { // Should revisit to see if this should be restructured for // greater efficiency. -void MarkFromRootsClosure::do_bit(size_t offset) { +bool MarkFromRootsClosure::do_bit(size_t offset) { if (_skipBits > 0) { _skipBits--; - return; + return true; } // convert offset into a HeapWord* HeapWord* addr = _bitMap->startWord() + offset; @@ -6886,10 +6919,11 @@ void MarkFromRootsClosure::do_bit(size_t offset) { } // ...else the setting of klass will dirty the card anyway. } DEBUG_ONLY(}) - return; + return true; } } scanOopsInOop(addr); + return true; } // We take a break if we've been at this for a while, @@ -7023,10 +7057,10 @@ Par_MarkFromRootsClosure::Par_MarkFromRootsClosure(CMSConcMarkingTask* task, // Should revisit to see if this should be restructured for // greater efficiency. 
-void Par_MarkFromRootsClosure::do_bit(size_t offset) { +bool Par_MarkFromRootsClosure::do_bit(size_t offset) { if (_skip_bits > 0) { _skip_bits--; - return; + return true; } // convert offset into a HeapWord* HeapWord* addr = _bit_map->startWord() + offset; @@ -7041,10 +7075,11 @@ void Par_MarkFromRootsClosure::do_bit(size_t offset) { if (p->klass_or_null() == NULL || !p->is_parsable()) { // in the case of Clean-on-Enter optimization, redirty card // and avoid clearing card by increasing the threshold. - return; + return true; } } scan_oops_in_oop(addr); + return true; } void Par_MarkFromRootsClosure::scan_oops_in_oop(HeapWord* ptr) { @@ -7167,7 +7202,7 @@ void MarkFromRootsVerifyClosure::reset(HeapWord* addr) { // Should revisit to see if this should be restructured for // greater efficiency. -void MarkFromRootsVerifyClosure::do_bit(size_t offset) { +bool MarkFromRootsVerifyClosure::do_bit(size_t offset) { // convert offset into a HeapWord* HeapWord* addr = _verification_bm->startWord() + offset; assert(_verification_bm->endWord() && addr < _verification_bm->endWord(), @@ -7195,6 +7230,7 @@ void MarkFromRootsVerifyClosure::do_bit(size_t offset) { new_oop->oop_iterate(&_pam_verify_closure); } assert(_mark_stack->isEmpty(), "tautology, emphasizing post-condition"); + return true; } PushAndMarkVerifyClosure::PushAndMarkVerifyClosure( @@ -7289,6 +7325,8 @@ Par_PushOrMarkClosure::Par_PushOrMarkClosure(CMSCollector* collector, _should_remember_klasses(collector->should_unload_classes()) { } +// Assumes thread-safe access by callers, who are +// responsible for mutual exclusion. void CMSCollector::lower_restart_addr(HeapWord* low) { assert(_span.contains(low), "Out of bounds addr"); if (_restart_addr == NULL) { @@ -7314,7 +7352,7 @@ void PushOrMarkClosure::handle_stack_overflow(HeapWord* lost) { // in CMSCollector's _restart_address. 
void Par_PushOrMarkClosure::handle_stack_overflow(HeapWord* lost) { // We need to do this under a mutex to prevent other - // workers from interfering with the expansion below. + // workers from interfering with the work done below. MutexLockerEx ml(_overflow_stack->par_lock(), Mutex::_no_safepoint_check_flag); // Remember the least grey address discarded @@ -7438,8 +7476,12 @@ PushAndMarkClosure::PushAndMarkClosure(CMSCollector* collector, // Grey object rescan during pre-cleaning and second checkpoint phases -- // the non-parallel version (the parallel version appears further below.) void PushAndMarkClosure::do_oop(oop obj) { - // If _concurrent_precleaning, ignore mark word verification - assert(obj->is_oop_or_null(_concurrent_precleaning), + // Ignore mark word verification. If during concurrent precleaning, + // the object monitor may be locked. If during the checkpoint + // phases, the object may already have been reached by a different + // path and may be at the end of the global overflow list (so + // the mark word may be NULL). 
+ assert(obj->is_oop_or_null(true /* ignore mark word */), "expected an oop or NULL"); HeapWord* addr = (HeapWord*)obj; // Check if oop points into the CMS generation diff --git a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp index 672bb8a8da7..9abc84c6a31 100644 --- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp @@ -1327,7 +1327,7 @@ class MarkFromRootsClosure: public BitMapClosure { CMSMarkStack* markStack, CMSMarkStack* revisitStack, bool should_yield, bool verifying = false); - void do_bit(size_t offset); + bool do_bit(size_t offset); void reset(HeapWord* addr); inline void do_yield_check(); @@ -1363,7 +1363,7 @@ class Par_MarkFromRootsClosure: public BitMapClosure { CMSMarkStack* overflow_stack, CMSMarkStack* revisit_stack, bool should_yield); - void do_bit(size_t offset); + bool do_bit(size_t offset); inline void do_yield_check(); private: @@ -1411,7 +1411,7 @@ class MarkFromRootsVerifyClosure: public BitMapClosure { CMSBitMap* verification_bm, CMSBitMap* cms_bm, CMSMarkStack* mark_stack); - void do_bit(size_t offset); + bool do_bit(size_t offset); void reset(HeapWord* addr); }; @@ -1420,8 +1420,9 @@ class MarkFromRootsVerifyClosure: public BitMapClosure { // "empty" (i.e. the bit vector doesn't have any 1-bits). 
class FalseBitMapClosure: public BitMapClosure { public: - void do_bit(size_t offset) { + bool do_bit(size_t offset) { guarantee(false, "Should not have a 1 bit"); + return true; } }; diff --git a/hotspot/src/share/vm/gc_implementation/g1/bufferingOopClosure.hpp b/hotspot/src/share/vm/gc_implementation/g1/bufferingOopClosure.hpp new file mode 100644 index 00000000000..1124e5d799f --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/bufferingOopClosure.hpp @@ -0,0 +1,195 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// A BufferingOops closure tries to separate out the cost of finding roots +// from the cost of applying closures to them. It maintains an array of +// ref-containing locations. Until the array is full, applying the closure +// to an oop* merely records that location in the array. Since this +// closure app cost is small, an elapsed timer can approximately attribute +// all of this cost to the cost of finding the roots. 
When the array fills +// up, the wrapped closure is applied to all elements, keeping track of +// the elapsed time of this process, and leaving the array empty. +// The caller must be sure to call "done" to process any unprocessed +// buffered entries. + +class Generation; +class HeapRegion; + +class BufferingOopClosure: public OopClosure { +protected: + enum PrivateConstants { + BufferLength = 1024 + }; + + oop *_buffer[BufferLength]; + oop **_buffer_top; + oop **_buffer_curr; + + OopClosure *_oc; + double _closure_app_seconds; + + void process_buffer () { + + double start = os::elapsedTime(); + for (oop **curr = _buffer; curr < _buffer_curr; ++curr) { + _oc->do_oop(*curr); + } + _buffer_curr = _buffer; + _closure_app_seconds += (os::elapsedTime() - start); + } + +public: + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + virtual void do_oop(oop *p) { + if (_buffer_curr == _buffer_top) { + process_buffer(); + } + + *_buffer_curr = p; + ++_buffer_curr; + } + void done () { + if (_buffer_curr > _buffer) { + process_buffer(); + } + } + double closure_app_seconds () { + return _closure_app_seconds; + } + BufferingOopClosure (OopClosure *oc) : + _oc(oc), + _buffer_curr(_buffer), _buffer_top(_buffer + BufferLength), + _closure_app_seconds(0.0) { } +}; + +class BufferingOopsInGenClosure: public OopsInGenClosure { + BufferingOopClosure _boc; + OopsInGenClosure* _oc; +public: + BufferingOopsInGenClosure(OopsInGenClosure *oc) : + _boc(oc), _oc(oc) {} + + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + + virtual void do_oop(oop* p) { + assert(generation()->is_in_reserved(p), "Must be in!"); + _boc.do_oop(p); + } + + void done() { + _boc.done(); + } + + double closure_app_seconds () { + return _boc.closure_app_seconds(); + } + + void set_generation(Generation* gen) { + OopsInGenClosure::set_generation(gen); + _oc->set_generation(gen); + } + + void reset_generation() { + // Make sure we finish the current work with the current 
generation. + _boc.done(); + OopsInGenClosure::reset_generation(); + _oc->reset_generation(); + } + +}; + + +class BufferingOopsInHeapRegionClosure: public OopsInHeapRegionClosure { +private: + enum PrivateConstants { + BufferLength = 1024 + }; + + oop *_buffer[BufferLength]; + oop **_buffer_top; + oop **_buffer_curr; + + HeapRegion *_hr_buffer[BufferLength]; + HeapRegion **_hr_curr; + + OopsInHeapRegionClosure *_oc; + double _closure_app_seconds; + + void process_buffer () { + + assert((_hr_curr - _hr_buffer) == (_buffer_curr - _buffer), + "the two lengths should be the same"); + + double start = os::elapsedTime(); + HeapRegion **hr_curr = _hr_buffer; + HeapRegion *hr_prev = NULL; + for (oop **curr = _buffer; curr < _buffer_curr; ++curr) { + HeapRegion *region = *hr_curr; + if (region != hr_prev) { + _oc->set_region(region); + hr_prev = region; + } + _oc->do_oop(*curr); + ++hr_curr; + } + _buffer_curr = _buffer; + _hr_curr = _hr_buffer; + _closure_app_seconds += (os::elapsedTime() - start); + } + +public: + virtual void do_oop(narrowOop *p) { + guarantee(false, "NYI"); + } + + virtual void do_oop(oop *p) { + if (_buffer_curr == _buffer_top) { + assert(_hr_curr > _hr_buffer, "_hr_curr should be consistent with _buffer_curr"); + process_buffer(); + } + + *_buffer_curr = p; + ++_buffer_curr; + *_hr_curr = _from; + ++_hr_curr; + } + void done () { + if (_buffer_curr > _buffer) { + assert(_hr_curr > _hr_buffer, "_hr_curr should be consistent with _buffer_curr"); + process_buffer(); + } + } + double closure_app_seconds () { + return _closure_app_seconds; + } + BufferingOopsInHeapRegionClosure (OopsInHeapRegionClosure *oc) : + _oc(oc), + _buffer_curr(_buffer), _buffer_top(_buffer + BufferLength), + _hr_curr(_hr_buffer), + _closure_app_seconds(0.0) { } +}; diff --git a/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp b/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp new file mode 100644 index 00000000000..fbc5f4f151b --- /dev/null +++ 
b/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp @@ -0,0 +1,409 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +# include "incls/_precompiled.incl" +# include "incls/_collectionSetChooser.cpp.incl" + +CSetChooserCache::CSetChooserCache() { + for (int i = 0; i < CacheLength; ++i) + _cache[i] = NULL; + clear(); +} + +void CSetChooserCache::clear() { + _occupancy = 0; + _first = 0; + for (int i = 0; i < CacheLength; ++i) { + HeapRegion *hr = _cache[i]; + if (hr != NULL) + hr->set_sort_index(-1); + _cache[i] = NULL; + } +} + +#ifndef PRODUCT +bool CSetChooserCache::verify() { + int index = _first; + HeapRegion *prev = NULL; + for (int i = 0; i < _occupancy; ++i) { + guarantee(_cache[index] != NULL, "cache entry should not be empty"); + HeapRegion *hr = _cache[index]; + guarantee(!hr->is_young(), "should not be young!"); + if (prev != NULL) { + guarantee(prev->gc_efficiency() >= hr->gc_efficiency(), + "cache should be correctly ordered"); + } + guarantee(hr->sort_index() == get_sort_index(index), + "sort index should be correct"); + index = trim_index(index + 1); + prev = hr; + } + + for (int i = 0; i < (CacheLength - _occupancy); ++i) { + guarantee(_cache[index] == NULL, "cache entry should be empty"); + index = trim_index(index + 1); + } + + guarantee(index == _first, "we should have reached where we started from"); + return true; +} +#endif // PRODUCT + +void CSetChooserCache::insert(HeapRegion *hr) { + assert(!is_full(), "cache should not be empty"); + hr->calc_gc_efficiency(); + + int empty_index; + if (_occupancy == 0) { + empty_index = _first; + } else { + empty_index = trim_index(_first + _occupancy); + assert(_cache[empty_index] == NULL, "last slot should be empty"); + int last_index = trim_index(empty_index - 1); + HeapRegion *last = _cache[last_index]; + assert(last != NULL,"as the cache is not empty, last should not be empty"); + while (empty_index != _first && + last->gc_efficiency() < hr->gc_efficiency()) { + _cache[empty_index] = last; + last->set_sort_index(get_sort_index(empty_index)); + empty_index = last_index; + last_index = trim_index(last_index - 
1); + last = _cache[last_index]; + } + } + _cache[empty_index] = hr; + hr->set_sort_index(get_sort_index(empty_index)); + + ++_occupancy; + assert(verify(), "cache should be consistent"); +} + +HeapRegion *CSetChooserCache::remove_first() { + if (_occupancy > 0) { + assert(_cache[_first] != NULL, "cache should have at least one region"); + HeapRegion *ret = _cache[_first]; + _cache[_first] = NULL; + ret->set_sort_index(-1); + --_occupancy; + _first = trim_index(_first + 1); + assert(verify(), "cache should be consistent"); + return ret; + } else { + return NULL; + } +} + +// this is a bit expensive... but we expect that it should not be called +// too often. +void CSetChooserCache::remove(HeapRegion *hr) { + assert(_occupancy > 0, "cache should not be empty"); + assert(hr->sort_index() < -1, "should already be in the cache"); + int index = get_index(hr->sort_index()); + assert(_cache[index] == hr, "index should be correct"); + int next_index = trim_index(index + 1); + int last_index = trim_index(_first + _occupancy - 1); + while (index != last_index) { + assert(_cache[next_index] != NULL, "should not be null"); + _cache[index] = _cache[next_index]; + _cache[index]->set_sort_index(get_sort_index(index)); + + index = next_index; + next_index = trim_index(next_index+1); + } + assert(index == last_index, "should have reached the last one"); + _cache[index] = NULL; + hr->set_sort_index(-1); + --_occupancy; + assert(verify(), "cache should be consistent"); +} + +static inline int orderRegions(HeapRegion* hr1, HeapRegion* hr2) { + if (hr1 == NULL) { + if (hr2 == NULL) return 0; + else return 1; + } else if (hr2 == NULL) { + return -1; + } + if (hr2->gc_efficiency() < hr1->gc_efficiency()) return -1; + else if (hr1->gc_efficiency() < hr2->gc_efficiency()) return 1; + else return 0; +} + +static int orderRegions(HeapRegion** hr1p, HeapRegion** hr2p) { + return orderRegions(*hr1p, *hr2p); +} + +CollectionSetChooser::CollectionSetChooser() : + // The line below is the worst 
bit of C++ hackery I've ever written + // (Detlefs, 11/23). You should think of it as equivalent to + // "_regions(100, true)": initialize the growable array and inform it + // that it should allocate its elem array(s) on the C heap. The first + // argument, however, is actually a comma expression (new-expr, 100). + // The purpose of the new_expr is to inform the growable array that it + // is *already* allocated on the C heap: it uses the placement syntax to + // keep it from actually doing any allocation. + _markedRegions((ResourceObj::operator new (sizeof(GrowableArray), + (void*)&_markedRegions, + ResourceObj::C_HEAP), + 100), + true), + _curMarkedIndex(0), + _numMarkedRegions(0), + _unmarked_age_1_returned_as_new(false), + _first_par_unreserved_idx(0) +{} + + + +#ifndef PRODUCT +bool CollectionSetChooser::verify() { + int index = 0; + guarantee(_curMarkedIndex <= _numMarkedRegions, + "_curMarkedIndex should be within bounds"); + while (index < _curMarkedIndex) { + guarantee(_markedRegions.at(index++) == NULL, + "all entries before _curMarkedIndex should be NULL"); + } + HeapRegion *prev = NULL; + while (index < _numMarkedRegions) { + HeapRegion *curr = _markedRegions.at(index++); + if (curr != NULL) { + int si = curr->sort_index(); + guarantee(!curr->is_young(), "should not be young!"); + guarantee(si > -1 && si == (index-1), "sort index invariant"); + if (prev != NULL) { + guarantee(orderRegions(prev, curr) != 1, "regions should be sorted"); + } + prev = curr; + } + } + return _cache.verify(); +} +#endif + +bool +CollectionSetChooser::addRegionToCache() { + assert(!_cache.is_full(), "cache should not be full"); + + HeapRegion *hr = NULL; + while (hr == NULL && _curMarkedIndex < _numMarkedRegions) { + hr = _markedRegions.at(_curMarkedIndex++); + } + if (hr == NULL) + return false; + assert(!hr->is_young(), "should not be young!"); + assert(hr->sort_index() == _curMarkedIndex-1, "sort_index invariant"); + _markedRegions.at_put(hr->sort_index(), NULL); + 
_cache.insert(hr); + assert(!_cache.is_empty(), "cache should not be empty"); + assert(verify(), "cache should be consistent"); + return false; +} + +void +CollectionSetChooser::fillCache() { + while (!_cache.is_full() && addRegionToCache()) { + } +} + +void +CollectionSetChooser::sortMarkedHeapRegions() { + guarantee(_cache.is_empty(), "cache should be empty"); + // First trim any unused portion of the top in the parallel case. + if (_first_par_unreserved_idx > 0) { + if (G1PrintParCleanupStats) { + gclog_or_tty->print(" Truncating _markedRegions from %d to %d.\n", + _markedRegions.length(), _first_par_unreserved_idx); + } + assert(_first_par_unreserved_idx <= _markedRegions.length(), + "Or we didn't reserved enough length"); + _markedRegions.trunc_to(_first_par_unreserved_idx); + } + _markedRegions.sort(orderRegions); + assert(_numMarkedRegions <= _markedRegions.length(), "Requirement"); + assert(_numMarkedRegions == 0 + || _markedRegions.at(_numMarkedRegions-1) != NULL, + "Testing _numMarkedRegions"); + assert(_numMarkedRegions == _markedRegions.length() + || _markedRegions.at(_numMarkedRegions) == NULL, + "Testing _numMarkedRegions"); + if (G1PrintParCleanupStats) { + gclog_or_tty->print_cr(" Sorted %d marked regions.", _numMarkedRegions); + } + for (int i = 0; i < _numMarkedRegions; i++) { + assert(_markedRegions.at(i) != NULL, "Should be true by sorting!"); + _markedRegions.at(i)->set_sort_index(i); + if (G1PrintRegionLivenessInfo > 0) { + if (i == 0) gclog_or_tty->print_cr("Sorted marked regions:"); + if (i < G1PrintRegionLivenessInfo || + (_numMarkedRegions-i) < G1PrintRegionLivenessInfo) { + HeapRegion* hr = _markedRegions.at(i); + size_t u = hr->used(); + gclog_or_tty->print_cr(" Region %d: %d used, %d max live, %5.2f%%.", + i, u, hr->max_live_bytes(), + 100.0*(float)hr->max_live_bytes()/(float)u); + } + } + } + if (G1PolicyVerbose > 1) + printSortedHeapRegions(); + assert(verify(), "should now be sorted"); +} + +void +printHeapRegion(HeapRegion *hr) { + 
if (hr->isHumongous()) + gclog_or_tty->print("H: "); + if (hr->in_collection_set()) + gclog_or_tty->print("CS: "); + if (hr->popular()) + gclog_or_tty->print("pop: "); + gclog_or_tty->print_cr("Region " PTR_FORMAT " (%s%s) " + "[" PTR_FORMAT ", " PTR_FORMAT"] " + "Used: " SIZE_FORMAT "K, garbage: " SIZE_FORMAT "K.", + hr, hr->is_young() ? "Y " : " ", + hr->is_marked()? "M1" : "M0", + hr->bottom(), hr->end(), + hr->used()/K, hr->garbage_bytes()/K); +} + +void +CollectionSetChooser::addMarkedHeapRegion(HeapRegion* hr) { + assert(!hr->isHumongous(), + "Humongous regions shouldn't be added to the collection set"); + assert(!hr->is_young(), "should not be young!"); + _markedRegions.append(hr); + _numMarkedRegions++; + hr->calc_gc_efficiency(); +} + +void +CollectionSetChooser:: +prepareForAddMarkedHeapRegionsPar(size_t n_regions, size_t chunkSize) { + _first_par_unreserved_idx = 0; + size_t max_waste = ParallelGCThreads * chunkSize; + // it should be aligned with respect to chunkSize + size_t aligned_n_regions = + (n_regions + (chunkSize - 1)) / chunkSize * chunkSize; + assert( aligned_n_regions % chunkSize == 0, "should be aligned" ); + _markedRegions.at_put_grow((int)(aligned_n_regions + max_waste - 1), NULL); +} + +jint +CollectionSetChooser::getParMarkedHeapRegionChunk(jint n_regions) { + jint res = Atomic::add(n_regions, &_first_par_unreserved_idx); + assert(_markedRegions.length() > res + n_regions - 1, + "Should already have been expanded"); + return res - n_regions; +} + +void +CollectionSetChooser::setMarkedHeapRegion(jint index, HeapRegion* hr) { + assert(_markedRegions.at(index) == NULL, "precondition"); + assert(!hr->is_young(), "should not be young!"); + _markedRegions.at_put(index, hr); + hr->calc_gc_efficiency(); +} + +void +CollectionSetChooser::incNumMarkedHeapRegions(jint inc_by) { + (void)Atomic::add(inc_by, &_numMarkedRegions); +} + +void +CollectionSetChooser::clearMarkedHeapRegions(){ + for (int i = 0; i < _markedRegions.length(); i++) { + 
HeapRegion* r = _markedRegions.at(i); + if (r != NULL) r->set_sort_index(-1); + } + _markedRegions.clear(); + _curMarkedIndex = 0; + _numMarkedRegions = 0; + _cache.clear(); +}; + +void +CollectionSetChooser::updateAfterFullCollection() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + clearMarkedHeapRegions(); +} + +void +CollectionSetChooser::printSortedHeapRegions() { + gclog_or_tty->print_cr("Printing %d Heap Regions sorted by amount of known garbage", + _numMarkedRegions); + for (int i = 0; i < _markedRegions.length(); i++) { + printHeapRegion(_markedRegions.at(i)); + } + gclog_or_tty->print_cr("Done sorted heap region print"); +} + +void CollectionSetChooser::removeRegion(HeapRegion *hr) { + int si = hr->sort_index(); + assert(si == -1 || hr->is_marked(), "Sort index not valid."); + if (si > -1) { + assert(_markedRegions.at(si) == hr, "Sort index not valid." ); + _markedRegions.at_put(si, NULL); + } else if (si < -1) { + assert(_cache.region_in_cache(hr), "should be in the cache"); + _cache.remove(hr); + assert(hr->sort_index() == -1, "sort index invariant"); + } + hr->set_sort_index(-1); +} + +// if time_remaining < 0.0, then this method should try to return +// a region, whether it fits within the remaining time or not +HeapRegion* +CollectionSetChooser::getNextMarkedRegion(double time_remaining, + double avg_prediction) { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + G1CollectorPolicy* g1p = g1h->g1_policy(); + fillCache(); + if (_cache.is_empty()) { + assert(_curMarkedIndex == _numMarkedRegions, + "if cache is empty, list should also be empty"); + return NULL; + } + + HeapRegion *hr = _cache.get_first(); + assert(hr != NULL, "if cache not empty, first entry should be non-null"); + double predicted_time = g1h->predict_region_elapsed_time_ms(hr, false); + + if (g1p->adaptive_young_list_length()) { + if (time_remaining - predicted_time < 0.0) { + g1h->check_if_region_is_too_expensive(predicted_time); + return NULL; + } + } else { + if 
(predicted_time > 2.0 * avg_prediction) { + return NULL; + } + } + + HeapRegion *hr2 = _cache.remove_first(); + assert(hr == hr2, "cache contents should not have changed"); + + return hr; +} diff --git a/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.hpp b/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.hpp new file mode 100644 index 00000000000..60d8bf2057e --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.hpp @@ -0,0 +1,138 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// We need to sort heap regions by collection desirability. 
+ +class CSetChooserCache { +private: + enum { + CacheLength = 16 + } PrivateConstants; + + HeapRegion* _cache[CacheLength]; + int _occupancy; // number of region in cache + int _first; // "first" region in the cache + + // adding CacheLength to deal with negative values + inline int trim_index(int index) { + return (index + CacheLength) % CacheLength; + } + + inline int get_sort_index(int index) { + return -index-2; + } + inline int get_index(int sort_index) { + return -sort_index-2; + } + +public: + CSetChooserCache(void); + + inline int occupancy(void) { return _occupancy; } + inline bool is_full() { return _occupancy == CacheLength; } + inline bool is_empty() { return _occupancy == 0; } + + void clear(void); + void insert(HeapRegion *hr); + HeapRegion *remove_first(void); + void remove (HeapRegion *hr); + inline HeapRegion *get_first(void) { + return _cache[_first]; + } + +#ifndef PRODUCT + bool verify (void); + bool region_in_cache(HeapRegion *hr) { + int sort_index = hr->sort_index(); + if (sort_index < -1) { + int index = get_index(sort_index); + guarantee(index < CacheLength, "should be within bounds"); + return _cache[index] == hr; + } else + return 0; + } +#endif // PRODUCT +}; + +class CollectionSetChooser: public CHeapObj { + + GrowableArray _markedRegions; + int _curMarkedIndex; + int _numMarkedRegions; + CSetChooserCache _cache; + + // True iff last collection pause ran of out new "age 0" regions, and + // returned an "age 1" region. + bool _unmarked_age_1_returned_as_new; + + jint _first_par_unreserved_idx; + +public: + + HeapRegion* getNextMarkedRegion(double time_so_far, double avg_prediction); + + CollectionSetChooser(); + + void printSortedHeapRegions(); + + void sortMarkedHeapRegions(); + void fillCache(); + bool addRegionToCache(void); + void addMarkedHeapRegion(HeapRegion *hr); + + // Must be called before calls to getParMarkedHeapRegionChunk. + // "n_regions" is the number of regions, "chunkSize" the chunk size. 
+ void prepareForAddMarkedHeapRegionsPar(size_t n_regions, size_t chunkSize); + // Returns the first index in a contiguous chunk of "n_regions" indexes + // that the calling thread has reserved. These must be set by the + // calling thread using "setMarkedHeapRegion" (to NULL if necessary). + jint getParMarkedHeapRegionChunk(jint n_regions); + // Set the marked array entry at index to hr. Careful to claim the index + // first if in parallel. + void setMarkedHeapRegion(jint index, HeapRegion* hr); + // Atomically increment the number of claimed regions by "inc_by". + void incNumMarkedHeapRegions(jint inc_by); + + void clearMarkedHeapRegions(); + + void updateAfterFullCollection(); + + // Ensure that "hr" is not a member of the marked region array or the cache + void removeRegion(HeapRegion* hr); + + bool unmarked_age_1_returned_as_new() { return _unmarked_age_1_returned_as_new; } + + // Returns true if the used portion of "_markedRegions" is properly + // sorted, otherwise asserts false. +#ifndef PRODUCT + bool verify(void); + bool regionProperlyOrdered(HeapRegion* r) { + int si = r->sort_index(); + return (si == -1) || + (si > -1 && _markedRegions.at(si) == r) || + (si < -1 && _cache.region_in_cache(r)); + } +#endif + +}; diff --git a/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp new file mode 100644 index 00000000000..2eb2bc0ca69 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp @@ -0,0 +1,355 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_concurrentG1Refine.cpp.incl" + +bool ConcurrentG1Refine::_enabled = false; + +ConcurrentG1Refine::ConcurrentG1Refine() : + _pya(PYA_continue), _last_pya(PYA_continue), + _last_cards_during(), _first_traversal(false), + _card_counts(NULL), _cur_card_count_histo(NULL), _cum_card_count_histo(NULL), + _hot_cache(NULL), + _def_use_cache(false), _use_cache(false), + _n_periods(0), _total_cards(0), _total_travs(0) +{ + if (G1ConcRefine) { + _cg1rThread = new ConcurrentG1RefineThread(this); + assert(cg1rThread() != NULL, "Conc refine should have been created"); + assert(cg1rThread()->cg1r() == this, + "Conc refine thread should refer to this"); + } else { + _cg1rThread = NULL; + } +} + +void ConcurrentG1Refine::init() { + if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + _n_card_counts = + (unsigned) (g1h->g1_reserved_obj_bytes() >> CardTableModRefBS::card_shift); + _card_counts = NEW_C_HEAP_ARRAY(unsigned char, _n_card_counts); + for (size_t i = 0; i < _n_card_counts; i++) _card_counts[i] = 0; + ModRefBarrierSet* bs = g1h->mr_bs(); + guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition"); + CardTableModRefBS* ctbs = 
(CardTableModRefBS*)bs; + _ct_bot = ctbs->byte_for_const(g1h->reserved_region().start()); + if (G1ConcRSCountTraversals) { + _cur_card_count_histo = NEW_C_HEAP_ARRAY(unsigned, 256); + _cum_card_count_histo = NEW_C_HEAP_ARRAY(unsigned, 256); + for (int i = 0; i < 256; i++) { + _cur_card_count_histo[i] = 0; + _cum_card_count_histo[i] = 0; + } + } + } + if (G1ConcRSLogCacheSize > 0) { + _def_use_cache = true; + _use_cache = true; + _hot_cache_size = (1 << G1ConcRSLogCacheSize); + _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size); + _n_hot = 0; + _hot_cache_idx = 0; + } +} + +ConcurrentG1Refine::~ConcurrentG1Refine() { + if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) { + assert(_card_counts != NULL, "Logic"); + FREE_C_HEAP_ARRAY(unsigned char, _card_counts); + assert(_cur_card_count_histo != NULL, "Logic"); + FREE_C_HEAP_ARRAY(unsigned, _cur_card_count_histo); + assert(_cum_card_count_histo != NULL, "Logic"); + FREE_C_HEAP_ARRAY(unsigned, _cum_card_count_histo); + } + if (G1ConcRSLogCacheSize > 0) { + assert(_hot_cache != NULL, "Logic"); + FREE_C_HEAP_ARRAY(jbyte*, _hot_cache); + } +} + +bool ConcurrentG1Refine::refine() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + unsigned cards_before = g1h->g1_rem_set()->conc_refine_cards(); + clear_hot_cache(); // Any previous values in this are now invalid. + g1h->g1_rem_set()->concurrentRefinementPass(this); + _traversals++; + unsigned cards_after = g1h->g1_rem_set()->conc_refine_cards(); + unsigned cards_during = cards_after-cards_before; + // If this is the first traversal in the current enabling + // and we did some cards, or if the number of cards found is decreasing + // sufficiently quickly, then keep going. Otherwise, sleep a while. 
+ bool res = + (_first_traversal && cards_during > 0) + || + (!_first_traversal && cards_during * 3 < _last_cards_during * 2); + _last_cards_during = cards_during; + _first_traversal = false; + return res; +} + +void ConcurrentG1Refine::enable() { + MutexLocker x(G1ConcRefine_mon); + if (!_enabled) { + _enabled = true; + _first_traversal = true; _last_cards_during = 0; + G1ConcRefine_mon->notify_all(); + } +} + +unsigned ConcurrentG1Refine::disable() { + MutexLocker x(G1ConcRefine_mon); + if (_enabled) { + _enabled = false; + return _traversals; + } else { + return 0; + } +} + +void ConcurrentG1Refine::wait_for_ConcurrentG1Refine_enabled() { + G1ConcRefine_mon->lock(); + while (!_enabled) { + G1ConcRefine_mon->wait(Mutex::_no_safepoint_check_flag); + } + G1ConcRefine_mon->unlock(); + _traversals = 0; +}; + +void ConcurrentG1Refine::set_pya_restart() { + // If we're using the log-based RS barrier, the above will cause + // in-progress traversals of completed log buffers to quit early; we will + // also abandon all other buffers. + if (G1RSBarrierUseQueue) { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + dcqs.abandon_logs(); + if (_cg1rThread->do_traversal()) { + _pya = PYA_restart; + } else { + _cg1rThread->set_do_traversal(true); + // Reset the post-yield actions. + _pya = PYA_continue; + _last_pya = PYA_continue; + } + } else { + _pya = PYA_restart; + } +} + +void ConcurrentG1Refine::set_pya_cancel() { + _pya = PYA_cancel; +} + +PostYieldAction ConcurrentG1Refine::get_pya() { + if (_pya != PYA_continue) { + jint val = _pya; + while (true) { + jint val_read = Atomic::cmpxchg(PYA_continue, &_pya, val); + if (val_read == val) { + PostYieldAction res = (PostYieldAction)val; + assert(res != PYA_continue, "Only the refine thread should reset."); + _last_pya = res; + return res; + } else { + val = val_read; + } + } + } + // QQQ WELL WHAT DO WE RETURN HERE??? + // make up something! 
+ return PYA_continue; +} + +PostYieldAction ConcurrentG1Refine::get_last_pya() { + PostYieldAction res = _last_pya; + _last_pya = PYA_continue; + return res; +} + +bool ConcurrentG1Refine::do_traversal() { + return _cg1rThread->do_traversal(); +} + +int ConcurrentG1Refine::add_card_count(jbyte* card_ptr) { + size_t card_num = (card_ptr - _ct_bot); + guarantee(0 <= card_num && card_num < _n_card_counts, "Bounds"); + unsigned char cnt = _card_counts[card_num]; + if (cnt < 255) _card_counts[card_num]++; + return cnt; + _total_travs++; +} + +jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr) { + int count = add_card_count(card_ptr); + // Count previously unvisited cards. + if (count == 0) _total_cards++; + // We'll assume a traversal unless we store it in the cache. + if (count < G1ConcRSHotCardLimit) { + _total_travs++; + return card_ptr; + } + // Otherwise, it's hot. + jbyte* res = NULL; + MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag); + if (_n_hot == _hot_cache_size) { + _total_travs++; + res = _hot_cache[_hot_cache_idx]; + _n_hot--; + } + // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx. 
+ _hot_cache[_hot_cache_idx] = card_ptr; + _hot_cache_idx++; + if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0; + _n_hot++; + return res; +} + + +void ConcurrentG1Refine::clean_up_cache(int worker_i, G1RemSet* g1rs) { + assert(!use_cache(), "cache should be disabled"); + int start_ind = _hot_cache_idx-1; + for (int i = 0; i < _n_hot; i++) { + int ind = start_ind - i; + if (ind < 0) ind = ind + _hot_cache_size; + jbyte* entry = _hot_cache[ind]; + if (entry != NULL) { + g1rs->concurrentRefineOneCard(entry, worker_i); + } + } + _n_hot = 0; + _hot_cache_idx = 0; +} + +void ConcurrentG1Refine::clear_and_record_card_counts() { + if (G1ConcRSLogCacheSize == 0 && !G1ConcRSCountTraversals) return; + _n_periods++; + if (G1ConcRSCountTraversals) { + for (size_t i = 0; i < _n_card_counts; i++) { + unsigned char bucket = _card_counts[i]; + _cur_card_count_histo[bucket]++; + _card_counts[i] = 0; + } + gclog_or_tty->print_cr("Card counts:"); + for (int i = 0; i < 256; i++) { + if (_cur_card_count_histo[i] > 0) { + gclog_or_tty->print_cr(" %3d: %9d", i, _cur_card_count_histo[i]); + _cum_card_count_histo[i] += _cur_card_count_histo[i]; + _cur_card_count_histo[i] = 0; + } + } + } else { + assert(G1ConcRSLogCacheSize > 0, "Logic"); + Copy::fill_to_words((HeapWord*)(&_card_counts[0]), + _n_card_counts / HeapWordSize); + } +} + +void +ConcurrentG1Refine:: +print_card_count_histo_range(unsigned* histo, int from, int to, + float& cum_card_pct, + float& cum_travs_pct) { + unsigned cards = 0; + unsigned travs = 0; + guarantee(to <= 256, "Precondition"); + for (int i = from; i < to-1; i++) { + cards += histo[i]; + travs += histo[i] * i; + } + if (to == 256) { + unsigned histo_card_sum = 0; + unsigned histo_trav_sum = 0; + for (int i = 1; i < 255; i++) { + histo_trav_sum += histo[i] * i; + } + cards += histo[255]; + // correct traversals for the last one. 
+ unsigned travs_255 = (unsigned) (_total_travs - histo_trav_sum); + travs += travs_255; + + } else { + cards += histo[to-1]; + travs += histo[to-1] * (to-1); + } + float fperiods = (float)_n_periods; + float f_tot_cards = (float)_total_cards/fperiods; + float f_tot_travs = (float)_total_travs/fperiods; + if (cards > 0) { + float fcards = (float)cards/fperiods; + float ftravs = (float)travs/fperiods; + if (to == 256) { + gclog_or_tty->print(" %4d- %10.2f%10.2f", from, fcards, ftravs); + } else { + gclog_or_tty->print(" %4d-%4d %10.2f%10.2f", from, to-1, fcards, ftravs); + } + float pct_cards = fcards*100.0/f_tot_cards; + cum_card_pct += pct_cards; + float pct_travs = ftravs*100.0/f_tot_travs; + cum_travs_pct += pct_travs; + gclog_or_tty->print_cr("%10.2f%10.2f%10.2f%10.2f", + pct_cards, cum_card_pct, + pct_travs, cum_travs_pct); + } +} + +void ConcurrentG1Refine::print_final_card_counts() { + if (!G1ConcRSCountTraversals) return; + + gclog_or_tty->print_cr("Did %d total traversals of %d distinct cards.", + _total_travs, _total_cards); + float fperiods = (float)_n_periods; + gclog_or_tty->print_cr(" This is an average of %8.2f traversals, %8.2f cards, " + "per collection.", (float)_total_travs/fperiods, + (float)_total_cards/fperiods); + gclog_or_tty->print_cr(" This is an average of %8.2f traversals/distinct " + "dirty card.\n", + _total_cards > 0 ? 
+ (float)_total_travs/(float)_total_cards : 0.0); + + + gclog_or_tty->print_cr("Histogram:\n\n%10s %10s%10s%10s%10s%10s%10s", + "range", "# cards", "# travs", "% cards", "(cum)", + "% travs", "(cum)"); + gclog_or_tty->print_cr("------------------------------------------------------------" + "-------------"); + float cum_cards_pct = 0.0; + float cum_travs_pct = 0.0; + for (int i = 1; i < 10; i++) { + print_card_count_histo_range(_cum_card_count_histo, i, i+1, + cum_cards_pct, cum_travs_pct); + } + for (int i = 10; i < 100; i += 10) { + print_card_count_histo_range(_cum_card_count_histo, i, i+10, + cum_cards_pct, cum_travs_pct); + } + print_card_count_histo_range(_cum_card_count_histo, 100, 150, + cum_cards_pct, cum_travs_pct); + print_card_count_histo_range(_cum_card_count_histo, 150, 200, + cum_cards_pct, cum_travs_pct); + print_card_count_histo_range(_cum_card_count_histo, 150, 255, + cum_cards_pct, cum_travs_pct); + print_card_count_histo_range(_cum_card_count_histo, 255, 256, + cum_cards_pct, cum_travs_pct); +} diff --git a/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp new file mode 100644 index 00000000000..ea9a997d7a6 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp @@ -0,0 +1,132 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// Forward decl +class ConcurrentG1RefineThread; +class G1RemSet; + +// What to do after a yield: +enum PostYieldAction { + PYA_continue, // Continue the traversal + PYA_restart, // Restart + PYA_cancel // It's been completed by somebody else: cancel. +}; + +class ConcurrentG1Refine { + ConcurrentG1RefineThread* _cg1rThread; + + volatile jint _pya; + PostYieldAction _last_pya; + + static bool _enabled; // Protected by G1ConcRefine_mon. + unsigned _traversals; + + // Number of cards processed during last refinement traversal. + unsigned _first_traversal; + unsigned _last_cards_during; + + // The cache for card refinement. + bool _use_cache; + bool _def_use_cache; + size_t _n_periods; + size_t _total_cards; + size_t _total_travs; + + unsigned char* _card_counts; + unsigned _n_card_counts; + const jbyte* _ct_bot; + unsigned* _cur_card_count_histo; + unsigned* _cum_card_count_histo; + jbyte** _hot_cache; + int _hot_cache_size; + int _n_hot; + int _hot_cache_idx; + + // Returns the count of this card after incrementing it. + int add_card_count(jbyte* card_ptr); + + void print_card_count_histo_range(unsigned* histo, int from, int to, + float& cum_card_pct, + float& cum_travs_pct); + public: + ConcurrentG1Refine(); + ~ConcurrentG1Refine(); + + void init(); // Accomplish some initialization that has to wait. + + // Enabled Conc refinement, waking up thread if necessary. + void enable(); + + // Returns the number of traversals performed since this refiner was enabled. + unsigned disable(); + + // Requires G1ConcRefine_mon to be held. 
+ bool enabled() { return _enabled; } + + // Returns only when G1 concurrent refinement has been enabled. + void wait_for_ConcurrentG1Refine_enabled(); + + // Do one concurrent refinement pass over the card table. Returns "true" + // if heuristics determine that another pass should be done immediately. + bool refine(); + + // Indicate that an in-progress refinement pass should start over. + void set_pya_restart(); + // Indicate that an in-progress refinement pass should quit. + void set_pya_cancel(); + + // Get the appropriate post-yield action. Also sets last_pya. + PostYieldAction get_pya(); + + // The last PYA read by "get_pya". + PostYieldAction get_last_pya(); + + bool do_traversal(); + + ConcurrentG1RefineThread* cg1rThread() { return _cg1rThread; } + + // If this is the first entry for the slot, writes into the cache and + // returns NULL. If it causes an eviction, returns the evicted pointer. + // Otherwise, its a cache hit, and returns NULL. + jbyte* cache_insert(jbyte* card_ptr); + + // Process the cached entries. + void clean_up_cache(int worker_i, G1RemSet* g1rs); + + // Discard entries in the hot cache. + void clear_hot_cache() { + _hot_cache_idx = 0; _n_hot = 0; + } + + bool hot_cache_is_empty() { return _n_hot == 0; } + + bool use_cache() { return _use_cache; } + void set_use_cache(bool b) { + if (b) _use_cache = _def_use_cache; + else _use_cache = false; + } + + void clear_and_record_card_counts(); + void print_final_card_counts(); +}; diff --git a/hotspot/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp new file mode 100644 index 00000000000..110c08327c3 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp @@ -0,0 +1,246 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 */

#include "incls/_precompiled.incl"
#include "incls/_concurrentG1RefineThread.cpp.incl"

// ======= Concurrent G1 Refinement Thread ========

// Constructing the thread object also creates and starts the underlying
// thread (create_and_start()). The sleep interval used by smoothed
// refinement starts at 5 ms.

ConcurrentG1RefineThread::
ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r) :
  ConcurrentGCThread(),
  _cg1r(cg1r),
  _started(false),
  _in_progress(false),
  _do_traversal(false),
  _vtime_accum(0.0),
  _co_tracker(G1CRGroup),
  _interval_ms(5.0)
{
  create_and_start();
}

// How long the traversal-based refiner sleeps between passes when the last
// pass did not ask for an immediate follow-up.
const long timeout = 200; // ms.
+
+// Refinement loop used by run() when G1RSBarrierUseQueue is off.
+//
+// Waits for the ConcurrentG1Refine to become enabled and then, for as long
+// as it stays enabled, joins the suspendible thread set, runs one
+// _cg1r->refine() pass and leaves the set again.  When refine() returns
+// false the thread waits on CGC_lock for `timeout` ms before the next pass.
+void ConcurrentG1RefineThread::traversalBasedRefinement() {
+  _cg1r->wait_for_ConcurrentG1Refine_enabled();
+  MutexLocker x(G1ConcRefine_mon);
+  while (_cg1r->enabled()) {
+    MutexUnlocker ux(G1ConcRefine_mon);
+    ResourceMark rm;
+    HandleMark   hm;
+
+    if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine starting pass");
+    _sts.join();
+    bool no_sleep = _cg1r->refine();
+    _sts.leave();
+    if (!no_sleep) {
+      // Named differently from the outer locker so that it no longer
+      // shadows it.
+      MutexLockerEx cgc_locker(CGC_lock, Mutex::_no_safepoint_check_flag);
+      // We do this only for the timeout; we don't expect this to be signalled.
+      CGC_lock->wait(Mutex::_no_safepoint_check_flag, timeout);
+    }
+  }
+}
+
+// One round of refinement used by run() when G1RSBarrierUseQueue is on.
+//
+// Blocks on DirtyCardQ_CBL_mon until a traversal has been requested, the
+// dirty card queue set reports completed buffers to process, or
+// termination has been requested (in which case it returns at once).
+// Then, inside the suspendible thread set, it either performs a full
+// refine() traversal or applies the closure to completed buffers one at a
+// time, calling sample_young_list_rs_lengths() after each buffer.  With
+// G1SmoothConcRefine on, the thread also sleeps _interval_ms between
+// buffers and adapts that interval to whether the number of completed
+// buffers is growing or shrinking.
+void ConcurrentG1RefineThread::queueBasedRefinement() {
+  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+  // Wait for completed log buffers to exist.
+  {
+    MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
+    while (!_do_traversal && !dcqs.process_completed_buffers() &&
+           !_should_terminate) {
+      DirtyCardQ_CBL_mon->wait(Mutex::_no_safepoint_check_flag);
+    }
+  }
+
+  if (_should_terminate) {
+    return;
+  }
+
+  // Now we take them off (this doesn't hold locks while it applies
+  // closures).  If we did a full collection, then we'll do a full
+  // traversal.
+  _sts.join();
+  if (_do_traversal) {
+    (void)_cg1r->refine();
+    switch (_cg1r->get_last_pya()) {
+    case PYA_cancel: case PYA_continue:
+      // Continue was caught and handled inside "refine".  If it's still
+      // "continue" when we get here, we're done.
+      _do_traversal = false;
+      break;
+    case PYA_restart:
+      assert(_do_traversal, "Because of Full GC.");
+      break;
+    }
+  } else {
+    int    lower_limit     = 0;
+    // The next two are only used when G1SmoothConcRefine is on; they are
+    // given a value up front so they are never read while indeterminate.
+    double start_vtime_sec = 0.0;
+    int    prev_buffer_num = 0;
+
+    if (G1SmoothConcRefine) {
+      start_vtime_sec = os::elapsedVTime();
+      prev_buffer_num = (int) dcqs.completed_buffers_num();
+    } else {
+      lower_limit = DCQBarrierProcessCompletedThreshold / 4; // For now.
+    }
+    while (dcqs.apply_closure_to_completed_buffer(0, lower_limit)) {
+      int curr_buffer_num = 0; // only meaningful when G1SmoothConcRefine is on
+
+      if (G1SmoothConcRefine) {
+        double end_vtime_sec     = os::elapsedVTime();
+        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
+        int    elapsed_vtime_ms  = (int) (elapsed_vtime_sec * 1000.0);
+        curr_buffer_num = (int) dcqs.completed_buffers_num();
+
+        // More buffers than last time (or than the threshold): sleep less.
+        // Fewer buffers than last time: sleep more.
+        if (curr_buffer_num > prev_buffer_num ||
+            curr_buffer_num > DCQBarrierProcessCompletedThreshold) {
+          decreaseInterval(elapsed_vtime_ms);
+        } else if (curr_buffer_num < prev_buffer_num) {
+          increaseInterval(elapsed_vtime_ms);
+        }
+      }
+
+      sample_young_list_rs_lengths();
+      _co_tracker.update(false);
+
+      if (G1SmoothConcRefine) {
+        start_vtime_sec = os::elapsedVTime();
+        prev_buffer_num = curr_buffer_num;
+
+        // Leave the suspendible thread set for the duration of the sleep.
+        _sts.leave();
+        os::sleep(Thread::current(), (jlong) _interval_ms, false);
+        _sts.join();
+      }
+    }
+    // Make sure we harvest the PYA, if any.
+    (void)_cg1r->get_pya();
+  }
+  _sts.leave();
+}
+
+// If the policy uses an adaptive young list length, steps through the
+// heap's young_list_rs_length_sampling_* iteration and then asks the policy
+// to check its prediction.  After every 10 regions it checks whether a
+// yield has been requested; if so it yields and abandons the rest of the
+// iteration (the prediction check still runs).
+void ConcurrentG1RefineThread::sample_young_list_rs_lengths() {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  G1CollectorPolicy* g1p = g1h->g1_policy();
+  if (g1p->adaptive_young_list_length()) {
+    int regions_visited = 0;
+
+    g1h->young_list_rs_length_sampling_init();
+    while (g1h->young_list_rs_length_sampling_more()) {
+      g1h->young_list_rs_length_sampling_next();
+      ++regions_visited;
+
+      // we try to yield every time we visit 10 regions
+      if (regions_visited == 10) {
+        if (_sts.should_yield()) {
+          _sts.yield("G1 refine");
+          // we just abandon the iteration
+          break;
+        }
+        regions_visited = 0;
+      }
+    }
+
+    g1p->check_prediction_validity();
+  }
+}
+
+// Thread entry point.  Records the starting virtual time, waits for
+// universe initialization, enables and starts the _co_tracker and then
+// alternates refinement with tracker / _vtime_accum updates until
+// _should_terminate is set.  It finishes with a final tracker update and
+// a call to terminate().
+void ConcurrentG1RefineThread::run() {
+  initialize_in_thread();
+  _vtime_start = os::elapsedVTime();
+  wait_for_universe_init();
+
+  _co_tracker.enable();
+  _co_tracker.start();
+
+  while (!_should_terminate) {
+    // Refine in the mode selected by G1RSBarrierUseQueue.
+    if (G1RSBarrierUseQueue) {
+      queueBasedRefinement();
+    } else {
+      traversalBasedRefinement();
+    }
+    _sts.join();
+    _co_tracker.update();
+    _sts.leave();
+    if (os::supports_vtime()) {
+      _vtime_accum = (os::elapsedVTime() - _vtime_start);
+    } else {
+      _vtime_accum = 0.0;
+    }
+  }
+  _sts.join();
+  _co_tracker.update(true);
+  _sts.leave();
+  assert(_should_terminate, "just checking");
+
+  terminate();
+}
+
+
+// Yields through the suspendible thread set, tracing entry and exit when
+// TraceG1Refine is on.
+void ConcurrentG1RefineThread::yield() {
+  if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine-yield");
+  _sts.yield("G1 refine");
+  if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine-yield-end");
+}
+
+// Requests termination and waits for it: sets _should_terminate under
+// Terminator_lock, notifies all waiters on DirtyCardQ_CBL_mon (which
+// queueBasedRefinement() waits on), then blocks on Terminator_lock until
+// _has_terminated is set.
+void ConcurrentG1RefineThread::stop() {
+  // it is ok to take late safepoints here, if needed
+  {
+    MutexLockerEx mu(Terminator_lock);
+    _should_terminate = true;
+  }
+
+  {
+    MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
+    DirtyCardQ_CBL_mon->notify_all();
+  }
+
+  {
+    MutexLockerEx mu(Terminator_lock);
+    while (!_has_terminated) {
+      Terminator_lock->wait();
+    }
+  }
+  if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine-stop");
+}
+
+// Prints the quoted thread name followed by the generic Thread::print()
+// output and a line break to gclog_or_tty.
+void ConcurrentG1RefineThread::print() {
+  gclog_or_tty->print("\"Concurrent G1 Refinement Thread\" ");
+  Thread::print();
+  gclog_or_tty->cr();
+}
+
+// Sets the flag that makes queueBasedRefinement() perform a full refine()
+// traversal instead of processing completed buffers.
+void ConcurrentG1RefineThread::set_do_traversal(bool b) {
+  _do_traversal = b;
+}
diff --git a/hotspot/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp
new file mode 100644
index 00000000000..69f272c7895
--- /dev/null
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Forward Decl.
+class ConcurrentG1Refine;
+
+// The G1 Concurrent Refinement Thread (could be several in the future).
+// It is a ConcurrentGCThread that carries out refinement on behalf of the
+// ConcurrentG1Refine instance handed to its constructor.
+
+class ConcurrentG1RefineThread: public ConcurrentGCThread {
+  friend class VMStructs;
+  friend class G1CollectedHeap;
+
+  double _vtime_start;  // Initial virtual time (recorded at the top of run()).
+  double _vtime_accum;  // Accumulated virtual time since _vtime_start.
+
+ public:
+  // Thread entry point.
+  virtual void run();
+
+ private:
+  ConcurrentG1Refine*              _cg1r;   // Passed to the constructor.
+  bool                             _started;      // See set_started() etc.
+  bool                             _in_progress;  // See set_in_progress() etc.
+  // NOTE(review): no use of _restart is visible in this class or in the
+  // visible part of concurrentG1RefineThread.cpp -- confirm it is needed.
+  volatile bool                    _restart;
+
+  COTracker                        _co_tracker;
+  // Sleep between completed buffers in queueBasedRefinement() when
+  // G1SmoothConcRefine is on; adapted by the two methods below.
+  double                           _interval_ms;
+
+  // When true, queueBasedRefinement() does a full refine() traversal.
+  bool                             _do_traversal;
+
+  // Shrinks _interval_ms by 20%, but never below processing_time_ms.
+  void decreaseInterval(int processing_time_ms) {
+    double min_interval_ms = (double) processing_time_ms;
+    _interval_ms = 0.8 * _interval_ms;
+    if (_interval_ms < min_interval_ms)
+      _interval_ms = min_interval_ms;
+  }
+  // Grows _interval_ms by 10%, capped at 9 * processing_time_ms.  The cap
+  // is skipped when it is not positive (i.e. processing_time_ms <= 0).
+  void increaseInterval(int processing_time_ms) {
+    double max_interval_ms = 9.0 * (double) processing_time_ms;
+    _interval_ms = 1.1 * _interval_ms;
+    if (max_interval_ms > 0 && _interval_ms > max_interval_ms)
+      _interval_ms = max_interval_ms;
+  }
+
+  // NOTE(review): no definition of sleepBeforeNextCycle() appears in the
+  // visible part of concurrentG1RefineThread.cpp -- confirm it exists.
+  void sleepBeforeNextCycle();
+
+  // Refinement loop used by run() when G1RSBarrierUseQueue is off.
+  void traversalBasedRefinement();
+
+  // Refinement round used by run() when G1RSBarrierUseQueue is on.
+  void queueBasedRefinement();
+
+  // For use by G1CollectedHeap, which is a friend.
+  static SuspendibleThreadSet* sts() { return &_sts; }
+
+ public:
+  // Constructor
+  ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r);
+
+  // Printing
+  void print();
+
+  // Total virtual time so far.
+  double vtime_accum() { return _vtime_accum; }
+
+  ConcurrentG1Refine* cg1r()                     { return _cg1r;     }
+
+
+  // Plain setters/getters for the _started flag.
+  void            set_started()                  { _started = true;   }
+  void            clear_started()                { _started = false;  }
+  bool            started()                      { return _started;   }
+
+  // Plain setters/getters for the _in_progress flag.
+  void            set_in_progress()              { _in_progress = true;   }
+  void            clear_in_progress()            { _in_progress = false;  }
+  bool            in_progress()                  { return _in_progress;   }
+
+  // Request (or cancel) a full traversal; see _do_traversal.
+  void            set_do_traversal(bool b);
+  bool            do_traversal()                 { return _do_traversal;  }
+
+  // Called from queueBasedRefinement() after each processed buffer.
+  void            sample_young_list_rs_lengths();
+
+  // Yield for GC
+  void            yield();
+
+  // shutdown
+  static void     stop();
+};
diff --git a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
new file mode 100644
index 00000000000..036e0e7fcf3
--- /dev/null
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
@@ -0,0 +1,3979 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_concurrentMark.cpp.incl" + +// +// CMS Bit Map Wrapper + +CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter): + _bm((uintptr_t*)NULL,0), + _shifter(shifter) { + _bmStartWord = (HeapWord*)(rs.base()); + _bmWordSize = rs.size()/HeapWordSize; // rs.size() is in bytes + ReservedSpace brs(ReservedSpace::allocation_align_size_up( + (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1)); + + guarantee(brs.is_reserved(), "couldn't allocate CMS bit map"); + // For now we'll just commit all of the bit map up fromt. + // Later on we'll try to be more parsimonious with swap. + guarantee(_virtual_space.initialize(brs, brs.size()), + "couldn't reseve backing store for CMS bit map"); + assert(_virtual_space.committed_size() == brs.size(), + "didn't reserve backing store for all of CMS bit map?"); + _bm.set_map((uintptr_t*)_virtual_space.low()); + assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >= + _bmWordSize, "inconsistency in bit map sizing"); + _bm.set_size(_bmWordSize >> _shifter); +} + +HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr, + HeapWord* limit) const { + // First we must round addr *up* to a possible object boundary. 
+ addr = (HeapWord*)align_size_up((intptr_t)addr, + HeapWordSize << _shifter); + size_t addrOffset = heapWordToOffset(addr); + if (limit == NULL) limit = _bmStartWord + _bmWordSize; + size_t limitOffset = heapWordToOffset(limit); + size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset); + HeapWord* nextAddr = offsetToHeapWord(nextOffset); + assert(nextAddr >= addr, "get_next_one postcondition"); + assert(nextAddr == limit || isMarked(nextAddr), + "get_next_one postcondition"); + return nextAddr; +} + +HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr, + HeapWord* limit) const { + size_t addrOffset = heapWordToOffset(addr); + if (limit == NULL) limit = _bmStartWord + _bmWordSize; + size_t limitOffset = heapWordToOffset(limit); + size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset); + HeapWord* nextAddr = offsetToHeapWord(nextOffset); + assert(nextAddr >= addr, "get_next_one postcondition"); + assert(nextAddr == limit || !isMarked(nextAddr), + "get_next_one postcondition"); + return nextAddr; +} + +int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const { + assert((diff & ((1 << _shifter) - 1)) == 0, "argument check"); + return (int) (diff >> _shifter); +} + +bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) { + HeapWord* left = MAX2(_bmStartWord, mr.start()); + HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end()); + if (right > left) { + // Right-open interval [leftOffset, rightOffset). 
+    return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
+  } else {
+    return true;
+  }
+}
+
+void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
+                                             size_t    from_start_index,
+                                             HeapWord* to_start_word,
+                                             size_t    word_num) {
+  _bm.mostly_disjoint_range_union(from_bitmap,
+                                  from_start_index,
+                                  heapWordToOffset(to_start_word),
+                                  word_num);
+}
+
+#ifndef PRODUCT
+bool CMBitMapRO::covers(ReservedSpace rs) const {
+  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
+  assert(((size_t)_bm.size() * (1 << _shifter)) == _bmWordSize,
+         "size inconsistency");
+  return _bmStartWord == (HeapWord*)(rs.base()) &&
+         _bmWordSize  == rs.size()>>LogHeapWordSize;
+}
+#endif
+
+void CMBitMap::clearAll() {
+  _bm.clear();
+  return;
+}
+
+// Sets the bits for the part of mr that lies inside the range covered by
+// this bitmap.
+void CMBitMap::markRange(MemRegion mr) {
+  // intersection() returns the clipped region rather than modifying mr, so
+  // the result has to be assigned back; it used to be thrown away, which
+  // left mr unclipped.
+  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
+  assert(!mr.is_empty(), "unexpected empty region");
+  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
+          ((HeapWord *) mr.end())),
+         "markRange memory region end is not card aligned");
+  // convert address range into offset range
+  _bm.at_put_range(heapWordToOffset(mr.start()),
+                   heapWordToOffset(mr.end()), true);
+}
+
+// Clears the bits for the part of mr that lies inside the range covered by
+// this bitmap.
+void CMBitMap::clearRange(MemRegion mr) {
+  // As in markRange(): keep the clipped region that intersection() returns.
+  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
+  assert(!mr.is_empty(), "unexpected empty region");
+  // convert address range into offset range
+  _bm.at_put_range(heapWordToOffset(mr.start()),
+                   heapWordToOffset(mr.end()), false);
+}
+
+MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
+                                            HeapWord* end_addr) {
+  HeapWord* start = getNextMarkedWordAddress(addr);
+  start = MIN2(start, end_addr);
+  HeapWord* end   = getNextUnmarkedWordAddress(start);
+  end = MIN2(end, end_addr);
+  assert(start <= end, "Consistency check");
+  MemRegion mr(start, end);
+  if (!mr.is_empty()) {
+    clearRange(mr);
+  }
+  return mr;
+}
+
+CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
+  _base(NULL), _cm(cm)
+#ifdef ASSERT
+  , _drain_in_progress(false)
+  ,
_drain_in_progress_yields(false) +#endif +{} + +void CMMarkStack::allocate(size_t size) { + _base = NEW_C_HEAP_ARRAY(oop, size); + if (_base == NULL) + vm_exit_during_initialization("Failed to allocate " + "CM region mark stack"); + _index = 0; + // QQQQ cast ... + _capacity = (jint) size; + _oops_do_bound = -1; + NOT_PRODUCT(_max_depth = 0); +} + +CMMarkStack::~CMMarkStack() { + if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base); +} + +void CMMarkStack::par_push(oop ptr) { + while (true) { + if (isFull()) { + _overflow = true; + return; + } + // Otherwise... + jint index = _index; + jint next_index = index+1; + jint res = Atomic::cmpxchg(next_index, &_index, index); + if (res == index) { + _base[index] = ptr; + // Note that we don't maintain this atomically. We could, but it + // doesn't seem necessary. + NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index)); + return; + } + // Otherwise, we need to try again. + } +} + +void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) { + while (true) { + if (isFull()) { + _overflow = true; + return; + } + // Otherwise... + jint index = _index; + jint next_index = index + n; + if (next_index > _capacity) { + _overflow = true; + return; + } + jint res = Atomic::cmpxchg(next_index, &_index, index); + if (res == index) { + for (int i = 0; i < n; i++) { + int ind = index + i; + assert(ind < _capacity, "By overflow test above."); + _base[ind] = ptr_arr[i]; + } + NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index)); + return; + } + // Otherwise, we need to try again. + } +} + + +void CMMarkStack::par_push_arr(oop* ptr_arr, int n) { + MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); + jint start = _index; + jint next_index = start + n; + if (next_index > _capacity) { + _overflow = true; + return; + } + // Otherwise. 
+ _index = next_index; + for (int i = 0; i < n; i++) { + int ind = start + i; + guarantee(ind < _capacity, "By overflow test above."); + _base[ind] = ptr_arr[i]; + } +} + + +bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) { + MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); + jint index = _index; + if (index == 0) { + *n = 0; + return false; + } else { + int k = MIN2(max, index); + jint new_ind = index - k; + for (int j = 0; j < k; j++) { + ptr_arr[j] = _base[new_ind + j]; + } + _index = new_ind; + *n = k; + return true; + } +} + + +CMRegionStack::CMRegionStack() : _base(NULL) {} + +void CMRegionStack::allocate(size_t size) { + _base = NEW_C_HEAP_ARRAY(MemRegion, size); + if (_base == NULL) + vm_exit_during_initialization("Failed to allocate " + "CM region mark stack"); + _index = 0; + // QQQQ cast ... + _capacity = (jint) size; +} + +CMRegionStack::~CMRegionStack() { + if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base); +} + +void CMRegionStack::push(MemRegion mr) { + assert(mr.word_size() > 0, "Precondition"); + while (true) { + if (isFull()) { + _overflow = true; + return; + } + // Otherwise... + jint index = _index; + jint next_index = index+1; + jint res = Atomic::cmpxchg(next_index, &_index, index); + if (res == index) { + _base[index] = mr; + return; + } + // Otherwise, we need to try again. + } +} + +MemRegion CMRegionStack::pop() { + while (true) { + // Otherwise... + jint index = _index; + + if (index == 0) { + return MemRegion(); + } + jint next_index = index-1; + jint res = Atomic::cmpxchg(next_index, &_index, index); + if (res == index) { + MemRegion mr = _base[next_index]; + if (mr.start() != NULL) { + tmp_guarantee_CM( mr.end() != NULL, "invariant" ); + tmp_guarantee_CM( mr.word_size() > 0, "invariant" ); + return mr; + } else { + // that entry was invalidated... let's skip it + tmp_guarantee_CM( mr.end() == NULL, "invariant" ); + } + } + // Otherwise, we need to try again. 
+  }
+}
+
+// Walks the first _oops_do_bound entries of the region stack and replaces
+// every entry whose start address lies in a collection-set region with an
+// empty MemRegion.  Returns true if at least one entry was invalidated.
+bool CMRegionStack::invalidate_entries_into_cset() {
+  bool result = false;
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  for (int i = 0; i < _oops_do_bound; ++i) {
+    MemRegion mr = _base[i];
+    if (mr.start() != NULL) {
+      tmp_guarantee_CM( mr.end() != NULL, "invariant");
+      tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
+      HeapRegion* hr = g1h->heap_region_containing(mr.start());
+      tmp_guarantee_CM( hr != NULL, "invariant" );
+      if (hr->in_collection_set()) {
+        // The region points into the collection set
+        _base[i] = MemRegion();
+        result = true;
+      }
+    } else {
+      // that entry was invalidated... let's skip it
+      tmp_guarantee_CM( mr.end() == NULL, "invariant" );
+    }
+  }
+  return result;
+}
+
+// Pops entries until the stack is empty, applying cl to the fields of each
+// popped object.  OopClosureClass is the static type of the closure; the
+// template parameter list had been lost and is restored here.
+template<class OopClosureClass>
+bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
+  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
+         || SafepointSynchronize::is_at_safepoint(),
+         "Drain recursion must be yield-safe.");
+  bool res = true;
+  debug_only(_drain_in_progress = true);
+  debug_only(_drain_in_progress_yields = yield_after);
+  while (!isEmpty()) {
+    oop newOop = pop();
+    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
+    assert(newOop->is_oop(), "Expected an oop");
+    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
+           "only grey objects on this stack");
+    // iterate over the oops in this oop, marking and pushing
+    // the ones in CMS generation.
+ newOop->oop_iterate(cl); + if (yield_after && _cm->do_yield_check()) { + res = false; break; + } + } + debug_only(_drain_in_progress = false); + return res; +} + +void CMMarkStack::oops_do(OopClosure* f) { + if (_index == 0) return; + assert(_oops_do_bound != -1 && _oops_do_bound <= _index, + "Bound must be set."); + for (int i = 0; i < _oops_do_bound; i++) { + f->do_oop(&_base[i]); + } + _oops_do_bound = -1; +} + +bool ConcurrentMark::not_yet_marked(oop obj) const { + return (_g1h->is_obj_ill(obj) + || (_g1h->is_in_permanent(obj) + && !nextMarkBitMap()->isMarked((HeapWord*)obj))); +} + +#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away +#pragma warning( disable:4355 ) // 'this' : used in base member initializer list +#endif // _MSC_VER + +ConcurrentMark::ConcurrentMark(ReservedSpace rs, + int max_regions) : + _markBitMap1(rs, MinObjAlignment - 1), + _markBitMap2(rs, MinObjAlignment - 1), + + _parallel_marking_threads(0), + _sleep_factor(0.0), + _marking_task_overhead(1.0), + _cleanup_sleep_factor(0.0), + _cleanup_task_overhead(1.0), + _region_bm(max_regions, false /* in_resource_area*/), + _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >> + CardTableModRefBS::card_shift, + false /* in_resource_area*/), + _prevMarkBitMap(&_markBitMap1), + _nextMarkBitMap(&_markBitMap2), + _at_least_one_mark_complete(false), + + _markStack(this), + _regionStack(), + // _finger set in set_non_marking_state + + _max_task_num(MAX2(ParallelGCThreads, (size_t)1)), + // _active_tasks set in set_non_marking_state + // _tasks set inside the constructor + _task_queues(new CMTaskQueueSet((int) _max_task_num)), + _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)), + + _has_overflown(false), + _concurrent(false), + + // _verbose_level set below + + _init_times(), + _remark_times(), _remark_mark_times(), _remark_weak_ref_times(), + _cleanup_times(), + _total_counting_time(0.0), + _total_rs_scrub_time(0.0), + + _parallel_workers(NULL), + 
_cleanup_co_tracker(G1CLGroup) +{ + CMVerboseLevel verbose_level = + (CMVerboseLevel) G1MarkingVerboseLevel; + if (verbose_level < no_verbose) + verbose_level = no_verbose; + if (verbose_level > high_verbose) + verbose_level = high_verbose; + _verbose_level = verbose_level; + + if (verbose_low()) + gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", " + "heap end = "PTR_FORMAT, _heap_start, _heap_end); + + _markStack.allocate(G1CMStackSize); + _regionStack.allocate(G1CMRegionStackSize); + + // Create & start a ConcurrentMark thread. + if (G1ConcMark) { + _cmThread = new ConcurrentMarkThread(this); + assert(cmThread() != NULL, "CM Thread should have been created"); + assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm"); + } else { + _cmThread = NULL; + } + _g1h = G1CollectedHeap::heap(); + assert(CGC_lock != NULL, "Where's the CGC_lock?"); + assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency"); + assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency"); + + SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); + satb_qs.set_buffer_size(G1SATBLogBufferSize); + + int size = (int) MAX2(ParallelGCThreads, (size_t)1); + _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size); + for (int i = 0 ; i < size; i++) { + _par_cleanup_thread_state[i] = new ParCleanupThreadState; + } + + _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num); + _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num); + + // so that the assertion in MarkingTaskQueue::task_queue doesn't fail + _active_tasks = _max_task_num; + for (int i = 0; i < (int) _max_task_num; ++i) { + CMTaskQueue* task_queue = new CMTaskQueue(); + task_queue->initialize(); + _task_queues->register_queue(i, task_queue); + + _tasks[i] = new CMTask(i, this, task_queue, _task_queues); + _accum_task_vtime[i] = 0.0; + } + + if (ParallelMarkingThreads > ParallelGCThreads) { + vm_exit_during_initialization("Can't have more ParallelMarkingThreads " + "than 
ParallelGCThreads."); + } + if (ParallelGCThreads == 0) { + // if we are not running with any parallel GC threads we will not + // spawn any marking threads either + _parallel_marking_threads = 0; + _sleep_factor = 0.0; + _marking_task_overhead = 1.0; + } else { + if (ParallelMarkingThreads > 0) { + // notice that ParallelMarkingThreads overwrites G1MarkingOverheadPerc + // if both are set + + _parallel_marking_threads = ParallelMarkingThreads; + _sleep_factor = 0.0; + _marking_task_overhead = 1.0; + } else if (G1MarkingOverheadPerc > 0) { + // we will calculate the number of parallel marking threads + // based on a target overhead with respect to the soft real-time + // goal + + double marking_overhead = (double) G1MarkingOverheadPerc / 100.0; + double overall_cm_overhead = + (double) G1MaxPauseTimeMS * marking_overhead / (double) G1TimeSliceMS; + double cpu_ratio = 1.0 / (double) os::processor_count(); + double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio); + double marking_task_overhead = + overall_cm_overhead / marking_thread_num * + (double) os::processor_count(); + double sleep_factor = + (1.0 - marking_task_overhead) / marking_task_overhead; + + _parallel_marking_threads = (size_t) marking_thread_num; + _sleep_factor = sleep_factor; + _marking_task_overhead = marking_task_overhead; + } else { + _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1); + _sleep_factor = 0.0; + _marking_task_overhead = 1.0; + } + + if (parallel_marking_threads() > 1) + _cleanup_task_overhead = 1.0; + else + _cleanup_task_overhead = marking_task_overhead(); + _cleanup_sleep_factor = + (1.0 - cleanup_task_overhead()) / cleanup_task_overhead(); + +#if 0 + gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads()); + gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead()); + gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor()); + gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", 
cleanup_task_overhead()); + gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor()); +#endif + + guarantee( parallel_marking_threads() > 0, "peace of mind" ); + _parallel_workers = new WorkGang("Parallel Marking Threads", + (int) parallel_marking_threads(), false, true); + if (_parallel_workers == NULL) + vm_exit_during_initialization("Failed necessary allocation."); + } + + // so that the call below can read a sensible value + _heap_start = (HeapWord*) rs.base(); + set_non_marking_state(); +} + +void ConcurrentMark::update_g1_committed(bool force) { + // If concurrent marking is not in progress, then we do not need to + // update _heap_end. This has a subtle and important + // side-effect. Imagine that two evacuation pauses happen between + // marking completion and remark. The first one can grow the + // heap (hence now the finger is below the heap end). Then, the + // second one could unnecessarily push regions on the region + // stack. This causes the invariant that the region stack is empty + // at the beginning of remark to be false. By ensuring that we do + // not observe heap expansions after marking is complete, then we do + // not have this problem. + if (!concurrent_marking_in_progress() && !force) + return; + + MemRegion committed = _g1h->g1_committed(); + tmp_guarantee_CM( committed.start() == _heap_start, + "start shouldn't change" ); + HeapWord* new_end = committed.end(); + if (new_end > _heap_end) { + // The heap has been expanded. + + _heap_end = new_end; + } + // Notice that the heap can also shrink. However, this only happens + // during a Full GC (at least currently) and the entire marking + // phase will bail out and the task will not be restarted. So, let's + // do nothing. +} + +void ConcurrentMark::reset() { + // Starting values for these two. This should be called in a STW + // phase. CM will be notified of any future g1_committed expansions + // will be at the end of evacuation pauses, when tasks are + // inactive. 
+ MemRegion committed = _g1h->g1_committed(); + _heap_start = committed.start(); + _heap_end = committed.end(); + + guarantee( _heap_start != NULL && + _heap_end != NULL && + _heap_start < _heap_end, "heap bounds should look ok" ); + + // reset all the marking data structures and any necessary flags + clear_marking_state(); + + if (verbose_low()) + gclog_or_tty->print_cr("[global] resetting"); + + // We do reset all of them, since different phases will use + // different number of active threads. So, it's easiest to have all + // of them ready. + for (int i = 0; i < (int) _max_task_num; ++i) + _tasks[i]->reset(_nextMarkBitMap); + + // we need this to make sure that the flag is on during the evac + // pause with initial mark piggy-backed + set_concurrent_marking_in_progress(); +} + +void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) { + guarantee( active_tasks <= _max_task_num, "we should not have more" ); + + _active_tasks = active_tasks; + // Need to update the three data structures below according to the + // number of active threads for this phase. + _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues); + _first_overflow_barrier_sync.set_n_workers((int) active_tasks); + _second_overflow_barrier_sync.set_n_workers((int) active_tasks); + + _concurrent = concurrent; + // We propagate this to all tasks, not just the active ones. + for (int i = 0; i < (int) _max_task_num; ++i) + _tasks[i]->set_concurrent(concurrent); + + if (concurrent) { + set_concurrent_marking_in_progress(); + } else { + // We currently assume that the concurrent flag has been set to + // false before we start remark. At this point we should also be + // in a STW phase. 
+    guarantee( !concurrent_marking_in_progress(), "invariant" );
+    guarantee( _finger == _heap_end, "only way to get here" );
+    update_g1_committed(true);
+  }
+}
+
+void ConcurrentMark::set_non_marking_state() {
+  // We set the global marking state to some default values when we're
+  // not doing marking.
+  clear_marking_state();
+  _active_tasks = 0;
+  clear_concurrent_marking_in_progress();
+}
+
+// Releases everything the constructor allocated: the per-thread cleanup
+// state objects and their array, each task queue and task, the task queue
+// set, and the _tasks and _accum_task_vtime arrays.
+ConcurrentMark::~ConcurrentMark() {
+  int size = (int) MAX2(ParallelGCThreads, (size_t)1);
+  for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
+  FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
+                    _par_cleanup_thread_state);
+
+  for (int i = 0; i < (int) _max_task_num; ++i) {
+    delete _task_queues->queue(i);
+    delete _tasks[i];
+  }
+  delete _task_queues;
+  // Free the arrays themselves.  This used to pass _max_task_num (the
+  // element count) to FREE_C_HEAP_ARRAY instead of the _tasks pointer, and
+  // _accum_task_vtime was never released.
+  FREE_C_HEAP_ARRAY(CMTask*, _tasks);
+  FREE_C_HEAP_ARRAY(double, _accum_task_vtime);
+}
+
+// This closure is used to mark refs into the g1 generation
+// from external roots in the CMS bit map.
+// Called at the first checkpoint.
+//
+
+#define PRINT_REACHABLE_AT_INITIAL_MARK 0
+#if PRINT_REACHABLE_AT_INITIAL_MARK
+static FILE* reachable_file = NULL;
+
+class PrintReachableClosure: public OopsInGenClosure {
+  CMBitMap* _bm;
+  int _level;
+public:
+  PrintReachableClosure(CMBitMap* bm) :
+    _bm(bm), _level(0) {
+    guarantee(reachable_file != NULL, "pre-condition");
+  }
+  void do_oop(oop* p) {
+    oop obj = *p;
+    HeapWord* obj_addr = (HeapWord*)obj;
+    if (obj == NULL) return;
+    fprintf(reachable_file, "%d: "PTR_FORMAT" -> "PTR_FORMAT" (%d)\n",
+            _level, p, (void*) obj, _bm->isMarked(obj_addr));
+    if (!_bm->isMarked(obj_addr)) {
+      _bm->mark(obj_addr);
+      _level++;
+      obj->oop_iterate(this);
+      _level--;
+    }
+  }
+};
+#endif // PRINT_REACHABLE_AT_INITIAL_MARK
+
+#define SEND_HEAP_DUMP_TO_FILE 0
+#if SEND_HEAP_DUMP_TO_FILE
+static FILE* heap_dump_file = NULL;
+#endif // SEND_HEAP_DUMP_TO_FILE
+
+void ConcurrentMark::clearNextBitmap() {
+  guarantee(!G1CollectedHeap::heap()->mark_in_progress(), "Precondition.");
+
+  // clear the mark bitmap (no grey objects to start
with). + // We need to do this in chunks and offer to yield in between + // each chunk. + HeapWord* start = _nextMarkBitMap->startWord(); + HeapWord* end = _nextMarkBitMap->endWord(); + HeapWord* cur = start; + size_t chunkSize = M; + while (cur < end) { + HeapWord* next = cur + chunkSize; + if (next > end) + next = end; + MemRegion mr(cur,next); + _nextMarkBitMap->clearRange(mr); + cur = next; + do_yield_check(); + } +} + +class NoteStartOfMarkHRClosure: public HeapRegionClosure { +public: + bool doHeapRegion(HeapRegion* r) { + if (!r->continuesHumongous()) { + r->note_start_of_marking(true); + } + return false; + } +}; + +void ConcurrentMark::checkpointRootsInitialPre() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + G1CollectorPolicy* g1p = g1h->g1_policy(); + + _has_aborted = false; + + // Find all the reachable objects... +#if PRINT_REACHABLE_AT_INITIAL_MARK + guarantee(reachable_file == NULL, "Protocol"); + char fn_buf[100]; + sprintf(fn_buf, "/tmp/reachable.txt.%d", os::current_process_id()); + reachable_file = fopen(fn_buf, "w"); + // clear the mark bitmap (no grey objects to start with) + _nextMarkBitMap->clearAll(); + PrintReachableClosure prcl(_nextMarkBitMap); + g1h->process_strong_roots( + false, // fake perm gen collection + SharedHeap::SO_AllClasses, + &prcl, // Regular roots + &prcl // Perm Gen Roots + ); + // The root iteration above "consumed" dirty cards in the perm gen. + // Therefore, as a shortcut, we dirty all such cards. + g1h->rem_set()->invalidate(g1h->perm_gen()->used_region(), false); + fclose(reachable_file); + reachable_file = NULL; + // clear the mark bitmap again. + _nextMarkBitMap->clearAll(); + COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); + COMPILER2_PRESENT(DerivedPointerTable::clear()); +#endif // PRINT_REACHABLE_AT_INITIAL_MARK + + // Initialise marking structures. This has to be done in a STW phase. 
+ reset(); +} + +class CMMarkRootsClosure: public OopsInGenClosure { +private: + ConcurrentMark* _cm; + G1CollectedHeap* _g1h; + bool _do_barrier; + +public: + CMMarkRootsClosure(ConcurrentMark* cm, + G1CollectedHeap* g1h, + bool do_barrier) : _cm(cm), _g1h(g1h), + _do_barrier(do_barrier) { } + + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + + virtual void do_oop(oop* p) { + oop thisOop = *p; + if (thisOop != NULL) { + assert(thisOop->is_oop() || thisOop->mark() == NULL, + "expected an oop, possibly with mark word displaced"); + HeapWord* addr = (HeapWord*)thisOop; + if (_g1h->is_in_g1_reserved(addr)) { + _cm->grayRoot(thisOop); + } + } + if (_do_barrier) { + assert(!_g1h->is_in_g1_reserved(p), + "Should be called on external roots"); + do_barrier(p); + } + } +}; + +void ConcurrentMark::checkpointRootsInitialPost() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + // For each region note start of marking. + NoteStartOfMarkHRClosure startcl; + g1h->heap_region_iterate(&startcl); + + // Start weak-reference discovery. + ReferenceProcessor* rp = g1h->ref_processor(); + rp->verify_no_references_recorded(); + rp->enable_discovery(); // enable ("weak") refs discovery + + SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); + satb_mq_set.set_process_completed_threshold(G1SATBProcessCompletedThreshold); + satb_mq_set.set_active_all_threads(true); + + // update_g1_committed() will be called at the end of an evac pause + // when marking is on. So, it's also called at the end of the + // initial-mark pause to update the heap end, if the heap expands + // during it. No need to call it here. 
+ + guarantee( !_cleanup_co_tracker.enabled(), "invariant" ); + + size_t max_marking_threads = + MAX2((size_t) 1, parallel_marking_threads()); + for (int i = 0; i < (int)_max_task_num; ++i) { + _tasks[i]->enable_co_tracker(); + if (i < (int) max_marking_threads) + _tasks[i]->reset_co_tracker(marking_task_overhead()); + else + _tasks[i]->reset_co_tracker(0.0); + } +} + +// Checkpoint the roots into this generation from outside +// this generation. [Note this initial checkpoint need only +// be approximate -- we'll do a catch up phase subsequently.] +void ConcurrentMark::checkpointRootsInitial() { + assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped"); + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + double start = os::elapsedTime(); + GCOverheadReporter::recordSTWStart(start); + + // If there has not been a GC[n-1] since last GC[n] cycle completed, + // precede our marking with a collection of all + // younger generations to keep floating garbage to a minimum. + // YSR: we won't do this for now -- it's an optimization to be + // done post-beta. + + // YSR: ignoring weak refs for now; will do at bug fixing stage + // EVM: assert(discoveredRefsAreClear()); + + + G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); + g1p->record_concurrent_mark_init_start(); + checkpointRootsInitialPre(); + + // YSR: when concurrent precleaning is in place, we'll + // need to clear the cached card table here + + ResourceMark rm; + HandleMark hm; + + g1h->ensure_parsability(false); + g1h->perm_gen()->save_marks(); + + CMMarkRootsClosure notOlder(this, g1h, false); + CMMarkRootsClosure older(this, g1h, true); + + g1h->set_marking_started(); + g1h->rem_set()->prepare_for_younger_refs_iterate(false); + + g1h->process_strong_roots(false, // fake perm gen collection + SharedHeap::SO_AllClasses, + &notOlder, // Regular roots + &older // Perm Gen Roots + ); + checkpointRootsInitialPost(); + + // Statistics.
+ double end = os::elapsedTime(); + _init_times.add((end - start) * 1000.0); + GCOverheadReporter::recordSTWEnd(end); + + g1p->record_concurrent_mark_init_end(); +} + +/* + Notice that in the next two methods, we actually leave the STS + during the barrier sync and join it immediately afterwards. If we + do not do this, then the following deadlock can occur: one + thread could be in the barrier sync code, waiting for the other + thread to also sync up, whereas another one could be trying to + yield, while also waiting for the other threads to sync up too. + + Because the thread that does the sync barrier has left the STS, it + can be suspended for a Full GC, or an evacuation pause + could occur. This is actually safe, since entering the sync + barrier is one of the last things do_marking_step() does, and it + doesn't manipulate any data structures afterwards. +*/ + +void ConcurrentMark::enter_first_sync_barrier(int task_num) { + if (verbose_low()) + gclog_or_tty->print_cr("[%d] entering first barrier", task_num); + + ConcurrentGCThread::stsLeave(); + _first_overflow_barrier_sync.enter(); + ConcurrentGCThread::stsJoin(); + // at this point everyone should have synced up and not be doing any + // more work + + if (verbose_low()) + gclog_or_tty->print_cr("[%d] leaving first barrier", task_num); + + // let task 0 do this + if (task_num == 0) { + // task 0 is responsible for clearing the global data structures + clear_marking_state(); + + if (PrintGC) { + gclog_or_tty->date_stamp(PrintGCDateStamps); + gclog_or_tty->stamp(PrintGCTimeStamps); + gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]"); + } + } + + // after this, each task should reset its own data structures and + // then go into the second barrier +} + +void ConcurrentMark::enter_second_sync_barrier(int task_num) { + if (verbose_low()) + gclog_or_tty->print_cr("[%d] entering second barrier", task_num); + + ConcurrentGCThread::stsLeave(); +
_second_overflow_barrier_sync.enter(); + ConcurrentGCThread::stsJoin(); + // at this point everything should be re-initialised and ready to go + + if (verbose_low()) + gclog_or_tty->print_cr("[%d] leaving second barrier", task_num); +} + +void ConcurrentMark::grayRoot(oop p) { + HeapWord* addr = (HeapWord*) p; + // We can't really check against _heap_start and _heap_end, since it + // is possible during an evacuation pause with piggy-backed + // initial-mark that the committed space is expanded during the + // pause without CM observing this change. So the assertion below + // is a bit conservative; but better than nothing. + tmp_guarantee_CM( _g1h->g1_committed().contains(addr), + "address should be within the heap bounds" ); + + if (!_nextMarkBitMap->isMarked(addr)) + _nextMarkBitMap->parMark(addr); +} + +void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) { + // The objects on the region have already been marked "in bulk" by + // the caller. We only need to decide whether to push the region on + // the region stack or not. + + if (!concurrent_marking_in_progress() || !_should_gray_objects) + // We're done with marking and waiting for remark. We do not need to + // push anything else on the region stack. + return; + + HeapWord* finger = _finger; + + if (verbose_low()) + gclog_or_tty->print_cr("[global] attempting to push " + "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at " + PTR_FORMAT, mr.start(), mr.end(), finger); + + if (mr.start() < finger) { + // The finger is always heap region aligned and it is not possible + // for mr to span heap regions.
+ tmp_guarantee_CM( mr.end() <= finger, "invariant" ); + + tmp_guarantee_CM( mr.start() <= mr.end() && + _heap_start <= mr.start() && + mr.end() <= _heap_end, + "region boundaries should fall within the committed space" ); + if (verbose_low()) + gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") " + "below the finger, pushing it", + mr.start(), mr.end()); + + if (!region_stack_push(mr)) { + if (verbose_low()) + gclog_or_tty->print_cr("[global] region stack has overflown."); + } + } +} + +void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) { + // The object is not marked by the caller. We need to at least mark + // it and maybe push it on the stack. + + HeapWord* addr = (HeapWord*)p; + if (!_nextMarkBitMap->isMarked(addr)) { + // We definitely need to mark it, irrespective of whether we bail out + // because we're done with marking. + if (_nextMarkBitMap->parMark(addr)) { + if (!concurrent_marking_in_progress() || !_should_gray_objects) + // If we're done with concurrent marking and we're waiting for + // remark, then we're not pushing anything on the stack. + return; + + // No OrderAccess:store_load() is needed.
It is implicit in the + // CAS done in parMark(addr) above + HeapWord* finger = _finger; + + if (addr < finger) { + if (!mark_stack_push(oop(addr))) { + if (verbose_low()) + gclog_or_tty->print_cr("[global] global stack overflow " + "during parMark"); + } + } + } + } +} + +class CMConcurrentMarkingTask: public AbstractGangTask { +private: + ConcurrentMark* _cm; + ConcurrentMarkThread* _cmt; + +public: + void work(int worker_i) { + guarantee( Thread::current()->is_ConcurrentGC_thread(), + "this should only be done by a conc GC thread" ); + + double start_vtime = os::elapsedVTime(); + + ConcurrentGCThread::stsJoin(); + + guarantee( (size_t)worker_i < _cm->active_tasks(), "invariant" ); + CMTask* the_task = _cm->task(worker_i); + the_task->start_co_tracker(); + the_task->record_start_time(); + if (!_cm->has_aborted()) { + do { + double start_vtime_sec = os::elapsedVTime(); + double start_time_sec = os::elapsedTime(); + the_task->do_marking_step(10.0); + double end_time_sec = os::elapsedTime(); + double end_vtime_sec = os::elapsedVTime(); + double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec; + double elapsed_time_sec = end_time_sec - start_time_sec; + _cm->clear_has_overflown(); + + bool ret = _cm->do_yield_check(worker_i); + + jlong sleep_time_ms; + if (!_cm->has_aborted() && the_task->has_aborted()) { + sleep_time_ms = + (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0); + ConcurrentGCThread::stsLeave(); + os::sleep(Thread::current(), sleep_time_ms, false); + ConcurrentGCThread::stsJoin(); + } + double end_time2_sec = os::elapsedTime(); + double elapsed_time2_sec = end_time2_sec - start_time_sec; + + the_task->update_co_tracker(); + +#if 0 + gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, " + "overhead %1.4lf", + elapsed_vtime_sec * 1000.0, (double) sleep_time_ms, + the_task->conc_overhead(os::elapsedTime()) * 8.0); + gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms", + elapsed_time_sec * 1000.0, elapsed_time2_sec * 
1000.0); +#endif + } while (!_cm->has_aborted() && the_task->has_aborted()); + } + the_task->record_end_time(); + guarantee( !the_task->has_aborted() || _cm->has_aborted(), "invariant" ); + + ConcurrentGCThread::stsLeave(); + + double end_vtime = os::elapsedVTime(); + the_task->update_co_tracker(true); + _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime); + } + + CMConcurrentMarkingTask(ConcurrentMark* cm, + ConcurrentMarkThread* cmt) : + AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { } + + ~CMConcurrentMarkingTask() { } +}; + +void ConcurrentMark::markFromRoots() { + // we might be tempted to assert that: + // assert(asynch == !SafepointSynchronize::is_at_safepoint(), + // "inconsistent argument?"); + // However that wouldn't be right, because it's possible that + // a safepoint is indeed in progress as a younger generation + // stop-the-world GC happens even as we mark in this generation. + + _restart_for_overflow = false; + + set_phase(MAX2((size_t) 1, parallel_marking_threads()), true); + + CMConcurrentMarkingTask markingTask(this, cmThread()); + if (parallel_marking_threads() > 0) + _parallel_workers->run_task(&markingTask); + else + markingTask.work(0); + print_stats(); +} + +void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) { + // world is stopped at this checkpoint + assert(SafepointSynchronize::is_at_safepoint(), + "world should be stopped"); + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + // If a full collection has happened, we shouldn't do this. + if (has_aborted()) { + g1h->set_marking_complete(); // So bitmap clearing isn't confused + return; + } + + G1CollectorPolicy* g1p = g1h->g1_policy(); + g1p->record_concurrent_mark_remark_start(); + + double start = os::elapsedTime(); + GCOverheadReporter::recordSTWStart(start); + + checkpointRootsFinalWork(); + + double mark_work_end = os::elapsedTime(); + + weakRefsWork(clear_all_soft_refs); + + if (has_overflown()) { + // Oops. We overflowed. 
Restart concurrent marking. + _restart_for_overflow = true; + // Clear the flag. We do not need it any more. + clear_has_overflown(); + if (G1TraceMarkStackOverflow) + gclog_or_tty->print_cr("\nRemark led to restart for overflow."); + } else { + // We're done with marking. + JavaThread::satb_mark_queue_set().set_active_all_threads(false); + } + +#if VERIFY_OBJS_PROCESSED + _scan_obj_cl.objs_processed = 0; + ThreadLocalObjQueue::objs_enqueued = 0; +#endif + + // Statistics + double now = os::elapsedTime(); + _remark_mark_times.add((mark_work_end - start) * 1000.0); + _remark_weak_ref_times.add((now - mark_work_end) * 1000.0); + _remark_times.add((now - start) * 1000.0); + + GCOverheadReporter::recordSTWEnd(now); + for (int i = 0; i < (int)_max_task_num; ++i) + _tasks[i]->disable_co_tracker(); + _cleanup_co_tracker.enable(); + _cleanup_co_tracker.reset(cleanup_task_overhead()); + g1p->record_concurrent_mark_remark_end(); +} + + +#define CARD_BM_TEST_MODE 0 + +class CalcLiveObjectsClosure: public HeapRegionClosure { + + CMBitMapRO* _bm; + ConcurrentMark* _cm; + COTracker* _co_tracker; + bool _changed; + bool _yield; + size_t _words_done; + size_t _tot_live; + size_t _tot_used; + size_t _regions_done; + double _start_vtime_sec; + + BitMap* _region_bm; + BitMap* _card_bm; + intptr_t _bottom_card_num; + bool _final; + + void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) { + for (intptr_t i = start_card_num; i <= last_card_num; i++) { +#if CARD_BM_TEST_MODE + guarantee(_card_bm->at(i - _bottom_card_num), + "Should already be set."); +#else + _card_bm->par_at_put(i - _bottom_card_num, 1); +#endif + } + } + +public: + CalcLiveObjectsClosure(bool final, + CMBitMapRO *bm, ConcurrentMark *cm, + BitMap* region_bm, BitMap* card_bm, + COTracker* co_tracker) : + _bm(bm), _cm(cm), _changed(false), _yield(true), + _words_done(0), _tot_live(0), _tot_used(0), + _region_bm(region_bm), _card_bm(card_bm), + _final(final), _co_tracker(co_tracker), + 
_regions_done(0), _start_vtime_sec(0.0) + { + _bottom_card_num = + intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >> + CardTableModRefBS::card_shift); + } + + bool doHeapRegion(HeapRegion* hr) { + if (_co_tracker != NULL) + _co_tracker->update(); + + if (!_final && _regions_done == 0) + _start_vtime_sec = os::elapsedVTime(); + + if (hr->continuesHumongous()) return false; + + HeapWord* nextTop = hr->next_top_at_mark_start(); + HeapWord* start = hr->top_at_conc_mark_count(); + assert(hr->bottom() <= start && start <= hr->end() && + hr->bottom() <= nextTop && nextTop <= hr->end() && + start <= nextTop, + "Preconditions."); + // Otherwise, record the number of word's we'll examine. + size_t words_done = (nextTop - start); + // Find the first marked object at or after "start". + start = _bm->getNextMarkedWordAddress(start, nextTop); + size_t marked_bytes = 0; + + // Below, the term "card num" means the result of shifting an address + // by the card shift -- address 0 corresponds to card number 0. One + // must subtract the card num of the bottom of the heap to obtain a + // card table index. + // The first card num of the sequence of live cards currently being + // constructed. -1 ==> no sequence. + intptr_t start_card_num = -1; + // The last card num of the sequence of live cards currently being + // constructed. -1 ==> no sequence. + intptr_t last_card_num = -1; + + while (start < nextTop) { + if (_yield && _cm->do_yield_check()) { + // We yielded. It might be for a full collection, in which case + // all bets are off; terminate the traversal. + if (_cm->has_aborted()) { + _changed = false; + return true; + } else { + // Otherwise, it might be a collection pause, and the region + // we're looking at might be in the collection set. We'll + // abandon this region. + return false; + } + } + oop obj = oop(start); + int obj_sz = obj->size(); + // The card num of the start of the current object. 
+ intptr_t obj_card_num = + intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift); + + HeapWord* obj_last = start + obj_sz - 1; + intptr_t obj_last_card_num = + intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift); + + if (obj_card_num != last_card_num) { + if (start_card_num == -1) { + assert(last_card_num == -1, "Both or neither."); + start_card_num = obj_card_num; + } else { + assert(last_card_num != -1, "Both or neither."); + assert(obj_card_num >= last_card_num, "Inv"); + if ((obj_card_num - last_card_num) > 1) { + // Mark the last run, and start a new one. + mark_card_num_range(start_card_num, last_card_num); + start_card_num = obj_card_num; + } + } +#if CARD_BM_TEST_MODE + /* + gclog_or_tty->print_cr("Setting bits from %d/%d.", + obj_card_num - _bottom_card_num, + obj_last_card_num - _bottom_card_num); + */ + for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) { + _card_bm->par_at_put(j - _bottom_card_num, 1); + } +#endif + } + // In any case, we set the last card num. + last_card_num = obj_last_card_num; + + marked_bytes += obj_sz * HeapWordSize; + // Find the next marked object after this one. + start = _bm->getNextMarkedWordAddress(start + 1, nextTop); + _changed = true; + } + // Handle the last range, if any. + if (start_card_num != -1) + mark_card_num_range(start_card_num, last_card_num); + if (_final) { + // Mark the allocated-since-marking portion... + HeapWord* tp = hr->top(); + if (nextTop < tp) { + start_card_num = + intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift); + last_card_num = + intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift); + mark_card_num_range(start_card_num, last_card_num); + // This definitely means the region has live objects. + _region_bm->par_at_put(hr->hrs_index(), 1); + } + } + + hr->add_to_marked_bytes(marked_bytes); + // Update the live region bitmap. 
+ if (marked_bytes > 0) { + _region_bm->par_at_put(hr->hrs_index(), 1); + } + hr->set_top_at_conc_mark_count(nextTop); + _tot_live += hr->next_live_bytes(); + _tot_used += hr->used(); + _words_done = words_done; + + if (!_final) { + ++_regions_done; + if (_regions_done % 10 == 0) { + double end_vtime_sec = os::elapsedVTime(); + double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec; + if (elapsed_vtime_sec > (10.0 / 1000.0)) { + jlong sleep_time_ms = + (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0); +#if 0 + gclog_or_tty->print_cr("CL: elapsed %1.4lf ms, sleep %1.4lf ms, " + "overhead %1.4lf", + elapsed_vtime_sec * 1000.0, (double) sleep_time_ms, + _co_tracker->concOverhead(os::elapsedTime())); +#endif + os::sleep(Thread::current(), sleep_time_ms, false); + _start_vtime_sec = end_vtime_sec; + } + } + } + + return false; + } + + bool changed() { return _changed; } + void reset() { _changed = false; _words_done = 0; } + void no_yield() { _yield = false; } + size_t words_done() { return _words_done; } + size_t tot_live() { return _tot_live; } + size_t tot_used() { return _tot_used; } +}; + + +void ConcurrentMark::calcDesiredRegions() { + guarantee( _cleanup_co_tracker.enabled(), "invariant" ); + _cleanup_co_tracker.start(); + + _region_bm.clear(); + _card_bm.clear(); + CalcLiveObjectsClosure calccl(false /*final*/, + nextMarkBitMap(), this, + &_region_bm, &_card_bm, + &_cleanup_co_tracker); + G1CollectedHeap *g1h = G1CollectedHeap::heap(); + g1h->heap_region_iterate(&calccl); + + do { + calccl.reset(); + g1h->heap_region_iterate(&calccl); + } while (calccl.changed()); + + _cleanup_co_tracker.update(true); +} + +class G1ParFinalCountTask: public AbstractGangTask { +protected: + G1CollectedHeap* _g1h; + CMBitMap* _bm; + size_t _n_workers; + size_t *_live_bytes; + size_t *_used_bytes; + BitMap* _region_bm; + BitMap* _card_bm; +public: + G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm, + BitMap* region_bm, BitMap* card_bm) : + 
AbstractGangTask("G1 final counting"), _g1h(g1h), + _bm(bm), _region_bm(region_bm), _card_bm(card_bm) + { + if (ParallelGCThreads > 0) + _n_workers = _g1h->workers()->total_workers(); + else + _n_workers = 1; + _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers); + _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers); + } + + ~G1ParFinalCountTask() { + FREE_C_HEAP_ARRAY(size_t, _live_bytes); + FREE_C_HEAP_ARRAY(size_t, _used_bytes); + } + + void work(int i) { + CalcLiveObjectsClosure calccl(true /*final*/, + _bm, _g1h->concurrent_mark(), + _region_bm, _card_bm, + NULL /* CO tracker */); + calccl.no_yield(); + if (ParallelGCThreads > 0) { + _g1h->heap_region_par_iterate_chunked(&calccl, i, + HeapRegion::FinalCountClaimValue); + } else { + _g1h->heap_region_iterate(&calccl); + } + assert(calccl.complete(), "Shouldn't have yielded!"); + + guarantee( (size_t)i < _n_workers, "invariant" ); + _live_bytes[i] = calccl.tot_live(); + _used_bytes[i] = calccl.tot_used(); + } + size_t live_bytes() { + size_t live_bytes = 0; + for (size_t i = 0; i < _n_workers; ++i) + live_bytes += _live_bytes[i]; + return live_bytes; + } + size_t used_bytes() { + size_t used_bytes = 0; + for (size_t i = 0; i < _n_workers; ++i) + used_bytes += _used_bytes[i]; + return used_bytes; + } +}; + +class G1ParNoteEndTask; + +class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { + G1CollectedHeap* _g1; + int _worker_num; + size_t _max_live_bytes; + size_t _regions_claimed; + size_t _freed_bytes; + size_t _cleared_h_regions; + size_t _freed_regions; + UncleanRegionList* _unclean_region_list; + double _claimed_region_time; + double _max_region_time; + +public: + G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, + UncleanRegionList* list, + int worker_num); + size_t freed_bytes() { return _freed_bytes; } + size_t cleared_h_regions() { return _cleared_h_regions; } + size_t freed_regions() { return _freed_regions; } + UncleanRegionList* unclean_region_list() { + return _unclean_region_list; + } + + bool 
doHeapRegion(HeapRegion *r); + + size_t max_live_bytes() { return _max_live_bytes; } + size_t regions_claimed() { return _regions_claimed; } + double claimed_region_time_sec() { return _claimed_region_time; } + double max_region_time_sec() { return _max_region_time; } +}; + +class G1ParNoteEndTask: public AbstractGangTask { + friend class G1NoteEndOfConcMarkClosure; +protected: + G1CollectedHeap* _g1h; + size_t _max_live_bytes; + size_t _freed_bytes; + ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state; +public: + G1ParNoteEndTask(G1CollectedHeap* g1h, + ConcurrentMark::ParCleanupThreadState** + par_cleanup_thread_state) : + AbstractGangTask("G1 note end"), _g1h(g1h), + _max_live_bytes(0), _freed_bytes(0), + _par_cleanup_thread_state(par_cleanup_thread_state) + {} + + void work(int i) { + double start = os::elapsedTime(); + G1NoteEndOfConcMarkClosure g1_note_end(_g1h, + &_par_cleanup_thread_state[i]->list, + i); + if (ParallelGCThreads > 0) { + _g1h->heap_region_par_iterate_chunked(&g1_note_end, i, + HeapRegion::NoteEndClaimValue); + } else { + _g1h->heap_region_iterate(&g1_note_end); + } + assert(g1_note_end.complete(), "Shouldn't have yielded!"); + + // Now finish up freeing the current thread's regions. 
+ _g1h->finish_free_region_work(g1_note_end.freed_bytes(), + g1_note_end.cleared_h_regions(), + 0, NULL); + { + MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); + _max_live_bytes += g1_note_end.max_live_bytes(); + _freed_bytes += g1_note_end.freed_bytes(); + } + double end = os::elapsedTime(); + if (G1PrintParCleanupStats) { + gclog_or_tty->print(" Worker thread %d [%8.3f..%8.3f = %8.3f ms] " + "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n", + i, start, end, (end-start)*1000.0, + (int) g1_note_end.regions_claimed(), + g1_note_end.claimed_region_time_sec()*1000.0, + g1_note_end.max_region_time_sec()*1000.0); + } + } + size_t max_live_bytes() { return _max_live_bytes; } + size_t freed_bytes() { return _freed_bytes; } +}; + +class G1ParScrubRemSetTask: public AbstractGangTask { +protected: + G1RemSet* _g1rs; + BitMap* _region_bm; + BitMap* _card_bm; +public: + G1ParScrubRemSetTask(G1CollectedHeap* g1h, + BitMap* region_bm, BitMap* card_bm) : + AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), + _region_bm(region_bm), _card_bm(card_bm) + {} + + void work(int i) { + if (ParallelGCThreads > 0) { + _g1rs->scrub_par(_region_bm, _card_bm, i, + HeapRegion::ScrubRemSetClaimValue); + } else { + _g1rs->scrub(_region_bm, _card_bm); + } + } + +}; + +G1NoteEndOfConcMarkClosure:: +G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, + UncleanRegionList* list, + int worker_num) + : _g1(g1), _worker_num(worker_num), + _max_live_bytes(0), _regions_claimed(0), + _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0), + _claimed_region_time(0.0), _max_region_time(0.0), + _unclean_region_list(list) +{} + +bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) { + // We use a claim value of zero here because all regions + // were claimed with value 1 in the FinalCount task.
+ r->reset_gc_time_stamp(); + if (!r->continuesHumongous()) { + double start = os::elapsedTime(); + _regions_claimed++; + r->note_end_of_marking(); + _max_live_bytes += r->max_live_bytes(); + _g1->free_region_if_totally_empty_work(r, + _freed_bytes, + _cleared_h_regions, + _freed_regions, + _unclean_region_list, + true /*par*/); + double region_time = (os::elapsedTime() - start); + _claimed_region_time += region_time; + if (region_time > _max_region_time) _max_region_time = region_time; + } + return false; +} + +void ConcurrentMark::cleanup() { + // world is stopped at this checkpoint + assert(SafepointSynchronize::is_at_safepoint(), + "world should be stopped"); + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + // If a full collection has happened, we shouldn't do this. + if (has_aborted()) { + g1h->set_marking_complete(); // So bitmap clearing isn't confused + return; + } + + _cleanup_co_tracker.disable(); + + G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); + g1p->record_concurrent_mark_cleanup_start(); + + double start = os::elapsedTime(); + GCOverheadReporter::recordSTWStart(start); + + // Do counting once more with the world stopped for good measure. 
+ G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(), + &_region_bm, &_card_bm); + if (ParallelGCThreads > 0) { + assert(g1h->check_heap_region_claim_values( + HeapRegion::InitialClaimValue), + "sanity check"); + + int n_workers = g1h->workers()->total_workers(); + g1h->set_par_threads(n_workers); + g1h->workers()->run_task(&g1_par_count_task); + g1h->set_par_threads(0); + + assert(g1h->check_heap_region_claim_values( + HeapRegion::FinalCountClaimValue), + "sanity check"); + } else { + g1_par_count_task.work(0); + } + + size_t known_garbage_bytes = + g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes(); +#if 0 + gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf", + (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024), + (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024), + (double) known_garbage_bytes / (double) (1024 * 1024)); +#endif // 0 + g1p->set_known_garbage_bytes(known_garbage_bytes); + + size_t start_used_bytes = g1h->used(); + _at_least_one_mark_complete = true; + g1h->set_marking_complete(); + + double count_end = os::elapsedTime(); + double this_final_counting_time = (count_end - start); + if (G1PrintParCleanupStats) { + gclog_or_tty->print_cr("Cleanup:"); + gclog_or_tty->print_cr(" Finalize counting: %8.3f ms", + this_final_counting_time*1000.0); + } + _total_counting_time += this_final_counting_time; + + // Install newly created mark bitMap as "prev". + swapMarkBitMaps(); + + g1h->reset_gc_time_stamp(); + + // Note end of marking in all heap regions. 
+ double note_end_start = os::elapsedTime(); + G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state); + if (ParallelGCThreads > 0) { + int n_workers = g1h->workers()->total_workers(); + g1h->set_par_threads(n_workers); + g1h->workers()->run_task(&g1_par_note_end_task); + g1h->set_par_threads(0); + + assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue), + "sanity check"); + } else { + g1_par_note_end_task.work(0); + } + g1h->set_unclean_regions_coming(true); + double note_end_end = os::elapsedTime(); + // Tell the mutators that there might be unclean regions coming... + if (G1PrintParCleanupStats) { + gclog_or_tty->print_cr(" note end of marking: %8.3f ms.", + (note_end_end - note_end_start)*1000.0); + } + + + // call below, since it affects the metric by which we sort the heap + // regions. + if (G1ScrubRemSets) { + double rs_scrub_start = os::elapsedTime(); + G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm); + if (ParallelGCThreads > 0) { + int n_workers = g1h->workers()->total_workers(); + g1h->set_par_threads(n_workers); + g1h->workers()->run_task(&g1_par_scrub_rs_task); + g1h->set_par_threads(0); + + assert(g1h->check_heap_region_claim_values( + HeapRegion::ScrubRemSetClaimValue), + "sanity check"); + } else { + g1_par_scrub_rs_task.work(0); + } + + double rs_scrub_end = os::elapsedTime(); + double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start); + _total_rs_scrub_time += this_rs_scrub_time; + } + + // this will also free any regions totally full of garbage objects, + // and sort the regions. + g1h->g1_policy()->record_concurrent_mark_cleanup_end( + g1_par_note_end_task.freed_bytes(), + g1_par_note_end_task.max_live_bytes()); + + // Statistics. 
+ double end = os::elapsedTime(); + _cleanup_times.add((end - start) * 1000.0); + GCOverheadReporter::recordSTWEnd(end); + + // G1CollectedHeap::heap()->print(); + // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d", + // G1CollectedHeap::heap()->get_gc_time_stamp()); + + if (PrintGC || PrintGCDetails) { + g1h->print_size_transition(gclog_or_tty, + start_used_bytes, + g1h->used(), + g1h->capacity()); + } + + size_t cleaned_up_bytes = start_used_bytes - g1h->used(); + g1p->decrease_known_garbage_bytes(cleaned_up_bytes); + + // We need to make this be a "collection" so any collection pause that + // races with it goes around and waits for completeCleanup to finish. + g1h->increment_total_collections(); + +#ifndef PRODUCT + if (G1VerifyConcMark) { + G1CollectedHeap::heap()->prepare_for_verify(); + G1CollectedHeap::heap()->verify(true,false); + } +#endif +} + +void ConcurrentMark::completeCleanup() { + // A full collection intervened. + if (has_aborted()) return; + + int first = 0; + int last = (int)MAX2(ParallelGCThreads, (size_t)1); + for (int t = 0; t < last; t++) { + UncleanRegionList* list = &_par_cleanup_thread_state[t]->list; + assert(list->well_formed(), "Inv"); + HeapRegion* hd = list->hd(); + while (hd != NULL) { + // Now finish up the other stuff. + hd->rem_set()->clear(); + HeapRegion* next_hd = hd->next_from_unclean_list(); + (void)list->pop(); + guarantee(list->hd() == next_hd, "how not?"); + _g1h->put_region_on_unclean_list(hd); + if (!hd->isHumongous()) { + // Add this to the _free_regions count by 1. 
+ _g1h->finish_free_region_work(0, 0, 1, NULL); + } + hd = list->hd(); + guarantee(hd == next_hd, "how not?"); + } + } +} + + +class G1CMIsAliveClosure: public BoolObjectClosure { + G1CollectedHeap* _g1; + public: + G1CMIsAliveClosure(G1CollectedHeap* g1) : + _g1(g1) + {} + + void do_object(oop obj) { + assert(false, "not to be invoked"); + } + bool do_object_b(oop obj) { + HeapWord* addr = (HeapWord*)obj; + return addr != NULL && + (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj)); + } +}; + +class G1CMKeepAliveClosure: public OopClosure { + G1CollectedHeap* _g1; + ConcurrentMark* _cm; + CMBitMap* _bitMap; + public: + G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm, + CMBitMap* bitMap) : + _g1(g1), _cm(cm), + _bitMap(bitMap) {} + + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + + void do_oop(oop* p) { + oop thisOop = *p; + HeapWord* addr = (HeapWord*)thisOop; + if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) { + _bitMap->mark(addr); + _cm->mark_stack_push(thisOop); + } + } +}; + +class G1CMDrainMarkingStackClosure: public VoidClosure { + CMMarkStack* _markStack; + CMBitMap* _bitMap; + G1CMKeepAliveClosure* _oopClosure; + public: + G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack, + G1CMKeepAliveClosure* oopClosure) : + _bitMap(bitMap), + _markStack(markStack), + _oopClosure(oopClosure) + {} + + void do_void() { + _markStack->drain((OopClosure*)_oopClosure, _bitMap, false); + } +}; + +void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { + ResourceMark rm; + HandleMark hm; + ReferencePolicy* soft_ref_policy; + + // Process weak references. 
+ if (clear_all_soft_refs) { + soft_ref_policy = new AlwaysClearPolicy(); + } else { +#ifdef COMPILER2 + soft_ref_policy = new LRUMaxHeapPolicy(); +#else + soft_ref_policy = new LRUCurrentHeapPolicy(); +#endif + } + assert(_markStack.isEmpty(), "mark stack should be empty"); + + G1CollectedHeap* g1 = G1CollectedHeap::heap(); + G1CMIsAliveClosure g1IsAliveClosure(g1); + + G1CMKeepAliveClosure g1KeepAliveClosure(g1, this, nextMarkBitMap()); + G1CMDrainMarkingStackClosure + g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack, + &g1KeepAliveClosure); + + // XXXYYY Also: copy the parallel ref processing code from CMS. + ReferenceProcessor* rp = g1->ref_processor(); + rp->process_discovered_references(soft_ref_policy, + &g1IsAliveClosure, + &g1KeepAliveClosure, + &g1DrainMarkingStackClosure, + NULL); + assert(_markStack.overflow() || _markStack.isEmpty(), + "mark stack should be empty (unless it overflowed)"); + if (_markStack.overflow()) { + set_has_overflown(); + } + + rp->enqueue_discovered_references(); + rp->verify_no_references_recorded(); + assert(!rp->discovery_enabled(), "should have been disabled"); + + // Now clean up stale oops in SymbolTable and StringTable + SymbolTable::unlink(&g1IsAliveClosure); + StringTable::unlink(&g1IsAliveClosure); +} + +void ConcurrentMark::swapMarkBitMaps() { + CMBitMapRO* temp = _prevMarkBitMap; + _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; + _nextMarkBitMap = (CMBitMap*) temp; +} + +class CMRemarkTask: public AbstractGangTask { +private: + ConcurrentMark *_cm; + +public: + void work(int worker_i) { + // Since all available tasks are actually started, we should + // only proceed if we're supposed to be active. + if ((size_t)worker_i < _cm->active_tasks()) { + CMTask* task = _cm->task(worker_i); + task->record_start_time(); + do { + task->do_marking_step(1000000000.0 /* something very large */); + } while (task->has_aborted() && !_cm->has_overflown()); + // If we overflow, then we do not want to restart.
We instead + // want to abort remark and do concurrent marking again. + task->record_end_time(); + } + } + + CMRemarkTask(ConcurrentMark* cm) : + AbstractGangTask("Par Remark"), _cm(cm) { } +}; + +void ConcurrentMark::checkpointRootsFinalWork() { + ResourceMark rm; + HandleMark hm; + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + g1h->ensure_parsability(false); + + if (ParallelGCThreads > 0) { + g1h->change_strong_roots_parity(); + // this is remark, so we'll use up all available threads + int active_workers = ParallelGCThreads; + set_phase(active_workers, false); + + CMRemarkTask remarkTask(this); + // We will start all available threads, even if we decide that the + // active_workers will be fewer. The extra ones will just bail out + // immediately. + int n_workers = g1h->workers()->total_workers(); + g1h->set_par_threads(n_workers); + g1h->workers()->run_task(&remarkTask); + g1h->set_par_threads(0); + + SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); + guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" ); + } else { + g1h->change_strong_roots_parity(); + // this is remark, so we'll use up all available threads + int active_workers = 1; + set_phase(active_workers, false); + + CMRemarkTask remarkTask(this); + // We will start all available threads, even if we decide that the + // active_workers will be fewer. The extra ones will just bail out + // immediately. 
+ remarkTask.work(0); + + SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); + guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" ); + } + + print_stats(); + + if (!restart_for_overflow()) + set_non_marking_state(); + +#if VERIFY_OBJS_PROCESSED + if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) { + gclog_or_tty->print_cr("Processed = %d, enqueued = %d.", + _scan_obj_cl.objs_processed, + ThreadLocalObjQueue::objs_enqueued); + guarantee(_scan_obj_cl.objs_processed == + ThreadLocalObjQueue::objs_enqueued, + "Different number of objs processed and enqueued."); + } +#endif +} + +class ReachablePrinterOopClosure: public OopClosure { +private: + G1CollectedHeap* _g1h; + CMBitMapRO* _bitmap; + outputStream* _out; + +public: + ReachablePrinterOopClosure(CMBitMapRO* bitmap, outputStream* out) : + _bitmap(bitmap), _g1h(G1CollectedHeap::heap()), _out(out) { } + + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + + void do_oop(oop* p) { + oop obj = *p; + const char* str = NULL; + const char* str2 = ""; + + if (!_g1h->is_in_g1_reserved(obj)) + str = "outside G1 reserved"; + else { + HeapRegion* hr = _g1h->heap_region_containing(obj); + guarantee( hr != NULL, "invariant" ); + if (hr->obj_allocated_since_prev_marking(obj)) { + str = "over TAMS"; + if (_bitmap->isMarked((HeapWord*) obj)) + str2 = " AND MARKED"; + } else if (_bitmap->isMarked((HeapWord*) obj)) + str = "marked"; + else + str = "#### NOT MARKED ####"; + } + + _out->print_cr(" "PTR_FORMAT" contains "PTR_FORMAT" %s%s", + p, (void*) obj, str, str2); + } +}; + +class ReachablePrinterClosure: public BitMapClosure { +private: + CMBitMapRO* _bitmap; + outputStream* _out; + +public: + ReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) : + _bitmap(bitmap), _out(out) { } + + bool do_bit(size_t offset) { + HeapWord* addr = _bitmap->offsetToHeapWord(offset); + ReachablePrinterOopClosure oopCl(_bitmap, _out); + + _out->print_cr(" obj "PTR_FORMAT", offset 
%10d (marked)", addr, offset); + oop(addr)->oop_iterate(&oopCl); + _out->print_cr(""); + + return true; + } +}; + +class ObjInRegionReachablePrinterClosure : public ObjectClosure { +private: + CMBitMapRO* _bitmap; + outputStream* _out; + +public: + void do_object(oop o) { + ReachablePrinterOopClosure oopCl(_bitmap, _out); + + _out->print_cr(" obj "PTR_FORMAT" (over TAMS)", (void*) o); + o->oop_iterate(&oopCl); + _out->print_cr(""); + } + + ObjInRegionReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) : + _bitmap(bitmap), _out(out) { } +}; + +class RegionReachablePrinterClosure : public HeapRegionClosure { +private: + CMBitMapRO* _bitmap; + outputStream* _out; + +public: + bool doHeapRegion(HeapRegion* hr) { + HeapWord* b = hr->bottom(); + HeapWord* e = hr->end(); + HeapWord* t = hr->top(); + HeapWord* p = hr->prev_top_at_mark_start(); + _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " + "PTAMS: "PTR_FORMAT, b, e, t, p); + _out->print_cr(""); + + ObjInRegionReachablePrinterClosure ocl(_bitmap, _out); + hr->object_iterate_mem_careful(MemRegion(p, t), &ocl); + + return false; + } + + RegionReachablePrinterClosure(CMBitMapRO* bitmap, + outputStream* out) : + _bitmap(bitmap), _out(out) { } +}; + +void ConcurrentMark::print_prev_bitmap_reachable() { + outputStream* out = gclog_or_tty; + +#if SEND_HEAP_DUMP_TO_FILE + guarantee(heap_dump_file == NULL, "Protocol"); + char fn_buf[100]; + sprintf(fn_buf, "/tmp/dump.txt.%d", os::current_process_id()); + heap_dump_file = fopen(fn_buf, "w"); + fileStream fstream(heap_dump_file); + out = &fstream; +#endif // SEND_HEAP_DUMP_TO_FILE + + RegionReachablePrinterClosure rcl(_prevMarkBitMap, out); + out->print_cr("--- ITERATING OVER REGIONS WITH PTAMS < TOP"); + _g1h->heap_region_iterate(&rcl); + out->print_cr(""); + + ReachablePrinterClosure cl(_prevMarkBitMap, out); + out->print_cr("--- REACHABLE OBJECTS ON THE BITMAP"); + _prevMarkBitMap->iterate(&cl); + out->print_cr(""); + +#if SEND_HEAP_DUMP_TO_FILE 
+ fclose(heap_dump_file); + heap_dump_file = NULL; +#endif // SEND_HEAP_DUMP_TO_FILE +} + +// This note is for drainAllSATBBuffers and the code in between. +// In the future we could reuse a task to do this work during an +// evacuation pause (since now tasks are not active and can be claimed +// during an evacuation pause). This was a late change to the code and +// is currently not being taken advantage of. + +class CMGlobalObjectClosure : public ObjectClosure { +private: + ConcurrentMark* _cm; + +public: + void do_object(oop obj) { + _cm->deal_with_reference(obj); + } + + CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { } +}; + +void ConcurrentMark::deal_with_reference(oop obj) { + if (verbose_high()) + gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT, + (void*) obj); + + + HeapWord* objAddr = (HeapWord*) obj; + if (_g1h->is_in_g1_reserved(objAddr)) { + tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" ); + HeapRegion* hr = _g1h->heap_region_containing(obj); + if (_g1h->is_obj_ill(obj, hr)) { + if (verbose_high()) + gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered " + "marked", (void*) obj); + + // we need to mark it first + if (_nextMarkBitMap->parMark(objAddr)) { + // No OrderAccess:store_load() is needed. 
It is implicit in the
+        // CAS done in parMark(objAddr) above
+        HeapWord* finger = _finger;
+        if (objAddr < finger) {
+          if (verbose_high())
+            gclog_or_tty->print_cr("[global] below the global finger "
+                                   "("PTR_FORMAT"), pushing it", finger);
+          if (!mark_stack_push(obj)) {
+            if (verbose_low())
+              gclog_or_tty->print_cr("[global] global stack overflow during "
+                                     "deal_with_reference");
+          }
+        }
+      }
+    }
+  }
+}
+
+// Applies a CMGlobalObjectClosure to every completed SATB buffer and
+// then to the buffers of all threads. The closure is uninstalled again
+// before returning, and no completed buffers may be left at that point.
+void ConcurrentMark::drainAllSATBBuffers() {
+  CMGlobalObjectClosure oc(this);
+  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
+  satb_mq_set.set_closure(&oc);
+
+  while (satb_mq_set.apply_closure_to_completed_buffer()) {
+    if (verbose_medium())
+      gclog_or_tty->print_cr("[global] processed an SATB buffer");
+  }
+
+  // no need to check whether we should do this, as this is only
+  // called during an evacuation pause
+  satb_mq_set.iterate_closure_all_threads();
+
+  satb_mq_set.set_closure(NULL);
+  guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
+}
+
+// Sets the mark for p in the "prev" marking bitmap.
+void ConcurrentMark::markPrev(oop p) {
+  // Note we are overriding the read-only view of the prev map here, via
+  // the cast.
+  ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
+}
+
+// Clears the mark for p in the "next" marking bitmap.
+void ConcurrentMark::clear(oop p) {
+  assert(p != NULL && p->is_oop(), "expected an oop");
+  HeapWord* addr = (HeapWord*)p;
+  // Both bounds have to hold for addr to lie inside the area covered by
+  // the bitmap; joined with || the check would accept any address.
+  assert(addr >= _nextMarkBitMap->startWord() &&
+         addr < _nextMarkBitMap->endWord(), "in a region");
+
+  _nextMarkBitMap->clear(addr);
+}
+
+// Clears the marks for the range mr in both the prev and the next
+// marking bitmaps.
+void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
+  // Note we are overriding the read-only view of the prev map here, via
+  // the cast.
+  ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
+  _nextMarkBitMap->clearRange(mr);
+}
+
+HeapRegion*
+ConcurrentMark::claim_region(int task_num) {
+  // "checkpoint" the finger
+  HeapWord* finger = _finger;
+
+  // _heap_end will not change underneath our feet; it only changes at
+  // yield points.
+ while (finger < _heap_end) { + tmp_guarantee_CM( _g1h->is_in_g1_reserved(finger), "invariant" ); + + // is the gap between reading the finger and doing the CAS too long? + + HeapRegion* curr_region = _g1h->heap_region_containing(finger); + HeapWord* bottom = curr_region->bottom(); + HeapWord* end = curr_region->end(); + HeapWord* limit = curr_region->next_top_at_mark_start(); + + if (verbose_low()) + gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" " + "["PTR_FORMAT", "PTR_FORMAT"), " + "limit = "PTR_FORMAT, + task_num, curr_region, bottom, end, limit); + + HeapWord* res = + (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger); + if (res == finger) { + // we succeeded + + // notice that _finger == end cannot be guaranteed here since, + // someone else might have moved the finger even further + guarantee( _finger >= end, "the finger should have moved forward" ); + + if (verbose_low()) + gclog_or_tty->print_cr("[%d] we were successful with region = " + PTR_FORMAT, task_num, curr_region); + + if (limit > bottom) { + if (verbose_low()) + gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, " + "returning it ", task_num, curr_region); + return curr_region; + } else { + tmp_guarantee_CM( limit == bottom, + "the region limit should be at bottom" ); + if (verbose_low()) + gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, " + "returning NULL", task_num, curr_region); + // we return NULL and the caller should try calling + // claim_region() again. 
+ return NULL; + } + } else { + guarantee( _finger > finger, "the finger should have moved forward" ); + if (verbose_low()) + gclog_or_tty->print_cr("[%d] somebody else moved the finger, " + "global finger = "PTR_FORMAT", " + "our finger = "PTR_FORMAT, + task_num, _finger, finger); + + // read it again + finger = _finger; + } + } + + return NULL; +} + +void ConcurrentMark::oops_do(OopClosure* cl) { + if (_markStack.size() > 0 && verbose_low()) + gclog_or_tty->print_cr("[global] scanning the global marking stack, " + "size = %d", _markStack.size()); + // we first iterate over the contents of the mark stack... + _markStack.oops_do(cl); + + for (int i = 0; i < (int)_max_task_num; ++i) { + OopTaskQueue* queue = _task_queues->queue((int)i); + + if (queue->size() > 0 && verbose_low()) + gclog_or_tty->print_cr("[global] scanning task queue of task %d, " + "size = %d", i, queue->size()); + + // ...then over the contents of the all the task queues. + queue->oops_do(cl); + } + + // finally, invalidate any entries that in the region stack that + // point into the collection set + if (_regionStack.invalidate_entries_into_cset()) { + // otherwise, any gray objects copied during the evacuation pause + // might not be visited. 
+ guarantee( _should_gray_objects, "invariant" ); + } +} + +void ConcurrentMark::clear_marking_state() { + _markStack.setEmpty(); + _markStack.clear_overflow(); + _regionStack.setEmpty(); + _regionStack.clear_overflow(); + clear_has_overflown(); + _finger = _heap_start; + + for (int i = 0; i < (int)_max_task_num; ++i) { + OopTaskQueue* queue = _task_queues->queue(i); + queue->set_empty(); + } +} + +void ConcurrentMark::print_stats() { + if (verbose_stats()) { + gclog_or_tty->print_cr("---------------------------------------------------------------------"); + for (size_t i = 0; i < _active_tasks; ++i) { + _tasks[i]->print_stats(); + gclog_or_tty->print_cr("---------------------------------------------------------------------"); + } + } +} + +class CSMarkOopClosure: public OopClosure { + friend class CSMarkBitMapClosure; + + G1CollectedHeap* _g1h; + CMBitMap* _bm; + ConcurrentMark* _cm; + oop* _ms; + jint* _array_ind_stack; + int _ms_size; + int _ms_ind; + int _array_increment; + + bool push(oop obj, int arr_ind = 0) { + if (_ms_ind == _ms_size) { + gclog_or_tty->print_cr("Mark stack is full."); + return false; + } + _ms[_ms_ind] = obj; + if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind; + _ms_ind++; + return true; + } + + oop pop() { + if (_ms_ind == 0) return NULL; + else { + _ms_ind--; + return _ms[_ms_ind]; + } + } + + bool drain() { + while (_ms_ind > 0) { + oop obj = pop(); + assert(obj != NULL, "Since index was non-zero."); + if (obj->is_objArray()) { + jint arr_ind = _array_ind_stack[_ms_ind]; + objArrayOop aobj = objArrayOop(obj); + jint len = aobj->length(); + jint next_arr_ind = arr_ind + _array_increment; + if (next_arr_ind < len) { + push(obj, next_arr_ind); + } + // Now process this portion of this one. 
+ int lim = MIN2(next_arr_ind, len); + assert(!UseCompressedOops, "This needs to be fixed"); + for (int j = arr_ind; j < lim; j++) { + do_oop(aobj->obj_at_addr(j)); + } + + } else { + obj->oop_iterate(this); + } + if (abort()) return false; + } + return true; + } + +public: + CSMarkOopClosure(ConcurrentMark* cm, int ms_size) : + _g1h(G1CollectedHeap::heap()), + _cm(cm), + _bm(cm->nextMarkBitMap()), + _ms_size(ms_size), _ms_ind(0), + _ms(NEW_C_HEAP_ARRAY(oop, ms_size)), + _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)), + _array_increment(MAX2(ms_size/8, 16)) + {} + + ~CSMarkOopClosure() { + FREE_C_HEAP_ARRAY(oop, _ms); + FREE_C_HEAP_ARRAY(jint, _array_ind_stack); + } + + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + + void do_oop(oop* p) { + oop obj = *p; + if (obj == NULL) return; + if (obj->is_forwarded()) { + // If the object has already been forwarded, we have to make sure + // that it's marked. So follow the forwarding pointer. Note that + // this does the right thing for self-forwarding pointers in the + // evacuation failure case. 
+      obj = obj->forwardee();
+    }
+    HeapRegion* hr = _g1h->heap_region_containing(obj);
+    if (hr != NULL) {
+      if (hr->in_collection_set()) {
+        if (_g1h->is_obj_ill(obj)) {
+          _bm->mark((HeapWord*)obj);
+          if (!push(obj)) {
+            gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
+            set_abort();
+          }
+        }
+      } else {
+        // Outside the collection set; we need to gray it
+        _cm->deal_with_reference(obj);
+      }
+    }
+  }
+};
+
+// Bitmap closure that owns a CSMarkOopClosure. For every set bit whose
+// object has not been forwarded, the object is pushed on the oop
+// closure's private stack and the stack is drained. do_bit() returns
+// false as soon as the push or the drain reports failure.
+class CSMarkBitMapClosure: public BitMapClosure {
+  G1CollectedHeap* _g1h;
+  CMBitMap*        _bitMap;
+  ConcurrentMark*  _cm;
+  CSMarkOopClosure _oop_cl;
+public:
+  // Every member is initialized, in declaration order (_cm used to be
+  // left uninitialized).
+  CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
+    _g1h(G1CollectedHeap::heap()),
+    _bitMap(cm->nextMarkBitMap()),
+    _cm(cm),
+    _oop_cl(cm, ms_size)
+  {}
+
+  ~CSMarkBitMapClosure() {}
+
+  bool do_bit(size_t offset) {
+    // convert offset into a HeapWord*
+    HeapWord* addr = _bitMap->offsetToHeapWord(offset);
+    // check both ends of the range covered by the bitmap
+    assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
+           "address out of range");
+    assert(_bitMap->isMarked(addr), "tautology");
+    oop obj = oop(addr);
+    if (!obj->is_forwarded()) {
+      if (!_oop_cl.push(obj)) return false;
+      if (!_oop_cl.drain()) return false;
+    }
+    // Otherwise...
+    return true;
+  }
+};
+
+
+// Region closure that runs a CSMarkBitMapClosure over the marked bits
+// in [bottom, next_top_at_mark_start) of every region that did not
+// fail evacuation. If the bitmap iteration is cut short, the closure
+// records that it did not complete and asks the region iteration to
+// stop by returning true.
+class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
+  CMBitMap* _bm;
+  CSMarkBitMapClosure _bit_cl;
+  enum SomePrivateConstants {
+    // size handed to the bitmap closure for its private mark stack
+    MSSize = 1000
+  };
+  bool _completed;
+public:
+  CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
+    _bm(cm->nextMarkBitMap()),
+    _bit_cl(cm, MSSize),
+    _completed(true)
+  {}
+
+  ~CompleteMarkingInCSHRClosure() {}
+
+  bool doHeapRegion(HeapRegion* r) {
+    if (!r->evacuation_failed()) {
+      MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
+      if (!mr.is_empty()) {
+        if (!_bm->iterate(&_bit_cl, mr)) {
+          _completed = false;
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
+  // true unless a bitmap iteration was cut short
+  bool completed() { return _completed; }
+};
+
+// Region closure that clears, in the given bitmap, the marks covering
+// the used part of every region that did not fail evacuation.
+class ClearMarksInHRClosure: public HeapRegionClosure {
+  CMBitMap* _bm;
+public:
+  ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
+
+  bool doHeapRegion(HeapRegion* r) {
+    // fetch the used region once and reuse it for the test and the clear
+    MemRegion usedMR = r->used_region();
+    if (!usedMR.is_empty() && !r->evacuation_failed()) {
+      _bm->clearRange(usedMR);
+    }
+    return false;
+  }
+};
+
+// If marking is in progress, iterates over the collection set with a
+// CompleteMarkingInCSHRClosure until a pass completes, reports the
+// elapsed time to the policy, and finally clears the next-bitmap marks
+// of the collection set regions. When marking is not in progress a
+// time of 0.0 is recorded and nothing else is done.
+void ConcurrentMark::complete_marking_in_collection_set() {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+  if (!g1h->mark_in_progress()) {
+    g1h->g1_policy()->record_mark_closure_time(0.0);
+    return;
+  }
+
+  double start = os::elapsedTime();
+  // Every pass starts with a fresh closure; repeat until one pass gets
+  // through the whole collection set without being cut short.
+  while (true) {
+    CompleteMarkingInCSHRClosure cmplt(this);
+    g1h->collection_set_iterate(&cmplt);
+    if (cmplt.completed()) break;
+  }
+  double end_time = os::elapsedTime();
+  double elapsed_time_ms = (end_time - start) * 1000.0;
+  g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
+  if (PrintGCDetails) {
+    gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms);
+  }
+
+  ClearMarksInHRClosure clr(nextMarkBitMap());
+  g1h->collection_set_iterate(&clr);
+}
+
+// The next two methods deal with the following optimisation. Some
+// objects are gray by being marked and located above the finger.
If
+// they are copied, during an evacuation pause, below the finger then
+// they need to be pushed on the stack. The observation is that, if
+// there are no regions in the collection set located above the
+// finger, then the above cannot happen, hence we do not need to
+// explicitly gray any objects when copying them to below the
+// finger. The global stack will be scanned to ensure that, if it
+// points to objects being copied, it will update their
+// location. There is a tricky situation with the gray objects in
+// region stack that are being copied, however. See the comment in
+// newCSet().
+
+// Recomputes _min_finger as the lowest of the global finger and all
+// non-NULL task fingers, and resets _should_gray_objects. The flag is
+// set straight away if the region stack is not empty.
+void ConcurrentMark::newCSet() {
+  if (!concurrent_marking_in_progress())
+    // nothing to do if marking is not in progress
+    return;
+
+  // find what the lowest finger is among the global and local fingers
+  _min_finger = _finger;
+  for (int i = 0; i < (int)_max_task_num; ++i) {
+    CMTask* task = _tasks[i];
+    HeapWord* task_finger = task->finger();
+    if (task_finger != NULL && task_finger < _min_finger)
+      _min_finger = task_finger;
+  }
+
+  _should_gray_objects = false;
+
+  // This fixes a very subtle and frustrating bug. It might be the case
+  // that, during an evacuation pause, heap regions that contain
+  // objects that are gray (by being in regions contained in the
+  // region stack) are included in the collection set. Since such gray
+  // objects will be moved, and because it's not easy to redirect
+  // region stack entries to point to a new location (because objects
+  // in one region might be scattered to multiple regions after they
+  // are copied), one option is to ensure that all marked objects
+  // copied during a pause are pushed on the stack. Notice, however,
+  // that this problem can only happen when the region stack is not
+  // empty during an evacuation pause. So, we make the fix a bit less
+  // conservative and ensure that regions are pushed on the stack,
+  // irrespective whether all collection set regions are below the
+  // finger, if the region stack is not empty. This is expected to be
+  // a rare case, so I don't think it's necessary to be smarter about it.
+  if (!region_stack_empty())
+    _should_gray_objects = true;
+}
+
+// Sets _should_gray_objects if the end of hr lies above _min_finger.
+// Does nothing when concurrent marking is not in progress.
+void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
+  if (!concurrent_marking_in_progress())
+    return;
+
+  HeapWord* region_end = hr->end();
+  if (region_end > _min_finger)
+    _should_gray_objects = true;
+}
+
+// If marking has aborted, disables the cleanup co-tracker and every
+// task co-tracker that is still enabled. Otherwise none of them is
+// expected to be enabled, which is checked with guarantees.
+void ConcurrentMark::disable_co_trackers() {
+  if (has_aborted()) {
+    if (_cleanup_co_tracker.enabled())
+      _cleanup_co_tracker.disable();
+    for (int i = 0; i < (int)_max_task_num; ++i) {
+      CMTask* task = _tasks[i];
+      if (task->co_tracker_enabled())
+        task->disable_co_tracker();
+    }
+  } else {
+    guarantee( !_cleanup_co_tracker.enabled(), "invariant" );
+    for (int i = 0; i < (int)_max_task_num; ++i) {
+      CMTask* task = _tasks[i];
+      guarantee( !task->co_tracker_enabled(), "invariant" );
+    }
+  }
+}
+
+// abandon current marking iteration due to a Full GC
+void ConcurrentMark::abort() {
+  // If we're not marking, nothing to do.
+  if (!G1ConcMark) return;
+
+  // Clear all marks to force marking thread to do nothing
+  _nextMarkBitMap->clearAll();
+  // Empty mark stack
+  clear_marking_state();
+  for (int i = 0; i < (int)_max_task_num; ++i)
+    _tasks[i]->clear_region_fields();
+  _has_aborted = true;
+
+  // Drop whatever the SATB queues hold and deactivate them for all threads.
+  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
+  satb_mq_set.abandon_partial_marking();
+  satb_mq_set.set_active_all_threads(false);
+}
+
+static void print_ms_time_info(const char* prefix, const char* name,
+                               NumberSeq& ns) {
+  gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
+                         prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
+  if (ns.num() > 0) {
+    gclog_or_tty->print_cr("%s [std.
dev = %8.2f ms, max = %8.2f ms]", + prefix, ns.sd(), ns.maximum()); + } +} + +void ConcurrentMark::print_summary_info() { + gclog_or_tty->print_cr(" Concurrent marking:"); + print_ms_time_info(" ", "init marks", _init_times); + print_ms_time_info(" ", "remarks", _remark_times); + { + print_ms_time_info(" ", "final marks", _remark_mark_times); + print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); + + } + print_ms_time_info(" ", "cleanups", _cleanup_times); + gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", + _total_counting_time, + (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / + (double)_cleanup_times.num() + : 0.0)); + if (G1ScrubRemSets) { + gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", + _total_rs_scrub_time, + (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / + (double)_cleanup_times.num() + : 0.0)); + } + gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", + (_init_times.sum() + _remark_times.sum() + + _cleanup_times.sum())/1000.0); + gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " + "(%8.2f s marking, %8.2f s counting).", + cmThread()->vtime_accum(), + cmThread()->vtime_mark_accum(), + cmThread()->vtime_count_accum()); +} + +// Closures +// XXX: there seems to be a lot of code duplication here; +// should refactor and consolidate the shared code. + +// This closure is used to mark refs into the CMS generation in +// the CMS bit map. Called at the first checkpoint. + +// We take a break if someone is trying to stop the world. 
+// When should_yield() holds, yields the concurrent mark thread and
+// returns true; otherwise returns false without doing anything. Worker
+// 0 additionally reports the start and the end of the pause to the
+// policy.
+bool ConcurrentMark::do_yield_check(int worker_i) {
+  if (should_yield()) {
+    if (worker_i == 0)
+      _g1h->g1_policy()->record_concurrent_pause();
+    cmThread()->yield();
+    if (worker_i == 0)
+      _g1h->g1_policy()->record_concurrent_pause_end();
+    return true;
+  } else {
+    return false;
+  }
+}
+
+// Forwards the question to the concurrent mark thread.
+bool ConcurrentMark::should_yield() {
+  return cmThread()->should_yield();
+}
+
+// Returns the bit of _card_bm for the card that contains p. The card
+// index is the offset of p from the start of the G1 reserved region
+// (pointer_delta() with an element size of 1) shifted right by
+// CardTableModRefBS::card_shift.
+bool ConcurrentMark::containing_card_is_marked(void* p) {
+  size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
+  return _card_bm.at(offset >> CardTableModRefBS::card_shift);
+}
+
+// Checks the cards containing start and last only; cards in between
+// the two are not looked at.
+bool ConcurrentMark::containing_cards_are_marked(void* start,
+                                                 void* last) {
+  return
+    containing_card_is_marked(start) &&
+    containing_card_is_marked(last);
+}
+
+#ifndef PRODUCT
+// for debugging purposes
+void ConcurrentMark::print_finger() {
+  gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
+                         _heap_start, _heap_end, _finger);
+  for (int i = 0; i < (int) _max_task_num; ++i) {
+    gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger());
+  }
+  gclog_or_tty->print_cr("");
+}
+#endif
+
+// Closure for iteration over bitmaps
+class CMBitMapClosure : public BitMapClosure {
+private:
+  // the bitmap that is being iterated over
+  CMBitMap*                   _nextMarkBitMap;
+  ConcurrentMark*             _cm;
+  CMTask*                     _task;
+  // true if we're scanning a heap region claimed by the task (so that
+  // we move the finger along), false if we're not, i.e. currently when
+  // scanning a heap region popped from the region stack (so that we
+  // do not move the task finger along; it'd be a mistake if we did so).
+  bool                        _scanning_heap_region;
+
+public:
+  // The initializers follow the declaration order of the members. The
+  // scanning mode gets a definite starting value so that do_bit() never
+  // reads an indeterminate bool; callers are still expected to pick the
+  // mode with set_scanning_heap_region() before iterating.
+  CMBitMapClosure(CMTask *task,
+                  ConcurrentMark* cm,
+                  CMBitMap* nextMarkBitMap)
+    : _nextMarkBitMap(nextMarkBitMap), _cm(cm), _task(task),
+      _scanning_heap_region(false) { }
+
+  void set_scanning_heap_region(bool scanning_heap_region) {
+    _scanning_heap_region = scanning_heap_region;
+  }
+
+  // Called for a set bit of the bitmap: moves the matching task finger
+  // to the object's address, scans the object and partially drains the
+  // task's local queue and the global stack. Returns false once the
+  // task has aborted.
+  bool do_bit(size_t offset) {
+    HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
+    tmp_guarantee_CM( _nextMarkBitMap->isMarked(addr), "invariant" );
+    tmp_guarantee_CM( addr < _cm->finger(), "invariant" );
+
+    if (_scanning_heap_region) {
+      statsOnly( _task->increase_objs_found_on_bitmap() );
+      tmp_guarantee_CM( addr >= _task->finger(), "invariant" );
+      // We move that task's local finger along.
+      _task->move_finger_to(addr);
+    } else {
+      // We move the task's region finger along.
+      _task->move_region_finger_to(addr);
+    }
+
+    _task->scan_object(oop(addr));
+    // we only partially drain the local queue and global stack
+    _task->drain_local_queue(true);
+    _task->drain_global_stack(true);
+
+    // if the has_aborted flag has been raised, we need to bail out of
+    // the iteration
+    return !_task->has_aborted();
+  }
+};
+
+// Closure for iterating over objects, currently only used for
+// processing SATB buffers.
+class CMObjectClosure : public ObjectClosure {
+private:
+  CMTask* _task;
+
+public:
+  CMObjectClosure(CMTask* task) : _task(task) { }
+
+  // Hands every object straight to the owning task.
+  void do_object(oop obj) {
+    _task->deal_with_reference(obj);
+  }
+};
+
+// Closure for iterating over object fields
+class CMOopClosure : public OopClosure {
+private:
+  G1CollectedHeap*   _g1h;
+  ConcurrentMark*    _cm;
+  CMTask*            _task;
+
+public:
+  CMOopClosure(G1CollectedHeap* g1h,
+               ConcurrentMark* cm,
+               CMTask* task)
+    : _g1h(g1h), _cm(cm), _task(task) { }
+
+  // Compressed oops are not supported by this closure.
+  void do_oop(narrowOop* p) {
+    guarantee(false, "NYI");
+  }
+
+  // Loads the reference stored at p and passes it on to the task. The
+  // field itself is expected to lie inside the G1 reserved area.
+  void do_oop(oop* p) {
+    tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant" );
+
+    oop obj = *p;
+    if (_cm->verbose_high()) {
+      gclog_or_tty->print_cr("[%d] we're looking at location "
+                             "*"PTR_FORMAT" = "PTR_FORMAT,
+                             _task->task_id(), p, (void*) obj);
+    }
+    _task->deal_with_reference(obj);
+  }
+};
+
+// Makes hr the task's current region: the local finger starts at the
+// bottom of the region and the region limit is computed afresh.
+void CMTask::setup_for_region(HeapRegion* hr) {
+  tmp_guarantee_CM( hr != NULL && !hr->continuesHumongous(),
+      "claim_region() should have filtered out continues humongous regions" );
+
+  if (_cm->verbose_low()) {
+    gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
+                           _task_id, hr);
+  }
+
+  _curr_region  = hr;
+  _finger       = hr->bottom();
+  update_region_limit();
+}
+
+// Re-reads next_top_at_mark_start() of the current region, adjusts the
+// local finger where the new value requires it, and stores the value
+// as the new region limit.
+void CMTask::update_region_limit() {
+  HeapRegion* hr            = _curr_region;
+  HeapWord* bottom          = hr->bottom();
+  HeapWord* limit           = hr->next_top_at_mark_start();
+
+  if (limit == bottom) {
+    if (_cm->verbose_low()) {
+      gclog_or_tty->print_cr("[%d] found an empty region "
+                             "["PTR_FORMAT", "PTR_FORMAT")",
+                             _task_id, bottom, limit);
+    }
+    // The region was collected underneath our feet.
+    // We set the finger to bottom to ensure that the bitmap
+    // iteration that will follow this will not do anything.
+    // (this is not a condition that holds when we set the region up,
+    // as the region is not supposed to be empty in the first place)
+    _finger = bottom;
+  } else if (limit >= _region_limit) {
+    tmp_guarantee_CM( limit >= _finger, "peace of mind" );
+  } else {
+    tmp_guarantee_CM( limit < _region_limit, "only way to get here" );
+    // This can happen under some pretty unusual circumstances.  An
+    // evacuation pause empties the region underneath our feet (NTAMS
+    // at bottom). We then do some allocation in the region (NTAMS
+    // stays at bottom), followed by the region being used as a GC
+    // alloc region (NTAMS will move to top() and the objects
+    // originally below it will be grayed). All objects now marked in
+    // the region are explicitly grayed, if below the global finger,
+    // and we do not need in fact to scan anything else. So, we simply
+    // set _finger to be limit to ensure that the bitmap iteration
+    // doesn't do anything.
+    _finger = limit;
+  }
+
+  _region_limit = limit;
+}
+
+// Lets go of the current region; there has to be one.
+void CMTask::giveup_current_region() {
+  tmp_guarantee_CM( _curr_region != NULL, "invariant" );
+  if (_cm->verbose_low()) {
+    gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
+                           _task_id, _curr_region);
+  }
+  clear_region_fields();
+}
+
+void CMTask::clear_region_fields() {
+  // Values for these three fields that indicate that we're not
+  // holding on to a region.
+  _curr_region   = NULL;
+  _finger        = NULL;
+  _region_limit  = NULL;
+
+  _region_finger = NULL;
+}
+
+// Prepares the task for a new marking cycle on the given bitmap: the
+// region fields are cleared and the timing counters (plus the marking
+// statistics, when compiled in) are zeroed.
+void CMTask::reset(CMBitMap* nextMarkBitMap) {
+  guarantee( nextMarkBitMap != NULL, "invariant" );
+
+  if (_cm->verbose_low()) {
+    gclog_or_tty->print_cr("[%d] resetting", _task_id);
+  }
+
+  _nextMarkBitMap                = nextMarkBitMap;
+  clear_region_fields();
+
+  _calls                         = 0;
+  _elapsed_time_ms               = 0.0;
+  _termination_time_ms           = 0.0;
+  _termination_start_time_ms     = 0.0;
+
+#if _MARKING_STATS_
+  _local_pushes                  = 0;
+  _local_pops                    = 0;
+  _local_max_size                = 0;
+  _objs_scanned                  = 0;
+  _global_pushes                 = 0;
+  _global_pops                   = 0;
+  _global_max_size               = 0;
+  _global_transfers_to           = 0;
+  _global_transfers_from         = 0;
+  _region_stack_pops             = 0;
+  _regions_claimed               = 0;
+  _objs_found_on_bitmap          = 0;
+  _satb_buffers_processed        = 0;
+  _steal_attempts                = 0;
+  _steals                        = 0;
+  _aborted                       = 0;
+  _aborted_overflow              = 0;
+  _aborted_cm_aborted            = 0;
+  _aborted_yield                 = 0;
+  _aborted_timed_out             = 0;
+  _aborted_satb                  = 0;
+  _aborted_termination           = 0;
+#endif // _MARKING_STATS_
+}
+
+bool CMTask::should_exit_termination() {
+  regular_clock_call();
+  // This is called when we are in the termination protocol. We should
+  // quit if, for some reason, this task wants to abort or the global
+  // stack is not empty (this means that we can get work from it).
+  if (!_cm->mark_stack_empty()) {
+    return true;
+  }
+  return has_aborted();
+}
+
+// This determines whether the method below will check both the local
+// and global fingers when determining whether to push on the stack a
+// gray object (value 1) or whether it will only check the global one
+// (value 0). The tradeoffs are that the former will be a bit more
+// accurate and possibly push less on the stack, but it might also be
+// a little bit slower.
+ +#define _CHECK_BOTH_FINGERS_ 1 + +void CMTask::deal_with_reference(oop obj) { + if (_cm->verbose_high()) + gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT, + _task_id, (void*) obj); + + ++_refs_reached; + + HeapWord* objAddr = (HeapWord*) obj; + if (_g1h->is_in_g1_reserved(objAddr)) { + tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" ); + HeapRegion* hr = _g1h->heap_region_containing(obj); + if (_g1h->is_obj_ill(obj, hr)) { + if (_cm->verbose_high()) + gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked", + _task_id, (void*) obj); + + // we need to mark it first + if (_nextMarkBitMap->parMark(objAddr)) { + // No OrderAccess:store_load() is needed. It is implicit in the + // CAS done in parMark(objAddr) above + HeapWord* global_finger = _cm->finger(); + +#if _CHECK_BOTH_FINGERS_ + // we will check both the local and global fingers + + if (_finger != NULL && objAddr < _finger) { + if (_cm->verbose_high()) + gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), " + "pushing it", _task_id, _finger); + push(obj); + } else if (_curr_region != NULL && objAddr < _region_limit) { + // do nothing + } else if (objAddr < global_finger) { + // Notice that the global finger might be moving forward + // concurrently. This is not a problem. In the worst case, we + // mark the object while it is above the global finger and, by + // the time we read the global finger, it has moved forward + // passed this object. In this case, the object will probably + // be visited when a task is scanning the region and will also + // be pushed on the stack. So, some duplicate work, but no + // correctness problems. 
+ + if (_cm->verbose_high()) + gclog_or_tty->print_cr("[%d] below the global finger " + "("PTR_FORMAT"), pushing it", + _task_id, global_finger); + push(obj); + } else { + // do nothing + } +#else // _CHECK_BOTH_FINGERS_ + // we will only check the global finger + + if (objAddr < global_finger) { + // see long comment above + + if (_cm->verbose_high()) + gclog_or_tty->print_cr("[%d] below the global finger " + "("PTR_FORMAT"), pushing it", + _task_id, global_finger); + push(obj); + } +#endif // _CHECK_BOTH_FINGERS_ + } + } + } +} + +void CMTask::push(oop obj) { + HeapWord* objAddr = (HeapWord*) obj; + tmp_guarantee_CM( _g1h->is_in_g1_reserved(objAddr), "invariant" ); + tmp_guarantee_CM( !_g1h->is_obj_ill(obj), "invariant" ); + tmp_guarantee_CM( _nextMarkBitMap->isMarked(objAddr), "invariant" ); + + if (_cm->verbose_high()) + gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj); + + if (!_task_queue->push(obj)) { + // The local task queue looks full. We need to push some entries + // to the global stack. + + if (_cm->verbose_medium()) + gclog_or_tty->print_cr("[%d] task queue overflow, " + "moving entries to the global stack", + _task_id); + move_entries_to_global_stack(); + + // this should succeed since, even if we overflow the global + // stack, we should have definitely removed some entries from the + // local queue. So, there must be space on it. 
+ bool success = _task_queue->push(obj); + tmp_guarantee_CM( success, "invariant" ); + } + + statsOnly( int tmp_size = _task_queue->size(); + if (tmp_size > _local_max_size) + _local_max_size = tmp_size; + ++_local_pushes ); +} + +void CMTask::reached_limit() { + tmp_guarantee_CM( _words_scanned >= _words_scanned_limit || + _refs_reached >= _refs_reached_limit , + "shouldn't have been called otherwise" ); + regular_clock_call(); +} + +void CMTask::regular_clock_call() { + if (has_aborted()) + return; + + // First, we need to recalculate the words scanned and refs reached + // limits for the next clock call. + recalculate_limits(); + + // During the regular clock call we do the following + + // (1) If an overflow has been flagged, then we abort. + if (_cm->has_overflown()) { + set_has_aborted(); + return; + } + + // If we are not concurrent (i.e. we're doing remark) we don't need + // to check anything else. The other steps are only needed during + // the concurrent marking phase. + if (!concurrent()) + return; + + // (2) If marking has been aborted for Full GC, then we also abort. + if (_cm->has_aborted()) { + set_has_aborted(); + statsOnly( ++_aborted_cm_aborted ); + return; + } + + double curr_time_ms = os::elapsedVTime() * 1000.0; + + // (3) If marking stats are enabled, then we update the step history. +#if _MARKING_STATS_ + if (_words_scanned >= _words_scanned_limit) + ++_clock_due_to_scanning; + if (_refs_reached >= _refs_reached_limit) + ++_clock_due_to_marking; + + double last_interval_ms = curr_time_ms - _interval_start_time_ms; + _interval_start_time_ms = curr_time_ms; + _all_clock_intervals_ms.add(last_interval_ms); + + if (_cm->verbose_medium()) { + gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, " + "scanned = %d%s, refs reached = %d%s", + _task_id, last_interval_ms, + _words_scanned, + (_words_scanned >= _words_scanned_limit) ? " (*)" : "", + _refs_reached, + (_refs_reached >= _refs_reached_limit) ?
" (*)" : ""); + } +#endif // _MARKING_STATS_ + + // (4) We check whether we should yield. If we have to, then we abort. + if (_cm->should_yield()) { + // We should yield. To do this we abort the task. The caller is + // responsible for yielding. + set_has_aborted(); + statsOnly( ++_aborted_yield ); + return; + } + + // (5) We check whether we've reached our time quota. If we have, + // then we abort. + double elapsed_time_ms = curr_time_ms - _start_time_ms; + if (elapsed_time_ms > _time_target_ms) { + set_has_aborted(); + _has_aborted_timed_out = true; + statsOnly( ++_aborted_timed_out ); + return; + } + + // (6) Finally, we check whether there are enough completed SATB + // buffers available for processing. If there are, we abort. + SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); + if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers", + _task_id); + // we do need to process SATB buffers, we'll abort and restart + // the marking task to do so + set_has_aborted(); + statsOnly( ++_aborted_satb ); + return; + } +} + +void CMTask::recalculate_limits() { + _real_words_scanned_limit = _words_scanned + words_scanned_period; + _words_scanned_limit = _real_words_scanned_limit; + + _real_refs_reached_limit = _refs_reached + refs_reached_period; + _refs_reached_limit = _real_refs_reached_limit; +} + +void CMTask::decrease_limits() { + // This is called when we believe that we're going to do an infrequent + // operation which will increase the per byte scanned cost (i.e. move + // entries to/from the global stack). It basically tries to decrease the + // scanning limit so that the clock is called earlier.
+ + if (_cm->verbose_medium()) + gclog_or_tty->print_cr("[%d] decreasing limits", _task_id); + + _words_scanned_limit = _real_words_scanned_limit - + 3 * words_scanned_period / 4; + _refs_reached_limit = _real_refs_reached_limit - + 3 * refs_reached_period / 4; +} + +void CMTask::move_entries_to_global_stack() { + // local array where we'll store the entries that will be popped + // from the local queue + oop buffer[global_stack_transfer_size]; + + int n = 0; + oop obj; + while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { + buffer[n] = obj; + ++n; + } + + if (n > 0) { + // we popped at least one entry from the local queue + + statsOnly( ++_global_transfers_to; _local_pops += n ); + + if (!_cm->mark_stack_push(buffer, n)) { + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id); + set_has_aborted(); + } else { + // the transfer was successful + + if (_cm->verbose_medium()) + gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack", + _task_id, n); + statsOnly( int tmp_size = _cm->mark_stack_size(); + if (tmp_size > _global_max_size) + _global_max_size = tmp_size; + _global_pushes += n ); + } + } + + // this operation was quite expensive, so decrease the limits + decrease_limits(); +} + +void CMTask::get_entries_from_global_stack() { + // local array where we'll store the entries that will be popped + // from the global stack. 
+ oop buffer[global_stack_transfer_size]; + int n; + _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); + tmp_guarantee_CM( n <= global_stack_transfer_size, + "we should not pop more than the given limit" ); + if (n > 0) { + // yes, we did actually pop at least one entry + + statsOnly( ++_global_transfers_from; _global_pops += n ); + if (_cm->verbose_medium()) + gclog_or_tty->print_cr("[%d] popped %d entries from the global stack", + _task_id, n); + for (int i = 0; i < n; ++i) { + bool success = _task_queue->push(buffer[i]); + // We only call this when the local queue is empty or under a + // given target limit. So, we do not expect this push to fail. + tmp_guarantee_CM( success, "invariant" ); + } + + statsOnly( int tmp_size = _task_queue->size(); + if (tmp_size > _local_max_size) + _local_max_size = tmp_size; + _local_pushes += n ); + } + + // this operation was quite expensive, so decrease the limits + decrease_limits(); +} + +void CMTask::drain_local_queue(bool partially) { + if (has_aborted()) + return; + + // Decide what the target size is, depending whether we're going to + // drain it partially (so that other tasks can steal if they run out + // of things to do) or totally (at the very end). 
+ size_t target_size; + if (partially) + target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize); + else + target_size = 0; + + if (_task_queue->size() > target_size) { + if (_cm->verbose_high()) + gclog_or_tty->print_cr("[%d] draining local queue, target size = %d", + _task_id, target_size); + + oop obj; + bool ret = _task_queue->pop_local(obj); + while (ret) { + statsOnly( ++_local_pops ); + + if (_cm->verbose_high()) + gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id, + (void*) obj); + + tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) obj), + "invariant" ); + + scan_object(obj); + + if (_task_queue->size() <= target_size || has_aborted()) + ret = false; + else + ret = _task_queue->pop_local(obj); + } + + if (_cm->verbose_high()) + gclog_or_tty->print_cr("[%d] drained local queue, size = %d", + _task_id, _task_queue->size()); + } +} + +void CMTask::drain_global_stack(bool partially) { + if (has_aborted()) + return; + + // We have a policy to drain the local queue before we attempt to + // drain the global stack. + tmp_guarantee_CM( partially || _task_queue->size() == 0, "invariant" ); + + // Decide what the target size is, depending whether we're going to + // drain it partially (so that other tasks can steal if they run out + // of things to do) or totally (at the very end). Notice that, + // because we move entries from the global stack in chunks or + // because another task might be doing the same, we might in fact + // drop below the target. But, this is not a problem. 
+ size_t target_size; + if (partially) + target_size = _cm->partial_mark_stack_size_target(); + else + target_size = 0; + + if (_cm->mark_stack_size() > target_size) { + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] draining global_stack, target size %d", + _task_id, target_size); + + while (!has_aborted() && _cm->mark_stack_size() > target_size) { + get_entries_from_global_stack(); + drain_local_queue(partially); + } + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] drained global stack, size = %d", + _task_id, _cm->mark_stack_size()); + } +} + +// SATB Queue has several assumptions on whether to call the par or +// non-par versions of the methods. this is why some of the code is +// replicated. We should really get rid of the single-threaded version +// of the code to simplify things. +void CMTask::drain_satb_buffers() { + if (has_aborted()) + return; + + // We set this so that the regular clock knows that we're in the + // middle of draining buffers and doesn't set the abort flag when it + // notices that SATB buffers are available for draining. It'd be + // very counter productive if it did that. :-) + _draining_satb_buffers = true; + + CMObjectClosure oc(this); + SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); + if (ParallelGCThreads > 0) + satb_mq_set.set_par_closure(_task_id, &oc); + else + satb_mq_set.set_closure(&oc); + + // This keeps claiming and applying the closure to completed buffers + // until we run out of buffers or we need to abort. 
+ if (ParallelGCThreads > 0) { + while (!has_aborted() && + satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) { + if (_cm->verbose_medium()) + gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); + statsOnly( ++_satb_buffers_processed ); + regular_clock_call(); + } + } else { + while (!has_aborted() && + satb_mq_set.apply_closure_to_completed_buffer()) { + if (_cm->verbose_medium()) + gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); + statsOnly( ++_satb_buffers_processed ); + regular_clock_call(); + } + } + + if (!concurrent() && !has_aborted()) { + // We should only do this during remark. + if (ParallelGCThreads > 0) + satb_mq_set.par_iterate_closure_all_threads(_task_id); + else + satb_mq_set.iterate_closure_all_threads(); + } + + _draining_satb_buffers = false; + + tmp_guarantee_CM( has_aborted() || + concurrent() || + satb_mq_set.completed_buffers_num() == 0, "invariant" ); + + if (ParallelGCThreads > 0) + satb_mq_set.set_par_closure(_task_id, NULL); + else + satb_mq_set.set_closure(NULL); + + // again, this was a potentially expensive operation, decrease the + // limits to get the regular clock call early + decrease_limits(); +} + +void CMTask::drain_region_stack(BitMapClosure* bc) { + if (has_aborted()) + return; + + tmp_guarantee_CM( _region_finger == NULL, + "it should be NULL when we're not scanning a region" ); + + if (!_cm->region_stack_empty()) { + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] draining region stack, size = %d", + _task_id, _cm->region_stack_size()); + + MemRegion mr = _cm->region_stack_pop(); + // it returns MemRegion() if the pop fails + statsOnly(if (mr.start() != NULL) ++_region_stack_pops ); + + while (mr.start() != NULL) { + if (_cm->verbose_medium()) + gclog_or_tty->print_cr("[%d] we are scanning region " + "["PTR_FORMAT", "PTR_FORMAT")", + _task_id, mr.start(), mr.end()); + tmp_guarantee_CM( mr.end() <= _cm->finger(), + "otherwise the region shouldn't be on the stack" ); + 
assert(!mr.is_empty(), "Only non-empty regions live on the region stack"); + if (_nextMarkBitMap->iterate(bc, mr)) { + tmp_guarantee_CM( !has_aborted(), + "cannot abort the task without aborting the bitmap iteration" ); + + // We finished iterating over the region without aborting. + regular_clock_call(); + if (has_aborted()) + mr = MemRegion(); + else { + mr = _cm->region_stack_pop(); + // it returns MemRegion() if the pop fails + statsOnly(if (mr.start() != NULL) ++_region_stack_pops ); + } + } else { + guarantee( has_aborted(), "currently the only way to do so" ); + + // The only way to abort the bitmap iteration is to return + // false from the do_bit() method. However, inside the + // do_bit() method we move the _region_finger to point to the + // object currently being looked at. So, if we bail out, we + // have definitely set _region_finger to something non-null. + guarantee( _region_finger != NULL, "invariant" ); + + // The iteration was actually aborted. So now _region_finger + // points to the address of the object we last scanned. If we + // leave it there, when we restart this task, we will rescan + // the object. It is easy to avoid this. We move the finger by + // enough to point to the next possible object header (the + // bitmap knows by how much we need to move it as it knows its + // granularity). + MemRegion newRegion = + MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end()); + + if (!newRegion.is_empty()) { + if (_cm->verbose_low()) { + gclog_or_tty->print_cr("[%d] pushing unscanned region" + "[" PTR_FORMAT "," PTR_FORMAT ") on region stack", + _task_id, + newRegion.start(), newRegion.end()); + } + // Now push the part of the region we didn't scan on the + // region stack to make sure a task scans it later. + _cm->region_stack_push(newRegion); + } + // break from while + mr = MemRegion(); + } + _region_finger = NULL; + } + + // We only push regions on the region stack during evacuation + // pauses. 
So if we come out of the above iteration because the region + // stack is empty, it will remain empty until the next yield + // point. So, the guarantee below is safe. + guarantee( has_aborted() || _cm->region_stack_empty(), + "only way to exit the loop" ); + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] drained region stack, size = %d", + _task_id, _cm->region_stack_size()); + } +} + +void CMTask::print_stats() { + gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d", + _task_id, _calls); + gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", + _elapsed_time_ms, _termination_time_ms); + gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", + _step_times_ms.num(), _step_times_ms.avg(), + _step_times_ms.sd()); + gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", + _step_times_ms.maximum(), _step_times_ms.sum()); + +#if _MARKING_STATS_ + gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", + _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), + _all_clock_intervals_ms.sd()); + gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", + _all_clock_intervals_ms.maximum(), + _all_clock_intervals_ms.sum()); + gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d", + _clock_due_to_scanning, _clock_due_to_marking); + gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d", + _objs_scanned, _objs_found_on_bitmap); + gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d", + _local_pushes, _local_pops, _local_max_size); + gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d", + _global_pushes, _global_pops, _global_max_size); + gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d", + _global_transfers_to,_global_transfers_from); + gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d", + _regions_claimed, _region_stack_pops); +
gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed); + gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d", + _steal_attempts, _steals); + gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted); + gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d", + _aborted_overflow, _aborted_cm_aborted, _aborted_yield); + gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d", + _aborted_timed_out, _aborted_satb, _aborted_termination); +#endif // _MARKING_STATS_ +} + +/***************************************************************************** + + The do_marking_step(time_target_ms) method is the building block + of the parallel marking framework. It can be called in parallel + with other invocations of do_marking_step() on different tasks + (but only one per task, obviously) and concurrently with the + mutator threads, or during remark, hence it eliminates the need + for two versions of the code. When called during remark, it will + pick up from where the task left off during the concurrent marking + phase. Interestingly, tasks are also claimable during evacuation + pauses too, since do_marking_step() ensures that it aborts before + it needs to yield. + + The data structures that it uses to do marking work are the + following: + + (1) Marking Bitmap. If there are gray objects that appear only + on the bitmap (this happens either when dealing with an overflow + or when the initial marking phase has simply marked the roots + and didn't push them on the stack), then tasks claim heap + regions whose bitmap they then scan to find gray objects. A + global finger indicates where the end of the last claimed region + is. A local finger indicates how far into the region a task has + scanned. The two fingers are used to determine how to gray an + object (i.e. whether simply marking it is OK, as it will be + visited by a task in the future, or whether it needs to be also + pushed on a stack). + + (2) Local Queue.
The local queue of the task which is accessed + reasonably efficiently by the task. Other tasks can steal from + it when they run out of work. Throughout the marking phase, a + task attempts to keep its local queue short but not totally + empty, so that entries are available for stealing by other + tasks. Only when there is no more work, a task will totally + drain its local queue. + + (3) Global Mark Stack. This handles local queue overflow. During + marking only sets of entries are moved between it and the local + queues, as access to it requires a mutex and more fine-grain + interaction with it which might cause contention. If it + overflows, then the marking phase should restart and iterate + over the bitmap to identify gray objects. Throughout the marking + phase, tasks attempt to keep the global mark stack at a small + length but not totally empty, so that entries are available for + popping by other tasks. Only when there is no more work, tasks + will totally drain the global mark stack. + + (4) Global Region Stack. Entries on it correspond to areas of + the bitmap that need to be scanned since they contain gray + objects. Pushes on the region stack only happen during + evacuation pauses and typically correspond to areas covered by + GC LABS. If it overflows, then the marking phase should restart + and iterate over the bitmap to identify gray objects. Tasks will + try to totally drain the region stack as soon as possible. + + (5) SATB Buffer Queue. This is where completed SATB buffers are + made available. Buffers are regularly removed from this queue + and scanned for roots, so that the queue doesn't get too + long. During remark, all completed buffers are processed, as + well as the filled in parts of any uncompleted buffers. + + The do_marking_step() method tries to abort when the time target + has been reached. There are a few other cases when the + do_marking_step() method also aborts: + + (1) When the marking phase has been aborted (after a Full GC). 
+ + (2) When a global overflow (either on the global stack or the + region stack) has been triggered. Before the task aborts, it + will actually sync up with the other tasks to ensure that all + the marking data structures (local queues, stacks, fingers etc.) + are re-initialised so that when do_marking_step() completes, + the marking phase can immediately restart. + + (3) When enough completed SATB buffers are available. The + do_marking_step() method only tries to drain SATB buffers right + at the beginning. So, if enough buffers are available, the + marking step aborts and the SATB buffers are processed at + the beginning of the next invocation. + + (4) To yield. When we have to yield, we abort and yield + right at the end of do_marking_step(). This saves us from a lot + of hassle as, by yielding we might allow a Full GC. If this + happens then objects will be compacted underneath our feet, the + heap might shrink, etc. We save checking for this by just + aborting and doing the yield right at the end. + + From the above it follows that the do_marking_step() method should + be called in a loop (or, otherwise, regularly) until it completes. + + If a marking step completes without its has_aborted() flag being + true, it means it has completed the current marking phase (and + also all other marking tasks have done so and have all synced up). + + A method called regular_clock_call() is invoked "regularly" (in + sub ms intervals) throughout marking. It is this clock method that + checks all the abort conditions which were mentioned above and + decides when the task should abort. A work-based scheme is used to + trigger this clock method: when the number of object words the + marking phase has scanned or the number of references the marking + phase has visited reach a given limit. Additional invocations of + the clock method have been planted in a few other strategic places + too.
The initial reason for the clock method was to avoid calling + vtime too regularly, as it is quite expensive. So, once it was in + place, it was natural to piggy-back all the other conditions on it + too and not constantly check them throughout the code. + + *****************************************************************************/ + +void CMTask::do_marking_step(double time_target_ms) { + guarantee( time_target_ms >= 1.0, "minimum granularity is 1ms" ); + guarantee( concurrent() == _cm->concurrent(), "they should be the same" ); + + guarantee( concurrent() || _cm->region_stack_empty(), + "the region stack should have been cleared before remark" ); + guarantee( _region_finger == NULL, + "this should be non-null only when a region is being scanned" ); + + G1CollectorPolicy* g1_policy = _g1h->g1_policy(); + guarantee( _task_queues != NULL, "invariant" ); + guarantee( _task_queue != NULL, "invariant" ); + guarantee( _task_queues->queue(_task_id) == _task_queue, "invariant" ); + + guarantee( !_claimed, + "only one thread should claim this task at any one time" ); + + // OK, this doesn't safeguard against all possible scenarios, as it is + // possible for two threads to set the _claimed flag at the same + // time. But it is only for debugging purposes anyway and it will + // catch most problems.
+ _claimed = true; + + _start_time_ms = os::elapsedVTime() * 1000.0; + statsOnly( _interval_start_time_ms = _start_time_ms ); + + double diff_prediction_ms = + g1_policy->get_new_prediction(&_marking_step_diffs_ms); + _time_target_ms = time_target_ms - diff_prediction_ms; + + // set up the variables that are used in the work-based scheme to + // call the regular clock method + _words_scanned = 0; + _refs_reached = 0; + recalculate_limits(); + + // clear all flags + clear_has_aborted(); + _has_aborted_timed_out = false; + _draining_satb_buffers = false; + + ++_calls; + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, " + "target = %1.2lfms >>>>>>>>>>", + _task_id, _calls, _time_target_ms); + + // Set up the bitmap and oop closures. Anything that uses them is + // eventually called from this method, so it is OK to allocate these + // statically. + CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap); + CMOopClosure oop_closure(_g1h, _cm, this); + set_oop_closure(&oop_closure); + + if (_cm->has_overflown()) { + // This can happen if the region stack or the mark stack overflows + // during a GC pause and this task, after a yield point, + // restarts. We have to abort as we need to get into the overflow + // protocol which happens right at the end of this task. + set_has_aborted(); + } + + // First drain any available SATB buffers. After this, we will not + // look at SATB buffers before the next invocation of this method. + // If enough completed SATB buffers are queued up, the regular clock + // will abort this task so that it restarts. + drain_satb_buffers(); + // ...then partially drain the local queue and the global stack + drain_local_queue(true); + drain_global_stack(true); + + // Then totally drain the region stack. We will not look at + // it again before the next invocation of this method. Entries on + // the region stack are only added during evacuation pauses, for + // which we have to yield. 
When we do, we abort the task anyway so + // it will look at the region stack again when it restarts. + bitmap_closure.set_scanning_heap_region(false); + drain_region_stack(&bitmap_closure); + // ...then partially drain the local queue and the global stack + drain_local_queue(true); + drain_global_stack(true); + + do { + if (!has_aborted() && _curr_region != NULL) { + // This means that we're already holding on to a region. + tmp_guarantee_CM( _finger != NULL, + "if region is not NULL, then the finger " + "should not be NULL either" ); + + // We might have restarted this task after an evacuation pause + // which might have evacuated the region we're holding on to + // underneath our feet. Let's read its limit again to make sure + // that we do not iterate over a region of the heap that + // contains garbage (update_region_limit() will also move + // _finger to the start of the region if it is found empty). + update_region_limit(); + // We will start from _finger not from the start of the region, + // as we might be restarting this task after aborting half-way + // through scanning this region. In this case, _finger points to + // the address where we last found a marked object. If this is a + // fresh region, _finger points to start(). + MemRegion mr = MemRegion(_finger, _region_limit); + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] we're scanning part " + "["PTR_FORMAT", "PTR_FORMAT") " + "of region "PTR_FORMAT, + _task_id, _finger, _region_limit, _curr_region); + + // Let's iterate over the bitmap of the part of the + // region that is left. + bitmap_closure.set_scanning_heap_region(true); + if (mr.is_empty() || + _nextMarkBitMap->iterate(&bitmap_closure, mr)) { + // We successfully completed iterating over the region. Now, + // let's give up the region. 
+ giveup_current_region(); + regular_clock_call(); + } else { + guarantee( has_aborted(), "currently the only way to do so" ); + // The only way to abort the bitmap iteration is to return + // false from the do_bit() method. However, inside the + // do_bit() method we move the _finger to point to the + // object currently being looked at. So, if we bail out, we + // have definitely set _finger to something non-null. + guarantee( _finger != NULL, "invariant" ); + + // Region iteration was actually aborted. So now _finger + // points to the address of the object we last scanned. If we + // leave it there, when we restart this task, we will rescan + // the object. It is easy to avoid this. We move the finger by + // enough to point to the next possible object header (the + // bitmap knows by how much we need to move it as it knows its + // granularity). + move_finger_to(_nextMarkBitMap->nextWord(_finger)); + } + } + // At this point we have either completed iterating over the + // region we were holding on to, or we have aborted. + + // We then partially drain the local queue and the global stack. + // (Do we really need this?) + drain_local_queue(true); + drain_global_stack(true); + + // Read the note on the claim_region() method on why it might + // return NULL with potentially more regions available for + // claiming and why we have to check out_of_regions() to determine + // whether we're done or not. + while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { + // We are going to try to claim a new region. We should have + // given up on the previous one. 
+ tmp_guarantee_CM( _curr_region == NULL && + _finger == NULL && + _region_limit == NULL, "invariant" ); + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id); + HeapRegion* claimed_region = _cm->claim_region(_task_id); + if (claimed_region != NULL) { + // Yes, we managed to claim one + statsOnly( ++_regions_claimed ); + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] we successfully claimed " + "region "PTR_FORMAT, + _task_id, claimed_region); + + setup_for_region(claimed_region); + tmp_guarantee_CM( _curr_region == claimed_region, "invariant" ); + } + // It is important to call the regular clock here. It might take + // a while to claim a region if, for example, we hit a large + // block of empty regions. So we need to call the regular clock + // method once round the loop to make sure it's called + // frequently enough. + regular_clock_call(); + } + + if (!has_aborted() && _curr_region == NULL) { + tmp_guarantee_CM( _cm->out_of_regions(), + "at this point we should be out of regions" ); + } + } while ( _curr_region != NULL && !has_aborted()); + + if (!has_aborted()) { + // We cannot check whether the global stack is empty, since other + // tasks might be pushing objects to it concurrently. We also cannot + // check if the region stack is empty because if a thread is aborting + // it can push a partially done region back. + tmp_guarantee_CM( _cm->out_of_regions(), + "at this point we should be out of regions" ); + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] all regions claimed", _task_id); + + // Try to reduce the number of available SATB buffers so that + // remark has less work to do. + drain_satb_buffers(); + } + + // Since we've done everything else, we can now totally drain the + // local queue and global stack. + drain_local_queue(false); + drain_global_stack(false); + + // Attempt at work stealing from other task's queues. + if (!has_aborted()) { + // We have not aborted. 
This means that we have finished all that + // we could. Let's try to do some stealing... + + // We cannot check whether the global stack is empty, since other + // tasks might be pushing objects to it concurrently. We also cannot + // check if the region stack is empty because if a thread is aborting + // it can push a partially done region back. + guarantee( _cm->out_of_regions() && + _task_queue->size() == 0, "only way to reach here" ); + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] starting to steal", _task_id); + + while (!has_aborted()) { + oop obj; + statsOnly( ++_steal_attempts ); + + if (_cm->try_stealing(_task_id, &_hash_seed, obj)) { + if (_cm->verbose_medium()) + gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully", + _task_id, (void*) obj); + + statsOnly( ++_steals ); + + tmp_guarantee_CM( _nextMarkBitMap->isMarked((HeapWord*) obj), + "any stolen object should be marked" ); + scan_object(obj); + + // And since we're towards the end, let's totally drain the + // local queue and global stack. + drain_local_queue(false); + drain_global_stack(false); + } else { + break; + } + } + } + + // We still haven't aborted. Now, let's try to get into the + // termination protocol. + if (!has_aborted()) { + // We cannot check whether the global stack is empty, since other + // tasks might be concurrently pushing objects on it. We also cannot + // check if the region stack is empty because if a thread is aborting + // it can push a partially done region back. + guarantee( _cm->out_of_regions() && + _task_queue->size() == 0, "only way to reach here" ); + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id); + + _termination_start_time_ms = os::elapsedVTime() * 1000.0; + // The CMTask class also extends the TerminatorTerminator class, + // hence its should_exit_termination() method will also decide + // whether to exit the termination protocol or not. 
+ bool finished = _cm->terminator()->offer_termination(this); + double termination_end_time_ms = os::elapsedVTime() * 1000.0; + _termination_time_ms += + termination_end_time_ms - _termination_start_time_ms; + + if (finished) { + // We're all done. + + if (_task_id == 0) { + // let's allow task 0 to do this + if (concurrent()) { + guarantee( _cm->concurrent_marking_in_progress(), "invariant" ); + // we need to set this to false before the next + // safepoint. This way we ensure that the marking phase + // doesn't observe any more heap expansions. + _cm->clear_concurrent_marking_in_progress(); + } + } + + // We can now guarantee that the global stack is empty, since + // all other tasks have finished. + guarantee( _cm->out_of_regions() && + _cm->region_stack_empty() && + _cm->mark_stack_empty() && + _task_queue->size() == 0 && + !_cm->has_overflown() && + !_cm->mark_stack_overflow() && + !_cm->region_stack_overflow(), + "only way to reach here" ); + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id); + } else { + // Apparently there's more work to do. Let's abort this task. It + // will restart it and we can hopefully find more things to do. + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id); + + set_has_aborted(); + statsOnly( ++_aborted_termination ); + } + } + + // Mainly for debugging purposes to make sure that a pointer to the + // closure which was statically allocated in this frame doesn't + // escape it by accident. + set_oop_closure(NULL); + double end_time_ms = os::elapsedVTime() * 1000.0; + double elapsed_time_ms = end_time_ms - _start_time_ms; + // Update the step history. + _step_times_ms.add(elapsed_time_ms); + + if (has_aborted()) { + // The task was aborted for some reason. 
+ + statsOnly( ++_aborted ); + + if (_has_aborted_timed_out) { + double diff_ms = elapsed_time_ms - _time_target_ms; + // Keep statistics of how well we did with respect to hitting + // our target only if we actually timed out (if we aborted for + // other reasons, then the results might get skewed). + _marking_step_diffs_ms.add(diff_ms); + } + + if (_cm->has_overflown()) { + // This is the interesting one. We aborted because a global + // overflow was raised. This means we have to restart the + // marking phase and start iterating over regions. However, in + // order to do this we have to make sure that all tasks stop + // what they are doing and re-initialise in a safe manner. We + // will achieve this with the use of two barrier sync points. + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] detected overflow", _task_id); + + _cm->enter_first_sync_barrier(_task_id); + // When we exit this sync barrier we know that all tasks have + // stopped doing marking work. So, it's now safe to + // re-initialise our data structures. At the end of this method, + // task 0 will clear the global data structures. + + statsOnly( ++_aborted_overflow ); + + // We clear the local state of this task... + clear_region_fields(); + + // ...and enter the second barrier. + _cm->enter_second_sync_barrier(_task_id); + // At this point everything has bee re-initialised and we're + // ready to restart. 
+ } + + if (_cm->verbose_low()) { + gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, " + "elapsed = %1.2lfms <<<<<<<<<<", + _task_id, _time_target_ms, elapsed_time_ms); + if (_cm->has_aborted()) + gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========", + _task_id); + } + } else { + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, " + "elapsed = %1.2lfms <<<<<<<<<<", + _task_id, _time_target_ms, elapsed_time_ms); + } + + _claimed = false; +} + +CMTask::CMTask(int task_id, + ConcurrentMark* cm, + CMTaskQueue* task_queue, + CMTaskQueueSet* task_queues) + : _g1h(G1CollectedHeap::heap()), + _co_tracker(G1CMGroup), + _task_id(task_id), _cm(cm), + _claimed(false), + _nextMarkBitMap(NULL), _hash_seed(17), + _task_queue(task_queue), + _task_queues(task_queues), + _oop_closure(NULL) { + guarantee( task_queue != NULL, "invariant" ); + guarantee( task_queues != NULL, "invariant" ); + + statsOnly( _clock_due_to_scanning = 0; + _clock_due_to_marking = 0 ); + + _marking_step_diffs_ms.add(0.5); +} diff --git a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp new file mode 100644 index 00000000000..a572f74f3a7 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp @@ -0,0 +1,1049 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class G1CollectedHeap;
+class CMTask;
+// NOTE(review): these typedefs name GenericTaskQueue / GenericTaskQueueSet
+// without a template argument list. If those are class templates, the
+// argument list has been lost here -- confirm against their declaration.
+typedef GenericTaskQueue CMTaskQueue;
+typedef GenericTaskQueueSet CMTaskQueueSet;
+
+// A generic CM bit map.  This is essentially a wrapper around the BitMap
+// class, with one bit per (1<<_shifter) HeapWords.
+// This class only declares operations that read or iterate over the
+// marks; the operations that set or clear marks are declared by the
+// CMBitMap subclass.
+
+class CMBitMapRO {
+ protected:
+  HeapWord* _bmStartWord;      // base address of range covered by map
+  size_t    _bmWordSize;       // map size (in #HeapWords covered)
+  const int _shifter;          // map to char or bit
+  VirtualSpace _virtual_space; // underlying the bit map
+  BitMap    _bm;               // the bit map itself
+
+ public:
+  // constructor
+  CMBitMapRO(ReservedSpace rs, int shifter);
+
+  enum { do_yield = true };
+
+  // inquiries
+  HeapWord* startWord()   const { return _bmStartWord; }
+  size_t    sizeInWords() const { return _bmWordSize;  }
+  // the following is one past the last word in space
+  HeapWord* endWord()     const { return _bmStartWord + _bmWordSize; }
+
+  // read marks
+
+  // Returns the bit that corresponds to "addr". "addr" must lie within
+  // [startWord(), endWord()); this is only checked by the assert.
+  bool isMarked(HeapWord* addr) const {
+    assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
+           "outside underlying space?");
+    return _bm.at(heapWordToOffset(addr));
+  }
+
+  // iteration
+  bool iterate(BitMapClosure* cl) { return _bm.iterate(cl); }
+  bool iterate(BitMapClosure* cl, MemRegion mr);
+
+  // Return the address corresponding to the next marked bit at or after
+  // "addr", and before "limit", if "limit" is non-NULL.
If there is no
+  // such bit, returns "limit" if that is non-NULL, or else "endWord()".
+  HeapWord* getNextMarkedWordAddress(HeapWord* addr,
+                                     HeapWord* limit = NULL) const;
+  // Return the address corresponding to the next unmarked bit at or after
+  // "addr", and before "limit", if "limit" is non-NULL.  If there is no
+  // such bit, returns "limit" if that is non-NULL, or else "endWord()".
+  HeapWord* getNextUnmarkedWordAddress(HeapWord* addr,
+                                       HeapWord* limit = NULL) const;
+
+  // conversion utilities
+  // XXX Fix these so that offsets are size_t's...
+  // Converts a bit offset into the address of the first HeapWord that
+  // the bit covers.
+  HeapWord* offsetToHeapWord(size_t offset) const {
+    return _bmStartWord + (offset << _shifter);
+  }
+  // Converts an address into the offset of the bit that covers it.
+  size_t heapWordToOffset(HeapWord* addr) const {
+    return pointer_delta(addr, _bmStartWord) >> _shifter;
+  }
+  int heapWordDiffToOffsetDiff(size_t diff) const;
+  // Returns the address that corresponds to the bit following the one
+  // that covers "addr".
+  HeapWord* nextWord(HeapWord* addr) {
+    return offsetToHeapWord(heapWordToOffset(addr) + 1);
+  }
+
+  void mostly_disjoint_range_union(BitMap*   from_bitmap,
+                                   size_t    from_start_index,
+                                   HeapWord* to_start_word,
+                                   size_t    word_num);
+
+  // debugging
+  NOT_PRODUCT(bool covers(ReservedSpace rs) const;)
+};
+
+// A CM bit map that can also be written: it adds the operations that
+// set and clear marks to the read-only interface of CMBitMapRO.
+class CMBitMap : public CMBitMapRO {
+
+ public:
+  // constructor
+  CMBitMap(ReservedSpace rs, int shifter) :
+    CMBitMapRO(rs, shifter) {}
+
+  // write marks
+  // All of the following require "addr" to lie within the range covered
+  // by the map; this is only checked by the asserts.
+  void mark(HeapWord* addr) {
+    assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
+           "outside underlying space?");
+    _bm.at_put(heapWordToOffset(addr), true);
+  }
+  void clear(HeapWord* addr) {
+    assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
+           "outside underlying space?");
+    _bm.at_put(heapWordToOffset(addr), false);
+  }
+  // The "par" variants go through BitMap::par_at_put() and return its
+  // result.
+  bool parMark(HeapWord* addr) {
+    assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
+           "outside underlying space?");
+    return _bm.par_at_put(heapWordToOffset(addr), true);
+  }
+  bool parClear(HeapWord* addr) {
+    assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize),
+           "outside underlying space?");
+    return
_bm.par_at_put(heapWordToOffset(addr), false);
+  }
+  void markRange(MemRegion mr);
+  void clearAll();
+  void clearRange(MemRegion mr);
+
+  // Starting at the bit corresponding to "addr" (inclusive), find the next
+  // "1" bit, if any.  This bit starts some run of consecutive "1"'s; find
+  // the end of this run (stopping at "end_addr").  Return the MemRegion
+  // covering from the start of the region corresponding to the first bit
+  // of the run to the end of the region corresponding to the last bit of
+  // the run.  If there is no "1" bit at or after "addr", return an empty
+  // MemRegion.
+  MemRegion getAndClearMarkedRegion(HeapWord* addr, HeapWord* end_addr);
+};
+
+// Represents a marking stack used by the CM collector.
+// Ideally this should be GrowableArray<> just like MSC's marking stack(s).
+class CMMarkStack {
+  ConcurrentMark* _cm;
+  oop*   _base;      // bottom of stack
+  jint   _index;     // one more than last occupied index
+  jint   _capacity;  // max #elements
+  jint   _oops_do_bound;  // Number of elements to include in next iteration.
+  NOT_PRODUCT(jint _max_depth;)  // max depth plumbed during run
+
+  bool   _overflow;  // set by push() when the stack is full
+  DEBUG_ONLY(bool _drain_in_progress;)
+  DEBUG_ONLY(bool _drain_in_progress_yields;)
+
+ public:
+  CMMarkStack(ConcurrentMark* cm);
+  ~CMMarkStack();
+
+  void allocate(size_t size);
+
+  // Removes and returns the top entry, or returns NULL if the stack is
+  // empty.
+  oop pop() {
+    if (!isEmpty()) {
+      return _base[--_index] ;
+    }
+    return NULL;
+  }
+
+  // If overflow happens, don't do the push, and record the overflow.
+  // *Requires* that "ptr" is already marked.
+  void push(oop ptr) {
+    if (isFull()) {
+      // Record overflow.
+      _overflow = true;
+      return;
+    } else {
+      _base[_index++] = ptr;
+      NOT_PRODUCT(_max_depth = MAX2(_max_depth, _index));
+    }
+  }
+  // Non-block impl.  Note: concurrency is allowed only with other
+  // "par_push" operations, not with "pop" or "drain".  We would need
+  // parallel versions of them if such concurrency was desired.
+  void par_push(oop ptr);
+
+  // Pushes the first "n" elements of "ptr_arr" on the stack.
+  // Non-block impl.  Note: concurrency is allowed only with other
+  // "par_adjoin_arr" or "push" operations, not with "pop" or "drain".
+  void par_adjoin_arr(oop* ptr_arr, int n);
+
+  // Pushes the first "n" elements of "ptr_arr" on the stack.
+  // Locking impl: concurrency is allowed only with
+  // "par_push_arr" and/or "par_pop_arr" operations, which use the same
+  // locking strategy.
+  void par_push_arr(oop* ptr_arr, int n);
+
+  // If returns false, the array was empty.  Otherwise, removes up to "max"
+  // elements from the stack, and transfers them to "ptr_arr" in an
+  // unspecified order.  The actual number transferred is given in "n" ("n
+  // == 0" is deliberately redundant with the return value.)  Locking impl:
+  // concurrency is allowed only with "par_push_arr" and/or "par_pop_arr"
+  // operations, which use the same locking strategy.
+  bool par_pop_arr(oop* ptr_arr, int max, int* n);
+
+  // Drain the mark stack, applying the given closure to all fields of
+  // objects on the stack.  (That is, continue until the stack is empty,
+  // even if closure applications add entries to the stack.)  The "bm"
+  // argument, if non-null, may be used to verify that only marked objects
+  // are on the mark stack.  If "yield_after" is "true", then the
+  // concurrent marker performing the drain offers to yield after
+  // processing each object.  If a yield occurs, stops the drain operation
+  // and returns false.  Otherwise, returns true.
+  // "OopClosureClass" is the static type of the closure that is applied;
+  // it is the template parameter of this member function.
+  template<class OopClosureClass>
+  bool drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after = false);
+
+  bool isEmpty()    { return _index == 0; }
+  bool isFull()     { return _index == _capacity; }
+  int  maxElems()   { return _capacity; }
+
+  bool overflow() { return _overflow; }
+  void clear_overflow() { _overflow = false; }
+
+  int  size() { return _index; }
+
+  // Empties the stack and also clears the overflow flag.
+  void setEmpty()   { _index = 0; clear_overflow(); }
+
+  // Record the current size; a subsequent "oops_do" will iterate only over
+  // indices valid at the time of this call.
+  // With the default argument (-1) the bound is set to the current size
+  // of the stack; otherwise it is set to the given value.
+  void set_oops_do_bound(jint bound = -1) {
+    if (bound == -1) {
+      _oops_do_bound = _index;
+    } else {
+      _oops_do_bound = bound;
+    }
+  }
+  jint oops_do_bound() { return _oops_do_bound; }
+  // iterate over the oops in the mark stack, up to the bound recorded via
+  // the call above.
+  void oops_do(OopClosure* f);
+};
+
+// A stack of MemRegion entries. ConcurrentMark uses one instance of it
+// (its _regionStack field) to hold the grey regions.
+class CMRegionStack {
+  MemRegion* _base;           // storage for the entries
+  jint       _capacity;       // max #elements
+  jint       _index;          // number of entries currently on the stack
+  jint       _oops_do_bound;  // see set_oops_do_bound()
+  bool       _overflow;
+public:
+  CMRegionStack();
+  ~CMRegionStack();
+  void allocate(size_t size);
+
+  // This is lock-free; assumes that it will only be called in parallel
+  // with other "push" operations (no pops).
+  void push(MemRegion mr);
+
+  // Lock-free; assumes that it will only be called in parallel
+  // with other "pop" operations (no pushes).
+  MemRegion pop();
+
+  bool isEmpty()    { return _index == 0; }
+  bool isFull()     { return _index == _capacity; }
+
+  bool overflow() { return _overflow; }
+  void clear_overflow() { _overflow = false; }
+
+  int  size() { return _index; }
+
+  // It iterates over the entries in the region stack and it
+  // invalidates (i.e. assigns MemRegion()) the ones that point to
+  // regions in the collection set.
+  bool invalidate_entries_into_cset();
+
+  // This gives an upper bound up to which the iteration in
+  // invalidate_entries_into_cset() will reach. This prevents
+  // newly-added entries from being unnecessarily scanned.
+  void set_oops_do_bound() {
+    _oops_do_bound = _index;
+  }
+
+  // Empties the stack and also clears the overflow flag.
+  void setEmpty()   { _index = 0; clear_overflow(); }
+};
+
+// this will enable a variety of different statistics per GC task
+#define _MARKING_STATS_       0
+// this will enable the higher verbose levels
+#define _MARKING_VERBOSE_     0
+
+// statsOnly(statement) expands to the given statement when
+// _MARKING_STATS_ is non-zero and to an empty statement otherwise.
+#if _MARKING_STATS_
+#define statsOnly(statement)  \
+do {                          \
+  statement ;                 \
+} while (0)
+#else // _MARKING_STATS_
+#define statsOnly(statement)  \
+do {                          \
+} while (0)
+#endif // _MARKING_STATS_
+
+// Some extra guarantees that I like to also enable in optimised mode
+// when debugging.
If you want to enable them, comment out the assert
+// macro and uncomment the guarantee macro
+// #define tmp_guarantee_CM(expr, str) guarantee(expr, str)
+#define tmp_guarantee_CM(expr, str) assert(expr, str)
+
+// The verbose levels of concurrent marking, in increasing order of
+// detail (the verbose_*() methods of ConcurrentMark compare against
+// them with >=).
+typedef enum {
+  no_verbose  = 0,   // verbose turned off
+  stats_verbose,     // only prints stats at the end of marking
+  low_verbose,       // low verbose, mostly per region and per major event
+  medium_verbose,    // a bit more detailed than low
+  high_verbose       // per object verbose
+} CMVerboseLevel;
+
+
+class ConcurrentMarkThread;
+
+class ConcurrentMark {
+  friend class ConcurrentMarkThread;
+  friend class CMTask;
+  friend class CMBitMapClosure;
+  friend class CSMarkOopClosure;
+  friend class CMGlobalObjectClosure;
+  friend class CMRemarkTask;
+  friend class CMConcurrentMarkingTask;
+  friend class G1ParNoteEndTask;
+  friend class CalcLiveObjectsClosure;
+
+protected:
+  ConcurrentMarkThread* _cmThread;   // the thread doing the work
+  G1CollectedHeap*      _g1h;        // the heap.
+  size_t                _parallel_marking_threads; // the number of marking
+                                                   // threads we'll use
+  double                _sleep_factor; // how much we have to sleep, with
+                                       // respect to the work we just did, to
+                                       // meet the marking overhead goal
+  double                _marking_task_overhead; // marking target overhead for
+                                                // a single task
+
+  // same as the two above, but for the cleanup task
+  double                _cleanup_sleep_factor;
+  double                _cleanup_task_overhead;
+
+  // Stuff related to age cohort processing.
+  // NOTE(review): the _pre / _post arrays look like padding around
+  // "list" -- presumably to keep the lists of different threads on
+  // separate cache lines; confirm before relying on it.
+  struct ParCleanupThreadState {
+    char _pre[64];
+    UncleanRegionList list;
+    char _post[64];
+  };
+  ParCleanupThreadState** _par_cleanup_thread_state;
+
+  // CMS marking support structures
+  CMBitMap                _markBitMap1;
+  CMBitMap                _markBitMap2;
+  CMBitMapRO*             _prevMarkBitMap; // completed mark bitmap
+  CMBitMap*               _nextMarkBitMap; // under-construction mark bitmap
+  bool                    _at_least_one_mark_complete;
+
+  BitMap                  _region_bm;
+  BitMap                  _card_bm;
+
+  // Heap bounds
+  HeapWord*               _heap_start;
+  HeapWord*               _heap_end;
+
+  // For gray objects
+  CMMarkStack             _markStack; // Grey objects behind global finger.
+  CMRegionStack           _regionStack; // Grey regions behind global finger.
+  HeapWord* volatile      _finger;  // the global finger, region aligned,
+                                    // always points to the end of the
+                                    // last claimed region
+
+  // marking tasks
+  size_t                  _max_task_num; // maximum task number
+  size_t                  _active_tasks; // task num currently active
+  CMTask**                _tasks;        // task array (max_task_num len)
+  CMTaskQueueSet*         _task_queues;  // task queue set
+  ParallelTaskTerminator  _terminator;   // for termination
+
+  // Two sync barriers that are used to synchronise tasks when an
+  // overflow occurs. The algorithm is the following. All tasks enter
+  // the first one to ensure that they have all stopped manipulating
+  // the global data structures. After they exit it, they re-initialise
+  // their data structures and task 0 re-initialises the global data
+  // structures. Then, they enter the second sync barrier. This
+  // ensures that no task starts doing work before all data
+  // structures (local and global) have been re-initialised. When they
+  // exit it, they are free to start working again.
+  WorkGangBarrierSync     _first_overflow_barrier_sync;
+  WorkGangBarrierSync     _second_overflow_barrier_sync;
+
+
+  // this is set by any task, when an overflow on the global data
+  // structures is detected.
+  volatile bool           _has_overflown;
+  // true: marking is concurrent, false: we're in remark
+  volatile bool           _concurrent;
+  // set at the end of a Full GC so that marking aborts
+  volatile bool           _has_aborted;
+  // used when remark aborts due to an overflow to indicate that
+  // another concurrent marking phase should start
+  volatile bool           _restart_for_overflow;
+
+  // This is true from the very start of concurrent marking until the
+  // point when all the tasks complete their work. It is really used
+  // to determine the points between the end of concurrent marking and
+  // time of remark.
+  volatile bool           _concurrent_marking_in_progress;
+
+  // verbose level
+  CMVerboseLevel          _verbose_level;
+
+  COTracker               _cleanup_co_tracker;
+
+  // These two fields are used to implement the optimisation that
+  // avoids pushing objects on the global/region stack if there are
+  // no collection set regions above the lowest finger.
+
+  // This is the lowest finger (among the global and local fingers),
+  // which is calculated before a new collection set is chosen.
+  HeapWord* _min_finger;
+  // If this flag is true, objects/regions that are marked below the
+  // finger should be pushed on the stack(s). If this flag is
+  // false, it is safe not to push them on the stack(s).
+  bool      _should_gray_objects;
+
+  // All of these times are in ms.
+  NumberSeq _init_times;
+  NumberSeq _remark_times;
+  NumberSeq   _remark_mark_times;
+  NumberSeq   _remark_weak_ref_times;
+  NumberSeq _cleanup_times;
+  double    _total_counting_time;
+  double    _total_rs_scrub_time;
+
+  double*   _accum_task_vtime;   // accumulated task vtime
+
+  WorkGang* _parallel_workers;
+
+  void weakRefsWork(bool clear_all_soft_refs);
+
+  void swapMarkBitMaps();
+
+  // It resets the global marking data structures, as well as the
+  // task local ones; should be called during initial mark.
+  void reset();
+  // It resets all the marking data structures.
+  void clear_marking_state();
+
+  // It should be called to indicate which phase we're in (concurrent
+  // mark or remark) and how many threads are currently active.
+  void set_phase(size_t active_tasks, bool concurrent);
+  // We do this after we're done with marking so that the marking data
+  // structures are initialised to a sensible and predictable state.
+  void set_non_marking_state();
+
+  // prints all gathered CM-related statistics
+  void print_stats();
+
+  // accessor methods
+  size_t parallel_marking_threads() { return _parallel_marking_threads; }
+  double sleep_factor()             { return _sleep_factor; }
+  double marking_task_overhead()    { return _marking_task_overhead;}
+  double cleanup_sleep_factor()     { return _cleanup_sleep_factor; }
+  double cleanup_task_overhead()    { return _cleanup_task_overhead;}
+
+  HeapWord*               finger()        { return _finger;   }
+  bool                    concurrent()    { return _concurrent; }
+  size_t                  active_tasks()  { return _active_tasks; }
+  ParallelTaskTerminator* terminator()    { return &_terminator; }
+
+  // It claims the next available region to be scanned by a marking
+  // task. It might return NULL if the next region is empty or we have
+  // run out of regions. In the latter case, out_of_regions()
+  // determines whether we've really run out of regions or the task
+  // should call claim_region() again.  This might seem a bit
+  // awkward. Originally, the code was written so that claim_region()
+  // either successfully returned with a non-empty region or there
+  // were no more regions to be claimed. The problem with this was
+  // that, in certain circumstances, it iterated over large chunks of
+  // the heap finding only empty regions and, while it was working, it
+  // was preventing the calling task from calling its regular clock
+  // method. So, this way, each task will spend very little time in
+  // claim_region() and is allowed to call the regular clock method
+  // frequently.
+  HeapRegion* claim_region(int task);
+
+  // It determines whether we've run out of regions to scan, i.e.
+  // whether the global finger has reached the end of the heap.
+  bool        out_of_regions() { return _finger == _heap_end; }
+
+  // Returns the task with the given id. The id must be within
+  // [0, _active_tasks); this is enforced with a guarantee.
+  CMTask* task(int id) {
+    guarantee( 0 <= id && id < (int) _active_tasks, "task id not within "
+               "active bounds" );
+    return _tasks[id];
+  }
+
+  // Returns the task queue with the given id. The id must be within
+  // [0, _active_tasks); this is enforced with a guarantee.
+  CMTaskQueue* task_queue(int id) {
+    guarantee( 0 <= id && id < (int) _active_tasks, "task queue id not within "
+               "active bounds" );
+    return (CMTaskQueue*) _task_queues->queue(id);
+  }
+
+  // Returns the task queue set
+  CMTaskQueueSet* task_queues()  { return _task_queues; }
+
+  // Access / manipulation of the overflow flag which is set to
+  // indicate that the global stack or region stack has overflown
+  bool has_overflown()           { return _has_overflown; }
+  void set_has_overflown()       { _has_overflown = true; }
+  void clear_has_overflown()     { _has_overflown = false; }
+
+  bool has_aborted()             { return _has_aborted; }
+  bool restart_for_overflow()    { return _restart_for_overflow; }
+
+  // Methods to enter the two overflow sync barriers
+  void enter_first_sync_barrier(int task_num);
+  void enter_second_sync_barrier(int task_num);
+
+public:
+  // Manipulation of the global mark stack.
+  // Notice that the first mark_stack_push is CAS-based, whereas the
+  // two below are Mutex-based. This is OK since the first one is only
+  // called during evacuation pauses and doesn't compete with the
+  // other two (which are called by the marking tasks during
+  // concurrent marking or remark).
+  // Pushes "p" on the global mark stack. If the stack reports an
+  // overflow afterwards, the global overflow flag is raised and false
+  // is returned; otherwise true is returned.
+  bool mark_stack_push(oop p) {
+    _markStack.par_push(p);
+    if (!_markStack.overflow()) {
+      return true;
+    }
+    set_has_overflown();
+    return false;
+  }
+  // Same as above, but pushes the first "n" entries of "arr".
+  bool mark_stack_push(oop* arr, int n) {
+    _markStack.par_push_arr(arr, n);
+    if (!_markStack.overflow()) {
+      return true;
+    }
+    set_has_overflown();
+    return false;
+  }
+  // Pops up to "max" entries from the global mark stack into "arr";
+  // the number of entries actually popped is stored in "*n".
+  void mark_stack_pop(oop* arr, int max, int* n) {
+    _markStack.par_pop_arr(arr, max, n);
+  }
+  size_t mark_stack_size() {
+    return _markStack.size();
+  }
+  // One third of the capacity of the global mark stack.
+  size_t partial_mark_stack_size_target() {
+    return _markStack.maxElems()/3;
+  }
+  bool mark_stack_overflow() {
+    return _markStack.overflow();
+  }
+  bool mark_stack_empty() {
+    return _markStack.isEmpty();
+  }
+
+  // Manipulation of the region stack
+
+  // Pushes "mr" on the region stack. If the stack reports an overflow
+  // afterwards, the global overflow flag is raised and false is
+  // returned; otherwise true is returned.
+  bool region_stack_push(MemRegion mr) {
+    _regionStack.push(mr);
+    if (!_regionStack.overflow()) {
+      return true;
+    }
+    set_has_overflown();
+    return false;
+  }
+  MemRegion region_stack_pop() {
+    return _regionStack.pop();
+  }
+  int region_stack_size() {
+    return _regionStack.size();
+  }
+  bool region_stack_overflow() {
+    return _regionStack.overflow();
+  }
+  bool region_stack_empty() {
+    return _regionStack.isEmpty();
+  }
+
+  // Accessors for the "concurrent marking in progress" flag.
+  bool concurrent_marking_in_progress()       { return _concurrent_marking_in_progress; }
+  void set_concurrent_marking_in_progress()   { _concurrent_marking_in_progress = true; }
+  void clear_concurrent_marking_in_progress() { _concurrent_marking_in_progress = false; }
+
+  // Adds "vtime" to the accumulated vtime of task "i".
+  void update_accum_task_vtime(int i, double vtime) {
+    _accum_task_vtime[i] = _accum_task_vtime[i] + vtime;
+  }
+
+  // Returns the sum of the accumulated vtimes of all the tasks
+  // (indices 0 to _max_task_num - 1).
+  double all_task_accum_vtime() {
+    const int task_num = (int)_max_task_num;
+    double total = 0.0;
+    for (int idx = 0; idx < task_num; ++idx) {
+      total += _accum_task_vtime[idx];
+    }
+    return total;
+  }
+
+  // Attempts to steal an object from the task queues of other tasks
+  bool try_stealing(int task_num, int* hash_seed, oop& obj) {
+    const bool stolen = _task_queues->steal(task_num, hash_seed, obj);
+    return stolen;
+  }
+
+  // It grays an object by first marking it. Then, if it's behind the
+  // global finger, it also pushes it on the global stack.
+  void deal_with_reference(oop obj);
+
+  ConcurrentMark(ReservedSpace rs, int max_regions);
+  ~ConcurrentMark();
+  ConcurrentMarkThread* cmThread() { return _cmThread; }
+
+  // The bitmap of the completed marking and the bitmap of the marking
+  // that is under construction, respectively.
+  CMBitMapRO* prevMarkBitMap() const { return _prevMarkBitMap; }
+  CMBitMap*   nextMarkBitMap() const { return _nextMarkBitMap; }
+
+  // The following three are interactions between CM and
+  // G1CollectedHeap
+
+  // This notifies CM that a root during initial-mark needs to be
+  // grayed and it's MT-safe. Currently, we just mark it. But, in the
+  // future, we can experiment with pushing it on the stack and we can
+  // do this without changing G1CollectedHeap.
+  void grayRoot(oop p);
+  // It's used during evacuation pauses to gray a region, if
+  // necessary, and it's MT-safe. It assumes that the caller has
+  // marked any objects on that region. If _should_gray_objects is
+  // true and we're still doing concurrent marking, the region is
+  // pushed on the region stack, if it is located below the global
+  // finger, otherwise we do nothing.
+  void grayRegionIfNecessary(MemRegion mr);
+  // It's used during evacuation pauses to mark and, if necessary,
+  // gray a single object and it's MT-safe. It assumes the caller did
+  // not mark the object. If _should_gray_objects is true and we're
+  // still doing concurrent marking, the object is pushed on the
+  // global stack, if it is located below the global finger, otherwise
+  // we do nothing.
+  void markAndGrayObjectIfNecessary(oop p);
+
+  // This iterates over the bitmap of the previous marking and prints
+  // out all objects that are marked on the bitmap and indicates
+  // whether what they point to is also marked or not.
+  void print_prev_bitmap_reachable();
+
+  // Clear the next marking bitmap (will be called concurrently).
+  void clearNextBitmap();
+
+  // main CMS steps and related support
+  void checkpointRootsInitial();
+
+  // These two do the work that needs to be done before and after the
+  // initial root checkpoint.
Since this checkpoint can be done at two
+  // different points (i.e. an explicit pause or piggy-backed on a
+  // young collection), it's nice to be able to easily share the
+  // pre/post code. It might be the case that we can put everything in
+  // the post method. TP
+  void checkpointRootsInitialPre();
+  void checkpointRootsInitialPost();
+
+  // Do concurrent phase of marking, to a tentative transitive closure.
+  void markFromRoots();
+
+  // Process all unprocessed SATB buffers. It is called at the
+  // beginning of an evacuation pause.
+  void drainAllSATBBuffers();
+
+  void checkpointRootsFinal(bool clear_all_soft_refs);
+  void checkpointRootsFinalWork();
+  void calcDesiredRegions();
+  void cleanup();
+  void completeCleanup();
+
+  // Mark in the previous bitmap.  NB: this is usually read-only, so use
+  // this carefully!
+  void markPrev(oop p);
+  void clear(oop p);
+  // Clears marks for all objects in the given range, for both prev and
+  // next bitmaps.  NB: the previous bitmap is usually read-only, so use
+  // this carefully!
+  void clearRangeBothMaps(MemRegion mr);
+
+  // Record the current top of the mark and region stacks; a
+  // subsequent oops_do() on the mark stack and
+  // invalidate_entries_into_cset() on the region stack will iterate
+  // only over indices valid at the time of this call.
+  void set_oops_do_bound() {
+    _markStack.set_oops_do_bound();
+    _regionStack.set_oops_do_bound();
+  }
+  // Iterate over the oops in the mark stack and all local queues. It
+  // also calls invalidate_entries_into_cset() on the region stack.
+  void oops_do(OopClosure* f);
+  // It is called at the end of an evacuation pause during marking so
+  // that CM is notified of where the new end of the heap is. It
+  // doesn't do anything if concurrent_marking_in_progress() is false,
+  // unless the force parameter is true.
+  void update_g1_committed(bool force = false);
+
+  void complete_marking_in_collection_set();
+
+  // It indicates that a new collection set is being chosen.
+  void newCSet();
+  // It registers a collection set heap region with CM. This is used
+  // to determine whether any heap regions are located above the finger.
+  void registerCSetRegion(HeapRegion* hr);
+
+  // Returns "true" if at least one mark has been completed.
+  bool at_least_one_mark_complete() { return _at_least_one_mark_complete; }
+
+  bool isMarked(oop p) const {
+    assert(p != NULL && p->is_oop(), "expected an oop");
+    HeapWord* addr = (HeapWord*)p;  // must lie in [startWord, endWord) of the next bitmap
+    assert(addr >= _nextMarkBitMap->startWord() &&
+           addr < _nextMarkBitMap->endWord(), "in a region");
+
+    return _nextMarkBitMap->isMarked(addr);
+  }
+
+  inline bool not_yet_marked(oop p) const;
+
+  // XXX Debug code
+  bool containing_card_is_marked(void* p);
+  bool containing_cards_are_marked(void* start, void* last);
+
+  bool isPrevMarked(oop p) const {
+    assert(p != NULL && p->is_oop(), "expected an oop");
+    HeapWord* addr = (HeapWord*)p;  // must lie in [startWord, endWord) of the prev bitmap
+    assert(addr >= _prevMarkBitMap->startWord() &&
+           addr < _prevMarkBitMap->endWord(), "in a region");
+
+    return _prevMarkBitMap->isMarked(addr);
+  }
+
+  inline bool do_yield_check(int worker_i = 0);
+  inline bool should_yield();
+
+  // Called to abort the marking cycle after a Full GC takes place.
+  void abort();
+
+  void disable_co_trackers();
+
+  // This prints the global/local fingers. It is used for debugging.
+  NOT_PRODUCT(void print_finger();)
+
+  void print_summary_info();
+
+  // The following indicate whether a given verbose level has been
+  // set. Notice that anything above stats is conditional to
+  // _MARKING_VERBOSE_ having been set to 1
+  bool verbose_stats()
+    { return _verbose_level >= stats_verbose; }
+  bool verbose_low()
+    { return _MARKING_VERBOSE_ && _verbose_level >= low_verbose; }
+  bool verbose_medium()
+    { return _MARKING_VERBOSE_ && _verbose_level >= medium_verbose; }
+  bool verbose_high()
+    { return _MARKING_VERBOSE_ && _verbose_level >= high_verbose; }
+};
+
+// A class representing a marking task.
+class CMTask : public TerminatorTerminator {
+private:
+  enum PrivateConstants {
+    // the regular clock call is called once the scanned words reaches
+    // this limit
+    words_scanned_period = 12*1024,
+    // the regular clock call is called once the number of visited
+    // references reaches this limit
+    refs_reached_period = 384,
+    // initial value for the hash seed, used in the work stealing code
+    init_hash_seed = 17,
+    // how many entries will be transferred between global stack and
+    // local queues
+    global_stack_transfer_size = 16
+  };
+
+  int _task_id;
+  G1CollectedHeap* _g1h;
+  ConcurrentMark* _cm;
+  CMBitMap* _nextMarkBitMap;
+  // the task queue of this task
+  CMTaskQueue* _task_queue;
+  // the task queue set---needed for stealing
+  CMTaskQueueSet* _task_queues;
+  // indicates whether the task has been claimed---this is only for
+  // debugging purposes
+  bool _claimed;
+
+  // number of calls to this task
+  int _calls;
+
+  // concurrent overhead over a single CPU for this task
+  COTracker _co_tracker;
+
+  // when the virtual timer reaches this time, the marking step should
+  // exit
+  double _time_target_ms;
+  // the start time of the current marking step
+  double _start_time_ms;
+
+  // the oop closure used for iterations over oops
+  OopClosure* _oop_closure;
+
+  // the region this task is scanning, NULL if we're not scanning any
+  HeapRegion* _curr_region;
+  // the local finger of this task, NULL if we're not scanning a region
+  HeapWord* _finger;
+  // limit of the region this task is scanning, NULL if we're not scanning one
+  HeapWord* _region_limit;
+
+  // This is used only when we scan regions popped from the region
+  // stack. It records what the last object on such a region we
+  // scanned was. It is used to ensure that, if we abort region
+  // iteration, we do not rescan the first part of the region. This
+  // should be NULL when we're not scanning a region from the region
+  // stack.
+  HeapWord* _region_finger;
+
+  // the number of words this task has scanned
+  size_t _words_scanned;
+  // When _words_scanned reaches this limit, the regular clock is
+  // called. Notice that this might be decreased under certain
+  // circumstances (i.e. when we believe that we did an expensive
+  // operation).
+  size_t _words_scanned_limit;
+  // the initial value of _words_scanned_limit (i.e. what it was
+  // before it was decreased).
+  size_t _real_words_scanned_limit;
+
+  // the number of references this task has visited
+  size_t _refs_reached;
+  // When _refs_reached reaches this limit, the regular clock is
+  // called. Notice that this might be decreased under certain
+  // circumstances (i.e. when we believe that we did an expensive
+  // operation).
+  size_t _refs_reached_limit;
+  // the initial value of _refs_reached_limit (i.e. what it was before
+  // it was decreased).
+  size_t _real_refs_reached_limit;
+
+  // used by the work stealing stuff
+  int _hash_seed;
+  // if this is true, then the task has aborted for some reason
+  bool _has_aborted;
+  // set when the task aborts because it has met its time quota
+  bool _has_aborted_timed_out;
+  // true when we're draining SATB buffers; this avoids the task
+  // aborting due to SATB buffers being available (as we're already
+  // dealing with them)
+  bool _draining_satb_buffers;
+
+  // number sequence of past step times
+  NumberSeq _step_times_ms;
+  // elapsed time of this task
+  double _elapsed_time_ms;
+  // termination time of this task
+  double _termination_time_ms;
+  // when this task got into the termination protocol
+  double _termination_start_time_ms;
+
+  // true when the task is during a concurrent phase, false when it is
+  // in the remark phase (so, in the latter case, we do not have to
+  // check all the things that we have to check during the concurrent
+  // phase, i.e. SATB buffer availability...)
+  bool _concurrent;
+
+  TruncatedSeq _marking_step_diffs_ms;
+
+  // LOTS of statistics related with this task
+#if _MARKING_STATS_
+  NumberSeq _all_clock_intervals_ms;
+  double _interval_start_time_ms;
+
+  int _aborted;
+  int _aborted_overflow;
+  int _aborted_cm_aborted;
+  int _aborted_yield;
+  int _aborted_timed_out;
+  int _aborted_satb;
+  int _aborted_termination;
+
+  int _steal_attempts;
+  int _steals;
+
+  int _clock_due_to_marking;
+  int _clock_due_to_scanning;
+
+  int _local_pushes;
+  int _local_pops;
+  int _local_max_size;
+  int _objs_scanned;
+
+  int _global_pushes;
+  int _global_pops;
+  int _global_max_size;
+
+  int _global_transfers_to;
+  int _global_transfers_from;
+
+  int _region_stack_pops;
+
+  int _regions_claimed;
+  int _objs_found_on_bitmap;
+
+  int _satb_buffers_processed;
+#endif // _MARKING_STATS_
+
+  // it updates the local fields after this task has claimed
+  // a new region to scan
+  void setup_for_region(HeapRegion* hr);
+  // it brings up-to-date the limit of the region
+  void update_region_limit();
+  // it resets the local fields after a task has finished scanning a
+  // region
+  void giveup_current_region();
+
+  // called when either the words scanned or the refs visited limit
+  // has been reached
+  void reached_limit();
+  // recalculates the words scanned and refs visited limits
+  void recalculate_limits();
+  // decreases the words scanned and refs visited limits when we reach
+  // an expensive operation
+  void decrease_limits();
+  // it checks whether the words scanned or refs visited reached their
+  // respective limit and calls reached_limit() if they have
+  void check_limits() {
+    if (_words_scanned >= _words_scanned_limit ||
+        _refs_reached >= _refs_reached_limit)
+      reached_limit();
+  }
+  // this is supposed to be called regularly during a marking step as
+  // it checks a bunch of conditions that might cause the marking step
+  // to abort
+  void regular_clock_call();
+  bool concurrent() { return _concurrent; }
+
+public:
+  // It resets the task; it should be called right at the beginning of
+  // a marking phase.
+  void reset(CMBitMap* _nextMarkBitMap);
+  // it clears all the fields that correspond to a claimed region.
+  void clear_region_fields();
+
+  void set_concurrent(bool concurrent) { _concurrent = concurrent; }
+
+  void enable_co_tracker() {
+    guarantee( !_co_tracker.enabled(), "invariant" );
+    _co_tracker.enable();
+  }
+  void disable_co_tracker() {
+    guarantee( _co_tracker.enabled(), "invariant" );
+    _co_tracker.disable();
+  }
+  bool co_tracker_enabled() {
+    return _co_tracker.enabled();
+  }
+  void reset_co_tracker(double starting_conc_overhead = 0.0) {
+    _co_tracker.reset(starting_conc_overhead);
+  }
+  void start_co_tracker() {
+    _co_tracker.start();
+  }
+  void update_co_tracker(bool force_end = false) {
+    _co_tracker.update(force_end);
+  }
+
+  // The main method of this class which performs a marking step
+  // trying not to exceed the given duration. However, it might exit
+  // prematurely, according to some conditions (i.e. SATB buffers are
+  // available for processing).
+  void do_marking_step(double target_ms);
+
+  // These two calls start and stop the timer
+  void record_start_time() {
+    _elapsed_time_ms = os::elapsedTime() * 1000.0;
+  }
+  void record_end_time() {
+    _elapsed_time_ms = os::elapsedTime() * 1000.0 - _elapsed_time_ms;
+  }
+
+  // returns the task ID
+  int task_id() { return _task_id; }
+
+  // From TerminatorTerminator. It determines whether this task should
+  // exit the termination protocol after it's entered it.
+  virtual bool should_exit_termination();
+
+  HeapWord* finger() { return _finger; }
+
+  bool has_aborted() { return _has_aborted; }
+  void set_has_aborted() { _has_aborted = true; }
+  void clear_has_aborted() { _has_aborted = false; }
+  bool claimed() { return _claimed; }
+
+  void set_oop_closure(OopClosure* oop_closure) {
+    _oop_closure = oop_closure;
+  }
+
+  // It grays the object by marking it and, if necessary, pushing it
+  // on the local queue
+  void deal_with_reference(oop obj);
+
+  // It scans an object and visits its children.
+  void scan_object(oop obj) {
+    tmp_guarantee_CM( _nextMarkBitMap->isMarked((HeapWord*) obj),
+                      "invariant" );
+
+    if (_cm->verbose_high())
+      gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
+                             _task_id, (void*) obj);
+
+    size_t obj_size = obj->size();
+    _words_scanned += obj_size;
+
+    obj->oop_iterate(_oop_closure);
+    statsOnly( ++_objs_scanned );
+    check_limits();
+  }
+
+  // It pushes an object on the local queue.
+  void push(oop obj);
+
+  // These two move entries to/from the global stack.
+  void move_entries_to_global_stack();
+  void get_entries_from_global_stack();
+
+  // It pops and scans objects from the local queue. If partially is
+  // true, then it stops when the queue size is of a given limit. If
+  // partially is false, then it stops when the queue is empty.
+  void drain_local_queue(bool partially);
+  // It moves entries from the global stack to the local queue and
+  // drains the local queue. If partially is true, then it stops when
+  // both the global stack and the local queue reach a given size. If
+  // partially is false, it tries to empty them totally.
+  void drain_global_stack(bool partially);
+  // It keeps picking SATB buffers and processing them until no SATB
+  // buffers are available.
+  void drain_satb_buffers();
+  // It keeps popping regions from the region stack and processing
+  // them until the region stack is empty.
+  void drain_region_stack(BitMapClosure* closure);
+
+  // moves the local finger to a new location
+  inline void move_finger_to(HeapWord* new_finger) {
+    tmp_guarantee_CM( new_finger >= _finger && new_finger < _region_limit,
+                      "invariant" );
+    _finger = new_finger;
+  }
+
+  // moves the region finger to a new location
+  inline void move_region_finger_to(HeapWord* new_finger) {
+    tmp_guarantee_CM( new_finger < _cm->finger(), "invariant" );
+    _region_finger = new_finger;
+  }
+
+  CMTask(int task_num, ConcurrentMark *cm,
+         CMTaskQueue* task_queue, CMTaskQueueSet* task_queues);
+
+  // it prints statistics associated with this task
+  void print_stats();
+
+#if _MARKING_STATS_
+  void increase_objs_found_on_bitmap() { ++_objs_found_on_bitmap; }
+#endif // _MARKING_STATS_
+};
diff --git a/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp b/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp
new file mode 100644
index 00000000000..e26df0caae0
--- /dev/null
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp
@@ -0,0 +1,336 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_concurrentMarkThread.cpp.incl"
+
+// ======= Concurrent Mark Thread ========
+
+// The CM thread is created when the G1 garbage collector is used
+
+SurrogateLockerThread*
+     ConcurrentMarkThread::_slt = NULL;
+
+ConcurrentMarkThread::ConcurrentMarkThread(ConcurrentMark* cm) :
+  ConcurrentGCThread(),
+  _cm(cm),
+  _started(false),
+  _in_progress(false),
+  _vtime_accum(0.0),
+  _vtime_mark_accum(0.0),
+  _vtime_count_accum(0.0)
+{
+  create_and_start();
+}
+
+class CMCheckpointRootsInitialClosure: public VoidClosure {
+
+  ConcurrentMark* _cm;
+public:
+
+  CMCheckpointRootsInitialClosure(ConcurrentMark* cm) :
+    _cm(cm) {}
+
+  void do_void(){
+    _cm->checkpointRootsInitial();
+  }
+};
+
+class CMCheckpointRootsFinalClosure: public VoidClosure {
+
+  ConcurrentMark* _cm;
+public:
+
+  CMCheckpointRootsFinalClosure(ConcurrentMark* cm) :
+    _cm(cm) {}
+
+  void do_void(){
+    _cm->checkpointRootsFinal(false); // !clear_all_soft_refs
+  }
+};
+
+class CMCleanUp: public VoidClosure {
+  ConcurrentMark* _cm;
+public:
+
+  CMCleanUp(ConcurrentMark* cm) :
+    _cm(cm) {}
+
+  void do_void(){
+    _cm->cleanup();
+  }
+};
+
+
+
+void ConcurrentMarkThread::run() {
+  initialize_in_thread();
+  _vtime_start = os::elapsedVTime();
+  wait_for_universe_init();
+
+  G1CollectedHeap* g1 = G1CollectedHeap::heap();
+  G1CollectorPolicy* g1_policy = g1->g1_policy();
+  G1MMUTracker *mmu_tracker = g1_policy->mmu_tracker();
+  Thread *current_thread = Thread::current();
+
+  while (!_should_terminate) {
+    // wait until started is set.
+    sleepBeforeNextCycle();
+    {
+      ResourceMark rm;
+      HandleMark hm;
+      double cycle_start = os::elapsedVTime();
+      double mark_start_sec = os::elapsedTime();
+      char verbose_str[128];
+
+      if (PrintGC) {
+        gclog_or_tty->date_stamp(PrintGCDateStamps);
+        gclog_or_tty->stamp(PrintGCTimeStamps);
+        gclog_or_tty->print_cr("[GC concurrent-mark-start]");  // same stream as the stamps
+      }
+
+      if (!g1_policy->in_young_gc_mode()) {
+        // this ensures the flag is not set if we bail out of the marking
+        // cycle; normally the flag is cleared immediately after cleanup
+        g1->set_marking_complete();
+
+        if (g1_policy->adaptive_young_list_length()) {
+          double now = os::elapsedTime();
+          double init_prediction_ms = g1_policy->predict_init_time_ms();
+          jlong sleep_time_ms = mmu_tracker->when_ms(now, init_prediction_ms);
+          os::sleep(current_thread, sleep_time_ms, false);
+        }
+
+        // We don't have to skip here if we've been asked to restart, because
+        // in the worst case we just enqueue a new VM operation to start a
+        // marking. Note that the init operation resets has_aborted()
+        CMCheckpointRootsInitialClosure init_cl(_cm);
+        strcpy(verbose_str, "GC initial-mark");
+        VM_CGC_Operation op(&init_cl, verbose_str);
+        VMThread::execute(&op);
+      }
+
+      int iter = 0;
+      do {
+        iter++;
+        if (!cm()->has_aborted()) {
+          _cm->markFromRoots();
+        } else {
+          if (TraceConcurrentMark)
+            gclog_or_tty->print_cr("CM-skip-mark-from-roots");
+        }
+
+        double mark_end_time = os::elapsedVTime();
+        double mark_end_sec = os::elapsedTime();
+        _vtime_mark_accum += (mark_end_time - cycle_start);
+        if (!cm()->has_aborted()) {
+          if (g1_policy->adaptive_young_list_length()) {
+            double now = os::elapsedTime();
+            double remark_prediction_ms = g1_policy->predict_remark_time_ms();
+            jlong sleep_time_ms = mmu_tracker->when_ms(now, remark_prediction_ms);
+            os::sleep(current_thread, sleep_time_ms, false);
+          }
+
+          if (PrintGC) {
+            gclog_or_tty->date_stamp(PrintGCDateStamps);
+            gclog_or_tty->stamp(PrintGCTimeStamps);
+            gclog_or_tty->print_cr("[GC concurrent-mark-end, %1.7lf sec]",
+                                   mark_end_sec - mark_start_sec);
+          }
+
+          CMCheckpointRootsFinalClosure final_cl(_cm);
+          sprintf(verbose_str, "GC remark");
+          VM_CGC_Operation op(&final_cl, verbose_str);
+          VMThread::execute(&op);
+        } else {
+          if (TraceConcurrentMark)
+            gclog_or_tty->print_cr("CM-skip-remark");
+        }
+        if (cm()->restart_for_overflow() &&
+            G1TraceMarkStackOverflow) {
+          gclog_or_tty->print_cr("Restarting conc marking because of MS overflow "
+                                 "in remark (restart #%d).", iter);
+        }
+
+        if (cm()->restart_for_overflow()) {
+          if (PrintGC) {
+            gclog_or_tty->date_stamp(PrintGCDateStamps);
+            gclog_or_tty->stamp(PrintGCTimeStamps);
+            gclog_or_tty->print_cr("[GC concurrent-mark-restart-for-overflow]");
+          }
+        }
+      } while (cm()->restart_for_overflow());
+      double counting_start_time = os::elapsedVTime();
+
+      // YSR: These look dubious (i.e. redundant) !!! FIX ME
+      slt()->manipulatePLL(SurrogateLockerThread::acquirePLL);
+      slt()->manipulatePLL(SurrogateLockerThread::releaseAndNotifyPLL);
+
+      if (!cm()->has_aborted()) {
+        double count_start_sec = os::elapsedTime();
+        if (PrintGC) {
+          gclog_or_tty->date_stamp(PrintGCDateStamps);
+          gclog_or_tty->stamp(PrintGCTimeStamps);
+          gclog_or_tty->print_cr("[GC concurrent-count-start]");
+        }
+
+        _sts.join();
+        _cm->calcDesiredRegions();
+        _sts.leave();
+
+        if (!cm()->has_aborted()) {
+          double count_end_sec = os::elapsedTime();
+          if (PrintGC) {
+            gclog_or_tty->date_stamp(PrintGCDateStamps);
+            gclog_or_tty->stamp(PrintGCTimeStamps);
+            gclog_or_tty->print_cr("[GC concurrent-count-end, %1.7lf]",
+                                   count_end_sec - count_start_sec);
+          }
+        }
+      } else {
+        if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-skip-end-game");
+      }
+      double end_time = os::elapsedVTime();
+      _vtime_count_accum += (end_time - counting_start_time);
+      // Update the total virtual time before doing this, since it will try
+      // to measure it to get the vtime for this marking. We purposely
+      // neglect the presumably-short "completeCleanup" phase here.
+      _vtime_accum = (end_time - _vtime_start);
+      if (!cm()->has_aborted()) {
+        if (g1_policy->adaptive_young_list_length()) {
+          double now = os::elapsedTime();
+          double cleanup_prediction_ms = g1_policy->predict_cleanup_time_ms();
+          jlong sleep_time_ms = mmu_tracker->when_ms(now, cleanup_prediction_ms);
+          os::sleep(current_thread, sleep_time_ms, false);
+        }
+
+        CMCleanUp cl_cl(_cm);
+        sprintf(verbose_str, "GC cleanup");
+        VM_CGC_Operation op(&cl_cl, verbose_str);
+        VMThread::execute(&op);
+      } else {
+        if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-skip-cleanup");
+        G1CollectedHeap::heap()->set_marking_complete();
+      }
+
+      if (!cm()->has_aborted()) {
+        double cleanup_start_sec = os::elapsedTime();
+        if (PrintGC) {
+          gclog_or_tty->date_stamp(PrintGCDateStamps);
+          gclog_or_tty->stamp(PrintGCTimeStamps);
+          gclog_or_tty->print_cr("[GC concurrent-cleanup-start]");
+        }
+
+        // Now do the remainder of the cleanup operation.
+        _sts.join();
+        _cm->completeCleanup();
+        if (!cm()->has_aborted()) {
+          g1_policy->record_concurrent_mark_cleanup_completed();
+
+          double cleanup_end_sec = os::elapsedTime();
+          if (PrintGC) {
+            gclog_or_tty->date_stamp(PrintGCDateStamps);
+            gclog_or_tty->stamp(PrintGCTimeStamps);
+            gclog_or_tty->print_cr("[GC concurrent-cleanup-end, %1.7lf]",
+                                   cleanup_end_sec - cleanup_start_sec);
+          }
+        }
+        _sts.leave();
+      }
+      // We're done: no more unclean regions coming.
+      G1CollectedHeap::heap()->set_unclean_regions_coming(false);
+
+      if (cm()->has_aborted()) {
+        if (PrintGC) {
+          gclog_or_tty->date_stamp(PrintGCDateStamps);
+          gclog_or_tty->stamp(PrintGCTimeStamps);
+          gclog_or_tty->print_cr("[GC concurrent-mark-abort]");
+        }
+      }
+
+      _sts.join();
+      _cm->disable_co_trackers();
+      _sts.leave();
+
+      // we now want to allow clearing of the marking bitmap to be
+      // suspended by a collection pause.
+      _sts.join();
+      _cm->clearNextBitmap();
+      _sts.leave();
+    }
+  }
+  assert(_should_terminate, "just checking");
+
+  terminate();
+}
+
+
+void ConcurrentMarkThread::yield() {
+  if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-yield");
+  _sts.yield("Concurrent Mark");
+  if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-yield-end");
+}
+
+void ConcurrentMarkThread::stop() {
+  // it is ok to take late safepoints here, if needed
+  MutexLockerEx mu(Terminator_lock);
+  _should_terminate = true;
+  while (!_has_terminated) {
+    Terminator_lock->wait();
+  }
+  if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-stop");
+}
+
+void ConcurrentMarkThread::print() {
+  gclog_or_tty->print("\"Concurrent Mark GC Thread\" ");
+  Thread::print();
+  gclog_or_tty->cr();
+}
+
+void ConcurrentMarkThread::sleepBeforeNextCycle() {
+  clear_in_progress();
+  // Hold CGC_lock (no safepoint check) and wait on it until started()
+  // becomes true; then flag the cycle as in progress and clear started.
+  MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
+  while (!started()) {
+    if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-sleeping");
+    CGC_lock->wait(Mutex::_no_safepoint_check_flag);
+  }
+  set_in_progress();
+  clear_started();
+  if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-starting");
+
+  return;
+}
+
+// Note: this method, although exported by the ConcurrentMarkThread,
+// which is a non-JavaThread, can only be called by a JavaThread.
+// Currently this is done at vm creation time (post-vm-init) by the
+// main/Primordial (Java)Thread.
+// XXX Consider changing this in the future to allow the CM thread
+// itself to create this thread?
+void ConcurrentMarkThread::makeSurrogateLockerThread(TRAPS) {
+  assert(_slt == NULL, "SLT already created");
+  _slt = SurrogateLockerThread::make(THREAD);
+}
diff --git a/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp b/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp
new file mode 100644
index 00000000000..c382778c212
--- /dev/null
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// The Concurrent Mark GC Thread (could be several in the future).
+// This is copied from the Concurrent Mark Sweep GC Thread
+// Still under construction.
+
+class ConcurrentMark;
+
+class ConcurrentMarkThread: public ConcurrentGCThread {
+  friend class VMStructs;
+
+  double _vtime_start;  // Initial virtual time.
+  double _vtime_accum;  // Accumulated virtual time.
+
+  double _vtime_mark_accum;   // Marking virtual time so far (see vtime_mark_accum()).
+  double _vtime_count_accum;  // Counting virtual time so far.
+
+ public:
+  virtual void run();
+
+ private:
+  ConcurrentMark* _cm;
+  bool _started;
+  bool _in_progress;
+
+  void sleepBeforeNextCycle();
+
+  static SurrogateLockerThread* _slt;
+
+ public:
+  // Constructor
+  ConcurrentMarkThread(ConcurrentMark* cm);
+
+  static void makeSurrogateLockerThread(TRAPS);
+  static SurrogateLockerThread* slt() { return _slt; }
+
+  // Printing
+  void print();
+
+  // Total virtual time so far.
+  double vtime_accum();
+  // Marking virtual time so far
+  double vtime_mark_accum();
+  // Counting virtual time so far.
+  double vtime_count_accum() { return _vtime_count_accum; }
+
+  ConcurrentMark* cm() { return _cm; }
+
+  void set_started() { _started = true; }
+  void clear_started() { _started = false; }
+  bool started() { return _started; }
+
+  void set_in_progress() { _in_progress = true; }
+  void clear_in_progress() { _in_progress = false; }
+  bool in_progress() { return _in_progress; }
+
+  // Yield for GC
+  void yield();
+
+  // shutdown
+  static void stop();
+};
diff --git a/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.inline.hpp
new file mode 100644
index 00000000000..b011973557f
--- /dev/null
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.inline.hpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Total virtual time so far: _vtime_accum plus _cm->all_task_accum_vtime().
+inline double ConcurrentMarkThread::vtime_accum() {
+  return _vtime_accum + _cm->all_task_accum_vtime();
+}
+
+// Marking virtual time so far: _vtime_mark_accum plus _cm->all_task_accum_vtime().
+inline double ConcurrentMarkThread::vtime_mark_accum() {
+  return _vtime_mark_accum + _cm->all_task_accum_vtime();
+}
diff --git a/hotspot/src/share/vm/gc_implementation/g1/concurrentZFThread.cpp b/hotspot/src/share/vm/gc_implementation/g1/concurrentZFThread.cpp
new file mode 100644
index 00000000000..642c59cb9bf
--- /dev/null
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentZFThread.cpp
@@ -0,0 +1,191 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_concurrentZFThread.cpp.incl"
+
+// ======= Concurrent Zero-Fill Thread ========
+
+// The ZF thread zero-fills heap regions concurrently (see run() below)
+
+int ConcurrentZFThread::_region_allocs = 0;
+int ConcurrentZFThread::_sync_zfs = 0;
+int ConcurrentZFThread::_zf_waits = 0;
+int ConcurrentZFThread::_regions_filled = 0;
+
+ConcurrentZFThread::ConcurrentZFThread() :
+  ConcurrentGCThread(),
+  _co_tracker(G1ZFGroup)
+{
+  create_and_start();
+}
+
+void ConcurrentZFThread::wait_for_ZF_completed(HeapRegion* hr) {
+  assert(ZF_mon->owned_by_self(), "Precondition.");
+  note_zf_wait();
+  while (hr->zero_fill_state() == HeapRegion::ZeroFilling) {
+    ZF_mon->wait(Mutex::_no_safepoint_check_flag);
+  }
+}
+
+void ConcurrentZFThread::processHeapRegion(HeapRegion* hr) {
+  assert(!Universe::heap()->is_gc_active(),
+         "This should not happen during GC.");
+  assert(hr != NULL, "Precondition");
+  // These are unlocked reads, but if this test is successful, then no
+  // other thread will attempt this zero filling. Only a GC thread can
+  // modify the ZF state of a region whose state is zero-filling, and this
+  // should only happen while the ZF thread is locking out GC.
+  if (hr->zero_fill_state() == HeapRegion::ZeroFilling
+      && hr->zero_filler() == Thread::current()) {
+    assert(hr->top() == hr->bottom(), "better be empty!");
+    assert(!hr->isHumongous(), "Only free regions on unclean list.");
+    Copy::fill_to_words(hr->bottom(), hr->capacity()/HeapWordSize);
+    note_region_filled();
+  }
+}
+
+void ConcurrentZFThread::run() {
+  initialize_in_thread();
+  Thread* thr_self = Thread::current();
+  _vtime_start = os::elapsedVTime();
+  wait_for_universe_init();
+  _co_tracker.enable();
+  _co_tracker.start();
+
+  G1CollectedHeap* g1 = G1CollectedHeap::heap();
+  _sts.join();
+  while (!_should_terminate) {
+    _sts.leave();
+
+    {
+      MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+
+      // This local variable will hold a region being zero-filled. This
+      // region will neither be on the unclean or zero-filled lists, and
+      // will not be available for allocation; thus, we might have an
+      // allocation fail, causing a full GC, because of this, but this is a
+      // price we will pay. (In future, we might want to make the fact
+      // that there's a region being zero-filled apparent to the G1 heap,
+      // which could then wait for it in this extreme case...)
+      HeapRegion* to_fill;
+
+      while (!g1->should_zf()
+             || (to_fill = g1->pop_unclean_region_list_locked()) == NULL)
+        ZF_mon->wait(Mutex::_no_safepoint_check_flag);
+      while (to_fill->zero_fill_state() == HeapRegion::ZeroFilling)
+        ZF_mon->wait(Mutex::_no_safepoint_check_flag);
+
+      // So now to_fill is non-NULL and is not ZeroFilling. It might be
+      // Allocated or ZeroFilled. (The latter could happen if this thread
+      // starts the zero-filling of a region, but a GC intervenes and
+      // pushes new regions needing filling on the
+      // front of the list.)
+
+      switch (to_fill->zero_fill_state()) {
+      case HeapRegion::Allocated:
+        to_fill = NULL;
+        break;
+
+      case HeapRegion::NotZeroFilled:
+        to_fill->set_zero_fill_in_progress(thr_self);
+
+        ZF_mon->unlock();
+        _sts.join();
+        processHeapRegion(to_fill);
+        _sts.leave();
+        ZF_mon->lock_without_safepoint_check();
+
+        if (to_fill->zero_fill_state() == HeapRegion::ZeroFilling
+            && to_fill->zero_filler() == thr_self) {
+          to_fill->set_zero_fill_complete();
+          (void)g1->put_free_region_on_list_locked(to_fill);
+        }
+        break;
+
+      case HeapRegion::ZeroFilled:
+        (void)g1->put_free_region_on_list_locked(to_fill);
+        break;
+
+      case HeapRegion::ZeroFilling:
+        ShouldNotReachHere();
+        break;
+      }
+    }
+    _vtime_accum = (os::elapsedVTime() - _vtime_start);
+    _sts.join();
+
+    _co_tracker.update();
+  }
+  _co_tracker.update(false);
+  _sts.leave();
+
+  assert(_should_terminate, "just checking");
+  terminate();
+}
+
+bool ConcurrentZFThread::offer_yield() {
+  if (_sts.should_yield()) {
+    _sts.yield("Concurrent ZF");
+    return true;
+  } else {
+    return false;
+  }
+}
+
+void ConcurrentZFThread::stop() {
+  // it is ok to take late safepoints here, if needed
+  MutexLockerEx mu(Terminator_lock);
+  _should_terminate = true;
+  while (!_has_terminated) {
+    Terminator_lock->wait();
+  }
+}
+
+void ConcurrentZFThread::print() {
+  gclog_or_tty->print("\"Concurrent ZF Thread\" ");
+  Thread::print();
+  gclog_or_tty->cr();
+}
+
+
+double ConcurrentZFThread::_vtime_accum;
+
+void ConcurrentZFThread::print_summary_info() {
+  gclog_or_tty->print("\nConcurrent Zero-Filling:\n");
+  gclog_or_tty->print(" Filled %d regions, used %5.2fs.\n",
+                      _regions_filled,
+                      vtime_accum());
+  gclog_or_tty->print(" Of %d region allocs, %d (%5.2f%%) required sync ZF,\n",
+                      _region_allocs, _sync_zfs,
+                      (_region_allocs > 0 ?
+                       (float)_sync_zfs/(float)_region_allocs*100.0 :
+                       0.0));
+  gclog_or_tty->print(" and %d (%5.2f%%) required a ZF wait.\n",
+                      _zf_waits,
+                      (_region_allocs > 0 ?
+                       (float)_zf_waits/(float)_region_allocs*100.0 :
+                       0.0));
+
+}
diff --git a/hotspot/src/share/vm/gc_implementation/g1/concurrentZFThread.hpp b/hotspot/src/share/vm/gc_implementation/g1/concurrentZFThread.hpp
new file mode 100644
index 00000000000..9a483dbce86
--- /dev/null
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentZFThread.hpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// The Concurrent ZF Thread. Performs concurrent zero-filling.
+
+class ConcurrentZFThread: public ConcurrentGCThread {
+  friend class VMStructs;
+  friend class ZeroFillRegionClosure;
+
+ private:
+
+  // Zero fill the heap region.
+  void processHeapRegion(HeapRegion* r);
+
+  // Stats
+  // Allocation (protected by heap lock).
+  static int _region_allocs;  // Number of regions allocated
+  static int _sync_zfs;       // Synchronous zero-fills
+
+  static int _zf_waits;       // Wait for conc zero-fill completion.
+
+  // Number of regions the ZF thread fills.
+  static int _regions_filled;
+
+  COTracker _co_tracker;
+
+  double _vtime_start;  // Initial virtual time.
+
+  // These are static because the "print_summary_info" method is, and
+  // it currently assumes there is only one ZF thread. We'll change when
+  // we need to.
+  static double _vtime_accum;  // Accumulated virtual time.
+  static double vtime_accum() { return _vtime_accum; }
+
+  // Offer yield for GC. Returns true if yield occurred.
+  bool offer_yield();
+
+ public:
+  // Constructor
+  ConcurrentZFThread();
+
+  // Main loop.
+  virtual void run();
+
+  // Printing
+  void print();
+
+  // Waits until "r" has been zero-filled. Requires caller to hold the
+  // ZF_mon.
+  static void wait_for_ZF_completed(HeapRegion* r);
+
+  // Get or clear the current unclean region. Should be done
+  // while holding the ZF_needed_mon lock.
+
+  // shutdown
+  static void stop();
+
+  // Stats
+  static void note_region_alloc() {_region_allocs++; }
+  static void note_sync_zfs() { _sync_zfs++; }
+  static void note_zf_wait() { _zf_waits++; }
+  static void note_region_filled() { _regions_filled++; }
+
+  static void print_summary_info();
+};
diff --git a/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp b/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp
new file mode 100644
index 00000000000..28159924298
--- /dev/null
+++ b/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp
@@ -0,0 +1,308 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_dirtyCardQueue.cpp.incl"
+
+bool DirtyCardQueue::apply_closure(CardTableEntryClosure* cl,
+                                   bool consume,
+                                   size_t worker_i) {
+  bool res = true;
+  if (_buf != NULL) {
+    res = apply_closure_to_buffer(cl, _buf, _index, _sz,
+                                  consume,
+                                  (int) worker_i);
+    if (res && consume) _index = _sz;
+  }
+  return res;
+}
+
+bool DirtyCardQueue::apply_closure_to_buffer(CardTableEntryClosure* cl,
+                                             void** buf,
+                                             size_t index, size_t sz,
+                                             bool consume,
+                                             int worker_i) {
+  if (cl == NULL) return true;
+  for (size_t i = index; i < sz; i += oopSize) {
+    int ind = byte_index_to_index((int)i);
+    jbyte* card_ptr = (jbyte*)buf[ind];
+    if (card_ptr != NULL) {
+      // Set the entry to null, so we don't do it again (via the test
+      // above) if we reconsider this buffer.
+      if (consume) buf[ind] = NULL;
+      if (!cl->do_card_ptr(card_ptr, worker_i)) return false;
+    }
+  }
+  return true;
+}
+
+#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
+#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
+#endif // _MSC_VER
+
+DirtyCardQueueSet::DirtyCardQueueSet() :
+  PtrQueueSet(true /*notify_when_complete*/),
+  _closure(NULL),
+  _shared_dirty_card_queue(this, true /*perm*/),
+  _free_ids(NULL),
+  _processed_buffers_mut(0), _processed_buffers_rs_thread(0)
+{
+  _all_active = true;
+}
+
+size_t DirtyCardQueueSet::num_par_ids() {
+  return MAX2(ParallelGCThreads, (size_t)2);
+}
+
+
+void DirtyCardQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock,
+                                   int max_completed_queue,
+                                   Mutex* lock) {
+  PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue);
+  set_buffer_size(DCQBarrierQueueBufferSize);
+  set_process_completed_threshold(DCQBarrierProcessCompletedThreshold);
+
+  _shared_dirty_card_queue.set_lock(lock);
+  _free_ids = new FreeIdSet((int) num_par_ids(), _cbl_mon);
+  bool b = _free_ids->claim_perm_id(0);
+  guarantee(b, "Must reserve id zero for concurrent refinement thread.");
+}
+
+void DirtyCardQueueSet::handle_zero_index_for_thread(JavaThread* t) {
+  t->dirty_card_queue().handle_zero_index();
+}
+
+void DirtyCardQueueSet::set_closure(CardTableEntryClosure* closure) {
+  _closure = closure;
+}
+
+void DirtyCardQueueSet::iterate_closure_all_threads(bool consume,
+                                                    size_t worker_i) {
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
+  for(JavaThread* t = Threads::first(); t; t = t->next()) {
+    bool b = t->dirty_card_queue().apply_closure(_closure, consume);
+    guarantee(b, "Should not be interrupted.");
+  }
+  bool b = shared_dirty_card_queue()->apply_closure(_closure,
+                                                    consume,
+                                                    worker_i);
+  guarantee(b, "Should not be interrupted.");
+}
+
+bool DirtyCardQueueSet::mut_process_buffer(void** buf) {
+
+  // Used to determine if we had already claimed a par_id
+  //
before entering this method. + bool already_claimed = false; + + // We grab the current JavaThread. + JavaThread* thread = JavaThread::current(); + + // We get the number of any par_id that this thread + // might have already claimed. + int worker_i = thread->get_claimed_par_id(); + + // If worker_i is not -1 then the thread has already claimed + // a par_id. We make note of it using the already_claimed value + if (worker_i != -1) { + already_claimed = true; + } else { + + // Otherwise we need to claim a par id + worker_i = _free_ids->claim_par_id(); + + // And store the par_id value in the thread + thread->set_claimed_par_id(worker_i); + } + + bool b = false; + if (worker_i != -1) { + b = DirtyCardQueue::apply_closure_to_buffer(_closure, buf, 0, + _sz, true, worker_i); + if (b) Atomic::inc(&_processed_buffers_mut); + + // If we had not claimed an id before entering the method + // then we must release the id. + if (!already_claimed) { + + // we release the id + _free_ids->release_par_id(worker_i); + + // and set the claimed_id in the thread to -1 + thread->set_claimed_par_id(-1); + } + } + return b; +} + +DirtyCardQueueSet::CompletedBufferNode* +DirtyCardQueueSet::get_completed_buffer_lock(int stop_at) { + CompletedBufferNode* nd = NULL; + MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); + + if ((int)_n_completed_buffers <= stop_at) { + _process_completed = false; + return NULL; + } + + if (_completed_buffers_head != NULL) { + nd = _completed_buffers_head; + _completed_buffers_head = nd->next; + if (_completed_buffers_head == NULL) + _completed_buffers_tail = NULL; + _n_completed_buffers--; + } + debug_only(assert_completed_buffer_list_len_correct_locked()); + return nd; +} + +// We only do this in contexts where there is no concurrent enqueueing. 
+DirtyCardQueueSet::CompletedBufferNode* +DirtyCardQueueSet::get_completed_buffer_CAS() { + CompletedBufferNode* nd = _completed_buffers_head; + + while (nd != NULL) { + CompletedBufferNode* next = nd->next; + CompletedBufferNode* result = + (CompletedBufferNode*)Atomic::cmpxchg_ptr(next, + &_completed_buffers_head, + nd); + if (result == nd) { + return result; + } else { + nd = _completed_buffers_head; + } + } + assert(_completed_buffers_head == NULL, "Loop post"); + _completed_buffers_tail = NULL; + return NULL; +} + +bool DirtyCardQueueSet:: +apply_closure_to_completed_buffer_helper(int worker_i, + CompletedBufferNode* nd) { + if (nd != NULL) { + bool b = + DirtyCardQueue::apply_closure_to_buffer(_closure, nd->buf, + nd->index, _sz, + true, worker_i); + void** buf = nd->buf; + size_t index = nd->index; + delete nd; + if (b) { + deallocate_buffer(buf); + return true; // In normal case, go on to next buffer. + } else { + enqueue_complete_buffer(buf, index, true); + return false; + } + } else { + return false; + } +} + +bool DirtyCardQueueSet::apply_closure_to_completed_buffer(int worker_i, + int stop_at, + bool with_CAS) +{ + CompletedBufferNode* nd = NULL; + if (with_CAS) { + guarantee(stop_at == 0, "Precondition"); + nd = get_completed_buffer_CAS(); + } else { + nd = get_completed_buffer_lock(stop_at); + } + bool res = apply_closure_to_completed_buffer_helper(worker_i, nd); + if (res) _processed_buffers_rs_thread++; + return res; +} + +void DirtyCardQueueSet::apply_closure_to_all_completed_buffers() { + CompletedBufferNode* nd = _completed_buffers_head; + while (nd != NULL) { + bool b = + DirtyCardQueue::apply_closure_to_buffer(_closure, nd->buf, 0, _sz, + false); + guarantee(b, "Should not stop early."); + nd = nd->next; + } +} + +void DirtyCardQueueSet::abandon_logs() { + assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint."); + CompletedBufferNode* buffers_to_delete = NULL; + { + MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); + 
while (_completed_buffers_head != NULL) { + CompletedBufferNode* nd = _completed_buffers_head; + _completed_buffers_head = nd->next; + nd->next = buffers_to_delete; + buffers_to_delete = nd; + } + _n_completed_buffers = 0; + _completed_buffers_tail = NULL; + debug_only(assert_completed_buffer_list_len_correct_locked()); + } + while (buffers_to_delete != NULL) { + CompletedBufferNode* nd = buffers_to_delete; + buffers_to_delete = nd->next; + deallocate_buffer(nd->buf); + delete nd; + } + // Since abandon is done only at safepoints, we can safely manipulate + // these queues. + for (JavaThread* t = Threads::first(); t; t = t->next()) { + t->dirty_card_queue().reset(); + } + shared_dirty_card_queue()->reset(); +} + + +void DirtyCardQueueSet::concatenate_logs() { + // Iterate over all the threads, if we find a partial log add it to + // the global list of logs. Temporarily turn off the limit on the number + // of outstanding buffers. + int save_max_completed_queue = _max_completed_queue; + _max_completed_queue = max_jint; + assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint."); + for (JavaThread* t = Threads::first(); t; t = t->next()) { + DirtyCardQueue& dcq = t->dirty_card_queue(); + if (dcq.size() != 0) { + void **buf = t->dirty_card_queue().get_buf(); + // We must NULL out the unused entries, then enqueue. + for (size_t i = 0; i < t->dirty_card_queue().get_index(); i += oopSize) { + buf[PtrQueue::byte_index_to_index((int)i)] = NULL; + } + enqueue_complete_buffer(dcq.get_buf(), dcq.get_index()); + dcq.reinitialize(); + } + } + if (_shared_dirty_card_queue.size() != 0) { + enqueue_complete_buffer(_shared_dirty_card_queue.get_buf(), + _shared_dirty_card_queue.get_index()); + _shared_dirty_card_queue.reinitialize(); + } + // Restore the completed buffer queue limit. 
+ _max_completed_queue = save_max_completed_queue; +} diff --git a/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp b/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp new file mode 100644 index 00000000000..86876fd949d --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp @@ -0,0 +1,152 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +class FreeIdSet; + +// A closure class for processing card table entries. Note that we don't +// require these closure objects to be stack-allocated. +class CardTableEntryClosure: public CHeapObj { +public: + // Process the card whose card table entry is "card_ptr". If returns + // "false", terminate the iteration early. + virtual bool do_card_ptr(jbyte* card_ptr, int worker_i = 0) = 0; +}; + +// A PtrQueue whose elements are pointers to card table entries (jbyte*). 
+class DirtyCardQueue: public PtrQueue { +public: + DirtyCardQueue(PtrQueueSet* qset_, bool perm = false) : + PtrQueue(qset_, perm) + { + // Dirty card queues are always active. + _active = true; + } + // Apply the closure to all elements, and reset the index to make the + // buffer empty. If a closure application returns "false", return + // "false" immediately, halting the iteration. If "consume" is true, + // deletes processed entries from logs. + bool apply_closure(CardTableEntryClosure* cl, + bool consume = true, + size_t worker_i = 0); + + // Apply the closure to all elements of "buf", down to "index" + // (inclusive.) If returns "false", then a closure application returned + // "false", and we return immediately. If "consume" is true, entries are + // set to NULL as they are processed, so they will not be processed again + // later. + static bool apply_closure_to_buffer(CardTableEntryClosure* cl, + void** buf, size_t index, size_t sz, + bool consume = true, + int worker_i = 0); + void **get_buf() { return _buf;} + void set_buf(void **buf) {_buf = buf;} + size_t get_index() { return _index;} + void reinitialize() { _buf = 0; _sz = 0; _index = 0;} +}; + + + +class DirtyCardQueueSet: public PtrQueueSet { + CardTableEntryClosure* _closure; + + DirtyCardQueue _shared_dirty_card_queue; + + // Override. + bool mut_process_buffer(void** buf); + + // Protected by the _cbl_mon. + FreeIdSet* _free_ids; + + // The number of completed buffers processed by mutator and rs thread, + // respectively. + jint _processed_buffers_mut; + jint _processed_buffers_rs_thread; + +public: + DirtyCardQueueSet(); + + void initialize(Monitor* cbl_mon, Mutex* fl_lock, + int max_completed_queue = 0, + Mutex* lock = NULL); + + // The number of parallel ids that can be claimed to allow collector or + // mutator threads to do card-processing work. + static size_t num_par_ids(); + + static void handle_zero_index_for_thread(JavaThread* t); + + // Register "blk" as "the closure" for all queues. 
Only one such closure + // is allowed. The "apply_closure_to_completed_buffer" method will apply + // this closure to a completed buffer, and "iterate_closure_all_threads" + // applies it to partially-filled buffers (the latter should only be done + // with the world stopped). + void set_closure(CardTableEntryClosure* closure); + + // If there is a registered closure for buffers, apply it to all entries + // in all currently-active buffers. This should only be applied at a + // safepoint. (Currently must not be called in parallel; this should + // change in the future.) If "consume" is true, processed entries are + // discarded. + void iterate_closure_all_threads(bool consume = true, + size_t worker_i = 0); + + // If there exists some completed buffer, pop it, then apply the + // registered closure to all its elements, nulling out those elements + // processed. If all elements are processed, returns "true". If no + // completed buffers exist, returns false. If a completed buffer exists, + // but is only partially completed before a "yield" happens, the + // partially completed buffer (with its processed elements set to NULL) + // is returned to the completed buffer set, and this call returns false. + bool apply_closure_to_completed_buffer(int worker_i = 0, + int stop_at = 0, + bool with_CAS = false); + bool apply_closure_to_completed_buffer_helper(int worker_i, + CompletedBufferNode* nd); + + CompletedBufferNode* get_completed_buffer_CAS(); + CompletedBufferNode* get_completed_buffer_lock(int stop_at); + // Applies the current closure to all completed buffers, + // non-consumptively. + void apply_closure_to_all_completed_buffers(); + + DirtyCardQueue* shared_dirty_card_queue() { + return &_shared_dirty_card_queue; + } + + // If a full collection is happening, reset partial logs, and ignore + // completed ones: the full collection will make them all irrelevant. + void abandon_logs(); + + // If any threads have partial logs, add them to the global list of logs. 
+ void concatenate_logs(); + void clear_n_completed_buffers() { _n_completed_buffers = 0;} + + jint processed_buffers_mut() { + return _processed_buffers_mut; + } + jint processed_buffers_rs_thread() { + return _processed_buffers_rs_thread; + } + +}; diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp new file mode 100644 index 00000000000..4d9d7f0ce6c --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp @@ -0,0 +1,628 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +#include "incls/_precompiled.incl" +#include "incls/_g1BlockOffsetTable.cpp.incl" + +////////////////////////////////////////////////////////////////////// +// G1BlockOffsetSharedArray +////////////////////////////////////////////////////////////////////// + +G1BlockOffsetSharedArray::G1BlockOffsetSharedArray(MemRegion reserved, + size_t init_word_size) : + _reserved(reserved), _end(NULL) +{ + size_t size = compute_size(reserved.word_size()); + ReservedSpace rs(ReservedSpace::allocation_align_size_up(size)); + if (!rs.is_reserved()) { + vm_exit_during_initialization("Could not reserve enough space for heap offset array"); + } + if (!_vs.initialize(rs, 0)) { + vm_exit_during_initialization("Could not reserve enough space for heap offset array"); + } + _offset_array = (u_char*)_vs.low_boundary(); + resize(init_word_size); + if (TraceBlockOffsetTable) { + gclog_or_tty->print_cr("G1BlockOffsetSharedArray::G1BlockOffsetSharedArray: "); + gclog_or_tty->print_cr(" " + " rs.base(): " INTPTR_FORMAT + " rs.size(): " INTPTR_FORMAT + " rs end(): " INTPTR_FORMAT, + rs.base(), rs.size(), rs.base() + rs.size()); + gclog_or_tty->print_cr(" " + " _vs.low_boundary(): " INTPTR_FORMAT + " _vs.high_boundary(): " INTPTR_FORMAT, + _vs.low_boundary(), + _vs.high_boundary()); + } +} + +void G1BlockOffsetSharedArray::resize(size_t new_word_size) { + assert(new_word_size <= _reserved.word_size(), "Resize larger than reserved"); + size_t new_size = compute_size(new_word_size); + size_t old_size = _vs.committed_size(); + size_t delta; + char* high = _vs.high(); + _end = _reserved.start() + new_word_size; + if (new_size > old_size) { + delta = ReservedSpace::page_align_size_up(new_size - old_size); + assert(delta > 0, "just checking"); + if (!_vs.expand_by(delta)) { + // Do better than this for Merlin + vm_exit_out_of_memory(delta, "offset table expansion"); + } + assert(_vs.high() == high + delta, "invalid expansion"); + // Initialization of the contents is left to the + // 
G1BlockOffsetArray that uses it. + } else { + delta = ReservedSpace::page_align_size_down(old_size - new_size); + if (delta == 0) return; + _vs.shrink_by(delta); + assert(_vs.high() == high - delta, "invalid expansion"); + } +} + +bool G1BlockOffsetSharedArray::is_card_boundary(HeapWord* p) const { + assert(p >= _reserved.start(), "just checking"); + size_t delta = pointer_delta(p, _reserved.start()); + return (delta & right_n_bits(LogN_words)) == (size_t)NoBits; +} + + +////////////////////////////////////////////////////////////////////// +// G1BlockOffsetArray +////////////////////////////////////////////////////////////////////// + +G1BlockOffsetArray::G1BlockOffsetArray(G1BlockOffsetSharedArray* array, + MemRegion mr, bool init_to_zero) : + G1BlockOffsetTable(mr.start(), mr.end()), + _unallocated_block(_bottom), + _array(array), _csp(NULL), + _init_to_zero(init_to_zero) { + assert(_bottom <= _end, "arguments out of order"); + if (!_init_to_zero) { + // initialize cards to point back to mr.start() + set_remainder_to_point_to_start(mr.start() + N_words, mr.end()); + _array->set_offset_array(0, 0); // set first card to 0 + } +} + +void G1BlockOffsetArray::set_space(Space* sp) { + _sp = sp; + _csp = sp->toContiguousSpace(); +} + +// The arguments follow the normal convention of denoting +// a right-open interval: [start, end) +void +G1BlockOffsetArray:: set_remainder_to_point_to_start(HeapWord* start, HeapWord* end) { + + if (start >= end) { + // The start address is equal to the end address (or to + // the right of the end address) so there are not cards + // that need to be updated.. + return; + } + + // Write the backskip value for each region. + // + // offset + // card 2nd 3rd + // | +- 1st | | + // v v v v + // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+- + // |x|0|0|0|0|0|0|0|1|1|1|1|1|1| ... |1|1|1|1|2|2|2|2|2|2| ... 
+ // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+- + // 11 19 75 + // 12 + // + // offset card is the card that points to the start of an object + // x - offset value of offset card + // 1st - start of first logarithmic region + // 0 corresponds to logarithmic value N_words + 0 and 2**(3 * 0) = 1 + // 2nd - start of second logarithmic region + // 1 corresponds to logarithmic value N_words + 1 and 2**(3 * 1) = 8 + // 3rd - start of third logarithmic region + // 2 corresponds to logarithmic value N_words + 2 and 2**(3 * 2) = 64 + // + // integer below the block offset entry is an example of + // the index of the entry + // + // Given an address, + // Find the index for the address + // Find the block offset table entry + // Convert the entry to a back slide + // (e.g., with today's, offset = 0x81 => + // back slip = 2**(3*(0x81 - N_words)) = 2**3) = 8 + // Move back N (e.g., 8) entries and repeat with the + // value of the new entry + // + size_t start_card = _array->index_for(start); + size_t end_card = _array->index_for(end-1); + assert(start ==_array->address_for_index(start_card), "Precondition"); + assert(end ==_array->address_for_index(end_card)+N_words, "Precondition"); + set_remainder_to_point_to_start_incl(start_card, end_card); // closed interval +} + +// Unlike the normal convention in this code, the argument here denotes +// a closed, inclusive interval: [start_card, end_card], cf set_remainder_to_point_to_start() +// above. +void +G1BlockOffsetArray::set_remainder_to_point_to_start_incl(size_t start_card, size_t end_card) { + if (start_card > end_card) { + return; + } + assert(start_card > _array->index_for(_bottom), "Cannot be first card"); + assert(_array->offset_array(start_card-1) <= N_words, + "Offset card has an unexpected value"); + size_t start_card_for_region = start_card; + u_char offset = max_jubyte; + for (int i = 0; i < BlockOffsetArray::N_powers; i++) { + // -1 so that the card with the actual offset is counted. 
Another -1 + // so that the reach ends in this region and not at the start + // of the next. + size_t reach = start_card - 1 + (BlockOffsetArray::power_to_cards_back(i+1) - 1); + offset = N_words + i; + if (reach >= end_card) { + _array->set_offset_array(start_card_for_region, end_card, offset); + start_card_for_region = reach + 1; + break; + } + _array->set_offset_array(start_card_for_region, reach, offset); + start_card_for_region = reach + 1; + } + assert(start_card_for_region > end_card, "Sanity check"); + DEBUG_ONLY(check_all_cards(start_card, end_card);) +} + +// The block [blk_start, blk_end) has been allocated; +// adjust the block offset table to represent this information; +// right-open interval: [blk_start, blk_end) +void +G1BlockOffsetArray::alloc_block(HeapWord* blk_start, HeapWord* blk_end) { + mark_block(blk_start, blk_end); + allocated(blk_start, blk_end); +} + +// Adjust BOT to show that a previously whole block has been split +// into two. +void G1BlockOffsetArray::split_block(HeapWord* blk, size_t blk_size, + size_t left_blk_size) { + // Verify that the BOT shows [blk, blk + blk_size) to be one block. + verify_single_block(blk, blk_size); + // Update the BOT to indicate that [blk + left_blk_size, blk + blk_size) + // is one single block. + mark_block(blk + left_blk_size, blk + blk_size); +} + + +// Action_mark - update the BOT for the block [blk_start, blk_end). +// Current typical use is for splitting a block. +// Action_single - update the BOT for an allocation. +// Action_check - BOT verification. +void G1BlockOffsetArray::do_block_internal(HeapWord* blk_start, + HeapWord* blk_end, + Action action) { + assert(Universe::heap()->is_in_reserved(blk_start), + "reference must be into the heap"); + assert(Universe::heap()->is_in_reserved(blk_end-1), + "limit must be within the heap"); + // This is optimized to make the test fast, assuming we only rarely + // cross boundaries. 
+ uintptr_t end_ui = (uintptr_t)(blk_end - 1); + uintptr_t start_ui = (uintptr_t)blk_start; + // Calculate the last card boundary preceding end of blk + intptr_t boundary_before_end = (intptr_t)end_ui; + clear_bits(boundary_before_end, right_n_bits(LogN)); + if (start_ui <= (uintptr_t)boundary_before_end) { + // blk starts at or crosses a boundary + // Calculate index of card on which blk begins + size_t start_index = _array->index_for(blk_start); + // Index of card on which blk ends + size_t end_index = _array->index_for(blk_end - 1); + // Start address of card on which blk begins + HeapWord* boundary = _array->address_for_index(start_index); + assert(boundary <= blk_start, "blk should start at or after boundary"); + if (blk_start != boundary) { + // blk starts strictly after boundary + // adjust card boundary and start_index forward to next card + boundary += N_words; + start_index++; + } + assert(start_index <= end_index, "monotonicity of index_for()"); + assert(boundary <= (HeapWord*)boundary_before_end, "tautology"); + switch (action) { + case Action_mark: { + if (init_to_zero()) { + _array->set_offset_array(start_index, boundary, blk_start); + break; + } // Else fall through to the next case + } + case Action_single: { + _array->set_offset_array(start_index, boundary, blk_start); + // We have finished marking the "offset card". We need to now + // mark the subsequent cards that this blk spans. + if (start_index < end_index) { + HeapWord* rem_st = _array->address_for_index(start_index) + N_words; + HeapWord* rem_end = _array->address_for_index(end_index) + N_words; + set_remainder_to_point_to_start(rem_st, rem_end); + } + break; + } + case Action_check: { + _array->check_offset_array(start_index, boundary, blk_start); + // We have finished checking the "offset card". We need to now + // check the subsequent cards that this blk spans. 
+ check_all_cards(start_index + 1, end_index); + break; + } + default: + ShouldNotReachHere(); + } + } +} + +// The card-interval [start_card, end_card] is a closed interval; this +// is an expensive check -- use with care and only under protection of +// suitable flag. +void G1BlockOffsetArray::check_all_cards(size_t start_card, size_t end_card) const { + + if (end_card < start_card) { + return; + } + guarantee(_array->offset_array(start_card) == N_words, "Wrong value in second card"); + for (size_t c = start_card + 1; c <= end_card; c++ /* yeah! */) { + u_char entry = _array->offset_array(c); + if (c - start_card > BlockOffsetArray::power_to_cards_back(1)) { + guarantee(entry > N_words, "Should be in logarithmic region"); + } + size_t backskip = BlockOffsetArray::entry_to_cards_back(entry); + size_t landing_card = c - backskip; + guarantee(landing_card >= (start_card - 1), "Inv"); + if (landing_card >= start_card) { + guarantee(_array->offset_array(landing_card) <= entry, "monotonicity"); + } else { + guarantee(landing_card == start_card - 1, "Tautology"); + guarantee(_array->offset_array(landing_card) <= N_words, "Offset value"); + } + } +} + +// The range [blk_start, blk_end) represents a single contiguous block +// of storage; modify the block offset table to represent this +// information; Right-open interval: [blk_start, blk_end) +// NOTE: this method does _not_ adjust _unallocated_block. +void +G1BlockOffsetArray::single_block(HeapWord* blk_start, HeapWord* blk_end) { + do_block_internal(blk_start, blk_end, Action_single); +} + +// Mark the BOT such that if [blk_start, blk_end) straddles a card +// boundary, the card following the first such boundary is marked +// with the appropriate offset. +// NOTE: this method does _not_ adjust _unallocated_block or +// any cards subsequent to the first one. 
+void +G1BlockOffsetArray::mark_block(HeapWord* blk_start, HeapWord* blk_end) { + do_block_internal(blk_start, blk_end, Action_mark); +} + +void G1BlockOffsetArray::join_blocks(HeapWord* blk1, HeapWord* blk2) { + HeapWord* blk1_start = Universe::heap()->block_start(blk1); + HeapWord* blk2_start = Universe::heap()->block_start(blk2); + assert(blk1 == blk1_start && blk2 == blk2_start, + "Must be block starts."); + assert(blk1 + _sp->block_size(blk1) == blk2, "Must be contiguous."); + size_t blk1_start_index = _array->index_for(blk1); + size_t blk2_start_index = _array->index_for(blk2); + assert(blk1_start_index <= blk2_start_index, "sanity"); + HeapWord* blk2_card_start = _array->address_for_index(blk2_start_index); + if (blk2 == blk2_card_start) { + // blk2 starts a card. Does blk1 start on the previous card, or further + // back? + assert(blk1_start_index < blk2_start_index, "must be lower card."); + if (blk1_start_index + 1 == blk2_start_index) { + // previous card; new value for blk2 card is size of blk1. + _array->set_offset_array(blk2_start_index, (u_char) _sp->block_size(blk1)); + } else { + // Earlier card; go back a card. + _array->set_offset_array(blk2_start_index, N_words); + } + } else { + // blk2 does not start a card. Does it cross a card? If not, nothing + // to do. + size_t blk2_end_index = + _array->index_for(blk2 + _sp->block_size(blk2) - 1); + assert(blk2_end_index >= blk2_start_index, "sanity"); + if (blk2_end_index > blk2_start_index) { + // Yes, it crosses a card. The value for the next card must change. + if (blk1_start_index + 1 == blk2_start_index) { + // previous card; new value for second blk2 card is size of blk1. + _array->set_offset_array(blk2_start_index + 1, + (u_char) _sp->block_size(blk1)); + } else { + // Earlier card; go back a card. 
+ _array->set_offset_array(blk2_start_index + 1, N_words); + } + } + } +} + +HeapWord* G1BlockOffsetArray::block_start_unsafe(const void* addr) { + assert(_bottom <= addr && addr < _end, + "addr must be covered by this Array"); + // Must read this exactly once because it can be modified by parallel + // allocation. + HeapWord* ub = _unallocated_block; + if (BlockOffsetArrayUseUnallocatedBlock && addr >= ub) { + assert(ub < _end, "tautology (see above)"); + return ub; + } + // Otherwise, find the block start using the table. + HeapWord* q = block_at_or_preceding(addr, false, 0); + return forward_to_block_containing_addr(q, addr); +} + +// This duplicates a little code from the above: unavoidable. +HeapWord* +G1BlockOffsetArray::block_start_unsafe_const(const void* addr) const { + assert(_bottom <= addr && addr < _end, + "addr must be covered by this Array"); + // Must read this exactly once because it can be modified by parallel + // allocation. + HeapWord* ub = _unallocated_block; + if (BlockOffsetArrayUseUnallocatedBlock && addr >= ub) { + assert(ub < _end, "tautology (see above)"); + return ub; + } + // Otherwise, find the block start using the table. + HeapWord* q = block_at_or_preceding(addr, false, 0); + HeapWord* n = q + _sp->block_size(q); + return forward_to_block_containing_addr_const(q, n, addr); +} + + +HeapWord* +G1BlockOffsetArray::forward_to_block_containing_addr_slow(HeapWord* q, + HeapWord* n, + const void* addr) { + // We're not in the normal case. We need to handle an important subcase + // here: LAB allocation. An allocation previously recorded in the + // offset table was actually a lab allocation, and was divided into + // several objects subsequently. Fix this situation as we answer the + // query, by updating entries as we cross them. + + // If the first object's end q is at the card boundary. Start refining + // with the corresponding card (the value of the entry will be basically + // set to 0). 
If the object crosses the boundary -- start from the next card. + size_t next_index = _array->index_for(n) + !_array->is_card_boundary(n); + HeapWord* next_boundary = _array->address_for_index(next_index); + if (csp() != NULL) { + if (addr >= csp()->top()) return csp()->top(); + while (next_boundary < addr) { + while (n <= next_boundary) { + q = n; + oop obj = oop(q); + if (obj->klass() == NULL) return q; + n += obj->size(); + } + assert(q <= next_boundary && n > next_boundary, "Consequence of loop"); + // [q, n) is the block that crosses the boundary. + alloc_block_work2(&next_boundary, &next_index, q, n); + } + } else { + while (next_boundary < addr) { + while (n <= next_boundary) { + q = n; + oop obj = oop(q); + if (obj->klass() == NULL) return q; + n += _sp->block_size(q); + } + assert(q <= next_boundary && n > next_boundary, "Consequence of loop"); + // [q, n) is the block that crosses the boundary. + alloc_block_work2(&next_boundary, &next_index, q, n); + } + } + return forward_to_block_containing_addr_const(q, n, addr); +} + +HeapWord* G1BlockOffsetArray::block_start_careful(const void* addr) const { + assert(_array->offset_array(0) == 0, "objects can't cross covered areas"); + + assert(_bottom <= addr && addr < _end, + "addr must be covered by this Array"); + // Must read this exactly once because it can be modified by parallel + // allocation. + HeapWord* ub = _unallocated_block; + if (BlockOffsetArrayUseUnallocatedBlock && addr >= ub) { + assert(ub < _end, "tautology (see above)"); + return ub; + } + + // Otherwise, find the block start using the table, but taking + // care (cf block_start_unsafe() above) not to parse any objects/blocks + // on the cards themselves. 
+ size_t index = _array->index_for(addr); + assert(_array->address_for_index(index) == addr, + "arg should be start of card"); + + HeapWord* q = (HeapWord*)addr; + uint offset; + do { + offset = _array->offset_array(index--); + q -= offset; + } while (offset == N_words); + assert(q <= addr, "block start should be to left of arg"); + return q; +} + +// Note that the committed size of the covered space may have changed, +// so the table size might also wish to change. +void G1BlockOffsetArray::resize(size_t new_word_size) { + HeapWord* new_end = _bottom + new_word_size; + if (_end < new_end && !init_to_zero()) { + // verify that the old and new boundaries are also card boundaries + assert(_array->is_card_boundary(_end), + "_end not a card boundary"); + assert(_array->is_card_boundary(new_end), + "new _end would not be a card boundary"); + // set all the newly added cards + _array->set_offset_array(_end, new_end, N_words); + } + _end = new_end; // update _end +} + +void G1BlockOffsetArray::set_region(MemRegion mr) { + _bottom = mr.start(); + _end = mr.end(); +} + +// +// threshold_ +// | _index_ +// v v +// +-------+-------+-------+-------+-------+ +// | i-1 | i | i+1 | i+2 | i+3 | +// +-------+-------+-------+-------+-------+ +// ( ^ ] +// block-start +// +void G1BlockOffsetArray::alloc_block_work2(HeapWord** threshold_, size_t* index_, + HeapWord* blk_start, HeapWord* blk_end) { + // For efficiency, do copy-in/copy-out. 
+ HeapWord* threshold = *threshold_; + size_t index = *index_; + + assert(blk_start != NULL && blk_end > blk_start, + "phantom block"); + assert(blk_end > threshold, "should be past threshold"); + assert(blk_start <= threshold, "blk_start should be at or before threshold"); + assert(pointer_delta(threshold, blk_start) <= N_words, + "offset should be <= BlockOffsetSharedArray::N"); + assert(Universe::heap()->is_in_reserved(blk_start), + "reference must be into the heap"); + assert(Universe::heap()->is_in_reserved(blk_end-1), + "limit must be within the heap"); + assert(threshold == _array->_reserved.start() + index*N_words, + "index must agree with threshold"); + + DEBUG_ONLY(size_t orig_index = index;) + + // Mark the card that holds the offset into the block. Note + // that _next_offset_index and _next_offset_threshold are not + // updated until the end of this method. + _array->set_offset_array(index, threshold, blk_start); + + // We need to now mark the subsequent cards that this blk spans. + + // Index of card on which blk ends. + size_t end_index = _array->index_for(blk_end - 1); + + // Are there more cards left to be updated? + if (index + 1 <= end_index) { + HeapWord* rem_st = _array->address_for_index(index + 1); + // Calculate rem_end this way because end_index + // may be the last valid index in the covered region. + HeapWord* rem_end = _array->address_for_index(end_index) + N_words; + set_remainder_to_point_to_start(rem_st, rem_end); + } + + index = end_index + 1; + // Calculate threshold_ this way because end_index + // may be the last valid index in the covered region. + threshold = _array->address_for_index(end_index) + N_words; + assert(threshold >= blk_end, "Incorrect offset threshold"); + + // index_ and threshold_ updated here. + *threshold_ = threshold; + *index_ = index; + +#ifdef ASSERT + // The offset can be 0 if the block starts on a boundary. That + // is checked by an assertion above.
+ size_t start_index = _array->index_for(blk_start); + HeapWord* boundary = _array->address_for_index(start_index); + assert((_array->offset_array(orig_index) == 0 && + blk_start == boundary) || + (_array->offset_array(orig_index) > 0 && + _array->offset_array(orig_index) <= N_words), + "offset array should have been set"); + for (size_t j = orig_index + 1; j <= end_index; j++) { + assert(_array->offset_array(j) > 0 && + _array->offset_array(j) <= + (u_char) (N_words+BlockOffsetArray::N_powers-1), + "offset array should have been set"); + } +#endif +} + +////////////////////////////////////////////////////////////////////// +// G1BlockOffsetArrayContigSpace +////////////////////////////////////////////////////////////////////// + +HeapWord* +G1BlockOffsetArrayContigSpace::block_start_unsafe(const void* addr) { + assert(_bottom <= addr && addr < _end, + "addr must be covered by this Array"); + HeapWord* q = block_at_or_preceding(addr, true, _next_offset_index-1); + return forward_to_block_containing_addr(q, addr); +} + +HeapWord* +G1BlockOffsetArrayContigSpace:: +block_start_unsafe_const(const void* addr) const { + assert(_bottom <= addr && addr < _end, + "addr must be covered by this Array"); + HeapWord* q = block_at_or_preceding(addr, true, _next_offset_index-1); + HeapWord* n = q + _sp->block_size(q); + return forward_to_block_containing_addr_const(q, n, addr); +} + +G1BlockOffsetArrayContigSpace:: +G1BlockOffsetArrayContigSpace(G1BlockOffsetSharedArray* array, + MemRegion mr) : + G1BlockOffsetArray(array, mr, true) +{ + _next_offset_threshold = NULL; + _next_offset_index = 0; +} + +HeapWord* G1BlockOffsetArrayContigSpace::initialize_threshold() { + assert(!Universe::heap()->is_in_reserved(_array->_offset_array), + "just checking"); + _next_offset_index = _array->index_for(_bottom); + _next_offset_index++; + _next_offset_threshold = + _array->address_for_index(_next_offset_index); + return _next_offset_threshold; +} + +void 
G1BlockOffsetArrayContigSpace::zero_bottom_entry() { + assert(!Universe::heap()->is_in_reserved(_array->_offset_array), + "just checking"); + size_t bottom_index = _array->index_for(_bottom); + assert(_array->address_for_index(bottom_index) == _bottom, + "Precondition of call"); + _array->set_offset_array(bottom_index, 0); +} diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp new file mode 100644 index 00000000000..8c72f768933 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp @@ -0,0 +1,487 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// The CollectedHeap type requires subtypes to implement a method +// "block_start". For some subtypes, notably generational +// systems using card-table-based write barriers, the efficiency of this +// operation may be important. 
Implementations of the "BlockOffsetArray" +// class may be useful in providing such efficient implementations. +// +// While generally mirroring the structure of the BOT for GenCollectedHeap, +// the following types are tailored more towards G1's uses; these should, +// however, be merged back into a common BOT to avoid code duplication +// and reduce maintenance overhead. +// +// G1BlockOffsetTable (abstract) +// -- G1BlockOffsetArray (uses G1BlockOffsetSharedArray) +// -- G1BlockOffsetArrayContigSpace +// +// A main impediment to the consolidation of this code might be the +// effect of making some of the block_start*() calls non-const as +// below. Whether that might adversely affect performance optimizations +// that compilers might normally perform in the case of non-G1 +// collectors needs to be carefully investigated prior to any such +// consolidation. + +// Forward declarations +class ContiguousSpace; +class G1BlockOffsetSharedArray; + +class G1BlockOffsetTable VALUE_OBJ_CLASS_SPEC { + friend class VMStructs; +protected: + // These members describe the region covered by the table. + + // The space this table is covering. + HeapWord* _bottom; // == reserved.start + HeapWord* _end; // End of currently allocated region. + +public: + // Initialize the table to cover the given space. + // The contents of the initial table are undefined. + G1BlockOffsetTable(HeapWord* bottom, HeapWord* end) : + _bottom(bottom), _end(end) + { + assert(_bottom <= _end, "arguments out of order"); + } + + // Note that the committed size of the covered space may have changed, + // so the table size might also wish to change. + virtual void resize(size_t new_word_size) = 0; + + virtual void set_bottom(HeapWord* new_bottom) { + assert(new_bottom <= _end, "new_bottom > _end"); + _bottom = new_bottom; + resize(pointer_delta(_end, _bottom)); + } + + // Requires "addr" to be contained by a block, and returns the address of + // the start of that block. 
(May have side effects, namely updating of + // shared array entries that "point" too far backwards. This can occur, + // for example, when LAB allocation is used in a space covered by the + // table.) + virtual HeapWord* block_start_unsafe(const void* addr) = 0; + // Same as above, but does not have any of the possible side effects + // discussed above. + virtual HeapWord* block_start_unsafe_const(const void* addr) const = 0; + + // Returns the address of the start of the block containing "addr", or + // else "null" if it is covered by no block. (May have side effects, + // namely updating of shared array entries that "point" too far + // backwards. This can occur, for example, when lab allocation is used + // in a space covered by the table.) + inline HeapWord* block_start(const void* addr); + // Same as above, but does not have any of the possible side effects + // discussed above. + inline HeapWord* block_start_const(const void* addr) const; +}; + +// This implementation of "G1BlockOffsetTable" divides the covered region +// into "N"-word subregions (where "N" = 2^"LogN". An array with an entry +// for each such subregion indicates how far back one must go to find the +// start of the chunk that includes the first word of the subregion. +// +// Each BlockOffsetArray is owned by a Space. However, the actual array +// may be shared by several BlockOffsetArrays; this is useful +// when a single resizable area (such as a generation) is divided up into +// several spaces in which contiguous allocation takes place, +// such as, for example, in G1 or in the train generation.) + +// Here is the shared array type. + +class G1BlockOffsetSharedArray: public CHeapObj { + friend class G1BlockOffsetArray; + friend class G1BlockOffsetArrayContigSpace; + friend class VMStructs; + +private: + // The reserved region covered by the shared array. + MemRegion _reserved; + + // End of the current committed region. 
+ HeapWord* _end; + + // Array for keeping offsets for retrieving object start fast given an + // address. + VirtualSpace _vs; + u_char* _offset_array; // byte array keeping backwards offsets + + // Bounds checking accessors: + // For performance these have to devolve to array accesses in product builds. + u_char offset_array(size_t index) const { + assert(index < _vs.committed_size(), "index out of range"); + return _offset_array[index]; + } + + void set_offset_array(size_t index, u_char offset) { + assert(index < _vs.committed_size(), "index out of range"); + assert(offset <= N_words, "offset too large"); + _offset_array[index] = offset; + } + + void set_offset_array(size_t index, HeapWord* high, HeapWord* low) { + assert(index < _vs.committed_size(), "index out of range"); + assert(high >= low, "addresses out of order"); + assert(pointer_delta(high, low) <= N_words, "offset too large"); + _offset_array[index] = (u_char) pointer_delta(high, low); + } + + void set_offset_array(HeapWord* left, HeapWord* right, u_char offset) { + assert(index_for(right - 1) < _vs.committed_size(), + "right address out of range"); + assert(left < right, "Heap addresses out of order"); + size_t num_cards = pointer_delta(right, left) >> LogN_words; + memset(&_offset_array[index_for(left)], offset, num_cards); + } + + void set_offset_array(size_t left, size_t right, u_char offset) { + assert(right < _vs.committed_size(), "right address out of range"); + assert(left <= right, "indexes out of order"); + size_t num_cards = right - left + 1; + memset(&_offset_array[left], offset, num_cards); + } + + void check_offset_array(size_t index, HeapWord* high, HeapWord* low) const { + assert(index < _vs.committed_size(), "index out of range"); + assert(high >= low, "addresses out of order"); + assert(pointer_delta(high, low) <= N_words, "offset too large"); + assert(_offset_array[index] == pointer_delta(high, low), + "Wrong offset"); + } + + bool is_card_boundary(HeapWord* p) const; + + // Return 
the number of slots needed for an offset array + // that covers mem_region_words words. + // We always add an extra slot because if an object + // ends on a card boundary we put a 0 in the next + // offset array slot, so we want that slot always + // to be reserved. + + size_t compute_size(size_t mem_region_words) { + size_t number_of_slots = (mem_region_words / N_words) + 1; + return ReservedSpace::page_align_size_up(number_of_slots); + } + +public: + enum SomePublicConstants { + LogN = 9, + LogN_words = LogN - LogHeapWordSize, + N_bytes = 1 << LogN, + N_words = 1 << LogN_words + }; + + // Initialize the table to cover from "base" to (at least) + // "base + init_word_size". In the future, the table may be expanded + // (see "resize" below) up to the size of "_reserved" (which must be at + // least "init_word_size".) The contents of the initial table are + // undefined; it is the responsibility of the constituent + // G1BlockOffsetTable(s) to initialize cards. + G1BlockOffsetSharedArray(MemRegion reserved, size_t init_word_size); + + // Notes a change in the committed size of the region covered by the + // table. The "new_word_size" may not be larger than the size of the + // reserved region this table covers. + void resize(size_t new_word_size); + + void set_bottom(HeapWord* new_bottom); + + // Updates all the BlockOffsetArray's sharing this shared array to + // reflect the current "top"'s of their spaces. + void update_offset_arrays(); + + // Return the appropriate index into "_offset_array" for "p". + inline size_t index_for(const void* p) const; + + // Return the address indicating the start of the region corresponding to + // "index" in "_offset_array". + inline HeapWord* address_for_index(size_t index) const; +}; + +// And here is the G1BlockOffsetTable subtype that uses the array. 
+ +class G1BlockOffsetArray: public G1BlockOffsetTable { + friend class G1BlockOffsetSharedArray; + friend class G1BlockOffsetArrayContigSpace; + friend class VMStructs; +private: + enum SomePrivateConstants { + N_words = G1BlockOffsetSharedArray::N_words, + LogN = G1BlockOffsetSharedArray::LogN + }; + + // The following enums are used by do_block_helper + enum Action { + Action_single, // BOT records a single block (see single_block()) + Action_mark, // BOT marks the start of a block (see mark_block()) + Action_check // Check that BOT records block correctly + // (see verify_single_block()). + }; + + // This is the array, which can be shared by several BlockOffsetArray's + // servicing different + G1BlockOffsetSharedArray* _array; + + // The space that owns this subregion. + Space* _sp; + + // If "_sp" is a contiguous space, the field below is the view of "_sp" + // as a contiguous space, else NULL. + ContiguousSpace* _csp; + + // If true, array entries are initialized to 0; otherwise, they are + // initialized to point backwards to the beginning of the covered region. + bool _init_to_zero; + + // The portion [_unallocated_block, _sp.end()) of the space that + // is a single block known not to contain any objects. + // NOTE: See BlockOffsetArrayUseUnallocatedBlock flag. + HeapWord* _unallocated_block; + + // Sets the entries + // corresponding to the cards starting at "start" and ending at "end" + // to point back to the card before "start": the interval [start, end) + // is right-open. 
+ void set_remainder_to_point_to_start(HeapWord* start, HeapWord* end); + // Same as above, except that the args here are a card _index_ interval + // that is closed: [start_index, end_index] + void set_remainder_to_point_to_start_incl(size_t start, size_t end); + + // A helper function for BOT adjustment/verification work + void do_block_internal(HeapWord* blk_start, HeapWord* blk_end, Action action); + +protected: + + ContiguousSpace* csp() const { return _csp; } + + // Returns the address of a block whose start is at most "addr". + // If "has_max_index" is true, "assumes "max_index" is the last valid one + // in the array. + inline HeapWord* block_at_or_preceding(const void* addr, + bool has_max_index, + size_t max_index) const; + + // "q" is a block boundary that is <= "addr"; "n" is the address of the + // next block (or the end of the space.) Return the address of the + // beginning of the block that contains "addr". Does so without side + // effects (see, e.g., spec of block_start.) + inline HeapWord* + forward_to_block_containing_addr_const(HeapWord* q, HeapWord* n, + const void* addr) const; + + // "q" is a block boundary that is <= "addr"; return the address of the + // beginning of the block that contains "addr". May have side effects + // on "this", by updating imprecise entries. + inline HeapWord* forward_to_block_containing_addr(HeapWord* q, + const void* addr); + + // "q" is a block boundary that is <= "addr"; "n" is the address of the + // next block (or the end of the space.) Return the address of the + // beginning of the block that contains "addr". May have side effects + // on "this", by updating imprecise entries. + HeapWord* forward_to_block_containing_addr_slow(HeapWord* q, + HeapWord* n, + const void* addr); + + // Requires that "*threshold_" be the first array entry boundary at or + // above "blk_start", and that "*index_" be the corresponding array + // index. 
If the block starts at or crosses "*threshold_", records + // "blk_start" as the appropriate block start for the array index + // starting at "*threshold_", and for any other indices crossed by the + // block. Updates "*threshold_" and "*index_" to correspond to the first + // index after the block end. + void alloc_block_work2(HeapWord** threshold_, size_t* index_, + HeapWord* blk_start, HeapWord* blk_end); + +public: + // The space may not have its bottom and top set yet, which is why the + // region is passed as a parameter. If "init_to_zero" is true, the + // elements of the array are initialized to zero. Otherwise, they are + // initialized to point backwards to the beginning. + G1BlockOffsetArray(G1BlockOffsetSharedArray* array, MemRegion mr, + bool init_to_zero); + + // Note: this ought to be part of the constructor, but that would require + // "this" to be passed as a parameter to a member constructor for + // the containing concrete subtype of Space. + // This would be legal C++, but MS VC++ doesn't allow it. + void set_space(Space* sp); + + // Resets the covered region to the given "mr". + void set_region(MemRegion mr); + + // Resets the covered region to one with the same _bottom as before but + // the "new_word_size". + void resize(size_t new_word_size); + + // These must be guaranteed to work properly (i.e., do nothing) + // when "blk_start" ("blk" for second version) is "NULL". + virtual void alloc_block(HeapWord* blk_start, HeapWord* blk_end); + virtual void alloc_block(HeapWord* blk, size_t size) { + alloc_block(blk, blk + size); + } + + // The following methods are useful and optimized for a + // general, non-contiguous space. + + // The given arguments are required to be the starts of adjacent ("blk1" + // before "blk2") well-formed blocks covered by "this". After this call, + // they should be considered to form one block.
+ virtual void join_blocks(HeapWord* blk1, HeapWord* blk2); + + // Given a block [blk_start, blk_start + full_blk_size), and + // a left_blk_size < full_blk_size, adjust the BOT to show two + // blocks [blk_start, blk_start + left_blk_size) and + // [blk_start + left_blk_size, blk_start + full_blk_size). + // It is assumed (and verified in the non-product VM) that the + // BOT was correct for the original block. + void split_block(HeapWord* blk_start, size_t full_blk_size, + size_t left_blk_size); + + // Adjust the BOT to show that it has a single block in the + // range [blk_start, blk_start + size). All necessary BOT + // cards are adjusted, but _unallocated_block isn't. + void single_block(HeapWord* blk_start, HeapWord* blk_end); + void single_block(HeapWord* blk, size_t size) { + single_block(blk, blk + size); + } + + // Adjust BOT to show that it has a block in the range + // [blk_start, blk_start + size). Only the first card + // of BOT is touched. It is assumed (and verified in the + // non-product VM) that the remaining cards of the block + // are correct. + void mark_block(HeapWord* blk_start, HeapWord* blk_end); + void mark_block(HeapWord* blk, size_t size) { + mark_block(blk, blk + size); + } + + // Adjust _unallocated_block to indicate that a particular + // block has been newly allocated or freed. It is assumed (and + // verified in the non-product VM) that the BOT is correct for + // the given block. + inline void allocated(HeapWord* blk_start, HeapWord* blk_end) { + // Verify that the BOT shows [blk, blk + blk_size) to be one block. 
+ verify_single_block(blk_start, blk_end); + if (BlockOffsetArrayUseUnallocatedBlock) { + _unallocated_block = MAX2(_unallocated_block, blk_end); + } + } + + inline void allocated(HeapWord* blk, size_t size) { + allocated(blk, blk + size); + } + + inline void freed(HeapWord* blk_start, HeapWord* blk_end); + + inline void freed(HeapWord* blk, size_t size); + + virtual HeapWord* block_start_unsafe(const void* addr); + virtual HeapWord* block_start_unsafe_const(const void* addr) const; + + // Requires "addr" to be the start of a card and returns the + // start of the block that contains the given address. + HeapWord* block_start_careful(const void* addr) const; + + // If true, initialize array slots with no allocated blocks to zero. + // Otherwise, make them point back to the front. + bool init_to_zero() { return _init_to_zero; } + + // Verification & debugging - ensure that the offset table reflects the fact + // that the block [blk_start, blk_end) or [blk, blk + size) is a + // single block of storage. NOTE: can't const this because of + // call to non-const do_block_internal() below.
+ inline void verify_single_block(HeapWord* blk_start, HeapWord* blk_end) { + if (VerifyBlockOffsetArray) { + do_block_internal(blk_start, blk_end, Action_check); + } + } + + inline void verify_single_block(HeapWord* blk, size_t size) { + verify_single_block(blk, blk + size); + } + + // Verify that the given block is before _unallocated_block + inline void verify_not_unallocated(HeapWord* blk_start, + HeapWord* blk_end) const { + if (BlockOffsetArrayUseUnallocatedBlock) { + assert(blk_start < blk_end, "Block inconsistency?"); + assert(blk_end <= _unallocated_block, "_unallocated_block problem"); + } + } + + inline void verify_not_unallocated(HeapWord* blk, size_t size) const { + verify_not_unallocated(blk, blk + size); + } + + void check_all_cards(size_t left_card, size_t right_card) const; +}; + +// A subtype of BlockOffsetArray that takes advantage of the fact +// that its underlying space is a ContiguousSpace, so that its "active" +// region can be more efficiently tracked (than for a non-contiguous space). +class G1BlockOffsetArrayContigSpace: public G1BlockOffsetArray { + friend class VMStructs; + + // allocation boundary at which offset array must be updated + HeapWord* _next_offset_threshold; + size_t _next_offset_index; // index corresponding to that boundary + + // Work function to be called when allocation start crosses the next + // threshold in the contig space. + void alloc_block_work1(HeapWord* blk_start, HeapWord* blk_end) { + alloc_block_work2(&_next_offset_threshold, &_next_offset_index, + blk_start, blk_end); + } + + + public: + G1BlockOffsetArrayContigSpace(G1BlockOffsetSharedArray* array, MemRegion mr); + + // Initialize the threshold to reflect the first boundary after the + // bottom of the covered region. + HeapWord* initialize_threshold(); + + // Zero out the entry for _bottom (offset will be zero). + void zero_bottom_entry(); + + // Return the next threshold, the point at which the table should be + // updated. 
+ HeapWord* threshold() const { return _next_offset_threshold; } + + // These must be guaranteed to work properly (i.e., do nothing) + // when "blk_start" ("blk" for second version) is "NULL". In this + // implementation, that's true because NULL is represented as 0, and thus + // never exceeds the "_next_offset_threshold". + void alloc_block(HeapWord* blk_start, HeapWord* blk_end) { + if (blk_end > _next_offset_threshold) + alloc_block_work1(blk_start, blk_end); + } + void alloc_block(HeapWord* blk, size_t size) { + alloc_block(blk, blk+size); + } + + HeapWord* block_start_unsafe(const void* addr); + HeapWord* block_start_unsafe_const(const void* addr) const; +}; diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp new file mode 100644 index 00000000000..45e148532c4 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp @@ -0,0 +1,153 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +inline HeapWord* G1BlockOffsetTable::block_start(const void* addr) { + if (addr >= _bottom && addr < _end) { + return block_start_unsafe(addr); + } else { + return NULL; + } +} + +inline HeapWord* +G1BlockOffsetTable::block_start_const(const void* addr) const { + if (addr >= _bottom && addr < _end) { + return block_start_unsafe_const(addr); + } else { + return NULL; + } +} + +inline size_t G1BlockOffsetSharedArray::index_for(const void* p) const { + char* pc = (char*)p; + assert(pc >= (char*)_reserved.start() && + pc < (char*)_reserved.end(), + "p not in range."); + size_t delta = pointer_delta(pc, _reserved.start(), sizeof(char)); + size_t result = delta >> LogN; + assert(result < _vs.committed_size(), "bad index from address"); + return result; +} + +inline HeapWord* +G1BlockOffsetSharedArray::address_for_index(size_t index) const { + assert(index < _vs.committed_size(), "bad index"); + HeapWord* result = _reserved.start() + (index << LogN_words); + assert(result >= _reserved.start() && result < _reserved.end(), + "bad address from index"); + return result; +} + +inline HeapWord* +G1BlockOffsetArray::block_at_or_preceding(const void* addr, + bool has_max_index, + size_t max_index) const { + assert(_array->offset_array(0) == 0, "objects can't cross covered areas"); + size_t index = _array->index_for(addr); + // We must make sure that the offset table entry we use is valid. If + // "addr" is past the end, start at the last known one and go forward. + if (has_max_index) { + index = MIN2(index, max_index); + } + HeapWord* q = _array->address_for_index(index); + + uint offset = _array->offset_array(index); // Extend u_char to uint. + while (offset >= N_words) { + // The excess of the offset from N_words indicates a power of Base + // to go back by. 
+ size_t n_cards_back = BlockOffsetArray::entry_to_cards_back(offset); + q -= (N_words * n_cards_back); + assert(q >= _sp->bottom(), "Went below bottom!"); + index -= n_cards_back; + offset = _array->offset_array(index); + } + assert(offset < N_words, "offset too large"); + q -= offset; + return q; +} + +inline HeapWord* +G1BlockOffsetArray:: +forward_to_block_containing_addr_const(HeapWord* q, HeapWord* n, + const void* addr) const { + if (csp() != NULL) { + if (addr >= csp()->top()) return csp()->top(); + while (n <= addr) { + q = n; + oop obj = oop(q); + if (obj->klass() == NULL) return q; + n += obj->size(); + } + } else { + while (n <= addr) { + q = n; + oop obj = oop(q); + if (obj->klass() == NULL) return q; + n += _sp->block_size(q); + } + } + assert(q <= n, "wrong order for q and addr"); + assert(addr < n, "wrong order for addr and n"); + return q; +} + +inline HeapWord* +G1BlockOffsetArray::forward_to_block_containing_addr(HeapWord* q, + const void* addr) { + if (oop(q)->klass() == NULL) return q; + HeapWord* n = q + _sp->block_size(q); + // In the normal case, where the query "addr" is a card boundary, and the + // offset table chunks are the same size as cards, the block starting at + // "q" will contain addr, so the test below will fail, and we'll fall + // through quickly. + if (n <= addr) { + q = forward_to_block_containing_addr_slow(q, n, addr); + } + assert(q <= addr, "wrong order for current and arg"); + return q; +} + +////////////////////////////////////////////////////////////////////////// +// BlockOffsetArrayNonContigSpace inlines +////////////////////////////////////////////////////////////////////////// +inline void G1BlockOffsetArray::freed(HeapWord* blk_start, HeapWord* blk_end) { + // Verify that the BOT shows [blk_start, blk_end) to be one block. 
+ verify_single_block(blk_start, blk_end); + // adjust _unallocated_block upward or downward + // as appropriate + if (BlockOffsetArrayUseUnallocatedBlock) { + assert(_unallocated_block <= _end, + "Inconsistent value for _unallocated_block"); + if (blk_end >= _unallocated_block && blk_start <= _unallocated_block) { + // CMS-specific note: a block abutting _unallocated_block to + // its left is being freed, a new block is being added or + // we are resetting following a compaction + _unallocated_block = blk_start; + } + } +} + +inline void G1BlockOffsetArray::freed(HeapWord* blk, size_t size) { + freed(blk, blk + size); +} diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp new file mode 100644 index 00000000000..370745f36e1 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp @@ -0,0 +1,5497 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +#include "incls/_precompiled.incl" +#include "incls/_g1CollectedHeap.cpp.incl" + +// turn it on so that the contents of the young list (scan-only / +// to-be-collected) are printed at "strategic" points before / during +// / after the collection --- this is useful for debugging +#define SCAN_ONLY_VERBOSE 0 +// CURRENT STATUS +// This file is under construction. Search for "FIXME". + +// INVARIANTS/NOTES +// +// All allocation activity covered by the G1CollectedHeap interface is +// serialized by acquiring the HeapLock. This happens in +// mem_allocate_work, which all such allocation functions call. +// (Note that this does not apply to TLAB allocation, which is not part +// of this interface: it is done by clients of this interface.) + +// Local to this file. + +// Finds the first HeapRegion. +// No longer used, but might be handy someday. + +class FindFirstRegionClosure: public HeapRegionClosure { + HeapRegion* _a_region; +public: + FindFirstRegionClosure() : _a_region(NULL) {} + bool doHeapRegion(HeapRegion* r) { + _a_region = r; + return true; + } + HeapRegion* result() { return _a_region; } +}; + + +class RefineCardTableEntryClosure: public CardTableEntryClosure { + SuspendibleThreadSet* _sts; + G1RemSet* _g1rs; + ConcurrentG1Refine* _cg1r; + bool _concurrent; +public: + RefineCardTableEntryClosure(SuspendibleThreadSet* sts, + G1RemSet* g1rs, + ConcurrentG1Refine* cg1r) : + _sts(sts), _g1rs(g1rs), _cg1r(cg1r), _concurrent(true) + {} + bool do_card_ptr(jbyte* card_ptr, int worker_i) { + _g1rs->concurrentRefineOneCard(card_ptr, worker_i); + if (_concurrent && _sts->should_yield()) { + // Caller will actually yield. + return false; + } + // Otherwise, we finished successfully; return true. 
+ return true; + } + void set_concurrent(bool b) { _concurrent = b; } +}; + + +class ClearLoggedCardTableEntryClosure: public CardTableEntryClosure { + int _calls; + G1CollectedHeap* _g1h; + CardTableModRefBS* _ctbs; + int _histo[256]; +public: + ClearLoggedCardTableEntryClosure() : + _calls(0) + { + _g1h = G1CollectedHeap::heap(); + _ctbs = (CardTableModRefBS*)_g1h->barrier_set(); + for (int i = 0; i < 256; i++) _histo[i] = 0; + } + bool do_card_ptr(jbyte* card_ptr, int worker_i) { + if (_g1h->is_in_reserved(_ctbs->addr_for(card_ptr))) { + _calls++; + unsigned char* ujb = (unsigned char*)card_ptr; + int ind = (int)(*ujb); + _histo[ind]++; + *card_ptr = -1; + } + return true; + } + int calls() { return _calls; } + void print_histo() { + gclog_or_tty->print_cr("Card table value histogram:"); + for (int i = 0; i < 256; i++) { + if (_histo[i] != 0) { + gclog_or_tty->print_cr(" %d: %d", i, _histo[i]); + } + } + } +}; + +class RedirtyLoggedCardTableEntryClosure: public CardTableEntryClosure { + int _calls; + G1CollectedHeap* _g1h; + CardTableModRefBS* _ctbs; +public: + RedirtyLoggedCardTableEntryClosure() : + _calls(0) + { + _g1h = G1CollectedHeap::heap(); + _ctbs = (CardTableModRefBS*)_g1h->barrier_set(); + } + bool do_card_ptr(jbyte* card_ptr, int worker_i) { + if (_g1h->is_in_reserved(_ctbs->addr_for(card_ptr))) { + _calls++; + *card_ptr = 0; + } + return true; + } + int calls() { return _calls; } +}; + +YoungList::YoungList(G1CollectedHeap* g1h) + : _g1h(g1h), _head(NULL), + _scan_only_head(NULL), _scan_only_tail(NULL), _curr_scan_only(NULL), + _length(0), _scan_only_length(0), + _last_sampled_rs_lengths(0), + _survivor_head(NULL), _survivors_tail(NULL), _survivor_length(0) +{ + guarantee( check_list_empty(false), "just making sure..." 
); +} + +void YoungList::push_region(HeapRegion *hr) { + assert(!hr->is_young(), "should not already be young"); + assert(hr->get_next_young_region() == NULL, "cause it should!"); + + hr->set_next_young_region(_head); + _head = hr; + + hr->set_young(); + double yg_surv_rate = _g1h->g1_policy()->predict_yg_surv_rate((int)_length); + ++_length; +} + +void YoungList::add_survivor_region(HeapRegion* hr) { + assert(!hr->is_survivor(), "should not already be for survived"); + assert(hr->get_next_young_region() == NULL, "cause it should!"); + + hr->set_next_young_region(_survivor_head); + if (_survivor_head == NULL) { + _survivors_tail = hr; + } + _survivor_head = hr; + + hr->set_survivor(); + ++_survivor_length; +} + +HeapRegion* YoungList::pop_region() { + while (_head != NULL) { + assert( length() > 0, "list should not be empty" ); + HeapRegion* ret = _head; + _head = ret->get_next_young_region(); + ret->set_next_young_region(NULL); + --_length; + assert(ret->is_young(), "region should be very young"); + + // Replace 'Survivor' region type with 'Young'. So the region will + // be treated as a young region and will not be 'confused' with + // newly created survivor regions. 
+    if (ret->is_survivor()) {
+      ret->set_young();
+    }
+
+    if (!ret->is_scan_only()) {
+      return ret;
+    }
+
+    // scan-only, we'll add it to the scan-only list
+    if (_scan_only_tail == NULL) {
+      guarantee( _scan_only_head == NULL, "invariant" );
+
+      _scan_only_head = ret;
+      _curr_scan_only = ret;
+    } else {
+      guarantee( _scan_only_head != NULL, "invariant" );
+      _scan_only_tail->set_next_young_region(ret);
+    }
+    guarantee( ret->get_next_young_region() == NULL, "invariant" );
+    _scan_only_tail = ret;
+
+    // no need to be tagged as scan-only any more
+    ret->set_young();
+
+    ++_scan_only_length;
+  }
+  assert( length() == 0, "list should be empty" );
+  return NULL;
+}
+
+// Unlinks every region on the chain starting at "list": clears its
+// next-young link, uninstalls its survivor-rate group and marks it
+// not young.
+void YoungList::empty_list(HeapRegion* list) {
+  while (list != NULL) {
+    HeapRegion* next = list->get_next_young_region();
+    list->set_next_young_region(NULL);
+    list->uninstall_surv_rate_group();
+    list->set_not_young();
+    list = next;
+  }
+}
+
+// Empties the young, scan-only and survivor lists and resets all of
+// their heads, tails, lengths and the last sampled RS lengths.
+void YoungList::empty_list() {
+  assert(check_list_well_formed(), "young list should be well formed");
+
+  empty_list(_head);
+  _head = NULL;
+  _length = 0;
+
+  empty_list(_scan_only_head);
+  _scan_only_head = NULL;
+  _scan_only_tail = NULL;
+  _scan_only_length = 0;
+  _curr_scan_only = NULL;
+
+  empty_list(_survivor_head);
+  _survivor_head = NULL;
+  _survivors_tail = NULL;
+  _survivor_length = 0;
+
+  _last_sampled_rs_lengths = 0;
+
+  assert(check_list_empty(false), "just making sure...");
+}
+
+// Consistency check of the young and scan-only lists.  For each list,
+// every region must be tagged young and not scan-only, and the number
+// of regions walked must match the recorded length.  For the scan-only
+// list, the tail must be the last region walked (and NULL iff the head
+// is NULL) and _curr_scan_only must be NULL or the head.  Every problem
+// found is logged; returns true only if both lists pass.
+bool YoungList::check_list_well_formed() {
+  bool ret = true;
+
+  size_t length = 0;
+  HeapRegion* curr = _head;
+  HeapRegion* last = NULL;
+  while (curr != NULL) {
+    if (!curr->is_young() || curr->is_scan_only()) {
+      gclog_or_tty->print_cr("### YOUNG REGION "PTR_FORMAT"-"PTR_FORMAT" "
+                             "incorrectly tagged (%d, %d)",
+                             curr->bottom(), curr->end(),
+                             curr->is_young(), curr->is_scan_only());
+      ret = false;
+    }
+    ++length;
+    last = curr;
+    curr = curr->get_next_young_region();
+  }
+  ret = ret && (length == _length);
+
+  if (!ret) {
+    gclog_or_tty->print_cr("### YOUNG LIST seems not well formed!");
+    // "length" is a size_t, so "%d" is a mismatched conversion on LP64.
+    // The member is cast so the format is right whatever its declared
+    // type is (presumably size_t -- confirm against the class).
+    gclog_or_tty->print_cr("### list has " SIZE_FORMAT " entries, "
+                           "_length is " SIZE_FORMAT,
+                           length, (size_t) _length);
+  }
+
+  bool scan_only_ret = true;
+  length = 0;
+  curr = _scan_only_head;
+  last = NULL;
+  while (curr != NULL) {
+    if (!curr->is_young() || curr->is_scan_only()) {
+      gclog_or_tty->print_cr("### SCAN-ONLY REGION "PTR_FORMAT"-"PTR_FORMAT" "
+                             "incorrectly tagged (%d, %d)",
+                             curr->bottom(), curr->end(),
+                             curr->is_young(), curr->is_scan_only());
+      scan_only_ret = false;
+    }
+    ++length;
+    last = curr;
+    curr = curr->get_next_young_region();
+  }
+  scan_only_ret = scan_only_ret && (length == _scan_only_length);
+
+  if ( (last != _scan_only_tail) ||
+       (_scan_only_head == NULL && _scan_only_tail != NULL) ||
+       (_scan_only_head != NULL && _scan_only_tail == NULL) ) {
+    gclog_or_tty->print_cr("## _scan_only_tail is set incorrectly");
+    scan_only_ret = false;
+  }
+
+  if (_curr_scan_only != NULL && _curr_scan_only != _scan_only_head) {
+    gclog_or_tty->print_cr("### _curr_scan_only is set incorrectly");
+    scan_only_ret = false;
+  }
+
+  if (!scan_only_ret) {
+    gclog_or_tty->print_cr("### SCAN-ONLY LIST seems not well formed!");
+    gclog_or_tty->print_cr("### list has " SIZE_FORMAT " entries, "
+                           "_scan_only_length is " SIZE_FORMAT,
+                           length, (size_t) _scan_only_length);
+  }
+
+  return ret && scan_only_ret;
+}
+
+// Checks that the young list is empty (zero length, NULL head and, if
+// "check_sample" is set, zero last sampled RS lengths).  Unless
+// "ignore_scan_only_list" is set, the scan-only list must also have
+// zero length and NULL head and tail.  Every problem found is logged.
+bool YoungList::check_list_empty(bool ignore_scan_only_list,
+                                 bool check_sample) {
+  bool ret = true;
+
+  if (_length != 0) {
+    // Cast + SIZE_FORMAT instead of "%d": see check_list_well_formed().
+    gclog_or_tty->print_cr("### YOUNG LIST should have 0 length, not "
+                           SIZE_FORMAT, (size_t) _length);
+    ret = false;
+  }
+  if (check_sample && _last_sampled_rs_lengths != 0) {
+    gclog_or_tty->print_cr("### YOUNG LIST has non-zero last sampled RS lengths");
+    ret = false;
+  }
+  if (_head != NULL) {
+    gclog_or_tty->print_cr("### YOUNG LIST does not have a NULL head");
+    ret = false;
+  }
+  if (!ret) {
+    gclog_or_tty->print_cr("### YOUNG LIST does not seem empty");
+  }
+
+  if (ignore_scan_only_list)
+    return ret;
+
+  bool scan_only_ret = true;
+  if (_scan_only_length != 0) {
+    gclog_or_tty->print_cr("### SCAN-ONLY LIST should have 0 length, not "
+                           SIZE_FORMAT, (size_t) _scan_only_length);
+    scan_only_ret = false;
+  }
+  if (_scan_only_head != NULL) {
+    gclog_or_tty->print_cr("### SCAN-ONLY LIST does not have a NULL head");
+    scan_only_ret = false;
+  }
+  if (_scan_only_tail != NULL) {
+    gclog_or_tty->print_cr("### SCAN-ONLY LIST does not have a NULL tail");
+    scan_only_ret = false;
+  }
+  if (!scan_only_ret) {
+    gclog_or_tty->print_cr("### SCAN-ONLY LIST does not seem empty");
+  }
+
+  return ret && scan_only_ret;
+}
+
+// Starts a remembered-set length sampling pass: zeroes the running
+// total and positions the cursor at the head of the young list.
+void
+YoungList::rs_length_sampling_init() {
+  _sampled_rs_lengths = 0;
+  _curr = _head;
+}
+
+// Returns true while the sampling cursor has not run off the list.
+bool
+YoungList::rs_length_sampling_more() {
+  return _curr != NULL;
+}
+
+// Adds the current region's rem_set()->occupied() to the running total
+// and advances the cursor; once the cursor becomes NULL the total is
+// published in _last_sampled_rs_lengths.
+void
+YoungList::rs_length_sampling_next() {
+  assert( _curr != NULL, "invariant" );
+  _sampled_rs_lengths += _curr->rem_set()->occupied();
+  _curr = _curr->get_next_young_region();
+  if (_curr == NULL) {
+    _last_sampled_rs_lengths = _sampled_rs_lengths;
+    // gclog_or_tty->print_cr("last sampled RS lengths = %d", _last_sampled_rs_lengths);
+  }
+}
+
+void
+YoungList::reset_auxilary_lists() {
+  // We could have just "moved" the scan-only list to the young list.
+  // However, the scan-only list is ordered according to the region
+  // age in descending order, so, by moving one entry at a time, we
+  // ensure that it is recreated in ascending order.
+
+  guarantee( is_empty(), "young list should be empty" );
+  assert(check_list_well_formed(), "young list should be well formed");
+
+  // Add survivor regions to SurvRateGroup.
+  _g1h->g1_policy()->note_start_adding_survivor_regions();
+  for (HeapRegion* curr = _survivor_head;
+       curr != NULL;
+       curr = curr->get_next_young_region()) {
+    _g1h->g1_policy()->set_region_survivors(curr);
+  }
+  _g1h->g1_policy()->note_stop_adding_survivor_regions();
+
+  if (_survivor_head != NULL) {
+    _head = _survivor_head;
+    _length = _survivor_length + _scan_only_length;
+    _survivors_tail->set_next_young_region(_scan_only_head);
+  } else {
+    _head = _scan_only_head;
+    _length = _scan_only_length;
+  }
+
+  for (HeapRegion* curr = _scan_only_head;
+       curr != NULL;
+       curr = curr->get_next_young_region()) {
+    curr->recalculate_age_in_surv_rate_group();
+  }
+  _scan_only_head = NULL;
+  _scan_only_tail = NULL;
+  _scan_only_length = 0;
+  _curr_scan_only = NULL;
+
+  _survivor_head = NULL;
+  _survivors_tail = NULL;
+  _survivor_length = 0;
+  _g1h->g1_policy()->finished_recalculating_age_indexes();
+
+  assert(check_list_well_formed(), "young list should be well formed");
+}
+
+// Logs the contents of the young, scan-only and survivor lists to
+// gclog_or_tty, one line per region (or "empty" for an empty list).
+void YoungList::print() {
+  HeapRegion* lists[] = {_head, _scan_only_head, _survivor_head};
+  const char* names[] = {"YOUNG", "SCAN-ONLY", "SURVIVOR"};
+
+  for (unsigned int list = 0; list < ARRAY_SIZE(lists); ++list) {
+    gclog_or_tty->print_cr("%s LIST CONTENTS", names[list]);
+    HeapRegion *curr = lists[list];
+    if (curr == NULL)
+      gclog_or_tty->print_cr(" empty");
+    while (curr != NULL) {
+      // Addresses are printed with PTR_FORMAT rather than "%08x": "%x"
+      // takes an unsigned int, so passing a pointer is a mismatched
+      // conversion and truncates on 64-bit.  bottom()/end()/top() are
+      // printed with PTR_FORMAT elsewhere in this file.
+      // NOTE(review): the two *_top_at_mark_start() accessors and
+      // top_at_conc_mark_count() are assumed to return addresses as
+      // well -- confirm against HeapRegion.
+      gclog_or_tty->print_cr(" [" PTR_FORMAT "-" PTR_FORMAT "], t: " PTR_FORMAT ", "
+                             "P: " PTR_FORMAT ", N: " PTR_FORMAT ", C: " PTR_FORMAT ", "
+                             "age: %4d, y: %d, s-o: %d, surv: %d",
+                             curr->bottom(), curr->end(),
+                             curr->top(),
+                             curr->prev_top_at_mark_start(),
+                             curr->next_top_at_mark_start(),
+                             curr->top_at_conc_mark_count(),
+                             curr->age_in_surv_rate_group_cond(),
+                             curr->is_young(),
+                             curr->is_scan_only(),
+                             curr->is_survivor());
+      curr = curr->get_next_young_region();
+    }
+  }
+
+  gclog_or_tty->print_cr("");
+}
+
+// Calls stop() on the threads reached through _cg1r->cg1rThread(),
+// _czft and _cmThread, in that order.
+void G1CollectedHeap::stop_conc_gc_threads() {
+  _cg1r->cg1rThread()->stop();
+  _czft->stop();
+  _cmThread->stop();
+}
+
+
+void
G1CollectedHeap::check_ct_logs_at_safepoint() { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + CardTableModRefBS* ct_bs = (CardTableModRefBS*)barrier_set(); + + // Count the dirty cards at the start. + CountNonCleanMemRegionClosure count1(this); + ct_bs->mod_card_iterate(&count1); + int orig_count = count1.n(); + + // First clear the logged cards. + ClearLoggedCardTableEntryClosure clear; + dcqs.set_closure(&clear); + dcqs.apply_closure_to_all_completed_buffers(); + dcqs.iterate_closure_all_threads(false); + clear.print_histo(); + + // Now ensure that there's no dirty cards. + CountNonCleanMemRegionClosure count2(this); + ct_bs->mod_card_iterate(&count2); + if (count2.n() != 0) { + gclog_or_tty->print_cr("Card table has %d entries; %d originally", + count2.n(), orig_count); + } + guarantee(count2.n() == 0, "Card table should be clean."); + + RedirtyLoggedCardTableEntryClosure redirty; + JavaThread::dirty_card_queue_set().set_closure(&redirty); + dcqs.apply_closure_to_all_completed_buffers(); + dcqs.iterate_closure_all_threads(false); + gclog_or_tty->print_cr("Log entries = %d, dirty cards = %d.", + clear.calls(), orig_count); + guarantee(redirty.calls() == clear.calls(), + "Or else mechanism is broken."); + + CountNonCleanMemRegionClosure count3(this); + ct_bs->mod_card_iterate(&count3); + if (count3.n() != orig_count) { + gclog_or_tty->print_cr("Should have restored them all: orig = %d, final = %d.", + orig_count, count3.n()); + guarantee(count3.n() >= orig_count, "Should have restored them all."); + } + + JavaThread::dirty_card_queue_set().set_closure(_refine_cte_cl); +} + +// Private class members. + +G1CollectedHeap* G1CollectedHeap::_g1h; + +// Private methods. + +// Finds a HeapRegion that can be used to allocate a given size of block. 
+ + +HeapRegion* G1CollectedHeap::newAllocRegion_work(size_t word_size, + bool do_expand, + bool zero_filled) { + ConcurrentZFThread::note_region_alloc(); + HeapRegion* res = alloc_free_region_from_lists(zero_filled); + if (res == NULL && do_expand) { + expand(word_size * HeapWordSize); + res = alloc_free_region_from_lists(zero_filled); + assert(res == NULL || + (!res->isHumongous() && + (!zero_filled || + res->zero_fill_state() == HeapRegion::Allocated)), + "Alloc Regions must be zero filled (and non-H)"); + } + if (res != NULL && res->is_empty()) _free_regions--; + assert(res == NULL || + (!res->isHumongous() && + (!zero_filled || + res->zero_fill_state() == HeapRegion::Allocated)), + "Non-young alloc Regions must be zero filled (and non-H)"); + + if (G1TraceRegions) { + if (res != NULL) { + gclog_or_tty->print_cr("new alloc region %d:["PTR_FORMAT", "PTR_FORMAT"], " + "top "PTR_FORMAT, + res->hrs_index(), res->bottom(), res->end(), res->top()); + } + } + + return res; +} + +HeapRegion* G1CollectedHeap::newAllocRegionWithExpansion(int purpose, + size_t word_size, + bool zero_filled) { + HeapRegion* alloc_region = NULL; + if (_gc_alloc_region_counts[purpose] < g1_policy()->max_regions(purpose)) { + alloc_region = newAllocRegion_work(word_size, true, zero_filled); + if (purpose == GCAllocForSurvived && alloc_region != NULL) { + _young_list->add_survivor_region(alloc_region); + } + ++_gc_alloc_region_counts[purpose]; + } else { + g1_policy()->note_alloc_region_limit_reached(purpose); + } + return alloc_region; +} + +// If could fit into free regions w/o expansion, try. +// Otherwise, if can expand, do so. +// Otherwise, if using ex regions might help, try with ex given back. +HeapWord* G1CollectedHeap::humongousObjAllocate(size_t word_size) { + assert(regions_accounted_for(), "Region leakage!"); + + // We can't allocate H regions while cleanupComplete is running, since + // some of the regions we find to be empty might not yet be added to the + // unclean list. 
(If we're already at a safepoint, this call is + // unnecessary, not to mention wrong.) + if (!SafepointSynchronize::is_at_safepoint()) + wait_for_cleanup_complete(); + + size_t num_regions = + round_to(word_size, HeapRegion::GrainWords) / HeapRegion::GrainWords; + + // Special case if < one region??? + + // Remember the ft size. + size_t x_size = expansion_regions(); + + HeapWord* res = NULL; + bool eliminated_allocated_from_lists = false; + + // Can the allocation potentially fit in the free regions? + if (free_regions() >= num_regions) { + res = _hrs->obj_allocate(word_size); + } + if (res == NULL) { + // Try expansion. + size_t fs = _hrs->free_suffix(); + if (fs + x_size >= num_regions) { + expand((num_regions - fs) * HeapRegion::GrainBytes); + res = _hrs->obj_allocate(word_size); + assert(res != NULL, "This should have worked."); + } else { + // Expansion won't help. Are there enough free regions if we get rid + // of reservations? + size_t avail = free_regions(); + if (avail >= num_regions) { + res = _hrs->obj_allocate(word_size); + if (res != NULL) { + remove_allocated_regions_from_lists(); + eliminated_allocated_from_lists = true; + } + } + } + } + if (res != NULL) { + // Increment by the number of regions allocated. + // FIXME: Assumes regions all of size GrainBytes. 
+#ifndef PRODUCT + mr_bs()->verify_clean_region(MemRegion(res, res + num_regions * + HeapRegion::GrainWords)); +#endif + if (!eliminated_allocated_from_lists) + remove_allocated_regions_from_lists(); + _summary_bytes_used += word_size * HeapWordSize; + _free_regions -= num_regions; + _num_humongous_regions += (int) num_regions; + } + assert(regions_accounted_for(), "Region Leakage"); + return res; +} + +HeapWord* +G1CollectedHeap::attempt_allocation_slow(size_t word_size, + bool permit_collection_pause) { + HeapWord* res = NULL; + HeapRegion* allocated_young_region = NULL; + + assert( SafepointSynchronize::is_at_safepoint() || + Heap_lock->owned_by_self(), "pre condition of the call" ); + + if (isHumongous(word_size)) { + // Allocation of a humongous object can, in a sense, complete a + // partial region, if the previous alloc was also humongous, and + // caused the test below to succeed. + if (permit_collection_pause) + do_collection_pause_if_appropriate(word_size); + res = humongousObjAllocate(word_size); + assert(_cur_alloc_region == NULL + || !_cur_alloc_region->isHumongous(), + "Prevent a regression of this bug."); + + } else { + // We may have concurrent cleanup working at the time. Wait for it + // to complete. In the future we would probably want to make the + // concurrent cleanup truly concurrent by decoupling it from the + // allocation. + if (!SafepointSynchronize::is_at_safepoint()) + wait_for_cleanup_complete(); + // If we do a collection pause, this will be reset to a non-NULL + // value. If we don't, nulling here ensures that we allocate a new + // region below. + if (_cur_alloc_region != NULL) { + // We're finished with the _cur_alloc_region. + _summary_bytes_used += _cur_alloc_region->used(); + _cur_alloc_region = NULL; + } + assert(_cur_alloc_region == NULL, "Invariant."); + // Completion of a heap region is perhaps a good point at which to do + // a collection pause. 
+ if (permit_collection_pause) + do_collection_pause_if_appropriate(word_size); + // Make sure we have an allocation region available. + if (_cur_alloc_region == NULL) { + if (!SafepointSynchronize::is_at_safepoint()) + wait_for_cleanup_complete(); + bool next_is_young = should_set_young_locked(); + // If the next region is not young, make sure it's zero-filled. + _cur_alloc_region = newAllocRegion(word_size, !next_is_young); + if (_cur_alloc_region != NULL) { + _summary_bytes_used -= _cur_alloc_region->used(); + if (next_is_young) { + set_region_short_lived_locked(_cur_alloc_region); + allocated_young_region = _cur_alloc_region; + } + } + } + assert(_cur_alloc_region == NULL || !_cur_alloc_region->isHumongous(), + "Prevent a regression of this bug."); + + // Now retry the allocation. + if (_cur_alloc_region != NULL) { + res = _cur_alloc_region->allocate(word_size); + } + } + + // NOTE: fails frequently in PRT + assert(regions_accounted_for(), "Region leakage!"); + + if (res != NULL) { + if (!SafepointSynchronize::is_at_safepoint()) { + assert( permit_collection_pause, "invariant" ); + assert( Heap_lock->owned_by_self(), "invariant" ); + Heap_lock->unlock(); + } + + if (allocated_young_region != NULL) { + HeapRegion* hr = allocated_young_region; + HeapWord* bottom = hr->bottom(); + HeapWord* end = hr->end(); + MemRegion mr(bottom, end); + ((CardTableModRefBS*)_g1h->barrier_set())->dirty(mr); + } + } + + assert( SafepointSynchronize::is_at_safepoint() || + (res == NULL && Heap_lock->owned_by_self()) || + (res != NULL && !Heap_lock->owned_by_self()), + "post condition of the call" ); + + return res; +} + +HeapWord* +G1CollectedHeap::mem_allocate(size_t word_size, + bool is_noref, + bool is_tlab, + bool* gc_overhead_limit_was_exceeded) { + debug_only(check_for_valid_allocation_state()); + assert(no_gc_in_progress(), "Allocation during gc not allowed"); + HeapWord* result = NULL; + + // Loop until the allocation is satisified, + // or unsatisfied after GC. 
+ for (int try_count = 1; /* return or throw */; try_count += 1) { + int gc_count_before; + { + Heap_lock->lock(); + result = attempt_allocation(word_size); + if (result != NULL) { + // attempt_allocation should have unlocked the heap lock + assert(is_in(result), "result not in heap"); + return result; + } + // Read the gc count while the heap lock is held. + gc_count_before = SharedHeap::heap()->total_collections(); + Heap_lock->unlock(); + } + + // Create the garbage collection operation... + VM_G1CollectForAllocation op(word_size, + gc_count_before); + + // ...and get the VM thread to execute it. + VMThread::execute(&op); + if (op.prologue_succeeded()) { + result = op.result(); + assert(result == NULL || is_in(result), "result not in heap"); + return result; + } + + // Give a warning if we seem to be looping forever. + if ((QueuedAllocationWarningCount > 0) && + (try_count % QueuedAllocationWarningCount == 0)) { + warning("G1CollectedHeap::mem_allocate_work retries %d times", + try_count); + } + } +} + +void G1CollectedHeap::abandon_cur_alloc_region() { + if (_cur_alloc_region != NULL) { + // We're finished with the _cur_alloc_region. + if (_cur_alloc_region->is_empty()) { + _free_regions++; + free_region(_cur_alloc_region); + } else { + _summary_bytes_used += _cur_alloc_region->used(); + } + _cur_alloc_region = NULL; + } +} + +class PostMCRemSetClearClosure: public HeapRegionClosure { + ModRefBarrierSet* _mr_bs; +public: + PostMCRemSetClearClosure(ModRefBarrierSet* mr_bs) : _mr_bs(mr_bs) {} + bool doHeapRegion(HeapRegion* r) { + r->reset_gc_time_stamp(); + if (r->continuesHumongous()) + return false; + HeapRegionRemSet* hrrs = r->rem_set(); + if (hrrs != NULL) hrrs->clear(); + // You might think here that we could clear just the cards + // corresponding to the used region. But no: if we leave a dirty card + // in a region we might allocate into, then it would prevent that card + // from being enqueued, and cause it to be missed. 
+ // Re: the performance cost: we shouldn't be doing full GC anyway! + _mr_bs->clear(MemRegion(r->bottom(), r->end())); + return false; + } +}; + + +class PostMCRemSetInvalidateClosure: public HeapRegionClosure { + ModRefBarrierSet* _mr_bs; +public: + PostMCRemSetInvalidateClosure(ModRefBarrierSet* mr_bs) : _mr_bs(mr_bs) {} + bool doHeapRegion(HeapRegion* r) { + if (r->continuesHumongous()) return false; + if (r->used_region().word_size() != 0) { + _mr_bs->invalidate(r->used_region(), true /*whole heap*/); + } + return false; + } +}; + +void G1CollectedHeap::do_collection(bool full, bool clear_all_soft_refs, + size_t word_size) { + ResourceMark rm; + + if (full && DisableExplicitGC) { + gclog_or_tty->print("\n\n\nDisabling Explicit GC\n\n\n"); + return; + } + + assert(SafepointSynchronize::is_at_safepoint(), "should be at safepoint"); + assert(Thread::current() == VMThread::vm_thread(), "should be in vm thread"); + + if (GC_locker::is_active()) { + return; // GC is disabled (e.g. JNI GetXXXCritical operation) + } + + { + IsGCActiveMark x; + + // Timing + gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps); + TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty); + TraceTime t(full ? "Full GC (System.gc())" : "Full GC", PrintGC, true, gclog_or_tty); + + double start = os::elapsedTime(); + GCOverheadReporter::recordSTWStart(start); + g1_policy()->record_full_collection_start(); + + gc_prologue(true); + increment_total_collections(); + + size_t g1h_prev_used = used(); + assert(used() == recalculate_used(), "Should be equal"); + + if (VerifyBeforeGC && total_collections() >= VerifyGCStartAt) { + HandleMark hm; // Discard invalid handles created during verification + prepare_for_verify(); + gclog_or_tty->print(" VerifyBeforeGC:"); + Universe::verify(true); + } + assert(regions_accounted_for(), "Region leakage!"); + + COMPILER2_PRESENT(DerivedPointerTable::clear()); + + // We want to discover references, but not process them yet. 
+ // This mode is disabled in + // instanceRefKlass::process_discovered_references if the + // generation does some collection work, or + // instanceRefKlass::enqueue_discovered_references if the + // generation returns without doing any work. + ref_processor()->disable_discovery(); + ref_processor()->abandon_partial_discovery(); + ref_processor()->verify_no_references_recorded(); + + // Abandon current iterations of concurrent marking and concurrent + // refinement, if any are in progress. + concurrent_mark()->abort(); + + // Make sure we'll choose a new allocation region afterwards. + abandon_cur_alloc_region(); + assert(_cur_alloc_region == NULL, "Invariant."); + g1_rem_set()->as_HRInto_G1RemSet()->cleanupHRRS(); + tear_down_region_lists(); + set_used_regions_to_need_zero_fill(); + if (g1_policy()->in_young_gc_mode()) { + empty_young_list(); + g1_policy()->set_full_young_gcs(true); + } + + // Temporarily make reference _discovery_ single threaded (non-MT). + ReferenceProcessorMTMutator rp_disc_ser(ref_processor(), false); + + // Temporarily make refs discovery atomic + ReferenceProcessorAtomicMutator rp_disc_atomic(ref_processor(), true); + + // Temporarily clear _is_alive_non_header + ReferenceProcessorIsAliveMutator rp_is_alive_null(ref_processor(), NULL); + + ref_processor()->enable_discovery(); + + // Do collection work + { + HandleMark hm; // Discard invalid handles created during gc + G1MarkSweep::invoke_at_safepoint(ref_processor(), clear_all_soft_refs); + } + // Because freeing humongous regions may have added some unclean + // regions, it is necessary to tear down again before rebuilding. 
+ tear_down_region_lists(); + rebuild_region_lists(); + + _summary_bytes_used = recalculate_used(); + + ref_processor()->enqueue_discovered_references(); + + COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); + + if (VerifyAfterGC && total_collections() >= VerifyGCStartAt) { + HandleMark hm; // Discard invalid handles created during verification + gclog_or_tty->print(" VerifyAfterGC:"); + Universe::verify(false); + } + NOT_PRODUCT(ref_processor()->verify_no_references_recorded()); + + reset_gc_time_stamp(); + // Since everything potentially moved, we will clear all remembered + // sets, and clear all cards. Later we will also cards in the used + // portion of the heap after the resizing (which could be a shrinking.) + // We will also reset the GC time stamps of the regions. + PostMCRemSetClearClosure rs_clear(mr_bs()); + heap_region_iterate(&rs_clear); + + // Resize the heap if necessary. + resize_if_necessary_after_full_collection(full ? 0 : word_size); + + // Since everything potentially moved, we will clear all remembered + // sets, but also dirty all cards corresponding to used regions. + PostMCRemSetInvalidateClosure rs_invalidate(mr_bs()); + heap_region_iterate(&rs_invalidate); + if (_cg1r->use_cache()) { + _cg1r->clear_and_record_card_counts(); + _cg1r->clear_hot_cache(); + } + + if (PrintGC) { + print_size_transition(gclog_or_tty, g1h_prev_used, used(), capacity()); + } + + if (true) { // FIXME + // Ask the permanent generation to adjust size for full collections + perm()->compute_new_size(); + } + + double end = os::elapsedTime(); + GCOverheadReporter::recordSTWEnd(end); + g1_policy()->record_full_collection_end(); + + gc_epilogue(true); + + // Abandon concurrent refinement. This must happen last: in the + // dirty-card logging system, some cards may be dirty by weak-ref + // processing, and may be enqueued. But the whole card table is + // dirtied, so this should abandon those logs, and set "do_traversal" + // to true. 
+ concurrent_g1_refine()->set_pya_restart(); + + assert(regions_accounted_for(), "Region leakage!"); + } + + if (g1_policy()->in_young_gc_mode()) { + _young_list->reset_sampled_info(); + assert( check_young_list_empty(false, false), + "young list should be empty at this point"); + } +} + +void G1CollectedHeap::do_full_collection(bool clear_all_soft_refs) { + do_collection(true, clear_all_soft_refs, 0); +} + +// This code is mostly copied from TenuredGeneration. +void +G1CollectedHeap:: +resize_if_necessary_after_full_collection(size_t word_size) { + assert(MinHeapFreeRatio <= MaxHeapFreeRatio, "sanity check"); + + // Include the current allocation, if any, and bytes that will be + // pre-allocated to support collections, as "used". + const size_t used_after_gc = used(); + const size_t capacity_after_gc = capacity(); + const size_t free_after_gc = capacity_after_gc - used_after_gc; + + // We don't have floating point command-line arguments + const double minimum_free_percentage = (double) MinHeapFreeRatio / 100; + const double maximum_used_percentage = 1.0 - minimum_free_percentage; + const double maximum_free_percentage = (double) MaxHeapFreeRatio / 100; + const double minimum_used_percentage = 1.0 - maximum_free_percentage; + + size_t minimum_desired_capacity = (size_t) (used_after_gc / maximum_used_percentage); + size_t maximum_desired_capacity = (size_t) (used_after_gc / minimum_used_percentage); + + // Don't shrink less than the initial size. 
+ minimum_desired_capacity = + MAX2(minimum_desired_capacity, + collector_policy()->initial_heap_byte_size()); + maximum_desired_capacity = + MAX2(maximum_desired_capacity, + collector_policy()->initial_heap_byte_size()); + + // We are failing here because minimum_desired_capacity is + assert(used_after_gc <= minimum_desired_capacity, "sanity check"); + assert(minimum_desired_capacity <= maximum_desired_capacity, "sanity check"); + + if (PrintGC && Verbose) { + const double free_percentage = ((double)free_after_gc) / capacity(); + gclog_or_tty->print_cr("Computing new size after full GC "); + gclog_or_tty->print_cr(" " + " minimum_free_percentage: %6.2f", + minimum_free_percentage); + gclog_or_tty->print_cr(" " + " maximum_free_percentage: %6.2f", + maximum_free_percentage); + gclog_or_tty->print_cr(" " + " capacity: %6.1fK" + " minimum_desired_capacity: %6.1fK" + " maximum_desired_capacity: %6.1fK", + capacity() / (double) K, + minimum_desired_capacity / (double) K, + maximum_desired_capacity / (double) K); + gclog_or_tty->print_cr(" " + " free_after_gc : %6.1fK" + " used_after_gc : %6.1fK", + free_after_gc / (double) K, + used_after_gc / (double) K); + gclog_or_tty->print_cr(" " + " free_percentage: %6.2f", + free_percentage); + } + if (capacity() < minimum_desired_capacity) { + // Don't expand unless it's significant + size_t expand_bytes = minimum_desired_capacity - capacity_after_gc; + expand(expand_bytes); + if (PrintGC && Verbose) { + gclog_or_tty->print_cr(" expanding:" + " minimum_desired_capacity: %6.1fK" + " expand_bytes: %6.1fK", + minimum_desired_capacity / (double) K, + expand_bytes / (double) K); + } + + // No expansion, now see if we want to shrink + } else if (capacity() > maximum_desired_capacity) { + // Capacity too large, compute shrinking size + size_t shrink_bytes = capacity_after_gc - maximum_desired_capacity; + shrink(shrink_bytes); + if (PrintGC && Verbose) { + gclog_or_tty->print_cr(" " + " shrinking:" + " initSize: %.1fK" + " 
maximum_desired_capacity: %.1fK", + collector_policy()->initial_heap_byte_size() / (double) K, + maximum_desired_capacity / (double) K); + gclog_or_tty->print_cr(" " + " shrink_bytes: %.1fK", + shrink_bytes / (double) K); + } + } +} + + +HeapWord* +G1CollectedHeap::satisfy_failed_allocation(size_t word_size) { + HeapWord* result = NULL; + + // In a G1 heap, we're supposed to keep allocation from failing by + // incremental pauses. Therefore, at least for now, we'll favor + // expansion over collection. (This might change in the future if we can + // do something smarter than full collection to satisfy a failed alloc.) + + result = expand_and_allocate(word_size); + if (result != NULL) { + assert(is_in(result), "result not in heap"); + return result; + } + + // OK, I guess we have to try collection. + + do_collection(false, false, word_size); + + result = attempt_allocation(word_size, /*permit_collection_pause*/false); + + if (result != NULL) { + assert(is_in(result), "result not in heap"); + return result; + } + + // Try collecting soft references. + do_collection(false, true, word_size); + result = attempt_allocation(word_size, /*permit_collection_pause*/false); + if (result != NULL) { + assert(is_in(result), "result not in heap"); + return result; + } + + // What else? We might try synchronous finalization later. If the total + // space available is large enough for the allocation, then a more + // complete compaction phase than we've tried so far might be + // appropriate. + return NULL; +} + +// Attempting to expand the heap sufficiently +// to support an allocation of the given "word_size". If +// successful, perform the allocation and return the address of the +// allocated block, or else "NULL". 
+
+HeapWord* G1CollectedHeap::expand_and_allocate(size_t word_size) {
+  // Request at least MinHeapDeltaBytes, even for a small allocation.
+  size_t expand_bytes = word_size * HeapWordSize;
+  if (expand_bytes < MinHeapDeltaBytes) {
+    expand_bytes = MinHeapDeltaBytes;
+  }
+  expand(expand_bytes);
+  assert(regions_accounted_for(), "Region leakage!");
+  // Retry the allocation without permitting a collection pause.
+  HeapWord* result = attempt_allocation(word_size, false /* permit_collection_pause */);
+  return result;
+}
+
+// Frees "hr" if free_region_if_totally_empty_work() decides it is empty,
+// then publishes the accumulated counts through finish_free_region_work().
+// Returns the "pre_used" value accumulated by the work routine.
+size_t G1CollectedHeap::free_region_if_totally_empty(HeapRegion* hr) {
+  size_t pre_used = 0;
+  size_t cleared_h_regions = 0;
+  size_t freed_regions = 0;
+  UncleanRegionList local_list;
+  free_region_if_totally_empty_work(hr, pre_used, cleared_h_regions,
+                                    freed_regions, &local_list);
+
+  finish_free_region_work(pre_used, cleared_h_regions, freed_regions,
+                          &local_list);
+  return pre_used;
+}
+
+// Hands "hr" to free_region_work() when it is not popular, not young, has
+// a non-zero used() size and its garbage_bytes() equals used(). The
+// reference parameters and "list" are passed straight through to
+// free_region_work(); "hr" must not be a "continues humongous" region.
+void
+G1CollectedHeap::free_region_if_totally_empty_work(HeapRegion* hr,
+                                                   size_t& pre_used,
+                                                   size_t& cleared_h,
+                                                   size_t& freed_regions,
+                                                   UncleanRegionList* list,
+                                                   bool par) {
+  assert(!hr->continuesHumongous(), "should have filtered these out");
+  if (!hr->popular() && hr->used() > 0 && hr->garbage_bytes() == hr->used()) {
+    if (!hr->is_young()) {
+      if (G1PolicyVerbose > 0)
+        gclog_or_tty->print_cr("Freeing empty region "PTR_FORMAT "(" SIZE_FORMAT " bytes)"
+                               " during cleanup", hr, hr->used());
+      free_region_work(hr, pre_used, cleared_h, freed_regions, list, par);
+    }
+  }
+}
+
+// Commits up to "expand_bytes" more storage (rounded up to page size and
+// then to HeapRegion::GrainBytes), one region at a time. Stops early if
+// the underlying storage refuses to expand.
+// FIXME: both this and shrink could probably be more efficient by
+// doing one "VirtualSpace::expand_by" call rather than several.
+void G1CollectedHeap::expand(size_t expand_bytes) {
+  size_t old_mem_size = _g1_storage.committed_size();
+  // We expand by a minimum of 1K.
+  expand_bytes = MAX2(expand_bytes, (size_t)K);
+  size_t aligned_expand_bytes =
+    ReservedSpace::page_align_size_up(expand_bytes);
+  aligned_expand_bytes = align_size_up(aligned_expand_bytes,
+                                       HeapRegion::GrainBytes);
+  expand_bytes = aligned_expand_bytes;
+  while (expand_bytes > 0) {
+    HeapWord* base = (HeapWord*)_g1_storage.high();
+    // Commit more storage.
+    bool successful = _g1_storage.expand_by(HeapRegion::GrainBytes);
+    if (!successful) {
+      // Give up: zeroing the remainder terminates the loop.
+      expand_bytes = 0;
+    } else {
+      expand_bytes -= HeapRegion::GrainBytes;
+      // Expand the committed region.
+      HeapWord* high = (HeapWord*) _g1_storage.high();
+      _g1_committed.set_end(high);
+      // Create a new HeapRegion.
+      MemRegion mr(base, high);
+      // Storage beyond the previous maximum committed extent is treated
+      // as already zeroed.
+      bool is_zeroed = !_g1_max_committed.contains(base);
+      HeapRegion* hr = new HeapRegion(_bot_shared, mr, is_zeroed);
+
+      // Now update max_committed if necessary.
+      _g1_max_committed.set_end(MAX2(_g1_max_committed.end(), high));
+
+      // Add it to the HeapRegionSeq.
+      _hrs->insert(hr);
+      // Set the zero-fill state, according to whether it's already
+      // zeroed.
+      {
+        MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+        if (is_zeroed) {
+          hr->set_zero_fill_complete();
+          put_free_region_on_list_locked(hr);
+        } else {
+          hr->set_zero_fill_needed();
+          put_region_on_unclean_list_locked(hr);
+        }
+      }
+      _free_regions++;
+      // And we used up an expansion region to create it.
+      _expansion_regions--;
+      // Tell the cardtable about it.
+      Universe::heap()->barrier_set()->resize_covered_region(_g1_committed);
+      // And the offset table as well.
+      _bot_shared->resize(_g1_committed.word_size());
+    }
+  }
+  if (Verbose && PrintGC) {
+    size_t new_mem_size = _g1_storage.committed_size();
+    // The values are size_t, so print them with SIZE_FORMAT, not %ld.
+    gclog_or_tty->print_cr("Expanding garbage-first heap from " SIZE_FORMAT
+                           "K by " SIZE_FORMAT "K to " SIZE_FORMAT "K",
+                           old_mem_size/K, aligned_expand_bytes/K,
+                           new_mem_size/K);
+  }
+}
+
+// Uncommits up to "shrink_bytes" of storage (rounded down to page size and
+// then to HeapRegion::GrainBytes) from the high end of the committed space
+// and updates the region counts, card table and offset table to match.
+void G1CollectedHeap::shrink_helper(size_t shrink_bytes)
+{
+  size_t old_mem_size = _g1_storage.committed_size();
+  size_t aligned_shrink_bytes =
+    ReservedSpace::page_align_size_down(shrink_bytes);
+  aligned_shrink_bytes = align_size_down(aligned_shrink_bytes,
+                                         HeapRegion::GrainBytes);
+  size_t num_regions_deleted = 0;
+  MemRegion mr = _hrs->shrink_by(aligned_shrink_bytes, num_regions_deleted);
+
+  // The removed range must sit at the current top of the committed storage.
+  assert(mr.end() == (HeapWord*)_g1_storage.high(), "Bad shrink!");
+  if (mr.byte_size() > 0)
+    _g1_storage.shrink_by(mr.byte_size());
+  assert(mr.start() == (HeapWord*)_g1_storage.high(), "Bad shrink!");
+
+  _g1_committed.set_end(mr.start());
+  _free_regions -= num_regions_deleted;
+  _expansion_regions += num_regions_deleted;
+
+  // Tell the cardtable about it.
+  Universe::heap()->barrier_set()->resize_covered_region(_g1_committed);
+
+  // And the offset table as well.
+  _bot_shared->resize(_g1_committed.word_size());
+
+  HeapRegionRemSet::shrink_heap(n_regions());
+
+  if (Verbose && PrintGC) {
+    size_t new_mem_size = _g1_storage.committed_size();
+    // The values are size_t, so print them with SIZE_FORMAT, not %ld.
+    gclog_or_tty->print_cr("Shrinking garbage-first heap from " SIZE_FORMAT
+                           "K by " SIZE_FORMAT "K to " SIZE_FORMAT "K",
+                           old_mem_size/K, aligned_shrink_bytes/K,
+                           new_mem_size/K);
+  }
+}
+
+// Shrinks the heap by up to "shrink_bytes": releases the GC alloc regions
+// and tears down the region lists first, then rebuilds the lists afterwards.
+void G1CollectedHeap::shrink(size_t shrink_bytes) {
+  release_gc_alloc_regions();
+  tear_down_region_lists();  // We will rebuild them in a moment.
+  shrink_helper(shrink_bytes);
+  rebuild_region_lists();
+}
+
+// Public methods.
+
+#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
+#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
+#endif // _MSC_VER
+
+
+// Constructs the heap object: initializes members, allocates the task
+// queue set and the per-queue remembered-set iterators, and clears the
+// GC alloc region tables. Storage is reserved later, in initialize().
+G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) :
+  SharedHeap(policy_),
+  _g1_policy(policy_),
+  _ref_processor(NULL),
+  _process_strong_tasks(new SubTasksDone(G1H_PS_NumElements)),
+  _bot_shared(NULL),
+  _par_alloc_during_gc_lock(Mutex::leaf, "par alloc during GC lock"),
+  _objs_with_preserved_marks(NULL), _preserved_marks_of_objs(NULL),
+  _evac_failure_scan_stack(NULL) ,
+  _mark_in_progress(false),
+  _cg1r(NULL), _czft(NULL), _summary_bytes_used(0),
+  _cur_alloc_region(NULL),
+  _refine_cte_cl(NULL),
+  _free_region_list(NULL), _free_region_list_size(0),
+  _free_regions(0),
+  _popular_object_boundary(NULL),
+  _cur_pop_hr_index(0),
+  _popular_regions_to_be_evacuated(NULL),
+  _pop_obj_rc_at_copy(),
+  _full_collection(false),
+  _unclean_region_list(),
+  _unclean_regions_coming(false),
+  _young_list(new YoungList(this)),
+  _gc_time_stamp(0),
+  _surviving_young_words(NULL)
+{
+  _g1h = this; // To catch bugs.
+  if (_process_strong_tasks == NULL || !_process_strong_tasks->valid()) {
+    vm_exit_during_initialization("Failed necessary allocation.");
+  }
+  // One queue per parallel GC thread, and at least one.
+  int n_queues = MAX2((int)ParallelGCThreads, 1);
+  _task_queues = new RefToScanQueueSet(n_queues);
+  // Check the allocation before the queue set is dereferenced below.
+  guarantee(_task_queues != NULL, "task_queues allocation failure.");
+
+  int n_rem_sets = HeapRegionRemSet::num_par_rem_sets();
+  assert(n_rem_sets > 0, "Invariant.");
+
+  HeapRegionRemSetIterator** iter_arr =
+    NEW_C_HEAP_ARRAY(HeapRegionRemSetIterator*, n_queues);
+  for (int i = 0; i < n_queues; i++) {
+    iter_arr[i] = new HeapRegionRemSetIterator();
+  }
+  _rem_set_iterator = iter_arr;
+
+  for (int i = 0; i < n_queues; i++) {
+    RefToScanQueue* q = new RefToScanQueue();
+    q->initialize();
+    _task_queues->register_queue(i, q);
+  }
+
+  for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
+    _gc_alloc_regions[ap] = NULL;
+    _gc_alloc_region_counts[ap] = 0;
+  }
+}
+
+// Reserves the heap (G1 part plus perm gen), creates the remembered sets,
+// region sequence, offset table, concurrent mark and (optionally) zero-fill
+// threads, and commits the initial heap size. Returns JNI_OK on success;
+// the failure paths call vm_exit_during_initialization() and are followed
+// by "return JNI_ENOMEM".
+jint G1CollectedHeap::initialize() {
+  os::enable_vtime();
+
+  // Necessary to satisfy locking discipline assertions.
+
+  MutexLocker x(Heap_lock);
+
+  // While there are no constraints in the GC code that HeapWordSize
+  // be any particular value, there are multiple other areas in the
+  // system which believe this to be true (e.g. oop->object_size in some
+  // cases incorrectly returns the size in wordSize units rather than
+  // HeapWordSize).
+  guarantee(HeapWordSize == wordSize, "HeapWordSize must equal wordSize");
+
+  size_t init_byte_size = collector_policy()->initial_heap_byte_size();
+  size_t max_byte_size = collector_policy()->max_heap_byte_size();
+
+  // Ensure that the sizes are properly aligned.
+  Universe::check_alignment(init_byte_size, HeapRegion::GrainBytes, "g1 heap");
+  Universe::check_alignment(max_byte_size, HeapRegion::GrainBytes, "g1 heap");
+
+  // We allocate this in any case, but only do no work if the command line
+  // param is off.
+  _cg1r = new ConcurrentG1Refine();
+
+  // Reserve the maximum.
+  PermanentGenerationSpec* pgs = collector_policy()->permanent_generation();
+  // Includes the perm-gen.
+  ReservedSpace heap_rs(max_byte_size + pgs->max_size(),
+                        HeapRegion::GrainBytes,
+                        false /*ism*/);
+
+  if (!heap_rs.is_reserved()) {
+    vm_exit_during_initialization("Could not reserve enough space for object heap");
+    return JNI_ENOMEM;
+  }
+
+  // It is important to do this in a way such that concurrent readers can't
+  // temporarily think something's in the heap.  (I've actually seen this
+  // happen in asserts: DLD.)
+  _reserved.set_word_size(0);
+  _reserved.set_start((HeapWord*)heap_rs.base());
+  _reserved.set_end((HeapWord*)(heap_rs.base() + heap_rs.size()));
+
+  _expansion_regions = max_byte_size/HeapRegion::GrainBytes;
+
+  _num_humongous_regions = 0;
+
+  // Create the gen rem set (and barrier set) for the entire reserved region.
+  _rem_set = collector_policy()->create_rem_set(_reserved, 2);
+  set_barrier_set(rem_set()->bs());
+  if (barrier_set()->is_a(BarrierSet::ModRef)) {
+    _mr_bs = (ModRefBarrierSet*)_barrier_set;
+  } else {
+    vm_exit_during_initialization("G1 requires a mod ref bs.");
+    return JNI_ENOMEM;
+  }
+
+  // Also create a G1 rem set.
+  if (G1UseHRIntoRS) {
+    if (mr_bs()->is_a(BarrierSet::CardTableModRef)) {
+      _g1_rem_set = new HRInto_G1RemSet(this, (CardTableModRefBS*)mr_bs());
+    } else {
+      vm_exit_during_initialization("G1 requires a cardtable mod ref bs.");
+      return JNI_ENOMEM;
+    }
+  } else {
+    _g1_rem_set = new StupidG1RemSet(this);
+  }
+
+  // Carve out the G1 part of the heap.
+
+  ReservedSpace g1_rs   = heap_rs.first_part(max_byte_size);
+  _g1_reserved = MemRegion((HeapWord*)g1_rs.base(),
+                           g1_rs.size()/HeapWordSize);
+  ReservedSpace perm_gen_rs = heap_rs.last_part(max_byte_size);
+
+  _perm_gen = pgs->init(perm_gen_rs, pgs->init_size(), rem_set());
+
+  // Start with nothing committed; expand() below commits the storage.
+  _g1_storage.initialize(g1_rs, 0);
+  _g1_committed = MemRegion((HeapWord*)_g1_storage.low(), (size_t) 0);
+  _g1_max_committed = _g1_committed;
+  _hrs = new HeapRegionSeq(_expansion_regions);
+  guarantee(_hrs != NULL, "Couldn't allocate HeapRegionSeq");
+  guarantee(_cur_alloc_region == NULL, "from constructor");
+
+  _bot_shared = new G1BlockOffsetSharedArray(_reserved,
+                                             heap_word_size(init_byte_size));
+
+  _g1h = this;
+
+  // Create the ConcurrentMark data structure and thread.
+  // (Must do this late, so that "max_regions" is defined.)
+  _cm       = new ConcurrentMark(heap_rs, (int) max_regions());
+  _cmThread = _cm->cmThread();
+
+  // ...and the concurrent zero-fill thread, if necessary.
+  if (G1ConcZeroFill) {
+    _czft = new ConcurrentZFThread();
+  }
+
+
+
+  // Allocate the popular regions; take them off free lists.
+  size_t pop_byte_size = G1NumPopularRegions * HeapRegion::GrainBytes;
+  expand(pop_byte_size);
+  _popular_object_boundary =
+    _g1_reserved.start() + (G1NumPopularRegions * HeapRegion::GrainWords);
+  for (int i = 0; i < G1NumPopularRegions; i++) {
+    HeapRegion* hr = newAllocRegion(HeapRegion::GrainWords);
+    //    assert(hr != NULL && hr->bottom() < _popular_object_boundary,
+    //     "Should be enough, and all should be below boundary.");
+    hr->set_popular(true);
+  }
+  assert(_cur_pop_hr_index == 0, "Start allocating at the first region.");
+
+  // Initialize the from_card cache structure of HeapRegionRemSet.
+  HeapRegionRemSet::init_heap(max_regions());
+
+  // Now expand into the rest of the initial heap size.
+  expand(init_byte_size - pop_byte_size);
+
+  // Perform any initialization actions delegated to the policy.
+  g1_policy()->init();
+
+  g1_policy()->note_start_of_mark_thread();
+
+  _refine_cte_cl =
+    new RefineCardTableEntryClosure(ConcurrentG1RefineThread::sts(),
+                                    g1_rem_set(),
+                                    concurrent_g1_refine());
+  JavaThread::dirty_card_queue_set().set_closure(_refine_cte_cl);
+
+  JavaThread::satb_mark_queue_set().initialize(SATB_Q_CBL_mon,
+                                               SATB_Q_FL_lock,
+                                               0,
+                                               Shared_SATB_Q_lock);
+  if (G1RSBarrierUseQueue) {
+    JavaThread::dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon,
+                                                  DirtyCardQ_FL_lock,
+                                                  G1DirtyCardQueueMax,
+                                                  Shared_DirtyCardQ_lock);
+  }
+  // In case we're keeping closure specialization stats, initialize those
+  // counts and that mechanism.
+  SpecializationStats::clear();
+
+  _gc_alloc_region_list = NULL;
+
+  // Do later initialization work for concurrent refinement.
+  _cg1r->init();
+
+  const char* group_names[] = { "CR", "ZF", "CM", "CL" };
+  GCOverheadReporter::initGCOverheadReporter(4, group_names);
+
+  return JNI_OK;
+}
+
+// Creates the reference processor spanning the reserved region.
+void G1CollectedHeap::ref_processing_init() {
+  SharedHeap::ref_processing_init();
+  MemRegion mr = reserved_region();
+  _ref_processor = ReferenceProcessor::create_ref_processor(
+                                         mr,    // span
+                                         false, // Reference discovery is not atomic
+                                                // (though it shouldn't matter here.)
+                                         true,  // mt_discovery
+                                         NULL,  // is alive closure: need to fill this in for efficiency
+                                         ParallelGCThreads,
+                                         ParallelRefProcEnabled,
+                                         true); // Setting next fields of discovered
+                                                // lists requires a barrier.
+}
+
+// Returns the size in bytes of the committed G1 region.
+size_t G1CollectedHeap::capacity() const {
+  return _g1_committed.byte_size();
+}
+
+// Applies the dirty card closure to every completed buffer, reports the
+// number processed to the policy, and lets worker 0 clean up the
+// refinement cache. The "concurrent" argument is not read here.
+void G1CollectedHeap::iterate_dirty_card_closure(bool concurrent,
+                                                 int worker_i) {
+  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+  int n_completed_buffers = 0;
+  while (dcqs.apply_closure_to_completed_buffer(worker_i, 0, true)) {
+    n_completed_buffers++;
+  }
+  g1_policy()->record_update_rs_processed_buffers(worker_i,
+                                                  (double) n_completed_buffers);
+  dcqs.clear_n_completed_buffers();
+  // Finish up the queue...
+  if (worker_i == 0) concurrent_g1_refine()->clean_up_cache(worker_i,
+                                                            g1_rem_set());
+  assert(!dcqs.completed_buffers_exist_dirty(), "Completed buffers exist!");
+}
+
+
+// Computes the sum of the storage used by the various regions.
+
+size_t G1CollectedHeap::used() const {
+  assert(Heap_lock->owner() != NULL,
+         "Should be owned on this thread's behalf.");
+  // The current allocation region is added on top of the summary count.
+  size_t result = _summary_bytes_used;
+  if (_cur_alloc_region != NULL)
+    result += _cur_alloc_region->used();
+  return result;
+}
+
+// Sums used() over all regions except "continues humongous" ones.
+class SumUsedClosure: public HeapRegionClosure {
+  size_t _used;
+public:
+  SumUsedClosure() : _used(0) {}
+  bool doHeapRegion(HeapRegion* r) {
+    if (!r->continuesHumongous()) {
+      _used += r->used();
+    }
+    return false;
+  }
+  size_t result() { return _used; }
+};
+
+// Recomputes the used byte count by walking every region.
+size_t G1CollectedHeap::recalculate_used() const {
+  SumUsedClosure blk;
+  _hrs->iterate(&blk);
+  return blk.result();
+}
+
+#ifndef PRODUCT
+// Counts regions that are "continues humongous", have used() > 0, or are
+// GC alloc regions.
+class SumUsedRegionsClosure: public HeapRegionClosure {
+  size_t _num;
+public:
+  // _num starts at G1NumPopularRegions to account for the popular regions
+  SumUsedRegionsClosure() : _num(G1NumPopularRegions) {}
+  bool doHeapRegion(HeapRegion* r) {
+    if (r->continuesHumongous() || r->used() > 0 || r->is_gc_alloc_region()) {
+      _num += 1;
+    }
+    return false;
+  }
+  size_t result() { return _num; }
+};
+
+// Recomputes the number of used regions by walking every region.
+size_t G1CollectedHeap::recalculate_used_regions() const {
+  SumUsedRegionsClosure blk;
+  _hrs->iterate(&blk);
+  return blk.result();
+}
+#endif // PRODUCT
+
+// Returns a region's worth of bytes if any region is free, otherwise the
+// free space of the current allocation region (0 if there is none).
+size_t G1CollectedHeap::unsafe_max_alloc() {
+  if (_free_regions > 0) return HeapRegion::GrainBytes;
+  // otherwise, is there space in the current allocation region?
+
+  // We need to store the current allocation region in a local variable
+  // here. The problem is that this method doesn't take any locks and
+  // there may be other threads which overwrite the current allocation
+  // region field. attempt_allocation(), for example, sets it to NULL
+  // and this can happen *after* the NULL check here but before the call
+  // to free(), resulting in a SIGSEGV. Note that this doesn't appear
+  // to be a problem in the optimized build, since the two loads of the
+  // current allocation region field are optimized away.
+  HeapRegion* car = _cur_alloc_region;
+
+  // FIXME: should iterate over all regions?
+  if (car == NULL) {
+    return 0;
+  }
+  return car->free();
+}
+
+// Takes the Heap_lock and requests a collection for "cause".
+void G1CollectedHeap::collect(GCCause::Cause cause) {
+  // The caller doesn't have the Heap_lock
+  assert(!Heap_lock->owned_by_self(), "this thread should not own the Heap_lock");
+  MutexLocker ml(Heap_lock);
+  collect_locked(cause);
+}
+
+// Runs a full collection on the VM thread. Only the heap-inspection and
+// heap-dump causes are handled; any other cause hits ShouldNotReachHere().
+void G1CollectedHeap::collect_as_vm_thread(GCCause::Cause cause) {
+  assert(Thread::current()->is_VM_thread(), "Precondition#1");
+  assert(Heap_lock->is_locked(), "Precondition#2");
+  GCCauseSetter gcs(this, cause);
+  switch (cause) {
+    case GCCause::_heap_inspection:
+    case GCCause::_heap_dump: {
+      HandleMark hm;
+      do_full_collection(false);         // don't clear all soft refs
+      break;
+    }
+    default: // XXX FIX ME
+      ShouldNotReachHere(); // Unexpected use of this function
+  }
+}
+
+
+// Waits for cleanup to finish, then executes a VM_G1CollectFull operation
+// with the Heap_lock temporarily released.
+void G1CollectedHeap::collect_locked(GCCause::Cause cause) {
+  // Don't want to do a GC until cleanup is completed.
+  wait_for_cleanup_complete();
+
+  // Read the GC count while holding the Heap_lock
+  int gc_count_before = SharedHeap::heap()->total_collections();
+  {
+    MutexUnlocker mu(Heap_lock);  // give up heap lock, execute gets it back
+    VM_G1CollectFull op(gc_count_before, cause);
+    VMThread::execute(&op);
+  }
+}
+
+// Returns whether "p" lies in the heap: addresses inside the committed G1
+// region are checked against their containing HeapRegion, all others
+// against the perm gen.
+bool G1CollectedHeap::is_in(const void* p) const {
+  if (_g1_committed.contains(p)) {
+    HeapRegion* hr = _hrs->addr_to_region(p);
+    return hr->is_in(p);
+  } else {
+    return _perm_gen->as_gen()->is_in(p);
+  }
+}
+
+// Iteration functions.
+
+// Iterates an OopClosure over all ref-containing fields of objects
+// within a HeapRegion.
+
+class IterateOopClosureRegionClosure: public HeapRegionClosure {
+  // NOTE(review): _mr is recorded by the constructor but never read by
+  // doHeapRegion(), so both oop_iterate() overloads visit every region in
+  // full -- confirm whether the MemRegion overload should restrict the walk.
+  MemRegion _mr;
+  OopClosure* _cl;
+public:
+  IterateOopClosureRegionClosure(MemRegion mr, OopClosure* cl)
+    : _mr(mr), _cl(cl) {}
+  bool doHeapRegion(HeapRegion* r) {
+    // "Continues humongous" regions are skipped; never abort the iteration.
+    if (r->continuesHumongous()) {
+      return false;
+    }
+    r->oop_iterate(_cl);
+    return false;
+  }
+};
+
+// Applies "cl" to the oops of every region, using the committed G1 region
+// as the closure's MemRegion.
+void G1CollectedHeap::oop_iterate(OopClosure* cl) {
+  IterateOopClosureRegionClosure region_cl(_g1_committed, cl);
+  _hrs->iterate(&region_cl);
+}
+
+// Same walk as above, with the caller's "mr" handed to the closure.
+void G1CollectedHeap::oop_iterate(MemRegion mr, OopClosure* cl) {
+  IterateOopClosureRegionClosure region_cl(mr, cl);
+  _hrs->iterate(&region_cl);
+}
+
+// Iterates an ObjectClosure over all objects within a HeapRegion.
+
+class IterateObjectClosureRegionClosure: public HeapRegionClosure {
+  ObjectClosure* _cl;
+public:
+  IterateObjectClosureRegionClosure(ObjectClosure* cl) : _cl(cl) {}
+  bool doHeapRegion(HeapRegion* r) {
+    // "Continues humongous" regions are skipped; never abort the iteration.
+    if (r->continuesHumongous()) {
+      return false;
+    }
+    r->object_iterate(_cl);
+    return false;
+  }
+};
+
+// Applies "cl" to the objects of every region in the heap region sequence.
+void G1CollectedHeap::object_iterate(ObjectClosure* cl) {
+  IterateObjectClosureRegionClosure region_cl(cl);
+  _hrs->iterate(&region_cl);
+}
+
+// Not supported by G1: always trips the guarantee below.
+void G1CollectedHeap::object_iterate_since_last_GC(ObjectClosure* cl) {
+  // FIXME: is this right?
+  guarantee(false, "object_iterate_since_last_GC not supported by G1 heap");
+}
+
+// Calls a SpaceClosure on a HeapRegion.
+ +class SpaceClosureRegionClosure: public HeapRegionClosure { + SpaceClosure* _cl; +public: + SpaceClosureRegionClosure(SpaceClosure* cl) : _cl(cl) {} + bool doHeapRegion(HeapRegion* r) { + _cl->do_space(r); + return false; + } +}; + +void G1CollectedHeap::space_iterate(SpaceClosure* cl) { + SpaceClosureRegionClosure blk(cl); + _hrs->iterate(&blk); +} + +void G1CollectedHeap::heap_region_iterate(HeapRegionClosure* cl) { + _hrs->iterate(cl); +} + +void G1CollectedHeap::heap_region_iterate_from(HeapRegion* r, + HeapRegionClosure* cl) { + _hrs->iterate_from(r, cl); +} + +void +G1CollectedHeap::heap_region_iterate_from(int idx, HeapRegionClosure* cl) { + _hrs->iterate_from(idx, cl); +} + +HeapRegion* G1CollectedHeap::region_at(size_t idx) { return _hrs->at(idx); } + +void +G1CollectedHeap::heap_region_par_iterate_chunked(HeapRegionClosure* cl, + int worker, + jint claim_value) { + const size_t regions = n_regions(); + const size_t worker_num = (ParallelGCThreads > 0 ? ParallelGCThreads : 1); + // try to spread out the starting points of the workers + const size_t start_index = regions / worker_num * (size_t) worker; + + // each worker will actually look at all regions + for (size_t count = 0; count < regions; ++count) { + const size_t index = (start_index + count) % regions; + assert(0 <= index && index < regions, "sanity"); + HeapRegion* r = region_at(index); + // we'll ignore "continues humongous" regions (we'll process them + // when we come across their corresponding "start humongous" + // region) and regions already claimed + if (r->claim_value() == claim_value || r->continuesHumongous()) { + continue; + } + // OK, try to claim it + if (r->claimHeapRegion(claim_value)) { + // success! + assert(!r->continuesHumongous(), "sanity"); + if (r->startsHumongous()) { + // If the region is "starts humongous" we'll iterate over its + // "continues humongous" first; in fact we'll do them + // first. The order is important. 
In one case, calling the + // closure on the "starts humongous" region might de-allocate + // and clear all its "continues humongous" regions and, as a + // result, we might end up processing them twice. So, we'll do + // them first (notice: most closures will ignore them anyway) and + // then we'll do the "starts humongous" region. + for (size_t ch_index = index + 1; ch_index < regions; ++ch_index) { + HeapRegion* chr = region_at(ch_index); + + // if the region has already been claimed or it's not + // "continues humongous" we're done + if (chr->claim_value() == claim_value || + !chr->continuesHumongous()) { + break; + } + + // No one should have claimed it directly. We can assert this given + // that we claimed its "starts humongous" region. + assert(chr->claim_value() != claim_value, "sanity"); + assert(chr->humongous_start_region() == r, "sanity"); + + if (chr->claimHeapRegion(claim_value)) { + // we should always be able to claim it; no one else should + // be trying to claim this region + + bool res2 = cl->doHeapRegion(chr); + assert(!res2, "Should not abort"); + + // Right now, this holds (i.e., no closure that actually + // does something with "continues humongous" regions + // clears them). We might have to weaken it in the future, + // but let's leave these two asserts here for extra safety.
+ assert(chr->continuesHumongous(), "should still be the case"); + assert(chr->humongous_start_region() == r, "sanity"); + } else { + guarantee(false, "we should not reach here"); + } + } + } + + assert(!r->continuesHumongous(), "sanity"); + bool res = cl->doHeapRegion(r); + assert(!res, "Should not abort"); + } + } +} + +class ResetClaimValuesClosure: public HeapRegionClosure { +public: + bool doHeapRegion(HeapRegion* r) { + r->set_claim_value(HeapRegion::InitialClaimValue); + return false; + } +}; + +void +G1CollectedHeap::reset_heap_region_claim_values() { + ResetClaimValuesClosure blk; + heap_region_iterate(&blk); +} + +#ifdef ASSERT +// This checks whether all regions in the heap have the correct claim +// value. I also piggy-backed on this a check to ensure that the +// humongous_start_region() information on "continues humongous" +// regions is correct. + +class CheckClaimValuesClosure : public HeapRegionClosure { +private: + jint _claim_value; + size_t _failures; + HeapRegion* _sh_region; +public: + CheckClaimValuesClosure(jint claim_value) : + _claim_value(claim_value), _failures(0), _sh_region(NULL) { } + bool doHeapRegion(HeapRegion* r) { + if (r->claim_value() != _claim_value) { + gclog_or_tty->print_cr("Region ["PTR_FORMAT","PTR_FORMAT"), " + "claim value = %d, should be %d", + r->bottom(), r->end(), r->claim_value(), + _claim_value); + ++_failures; + } + if (!r->isHumongous()) { + _sh_region = NULL; + } else if (r->startsHumongous()) { + _sh_region = r; + } else if (r->continuesHumongous()) { + if (r->humongous_start_region() != _sh_region) { + gclog_or_tty->print_cr("Region ["PTR_FORMAT","PTR_FORMAT"), " + "HS = "PTR_FORMAT", should be "PTR_FORMAT, + r->bottom(), r->end(), + r->humongous_start_region(), + _sh_region); + ++_failures; + } + } + return false; + } + size_t failures() { + return _failures; + } +}; + +bool G1CollectedHeap::check_heap_region_claim_values(jint claim_value) { + CheckClaimValuesClosure cl(claim_value); + 
heap_region_iterate(&cl); + return cl.failures() == 0; +} +#endif // ASSERT + +void G1CollectedHeap::collection_set_iterate(HeapRegionClosure* cl) { + HeapRegion* r = g1_policy()->collection_set(); + while (r != NULL) { + HeapRegion* next = r->next_in_collection_set(); + if (cl->doHeapRegion(r)) { + cl->incomplete(); + return; + } + r = next; + } +} + +void G1CollectedHeap::collection_set_iterate_from(HeapRegion* r, + HeapRegionClosure *cl) { + assert(r->in_collection_set(), + "Start region must be a member of the collection set."); + HeapRegion* cur = r; + while (cur != NULL) { + HeapRegion* next = cur->next_in_collection_set(); + if (cl->doHeapRegion(cur) && false) { + cl->incomplete(); + return; + } + cur = next; + } + cur = g1_policy()->collection_set(); + while (cur != r) { + HeapRegion* next = cur->next_in_collection_set(); + if (cl->doHeapRegion(cur) && false) { + cl->incomplete(); + return; + } + cur = next; + } +} + +CompactibleSpace* G1CollectedHeap::first_compactible_space() { + return _hrs->length() > 0 ? 
_hrs->at(0) : NULL; +} + + +Space* G1CollectedHeap::space_containing(const void* addr) const { + Space* res = heap_region_containing(addr); + if (res == NULL) + res = perm_gen()->space_containing(addr); + return res; +} + +HeapWord* G1CollectedHeap::block_start(const void* addr) const { + Space* sp = space_containing(addr); + if (sp != NULL) { + return sp->block_start(addr); + } + return NULL; +} + +size_t G1CollectedHeap::block_size(const HeapWord* addr) const { + Space* sp = space_containing(addr); + assert(sp != NULL, "block_size of address outside of heap"); + return sp->block_size(addr); +} + +bool G1CollectedHeap::block_is_obj(const HeapWord* addr) const { + Space* sp = space_containing(addr); + return sp->block_is_obj(addr); +} + +bool G1CollectedHeap::supports_tlab_allocation() const { + return true; +} + +size_t G1CollectedHeap::tlab_capacity(Thread* ignored) const { + return HeapRegion::GrainBytes; +} + +size_t G1CollectedHeap::unsafe_max_tlab_alloc(Thread* ignored) const { + // Return the remaining space in the cur alloc region, but not less than + // the min TLAB size. + // Also, no more than half the region size, since we can't allow tlabs to + // grow big enough to accommodate humongous objects. + + // We need to store it locally, since it might change between when we + // test for NULL and when we use it later.
+ ContiguousSpace* cur_alloc_space = _cur_alloc_region; + if (cur_alloc_space == NULL) { + return HeapRegion::GrainBytes/2; + } else { + return MAX2(MIN2(cur_alloc_space->free(), + (size_t)(HeapRegion::GrainBytes/2)), + (size_t)MinTLABSize); + } +} + +HeapWord* G1CollectedHeap::allocate_new_tlab(size_t size) { + bool dummy; + return G1CollectedHeap::mem_allocate(size, false, true, &dummy); +} + +bool G1CollectedHeap::allocs_are_zero_filled() { + return false; +} + +size_t G1CollectedHeap::large_typearray_limit() { + // FIXME + return HeapRegion::GrainBytes/HeapWordSize; +} + +size_t G1CollectedHeap::max_capacity() const { + return _g1_committed.byte_size(); +} + +jlong G1CollectedHeap::millis_since_last_gc() { + // assert(false, "NYI"); + return 0; +} + + +void G1CollectedHeap::prepare_for_verify() { + if (SafepointSynchronize::is_at_safepoint() || ! UseTLAB) { + ensure_parsability(false); + } + g1_rem_set()->prepare_for_verify(); +} + +class VerifyLivenessOopClosure: public OopClosure { + G1CollectedHeap* g1h; +public: + VerifyLivenessOopClosure(G1CollectedHeap* _g1h) { + g1h = _g1h; + } + void do_oop(narrowOop *p) { + guarantee(false, "NYI"); + } + void do_oop(oop *p) { + oop obj = *p; + assert(obj == NULL || !g1h->is_obj_dead(obj), + "Dead object referenced by a not dead object"); + } +}; + +class VerifyObjsInRegionClosure: public ObjectClosure { + G1CollectedHeap* _g1h; + size_t _live_bytes; + HeapRegion *_hr; +public: + VerifyObjsInRegionClosure(HeapRegion *hr) : _live_bytes(0), _hr(hr) { + _g1h = G1CollectedHeap::heap(); + } + void do_object(oop o) { + VerifyLivenessOopClosure isLive(_g1h); + assert(o != NULL, "Huh?"); + if (!_g1h->is_obj_dead(o)) { + o->oop_iterate(&isLive); + if (!_hr->obj_allocated_since_prev_marking(o)) + _live_bytes += (o->size() * HeapWordSize); + } + } + size_t live_bytes() { return _live_bytes; } +}; + +class PrintObjsInRegionClosure : public ObjectClosure { + HeapRegion *_hr; + G1CollectedHeap *_g1; +public: + 
PrintObjsInRegionClosure(HeapRegion *hr) : _hr(hr) { + _g1 = G1CollectedHeap::heap(); + }; + + void do_object(oop o) { + if (o != NULL) { + HeapWord *start = (HeapWord *) o; + size_t word_sz = o->size(); + gclog_or_tty->print("\nPrinting obj "PTR_FORMAT" of size " SIZE_FORMAT + " isMarkedPrev %d isMarkedNext %d isAllocSince %d\n", + (void*) o, word_sz, + _g1->isMarkedPrev(o), + _g1->isMarkedNext(o), + _hr->obj_allocated_since_prev_marking(o)); + HeapWord *end = start + word_sz; + HeapWord *cur; + int *val; + for (cur = start; cur < end; cur++) { + val = (int *) cur; + gclog_or_tty->print("\t "PTR_FORMAT":"PTR_FORMAT"\n", val, *val); + } + } + } +}; + +class VerifyRegionClosure: public HeapRegionClosure { +public: + bool _allow_dirty; + bool _par; + VerifyRegionClosure(bool allow_dirty, bool par = false) + : _allow_dirty(allow_dirty), _par(par) {} + bool doHeapRegion(HeapRegion* r) { + guarantee(_par || r->claim_value() == HeapRegion::InitialClaimValue, + "Should be unclaimed at verify points."); + if (r->isHumongous()) { + if (r->startsHumongous()) { + // Verify the single H object. 
+ oop(r->bottom())->verify(); + size_t word_sz = oop(r->bottom())->size(); + guarantee(r->top() == r->bottom() + word_sz, + "Only one object in a humongous region"); + } + } else { + VerifyObjsInRegionClosure not_dead_yet_cl(r); + r->verify(_allow_dirty); + r->object_iterate(&not_dead_yet_cl); + guarantee(r->max_live_bytes() >= not_dead_yet_cl.live_bytes(), + "More live objects than counted in last complete marking."); + } + return false; + } +}; + +class VerifyRootsClosure: public OopsInGenClosure { +private: + G1CollectedHeap* _g1h; + bool _failures; + +public: + VerifyRootsClosure() : + _g1h(G1CollectedHeap::heap()), _failures(false) { } + + bool failures() { return _failures; } + + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + + void do_oop(oop* p) { + oop obj = *p; + if (obj != NULL) { + if (_g1h->is_obj_dead(obj)) { + gclog_or_tty->print_cr("Root location "PTR_FORMAT" " + "points to dead obj "PTR_FORMAT, p, (void*) obj); + obj->print_on(gclog_or_tty); + _failures = true; + } + } + } +}; + +// This is the task used for parallel heap verification. + +class G1ParVerifyTask: public AbstractGangTask { +private: + G1CollectedHeap* _g1h; + bool _allow_dirty; + +public: + G1ParVerifyTask(G1CollectedHeap* g1h, bool allow_dirty) : + AbstractGangTask("Parallel verify task"), + _g1h(g1h), _allow_dirty(allow_dirty) { } + + void work(int worker_i) { + VerifyRegionClosure blk(_allow_dirty, true); + _g1h->heap_region_par_iterate_chunked(&blk, worker_i, + HeapRegion::ParVerifyClaimValue); + } +}; + +void G1CollectedHeap::verify(bool allow_dirty, bool silent) { + if (SafepointSynchronize::is_at_safepoint() || !
UseTLAB) { + if (!silent) { gclog_or_tty->print("roots "); } + VerifyRootsClosure rootsCl; + process_strong_roots(false, + SharedHeap::SO_AllClasses, + &rootsCl, + &rootsCl); + rem_set()->invalidate(perm_gen()->used_region(), false); + if (!silent) { gclog_or_tty->print("heapRegions "); } + if (GCParallelVerificationEnabled && ParallelGCThreads > 1) { + assert(check_heap_region_claim_values(HeapRegion::InitialClaimValue), + "sanity check"); + + G1ParVerifyTask task(this, allow_dirty); + int n_workers = workers()->total_workers(); + set_par_threads(n_workers); + workers()->run_task(&task); + set_par_threads(0); + + assert(check_heap_region_claim_values(HeapRegion::ParVerifyClaimValue), + "sanity check"); + + reset_heap_region_claim_values(); + + assert(check_heap_region_claim_values(HeapRegion::InitialClaimValue), + "sanity check"); + } else { + VerifyRegionClosure blk(allow_dirty); + _hrs->iterate(&blk); + } + if (!silent) gclog_or_tty->print("remset "); + rem_set()->verify(); + guarantee(!rootsCl.failures(), "should not have had failures"); + } else { + if (!silent) gclog_or_tty->print("(SKIPPING roots, heapRegions, remset) "); + } +} + +class PrintRegionClosure: public HeapRegionClosure { + outputStream* _st; +public: + PrintRegionClosure(outputStream* st) : _st(st) {} + bool doHeapRegion(HeapRegion* r) { + r->print_on(_st); + return false; + } +}; + +void G1CollectedHeap::print() const { print_on(gclog_or_tty); } + +void G1CollectedHeap::print_on(outputStream* st) const { + PrintRegionClosure blk(st); + _hrs->iterate(&blk); +} + +void G1CollectedHeap::print_gc_threads_on(outputStream* st) const { + if (ParallelGCThreads > 0) { + workers()->print_worker_threads(); + } + st->print("\"G1 concurrent mark GC Thread\" "); + _cmThread->print(); + st->cr(); + st->print("\"G1 concurrent refinement GC Thread\" "); + _cg1r->cg1rThread()->print_on(st); + st->cr(); + st->print("\"G1 zero-fill GC Thread\" "); + _czft->print_on(st); + st->cr(); +} + +void 
G1CollectedHeap::gc_threads_do(ThreadClosure* tc) const { + if (ParallelGCThreads > 0) { + workers()->threads_do(tc); + } + tc->do_thread(_cmThread); + tc->do_thread(_cg1r->cg1rThread()); + tc->do_thread(_czft); +} + +void G1CollectedHeap::print_tracing_info() const { + concurrent_g1_refine()->print_final_card_counts(); + + // We'll overload this to mean "trace GC pause statistics." + if (TraceGen0Time || TraceGen1Time) { + // The "G1CollectorPolicy" is keeping track of these stats, so delegate + // to that. + g1_policy()->print_tracing_info(); + } + if (SummarizeG1RSStats) { + g1_rem_set()->print_summary_info(); + } + if (SummarizeG1ConcMark) { + concurrent_mark()->print_summary_info(); + } + if (SummarizeG1ZFStats) { + ConcurrentZFThread::print_summary_info(); + } + if (G1SummarizePopularity) { + print_popularity_summary_info(); + } + g1_policy()->print_yg_surv_rate_info(); + + GCOverheadReporter::printGCOverhead(); + + SpecializationStats::print(); +} + + +int G1CollectedHeap::addr_to_arena_id(void* addr) const { + HeapRegion* hr = heap_region_containing(addr); + if (hr == NULL) { + return 0; + } else { + return 1; + } +} + +G1CollectedHeap* G1CollectedHeap::heap() { + assert(_sh->kind() == CollectedHeap::G1CollectedHeap, + "not a garbage-first heap"); + return _g1h; +} + +void G1CollectedHeap::gc_prologue(bool full /* Ignored */) { + if (PrintHeapAtGC){ + gclog_or_tty->print_cr(" {Heap before GC collections=%d:", total_collections()); + Universe::print(); + } + assert(InlineCacheBuffer::is_empty(), "should have cleaned up ICBuffer"); + // Call allocation profiler + AllocationProfiler::iterate_since_last_gc(); + // Fill TLAB's and such + ensure_parsability(true); +} + +void G1CollectedHeap::gc_epilogue(bool full /* Ignored */) { + // FIXME: what is this about? + // I'm ignoring the "fill_newgen()" call if "alloc_event_enabled" + // is set. 
+ COMPILER2_PRESENT(assert(DerivedPointerTable::is_empty(), + "derived pointer present")); + + if (PrintHeapAtGC){ + gclog_or_tty->print_cr(" Heap after GC collections=%d:", total_collections()); + Universe::print(); + gclog_or_tty->print("} "); + } +} + +void G1CollectedHeap::do_collection_pause() { + // Read the GC count while holding the Heap_lock + // we need to do this _before_ wait_for_cleanup_complete(), to + // ensure that we do not give up the heap lock and potentially + // pick up the wrong count + int gc_count_before = SharedHeap::heap()->total_collections(); + + // Don't want to do a GC pause while cleanup is being completed! + wait_for_cleanup_complete(); + + g1_policy()->record_stop_world_start(); + { + MutexUnlocker mu(Heap_lock); // give up heap lock, execute gets it back + VM_G1IncCollectionPause op(gc_count_before); + VMThread::execute(&op); + } +} + +void +G1CollectedHeap::doConcurrentMark() { + if (G1ConcMark) { + MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag); + if (!_cmThread->in_progress()) { + _cmThread->set_started(); + CGC_lock->notify(); + } + } +} + +class VerifyMarkedObjsClosure: public ObjectClosure { + G1CollectedHeap* _g1h; + public: + VerifyMarkedObjsClosure(G1CollectedHeap* g1h) : _g1h(g1h) {} + void do_object(oop obj) { + assert(obj->mark()->is_marked() ? 
!_g1h->is_obj_dead(obj) : true, + "markandsweep mark should agree with concurrent deadness"); + } +}; + +void +G1CollectedHeap::checkConcurrentMark() { + VerifyMarkedObjsClosure verifycl(this); + doConcurrentMark(); + // MutexLockerEx x(getMarkBitMapLock(), + // Mutex::_no_safepoint_check_flag); + object_iterate(&verifycl); +} + +void G1CollectedHeap::do_sync_mark() { + _cm->checkpointRootsInitial(); + _cm->markFromRoots(); + _cm->checkpointRootsFinal(false); +} + +// + +double G1CollectedHeap::predict_region_elapsed_time_ms(HeapRegion *hr, + bool young) { + return _g1_policy->predict_region_elapsed_time_ms(hr, young); +} + +void G1CollectedHeap::check_if_region_is_too_expensive(double + predicted_time_ms) { + _g1_policy->check_if_region_is_too_expensive(predicted_time_ms); +} + +size_t G1CollectedHeap::pending_card_num() { + size_t extra_cards = 0; + JavaThread *curr = Threads::first(); + while (curr != NULL) { + DirtyCardQueue& dcq = curr->dirty_card_queue(); + extra_cards += dcq.size(); + curr = curr->next(); + } + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + size_t buffer_size = dcqs.buffer_size(); + size_t buffer_num = dcqs.completed_buffers_num(); + return buffer_size * buffer_num + extra_cards; +} + +size_t G1CollectedHeap::max_pending_card_num() { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + size_t buffer_size = dcqs.buffer_size(); + size_t buffer_num = dcqs.completed_buffers_num(); + int thread_num = Threads::number_of_threads(); + return (buffer_num + thread_num) * buffer_size; +} + +size_t G1CollectedHeap::cards_scanned() { + HRInto_G1RemSet* g1_rset = (HRInto_G1RemSet*) g1_rem_set(); + return g1_rset->cardsScanned(); +} + +void +G1CollectedHeap::setup_surviving_young_words() { + guarantee( _surviving_young_words == NULL, "pre-condition" ); + size_t array_length = g1_policy()->young_cset_length(); + _surviving_young_words = NEW_C_HEAP_ARRAY(size_t, array_length); + if (_surviving_young_words == NULL) { + 
vm_exit_out_of_memory(sizeof(size_t) * array_length, + "Not enough space for young surv words summary."); + } + memset(_surviving_young_words, 0, array_length * sizeof(size_t)); + for (size_t i = 0; i < array_length; ++i) { + guarantee( _surviving_young_words[i] == 0, "invariant" ); + } +} + +void +G1CollectedHeap::update_surviving_young_words(size_t* surv_young_words) { + MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); + size_t array_length = g1_policy()->young_cset_length(); + for (size_t i = 0; i < array_length; ++i) + _surviving_young_words[i] += surv_young_words[i]; +} + +void +G1CollectedHeap::cleanup_surviving_young_words() { + guarantee( _surviving_young_words != NULL, "pre-condition" ); + FREE_C_HEAP_ARRAY(size_t, _surviving_young_words); + _surviving_young_words = NULL; +} + +// + +void +G1CollectedHeap::do_collection_pause_at_safepoint(HeapRegion* popular_region) { + char verbose_str[128]; + sprintf(verbose_str, "GC pause "); + if (popular_region != NULL) + strcat(verbose_str, "(popular)"); + else if (g1_policy()->in_young_gc_mode()) { + if (g1_policy()->full_young_gcs()) + strcat(verbose_str, "(young)"); + else + strcat(verbose_str, "(partial)"); + } + bool reset_should_initiate_conc_mark = false; + if (popular_region != NULL && g1_policy()->should_initiate_conc_mark()) { + // we currently do not allow an initial mark phase to be piggy-backed + // on a popular pause + reset_should_initiate_conc_mark = true; + g1_policy()->unset_should_initiate_conc_mark(); + } + if (g1_policy()->should_initiate_conc_mark()) + strcat(verbose_str, " (initial-mark)"); + + GCCauseSetter x(this, (popular_region == NULL ? + GCCause::_g1_inc_collection_pause : + GCCause::_g1_pop_region_collection_pause)); + + // if PrintGCDetails is on, we'll print long statistics information + // in the collector policy code, so let's not print this as the output + // is messy if we do. 
+ gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps); + TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty); + TraceTime t(verbose_str, PrintGC && !PrintGCDetails, true, gclog_or_tty); + + ResourceMark rm; + assert(SafepointSynchronize::is_at_safepoint(), "should be at safepoint"); + assert(Thread::current() == VMThread::vm_thread(), "should be in vm thread"); + guarantee(!is_gc_active(), "collection is not reentrant"); + assert(regions_accounted_for(), "Region leakage!"); + + increment_gc_time_stamp(); + + if (g1_policy()->in_young_gc_mode()) { + assert(check_young_list_well_formed(), + "young list should be well formed"); + } + + if (GC_locker::is_active()) { + return; // GC is disabled (e.g. JNI GetXXXCritical operation) + } + + bool abandoned = false; + { // Call to jvmpi::post_class_unload_events must occur outside of active GC + IsGCActiveMark x; + + gc_prologue(false); + increment_total_collections(); + +#if G1_REM_SET_LOGGING + gclog_or_tty->print_cr("\nJust chose CS, heap:"); + print(); +#endif + + if (VerifyBeforeGC && total_collections() >= VerifyGCStartAt) { + HandleMark hm; // Discard invalid handles created during verification + prepare_for_verify(); + gclog_or_tty->print(" VerifyBeforeGC:"); + Universe::verify(false); + } + + COMPILER2_PRESENT(DerivedPointerTable::clear()); + + // We want to turn off ref discovere, if necessary, and turn it back on + // on again later if we do. + bool was_enabled = ref_processor()->discovery_enabled(); + if (was_enabled) ref_processor()->disable_discovery(); + + // Forget the current alloc region (we might even choose it to be part + // of the collection set!). + abandon_cur_alloc_region(); + + // The elapsed time induced by the start time below deliberately elides + // the possible verification above. 
+ double start_time_sec = os::elapsedTime(); + GCOverheadReporter::recordSTWStart(start_time_sec); + size_t start_used_bytes = used(); + if (!G1ConcMark) { + do_sync_mark(); + } + + g1_policy()->record_collection_pause_start(start_time_sec, + start_used_bytes); + +#if SCAN_ONLY_VERBOSE + _young_list->print(); +#endif // SCAN_ONLY_VERBOSE + + if (g1_policy()->should_initiate_conc_mark()) { + concurrent_mark()->checkpointRootsInitialPre(); + } + save_marks(); + + // We must do this before any possible evacuation that should propogate + // marks, including evacuation of popular objects in a popular pause. + if (mark_in_progress()) { + double start_time_sec = os::elapsedTime(); + + _cm->drainAllSATBBuffers(); + double finish_mark_ms = (os::elapsedTime() - start_time_sec) * 1000.0; + g1_policy()->record_satb_drain_time(finish_mark_ms); + + } + // Record the number of elements currently on the mark stack, so we + // only iterate over these. (Since evacuation may add to the mark + // stack, doing more exposes race conditions.) If no mark is in + // progress, this will be zero. + _cm->set_oops_do_bound(); + + assert(regions_accounted_for(), "Region leakage."); + + bool abandoned = false; + + if (mark_in_progress()) + concurrent_mark()->newCSet(); + + // Now choose the CS. + if (popular_region == NULL) { + g1_policy()->choose_collection_set(); + } else { + // We may be evacuating a single region (for popularity). + g1_policy()->record_popular_pause_preamble_start(); + popularity_pause_preamble(popular_region); + g1_policy()->record_popular_pause_preamble_end(); + abandoned = (g1_policy()->collection_set() == NULL); + // Now we allow more regions to be added (we have to collect + // all popular regions). + if (!abandoned) { + g1_policy()->choose_collection_set(popular_region); + } + } + // We may abandon a pause if we find no region that will fit in the MMU + // pause. 
+ abandoned = (g1_policy()->collection_set() == NULL); + + // Nothing to do if we were unable to choose a collection set. + if (!abandoned) { +#if G1_REM_SET_LOGGING + gclog_or_tty->print_cr("\nAfter pause, heap:"); + print(); +#endif + + setup_surviving_young_words(); + + // Set up the gc allocation regions. + get_gc_alloc_regions(); + + // Actually do the work... + evacuate_collection_set(); + free_collection_set(g1_policy()->collection_set()); + g1_policy()->clear_collection_set(); + + if (popular_region != NULL) { + // We have to wait until now, because we don't want the region to + // be rescheduled for pop-evac during RS update. + popular_region->set_popular_pending(false); + } + + release_gc_alloc_regions(); + + cleanup_surviving_young_words(); + + if (g1_policy()->in_young_gc_mode()) { + _young_list->reset_sampled_info(); + assert(check_young_list_empty(true), + "young list should be empty"); + +#if SCAN_ONLY_VERBOSE + _young_list->print(); +#endif // SCAN_ONLY_VERBOSE + + _young_list->reset_auxilary_lists(); + } + } else { + COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); + } + + if (evacuation_failed()) { + _summary_bytes_used = recalculate_used(); + } else { + // The "used" of the the collection set have already been subtracted + // when they were freed. Add in the bytes evacuated. 
+ _summary_bytes_used += g1_policy()->bytes_in_to_space(); + } + + if (g1_policy()->in_young_gc_mode() && + g1_policy()->should_initiate_conc_mark()) { + concurrent_mark()->checkpointRootsInitialPost(); + set_marking_started(); + doConcurrentMark(); + } + +#if SCAN_ONLY_VERBOSE + _young_list->print(); +#endif // SCAN_ONLY_VERBOSE + + double end_time_sec = os::elapsedTime(); + g1_policy()->record_pause_time((end_time_sec - start_time_sec)*1000.0); + GCOverheadReporter::recordSTWEnd(end_time_sec); + g1_policy()->record_collection_pause_end(popular_region != NULL, + abandoned); + + assert(regions_accounted_for(), "Region leakage."); + + if (VerifyAfterGC && total_collections() >= VerifyGCStartAt) { + HandleMark hm; // Discard invalid handles created during verification + gclog_or_tty->print(" VerifyAfterGC:"); + Universe::verify(false); + } + + if (was_enabled) ref_processor()->enable_discovery(); + + { + size_t expand_bytes = g1_policy()->expansion_amount(); + if (expand_bytes > 0) { + size_t bytes_before = capacity(); + expand(expand_bytes); + } + } + + if (mark_in_progress()) + concurrent_mark()->update_g1_committed(); + + gc_epilogue(false); + } + + assert(verify_region_lists(), "Bad region lists."); + + if (reset_should_initiate_conc_mark) + g1_policy()->set_should_initiate_conc_mark(); + + if (ExitAfterGCNum > 0 && total_collections() == ExitAfterGCNum) { + gclog_or_tty->print_cr("Stopping after GC #%d", ExitAfterGCNum); + print_tracing_info(); + vm_exit(-1); + } +} + +void G1CollectedHeap::set_gc_alloc_region(int purpose, HeapRegion* r) { + assert(purpose >= 0 && purpose < GCAllocPurposeCount, "invalid purpose"); + HeapWord* original_top = NULL; + if (r != NULL) + original_top = r->top(); + + // We will want to record the used space in r as being there before gc. + // One we install it as a GC alloc region it's eligible for allocation. + // So record it now and use it later. 
+ size_t r_used = 0; + if (r != NULL) { + r_used = r->used(); + + if (ParallelGCThreads > 0) { + // need to take the lock to guard against two threads calling + // get_gc_alloc_region concurrently (very unlikely but...) + MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); + r->save_marks(); + } + } + HeapRegion* old_alloc_region = _gc_alloc_regions[purpose]; + _gc_alloc_regions[purpose] = r; + if (old_alloc_region != NULL) { + // Replace aliases too. + for (int ap = 0; ap < GCAllocPurposeCount; ++ap) { + if (_gc_alloc_regions[ap] == old_alloc_region) { + _gc_alloc_regions[ap] = r; + } + } + } + if (r != NULL) { + push_gc_alloc_region(r); + if (mark_in_progress() && original_top != r->next_top_at_mark_start()) { + // We are using a region as a GC alloc region after it has been used + // as a mutator allocation region during the current marking cycle. + // The mutator-allocated objects are currently implicitly marked, but + // when we move hr->next_top_at_mark_start() forward at the the end + // of the GC pause, they won't be. We therefore mark all objects in + // the "gap". We do this object-by-object, since marking densely + // does not currently work right with marking bitmap iteration. This + // means we rely on TLAB filling at the start of pauses, and no + // "resuscitation" of filled TLAB's. If we want to do this, we need + // to fix the marking bitmap iteration. + HeapWord* curhw = r->next_top_at_mark_start(); + HeapWord* t = original_top; + + while (curhw < t) { + oop cur = (oop)curhw; + // We'll assume parallel for generality. This is rare code. + concurrent_mark()->markAndGrayObjectIfNecessary(cur); // can't we just mark them? 
+ curhw = curhw + cur->size(); + } + assert(curhw == t, "Should have parsed correctly."); + } + if (G1PolicyVerbose > 1) { + gclog_or_tty->print("New alloc region ["PTR_FORMAT", "PTR_FORMAT", " PTR_FORMAT") " + "for survivors:", r->bottom(), original_top, r->end()); + r->print(); + } + g1_policy()->record_before_bytes(r_used); + } +} + +void G1CollectedHeap::push_gc_alloc_region(HeapRegion* hr) { + assert(Thread::current()->is_VM_thread() || + par_alloc_during_gc_lock()->owned_by_self(), "Precondition"); + assert(!hr->is_gc_alloc_region() && !hr->in_collection_set(), + "Precondition."); + hr->set_is_gc_alloc_region(true); + hr->set_next_gc_alloc_region(_gc_alloc_region_list); + _gc_alloc_region_list = hr; +} + +#ifdef G1_DEBUG +class FindGCAllocRegion: public HeapRegionClosure { +public: + bool doHeapRegion(HeapRegion* r) { + if (r->is_gc_alloc_region()) { + gclog_or_tty->print_cr("Region %d ["PTR_FORMAT"...] is still a gc_alloc_region.", + r->hrs_index(), r->bottom()); + } + return false; + } +}; +#endif // G1_DEBUG + +void G1CollectedHeap::forget_alloc_region_list() { + assert(Thread::current()->is_VM_thread(), "Precondition"); + while (_gc_alloc_region_list != NULL) { + HeapRegion* r = _gc_alloc_region_list; + assert(r->is_gc_alloc_region(), "Invariant."); + _gc_alloc_region_list = r->next_gc_alloc_region(); + r->set_next_gc_alloc_region(NULL); + r->set_is_gc_alloc_region(false); + if (r->is_empty()) { + ++_free_regions; + } + } +#ifdef G1_DEBUG + FindGCAllocRegion fa; + heap_region_iterate(&fa); +#endif // G1_DEBUG +} + + +bool G1CollectedHeap::check_gc_alloc_regions() { + // TODO: allocation regions check + return true; +} + +void G1CollectedHeap::get_gc_alloc_regions() { + for (int ap = 0; ap < GCAllocPurposeCount; ++ap) { + // Create new GC alloc regions. + HeapRegion* alloc_region = _gc_alloc_regions[ap]; + // Clear this alloc region, so that in case it turns out to be + // unacceptable, we end up with no allocation region, rather than a bad + // one. 
+ _gc_alloc_regions[ap] = NULL; + if (alloc_region == NULL || alloc_region->in_collection_set()) { + // Can't re-use old one. Allocate a new one. + alloc_region = newAllocRegionWithExpansion(ap, 0); + } + if (alloc_region != NULL) { + set_gc_alloc_region(ap, alloc_region); + } + } + // Set alternative regions for allocation purposes that have reached + // thier limit. + for (int ap = 0; ap < GCAllocPurposeCount; ++ap) { + GCAllocPurpose alt_purpose = g1_policy()->alternative_purpose(ap); + if (_gc_alloc_regions[ap] == NULL && alt_purpose != ap) { + _gc_alloc_regions[ap] = _gc_alloc_regions[alt_purpose]; + } + } + assert(check_gc_alloc_regions(), "alloc regions messed up"); +} + +void G1CollectedHeap::release_gc_alloc_regions() { + // We keep a separate list of all regions that have been alloc regions in + // the current collection pause. Forget that now. + forget_alloc_region_list(); + + // The current alloc regions contain objs that have survived + // collection. Make them no longer GC alloc regions. + for (int ap = 0; ap < GCAllocPurposeCount; ++ap) { + HeapRegion* r = _gc_alloc_regions[ap]; + if (r != NULL && r->is_empty()) { + { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + r->set_zero_fill_complete(); + put_free_region_on_list_locked(r); + } + } + // set_gc_alloc_region will also NULLify all aliases to the region + set_gc_alloc_region(ap, NULL); + _gc_alloc_region_counts[ap] = 0; + } +} + +void G1CollectedHeap::init_for_evac_failure(OopsInHeapRegionClosure* cl) { + _drain_in_progress = false; + set_evac_failure_closure(cl); + _evac_failure_scan_stack = new (ResourceObj::C_HEAP) GrowableArray<oop>(40, true); +} + +void G1CollectedHeap::finalize_for_evac_failure() { + assert(_evac_failure_scan_stack != NULL && + _evac_failure_scan_stack->length() == 0, + "Postcondition"); + assert(!_drain_in_progress, "Postcondition"); + // Don't have to delete, since the scan stack is a resource object.
+ _evac_failure_scan_stack = NULL; +} + + + +// *** Sequential G1 Evacuation + +HeapWord* G1CollectedHeap::allocate_during_gc(GCAllocPurpose purpose, size_t word_size) { + HeapRegion* alloc_region = _gc_alloc_regions[purpose]; + // let the caller handle alloc failure + if (alloc_region == NULL) return NULL; + assert(isHumongous(word_size) || !alloc_region->isHumongous(), + "Either the object is humongous or the region isn't"); + HeapWord* block = alloc_region->allocate(word_size); + if (block == NULL) { + block = allocate_during_gc_slow(purpose, alloc_region, false, word_size); + } + return block; +} + +class G1IsAliveClosure: public BoolObjectClosure { + G1CollectedHeap* _g1; +public: + G1IsAliveClosure(G1CollectedHeap* g1) : _g1(g1) {} + void do_object(oop p) { assert(false, "Do not call."); } + bool do_object_b(oop p) { + // It is reachable if it is outside the collection set, or is inside + // and forwarded. + +#ifdef G1_DEBUG + gclog_or_tty->print_cr("is alive "PTR_FORMAT" in CS %d forwarded %d overall %d", + (void*) p, _g1->obj_in_cs(p), p->is_forwarded(), + !_g1->obj_in_cs(p) || p->is_forwarded()); +#endif // G1_DEBUG + + return !_g1->obj_in_cs(p) || p->is_forwarded(); + } +}; + +class G1KeepAliveClosure: public OopClosure { + G1CollectedHeap* _g1; +public: + G1KeepAliveClosure(G1CollectedHeap* g1) : _g1(g1) {} + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + void do_oop(oop* p) { + oop obj = *p; +#ifdef G1_DEBUG + if (PrintGC && Verbose) { + gclog_or_tty->print_cr("keep alive *"PTR_FORMAT" = "PTR_FORMAT" "PTR_FORMAT, + p, (void*) obj, (void*) *p); + } +#endif // G1_DEBUG + + if (_g1->obj_in_cs(obj)) { + assert( obj->is_forwarded(), "invariant" ); + *p = obj->forwardee(); + +#ifdef G1_DEBUG + gclog_or_tty->print_cr(" in CSet: moved "PTR_FORMAT" -> "PTR_FORMAT, + (void*) obj, (void*) *p); +#endif // G1_DEBUG + } + } +}; + +class RecreateRSetEntriesClosure: public OopClosure { +private: + G1CollectedHeap* _g1; + G1RemSet* _g1_rem_set; + 
HeapRegion* _from; +public: + RecreateRSetEntriesClosure(G1CollectedHeap* g1, HeapRegion* from) : + _g1(g1), _g1_rem_set(g1->g1_rem_set()), _from(from) + {} + + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + void do_oop(oop* p) { + assert(_from->is_in_reserved(p), "paranoia"); + if (*p != NULL) { + _g1_rem_set->write_ref(_from, p); + } + } +}; + +class RemoveSelfPointerClosure: public ObjectClosure { +private: + G1CollectedHeap* _g1; + ConcurrentMark* _cm; + HeapRegion* _hr; + size_t _prev_marked_bytes; + size_t _next_marked_bytes; +public: + RemoveSelfPointerClosure(G1CollectedHeap* g1, HeapRegion* hr) : + _g1(g1), _cm(_g1->concurrent_mark()), _hr(hr), + _prev_marked_bytes(0), _next_marked_bytes(0) + {} + + size_t prev_marked_bytes() { return _prev_marked_bytes; } + size_t next_marked_bytes() { return _next_marked_bytes; } + + // The original idea here was to coalesce evacuated and dead objects. + // However that caused complications with the block offset table (BOT). + // In particular if there were two TLABs, one of them partially refined. + // |----- TLAB_1--------|----TLAB_2-~~~(partially refined part)~~~| + // The BOT entries of the unrefined part of TLAB_2 point to the start + // of TLAB_2. If the last object of the TLAB_1 and the first object + // of TLAB_2 are coalesced, then the cards of the unrefined part + // would point into middle of the filler object. + // + // The current approach is to not coalesce and leave the BOT contents intact. + void do_object(oop obj) { + if (obj->is_forwarded() && obj->forwardee() == obj) { + // The object failed to move. 
+ assert(!_g1->is_obj_dead(obj), "We should not be preserving dead objs."); + _cm->markPrev(obj); + assert(_cm->isPrevMarked(obj), "Should be marked!"); + _prev_marked_bytes += (obj->size() * HeapWordSize); + if (_g1->mark_in_progress() && !_g1->is_obj_ill(obj)) { + _cm->markAndGrayObjectIfNecessary(obj); + } + obj->set_mark(markOopDesc::prototype()); + // While we were processing RSet buffers during the + // collection, we actually didn't scan any cards on the + // collection set, since we didn't want to update remebered + // sets with entries that point into the collection set, given + // that live objects fromthe collection set are about to move + // and such entries will be stale very soon. This change also + // dealt with a reliability issue which involved scanning a + // card in the collection set and coming across an array that + // was being chunked and looking malformed. The problem is + // that, if evacuation fails, we might have remembered set + // entries missing given that we skipped cards on the + // collection set. So, we'll recreate such entries now. + RecreateRSetEntriesClosure cl(_g1, _hr); + obj->oop_iterate(&cl); + assert(_cm->isPrevMarked(obj), "Should be marked!"); + } else { + // The object has been either evacuated or is dead. Fill it with a + // dummy object. + MemRegion mr((HeapWord*)obj, obj->size()); + SharedHeap::fill_region_with_object(mr); + _cm->clearRangeBothMaps(mr); + } + } +}; + +void G1CollectedHeap::remove_self_forwarding_pointers() { + HeapRegion* cur = g1_policy()->collection_set(); + + while (cur != NULL) { + assert(g1_policy()->assertMarkedBytesDataOK(), "Should be!"); + + if (cur->evacuation_failed()) { + RemoveSelfPointerClosure rspc(_g1h, cur); + assert(cur->in_collection_set(), "bad CS"); + cur->object_iterate(&rspc); + + // A number of manipulations to make the TAMS be the current top, + // and the marked bytes be the ones observed in the iteration. 
+ if (_g1h->concurrent_mark()->at_least_one_mark_complete()) { + // The comments below are the postconditions achieved by the + // calls. Note especially the last such condition, which says that + // the count of marked bytes has been properly restored. + cur->note_start_of_marking(false); + // _next_top_at_mark_start == top, _next_marked_bytes == 0 + cur->add_to_marked_bytes(rspc.prev_marked_bytes()); + // _next_marked_bytes == prev_marked_bytes. + cur->note_end_of_marking(); + // _prev_top_at_mark_start == top(), + // _prev_marked_bytes == prev_marked_bytes + } + // If there is no mark in progress, we modified the _next variables + // above needlessly, but harmlessly. + if (_g1h->mark_in_progress()) { + cur->note_start_of_marking(false); + // _next_top_at_mark_start == top, _next_marked_bytes == 0 + // _next_marked_bytes == next_marked_bytes. + } + + // Now make sure the region has the right index in the sorted array. + g1_policy()->note_change_in_marked_bytes(cur); + } + cur = cur->next_in_collection_set(); + } + assert(g1_policy()->assertMarkedBytesDataOK(), "Should be!"); + + // Now restore saved marks, if any. + if (_objs_with_preserved_marks != NULL) { + assert(_preserved_marks_of_objs != NULL, "Both or none."); + assert(_objs_with_preserved_marks->length() == + _preserved_marks_of_objs->length(), "Both or none."); + guarantee(_objs_with_preserved_marks->length() == + _preserved_marks_of_objs->length(), "Both or none."); + for (int i = 0; i < _objs_with_preserved_marks->length(); i++) { + oop obj = _objs_with_preserved_marks->at(i); + markOop m = _preserved_marks_of_objs->at(i); + obj->set_mark(m); + } + // Delete the preserved marks growable arrays (allocated on the C heap). 
+ delete _objs_with_preserved_marks; + delete _preserved_marks_of_objs; + _objs_with_preserved_marks = NULL; + _preserved_marks_of_objs = NULL; + } +} + +void G1CollectedHeap::push_on_evac_failure_scan_stack(oop obj) { + _evac_failure_scan_stack->push(obj); +} + +void G1CollectedHeap::drain_evac_failure_scan_stack() { + assert(_evac_failure_scan_stack != NULL, "precondition"); + + while (_evac_failure_scan_stack->length() > 0) { + oop obj = _evac_failure_scan_stack->pop(); + _evac_failure_closure->set_region(heap_region_containing(obj)); + obj->oop_iterate_backwards(_evac_failure_closure); + } +} + +void G1CollectedHeap::handle_evacuation_failure(oop old) { + markOop m = old->mark(); + // forward to self + assert(!old->is_forwarded(), "precondition"); + + old->forward_to(old); + handle_evacuation_failure_common(old, m); +} + +oop +G1CollectedHeap::handle_evacuation_failure_par(OopsInHeapRegionClosure* cl, + oop old) { + markOop m = old->mark(); + oop forward_ptr = old->forward_to_atomic(old); + if (forward_ptr == NULL) { + // Forward-to-self succeeded. + if (_evac_failure_closure != cl) { + MutexLockerEx x(EvacFailureStack_lock, Mutex::_no_safepoint_check_flag); + assert(!_drain_in_progress, + "Should only be true while someone holds the lock."); + // Set the global evac-failure closure to the current thread's. + assert(_evac_failure_closure == NULL, "Or locking has failed."); + set_evac_failure_closure(cl); + // Now do the common part. + handle_evacuation_failure_common(old, m); + // Reset to NULL. + set_evac_failure_closure(NULL); + } else { + // The lock is already held, and this is recursive. + assert(_drain_in_progress, "This should only be the recursive case."); + handle_evacuation_failure_common(old, m); + } + return old; + } else { + // Someone else had a place to copy it. 
+ return forward_ptr; + } +} + +void G1CollectedHeap::handle_evacuation_failure_common(oop old, markOop m) { + set_evacuation_failed(true); + + preserve_mark_if_necessary(old, m); + + HeapRegion* r = heap_region_containing(old); + if (!r->evacuation_failed()) { + r->set_evacuation_failed(true); + if (G1TraceRegions) { + gclog_or_tty->print("evacuation failed in heap region "PTR_FORMAT" " + "["PTR_FORMAT","PTR_FORMAT")\n", + r, r->bottom(), r->end()); + } + } + + push_on_evac_failure_scan_stack(old); + + if (!_drain_in_progress) { + // prevent recursion in copy_to_survivor_space() + _drain_in_progress = true; + drain_evac_failure_scan_stack(); + _drain_in_progress = false; + } +} + +void G1CollectedHeap::preserve_mark_if_necessary(oop obj, markOop m) { + if (m != markOopDesc::prototype()) { + if (_objs_with_preserved_marks == NULL) { + assert(_preserved_marks_of_objs == NULL, "Both or none."); + _objs_with_preserved_marks = + new (ResourceObj::C_HEAP) GrowableArray<oop>(40, true); + _preserved_marks_of_objs = + new (ResourceObj::C_HEAP) GrowableArray<markOop>(40, true); + } + _objs_with_preserved_marks->push(obj); + _preserved_marks_of_objs->push(m); + } +} + +// *** Parallel G1 Evacuation + +HeapWord* G1CollectedHeap::par_allocate_during_gc(GCAllocPurpose purpose, + size_t word_size) { + HeapRegion* alloc_region = _gc_alloc_regions[purpose]; + // let the caller handle alloc failure + if (alloc_region == NULL) return NULL; + + HeapWord* block = alloc_region->par_allocate(word_size); + if (block == NULL) { + MutexLockerEx x(par_alloc_during_gc_lock(), + Mutex::_no_safepoint_check_flag); + block = allocate_during_gc_slow(purpose, alloc_region, true, word_size); + } + return block; +} + +HeapWord* +G1CollectedHeap::allocate_during_gc_slow(GCAllocPurpose purpose, + HeapRegion* alloc_region, + bool par, + size_t word_size) { + HeapWord* block = NULL; + // In the parallel case, a previous thread to obtain the lock may have + // already assigned a new gc_alloc_region.
+ if (alloc_region != _gc_alloc_regions[purpose]) { + assert(par, "But should only happen in parallel case."); + alloc_region = _gc_alloc_regions[purpose]; + if (alloc_region == NULL) return NULL; + block = alloc_region->par_allocate(word_size); + if (block != NULL) return block; + // Otherwise, continue; this new region is empty, too. + } + assert(alloc_region != NULL, "We better have an allocation region"); + // Another thread might have obtained alloc_region for the given + // purpose, and might be attempting to allocate in it, and might + // succeed. Therefore, we can't do the "finalization" stuff on the + // region below until we're sure the last allocation has happened. + // We ensure this by allocating the remaining space with a garbage + // object. + if (par) par_allocate_remaining_space(alloc_region); + // Now we can do the post-GC stuff on the region. + alloc_region->note_end_of_copying(); + g1_policy()->record_after_bytes(alloc_region->used()); + + if (_gc_alloc_region_counts[purpose] >= g1_policy()->max_regions(purpose)) { + // Cannot allocate more regions for the given purpose. + GCAllocPurpose alt_purpose = g1_policy()->alternative_purpose(purpose); + // Is there an alternative? + if (purpose != alt_purpose) { + HeapRegion* alt_region = _gc_alloc_regions[alt_purpose]; + // Has not the alternative region been aliased? + if (alloc_region != alt_region) { + // Try to allocate in the alternative region. + if (par) { + block = alt_region->par_allocate(word_size); + } else { + block = alt_region->allocate(word_size); + } + // Make an alias. + _gc_alloc_regions[purpose] = _gc_alloc_regions[alt_purpose]; + } + if (block != NULL) { + return block; + } + // Both the allocation region and the alternative one are full + // and aliased, replace them with a new allocation region. + purpose = alt_purpose; + } else { + set_gc_alloc_region(purpose, NULL); + return NULL; + } + } + + // Now allocate a new region for allocation. 
+ alloc_region = newAllocRegionWithExpansion(purpose, word_size, false /*zero_filled*/); + + // let the caller handle alloc failure + if (alloc_region != NULL) { + + assert(check_gc_alloc_regions(), "alloc regions messed up"); + assert(alloc_region->saved_mark_at_top(), + "Mark should have been saved already."); + // We used to assert that the region was zero-filled here, but no + // longer. + + // This must be done last: once it's installed, other regions may + // allocate in it (without holding the lock.) + set_gc_alloc_region(purpose, alloc_region); + + if (par) { + block = alloc_region->par_allocate(word_size); + } else { + block = alloc_region->allocate(word_size); + } + // Caller handles alloc failure. + } else { + // This sets other apis using the same old alloc region to NULL, also. + set_gc_alloc_region(purpose, NULL); + } + return block; // May be NULL. +} + +void G1CollectedHeap::par_allocate_remaining_space(HeapRegion* r) { + HeapWord* block = NULL; + size_t free_words; + do { + free_words = r->free()/HeapWordSize; + // If there's too little space, no one can allocate, so we're done. + if (free_words < (size_t)oopDesc::header_size()) return; + // Otherwise, try to claim it. 
+ block = r->par_allocate(free_words); + } while (block == NULL); + SharedHeap::fill_region_with_object(MemRegion(block, free_words)); +} + +#define use_local_bitmaps 1 +#define verify_local_bitmaps 0 + +#ifndef PRODUCT + +class GCLabBitMap; +class GCLabBitMapClosure: public BitMapClosure { +private: + ConcurrentMark* _cm; + GCLabBitMap* _bitmap; + +public: + GCLabBitMapClosure(ConcurrentMark* cm, + GCLabBitMap* bitmap) { + _cm = cm; + _bitmap = bitmap; + } + + virtual bool do_bit(size_t offset); +}; + +#endif // PRODUCT + +#define oop_buffer_length 256 + +class GCLabBitMap: public BitMap { +private: + ConcurrentMark* _cm; + + int _shifter; + size_t _bitmap_word_covers_words; + + // beginning of the heap + HeapWord* _heap_start; + + // this is the actual start of the GCLab + HeapWord* _real_start_word; + + // this is the actual end of the GCLab + HeapWord* _real_end_word; + + // this is the first word, possibly located before the actual start + // of the GCLab, that corresponds to the first bit of the bitmap + HeapWord* _start_word; + + // size of a GCLab in words + size_t _gclab_word_size; + + static int shifter() { + return MinObjAlignment - 1; + } + + // how many heap words does a single bitmap word corresponds to? + static size_t bitmap_word_covers_words() { + return BitsPerWord << shifter(); + } + + static size_t gclab_word_size() { + return ParallelGCG1AllocBufferSize / HeapWordSize; + } + + static size_t bitmap_size_in_bits() { + size_t bits_in_bitmap = gclab_word_size() >> shifter(); + // We are going to ensure that the beginning of a word in this + // bitmap also corresponds to the beginning of a word in the + // global marking bitmap. To handle the case where a GCLab + // starts from the middle of the bitmap, we need to add enough + // space (i.e. up to a bitmap word) to ensure that we have + // enough bits in the bitmap. 
+ return bits_in_bitmap + BitsPerWord - 1; + } +public: + GCLabBitMap(HeapWord* heap_start) + : BitMap(bitmap_size_in_bits()), + _cm(G1CollectedHeap::heap()->concurrent_mark()), + _shifter(shifter()), + _bitmap_word_covers_words(bitmap_word_covers_words()), + _heap_start(heap_start), + _gclab_word_size(gclab_word_size()), + _real_start_word(NULL), + _real_end_word(NULL), + _start_word(NULL) + { + guarantee( size_in_words() >= bitmap_size_in_words(), + "just making sure"); + } + + inline unsigned heapWordToOffset(HeapWord* addr) { + unsigned offset = (unsigned) pointer_delta(addr, _start_word) >> _shifter; + assert(offset < size(), "offset should be within bounds"); + return offset; + } + + inline HeapWord* offsetToHeapWord(size_t offset) { + HeapWord* addr = _start_word + (offset << _shifter); + assert(_real_start_word <= addr && addr < _real_end_word, "invariant"); + return addr; + } + + bool fields_well_formed() { + bool ret1 = (_real_start_word == NULL) && + (_real_end_word == NULL) && + (_start_word == NULL); + if (ret1) + return true; + + bool ret2 = _real_start_word >= _start_word && + _start_word < _real_end_word && + (_real_start_word + _gclab_word_size) == _real_end_word && + (_start_word + _gclab_word_size + _bitmap_word_covers_words) + > _real_end_word; + return ret2; + } + + inline bool mark(HeapWord* addr) { + guarantee(use_local_bitmaps, "invariant"); + assert(fields_well_formed(), "invariant"); + + if (addr >= _real_start_word && addr < _real_end_word) { + assert(!isMarked(addr), "should not have already been marked"); + + // first mark it on the bitmap + at_put(heapWordToOffset(addr), true); + + return true; + } else { + return false; + } + } + + inline bool isMarked(HeapWord* addr) { + guarantee(use_local_bitmaps, "invariant"); + assert(fields_well_formed(), "invariant"); + + return at(heapWordToOffset(addr)); + } + + void set_buffer(HeapWord* start) { + guarantee(use_local_bitmaps, "invariant"); + clear(); + + assert(start != NULL, "invariant"); + 
_real_start_word = start; + _real_end_word = start + _gclab_word_size; + + size_t diff = + pointer_delta(start, _heap_start) % _bitmap_word_covers_words; + _start_word = start - diff; + + assert(fields_well_formed(), "invariant"); + } + +#ifndef PRODUCT + void verify() { + // verify that the marks have been propagated + GCLabBitMapClosure cl(_cm, this); + iterate(&cl); + } +#endif // PRODUCT + + void retire() { + guarantee(use_local_bitmaps, "invariant"); + assert(fields_well_formed(), "invariant"); + + if (_start_word != NULL) { + CMBitMap* mark_bitmap = _cm->nextMarkBitMap(); + + // this means that the bitmap was set up for the GCLab + assert(_real_start_word != NULL && _real_end_word != NULL, "invariant"); + + mark_bitmap->mostly_disjoint_range_union(this, + 0, // always start from the start of the bitmap + _start_word, + size_in_words()); + _cm->grayRegionIfNecessary(MemRegion(_real_start_word, _real_end_word)); + +#ifndef PRODUCT + if (use_local_bitmaps && verify_local_bitmaps) + verify(); +#endif // PRODUCT + } else { + assert(_real_start_word == NULL && _real_end_word == NULL, "invariant"); + } + } + + static size_t bitmap_size_in_words() { + return (bitmap_size_in_bits() + BitsPerWord - 1) / BitsPerWord; + } +}; + +#ifndef PRODUCT + +bool GCLabBitMapClosure::do_bit(size_t offset) { + HeapWord* addr = _bitmap->offsetToHeapWord(offset); + guarantee(_cm->isMarked(oop(addr)), "it should be!"); + return true; +} + +#endif // PRODUCT + +class G1ParGCAllocBuffer: public ParGCAllocBuffer { +private: + bool _retired; + bool _during_marking; + GCLabBitMap _bitmap; + +public: + G1ParGCAllocBuffer() : + ParGCAllocBuffer(ParallelGCG1AllocBufferSize / HeapWordSize), + _during_marking(G1CollectedHeap::heap()->mark_in_progress()), + _bitmap(G1CollectedHeap::heap()->reserved_region().start()), + _retired(false) + { } + + inline bool mark(HeapWord* addr) { + guarantee(use_local_bitmaps, "invariant"); + assert(_during_marking, "invariant"); + return _bitmap.mark(addr); + } + 
+ inline void set_buf(HeapWord* buf) { + if (use_local_bitmaps && _during_marking) + _bitmap.set_buffer(buf); + ParGCAllocBuffer::set_buf(buf); + _retired = false; + } + + inline void retire(bool end_of_gc, bool retain) { + if (_retired) + return; + if (use_local_bitmaps && _during_marking) { + _bitmap.retire(); + } + ParGCAllocBuffer::retire(end_of_gc, retain); + _retired = true; + } +}; + + +class G1ParScanThreadState : public StackObj { +protected: + G1CollectedHeap* _g1h; + RefToScanQueue* _refs; + + typedef GrowableArray OverflowQueue; + OverflowQueue* _overflowed_refs; + + G1ParGCAllocBuffer _alloc_buffers[GCAllocPurposeCount]; + + size_t _alloc_buffer_waste; + size_t _undo_waste; + + OopsInHeapRegionClosure* _evac_failure_cl; + G1ParScanHeapEvacClosure* _evac_cl; + G1ParScanPartialArrayClosure* _partial_scan_cl; + + int _hash_seed; + int _queue_num; + + int _term_attempts; +#if G1_DETAILED_STATS + int _pushes, _pops, _steals, _steal_attempts; + int _overflow_pushes; +#endif + + double _start; + double _start_strong_roots; + double _strong_roots_time; + double _start_term; + double _term_time; + + // Map from young-age-index (0 == not young, 1 is youngest) to + // surviving words. 
base is what we get back from the malloc call + size_t* _surviving_young_words_base; + // this points into the array, as we use the first few entries for padding + size_t* _surviving_young_words; + +#define PADDING_ELEM_NUM (64 / sizeof(size_t)) + + void add_to_alloc_buffer_waste(size_t waste) { _alloc_buffer_waste += waste; } + + void add_to_undo_waste(size_t waste) { _undo_waste += waste; } + +public: + G1ParScanThreadState(G1CollectedHeap* g1h, int queue_num) + : _g1h(g1h), + _refs(g1h->task_queue(queue_num)), + _hash_seed(17), _queue_num(queue_num), + _term_attempts(0), +#if G1_DETAILED_STATS + _pushes(0), _pops(0), _steals(0), + _steal_attempts(0), _overflow_pushes(0), +#endif + _strong_roots_time(0), _term_time(0), + _alloc_buffer_waste(0), _undo_waste(0) + { + // we allocate G1YoungSurvRateNumRegions plus one entries, since + // we "sacrifice" entry 0 to keep track of surviving bytes for + // non-young regions (where the age is -1) + // We also add a few elements at the beginning and at the end in + // an attempt to eliminate cache contention + size_t real_length = 1 + _g1h->g1_policy()->young_cset_length(); + size_t array_length = PADDING_ELEM_NUM + + real_length + + PADDING_ELEM_NUM; + _surviving_young_words_base = NEW_C_HEAP_ARRAY(size_t, array_length); + if (_surviving_young_words_base == NULL) + vm_exit_out_of_memory(array_length * sizeof(size_t), + "Not enough space for young surv histo."); + _surviving_young_words = _surviving_young_words_base + PADDING_ELEM_NUM; + memset(_surviving_young_words, 0, real_length * sizeof(size_t)); + + _overflowed_refs = new OverflowQueue(10); + + _start = os::elapsedTime(); + } + + ~G1ParScanThreadState() { + FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_base); + } + + RefToScanQueue* refs() { return _refs; } + OverflowQueue* overflowed_refs() { return _overflowed_refs; } + + inline G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose) { + return &_alloc_buffers[purpose]; + } + + size_t alloc_buffer_waste() { 
return _alloc_buffer_waste; } + size_t undo_waste() { return _undo_waste; } + + void push_on_queue(oop* ref) { + if (!refs()->push(ref)) { + overflowed_refs()->push(ref); + IF_G1_DETAILED_STATS(note_overflow_push()); + } else { + IF_G1_DETAILED_STATS(note_push()); + } + } + + void pop_from_queue(oop*& ref) { + if (!refs()->pop_local(ref)) { + ref = NULL; + } else { + IF_G1_DETAILED_STATS(note_pop()); + } + } + + void pop_from_overflow_queue(oop*& ref) { + ref = overflowed_refs()->pop(); + } + + int refs_to_scan() { return refs()->size(); } + int overflowed_refs_to_scan() { return overflowed_refs()->length(); } + + HeapWord* allocate_slow(GCAllocPurpose purpose, size_t word_sz) { + + HeapWord* obj = NULL; + if (word_sz * 100 < + (size_t)(ParallelGCG1AllocBufferSize / HeapWordSize) * + ParallelGCBufferWastePct) { + G1ParGCAllocBuffer* alloc_buf = alloc_buffer(purpose); + add_to_alloc_buffer_waste(alloc_buf->words_remaining()); + alloc_buf->retire(false, false); + + HeapWord* buf = + _g1h->par_allocate_during_gc(purpose, ParallelGCG1AllocBufferSize / HeapWordSize); + if (buf == NULL) return NULL; // Let caller handle allocation failure. + // Otherwise. 
+ alloc_buf->set_buf(buf); + + obj = alloc_buf->allocate(word_sz); + assert(obj != NULL, "buffer was definitely big enough..."); + } + else { + obj = _g1h->par_allocate_during_gc(purpose, word_sz); + } + return obj; + } + + HeapWord* allocate(GCAllocPurpose purpose, size_t word_sz) { + HeapWord* obj = alloc_buffer(purpose)->allocate(word_sz); + if (obj != NULL) return obj; + return allocate_slow(purpose, word_sz); + } + + void undo_allocation(GCAllocPurpose purpose, HeapWord* obj, size_t word_sz) { + if (alloc_buffer(purpose)->contains(obj)) { + guarantee(alloc_buffer(purpose)->contains(obj + word_sz - 1), + "should contain whole object"); + alloc_buffer(purpose)->undo_allocation(obj, word_sz); + } + else { + SharedHeap::fill_region_with_object(MemRegion(obj, word_sz)); + add_to_undo_waste(word_sz); + } + } + + void set_evac_failure_closure(OopsInHeapRegionClosure* evac_failure_cl) { + _evac_failure_cl = evac_failure_cl; + } + OopsInHeapRegionClosure* evac_failure_closure() { + return _evac_failure_cl; + } + + void set_evac_closure(G1ParScanHeapEvacClosure* evac_cl) { + _evac_cl = evac_cl; + } + + void set_partial_scan_closure(G1ParScanPartialArrayClosure* partial_scan_cl) { + _partial_scan_cl = partial_scan_cl; + } + + int* hash_seed() { return &_hash_seed; } + int queue_num() { return _queue_num; } + + int term_attempts() { return _term_attempts; } + void note_term_attempt() { _term_attempts++; } + +#if G1_DETAILED_STATS + int pushes() { return _pushes; } + int pops() { return _pops; } + int steals() { return _steals; } + int steal_attempts() { return _steal_attempts; } + int overflow_pushes() { return _overflow_pushes; } + + void note_push() { _pushes++; } + void note_pop() { _pops++; } + void note_steal() { _steals++; } + void note_steal_attempt() { _steal_attempts++; } + void note_overflow_push() { _overflow_pushes++; } +#endif + + void start_strong_roots() { + _start_strong_roots = os::elapsedTime(); + } + void end_strong_roots() { + _strong_roots_time += 
(os::elapsedTime() - _start_strong_roots); + } + double strong_roots_time() { return _strong_roots_time; } + + void start_term_time() { + note_term_attempt(); + _start_term = os::elapsedTime(); + } + void end_term_time() { + _term_time += (os::elapsedTime() - _start_term); + } + double term_time() { return _term_time; } + + double elapsed() { + return os::elapsedTime() - _start; + } + + size_t* surviving_young_words() { + // We add on to hide entry 0 which accumulates surviving words for + // age -1 regions (i.e. non-young ones) + return _surviving_young_words; + } + + void retire_alloc_buffers() { + for (int ap = 0; ap < GCAllocPurposeCount; ++ap) { + size_t waste = _alloc_buffers[ap].words_remaining(); + add_to_alloc_buffer_waste(waste); + _alloc_buffers[ap].retire(true, false); + } + } + + void trim_queue() { + while (refs_to_scan() > 0 || overflowed_refs_to_scan() > 0) { + oop *ref_to_scan = NULL; + if (overflowed_refs_to_scan() == 0) { + pop_from_queue(ref_to_scan); + } else { + pop_from_overflow_queue(ref_to_scan); + } + if (ref_to_scan != NULL) { + if ((intptr_t)ref_to_scan & G1_PARTIAL_ARRAY_MASK) { + _partial_scan_cl->do_oop_nv(ref_to_scan); + } else { + // Note: we can use "raw" versions of "region_containing" because + // "obj_to_scan" is definitely in the heap, and is not in a + // humongous region. + HeapRegion* r = _g1h->heap_region_containing_raw(ref_to_scan); + _evac_cl->set_region(r); + _evac_cl->do_oop_nv(ref_to_scan); + } + } + } + } +}; + + +G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) : + _g1(g1), _g1_rem(_g1->g1_rem_set()), _cm(_g1->concurrent_mark()), + _par_scan_state(par_scan_state) { } + +// This closure is applied to the fields of the objects that have just been copied. +// Should probably be made inline and moved in g1OopClosures.inline.hpp. 
+void G1ParScanClosure::do_oop_nv(oop* p) { + oop obj = *p; + if (obj != NULL) { + if (_g1->obj_in_cs(obj)) { + if (obj->is_forwarded()) { + *p = obj->forwardee(); + } else { + _par_scan_state->push_on_queue(p); + return; + } + } + _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num()); + } +} + +void G1ParCopyHelper::mark_forwardee(oop* p) { + // This is called _after_ do_oop_work has been called, hence after + // the object has been relocated to its new location and *p points + // to its new location. + + oop thisOop = *p; + if (thisOop != NULL) { + assert((_g1->evacuation_failed()) || (!_g1->obj_in_cs(thisOop)), + "shouldn't still be in the CSet if evacuation didn't fail."); + HeapWord* addr = (HeapWord*)thisOop; + if (_g1->is_in_g1_reserved(addr)) + _cm->grayRoot(oop(addr)); + } +} + +oop G1ParCopyHelper::copy_to_survivor_space(oop old) { + size_t word_sz = old->size(); + HeapRegion* from_region = _g1->heap_region_containing_raw(old); + // +1 to make the -1 indexes valid... + int young_index = from_region->young_index_in_cset()+1; + assert( (from_region->is_young() && young_index > 0) || + (!from_region->is_young() && young_index == 0), "invariant" ); + G1CollectorPolicy* g1p = _g1->g1_policy(); + markOop m = old->mark(); + GCAllocPurpose alloc_purpose = g1p->evacuation_destination(from_region, m->age(), + word_sz); + HeapWord* obj_ptr = _par_scan_state->allocate(alloc_purpose, word_sz); + oop obj = oop(obj_ptr); + + if (obj_ptr == NULL) { + // This will either forward-to-self, or detect that someone else has + // installed a forwarding pointer. 
+ OopsInHeapRegionClosure* cl = _par_scan_state->evac_failure_closure(); + return _g1->handle_evacuation_failure_par(cl, old); + } + + oop forward_ptr = old->forward_to_atomic(obj); + if (forward_ptr == NULL) { + Copy::aligned_disjoint_words((HeapWord*) old, obj_ptr, word_sz); + obj->set_mark(m); + if (g1p->track_object_age(alloc_purpose)) { + obj->incr_age(); + } + // preserve "next" mark bit + if (_g1->mark_in_progress() && !_g1->is_obj_ill(old)) { + if (!use_local_bitmaps || + !_par_scan_state->alloc_buffer(alloc_purpose)->mark(obj_ptr)) { + // if we couldn't mark it on the local bitmap (this happens when + // the object was not allocated in the GCLab), we have to bite + // the bullet and do the standard parallel mark + _cm->markAndGrayObjectIfNecessary(obj); + } +#if 1 + if (_g1->isMarkedNext(old)) { + _cm->nextMarkBitMap()->parClear((HeapWord*)old); + } +#endif + } + + size_t* surv_young_words = _par_scan_state->surviving_young_words(); + surv_young_words[young_index] += word_sz; + + if (obj->is_objArray() && arrayOop(obj)->length() >= ParGCArrayScanChunk) { + arrayOop(old)->set_length(0); + _par_scan_state->push_on_queue((oop*) ((intptr_t)old | G1_PARTIAL_ARRAY_MASK)); + } else { + _scanner->set_region(_g1->heap_region_containing(obj)); + obj->oop_iterate_backwards(_scanner); + } + } else { + _par_scan_state->undo_allocation(alloc_purpose, obj_ptr, word_sz); + obj = forward_ptr; + } + return obj; +} + +template +void G1ParCopyClosure::do_oop_work(oop* p) { + oop obj = *p; + assert(barrier != G1BarrierRS || obj != NULL, + "Precondition: G1BarrierRS implies obj is nonNull"); + + if (obj != NULL) { + if (_g1->obj_in_cs(obj)) { +#if G1_REM_SET_LOGGING + gclog_or_tty->print_cr("Loc "PTR_FORMAT" contains pointer "PTR_FORMAT" into CS.", + p, (void*) obj); +#endif + if (obj->is_forwarded()) { + *p = obj->forwardee(); + } else { + *p = copy_to_survivor_space(obj); + } + // When scanning the RS, we only care about objs in CS. 
+ if (barrier == G1BarrierRS) { + _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num()); + } + } + // When scanning moved objs, must look at all oops. + if (barrier == G1BarrierEvac) { + _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num()); + } + + if (do_gen_barrier) { + par_do_barrier(p); + } + } +} + +template void G1ParCopyClosure::do_oop_work(oop* p); + +template void G1ParScanPartialArrayClosure::process_array_chunk( + oop obj, int start, int end) { + // process our set of indices (include header in first chunk) + assert(start < end, "invariant"); + T* const base = (T*)objArrayOop(obj)->base(); + T* const start_addr = base + start; + T* const end_addr = base + end; + MemRegion mr((HeapWord*)start_addr, (HeapWord*)end_addr); + _scanner.set_region(_g1->heap_region_containing(obj)); + obj->oop_iterate(&_scanner, mr); +} + +void G1ParScanPartialArrayClosure::do_oop_nv(oop* p) { + assert(!UseCompressedOops, "Needs to be fixed to work with compressed oops"); + oop old = oop((intptr_t)p & ~G1_PARTIAL_ARRAY_MASK); + assert(old->is_objArray(), "must be obj array"); + assert(old->is_forwarded(), "must be forwarded"); + assert(Universe::heap()->is_in_reserved(old), "must be in heap."); + + objArrayOop obj = objArrayOop(old->forwardee()); + assert((void*)old != (void*)old->forwardee(), "self forwarding here?"); + // Process ParGCArrayScanChunk elements now + // and push the remainder back onto queue + int start = arrayOop(old)->length(); + int end = obj->length(); + int remainder = end - start; + assert(start <= end, "just checking"); + if (remainder > 2 * ParGCArrayScanChunk) { + // Test above combines last partial chunk with a full chunk + end = start + ParGCArrayScanChunk; + arrayOop(old)->set_length(end); + // Push remainder. + _par_scan_state->push_on_queue((oop*) ((intptr_t) old | G1_PARTIAL_ARRAY_MASK)); + } else { + // Restore length so that the heap remains parsable in + // case of evacuation failure. 
+ arrayOop(old)->set_length(end); + } + + // process our set of indices (include header in first chunk) + process_array_chunk(obj, start, end); + oop* start_addr = start == 0 ? (oop*)obj : obj->obj_at_addr(start); + oop* end_addr = (oop*)(obj->base()) + end; // obj_at_addr(end) asserts end < length + MemRegion mr((HeapWord*)start_addr, (HeapWord*)end_addr); + _scanner.set_region(_g1->heap_region_containing(obj)); + obj->oop_iterate(&_scanner, mr); +} + +int G1ScanAndBalanceClosure::_nq = 0; + +class G1ParEvacuateFollowersClosure : public VoidClosure { +protected: + G1CollectedHeap* _g1h; + G1ParScanThreadState* _par_scan_state; + RefToScanQueueSet* _queues; + ParallelTaskTerminator* _terminator; + + G1ParScanThreadState* par_scan_state() { return _par_scan_state; } + RefToScanQueueSet* queues() { return _queues; } + ParallelTaskTerminator* terminator() { return _terminator; } + +public: + G1ParEvacuateFollowersClosure(G1CollectedHeap* g1h, + G1ParScanThreadState* par_scan_state, + RefToScanQueueSet* queues, + ParallelTaskTerminator* terminator) + : _g1h(g1h), _par_scan_state(par_scan_state), + _queues(queues), _terminator(terminator) {} + + void do_void() { + G1ParScanThreadState* pss = par_scan_state(); + while (true) { + oop* ref_to_scan; + pss->trim_queue(); + IF_G1_DETAILED_STATS(pss->note_steal_attempt()); + if (queues()->steal(pss->queue_num(), + pss->hash_seed(), + ref_to_scan)) { + IF_G1_DETAILED_STATS(pss->note_steal()); + pss->push_on_queue(ref_to_scan); + continue; + } + pss->start_term_time(); + if (terminator()->offer_termination()) break; + pss->end_term_time(); + } + pss->end_term_time(); + pss->retire_alloc_buffers(); + } +}; + +class G1ParTask : public AbstractGangTask { +protected: + G1CollectedHeap* _g1h; + RefToScanQueueSet *_queues; + ParallelTaskTerminator _terminator; + + Mutex _stats_lock; + Mutex* stats_lock() { return &_stats_lock; } + + size_t getNCards() { + return (_g1h->capacity() + G1BlockOffsetSharedArray::N_bytes - 1) + / 
G1BlockOffsetSharedArray::N_bytes; + } + +public: + G1ParTask(G1CollectedHeap* g1h, int workers, RefToScanQueueSet *task_queues) + : AbstractGangTask("G1 collection"), + _g1h(g1h), + _queues(task_queues), + _terminator(workers, _queues), + _stats_lock(Mutex::leaf, "parallel G1 stats lock", true) + {} + + RefToScanQueueSet* queues() { return _queues; } + + RefToScanQueue *work_queue(int i) { + return queues()->queue(i); + } + + void work(int i) { + ResourceMark rm; + HandleMark hm; + + G1ParScanThreadState pss(_g1h, i); + G1ParScanHeapEvacClosure scan_evac_cl(_g1h, &pss); + G1ParScanHeapEvacClosure evac_failure_cl(_g1h, &pss); + G1ParScanPartialArrayClosure partial_scan_cl(_g1h, &pss); + + pss.set_evac_closure(&scan_evac_cl); + pss.set_evac_failure_closure(&evac_failure_cl); + pss.set_partial_scan_closure(&partial_scan_cl); + + G1ParScanExtRootClosure only_scan_root_cl(_g1h, &pss); + G1ParScanPermClosure only_scan_perm_cl(_g1h, &pss); + G1ParScanHeapRSClosure only_scan_heap_rs_cl(_g1h, &pss); + G1ParScanAndMarkExtRootClosure scan_mark_root_cl(_g1h, &pss); + G1ParScanAndMarkPermClosure scan_mark_perm_cl(_g1h, &pss); + G1ParScanAndMarkHeapRSClosure scan_mark_heap_rs_cl(_g1h, &pss); + + OopsInHeapRegionClosure *scan_root_cl; + OopsInHeapRegionClosure *scan_perm_cl; + OopsInHeapRegionClosure *scan_so_cl; + + if (_g1h->g1_policy()->should_initiate_conc_mark()) { + scan_root_cl = &scan_mark_root_cl; + scan_perm_cl = &scan_mark_perm_cl; + scan_so_cl = &scan_mark_heap_rs_cl; + } else { + scan_root_cl = &only_scan_root_cl; + scan_perm_cl = &only_scan_perm_cl; + scan_so_cl = &only_scan_heap_rs_cl; + } + + pss.start_strong_roots(); + _g1h->g1_process_strong_roots(/* not collecting perm */ false, + SharedHeap::SO_AllClasses, + scan_root_cl, + &only_scan_heap_rs_cl, + scan_so_cl, + scan_perm_cl, + i); + pss.end_strong_roots(); + { + double start = os::elapsedTime(); + G1ParEvacuateFollowersClosure evac(_g1h, &pss, _queues, &_terminator); + evac.do_void(); + double elapsed_ms = 
(os::elapsedTime()-start)*1000.0; + double term_ms = pss.term_time()*1000.0; + _g1h->g1_policy()->record_obj_copy_time(i, elapsed_ms-term_ms); + _g1h->g1_policy()->record_termination_time(i, term_ms); + } + _g1h->update_surviving_young_words(pss.surviving_young_words()+1); + + // Clean up any par-expanded rem sets. + HeapRegionRemSet::par_cleanup(); + + MutexLocker x(stats_lock()); + if (ParallelGCVerbose) { + gclog_or_tty->print("Thread %d complete:\n", i); +#if G1_DETAILED_STATS + gclog_or_tty->print(" Pushes: %7d Pops: %7d Overflows: %7d Steals %7d (in %d attempts)\n", + pss.pushes(), + pss.pops(), + pss.overflow_pushes(), + pss.steals(), + pss.steal_attempts()); +#endif + double elapsed = pss.elapsed(); + double strong_roots = pss.strong_roots_time(); + double term = pss.term_time(); + gclog_or_tty->print(" Elapsed: %7.2f ms.\n" + " Strong roots: %7.2f ms (%6.2f%%)\n" + " Termination: %7.2f ms (%6.2f%%) (in %d entries)\n", + elapsed * 1000.0, + strong_roots * 1000.0, (strong_roots*100.0/elapsed), + term * 1000.0, (term*100.0/elapsed), + pss.term_attempts()); + size_t total_waste = pss.alloc_buffer_waste() + pss.undo_waste(); + gclog_or_tty->print(" Waste: %8dK\n" + " Alloc Buffer: %8dK\n" + " Undo: %8dK\n", + (total_waste * HeapWordSize) / K, + (pss.alloc_buffer_waste() * HeapWordSize) / K, + (pss.undo_waste() * HeapWordSize) / K); + } + + assert(pss.refs_to_scan() == 0, "Task queue should be empty"); + assert(pss.overflowed_refs_to_scan() == 0, "Overflow queue should be empty"); + } +}; + +// *** Common G1 Evacuation Stuff + +class G1CountClosure: public OopsInHeapRegionClosure { +public: + int n; + G1CountClosure() : n(0) {} + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + void do_oop(oop* p) { + oop obj = *p; + assert(obj != NULL && G1CollectedHeap::heap()->obj_in_cs(obj), + "Rem set closure called on non-rem-set pointer."); + n++; + } +}; + +static G1CountClosure count_closure; + +void +G1CollectedHeap:: +g1_process_strong_roots(bool 
collecting_perm_gen, + SharedHeap::ScanningOption so, + OopClosure* scan_non_heap_roots, + OopsInHeapRegionClosure* scan_rs, + OopsInHeapRegionClosure* scan_so, + OopsInGenClosure* scan_perm, + int worker_i) { + // First scan the strong roots, including the perm gen. + double ext_roots_start = os::elapsedTime(); + double closure_app_time_sec = 0.0; + + BufferingOopClosure buf_scan_non_heap_roots(scan_non_heap_roots); + BufferingOopsInGenClosure buf_scan_perm(scan_perm); + buf_scan_perm.set_generation(perm_gen()); + + process_strong_roots(collecting_perm_gen, so, + &buf_scan_non_heap_roots, + &buf_scan_perm); + // Finish up any enqueued closure apps. + buf_scan_non_heap_roots.done(); + buf_scan_perm.done(); + double ext_roots_end = os::elapsedTime(); + g1_policy()->reset_obj_copy_time(worker_i); + double obj_copy_time_sec = + buf_scan_non_heap_roots.closure_app_seconds() + + buf_scan_perm.closure_app_seconds(); + g1_policy()->record_obj_copy_time(worker_i, obj_copy_time_sec * 1000.0); + double ext_root_time_ms = + ((ext_roots_end - ext_roots_start) - obj_copy_time_sec) * 1000.0; + g1_policy()->record_ext_root_scan_time(worker_i, ext_root_time_ms); + + // Scan strong roots in mark stack. + if (!_process_strong_tasks->is_task_claimed(G1H_PS_mark_stack_oops_do)) { + concurrent_mark()->oops_do(scan_non_heap_roots); + } + double mark_stack_scan_ms = (os::elapsedTime() - ext_roots_end) * 1000.0; + g1_policy()->record_mark_stack_scan_time(worker_i, mark_stack_scan_ms); + + // XXX What should this be doing in the parallel case? + g1_policy()->record_collection_pause_end_CH_strong_roots(); + if (G1VerifyRemSet) { + // :::: FIXME :::: + // The stupid remembered set doesn't know how to filter out dead + // objects, which the smart one does, and so when it is created + // and then compared the number of entries in each differs and + // the verification code fails. 
+ guarantee(false, "verification code is broken, see note"); + + // Let's make sure that the current rem set agrees with the stupidest + // one possible! + bool refs_enabled = ref_processor()->discovery_enabled(); + if (refs_enabled) ref_processor()->disable_discovery(); + StupidG1RemSet stupid(this); + count_closure.n = 0; + stupid.oops_into_collection_set_do(&count_closure, worker_i); + int stupid_n = count_closure.n; + count_closure.n = 0; + g1_rem_set()->oops_into_collection_set_do(&count_closure, worker_i); + guarantee(count_closure.n == stupid_n, "Old and new rem sets differ."); + gclog_or_tty->print_cr("\nFound %d pointers in heap RS.", count_closure.n); + if (refs_enabled) ref_processor()->enable_discovery(); + } + if (scan_so != NULL) { + scan_scan_only_set(scan_so, worker_i); + } + // Now scan the complement of the collection set. + if (scan_rs != NULL) { + g1_rem_set()->oops_into_collection_set_do(scan_rs, worker_i); + } + // Finish with the ref_processor roots. + if (!_process_strong_tasks->is_task_claimed(G1H_PS_refProcessor_oops_do)) { + ref_processor()->oops_do(scan_non_heap_roots); + } + g1_policy()->record_collection_pause_end_G1_strong_roots(); + _process_strong_tasks->all_tasks_completed(); +} + +void +G1CollectedHeap::scan_scan_only_region(HeapRegion* r, + OopsInHeapRegionClosure* oc, + int worker_i) { + HeapWord* startAddr = r->bottom(); + HeapWord* endAddr = r->used_region().end(); + + oc->set_region(r); + + HeapWord* p = r->bottom(); + HeapWord* t = r->top(); + guarantee( p == r->next_top_at_mark_start(), "invariant" ); + while (p < t) { + oop obj = oop(p); + p += obj->oop_iterate(oc); + } +} + +void +G1CollectedHeap::scan_scan_only_set(OopsInHeapRegionClosure* oc, + int worker_i) { + double start = os::elapsedTime(); + + BufferingOopsInHeapRegionClosure boc(oc); + + FilterInHeapRegionAndIntoCSClosure scan_only(this, &boc); + FilterAndMarkInHeapRegionAndIntoCSClosure scan_and_mark(this, &boc, concurrent_mark()); + + OopsInHeapRegionClosure 
*foc; + if (g1_policy()->should_initiate_conc_mark()) + foc = &scan_and_mark; + else + foc = &scan_only; + + HeapRegion* hr; + int n = 0; + while ((hr = _young_list->par_get_next_scan_only_region()) != NULL) { + scan_scan_only_region(hr, foc, worker_i); + ++n; + } + boc.done(); + + double closure_app_s = boc.closure_app_seconds(); + g1_policy()->record_obj_copy_time(worker_i, closure_app_s * 1000.0); + double ms = (os::elapsedTime() - start - closure_app_s)*1000.0; + g1_policy()->record_scan_only_time(worker_i, ms, n); +} + +void +G1CollectedHeap::g1_process_weak_roots(OopClosure* root_closure, + OopClosure* non_root_closure) { + SharedHeap::process_weak_roots(root_closure, non_root_closure); +} + + +class SaveMarksClosure: public HeapRegionClosure { +public: + bool doHeapRegion(HeapRegion* r) { + r->save_marks(); + return false; + } +}; + +void G1CollectedHeap::save_marks() { + if (ParallelGCThreads == 0) { + SaveMarksClosure sm; + heap_region_iterate(&sm); + } + // We do this even in the parallel case + perm_gen()->save_marks(); +} + +void G1CollectedHeap::evacuate_collection_set() { + set_evacuation_failed(false); + + g1_rem_set()->prepare_for_oops_into_collection_set_do(); + concurrent_g1_refine()->set_use_cache(false); + int n_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1); + + set_par_threads(n_workers); + G1ParTask g1_par_task(this, n_workers, _task_queues); + + init_for_evac_failure(NULL); + + change_strong_roots_parity(); // In preparation for parallel strong roots. + rem_set()->prepare_for_younger_refs_iterate(true); + double start_par = os::elapsedTime(); + + if (ParallelGCThreads > 0) { + // The individual threads will set their evac-failure closures. + workers()->run_task(&g1_par_task); + } else { + g1_par_task.work(0); + } + + double par_time = (os::elapsedTime() - start_par) * 1000.0; + g1_policy()->record_par_time(par_time); + set_par_threads(0); + // Is this the right thing to do here? 
We don't save marks + // on individual heap regions when we allocate from + // them in parallel, so this seems like the correct place for this. + all_alloc_regions_note_end_of_copying(); + { + G1IsAliveClosure is_alive(this); + G1KeepAliveClosure keep_alive(this); + JNIHandles::weak_oops_do(&is_alive, &keep_alive); + } + + g1_rem_set()->cleanup_after_oops_into_collection_set_do(); + concurrent_g1_refine()->set_use_cache(true); + + finalize_for_evac_failure(); + + // Must do this before removing self-forwarding pointers, which clears + // the per-region evac-failure flags. + concurrent_mark()->complete_marking_in_collection_set(); + + if (evacuation_failed()) { + remove_self_forwarding_pointers(); + + if (PrintGCDetails) { + gclog_or_tty->print(" (evacuation failed)"); + } else if (PrintGC) { + gclog_or_tty->print("--"); + } + } + + COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); +} + +void G1CollectedHeap::free_region(HeapRegion* hr) { + size_t pre_used = 0; + size_t cleared_h_regions = 0; + size_t freed_regions = 0; + UncleanRegionList local_list; + + HeapWord* start = hr->bottom(); + HeapWord* end = hr->prev_top_at_mark_start(); + size_t used_bytes = hr->used(); + size_t live_bytes = hr->max_live_bytes(); + if (used_bytes > 0) { + guarantee( live_bytes <= used_bytes, "invariant" ); + } else { + guarantee( live_bytes == 0, "invariant" ); + } + + size_t garbage_bytes = used_bytes - live_bytes; + if (garbage_bytes > 0) + g1_policy()->decrease_known_garbage_bytes(garbage_bytes); + + free_region_work(hr, pre_used, cleared_h_regions, freed_regions, + &local_list); + finish_free_region_work(pre_used, cleared_h_regions, freed_regions, + &local_list); +} + +void +G1CollectedHeap::free_region_work(HeapRegion* hr, + size_t& pre_used, + size_t& cleared_h_regions, + size_t& freed_regions, + UncleanRegionList* list, + bool par) { + assert(!hr->popular(), "should not free popular regions"); + pre_used += hr->used(); + if (hr->isHumongous()) { + 
assert(hr->startsHumongous(), + "Only the start of a humongous region should be freed."); + int ind = _hrs->find(hr); + assert(ind != -1, "Should have an index."); + // Clear the start region. + hr->hr_clear(par, true /*clear_space*/); + list->insert_before_head(hr); + cleared_h_regions++; + freed_regions++; + // Clear any continued regions. + ind++; + while ((size_t)ind < n_regions()) { + HeapRegion* hrc = _hrs->at(ind); + if (!hrc->continuesHumongous()) break; + // Otherwise, does continue the H region. + assert(hrc->humongous_start_region() == hr, "Huh?"); + hrc->hr_clear(par, true /*clear_space*/); + cleared_h_regions++; + freed_regions++; + list->insert_before_head(hrc); + ind++; + } + } else { + hr->hr_clear(par, true /*clear_space*/); + list->insert_before_head(hr); + freed_regions++; + // If we're using clear2, this should not be enabled. + // assert(!hr->in_cohort(), "Can't be both free and in a cohort."); + } +} + +void G1CollectedHeap::finish_free_region_work(size_t pre_used, + size_t cleared_h_regions, + size_t freed_regions, + UncleanRegionList* list) { + if (list != NULL && list->sz() > 0) { + prepend_region_list_on_unclean_list(list); + } + // Acquire a lock, if we're parallel, to update possibly-shared + // variables. + Mutex* lock = (n_par_threads() > 0) ? 
ParGCRareEvent_lock : NULL; + { + MutexLockerEx x(lock, Mutex::_no_safepoint_check_flag); + _summary_bytes_used -= pre_used; + _num_humongous_regions -= (int) cleared_h_regions; + _free_regions += freed_regions; + } +} + + +void G1CollectedHeap::dirtyCardsForYoungRegions(CardTableModRefBS* ct_bs, HeapRegion* list) { + while (list != NULL) { + guarantee( list->is_young(), "invariant" ); + + HeapWord* bottom = list->bottom(); + HeapWord* end = list->end(); + MemRegion mr(bottom, end); + ct_bs->dirty(mr); + + list = list->get_next_young_region(); + } +} + +void G1CollectedHeap::cleanUpCardTable() { + CardTableModRefBS* ct_bs = (CardTableModRefBS*) (barrier_set()); + double start = os::elapsedTime(); + + ct_bs->clear(_g1_committed); + + // now, redirty the cards of the scan-only and survivor regions + // (it seemed faster to do it this way, instead of iterating over + // all regions and then clearing / dirtying as approprite) + dirtyCardsForYoungRegions(ct_bs, _young_list->first_scan_only_region()); + dirtyCardsForYoungRegions(ct_bs, _young_list->first_survivor_region()); + + double elapsed = os::elapsedTime() - start; + g1_policy()->record_clear_ct_time( elapsed * 1000.0); +} + + +void G1CollectedHeap::do_collection_pause_if_appropriate(size_t word_size) { + // First do any popular regions. + HeapRegion* hr; + while ((hr = popular_region_to_evac()) != NULL) { + evac_popular_region(hr); + } + // Now do heuristic pauses. 
+ if (g1_policy()->should_do_collection_pause(word_size)) { + do_collection_pause(); + } +} + +void G1CollectedHeap::free_collection_set(HeapRegion* cs_head) { + double young_time_ms = 0.0; + double non_young_time_ms = 0.0; + + G1CollectorPolicy* policy = g1_policy(); + + double start_sec = os::elapsedTime(); + bool non_young = true; + + HeapRegion* cur = cs_head; + int age_bound = -1; + size_t rs_lengths = 0; + + while (cur != NULL) { + if (non_young) { + if (cur->is_young()) { + double end_sec = os::elapsedTime(); + double elapsed_ms = (end_sec - start_sec) * 1000.0; + non_young_time_ms += elapsed_ms; + + start_sec = os::elapsedTime(); + non_young = false; + } + } else { + if (!cur->is_on_free_list()) { + double end_sec = os::elapsedTime(); + double elapsed_ms = (end_sec - start_sec) * 1000.0; + young_time_ms += elapsed_ms; + + start_sec = os::elapsedTime(); + non_young = true; + } + } + + rs_lengths += cur->rem_set()->occupied(); + + HeapRegion* next = cur->next_in_collection_set(); + assert(cur->in_collection_set(), "bad CS"); + cur->set_next_in_collection_set(NULL); + cur->set_in_collection_set(false); + + if (cur->is_young()) { + int index = cur->young_index_in_cset(); + guarantee( index != -1, "invariant" ); + guarantee( (size_t)index < policy->young_cset_length(), "invariant" ); + size_t words_survived = _surviving_young_words[index]; + cur->record_surv_words_in_group(words_survived); + } else { + int index = cur->young_index_in_cset(); + guarantee( index == -1, "invariant" ); + } + + assert( (cur->is_young() && cur->young_index_in_cset() > -1) || + (!cur->is_young() && cur->young_index_in_cset() == -1), + "invariant" ); + + if (!cur->evacuation_failed()) { + // And the region is empty. 
+ assert(!cur->is_empty(), + "Should not have empty regions in a CS."); + free_region(cur); + } else { + guarantee( !cur->is_scan_only(), "should not be scan only" ); + cur->uninstall_surv_rate_group(); + if (cur->is_young()) + cur->set_young_index_in_cset(-1); + cur->set_not_young(); + cur->set_evacuation_failed(false); + } + cur = next; + } + + policy->record_max_rs_lengths(rs_lengths); + policy->cset_regions_freed(); + + double end_sec = os::elapsedTime(); + double elapsed_ms = (end_sec - start_sec) * 1000.0; + if (non_young) + non_young_time_ms += elapsed_ms; + else + young_time_ms += elapsed_ms; + + policy->record_young_free_cset_time_ms(young_time_ms); + policy->record_non_young_free_cset_time_ms(non_young_time_ms); +} + +HeapRegion* +G1CollectedHeap::alloc_region_from_unclean_list_locked(bool zero_filled) { + assert(ZF_mon->owned_by_self(), "Precondition"); + HeapRegion* res = pop_unclean_region_list_locked(); + if (res != NULL) { + assert(!res->continuesHumongous() && + res->zero_fill_state() != HeapRegion::Allocated, + "Only free regions on unclean list."); + if (zero_filled) { + res->ensure_zero_filled_locked(); + res->set_zero_fill_allocated(); + } + } + return res; +} + +HeapRegion* G1CollectedHeap::alloc_region_from_unclean_list(bool zero_filled) { + MutexLockerEx zx(ZF_mon, Mutex::_no_safepoint_check_flag); + return alloc_region_from_unclean_list_locked(zero_filled); +} + +void G1CollectedHeap::put_region_on_unclean_list(HeapRegion* r) { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + put_region_on_unclean_list_locked(r); + if (should_zf()) ZF_mon->notify_all(); // Wake up ZF thread. 
+} + +void G1CollectedHeap::set_unclean_regions_coming(bool b) { + MutexLockerEx x(Cleanup_mon); + set_unclean_regions_coming_locked(b); +} + +void G1CollectedHeap::set_unclean_regions_coming_locked(bool b) { + assert(Cleanup_mon->owned_by_self(), "Precondition"); + _unclean_regions_coming = b; + // Wake up mutator threads that might be waiting for completeCleanup to + // finish. + if (!b) Cleanup_mon->notify_all(); +} + +void G1CollectedHeap::wait_for_cleanup_complete() { + MutexLockerEx x(Cleanup_mon); + wait_for_cleanup_complete_locked(); +} + +void G1CollectedHeap::wait_for_cleanup_complete_locked() { + assert(Cleanup_mon->owned_by_self(), "precondition"); + while (_unclean_regions_coming) { + Cleanup_mon->wait(); + } +} + +void +G1CollectedHeap::put_region_on_unclean_list_locked(HeapRegion* r) { + assert(ZF_mon->owned_by_self(), "precondition."); + _unclean_region_list.insert_before_head(r); +} + +void +G1CollectedHeap::prepend_region_list_on_unclean_list(UncleanRegionList* list) { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + prepend_region_list_on_unclean_list_locked(list); + if (should_zf()) ZF_mon->notify_all(); // Wake up ZF thread. +} + +void +G1CollectedHeap:: +prepend_region_list_on_unclean_list_locked(UncleanRegionList* list) { + assert(ZF_mon->owned_by_self(), "precondition."); + _unclean_region_list.prepend_list(list); +} + +HeapRegion* G1CollectedHeap::pop_unclean_region_list_locked() { + assert(ZF_mon->owned_by_self(), "precondition."); + HeapRegion* res = _unclean_region_list.pop(); + if (res != NULL) { + // Inform ZF thread that there's a new unclean head. 
+ if (_unclean_region_list.hd() != NULL && should_zf()) + ZF_mon->notify_all(); + } + return res; +} + +HeapRegion* G1CollectedHeap::peek_unclean_region_list_locked() { + assert(ZF_mon->owned_by_self(), "precondition."); + return _unclean_region_list.hd(); +} + + +bool G1CollectedHeap::move_cleaned_region_to_free_list_locked() { + assert(ZF_mon->owned_by_self(), "Precondition"); + HeapRegion* r = peek_unclean_region_list_locked(); + if (r != NULL && r->zero_fill_state() == HeapRegion::ZeroFilled) { + // Result of below must be equal to "r", since we hold the lock. + (void)pop_unclean_region_list_locked(); + put_free_region_on_list_locked(r); + return true; + } else { + return false; + } +} + +bool G1CollectedHeap::move_cleaned_region_to_free_list() { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + return move_cleaned_region_to_free_list_locked(); +} + + +void G1CollectedHeap::put_free_region_on_list_locked(HeapRegion* r) { + assert(ZF_mon->owned_by_self(), "precondition."); + assert(_free_region_list_size == free_region_list_length(), "Inv"); + assert(r->zero_fill_state() == HeapRegion::ZeroFilled, + "Regions on free list must be zero filled"); + assert(!r->isHumongous(), "Must not be humongous."); + assert(r->is_empty(), "Better be empty"); + assert(!r->is_on_free_list(), + "Better not already be on free list"); + assert(!r->is_on_unclean_list(), + "Better not already be on unclean list"); + r->set_on_free_list(true); + r->set_next_on_free_list(_free_region_list); + _free_region_list = r; + _free_region_list_size++; + assert(_free_region_list_size == free_region_list_length(), "Inv"); +} + +void G1CollectedHeap::put_free_region_on_list(HeapRegion* r) { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + put_free_region_on_list_locked(r); +} + +HeapRegion* G1CollectedHeap::pop_free_region_list_locked() { + assert(ZF_mon->owned_by_self(), "precondition."); + assert(_free_region_list_size == free_region_list_length(), "Inv"); + HeapRegion* res 
= _free_region_list; + if (res != NULL) { + _free_region_list = res->next_from_free_list(); + _free_region_list_size--; + res->set_on_free_list(false); + res->set_next_on_free_list(NULL); + assert(_free_region_list_size == free_region_list_length(), "Inv"); + } + return res; +} + + +HeapRegion* G1CollectedHeap::alloc_free_region_from_lists(bool zero_filled) { + // By self, or on behalf of self. + assert(Heap_lock->is_locked(), "Precondition"); + HeapRegion* res = NULL; + bool first = true; + while (res == NULL) { + if (zero_filled || !first) { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + res = pop_free_region_list_locked(); + if (res != NULL) { + assert(!res->zero_fill_is_allocated(), + "No allocated regions on free list."); + res->set_zero_fill_allocated(); + } else if (!first) { + break; // We tried both, time to return NULL. + } + } + + if (res == NULL) { + res = alloc_region_from_unclean_list(zero_filled); + } + assert(res == NULL || + !zero_filled || + res->zero_fill_is_allocated(), + "We must have allocated the region we're returning"); + first = false; + } + return res; +} + +void G1CollectedHeap::remove_allocated_regions_from_lists() { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + { + HeapRegion* prev = NULL; + HeapRegion* cur = _unclean_region_list.hd(); + while (cur != NULL) { + HeapRegion* next = cur->next_from_unclean_list(); + if (cur->zero_fill_is_allocated()) { + // Remove from the list. + if (prev == NULL) { + (void)_unclean_region_list.pop(); + } else { + _unclean_region_list.delete_after(prev); + } + cur->set_on_unclean_list(false); + cur->set_next_on_unclean_list(NULL); + } else { + prev = cur; + } + cur = next; + } + assert(_unclean_region_list.sz() == unclean_region_list_length(), + "Inv"); + } + + { + HeapRegion* prev = NULL; + HeapRegion* cur = _free_region_list; + while (cur != NULL) { + HeapRegion* next = cur->next_from_free_list(); + if (cur->zero_fill_is_allocated()) { + // Remove from the list. 
+ if (prev == NULL) { + _free_region_list = cur->next_from_free_list(); + } else { + prev->set_next_on_free_list(cur->next_from_free_list()); + } + cur->set_on_free_list(false); + cur->set_next_on_free_list(NULL); + _free_region_list_size--; + } else { + prev = cur; + } + cur = next; + } + assert(_free_region_list_size == free_region_list_length(), "Inv"); + } +} + +bool G1CollectedHeap::verify_region_lists() { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + return verify_region_lists_locked(); +} + +bool G1CollectedHeap::verify_region_lists_locked() { + HeapRegion* unclean = _unclean_region_list.hd(); + while (unclean != NULL) { + guarantee(unclean->is_on_unclean_list(), "Well, it is!"); + guarantee(!unclean->is_on_free_list(), "Well, it shouldn't be!"); + guarantee(unclean->zero_fill_state() != HeapRegion::Allocated, + "Everything else is possible."); + unclean = unclean->next_from_unclean_list(); + } + guarantee(_unclean_region_list.sz() == unclean_region_list_length(), "Inv"); + + HeapRegion* free_r = _free_region_list; + while (free_r != NULL) { + assert(free_r->is_on_free_list(), "Well, it is!"); + assert(!free_r->is_on_unclean_list(), "Well, it shouldn't be!"); + switch (free_r->zero_fill_state()) { + case HeapRegion::NotZeroFilled: + case HeapRegion::ZeroFilling: + guarantee(false, "Should not be on free list."); + break; + default: + // Everything else is possible. + break; + } + free_r = free_r->next_from_free_list(); + } + guarantee(_free_region_list_size == free_region_list_length(), "Inv"); + // If we didn't do an assertion... 
+ return true; +} + +size_t G1CollectedHeap::free_region_list_length() { + assert(ZF_mon->owned_by_self(), "precondition."); + size_t len = 0; + HeapRegion* cur = _free_region_list; + while (cur != NULL) { + len++; + cur = cur->next_from_free_list(); + } + return len; +} + +size_t G1CollectedHeap::unclean_region_list_length() { + assert(ZF_mon->owned_by_self(), "precondition."); + return _unclean_region_list.length(); +} + +size_t G1CollectedHeap::n_regions() { + return _hrs->length(); +} + +size_t G1CollectedHeap::max_regions() { + return + (size_t)align_size_up(g1_reserved_obj_bytes(), HeapRegion::GrainBytes) / + HeapRegion::GrainBytes; +} + +size_t G1CollectedHeap::free_regions() { + /* Possibly-expensive assert. + assert(_free_regions == count_free_regions(), + "_free_regions is off."); + */ + return _free_regions; +} + +bool G1CollectedHeap::should_zf() { + return _free_region_list_size < (size_t) G1ConcZFMaxRegions; +} + +class RegionCounter: public HeapRegionClosure { + size_t _n; +public: + RegionCounter() : _n(0) {} + bool doHeapRegion(HeapRegion* r) { + if (r->is_empty() && !r->popular()) { + assert(!r->isHumongous(), "H regions should not be empty."); + _n++; + } + return false; + } + int res() { return (int) _n; } +}; + +size_t G1CollectedHeap::count_free_regions() { + RegionCounter rc; + heap_region_iterate(&rc); + size_t n = rc.res(); + if (_cur_alloc_region != NULL && _cur_alloc_region->is_empty()) + n--; + return n; +} + +size_t G1CollectedHeap::count_free_regions_list() { + size_t n = 0; + size_t o = 0; + ZF_mon->lock_without_safepoint_check(); + HeapRegion* cur = _free_region_list; + while (cur != NULL) { + cur = cur->next_from_free_list(); + n++; + } + size_t m = unclean_region_list_length(); + ZF_mon->unlock(); + return n + m; +} + +bool G1CollectedHeap::should_set_young_locked() { + assert(heap_lock_held_for_gc(), + "the heap lock should already be held by or for this thread"); + return (g1_policy()->in_young_gc_mode() && + 
g1_policy()->should_add_next_region_to_young_list()); +} + +void G1CollectedHeap::set_region_short_lived_locked(HeapRegion* hr) { + assert(heap_lock_held_for_gc(), + "the heap lock should already be held by or for this thread"); + _young_list->push_region(hr); + g1_policy()->set_region_short_lived(hr); +} + +class NoYoungRegionsClosure: public HeapRegionClosure { +private: + bool _success; +public: + NoYoungRegionsClosure() : _success(true) { } + bool doHeapRegion(HeapRegion* r) { + if (r->is_young()) { + gclog_or_tty->print_cr("Region ["PTR_FORMAT", "PTR_FORMAT") tagged as young", + r->bottom(), r->end()); + _success = false; + } + return false; + } + bool success() { return _success; } +}; + +bool G1CollectedHeap::check_young_list_empty(bool ignore_scan_only_list, + bool check_sample) { + bool ret = true; + + ret = _young_list->check_list_empty(ignore_scan_only_list, check_sample); + if (!ignore_scan_only_list) { + NoYoungRegionsClosure closure; + heap_region_iterate(&closure); + ret = ret && closure.success(); + } + + return ret; +} + +void G1CollectedHeap::empty_young_list() { + assert(heap_lock_held_for_gc(), + "the heap lock should already be held by or for this thread"); + assert(g1_policy()->in_young_gc_mode(), "should be in young GC mode"); + + _young_list->empty_list(); +} + +bool G1CollectedHeap::all_alloc_regions_no_allocs_since_save_marks() { + bool no_allocs = true; + for (int ap = 0; ap < GCAllocPurposeCount && no_allocs; ++ap) { + HeapRegion* r = _gc_alloc_regions[ap]; + no_allocs = r == NULL || r->saved_mark_at_top(); + } + return no_allocs; +} + +void G1CollectedHeap::all_alloc_regions_note_end_of_copying() { + for (int ap = 0; ap < GCAllocPurposeCount; ++ap) { + HeapRegion* r = _gc_alloc_regions[ap]; + if (r != NULL) { + // Check for aliases. 
+ bool has_processed_alias = false; + for (int i = 0; i < ap; ++i) { + if (_gc_alloc_regions[i] == r) { + has_processed_alias = true; + break; + } + } + if (!has_processed_alias) { + r->note_end_of_copying(); + g1_policy()->record_after_bytes(r->used()); + } + } + } +} + + +// Done at the start of full GC. +void G1CollectedHeap::tear_down_region_lists() { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + while (pop_unclean_region_list_locked() != NULL) ; + assert(_unclean_region_list.hd() == NULL && _unclean_region_list.sz() == 0, + "Postconditions of loop.") + while (pop_free_region_list_locked() != NULL) ; + assert(_free_region_list == NULL, "Postcondition of loop."); + if (_free_region_list_size != 0) { + gclog_or_tty->print_cr("Size is %d.", _free_region_list_size); + print(); + } + assert(_free_region_list_size == 0, "Postconditions of loop."); +} + + +class RegionResetter: public HeapRegionClosure { + G1CollectedHeap* _g1; + int _n; +public: + RegionResetter() : _g1(G1CollectedHeap::heap()), _n(0) {} + bool doHeapRegion(HeapRegion* r) { + if (r->continuesHumongous()) return false; + if (r->top() > r->bottom()) { + if (r->top() < r->end()) { + Copy::fill_to_words(r->top(), + pointer_delta(r->end(), r->top())); + } + r->set_zero_fill_allocated(); + } else { + assert(r->is_empty(), "tautology"); + if (r->popular()) { + if (r->zero_fill_state() != HeapRegion::Allocated) { + r->ensure_zero_filled_locked(); + r->set_zero_fill_allocated(); + } + } else { + _n++; + switch (r->zero_fill_state()) { + case HeapRegion::NotZeroFilled: + case HeapRegion::ZeroFilling: + _g1->put_region_on_unclean_list_locked(r); + break; + case HeapRegion::Allocated: + r->set_zero_fill_complete(); + // no break; go on to put on free list. + case HeapRegion::ZeroFilled: + _g1->put_free_region_on_list_locked(r); + break; + } + } + } + return false; + } + + int getFreeRegionCount() {return _n;} +}; + +// Done at the end of full GC. 
+void G1CollectedHeap::rebuild_region_lists() { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + // This needs to go at the end of the full GC. + RegionResetter rs; + heap_region_iterate(&rs); + _free_regions = rs.getFreeRegionCount(); + // Tell the ZF thread it may have work to do. + if (should_zf()) ZF_mon->notify_all(); +} + +class UsedRegionsNeedZeroFillSetter: public HeapRegionClosure { + G1CollectedHeap* _g1; + int _n; +public: + UsedRegionsNeedZeroFillSetter() : _g1(G1CollectedHeap::heap()), _n(0) {} + bool doHeapRegion(HeapRegion* r) { + if (r->continuesHumongous()) return false; + if (r->top() > r->bottom()) { + // There are assertions in "set_zero_fill_needed()" below that + // require top() == bottom(), so this is technically illegal. + // We'll skirt the law here, by making that true temporarily. + DEBUG_ONLY(HeapWord* save_top = r->top(); + r->set_top(r->bottom())); + r->set_zero_fill_needed(); + DEBUG_ONLY(r->set_top(save_top)); + } + return false; + } +}; + +// Done at the start of full GC. +void G1CollectedHeap::set_used_regions_to_need_zero_fill() { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + // This needs to go at the end of the full GC. 
+ UsedRegionsNeedZeroFillSetter rs; + heap_region_iterate(&rs); +} + +class CountObjClosure: public ObjectClosure { + size_t _n; +public: + CountObjClosure() : _n(0) {} + void do_object(oop obj) { _n++; } + size_t n() { return _n; } +}; + +size_t G1CollectedHeap::pop_object_used_objs() { + size_t sum_objs = 0; + for (int i = 0; i < G1NumPopularRegions; i++) { + CountObjClosure cl; + _hrs->at(i)->object_iterate(&cl); + sum_objs += cl.n(); + } + return sum_objs; +} + +size_t G1CollectedHeap::pop_object_used_bytes() { + size_t sum_bytes = 0; + for (int i = 0; i < G1NumPopularRegions; i++) { + sum_bytes += _hrs->at(i)->used(); + } + return sum_bytes; +} + + +static int nq = 0; + +HeapWord* G1CollectedHeap::allocate_popular_object(size_t word_size) { + while (_cur_pop_hr_index < G1NumPopularRegions) { + HeapRegion* cur_pop_region = _hrs->at(_cur_pop_hr_index); + HeapWord* res = cur_pop_region->allocate(word_size); + if (res != NULL) { + // We account for popular objs directly in the used summary: + _summary_bytes_used += (word_size * HeapWordSize); + return res; + } + // Otherwise, try the next region (first making sure that we remember + // the last "top" value as the "next_top_at_mark_start", so that + // objects made popular during markings aren't automatically considered + // live). + cur_pop_region->note_end_of_copying(); + // Otherwise, try the next region. + _cur_pop_hr_index++; + } + // XXX: For now !!! + vm_exit_out_of_memory(word_size, + "Not enough pop obj space (To Be Fixed)"); + return NULL; +} + +class HeapRegionList: public CHeapObj { + public: + HeapRegion* hr; + HeapRegionList* next; +}; + +void G1CollectedHeap::schedule_popular_region_evac(HeapRegion* r) { + // This might happen during parallel GC, so protect by this lock. + MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); + // We don't schedule regions whose evacuations are already pending, or + // are already being evacuated. 
+ if (!r->popular_pending() && !r->in_collection_set()) { + r->set_popular_pending(true); + if (G1TracePopularity) { + gclog_or_tty->print_cr("Scheduling region "PTR_FORMAT" " + "["PTR_FORMAT", "PTR_FORMAT") for pop-object evacuation.", + r, r->bottom(), r->end()); + } + HeapRegionList* hrl = new HeapRegionList; + hrl->hr = r; + hrl->next = _popular_regions_to_be_evacuated; + _popular_regions_to_be_evacuated = hrl; + } +} + +HeapRegion* G1CollectedHeap::popular_region_to_evac() { + MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); + HeapRegion* res = NULL; + while (_popular_regions_to_be_evacuated != NULL && res == NULL) { + HeapRegionList* hrl = _popular_regions_to_be_evacuated; + _popular_regions_to_be_evacuated = hrl->next; + res = hrl->hr; + // The G1RSPopLimit may have increased, so recheck here... + if (res->rem_set()->occupied() < (size_t) G1RSPopLimit) { + // Hah: don't need to schedule. + if (G1TracePopularity) { + gclog_or_tty->print_cr("Unscheduling region "PTR_FORMAT" " + "["PTR_FORMAT", "PTR_FORMAT") " + "for pop-object evacuation (size %d < limit %d)", + res, res->bottom(), res->end(), + res->rem_set()->occupied(), G1RSPopLimit); + } + res->set_popular_pending(false); + res = NULL; + } + // We do not reset res->popular() here; if we did so, it would allow + // the region to be "rescheduled" for popularity evacuation. Instead, + // this is done in the collection pause, with the world stopped. + // So the invariant is that the regions in the list have the popularity + // boolean set, but having the boolean set does not imply membership + // on the list (though there can at most one such pop-pending region + // not on the list at any time). + delete hrl; + } + return res; +} + +void G1CollectedHeap::evac_popular_region(HeapRegion* hr) { + while (true) { + // Don't want to do a GC pause while cleanup is being completed! 
+ wait_for_cleanup_complete(); + + // Read the GC count while holding the Heap_lock + int gc_count_before = SharedHeap::heap()->total_collections(); + g1_policy()->record_stop_world_start(); + + { + MutexUnlocker mu(Heap_lock); // give up heap lock, execute gets it back + VM_G1PopRegionCollectionPause op(gc_count_before, hr); + VMThread::execute(&op); + + // If the prolog succeeded, we didn't do a GC for this. + if (op.prologue_succeeded()) break; + } + // Otherwise we didn't. We should recheck the size, though, since + // the limit may have increased... + if (hr->rem_set()->occupied() < (size_t) G1RSPopLimit) { + hr->set_popular_pending(false); + break; + } + } +} + +void G1CollectedHeap::atomic_inc_obj_rc(oop obj) { + Atomic::inc(obj_rc_addr(obj)); +} + +class CountRCClosure: public OopsInHeapRegionClosure { + G1CollectedHeap* _g1h; + bool _parallel; +public: + CountRCClosure(G1CollectedHeap* g1h) : + _g1h(g1h), _parallel(ParallelGCThreads > 0) + {} + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + void do_oop(oop* p) { + oop obj = *p; + assert(obj != NULL, "Precondition."); + if (_parallel) { + // We go sticky at the limit to avoid excess contention. + // If we want to track the actual RC's further, we'll need to keep a + // per-thread hash table or something for the popular objects. 
+ if (_g1h->obj_rc(obj) < G1ObjPopLimit) { + _g1h->atomic_inc_obj_rc(obj); + } + } else { + _g1h->inc_obj_rc(obj); + } + } +}; + +class EvacPopObjClosure: public ObjectClosure { + G1CollectedHeap* _g1h; + size_t _pop_objs; + size_t _max_rc; +public: + EvacPopObjClosure(G1CollectedHeap* g1h) : + _g1h(g1h), _pop_objs(0), _max_rc(0) {} + + void do_object(oop obj) { + size_t rc = _g1h->obj_rc(obj); + _max_rc = MAX2(rc, _max_rc); + if (rc >= (size_t) G1ObjPopLimit) { + _g1h->_pop_obj_rc_at_copy.add((double)rc); + size_t word_sz = obj->size(); + HeapWord* new_pop_loc = _g1h->allocate_popular_object(word_sz); + oop new_pop_obj = (oop)new_pop_loc; + Copy::aligned_disjoint_words((HeapWord*)obj, new_pop_loc, word_sz); + obj->forward_to(new_pop_obj); + G1ScanAndBalanceClosure scan_and_balance(_g1h); + new_pop_obj->oop_iterate_backwards(&scan_and_balance); + // preserve "next" mark bit if marking is in progress. + if (_g1h->mark_in_progress() && !_g1h->is_obj_ill(obj)) { + _g1h->concurrent_mark()->markAndGrayObjectIfNecessary(new_pop_obj); + } + + if (G1TracePopularity) { + gclog_or_tty->print_cr("Found obj " PTR_FORMAT " of word size " SIZE_FORMAT + " pop (%d), move to " PTR_FORMAT, + (void*) obj, word_sz, + _g1h->obj_rc(obj), (void*) new_pop_obj); + } + _pop_objs++; + } + } + size_t pop_objs() { return _pop_objs; } + size_t max_rc() { return _max_rc; } +}; + +class G1ParCountRCTask : public AbstractGangTask { + G1CollectedHeap* _g1h; + BitMap _bm; + + size_t getNCards() { + return (_g1h->capacity() + G1BlockOffsetSharedArray::N_bytes - 1) + / G1BlockOffsetSharedArray::N_bytes; + } + CountRCClosure _count_rc_closure; +public: + G1ParCountRCTask(G1CollectedHeap* g1h) : + AbstractGangTask("G1 Par RC Count task"), + _g1h(g1h), _bm(getNCards()), _count_rc_closure(g1h) + {} + + void work(int i) { + ResourceMark rm; + HandleMark hm; + _g1h->g1_rem_set()->oops_into_collection_set_do(&_count_rc_closure, i); + } +}; + +void G1CollectedHeap::popularity_pause_preamble(HeapRegion* 
popular_region) { + // We're evacuating a single region (for popularity). + if (G1TracePopularity) { + gclog_or_tty->print_cr("Doing pop region pause for ["PTR_FORMAT", "PTR_FORMAT")", + popular_region->bottom(), popular_region->end()); + } + g1_policy()->set_single_region_collection_set(popular_region); + size_t max_rc; + if (!compute_reference_counts_and_evac_popular(popular_region, + &max_rc)) { + // We didn't evacuate any popular objects. + // We increase the RS popularity limit, to prevent this from + // happening in the future. + if (G1RSPopLimit < (1 << 30)) { + G1RSPopLimit *= 2; + } + // For now, interesting enough for a message: +#if 1 + gclog_or_tty->print_cr("In pop region pause for ["PTR_FORMAT", "PTR_FORMAT"), " + "failed to find a pop object (max = %d).", + popular_region->bottom(), popular_region->end(), + max_rc); + gclog_or_tty->print_cr("Increased G1RSPopLimit to %d.", G1RSPopLimit); +#endif // 0 + // Also, we reset the collection set to NULL, to make the rest of + // the collection do nothing. + assert(popular_region->next_in_collection_set() == NULL, + "should be single-region."); + popular_region->set_in_collection_set(false); + popular_region->set_popular_pending(false); + g1_policy()->clear_collection_set(); + } +} + +bool G1CollectedHeap:: +compute_reference_counts_and_evac_popular(HeapRegion* popular_region, + size_t* max_rc) { + HeapWord* rc_region_bot; + HeapWord* rc_region_end; + + // Set up the reference count region. 
+ HeapRegion* rc_region = newAllocRegion(HeapRegion::GrainWords); + if (rc_region != NULL) { + rc_region_bot = rc_region->bottom(); + rc_region_end = rc_region->end(); + } else { + rc_region_bot = NEW_C_HEAP_ARRAY(HeapWord, HeapRegion::GrainWords); + if (rc_region_bot == NULL) { + vm_exit_out_of_memory(HeapRegion::GrainWords, + "No space for RC region."); + } + rc_region_end = rc_region_bot + HeapRegion::GrainWords; + } + + if (G1TracePopularity) + gclog_or_tty->print_cr("RC region is ["PTR_FORMAT", "PTR_FORMAT")", + rc_region_bot, rc_region_end); + if (rc_region_bot > popular_region->bottom()) { + _rc_region_above = true; + _rc_region_diff = + pointer_delta(rc_region_bot, popular_region->bottom(), 1); + } else { + assert(rc_region_bot < popular_region->bottom(), "Can't be equal."); + _rc_region_above = false; + _rc_region_diff = + pointer_delta(popular_region->bottom(), rc_region_bot, 1); + } + g1_policy()->record_pop_compute_rc_start(); + // Count external references. + g1_rem_set()->prepare_for_oops_into_collection_set_do(); + if (ParallelGCThreads > 0) { + + set_par_threads(workers()->total_workers()); + G1ParCountRCTask par_count_rc_task(this); + workers()->run_task(&par_count_rc_task); + set_par_threads(0); + + } else { + CountRCClosure count_rc_closure(this); + g1_rem_set()->oops_into_collection_set_do(&count_rc_closure, 0); + } + g1_rem_set()->cleanup_after_oops_into_collection_set_do(); + g1_policy()->record_pop_compute_rc_end(); + + // Now evacuate popular objects. + g1_policy()->record_pop_evac_start(); + EvacPopObjClosure evac_pop_obj_cl(this); + popular_region->object_iterate(&evac_pop_obj_cl); + *max_rc = evac_pop_obj_cl.max_rc(); + + // Make sure the last "top" value of the current popular region is copied + // as the "next_top_at_mark_start", so that objects made popular during + // markings aren't automatically considered live. 
+ HeapRegion* cur_pop_region = _hrs->at(_cur_pop_hr_index); + cur_pop_region->note_end_of_copying(); + + if (rc_region != NULL) { + free_region(rc_region); + } else { + FREE_C_HEAP_ARRAY(HeapWord, rc_region_bot); + } + g1_policy()->record_pop_evac_end(); + + return evac_pop_obj_cl.pop_objs() > 0; +} + +class CountPopObjInfoClosure: public HeapRegionClosure { + size_t _objs; + size_t _bytes; + + class CountObjClosure: public ObjectClosure { + int _n; + public: + CountObjClosure() : _n(0) {} + void do_object(oop obj) { _n++; } + size_t n() { return _n; } + }; + +public: + CountPopObjInfoClosure() : _objs(0), _bytes(0) {} + bool doHeapRegion(HeapRegion* r) { + _bytes += r->used(); + CountObjClosure blk; + r->object_iterate(&blk); + _objs += blk.n(); + return false; + } + size_t objs() { return _objs; } + size_t bytes() { return _bytes; } +}; + + +void G1CollectedHeap::print_popularity_summary_info() const { + CountPopObjInfoClosure blk; + for (int i = 0; i <= _cur_pop_hr_index; i++) { + blk.doHeapRegion(_hrs->at(i)); + } + gclog_or_tty->print_cr("\nPopular objects: %d objs, %d bytes.", + blk.objs(), blk.bytes()); + gclog_or_tty->print_cr(" RC at copy = [avg = %5.2f, max = %5.2f, sd = %5.2f].", + _pop_obj_rc_at_copy.avg(), + _pop_obj_rc_at_copy.maximum(), + _pop_obj_rc_at_copy.sd()); +} + +void G1CollectedHeap::set_refine_cte_cl_concurrency(bool concurrent) { + _refine_cte_cl->set_concurrent(concurrent); +} + +#ifndef PRODUCT + +class PrintHeapRegionClosure: public HeapRegionClosure { +public: + bool doHeapRegion(HeapRegion *r) { + gclog_or_tty->print("Region: "PTR_FORMAT":", r); + if (r != NULL) { + if (r->is_on_free_list()) + gclog_or_tty->print("Free "); + if (r->is_young()) + gclog_or_tty->print("Young "); + if (r->isHumongous()) + gclog_or_tty->print("Is Humongous "); + r->print(); + } + return false; + } +}; + +class SortHeapRegionClosure : public HeapRegionClosure { + size_t young_regions,free_regions, unclean_regions; + size_t hum_regions, count; + size_t 
unaccounted, cur_unclean, cur_alloc; + size_t total_free; + HeapRegion* cur; +public: + // Initializers are listed in declaration order, and every counter + // (including "cur_unclean") starts at zero. + SortHeapRegionClosure(HeapRegion *_cur) : young_regions(0), + free_regions(0), unclean_regions(0), + hum_regions(0), + count(0), unaccounted(0), + cur_unclean(0), cur_alloc(0), total_free(0), + cur(_cur) + {} + // Puts each region in the first category that matches, in the order + // tested below. Always returns false. + bool doHeapRegion(HeapRegion *r) { + count++; + if (r->is_on_free_list()) free_regions++; + else if (r->is_on_unclean_list()) unclean_regions++; + else if (r->isHumongous()) hum_regions++; + else if (r->is_young()) young_regions++; + else if (r == cur) cur_alloc++; + else unaccounted++; + return false; + } + // Prints the accumulated counters; all of them are size_t, hence + // SIZE_FORMAT rather than %d. + void print() { + total_free = free_regions + unclean_regions; + gclog_or_tty->print(SIZE_FORMAT " regions\n", count); + gclog_or_tty->print(SIZE_FORMAT " free: free_list = " SIZE_FORMAT " unclean = " SIZE_FORMAT "\n", + total_free, free_regions, unclean_regions); + gclog_or_tty->print(SIZE_FORMAT " humongous " SIZE_FORMAT " young\n", + hum_regions, young_regions); + gclog_or_tty->print(SIZE_FORMAT " cur_alloc\n", cur_alloc); + gclog_or_tty->print("UHOH unaccounted = " SIZE_FORMAT "\n", unaccounted); + } +}; + +void G1CollectedHeap::print_region_counts() { + SortHeapRegionClosure sc(_cur_alloc_region); + PrintHeapRegionClosure cl; + heap_region_iterate(&cl); + heap_region_iterate(&sc); + sc.print(); + print_region_accounting_info(); +} + +bool G1CollectedHeap::regions_accounted_for() { + // TODO: regions accounting for young/survivor/tenured + return true; +} + +bool G1CollectedHeap::print_region_accounting_info() { + // NOTE(review): free_regions(), count_free_regions(), + // count_free_regions_list() and _free_region_list_size are size_t but are + // printed with %d below; confirm the types of G1NumPopularRegions and + // _unclean_region_list.sz() and convert these calls to SIZE_FORMAT too. + gclog_or_tty->print_cr("P regions: %d.", G1NumPopularRegions); + gclog_or_tty->print_cr("Free regions: %d (count: %d count list %d) (clean: %d unclean: %d).", + free_regions(), + count_free_regions(), count_free_regions_list(), + _free_region_list_size, _unclean_region_list.sz()); + gclog_or_tty->print_cr("cur_alloc: %d.", + (_cur_alloc_region == NULL ? 
0 : 1)); + gclog_or_tty->print_cr("H regions: %d.", _num_humongous_regions); + + // TODO: check regions accounting for young/survivor/tenured + return true; +} + +bool G1CollectedHeap::is_in_closed_subset(const void* p) const { + HeapRegion* hr = heap_region_containing(p); + if (hr == NULL) { + return is_in_permanent(p); + } else { + return hr->is_in(p); + } +} +#endif // PRODUCT + +void G1CollectedHeap::g1_unimplemented() { + // Unimplemented(); +} + + +// Local Variables: *** +// c-indentation-style: gnu *** +// End: *** diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp new file mode 100644 index 00000000000..557456e4024 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp @@ -0,0 +1,1203 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// A "G1CollectedHeap" is an implementation of a java heap for HotSpot. 
+// It uses the "Garbage First" heap organization and algorithm, which +// may combine concurrent marking with parallel, incremental compaction of +// heap subsets that will yield large amounts of garbage. + +class HeapRegion; +class HeapRegionSeq; +class HeapRegionList; +class PermanentGenerationSpec; +class GenerationSpec; +class OopsInHeapRegionClosure; +class G1ScanHeapEvacClosure; +class ObjectClosure; +class SpaceClosure; +class CompactibleSpaceClosure; +class Space; +class G1CollectorPolicy; +class GenRemSet; +class G1RemSet; +class HeapRegionRemSetIterator; +class ConcurrentMark; +class ConcurrentMarkThread; +class ConcurrentG1Refine; +class ConcurrentZFThread; + +// If want to accumulate detailed statistics on work queues +// turn this on. +#define G1_DETAILED_STATS 0 + +#if G1_DETAILED_STATS +# define IF_G1_DETAILED_STATS(code) code +#else +# define IF_G1_DETAILED_STATS(code) +#endif + +typedef GenericTaskQueue RefToScanQueue; +typedef GenericTaskQueueSet RefToScanQueueSet; + +enum G1GCThreadGroups { + G1CRGroup = 0, + G1ZFGroup = 1, + G1CMGroup = 2, + G1CLGroup = 3 +}; + +enum GCAllocPurpose { + GCAllocForTenured, + GCAllocForSurvived, + GCAllocPurposeCount +}; + +class YoungList : public CHeapObj { +private: + G1CollectedHeap* _g1h; + + HeapRegion* _head; + + HeapRegion* _scan_only_head; + HeapRegion* _scan_only_tail; + size_t _length; + size_t _scan_only_length; + + size_t _last_sampled_rs_lengths; + size_t _sampled_rs_lengths; + HeapRegion* _curr; + HeapRegion* _curr_scan_only; + + HeapRegion* _survivor_head; + HeapRegion* _survivors_tail; + size_t _survivor_length; + + void empty_list(HeapRegion* list); + +public: + YoungList(G1CollectedHeap* g1h); + + void push_region(HeapRegion* hr); + void add_survivor_region(HeapRegion* hr); + HeapRegion* pop_region(); + void empty_list(); + bool is_empty() { return _length == 0; } + size_t length() { return _length; } + size_t scan_only_length() { return _scan_only_length; } + + void rs_length_sampling_init(); + 
bool rs_length_sampling_more(); + void rs_length_sampling_next(); + + void reset_sampled_info() { + _last_sampled_rs_lengths = 0; + } + size_t sampled_rs_lengths() { return _last_sampled_rs_lengths; } + + // for development purposes + void reset_auxilary_lists(); + HeapRegion* first_region() { return _head; } + HeapRegion* first_scan_only_region() { return _scan_only_head; } + HeapRegion* first_survivor_region() { return _survivor_head; } + HeapRegion* par_get_next_scan_only_region() { + MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); + HeapRegion* ret = _curr_scan_only; + if (ret != NULL) + _curr_scan_only = ret->get_next_young_region(); + return ret; + } + + // debugging + bool check_list_well_formed(); + bool check_list_empty(bool ignore_scan_only_list, + bool check_sample = true); + void print(); +}; + +class RefineCardTableEntryClosure; +class G1CollectedHeap : public SharedHeap { + friend class VM_G1CollectForAllocation; + friend class VM_GenCollectForPermanentAllocation; + friend class VM_G1CollectFull; + friend class VM_G1IncCollectionPause; + friend class VM_G1PopRegionCollectionPause; + friend class VMStructs; + + // Closures used in implementation. + friend class G1ParCopyHelper; + friend class G1IsAliveClosure; + friend class G1EvacuateFollowersClosure; + friend class G1ParScanThreadState; + friend class G1ParScanClosureSuper; + friend class G1ParEvacuateFollowersClosure; + friend class G1ParTask; + friend class G1FreeGarbageRegionClosure; + friend class RefineCardTableEntryClosure; + friend class G1PrepareCompactClosure; + friend class RegionSorter; + friend class CountRCClosure; + friend class EvacPopObjClosure; + + // Other related classes. 
+ friend class G1MarkSweep; + +private: + enum SomePrivateConstants { + VeryLargeInBytes = HeapRegion::GrainBytes/2, + VeryLargeInWords = VeryLargeInBytes/HeapWordSize, + MinHeapDeltaBytes = 10 * HeapRegion::GrainBytes, // FIXME + NumAPIs = HeapRegion::MaxAge + }; + + + // The one and only G1CollectedHeap, so static functions can find it. + static G1CollectedHeap* _g1h; + + // Storage for the G1 heap (excludes the permanent generation). + VirtualSpace _g1_storage; + MemRegion _g1_reserved; + + // The part of _g1_storage that is currently committed. + MemRegion _g1_committed; + + // The maximum part of _g1_storage that has ever been committed. + MemRegion _g1_max_committed; + + // The number of regions that are completely free. + size_t _free_regions; + + // The number of regions we could create by expansion. + size_t _expansion_regions; + + // Return the number of free regions in the heap (by direct counting.) + size_t count_free_regions(); + // Return the number of free regions on the free and unclean lists. + size_t count_free_regions_list(); + + // The block offset table for the G1 heap. + G1BlockOffsetSharedArray* _bot_shared; + + // Move all of the regions off the free lists, then rebuild those free + // lists, before and after full GC. + void tear_down_region_lists(); + void rebuild_region_lists(); + // This sets all non-empty regions to need zero-fill (which they will if + // they are empty after full collection.) + void set_used_regions_to_need_zero_fill(); + + // The sequence of all heap regions in the heap. + HeapRegionSeq* _hrs; + + // The region from which normal-sized objects are currently being + // allocated. May be NULL. + HeapRegion* _cur_alloc_region; + + // Postcondition: cur_alloc_region == NULL. + void abandon_cur_alloc_region(); + + // The to-space memory regions into which objects are being copied during + // a GC. 
+ HeapRegion* _gc_alloc_regions[GCAllocPurposeCount]; + uint _gc_alloc_region_counts[GCAllocPurposeCount]; + + // A list of the regions that have been set to be alloc regions in the + // current collection. + HeapRegion* _gc_alloc_region_list; + + // When called by par thread, require par_alloc_during_gc_lock() to be held. + void push_gc_alloc_region(HeapRegion* hr); + + // This should only be called single-threaded. Undeclares all GC alloc + // regions. + void forget_alloc_region_list(); + + // Should be used to set an alloc region, because there's other + // associated bookkeeping. + void set_gc_alloc_region(int purpose, HeapRegion* r); + + // Check well-formedness of alloc region list. + bool check_gc_alloc_regions(); + + // Outside of GC pauses, the number of bytes used in all regions other + // than the current allocation region. + size_t _summary_bytes_used; + + // Summary information about popular objects; method to print it. + NumberSeq _pop_obj_rc_at_copy; + void print_popularity_summary_info() const; + + volatile unsigned _gc_time_stamp; + + size_t* _surviving_young_words; + + void setup_surviving_young_words(); + void update_surviving_young_words(size_t* surv_young_words); + void cleanup_surviving_young_words(); + +protected: + + // Returns "true" iff none of the gc alloc regions have any allocations + // since the last call to "save_marks". + bool all_alloc_regions_no_allocs_since_save_marks(); + // Calls "note_end_of_copying on all gc alloc_regions. + void all_alloc_regions_note_end_of_copying(); + + // The number of regions allocated to hold humongous objects. + int _num_humongous_regions; + YoungList* _young_list; + + // The current policy object for the collector. + G1CollectorPolicy* _g1_policy; + + // Parallel allocation lock to protect the current allocation region. 
+ Mutex _par_alloc_during_gc_lock; + Mutex* par_alloc_during_gc_lock() { return &_par_alloc_during_gc_lock; } + + // If possible/desirable, allocate a new HeapRegion for normal object + // allocation sufficient for an allocation of the given "word_size". + // If "do_expand" is true, will attempt to expand the heap if necessary + // to satisfy the request. If "zero_filled" is true, requires a + // zero-filled region. + // (Returning NULL will trigger a GC.) + virtual HeapRegion* newAllocRegion_work(size_t word_size, + bool do_expand, + bool zero_filled); + + // As newAllocRegion_work(), but never attempts to expand the heap + // ("do_expand" is passed as false). + virtual HeapRegion* newAllocRegion(size_t word_size, + bool zero_filled = true) { + return newAllocRegion_work(word_size, false, zero_filled); + } + virtual HeapRegion* newAllocRegionWithExpansion(int purpose, + size_t word_size, + bool zero_filled = true); + + // Attempt to allocate an object of the given (very large) "word_size". + // Returns "NULL" on failure. + virtual HeapWord* humongousObjAllocate(size_t word_size); + + // If possible, allocate a block of the given word_size, else return "NULL". + // Returning NULL will trigger GC or heap expansion. + // These two methods have rather awkward pre- and + // post-conditions. If they are called outside a safepoint, then + // they assume that the caller is holding the heap lock. Upon return + // they release the heap lock, if they are returning a non-NULL + // value. attempt_allocation_slow() also dirties the cards of a + // newly-allocated young region after it releases the heap + // lock. This change in interface was the neatest way to achieve + // this card dirtying without affecting mem_allocate(), which is a + // more frequently called method. We tried two or three different + // approaches, but they were even more hacky. + HeapWord* attempt_allocation(size_t word_size, + bool permit_collection_pause = true); + + HeapWord* attempt_allocation_slow(size_t word_size, + bool permit_collection_pause = true); + + // Allocate blocks during garbage collection. 
Will ensure an + // allocation region, either by picking one or expanding the + // heap, and then allocate a block of the given size. The block + // may not be a humongous - it must fit into a single heap region. + HeapWord* allocate_during_gc(GCAllocPurpose purpose, size_t word_size); + HeapWord* par_allocate_during_gc(GCAllocPurpose purpose, size_t word_size); + + HeapWord* allocate_during_gc_slow(GCAllocPurpose purpose, + HeapRegion* alloc_region, + bool par, + size_t word_size); + + // Ensure that no further allocations can happen in "r", bearing in mind + // that parallel threads might be attempting allocations. + void par_allocate_remaining_space(HeapRegion* r); + + // Helper function for two callbacks below. + // "full", if true, indicates that the GC is for a System.gc() request, + // and should collect the entire heap. If "clear_all_soft_refs" is true, + // all soft references are cleared during the GC. If "full" is false, + // "word_size" describes the allocation that the GC should + // attempt (at least) to satisfy. + void do_collection(bool full, bool clear_all_soft_refs, + size_t word_size); + + // Callback from VM_G1CollectFull operation. + // Perform a full collection. + void do_full_collection(bool clear_all_soft_refs); + + // Resize the heap if necessary after a full collection. If this is + // after a collect-for allocation, "word_size" is the allocation size, + // and will be considered part of the used portion of the heap. + void resize_if_necessary_after_full_collection(size_t word_size); + + // Callback from VM_G1CollectForAllocation operation. + // This function does everything necessary/possible to satisfy a + // failed allocation request (including collection, expansion, etc.) + HeapWord* satisfy_failed_allocation(size_t word_size); + + // Attempting to expand the heap sufficiently + // to support an allocation of the given "word_size". 
If + // successful, perform the allocation and return the address of the + // allocated block, or else "NULL". + virtual HeapWord* expand_and_allocate(size_t word_size); + +public: + // Expand the garbage-first heap by at least the given size (in bytes!). + // (Rounds up to a HeapRegion boundary.) + virtual void expand(size_t expand_bytes); + + // Do anything common to GC's. + virtual void gc_prologue(bool full); + virtual void gc_epilogue(bool full); + +protected: + + // Shrink the garbage-first heap by at most the given size (in bytes!). + // (Rounds down to a HeapRegion boundary.) + virtual void shrink(size_t expand_bytes); + void shrink_helper(size_t expand_bytes); + + // Do an incremental collection: identify a collection set, and evacuate + // its live objects elsewhere. + virtual void do_collection_pause(); + + // The guts of the incremental collection pause, executed by the vm + // thread. If "popular_region" is non-NULL, this pause should evacuate + // this single region whose remembered set has gotten large, moving + // any popular objects to one of the popular regions. + virtual void do_collection_pause_at_safepoint(HeapRegion* popular_region); + + // Actually do the work of evacuating the collection set. + virtual void evacuate_collection_set(); + + // If this is an appropriate time, do a collection pause. + // The "word_size" argument, if non-zero, indicates the size of an + // allocation request that is prompting this query. + void do_collection_pause_if_appropriate(size_t word_size); + + // The g1 remembered set of the heap. + G1RemSet* _g1_rem_set; + // And its mod ref barrier set, used to track updates for the above. + ModRefBarrierSet* _mr_bs; + + // The Heap Region Rem Set Iterator. + HeapRegionRemSetIterator** _rem_set_iterator; + + // The closure used to refine a single card. + RefineCardTableEntryClosure* _refine_cte_cl; + + // A function to check the consistency of dirty card logs. 
+ void check_ct_logs_at_safepoint(); + + // After a collection pause, make the regions in the CS into free + // regions. + void free_collection_set(HeapRegion* cs_head); + + // Applies "scan_non_heap_roots" to roots outside the heap, + // "scan_rs" to roots inside the heap (having done "set_region" to + // indicate the region in which the root resides), and does "scan_perm" + // (setting the generation to the perm generation.) If "scan_rs" is + // NULL, then this step is skipped. The "worker_i" + // param is for use with parallel roots processing, and should be + // the "i" of the calling parallel worker thread's work(i) function. + // In the sequential case this param will be ignored. + void g1_process_strong_roots(bool collecting_perm_gen, + SharedHeap::ScanningOption so, + OopClosure* scan_non_heap_roots, + OopsInHeapRegionClosure* scan_rs, + OopsInHeapRegionClosure* scan_so, + OopsInGenClosure* scan_perm, + int worker_i); + + void scan_scan_only_set(OopsInHeapRegionClosure* oc, + int worker_i); + void scan_scan_only_region(HeapRegion* hr, + OopsInHeapRegionClosure* oc, + int worker_i); + + // Apply the given closures to all the weak roots of the system. These + // include JNI weak roots, the code cache, system dictionary, symbol table, + // string table, and referents of reachable weak refs. + // NOTE(review): this comment used to name a "blk" parameter that does not + // exist; confirm how "root_closure" and "non_root_closure" divide the work. + void g1_process_weak_roots(OopClosure* root_closure, + OopClosure* non_root_closure); + + // Invoke "save_marks" on all heap regions. + void save_marks(); + + // Free a heap region. + void free_region(HeapRegion* hr); + // A component of "free_region", exposed for 'batching'. + // All the params after "hr" are out params: the used bytes of the freed + // region(s), the number of H regions cleared, the number of regions + // freed, and pointers to the head and tail of a list of freed contig + // regions, linked through the "next_on_unclean_list" field. 
+ void free_region_work(HeapRegion* hr, + size_t& pre_used, + size_t& cleared_h, + size_t& freed_regions, + UncleanRegionList* list, + bool par = false); + + + // The concurrent marker (and the thread it runs in.) + ConcurrentMark* _cm; + ConcurrentMarkThread* _cmThread; + bool _mark_in_progress; + + // The concurrent refiner. + ConcurrentG1Refine* _cg1r; + + // The concurrent zero-fill thread. + ConcurrentZFThread* _czft; + + // The parallel task queues + RefToScanQueueSet *_task_queues; + + // True iff an evacuation has failed in the current collection. + bool _evacuation_failed; + + // Set the attribute indicating whether evacuation has failed in the + // current collection. + void set_evacuation_failed(bool b) { _evacuation_failed = b; } + + // Failed evacuations cause some logical from-space objects to have + // forwarding pointers to themselves. Reset them. + void remove_self_forwarding_pointers(); + + // When one is non-null, so is the other. Together, each pair is + // an object with a preserved mark, and its mark value. + GrowableArray* _objs_with_preserved_marks; + GrowableArray* _preserved_marks_of_objs; + + // Preserve the mark of "obj", if necessary, in preparation for its mark + // word being overwritten with a self-forwarding-pointer. + void preserve_mark_if_necessary(oop obj, markOop m); + + // The stack of evac-failure objects left to be scanned. + GrowableArray* _evac_failure_scan_stack; + // The closure to apply to evac-failure objects. + + OopsInHeapRegionClosure* _evac_failure_closure; + // Set the field above. + void + set_evac_failure_closure(OopsInHeapRegionClosure* evac_failure_closure) { + _evac_failure_closure = evac_failure_closure; + } + + // Push "obj" on the scan stack. + void push_on_evac_failure_scan_stack(oop obj); + // Process scan stack entries until the stack is empty. + void drain_evac_failure_scan_stack(); + // True iff an invocation of "drain_evac_failure_scan_stack" is in + // progress; to prevent unnecessary recursion. 
+ bool _drain_in_progress; + + // Do any necessary initialization for evacuation-failure handling. + // "cl" is the closure that will be used to process evac-failure + // objects. + void init_for_evac_failure(OopsInHeapRegionClosure* cl); + // Do any necessary cleanup for evacuation-failure handling data + // structures. + void finalize_for_evac_failure(); + + // An attempt to evacuate "obj" has failed; take necessary steps. + void handle_evacuation_failure(oop obj); + oop handle_evacuation_failure_par(OopsInHeapRegionClosure* cl, oop obj); + void handle_evacuation_failure_common(oop obj, markOop m); + + + // Ensure that the relevant gc_alloc regions are set. + void get_gc_alloc_regions(); + // We're done with GC alloc regions; release them, as appropriate. + void release_gc_alloc_regions(); + + // ("Weak") Reference processing support + ReferenceProcessor* _ref_processor; + + enum G1H_process_strong_roots_tasks { + G1H_PS_mark_stack_oops_do, + G1H_PS_refProcessor_oops_do, + // Leave this one last. + G1H_PS_NumElements + }; + + SubTasksDone* _process_strong_tasks; + + // Allocate space to hold a popular object. Result is guaranteed below + // "popular_object_boundary()". Note: CURRENTLY halts the system if we + // run out of space to hold popular objects. + HeapWord* allocate_popular_object(size_t word_size); + + // The boundary between popular and non-popular objects. + HeapWord* _popular_object_boundary; + + HeapRegionList* _popular_regions_to_be_evacuated; + + // Compute which objects in "popular_region" are popular. If any are, + // evacuate them to a popular region, leaving behind forwarding pointers, + // and select "popular_region" as the single collection set region. + // Otherwise, leave the collection set null. + void popularity_pause_preamble(HeapRegion* popular_region); + + // Compute which objects in "popular_region" are popular, and evacuate + // them to a popular region, leaving behind forwarding pointers. 
+ // Returns "true" if at least one popular object is discovered and + // evacuated. In any case, "*max_rc" is set to the maximum reference + // count of an object in the region. + bool compute_reference_counts_and_evac_popular(HeapRegion* popular_region, + size_t* max_rc); + // Subroutines used in the above. + bool _rc_region_above; + size_t _rc_region_diff; + // Address of the reference-count slot for "obj": the object's address + // shifted by "_rc_region_diff" bytes, upward if "_rc_region_above" is set + // and downward otherwise. + jint* obj_rc_addr(oop obj) { + uintptr_t obj_addr = (uintptr_t)obj; + if (_rc_region_above) { + jint* res = (jint*)(obj_addr + _rc_region_diff); + assert((uintptr_t)res > obj_addr, "RC region is above."); + return res; + } else { + jint* res = (jint*)(obj_addr - _rc_region_diff); + assert((uintptr_t)res < obj_addr, "RC region is below."); + return res; + } + } + // The reference count currently stored for "obj". + jint obj_rc(oop obj) { + return *obj_rc_addr(obj); + } + // Plain (non-atomic) increment of the reference count of "obj". + void inc_obj_rc(oop obj) { + (*obj_rc_addr(obj))++; + } + void atomic_inc_obj_rc(oop obj); + + + // Number of popular objects and bytes (latter is cheaper!). + size_t pop_object_used_objs(); + size_t pop_object_used_bytes(); + + // Index of the popular region in which allocation is currently being + // done. + int _cur_pop_hr_index; + + // List of regions which require zero filling. + UncleanRegionList _unclean_region_list; + bool _unclean_regions_coming; + + bool check_age_cohort_well_formed_work(int a, HeapRegion* hr); + +public: + void set_refine_cte_cl_concurrency(bool concurrent); + + RefToScanQueue *task_queue(int i); + + // Create a G1CollectedHeap with the specified policy. + // Must call the initialize method afterwards. + // May not return if something goes wrong. + G1CollectedHeap(G1CollectorPolicy* policy); + + // Initialize the G1CollectedHeap to have the initial and + // maximum sizes, permanent generation, and remembered and barrier sets + // specified by the policy object. 
+ jint initialize(); + + void ref_processing_init(); + + void set_par_threads(int t) { + SharedHeap::set_par_threads(t); + _process_strong_tasks->set_par_threads(t); + } + + virtual CollectedHeap::Name kind() const { + return CollectedHeap::G1CollectedHeap; + } + + // The current policy object for the collector. + G1CollectorPolicy* g1_policy() const { return _g1_policy; } + + // Adaptive size policy. No such thing for g1. + virtual AdaptiveSizePolicy* size_policy() { return NULL; } + + // The rem set and barrier set. + G1RemSet* g1_rem_set() const { return _g1_rem_set; } + ModRefBarrierSet* mr_bs() const { return _mr_bs; } + + // The rem set iterator. + HeapRegionRemSetIterator* rem_set_iterator(int i) { + return _rem_set_iterator[i]; + } + + HeapRegionRemSetIterator* rem_set_iterator() { + return _rem_set_iterator[0]; + } + + unsigned get_gc_time_stamp() { + return _gc_time_stamp; + } + + void reset_gc_time_stamp() { + _gc_time_stamp = 0; + OrderAccess::fence(); + } + + void increment_gc_time_stamp() { + ++_gc_time_stamp; + OrderAccess::fence(); + } + + void iterate_dirty_card_closure(bool concurrent, int worker_i); + + // The shared block offset table array. + G1BlockOffsetSharedArray* bot_shared() const { return _bot_shared; } + + // Reference Processing accessor + ReferenceProcessor* ref_processor() { return _ref_processor; } + + // Reserved (g1 only; super method includes perm), capacity and the used + // portion in bytes. + size_t g1_reserved_obj_bytes() { return _g1_reserved.byte_size(); } + virtual size_t capacity() const; + virtual size_t used() const; + size_t recalculate_used() const; +#ifndef PRODUCT + size_t recalculate_used_regions() const; +#endif // PRODUCT + + // These virtual functions do the actual allocation. 
+ virtual HeapWord* mem_allocate(size_t word_size, + bool is_noref, + bool is_tlab, + bool* gc_overhead_limit_was_exceeded); + + // Some heaps may offer a contiguous region for shared non-blocking + // allocation, via inlined code (by exporting the address of the top and + // end fields defining the extent of the contiguous allocation region.) + // But G1CollectedHeap doesn't yet support this. + + // Return an estimate of the maximum allocation that could be performed + // without triggering any collection or expansion activity. In a + // generational collector, for example, this is probably the largest + // allocation that could be supported (without expansion) in the youngest + // generation. It is "unsafe" because no locks are taken; the result + // should be treated as an approximation, not a guarantee, for use in + // heuristic resizing decisions. + virtual size_t unsafe_max_alloc(); + + // True iff none of "_g1_storage" remains uncommitted. + virtual bool is_maximal_no_gc() const { + return _g1_storage.uncommitted_size() == 0; + } + + // The total number of regions in the heap. + size_t n_regions(); + + // Presumably the maximum number of regions the heap can hold. + // NOTE(review): this comment used to duplicate the one for free_regions(); + // confirm against the definition. + size_t max_regions(); + + // The number of regions that are completely free. + size_t free_regions(); + + // The number of regions that are not completely free. + size_t used_regions() { return n_regions() - free_regions(); } + + // True iff the ZF thread should run. + bool should_zf(); + + // The number of regions available for "regular" expansion. 
+ size_t expansion_regions() { return _expansion_regions; } + +#ifndef PRODUCT + bool regions_accounted_for(); + bool print_region_accounting_info(); + void print_region_counts(); +#endif + + HeapRegion* alloc_region_from_unclean_list(bool zero_filled); + HeapRegion* alloc_region_from_unclean_list_locked(bool zero_filled); + + void put_region_on_unclean_list(HeapRegion* r); + void put_region_on_unclean_list_locked(HeapRegion* r); + + void prepend_region_list_on_unclean_list(UncleanRegionList* list); + void prepend_region_list_on_unclean_list_locked(UncleanRegionList* list); + + void set_unclean_regions_coming(bool b); + void set_unclean_regions_coming_locked(bool b); + // Wait for cleanup to be complete. + void wait_for_cleanup_complete(); + // Like above, but assumes that the calling thread owns the Heap_lock. + void wait_for_cleanup_complete_locked(); + + // Return the head of the unclean list. + HeapRegion* peek_unclean_region_list_locked(); + // Remove and return the head of the unclean list. + HeapRegion* pop_unclean_region_list_locked(); + + // List of regions which are zero filled and ready for allocation. + HeapRegion* _free_region_list; + // Number of elements on the free list. + size_t _free_region_list_size; + + // If the head of the unclean list is ZeroFilled, move it to the free + // list. + bool move_cleaned_region_to_free_list_locked(); + bool move_cleaned_region_to_free_list(); + + void put_free_region_on_list_locked(HeapRegion* r); + void put_free_region_on_list(HeapRegion* r); + + // Remove and return the head element of the free list. + HeapRegion* pop_free_region_list_locked(); + + // If "zero_filled" is true, we first try the free list, then we try the + // unclean list, zero-filling the result. If "zero_filled" is false, we + // first try the unclean list, then the zero-filled list. + HeapRegion* alloc_free_region_from_lists(bool zero_filled); + + // Verify the integrity of the region lists. 
+ void remove_allocated_regions_from_lists(); + bool verify_region_lists(); + bool verify_region_lists_locked(); + size_t unclean_region_list_length(); + size_t free_region_list_length(); + + // Perform a collection of the heap; intended for use in implementing + // "System.gc". This probably implies as full a collection as the + // "CollectedHeap" supports. + virtual void collect(GCCause::Cause cause); + + // The same as above but assumes that the caller holds the Heap_lock. + void collect_locked(GCCause::Cause cause); + + // This interface assumes that it's being called by the + // vm thread. It collects the heap assuming that the + // heap lock is already held and that we are executing in + // the context of the vm thread. + virtual void collect_as_vm_thread(GCCause::Cause cause); + + // True iff an evacuation has failed in the most-recent collection. + bool evacuation_failed() { return _evacuation_failed; } + + // Free a region if it is totally full of garbage. Returns the number of + // bytes freed (0 ==> didn't free it). + size_t free_region_if_totally_empty(HeapRegion *hr); + void free_region_if_totally_empty_work(HeapRegion *hr, + size_t& pre_used, + size_t& cleared_h_regions, + size_t& freed_regions, + UncleanRegionList* list, + bool par = false); + + // If we've done free region work that yields the given changes, update + // the relevant global variables. + void finish_free_region_work(size_t pre_used, + size_t cleared_h_regions, + size_t freed_regions, + UncleanRegionList* list); + + + // Returns "TRUE" iff "p" points into the allocated area of the heap. + virtual bool is_in(const void* p) const; + + // Return "TRUE" iff the given object address is within the collection + // set. + inline bool obj_in_cs(oop obj); + + // Return "TRUE" iff the given object address is in the reserved + // region of g1 (excluding the permanent generation).
+ bool is_in_g1_reserved(const void* p) const { + return _g1_reserved.contains(p); + } + + // Returns a MemRegion that corresponds to the space that has been + // committed in the heap + MemRegion g1_committed() { + return _g1_committed; + } + + NOT_PRODUCT( bool is_in_closed_subset(const void* p) const; ) + + // Dirty card table entries covering a list of young regions. + void dirtyCardsForYoungRegions(CardTableModRefBS* ct_bs, HeapRegion* list); + + // This resets the card table to all zeros. It is used after + // a collection pause which used the card table to claim cards. + void cleanUpCardTable(); + + // Iteration functions. + + // Iterate over all the ref-containing fields of all objects, calling + // "cl.do_oop" on each. + virtual void oop_iterate(OopClosure* cl); + + // Same as above, restricted to a memory region. + virtual void oop_iterate(MemRegion mr, OopClosure* cl); + + // Iterate over all objects, calling "cl.do_object" on each. + virtual void object_iterate(ObjectClosure* cl); + + // Iterate over all objects allocated since the last collection, calling + // "cl.do_object" on each. The heap must have been initialized properly + // to support this function, or else this call will fail. + virtual void object_iterate_since_last_GC(ObjectClosure* cl); + + // Iterate over all spaces in use in the heap, in ascending address order. + virtual void space_iterate(SpaceClosure* cl); + + // Iterate over heap regions, in address order, terminating the + // iteration early if the "doHeapRegion" method returns "true". + void heap_region_iterate(HeapRegionClosure* blk); + + // Iterate over heap regions starting with r (or the first region if "r" + // is NULL), in address order, terminating early if the "doHeapRegion" + // method returns "true". + void heap_region_iterate_from(HeapRegion* r, HeapRegionClosure* blk); + + // As above but starting from the region at index idx. 
+ void heap_region_iterate_from(int idx, HeapRegionClosure* blk); + + HeapRegion* region_at(size_t idx); + + // Divide the heap region sequence into "chunks" of some size (the number + // of regions divided by the number of parallel threads times some + // overpartition factor, currently 4). Assumes that this will be called + // in parallel by ParallelGCThreads worker threads with distinct worker + // ids in the range [0..max(ParallelGCThreads-1, 1)], that all parallel + // calls will use the same "claim_value", and that that claim value is + // different from the claim_value of any heap region before the start of + // the iteration. Applies "blk->doHeapRegion" to each of the regions, by + // attempting to claim the first region in each chunk, and, if + // successful, applying the closure to each region in the chunk (and + // setting the claim value of the second and subsequent regions of the + // chunk.) For now requires that "doHeapRegion" always returns "false", + // i.e., that a closure never attempts to abort a traversal. + void heap_region_par_iterate_chunked(HeapRegionClosure* blk, + int worker, + jint claim_value); + + // Resets all the region claim values to the default. + void reset_heap_region_claim_values(); + +#ifdef ASSERT + bool check_heap_region_claim_values(jint claim_value); +#endif // ASSERT + + // Iterate over the regions (if any) in the current collection set. + void collection_set_iterate(HeapRegionClosure* blk); + + // As above but starting from region r + void collection_set_iterate_from(HeapRegion* r, HeapRegionClosure *blk); + + // Returns the first (lowest address) compactible space in the heap. + virtual CompactibleSpace* first_compactible_space(); + + // A CollectedHeap will contain some number of spaces. This finds the + // space containing a given address, or else returns NULL. + virtual Space* space_containing(const void* addr) const; + + // A G1CollectedHeap will contain some number of heap regions.
This + // finds the region containing a given address, or else returns NULL. + HeapRegion* heap_region_containing(const void* addr) const; + + // Like the above, but requires "addr" to be in the heap (to avoid a + // null-check), and unlike the above, may return a continuing humongous + // region. + HeapRegion* heap_region_containing_raw(const void* addr) const; + + // A CollectedHeap is divided into a dense sequence of "blocks"; that is, + // each address in the (reserved) heap is a member of exactly + // one block. The defining characteristic of a block is that it is + // possible to find its size, and thus to progress forward to the next + // block. (Blocks may be of different sizes.) Thus, blocks may + // represent Java objects, or they might be free blocks in a + // free-list-based heap (or subheap), as long as the two kinds are + // distinguishable and the size of each is determinable. + + // Returns the address of the start of the "block" that contains the + // address "addr". We say "blocks" instead of "object" since some heaps + // may not pack objects densely; a chunk may either be an object or a + // non-object. + virtual HeapWord* block_start(const void* addr) const; + + // Requires "addr" to be the start of a chunk, and returns its size. + // "addr + size" is required to be the start of a new chunk, or the end + // of the active area of the heap. + virtual size_t block_size(const HeapWord* addr) const; + + // Requires "addr" to be the start of a block, and returns "TRUE" iff + // the block is an object. + virtual bool block_is_obj(const HeapWord* addr) const; + + // Does this heap support heap inspection? (+PrintClassHistogram) + virtual bool supports_heap_inspection() const { return true; } + + // Section on thread-local allocation buffers (TLABs) + // See CollectedHeap for semantics.
+ + virtual bool supports_tlab_allocation() const; + virtual size_t tlab_capacity(Thread* thr) const; + virtual size_t unsafe_max_tlab_alloc(Thread* thr) const; + virtual HeapWord* allocate_new_tlab(size_t size); + + // Can a compiler initialize a new object without store barriers? + // This permission only extends from the creation of a new object + // via a TLAB up to the first subsequent safepoint. + virtual bool can_elide_tlab_store_barriers() const { + // No, since G1's TLABs may, on occasion, come from non-young regions + // as well. (Is there a flag controlling that? XXX) + return false; + } + + // Can a compiler elide a store barrier when it writes + // a permanent oop into the heap? Applies when the compiler + // is storing x to the heap, where x->is_perm() is true. + virtual bool can_elide_permanent_oop_store_barriers() const { + // At least until perm gen collection is also G1-ified, at + // which point this should return false. + return true; + } + + virtual bool allocs_are_zero_filled(); + + // The boundary between a "large" and "small" array of primitives, in + // words. + virtual size_t large_typearray_limit(); + + // All popular objects are guaranteed to have addresses below this + // boundary. + HeapWord* popular_object_boundary() { + return _popular_object_boundary; + } + + // Declare the region as one that should be evacuated because its + // remembered set is too large. + void schedule_popular_region_evac(HeapRegion* r); + // If there is a popular region to evacuate, remove it from the list + // and return it. + HeapRegion* popular_region_to_evac(); + // Evacuate the given popular region. + void evac_popular_region(HeapRegion* r); + + // Returns "true" iff the given word_size is "very large", i.e. at least VeryLargeInWords. + static bool isHumongous(size_t word_size) { + return word_size >= VeryLargeInWords; + } + + // Update mod union table with the set of dirty cards. + void updateModUnion(); + + // Set the mod union bits corresponding to the given memRegion.
Note + // that this is always a safe operation, since it doesn't clear any + // bits. + void markModUnionRange(MemRegion mr); + + // Records the fact that a marking phase is no longer in progress. + void set_marking_complete() { + _mark_in_progress = false; + } + void set_marking_started() { + _mark_in_progress = true; + } + bool mark_in_progress() { + return _mark_in_progress; + } + + // Return the maximum heap capacity. + virtual size_t max_capacity() const; + + virtual jlong millis_since_last_gc(); + + // Perform any cleanup actions necessary before allowing a verification. + virtual void prepare_for_verify(); + + // Perform verification. + virtual void verify(bool allow_dirty, bool silent); + virtual void print() const; + virtual void print_on(outputStream* st) const; + + virtual void print_gc_threads_on(outputStream* st) const; + virtual void gc_threads_do(ThreadClosure* tc) const; + + // Override + void print_tracing_info() const; + + // If "addr" is a pointer into the (reserved?) heap, returns a positive + // number indicating the "arena" within the heap in which "addr" falls. + // Or else returns 0. + virtual int addr_to_arena_id(void* addr) const; + + // Convenience function to be used in situations where the heap type can be + // asserted to be this type.
+ static G1CollectedHeap* heap(); + + void empty_young_list(); + bool should_set_young_locked(); + + void set_region_short_lived_locked(HeapRegion* hr); + // add appropriate methods for any other surv rate groups + + void young_list_rs_length_sampling_init() { + _young_list->rs_length_sampling_init(); + } + bool young_list_rs_length_sampling_more() { + return _young_list->rs_length_sampling_more(); + } + void young_list_rs_length_sampling_next() { + _young_list->rs_length_sampling_next(); + } + size_t young_list_sampled_rs_lengths() { + return _young_list->sampled_rs_lengths(); + } + + size_t young_list_length() { return _young_list->length(); } + size_t young_list_scan_only_length() { + return _young_list->scan_only_length(); } + + HeapRegion* pop_region_from_young_list() { + return _young_list->pop_region(); + } + + HeapRegion* young_list_first_region() { + return _young_list->first_region(); + } + + // debugging + bool check_young_list_well_formed() { + return _young_list->check_list_well_formed(); + } + bool check_young_list_empty(bool ignore_scan_only_list, + bool check_sample = true); + + // *** Stuff related to concurrent marking. It's not clear to me that so + // many of these need to be public. + + // The functions below are helper functions that a subclass of + // "CollectedHeap" can use in the implementation of its virtual + // functions. + // This performs a concurrent marking of the live objects in a + // bitmap off to the side. + void doConcurrentMark(); + + // This is called from the marksweep collector which then does + // a concurrent mark and verifies that the results agree with + // the stop the world marking. + void checkConcurrentMark(); + void do_sync_mark(); + + bool isMarkedPrev(oop obj) const; + bool isMarkedNext(oop obj) const; + + // Determine if an object is dead, given the object and also + // the region to which the object belongs. An object is dead + // iff a) it was not allocated since the last mark and b) it + // is not marked. 
+ + bool is_obj_dead(const oop obj, const HeapRegion* hr) const { + return + !hr->obj_allocated_since_prev_marking(obj) && + !isMarkedPrev(obj); + } + + // This is used when copying an object to survivor space. + // If the object is marked live, then we mark the copy live. + // If the object is allocated since the start of this mark + // cycle, then we mark the copy live. + // If the object has been around since the previous mark + // phase, and hasn't been marked yet during this phase, + // then we don't mark it, we just wait for the + // current marking cycle to get to it. + + // This function returns true when an object has been + // around since the previous marking and hasn't yet + // been marked during this marking. + + bool is_obj_ill(const oop obj, const HeapRegion* hr) const { + return + !hr->obj_allocated_since_next_marking(obj) && + !isMarkedNext(obj); + } + + // Determine if an object is dead, given only the object itself. + // This will find the region to which the object belongs and + // then call the region version of the same function. + + // If no region contains it: an object in the permanent gen isn't dead, + // and NULL isn't dead; anything else is reported as dead. + + bool is_obj_dead(oop obj) { + HeapRegion* hr = heap_region_containing(obj); + if (hr == NULL) { + if (Universe::heap()->is_in_permanent(obj)) + return false; + else if (obj == NULL) return false; + else return true; + } + else return is_obj_dead(obj, hr); + } + + bool is_obj_ill(oop obj) { + HeapRegion* hr = heap_region_containing(obj); + if (hr == NULL) { + if (Universe::heap()->is_in_permanent(obj)) + return false; + else if (obj == NULL) return false; + else return true; + } + else return is_obj_ill(obj, hr); + } + + // The following is just to alert the verification code + // that a full collection has occurred and that the + // remembered sets are no longer up to date.
+ bool _full_collection; + void set_full_collection() { _full_collection = true;} + void clear_full_collection() {_full_collection = false;} + bool full_collection() {return _full_collection;} + + ConcurrentMark* concurrent_mark() const { return _cm; } + ConcurrentG1Refine* concurrent_g1_refine() const { return _cg1r; } + +public: + void stop_conc_gc_threads(); + + // + + double predict_region_elapsed_time_ms(HeapRegion* hr, bool young); + void check_if_region_is_too_expensive(double predicted_time_ms); + size_t pending_card_num(); + size_t max_pending_card_num(); + size_t cards_scanned(); + + // + +protected: + size_t _max_heap_capacity; + +// debug_only(static void check_for_valid_allocation_state();) + +public: + // Temporary: call to mark things unimplemented for the G1 heap (e.g., + // MemoryService). In productization, we can make this assert false + // to catch such places (as well as searching for calls to this...) + static void g1_unimplemented(); + +}; + +// Local Variables: *** +// c-indentation-style: gnu *** +// End: *** diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp new file mode 100644 index 00000000000..8cafe3d9885 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp @@ -0,0 +1,91 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// Inline functions for G1CollectedHeap + +inline HeapRegion* +G1CollectedHeap::heap_region_containing(const void* addr) const { + HeapRegion* hr = _hrs->addr_to_region(addr); + // hr can be null if addr in perm_gen + if (hr != NULL && hr->continuesHumongous()) { + hr = hr->humongous_start_region(); + } + return hr; +} + +inline HeapRegion* +G1CollectedHeap::heap_region_containing_raw(const void* addr) const { + HeapRegion* res = _hrs->addr_to_region(addr); + assert(res != NULL, "addr outside of heap?"); + return res; +} + +inline bool G1CollectedHeap::obj_in_cs(oop obj) { + HeapRegion* r = _hrs->addr_to_region(obj); + return r != NULL && r->in_collection_set(); +} + +inline HeapWord* G1CollectedHeap::attempt_allocation(size_t word_size, + bool permit_collection_pause) { + HeapWord* res = NULL; + + assert( SafepointSynchronize::is_at_safepoint() || + Heap_lock->owned_by_self(), "pre-condition of the call" ); + + if (_cur_alloc_region != NULL) { + + // If this allocation causes a region to become non empty, + // then we need to update our free_regions count. 
+ + if (_cur_alloc_region->is_empty()) { + res = _cur_alloc_region->allocate(word_size); + if (res != NULL) + _free_regions--; + } else { + res = _cur_alloc_region->allocate(word_size); + } + } + if (res != NULL) { + if (!SafepointSynchronize::is_at_safepoint()) { + assert( Heap_lock->owned_by_self(), "invariant" ); + Heap_lock->unlock(); + } + return res; + } + // attempt_allocation_slow will also unlock the heap lock when appropriate. + return attempt_allocation_slow(word_size, permit_collection_pause); +} + +inline RefToScanQueue* G1CollectedHeap::task_queue(int i) { + return _task_queues->queue(i); +} + + +inline bool G1CollectedHeap::isMarkedPrev(oop obj) const { + return _cm->prevMarkBitMap()->isMarked((HeapWord *)obj); +} + +inline bool G1CollectedHeap::isMarkedNext(oop obj) const { + return _cm->nextMarkBitMap()->isMarked((HeapWord *)obj); +} diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp new file mode 100644 index 00000000000..97e697c8073 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp @@ -0,0 +1,3163 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_g1CollectorPolicy.cpp.incl" + +#define PREDICTIONS_VERBOSE 0 + +// + +// Different defaults for different number of GC threads +// They were chosen by running GCOld and SPECjbb on debris with different +// numbers of GC threads and choosing them based on the results + +// all the same +static double rs_length_diff_defaults[] = { + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 +}; + +static double cost_per_card_ms_defaults[] = { + 0.01, 0.005, 0.005, 0.003, 0.003, 0.002, 0.002, 0.0015 +}; + +static double cost_per_scan_only_region_ms_defaults[] = { + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 +}; + +// all the same +static double fully_young_cards_per_entry_ratio_defaults[] = { + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 +}; + +static double cost_per_entry_ms_defaults[] = { + 0.015, 0.01, 0.01, 0.008, 0.008, 0.0055, 0.0055, 0.005 +}; + +static double cost_per_byte_ms_defaults[] = { + 0.00006, 0.00003, 0.00003, 0.000015, 0.000015, 0.00001, 0.00001, 0.000009 +}; + +// these should be pretty consistent +static double constant_other_time_ms_defaults[] = { + 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0 +}; + + +static double young_other_cost_per_region_ms_defaults[] = { + 0.3, 0.2, 0.2, 0.15, 0.15, 0.12, 0.12, 0.1 +}; + +static double non_young_other_cost_per_region_ms_defaults[] = { + 1.0, 0.7, 0.7, 0.5, 0.5, 0.42, 0.42, 0.30 +}; + +// + +G1CollectorPolicy::G1CollectorPolicy() : + _parallel_gc_threads((ParallelGCThreads > 0) ? 
ParallelGCThreads : 1), + _n_pauses(0), + _recent_CH_strong_roots_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), + _recent_G1_strong_roots_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), + _recent_evac_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), + _recent_pause_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), + _recent_rs_sizes(new TruncatedSeq(NumPrevPausesForHeuristics)), + _recent_gc_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), + _all_pause_times_ms(new NumberSeq()), + _stop_world_start(0.0), + _all_stop_world_times_ms(new NumberSeq()), + _all_yield_times_ms(new NumberSeq()), + + _all_mod_union_times_ms(new NumberSeq()), + + _non_pop_summary(new NonPopSummary()), + _pop_summary(new PopSummary()), + _non_pop_abandoned_summary(new NonPopAbandonedSummary()), + _pop_abandoned_summary(new PopAbandonedSummary()), + + _cur_clear_ct_time_ms(0.0), + + _region_num_young(0), + _region_num_tenured(0), + _prev_region_num_young(0), + _prev_region_num_tenured(0), + + _aux_num(10), + _all_aux_times_ms(new NumberSeq[_aux_num]), + _cur_aux_start_times_ms(new double[_aux_num]), + _cur_aux_times_ms(new double[_aux_num]), + _cur_aux_times_set(new bool[_aux_num]), + + _pop_compute_rc_start(0.0), + _pop_evac_start(0.0), + + _concurrent_mark_init_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), + _concurrent_mark_remark_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), + _concurrent_mark_cleanup_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), + + // + + _alloc_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _prev_collection_pause_end_ms(0.0), + _pending_card_diff_seq(new TruncatedSeq(TruncatedSeqLength)), + _rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)), + _cost_per_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _cost_per_scan_only_region_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _fully_young_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)), + 
_partially_young_cards_per_entry_ratio_seq( + new TruncatedSeq(TruncatedSeqLength)), + _cost_per_entry_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _partially_young_cost_per_entry_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _cost_per_byte_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _cost_per_byte_ms_during_cm_seq(new TruncatedSeq(TruncatedSeqLength)), + _cost_per_scan_only_region_ms_during_cm_seq(new TruncatedSeq(TruncatedSeqLength)), + _constant_other_time_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _young_other_cost_per_region_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _non_young_other_cost_per_region_ms_seq( + new TruncatedSeq(TruncatedSeqLength)), + + _pending_cards_seq(new TruncatedSeq(TruncatedSeqLength)), + _scanned_cards_seq(new TruncatedSeq(TruncatedSeqLength)), + _rs_lengths_seq(new TruncatedSeq(TruncatedSeqLength)), + + _pause_time_target_ms((double) G1MaxPauseTimeMS), + + // + + _in_young_gc_mode(false), + _full_young_gcs(true), + _full_young_pause_num(0), + _partial_young_pause_num(0), + + _during_marking(false), + _in_marking_window(false), + _in_marking_window_im(false), + + _known_garbage_ratio(0.0), + _known_garbage_bytes(0), + + _young_gc_eff_seq(new TruncatedSeq(TruncatedSeqLength)), + _target_pause_time_ms(-1.0), + + _recent_prev_end_times_for_all_gcs_sec(new TruncatedSeq(NumPrevPausesForHeuristics)), + + _recent_CS_bytes_used_before(new TruncatedSeq(NumPrevPausesForHeuristics)), + _recent_CS_bytes_surviving(new TruncatedSeq(NumPrevPausesForHeuristics)), + + _recent_avg_pause_time_ratio(0.0), + _num_markings(0), + _n_marks(0), + _n_pauses_at_mark_end(0), + + _all_full_gc_times_ms(new NumberSeq()), + + _conc_refine_enabled(0), + _conc_refine_zero_traversals(0), + _conc_refine_max_traversals(0), + _conc_refine_current_delta(G1ConcRefineInitialDelta), + + // G1PausesBtwnConcMark defaults to -1 + // so the hack is to do the cast QQQ FIXME + _pauses_btwn_concurrent_mark((size_t)G1PausesBtwnConcMark), + 
_n_marks_since_last_pause(0), + _conc_mark_initiated(false), + _should_initiate_conc_mark(false), + _should_revert_to_full_young_gcs(false), + _last_full_young_gc(false), + + _prev_collection_pause_used_at_end_bytes(0), + + _collection_set(NULL), +#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away +#pragma warning( disable:4355 ) // 'this' : used in base member initializer list +#endif // _MSC_VER + + _short_lived_surv_rate_group(new SurvRateGroup(this, "Short Lived", + G1YoungSurvRateNumRegionsSummary)), + _survivor_surv_rate_group(new SurvRateGroup(this, "Survivor", + G1YoungSurvRateNumRegionsSummary)) + // add here any more surv rate groups +{ + _recent_prev_end_times_for_all_gcs_sec->add(os::elapsedTime()); + _prev_collection_pause_end_ms = os::elapsedTime() * 1000.0; + + _par_last_ext_root_scan_times_ms = new double[_parallel_gc_threads]; + _par_last_mark_stack_scan_times_ms = new double[_parallel_gc_threads]; + _par_last_scan_only_times_ms = new double[_parallel_gc_threads]; + _par_last_scan_only_regions_scanned = new double[_parallel_gc_threads]; + + _par_last_update_rs_start_times_ms = new double[_parallel_gc_threads]; + _par_last_update_rs_times_ms = new double[_parallel_gc_threads]; + _par_last_update_rs_processed_buffers = new double[_parallel_gc_threads]; + + _par_last_scan_rs_start_times_ms = new double[_parallel_gc_threads]; + _par_last_scan_rs_times_ms = new double[_parallel_gc_threads]; + _par_last_scan_new_refs_times_ms = new double[_parallel_gc_threads]; + + _par_last_obj_copy_times_ms = new double[_parallel_gc_threads]; + + _par_last_termination_times_ms = new double[_parallel_gc_threads]; + + // we store the data from the first pass during popularity pauses + _pop_par_last_update_rs_start_times_ms = new double[_parallel_gc_threads]; + _pop_par_last_update_rs_times_ms = new double[_parallel_gc_threads]; + _pop_par_last_update_rs_processed_buffers = new double[_parallel_gc_threads]; + + _pop_par_last_scan_rs_start_times_ms 
= new double[_parallel_gc_threads]; + _pop_par_last_scan_rs_times_ms = new double[_parallel_gc_threads]; + + _pop_par_last_closure_app_times_ms = new double[_parallel_gc_threads]; + + // start conservatively + _expensive_region_limit_ms = 0.5 * (double) G1MaxPauseTimeMS; + + // Seed the predictors with per-thread-count defaults: 1..8 GC threads map to slots 0..7, 0 threads uses slot 0, more than 8 uses slot 7. + + int index; + if (ParallelGCThreads == 0) + index = 0; + else if (ParallelGCThreads > 8) + index = 7; + else + index = ParallelGCThreads - 1; + + _pending_card_diff_seq->add(0.0); + _rs_length_diff_seq->add(rs_length_diff_defaults[index]); + _cost_per_card_ms_seq->add(cost_per_card_ms_defaults[index]); + _cost_per_scan_only_region_ms_seq->add( + cost_per_scan_only_region_ms_defaults[index]); + _fully_young_cards_per_entry_ratio_seq->add( + fully_young_cards_per_entry_ratio_defaults[index]); + _cost_per_entry_ms_seq->add(cost_per_entry_ms_defaults[index]); + _cost_per_byte_ms_seq->add(cost_per_byte_ms_defaults[index]); + _constant_other_time_ms_seq->add(constant_other_time_ms_defaults[index]); + _young_other_cost_per_region_ms_seq->add( + young_other_cost_per_region_ms_defaults[index]); + _non_young_other_cost_per_region_ms_seq->add( + non_young_other_cost_per_region_ms_defaults[index]); + + // Both flags are converted from ms to seconds; the pause budget must be strictly smaller than the time slice. + + double time_slice = (double) G1TimeSliceMS / 1000.0; + double max_gc_time = (double) G1MaxPauseTimeMS / 1000.0; + guarantee(max_gc_time < time_slice, + "Max GC time should be less than the time slice"); + _mmu_tracker = new G1MMUTrackerQueue(time_slice, max_gc_time); + _sigma = (double) G1ConfidencePerc / 100.0; + + // start conservatively (around 50ms is about right) + _concurrent_mark_init_times_ms->add(0.05); + _concurrent_mark_remark_times_ms->add(0.05); + _concurrent_mark_cleanup_times_ms->add(0.20); + _tenuring_threshold = MaxTenuringThreshold; + + initialize_all(); +} + +// Increment "i", mod "len" +static void inc_mod(int& i, int len) { + i++; if (i == len) i = 0; +} + +void G1CollectorPolicy::initialize_flags() { + set_min_alignment(HeapRegion::GrainBytes); +
set_max_alignment(GenRemSet::max_alignment_constraint(rem_set_name())); + CollectorPolicy::initialize_flags(); +} + +void G1CollectorPolicy::init() { + // Set aside an initial future to_space. + _g1 = G1CollectedHeap::heap(); + size_t regions = Universe::heap()->capacity() / HeapRegion::GrainBytes; + + assert(Heap_lock->owned_by_self(), "Locking discipline."); + + if (G1SteadyStateUsed < 50) { + vm_exit_during_initialization("G1SteadyStateUsed must be at least 50%."); + } + if (UseConcMarkSweepGC) { + vm_exit_during_initialization("-XX:+UseG1GC is incompatible with " + "-XX:+UseConcMarkSweepGC."); + } + + if (G1Gen) { + _in_young_gc_mode = true; + + if (G1YoungGenSize == 0) { + set_adaptive_young_list_length(true); + _young_list_fixed_length = 0; + } else { + set_adaptive_young_list_length(false); + _young_list_fixed_length = (G1YoungGenSize / HeapRegion::GrainBytes); + } + _free_regions_at_end_of_collection = _g1->free_regions(); + _scan_only_regions_at_end_of_collection = 0; + calculate_young_list_min_length(); + guarantee( _young_list_min_length == 0, "invariant, not enough info" ); + calculate_young_list_target_config(); + } else { + _young_list_fixed_length = 0; + _in_young_gc_mode = false; + } +} + +void G1CollectorPolicy::calculate_young_list_min_length() { + _young_list_min_length = 0; + + if (!adaptive_young_list_length()) + return; + + if (_alloc_rate_ms_seq->num() > 3) { + double now_sec = os::elapsedTime(); + double when_ms = _mmu_tracker->when_max_gc_sec(now_sec) * 1000.0; + double alloc_rate_ms = predict_alloc_rate_ms(); + int min_regions = (int) ceil(alloc_rate_ms * when_ms); + int current_region_num = (int) _g1->young_list_length(); + _young_list_min_length = min_regions + current_region_num; + } +} + +void G1CollectorPolicy::calculate_young_list_target_config() { + if (adaptive_young_list_length()) { + size_t rs_lengths = (size_t) get_new_prediction(_rs_lengths_seq); + calculate_young_list_target_config(rs_lengths); + } else { + if 
(full_young_gcs()) + _young_list_target_length = _young_list_fixed_length; + else + _young_list_target_length = _young_list_fixed_length / 2; + _young_list_target_length = MAX2(_young_list_target_length, (size_t)1); + size_t so_length = calculate_optimal_so_length(_young_list_target_length); + guarantee( so_length < _young_list_target_length, "invariant" ); + _young_list_so_prefix_length = so_length; + } +} + +// This method calculates the optimal scan-only set for a fixed young +// gen size. I couldn't work out how to reuse the more elaborate one, +// i.e. calculate_young_list_target_config(rs_length), as the loops are +// fundamentally different (the other one finds a config for different +// S-O lengths, whereas here we need to do the opposite). +size_t G1CollectorPolicy::calculate_optimal_so_length( + size_t young_list_length) { + if (!G1UseScanOnlyPrefix) + return 0; + + if (_all_pause_times_ms->num() < 3) { + // we won't use a scan-only set at the beginning to allow the rest + // of the predictors to warm up + return 0; + } + + if (_cost_per_scan_only_region_ms_seq->num() < 3) { + // then, we'll only set the S-O set to 1 for a little bit of time, + // to get enough information on the scanning cost; stay at 0 when the young list has a single region, since the result must be strictly shorter than young_list_length + return (young_list_length > 1) ? 1 : 0; + } + + size_t pending_cards = (size_t) get_new_prediction(_pending_cards_seq); + size_t rs_lengths = (size_t) get_new_prediction(_rs_lengths_seq); + size_t adj_rs_lengths = rs_lengths + predict_rs_length_diff(); + size_t scanned_cards; + if (full_young_gcs()) + scanned_cards = predict_young_card_num(adj_rs_lengths); + else + scanned_cards = predict_non_young_card_num(adj_rs_lengths); + double base_time_ms = predict_base_elapsed_time_ms(pending_cards, + scanned_cards); + + size_t so_length = 0; + double max_gc_eff = 0.0; + for (size_t i = 0; i < young_list_length; ++i) { + double gc_eff = 0.0; + double pause_time_ms = 0.0; + predict_gc_eff(young_list_length, i, base_time_ms, + &gc_eff, &pause_time_ms); + if (gc_eff > max_gc_eff) { + max_gc_eff = gc_eff; +
so_length = i; + } + } + + // set it to 95% of the optimal to make sure we sample the "area" + // around the optimal length to get up-to-date survival rate data + return so_length * 950 / 1000; +} + +// This is a really cool piece of code! It finds the best +// target configuration (young length / scan-only prefix length) so +// that GC efficiency is maximized and that we also meet a pause +// time. It's a triple nested loop. These loops are explained below +// from the inside-out :-) +// +// (a) The innermost loop will try to find the optimal young length +// for a fixed S-O length. It uses a binary search to speed up the +// process. We assume that, for a fixed S-O length, as we add more +// young regions to the CSet, the GC efficiency will only go up (I'll +// skip the proof). So, using a binary search to optimize this process +// makes perfect sense. +// +// (b) The middle loop will fix the S-O length before calling the +// innermost one. It will vary it between two parameters, increasing +// it by a given increment. +// +// (c) The outermost loop will call the middle loop three times. +// (1) The first time it will explore all possible S-O length values +// from 0 to as large as it can get, using a coarse increment (to +// quickly "home in" to where the optimal seems to be). +// (2) The second time it will explore the values around the optimal +// that was found by the first iteration using a fine increment. +// (3) Once the optimal config has been determined by the second +// iteration, we'll redo the calculation, but setting the S-O length +// to 95% of the optimal to make sure we sample the "area" +// around the optimal length to get up-to-date survival rate data +// +// Termination conditions for the iterations are several: the pause +// time is over the limit, we do not have enough to-space, etc. 
+ +void G1CollectorPolicy::calculate_young_list_target_config(size_t rs_lengths) { + guarantee( adaptive_young_list_length(), "pre-condition" ); + + double start_time_sec = os::elapsedTime(); + size_t min_reserve_perc = MAX2((size_t)2, (size_t)G1MinReservePerc); + min_reserve_perc = MIN2((size_t) 50, min_reserve_perc); + size_t reserve_regions = + (size_t) ((double) min_reserve_perc * (double) _g1->n_regions() / 100.0); + + if (full_young_gcs() && _free_regions_at_end_of_collection > 0) { + // we are in fully-young mode and there are free regions in the heap + + size_t min_so_length = 0; + size_t max_so_length = 0; + + if (G1UseScanOnlyPrefix) { + if (_all_pause_times_ms->num() < 3) { + // we won't use a scan-only set at the beginning to allow the rest + // of the predictors to warm up + min_so_length = 0; + max_so_length = 0; + } else if (_cost_per_scan_only_region_ms_seq->num() < 3) { + // then, we'll only set the S-O set to 1 for a little bit of time, + // to get enough information on the scanning cost + min_so_length = 1; + max_so_length = 1; + } else if (_in_marking_window || _last_full_young_gc) { + // no S-O prefix during a marking phase either, as at the end + // of the marking phase we'll have to use a very small young + // length target to fill up the rest of the CSet with + // non-young regions and, if we have lots of scan-only regions + // left-over, we will not be able to add any more non-young + // regions. + min_so_length = 0; + max_so_length = 0; + } else { + // this is the common case; we'll never reach the maximum, we + // one of the end conditions will fire well before that + // (hopefully!) 
+ min_so_length = 0; + max_so_length = _free_regions_at_end_of_collection - 1; + } + } else { + // no S-O prefix, as the switch is not set, but we still need to + // do one iteration to calculate the best young target that + // meets the pause time; this way we reuse the same code instead + // of replicating it + min_so_length = 0; + max_so_length = 0; + } + + double target_pause_time_ms = _mmu_tracker->max_gc_time() * 1000.0; + size_t pending_cards = (size_t) get_new_prediction(_pending_cards_seq); + size_t adj_rs_lengths = rs_lengths + predict_rs_length_diff(); + size_t scanned_cards; + if (full_young_gcs()) + scanned_cards = predict_young_card_num(adj_rs_lengths); + else + scanned_cards = predict_non_young_card_num(adj_rs_lengths); + // calculate this once, so that we don't have to recalculate it in + // the innermost loop + double base_time_ms = predict_base_elapsed_time_ms(pending_cards, + scanned_cards); + + // the result + size_t final_young_length = 0; + size_t final_so_length = 0; + double final_gc_eff = 0.0; + // we'll also keep track of how many times we go into the inner loop + // this is for profiling reasons + size_t calculations = 0; + + // this determines which of the three iterations the outer loop is in + typedef enum { + pass_type_coarse, + pass_type_fine, + pass_type_final + } pass_type_t; + + // range of the outer loop's iteration + size_t from_so_length = min_so_length; + size_t to_so_length = max_so_length; + guarantee( from_so_length <= to_so_length, "invariant" ); + + // this will keep the S-O length that's found by the second + // iteration of the outer loop; we'll keep it just in case the third + // iteration fails to find something + size_t fine_so_length = 0; + + // the increment step for the coarse (first) iteration + size_t so_coarse_increments = 5; + + // the common case, we'll start with the coarse iteration + pass_type_t pass = pass_type_coarse; + size_t so_length_incr = so_coarse_increments; + + if (from_so_length == to_so_length) 
{ + // not point in doing the coarse iteration, we'll go directly into + // the fine one (we essentially trying to find the optimal young + // length for a fixed S-O length). + so_length_incr = 1; + pass = pass_type_final; + } else if (to_so_length - from_so_length < 3 * so_coarse_increments) { + // again, the range is too short so no point in foind the coarse + // iteration either + so_length_incr = 1; + pass = pass_type_fine; + } + + bool done = false; + // this is the outermost loop + while (!done) { +#if 0 + // leave this in for debugging, just in case + gclog_or_tty->print_cr("searching between " SIZE_FORMAT " and " SIZE_FORMAT + ", incr " SIZE_FORMAT ", pass %s", + from_so_length, to_so_length, so_length_incr, + (pass == pass_type_coarse) ? "coarse" : + (pass == pass_type_fine) ? "fine" : "final"); +#endif // 0 + + size_t so_length = from_so_length; + size_t init_free_regions = + MAX2((size_t)0, + _free_regions_at_end_of_collection + + _scan_only_regions_at_end_of_collection - reserve_regions); + + // this determines whether a configuration was found + bool gc_eff_set = false; + // this is the middle loop + while (so_length <= to_so_length) { + // base time, which excludes region-related time; again we + // calculate it once to avoid recalculating it in the + // innermost loop + double base_time_with_so_ms = + base_time_ms + predict_scan_only_time_ms(so_length); + // it's already over the pause target, go around + if (base_time_with_so_ms > target_pause_time_ms) + break; + + size_t starting_young_length = so_length+1; + + // we make sure that the short young length that makes sense + // (one more than the S-O length) is feasible + size_t min_young_length = starting_young_length; + double min_gc_eff; + bool min_ok; + ++calculations; + min_ok = predict_gc_eff(min_young_length, so_length, + base_time_with_so_ms, + init_free_regions, target_pause_time_ms, + &min_gc_eff); + + if (min_ok) { + // the shortest young length is indeed feasible; we'll know + // set up 
the max young length and we'll do a binary search + // between min_young_length and max_young_length + size_t max_young_length = _free_regions_at_end_of_collection - 1; + double max_gc_eff = 0.0; + bool max_ok = false; + + // the innermost loop! (finally!) + while (max_young_length > min_young_length) { + // we'll make sure that min_young_length is always at a + // feasible config + guarantee( min_ok, "invariant" ); + + ++calculations; + max_ok = predict_gc_eff(max_young_length, so_length, + base_time_with_so_ms, + init_free_regions, target_pause_time_ms, + &max_gc_eff); + + size_t diff = (max_young_length - min_young_length) / 2; + if (max_ok) { + min_young_length = max_young_length; + min_gc_eff = max_gc_eff; + min_ok = true; + } + max_young_length = min_young_length + diff; + } + + // the innermost loop found a config + guarantee( min_ok, "invariant" ); + if (min_gc_eff > final_gc_eff) { + // it's the best config so far, so we'll keep it + final_gc_eff = min_gc_eff; + final_young_length = min_young_length; + final_so_length = so_length; + gc_eff_set = true; + } + } + + // incremental the fixed S-O length and go around + so_length += so_length_incr; + } + + // this is the end of the outermost loop and we need to decide + // what to do during the next iteration + if (pass == pass_type_coarse) { + // we just did the coarse pass (first iteration) + + if (!gc_eff_set) + // we didn't find a feasible config so we'll just bail out; of + // course, it might be the case that we missed it; but I'd say + // it's a bit unlikely + done = true; + else { + // We did find a feasible config with optimal GC eff during + // the first pass. So the second pass we'll only consider the + // S-O lengths around that config with a fine increment. 
+ + guarantee( so_length_incr == so_coarse_increments, "invariant" ); + guarantee( final_so_length >= min_so_length, "invariant" ); + +#if 0 + // leave this in for debugging, just in case + gclog_or_tty->print_cr(" coarse pass: SO length " SIZE_FORMAT, + final_so_length); +#endif // 0 + + from_so_length = + (final_so_length - min_so_length > so_coarse_increments) ? + final_so_length - so_coarse_increments + 1 : min_so_length; + to_so_length = + (max_so_length - final_so_length > so_coarse_increments) ? + final_so_length + so_coarse_increments - 1 : max_so_length; + + pass = pass_type_fine; + so_length_incr = 1; + } + } else if (pass == pass_type_fine) { + // we just finished the second pass + + if (!gc_eff_set) { + // we didn't find a feasible config (yes, it's possible; + // notice that, sometimes, we go directly into the fine + // iteration and skip the coarse one) so we bail out + done = true; + } else { + // We did find a feasible config with optimal GC eff + guarantee( so_length_incr == 1, "invariant" ); + + if (final_so_length == 0) { + // The config is of an empty S-O set, so we'll just bail out + done = true; + } else { + // we'll go around once more, setting the S-O length to 95% + // of the optimal + size_t new_so_length = 950 * final_so_length / 1000; + +#if 0 + // leave this in for debugging, just in case + gclog_or_tty->print_cr(" fine pass: SO length " SIZE_FORMAT + ", setting it to " SIZE_FORMAT, + final_so_length, new_so_length); +#endif // 0 + + from_so_length = new_so_length; + to_so_length = new_so_length; + fine_so_length = final_so_length; + + pass = pass_type_final; + } + } + } else if (pass == pass_type_final) { + // we just finished the final (third) pass + + if (!gc_eff_set) + // we didn't find a feasible config, so we'll just use the one + // we found during the second pass, which we saved + final_so_length = fine_so_length; + + // and we're done! 
+ done = true; + } else { + guarantee( false, "should never reach here" ); + } + + // we now go around the outermost loop + } + + // we should have at least one region in the target young length + _young_list_target_length = MAX2((size_t) 1, final_young_length); + if (final_so_length >= final_young_length) + // and we need to ensure that the S-O length is not greater than + // the target young length (this is being a bit careful) + final_so_length = 0; + _young_list_so_prefix_length = final_so_length; + guarantee( !_in_marking_window || !_last_full_young_gc || + _young_list_so_prefix_length == 0, "invariant" ); + + // let's keep an eye of how long we spend on this calculation + // right now, I assume that we'll print it when we need it; we + // should really adde it to the breakdown of a pause + double end_time_sec = os::elapsedTime(); + double elapsed_time_ms = (end_time_sec - start_time_sec) * 1000.0; + +#if 0 + // leave this in for debugging, just in case + gclog_or_tty->print_cr("target = %1.1lf ms, young = " SIZE_FORMAT + ", SO = " SIZE_FORMAT ", " + "elapsed %1.2lf ms, calcs: " SIZE_FORMAT " (%s%s) " + SIZE_FORMAT SIZE_FORMAT, + target_pause_time_ms, + _young_list_target_length - _young_list_so_prefix_length, + _young_list_so_prefix_length, + elapsed_time_ms, + calculations, + full_young_gcs() ? "full" : "partial", + should_initiate_conc_mark() ? " i-m" : "", + in_marking_window(), + in_marking_window_im()); +#endif // 0 + + if (_young_list_target_length < _young_list_min_length) { + // bummer; this means that, if we do a pause when the optimal + // config dictates, we'll violate the pause spacing target (the + // min length was calculate based on the application's current + // alloc rate); + + // so, we have to bite the bullet, and allocate the minimum + // number. We'll violate our target, but we just can't meet it. 
+ + size_t so_length = 0; + // a note further up explains why we do not want an S-O length + // during marking + if (!_in_marking_window && !_last_full_young_gc) + // but we can still try to see whether we can find an optimal + // S-O length + so_length = calculate_optimal_so_length(_young_list_min_length); + +#if 0 + // leave this in for debugging, just in case + gclog_or_tty->print_cr("adjusted target length from " + SIZE_FORMAT " to " SIZE_FORMAT + ", SO " SIZE_FORMAT, + _young_list_target_length, _young_list_min_length, + so_length); +#endif // 0 + + _young_list_target_length = + MAX2(_young_list_min_length, (size_t)1); + _young_list_so_prefix_length = so_length; + } + } else { + // we are in a partially-young mode or we've run out of regions (due + // to evacuation failure) + +#if 0 + // leave this in for debugging, just in case + gclog_or_tty->print_cr("(partial) setting target to " SIZE_FORMAT + ", SO " SIZE_FORMAT, + _young_list_min_length, 0); +#endif // 0 + + // we'll do the pause as soon as possible and with no S-O prefix + // (see above for the reasons behind the latter) + _young_list_target_length = + MAX2(_young_list_min_length, (size_t) 1); + _young_list_so_prefix_length = 0; + } + + _rs_lengths_prediction = rs_lengths; +} + +// This is used by: calculate_optimal_so_length(length). 
It returns +// the GC eff and predicted pause time for a particular config +void +G1CollectorPolicy::predict_gc_eff(size_t young_length, + size_t so_length, + double base_time_ms, + double* ret_gc_eff, + double* ret_pause_time_ms) { + double so_time_ms = predict_scan_only_time_ms(so_length); + double accum_surv_rate_adj = 0.0; + if (so_length > 0) + accum_surv_rate_adj = accum_yg_surv_rate_pred((int)(so_length - 1)); + double accum_surv_rate = + accum_yg_surv_rate_pred((int)(young_length - 1)) - accum_surv_rate_adj; + size_t bytes_to_copy = + (size_t) (accum_surv_rate * (double) HeapRegion::GrainBytes); + double copy_time_ms = predict_object_copy_time_ms(bytes_to_copy); + double young_other_time_ms = + predict_young_other_time_ms(young_length - so_length); + double pause_time_ms = + base_time_ms + so_time_ms + copy_time_ms + young_other_time_ms; + size_t reclaimed_bytes = + (young_length - so_length) * HeapRegion::GrainBytes - bytes_to_copy; + double gc_eff = (double) reclaimed_bytes / pause_time_ms; + + *ret_gc_eff = gc_eff; + *ret_pause_time_ms = pause_time_ms; +} + +// This is used by: calculate_young_list_target_config(rs_length). It +// returns the GC eff of a particular config. It returns false if that +// config violates any of the end conditions of the search in the +// calling method, or true upon success. The end conditions were put +// here since it's called twice and it was best not to replicate them +// in the caller. Also, passing the parameteres avoids having to +// recalculate them in the innermost loop. 
+bool +G1CollectorPolicy::predict_gc_eff(size_t young_length, + size_t so_length, + double base_time_with_so_ms, + size_t init_free_regions, + double target_pause_time_ms, + double* ret_gc_eff) { + *ret_gc_eff = 0.0; + + if (young_length >= init_free_regions) + // end condition 1: not enough space for the young regions + return false; + + double accum_surv_rate_adj = 0.0; + if (so_length > 0) + accum_surv_rate_adj = accum_yg_surv_rate_pred((int)(so_length - 1)); + double accum_surv_rate = + accum_yg_surv_rate_pred((int)(young_length - 1)) - accum_surv_rate_adj; + size_t bytes_to_copy = + (size_t) (accum_surv_rate * (double) HeapRegion::GrainBytes); + double copy_time_ms = predict_object_copy_time_ms(bytes_to_copy); + double young_other_time_ms = + predict_young_other_time_ms(young_length - so_length); + double pause_time_ms = + base_time_with_so_ms + copy_time_ms + young_other_time_ms; + + if (pause_time_ms > target_pause_time_ms) + // end condition 2: over the target pause time + return false; + + size_t reclaimed_bytes = + (young_length - so_length) * HeapRegion::GrainBytes - bytes_to_copy; + size_t free_bytes = + (init_free_regions - young_length) * HeapRegion::GrainBytes; + + if ((2.0 + sigma()) * (double) bytes_to_copy > (double) free_bytes) + // end condition 3: out of to-space (conservatively) + return false; + + // success! 
+ double gc_eff = (double) reclaimed_bytes / pause_time_ms; + *ret_gc_eff = gc_eff; + + return true; +} + +void G1CollectorPolicy::check_prediction_validity() { + guarantee( adaptive_young_list_length(), "should not call this otherwise" ); + + size_t rs_lengths = _g1->young_list_sampled_rs_lengths(); + if (rs_lengths > _rs_lengths_prediction) { + // add 10% to avoid having to recalculate often + size_t rs_lengths_prediction = rs_lengths * 1100 / 1000; + calculate_young_list_target_config(rs_lengths_prediction); + } +} + +HeapWord* G1CollectorPolicy::mem_allocate_work(size_t size, + bool is_tlab, + bool* gc_overhead_limit_was_exceeded) { + guarantee(false, "Not using this policy feature yet."); + return NULL; +} + +// This method controls how a collector handles one or more +// of its generations being fully allocated. +HeapWord* G1CollectorPolicy::satisfy_failed_allocation(size_t size, + bool is_tlab) { + guarantee(false, "Not using this policy feature yet."); + return NULL; +} + + +#ifndef PRODUCT +bool G1CollectorPolicy::verify_young_ages() { + HeapRegion* head = _g1->young_list_first_region(); + return + verify_young_ages(head, _short_lived_surv_rate_group); + // also call verify_young_ages on any additional surv rate groups +} + +bool +G1CollectorPolicy::verify_young_ages(HeapRegion* head, + SurvRateGroup *surv_rate_group) { + guarantee( surv_rate_group != NULL, "pre-condition" ); + + const char* name = surv_rate_group->name(); + bool ret = true; + int prev_age = -1; + + for (HeapRegion* curr = head; + curr != NULL; + curr = curr->get_next_young_region()) { + SurvRateGroup* group = curr->surv_rate_group(); + if (group == NULL && !curr->is_survivor()) { + gclog_or_tty->print_cr("## %s: encountered NULL surv_rate_group", name); + ret = false; + } + + if (surv_rate_group == group) { + int age = curr->age_in_surv_rate_group(); + + if (age < 0) { + gclog_or_tty->print_cr("## %s: encountered negative age", name); + ret = false; + } + + if (age <= prev_age) { + 
gclog_or_tty->print_cr("## %s: region ages are not strictly increasing " + "(%d, %d)", name, age, prev_age); + ret = false; + } + prev_age = age; + } + } + + return ret; +} +#endif // PRODUCT + +void G1CollectorPolicy::record_full_collection_start() { + _cur_collection_start_sec = os::elapsedTime(); + // Release the future to-space so that it is available for compaction into. + _g1->set_full_collection(); +} + +void G1CollectorPolicy::record_full_collection_end() { + // Consider this like a collection pause for the purposes of allocation + // since last pause. + double end_sec = os::elapsedTime(); + double full_gc_time_sec = end_sec - _cur_collection_start_sec; + double full_gc_time_ms = full_gc_time_sec * 1000.0; + + checkpoint_conc_overhead(); + + _all_full_gc_times_ms->add(full_gc_time_ms); + + update_recent_gc_times(end_sec, full_gc_time_sec); + + _g1->clear_full_collection(); + + // "Nuke" the heuristics that control the fully/partially young GC + // transitions and make sure we start with fully young GCs after the + // Full GC. 
+ set_full_young_gcs(true); + _last_full_young_gc = false; + _should_revert_to_full_young_gcs = false; + _should_initiate_conc_mark = false; + _known_garbage_bytes = 0; + _known_garbage_ratio = 0.0; + _in_marking_window = false; + _in_marking_window_im = false; + + _short_lived_surv_rate_group->record_scan_only_prefix(0); + _short_lived_surv_rate_group->start_adding_regions(); + // also call this on any additional surv rate groups + + _prev_region_num_young = _region_num_young; + _prev_region_num_tenured = _region_num_tenured; + + _free_regions_at_end_of_collection = _g1->free_regions(); + _scan_only_regions_at_end_of_collection = 0; + calculate_young_list_min_length(); + calculate_young_list_target_config(); + } + +void G1CollectorPolicy::record_pop_compute_rc_start() { + _pop_compute_rc_start = os::elapsedTime(); +} +void G1CollectorPolicy::record_pop_compute_rc_end() { + double ms = (os::elapsedTime() - _pop_compute_rc_start)*1000.0; + _cur_popular_compute_rc_time_ms = ms; + _pop_compute_rc_start = 0.0; +} +void G1CollectorPolicy::record_pop_evac_start() { + _pop_evac_start = os::elapsedTime(); +} +void G1CollectorPolicy::record_pop_evac_end() { + double ms = (os::elapsedTime() - _pop_evac_start)*1000.0; + _cur_popular_evac_time_ms = ms; + _pop_evac_start = 0.0; +} + +void G1CollectorPolicy::record_before_bytes(size_t bytes) { + _bytes_in_to_space_before_gc += bytes; +} + +void G1CollectorPolicy::record_after_bytes(size_t bytes) { + _bytes_in_to_space_after_gc += bytes; +} + +void G1CollectorPolicy::record_stop_world_start() { + _stop_world_start = os::elapsedTime(); +} + +void G1CollectorPolicy::record_collection_pause_start(double start_time_sec, + size_t start_used) { + if (PrintGCDetails) { + gclog_or_tty->stamp(PrintGCTimeStamps); + gclog_or_tty->print("[GC pause"); + if (in_young_gc_mode()) + gclog_or_tty->print(" (%s)", full_young_gcs() ? 
"young" : "partial"); + } + + assert(_g1->used_regions() == _g1->recalculate_used_regions(), + "sanity"); + + double s_w_t_ms = (start_time_sec - _stop_world_start) * 1000.0; + _all_stop_world_times_ms->add(s_w_t_ms); + _stop_world_start = 0.0; + + _cur_collection_start_sec = start_time_sec; + _cur_collection_pause_used_at_start_bytes = start_used; + _cur_collection_pause_used_regions_at_start = _g1->used_regions(); + _pending_cards = _g1->pending_card_num(); + _max_pending_cards = _g1->max_pending_card_num(); + + _bytes_in_to_space_before_gc = 0; + _bytes_in_to_space_after_gc = 0; + _bytes_in_collection_set_before_gc = 0; + +#ifdef DEBUG + // initialise these to something well known so that we can spot + // if they are not set properly + + for (int i = 0; i < _parallel_gc_threads; ++i) { + _par_last_ext_root_scan_times_ms[i] = -666.0; + _par_last_mark_stack_scan_times_ms[i] = -666.0; + _par_last_scan_only_times_ms[i] = -666.0; + _par_last_scan_only_regions_scanned[i] = -666.0; + _par_last_update_rs_start_times_ms[i] = -666.0; + _par_last_update_rs_times_ms[i] = -666.0; + _par_last_update_rs_processed_buffers[i] = -666.0; + _par_last_scan_rs_start_times_ms[i] = -666.0; + _par_last_scan_rs_times_ms[i] = -666.0; + _par_last_scan_new_refs_times_ms[i] = -666.0; + _par_last_obj_copy_times_ms[i] = -666.0; + _par_last_termination_times_ms[i] = -666.0; + + _pop_par_last_update_rs_start_times_ms[i] = -666.0; + _pop_par_last_update_rs_times_ms[i] = -666.0; + _pop_par_last_update_rs_processed_buffers[i] = -666.0; + _pop_par_last_scan_rs_start_times_ms[i] = -666.0; + _pop_par_last_scan_rs_times_ms[i] = -666.0; + _pop_par_last_closure_app_times_ms[i] = -666.0; + } +#endif + + for (int i = 0; i < _aux_num; ++i) { + _cur_aux_times_ms[i] = 0.0; + _cur_aux_times_set[i] = false; + } + + _satb_drain_time_set = false; + _last_satb_drain_processed_buffers = -1; + + if (in_young_gc_mode()) + _last_young_gc_full = false; + + + // do that for any other surv rate groups + 
_short_lived_surv_rate_group->stop_adding_regions(); + size_t short_lived_so_length = _young_list_so_prefix_length; + _short_lived_surv_rate_group->record_scan_only_prefix(short_lived_so_length); + tag_scan_only(short_lived_so_length); + + assert( verify_young_ages(), "region age verification" ); +} + +void G1CollectorPolicy::tag_scan_only(size_t short_lived_scan_only_length) { + // done in a way that it can be extended for other surv rate groups too... + + HeapRegion* head = _g1->young_list_first_region(); + bool finished_short_lived = (short_lived_scan_only_length == 0); + + if (finished_short_lived) + return; + + for (HeapRegion* curr = head; + curr != NULL; + curr = curr->get_next_young_region()) { + SurvRateGroup* surv_rate_group = curr->surv_rate_group(); + int age = curr->age_in_surv_rate_group(); + + if (surv_rate_group == _short_lived_surv_rate_group) { + if ((size_t)age < short_lived_scan_only_length) + curr->set_scan_only(); + else + finished_short_lived = true; + } + + + if (finished_short_lived) + return; + } + + guarantee( false, "we should never reach here" ); +} + +void G1CollectorPolicy::record_popular_pause_preamble_start() { + _cur_popular_preamble_start_ms = os::elapsedTime() * 1000.0; +} + +void G1CollectorPolicy::record_popular_pause_preamble_end() { + _cur_popular_preamble_time_ms = + (os::elapsedTime() * 1000.0) - _cur_popular_preamble_start_ms; + + // copy the recorded statistics of the first pass to temporary arrays + for (int i = 0; i < _parallel_gc_threads; ++i) { + _pop_par_last_update_rs_start_times_ms[i] = _par_last_update_rs_start_times_ms[i]; + _pop_par_last_update_rs_times_ms[i] = _par_last_update_rs_times_ms[i]; + _pop_par_last_update_rs_processed_buffers[i] = _par_last_update_rs_processed_buffers[i]; + _pop_par_last_scan_rs_start_times_ms[i] = _par_last_scan_rs_start_times_ms[i]; + _pop_par_last_scan_rs_times_ms[i] = _par_last_scan_rs_times_ms[i]; + _pop_par_last_closure_app_times_ms[i] = _par_last_obj_copy_times_ms[i]; + } +} + 
+void G1CollectorPolicy::record_mark_closure_time(double mark_closure_time_ms) { + _mark_closure_time_ms = mark_closure_time_ms; +} + +void G1CollectorPolicy::record_concurrent_mark_init_start() { + _mark_init_start_sec = os::elapsedTime(); + guarantee(!in_young_gc_mode(), "should not do be here in young GC mode"); +} + +void G1CollectorPolicy::record_concurrent_mark_init_end_pre(double + mark_init_elapsed_time_ms) { + _during_marking = true; + _should_initiate_conc_mark = false; + _cur_mark_stop_world_time_ms = mark_init_elapsed_time_ms; +} + +void G1CollectorPolicy::record_concurrent_mark_init_end() { + double end_time_sec = os::elapsedTime(); + double elapsed_time_ms = (end_time_sec - _mark_init_start_sec) * 1000.0; + _concurrent_mark_init_times_ms->add(elapsed_time_ms); + checkpoint_conc_overhead(); + record_concurrent_mark_init_end_pre(elapsed_time_ms); + + _mmu_tracker->add_pause(_mark_init_start_sec, end_time_sec, true); +} + +void G1CollectorPolicy::record_concurrent_mark_remark_start() { + _mark_remark_start_sec = os::elapsedTime(); + _during_marking = false; +} + +void G1CollectorPolicy::record_concurrent_mark_remark_end() { + double end_time_sec = os::elapsedTime(); + double elapsed_time_ms = (end_time_sec - _mark_remark_start_sec)*1000.0; + checkpoint_conc_overhead(); + _concurrent_mark_remark_times_ms->add(elapsed_time_ms); + _cur_mark_stop_world_time_ms += elapsed_time_ms; + _prev_collection_pause_end_ms += elapsed_time_ms; + + _mmu_tracker->add_pause(_mark_remark_start_sec, end_time_sec, true); +} + +void G1CollectorPolicy::record_concurrent_mark_cleanup_start() { + _mark_cleanup_start_sec = os::elapsedTime(); +} + +void +G1CollectorPolicy::record_concurrent_mark_cleanup_end(size_t freed_bytes, + size_t max_live_bytes) { + record_concurrent_mark_cleanup_end_work1(freed_bytes, max_live_bytes); + record_concurrent_mark_cleanup_end_work2(); +} + +void +G1CollectorPolicy:: +record_concurrent_mark_cleanup_end_work1(size_t freed_bytes, + size_t 
max_live_bytes) { + if (_n_marks < 2) _n_marks++; + if (G1PolicyVerbose > 0) + gclog_or_tty->print_cr("At end of marking, max_live is " SIZE_FORMAT " MB " + " (of " SIZE_FORMAT " MB heap).", + max_live_bytes/M, _g1->capacity()/M); +} + +// The important thing about this is that it includes "os::elapsedTime". +void G1CollectorPolicy::record_concurrent_mark_cleanup_end_work2() { + checkpoint_conc_overhead(); + double end_time_sec = os::elapsedTime(); + double elapsed_time_ms = (end_time_sec - _mark_cleanup_start_sec)*1000.0; + _concurrent_mark_cleanup_times_ms->add(elapsed_time_ms); + _cur_mark_stop_world_time_ms += elapsed_time_ms; + _prev_collection_pause_end_ms += elapsed_time_ms; + + _mmu_tracker->add_pause(_mark_cleanup_start_sec, end_time_sec, true); + + _num_markings++; + + // We did a marking, so reset the "since_last_mark" variables. + double considerConcMarkCost = 1.0; + // If there are available processors, concurrent activity is free... + if (Threads::number_of_non_daemon_threads() * 2 < + os::active_processor_count()) { + considerConcMarkCost = 0.0; + } + _n_pauses_at_mark_end = _n_pauses; + _n_marks_since_last_pause++; + _conc_mark_initiated = false; +} + +void +G1CollectorPolicy::record_concurrent_mark_cleanup_completed() { + if (in_young_gc_mode()) { + _should_revert_to_full_young_gcs = false; + _last_full_young_gc = true; + _in_marking_window = false; + if (adaptive_young_list_length()) + calculate_young_list_target_config(); + } +} + +void G1CollectorPolicy::record_concurrent_pause() { + if (_stop_world_start > 0.0) { + double yield_ms = (os::elapsedTime() - _stop_world_start) * 1000.0; + _all_yield_times_ms->add(yield_ms); + } +} + +void G1CollectorPolicy::record_concurrent_pause_end() { +} + +void G1CollectorPolicy::record_collection_pause_end_CH_strong_roots() { + _cur_CH_strong_roots_end_sec = os::elapsedTime(); + _cur_CH_strong_roots_dur_ms = + (_cur_CH_strong_roots_end_sec - _cur_collection_start_sec) * 1000.0; +} + +void 
G1CollectorPolicy::record_collection_pause_end_G1_strong_roots() { + _cur_G1_strong_roots_end_sec = os::elapsedTime(); + _cur_G1_strong_roots_dur_ms = + (_cur_G1_strong_roots_end_sec - _cur_CH_strong_roots_end_sec) * 1000.0; +} + +template +T sum_of(T* sum_arr, int start, int n, int N) { + T sum = (T)0; + for (int i = 0; i < n; i++) { + int j = (start + i) % N; + sum += sum_arr[j]; + } + return sum; +} + +void G1CollectorPolicy::print_par_stats (int level, + const char* str, + double* data, + bool summary) { + double min = data[0], max = data[0]; + double total = 0.0; + int j; + for (j = 0; j < level; ++j) + gclog_or_tty->print(" "); + gclog_or_tty->print("[%s (ms):", str); + for (uint i = 0; i < ParallelGCThreads; ++i) { + double val = data[i]; + if (val < min) + min = val; + if (val > max) + max = val; + total += val; + gclog_or_tty->print(" %3.1lf", val); + } + if (summary) { + gclog_or_tty->print_cr(""); + double avg = total / (double) ParallelGCThreads; + gclog_or_tty->print(" "); + for (j = 0; j < level; ++j) + gclog_or_tty->print(" "); + gclog_or_tty->print("Avg: %5.1lf, Min: %5.1lf, Max: %5.1lf", + avg, min, max); + } + gclog_or_tty->print_cr("]"); +} + +void G1CollectorPolicy::print_par_buffers (int level, + const char* str, + double* data, + bool summary) { + double min = data[0], max = data[0]; + double total = 0.0; + int j; + for (j = 0; j < level; ++j) + gclog_or_tty->print(" "); + gclog_or_tty->print("[%s :", str); + for (uint i = 0; i < ParallelGCThreads; ++i) { + double val = data[i]; + if (val < min) + min = val; + if (val > max) + max = val; + total += val; + gclog_or_tty->print(" %d", (int) val); + } + if (summary) { + gclog_or_tty->print_cr(""); + double avg = total / (double) ParallelGCThreads; + gclog_or_tty->print(" "); + for (j = 0; j < level; ++j) + gclog_or_tty->print(" "); + gclog_or_tty->print("Sum: %d, Avg: %d, Min: %d, Max: %d", + (int)total, (int)avg, (int)min, (int)max); + } + gclog_or_tty->print_cr("]"); +} + +void 
G1CollectorPolicy::print_stats (int level, + const char* str, + double value) { + for (int j = 0; j < level; ++j) + gclog_or_tty->print(" "); + gclog_or_tty->print_cr("[%s: %5.1lf ms]", str, value); +} + +void G1CollectorPolicy::print_stats (int level, + const char* str, + int value) { + for (int j = 0; j < level; ++j) + gclog_or_tty->print(" "); + gclog_or_tty->print_cr("[%s: %d]", str, value); +} + +double G1CollectorPolicy::avg_value (double* data) { + if (ParallelGCThreads > 0) { + double ret = 0.0; + for (uint i = 0; i < ParallelGCThreads; ++i) + ret += data[i]; + return ret / (double) ParallelGCThreads; + } else { + return data[0]; + } +} + +double G1CollectorPolicy::max_value (double* data) { + if (ParallelGCThreads > 0) { + double ret = data[0]; + for (uint i = 1; i < ParallelGCThreads; ++i) + if (data[i] > ret) + ret = data[i]; + return ret; + } else { + return data[0]; + } +} + +double G1CollectorPolicy::sum_of_values (double* data) { + if (ParallelGCThreads > 0) { + double sum = 0.0; + for (uint i = 0; i < ParallelGCThreads; i++) + sum += data[i]; + return sum; + } else { + return data[0]; + } +} + +double G1CollectorPolicy::max_sum (double* data1, + double* data2) { + double ret = data1[0] + data2[0]; + + if (ParallelGCThreads > 0) { + for (uint i = 1; i < ParallelGCThreads; ++i) { + double data = data1[i] + data2[i]; + if (data > ret) + ret = data; + } + } + return ret; +} + +// Anything below that is considered to be zero +#define MIN_TIMER_GRANULARITY 0.0000001 + +void G1CollectorPolicy::record_collection_pause_end(bool popular, + bool abandoned) { + double end_time_sec = os::elapsedTime(); + double elapsed_ms = _last_pause_time_ms; + bool parallel = ParallelGCThreads > 0; + double evac_ms = (end_time_sec - _cur_G1_strong_roots_end_sec) * 1000.0; + size_t rs_size = + _cur_collection_pause_used_regions_at_start - collection_set_size(); + size_t cur_used_bytes = _g1->used(); + assert(cur_used_bytes == _g1->recalculate_used(), "It should!"); + bool 
last_pause_included_initial_mark = false; + +#ifndef PRODUCT + if (G1YoungSurvRateVerbose) { + gclog_or_tty->print_cr(""); + _short_lived_surv_rate_group->print(); + // do that for any other surv rate groups too + } +#endif // PRODUCT + + checkpoint_conc_overhead(); + + if (in_young_gc_mode()) { + last_pause_included_initial_mark = _should_initiate_conc_mark; + if (last_pause_included_initial_mark) + record_concurrent_mark_init_end_pre(0.0); + + size_t min_used_targ = + (_g1->capacity() / 100) * (G1SteadyStateUsed - G1SteadyStateUsedDelta); + + if (cur_used_bytes > min_used_targ) { + if (cur_used_bytes <= _prev_collection_pause_used_at_end_bytes) { + } else if (!_g1->mark_in_progress() && !_last_full_young_gc) { + _should_initiate_conc_mark = true; + } + } + + _prev_collection_pause_used_at_end_bytes = cur_used_bytes; + } + + _mmu_tracker->add_pause(end_time_sec - elapsed_ms/1000.0, + end_time_sec, false); + + guarantee(_cur_collection_pause_used_regions_at_start >= + collection_set_size(), + "Negative RS size?"); + + // This assert is exempted when we're doing parallel collection pauses, + // because the fragmentation caused by the parallel GC allocation buffers + // can lead to more memory being used during collection than was used + // before. Best leave this out until the fragmentation problem is fixed. + // Pauses in which evacuation failed can also lead to negative + // collections, since no space is reclaimed from a region containing an + // object whose evacuation failed. + // Further, we're now always doing parallel collection. But I'm still + // leaving this here as a placeholder for a more precise assertion later. + // (DLD, 10/05.) + assert((true || parallel) // Always using GC LABs now. 
+ || _g1->evacuation_failed() + || _cur_collection_pause_used_at_start_bytes >= cur_used_bytes, + "Negative collection"); + + size_t freed_bytes = + _cur_collection_pause_used_at_start_bytes - cur_used_bytes; + size_t surviving_bytes = _collection_set_bytes_used_before - freed_bytes; + double survival_fraction = + (double)surviving_bytes/ + (double)_collection_set_bytes_used_before; + + _n_pauses++; + + if (!abandoned) { + _recent_CH_strong_roots_times_ms->add(_cur_CH_strong_roots_dur_ms); + _recent_G1_strong_roots_times_ms->add(_cur_G1_strong_roots_dur_ms); + _recent_evac_times_ms->add(evac_ms); + _recent_pause_times_ms->add(elapsed_ms); + + _recent_rs_sizes->add(rs_size); + + // We exempt parallel collection from this check because Alloc Buffer + // fragmentation can produce negative collections. Same with evac + // failure. + // Further, we're now always doing parallel collection. But I'm still + // leaving this here as a placeholder for a more precise assertion later. + // (DLD, 10/05. + assert((true || parallel) + || _g1->evacuation_failed() + || surviving_bytes <= _collection_set_bytes_used_before, + "Or else negative collection!"); + _recent_CS_bytes_used_before->add(_collection_set_bytes_used_before); + _recent_CS_bytes_surviving->add(surviving_bytes); + + // this is where we update the allocation rate of the application + double app_time_ms = + (_cur_collection_start_sec * 1000.0 - _prev_collection_pause_end_ms); + if (app_time_ms < MIN_TIMER_GRANULARITY) { + // This usually happens due to the timer not having the required + // granularity. Some Linuxes are the usual culprits. + // We'll just set it to something (arbitrarily) small. 
+ app_time_ms = 1.0; + } + size_t regions_allocated = + (_region_num_young - _prev_region_num_young) + + (_region_num_tenured - _prev_region_num_tenured); + double alloc_rate_ms = (double) regions_allocated / app_time_ms; + _alloc_rate_ms_seq->add(alloc_rate_ms); + _prev_region_num_young = _region_num_young; + _prev_region_num_tenured = _region_num_tenured; + + double interval_ms = + (end_time_sec - _recent_prev_end_times_for_all_gcs_sec->oldest()) * 1000.0; + update_recent_gc_times(end_time_sec, elapsed_ms); + _recent_avg_pause_time_ratio = _recent_gc_times_ms->sum()/interval_ms; + assert(recent_avg_pause_time_ratio() < 1.00, "All GC?"); + } + + if (G1PolicyVerbose > 1) { + gclog_or_tty->print_cr(" Recording collection pause(%d)", _n_pauses); + } + + PauseSummary* summary; + if (!abandoned && !popular) + summary = _non_pop_summary; + else if (!abandoned && popular) + summary = _pop_summary; + else if (abandoned && !popular) + summary = _non_pop_abandoned_summary; + else if (abandoned && popular) + summary = _pop_abandoned_summary; + else + guarantee(false, "should not get here!"); + + double pop_update_rs_time; + double pop_update_rs_processed_buffers; + double pop_scan_rs_time; + double pop_closure_app_time; + double pop_other_time; + + if (popular) { + PopPreambleSummary* preamble_summary = summary->pop_preamble_summary(); + guarantee(preamble_summary != NULL, "should not be null!"); + + pop_update_rs_time = avg_value(_pop_par_last_update_rs_times_ms); + pop_update_rs_processed_buffers = + sum_of_values(_pop_par_last_update_rs_processed_buffers); + pop_scan_rs_time = avg_value(_pop_par_last_scan_rs_times_ms); + pop_closure_app_time = avg_value(_pop_par_last_closure_app_times_ms); + pop_other_time = _cur_popular_preamble_time_ms - + (pop_update_rs_time + pop_scan_rs_time + pop_closure_app_time + + _cur_popular_evac_time_ms); + + preamble_summary->record_pop_preamble_time_ms(_cur_popular_preamble_time_ms); + 
preamble_summary->record_pop_update_rs_time_ms(pop_update_rs_time); + preamble_summary->record_pop_scan_rs_time_ms(pop_scan_rs_time); + preamble_summary->record_pop_closure_app_time_ms(pop_closure_app_time); + preamble_summary->record_pop_evacuation_time_ms(_cur_popular_evac_time_ms); + preamble_summary->record_pop_other_time_ms(pop_other_time); + } + + double ext_root_scan_time = avg_value(_par_last_ext_root_scan_times_ms); + double mark_stack_scan_time = avg_value(_par_last_mark_stack_scan_times_ms); + double scan_only_time = avg_value(_par_last_scan_only_times_ms); + double scan_only_regions_scanned = + sum_of_values(_par_last_scan_only_regions_scanned); + double update_rs_time = avg_value(_par_last_update_rs_times_ms); + double update_rs_processed_buffers = + sum_of_values(_par_last_update_rs_processed_buffers); + double scan_rs_time = avg_value(_par_last_scan_rs_times_ms); + double obj_copy_time = avg_value(_par_last_obj_copy_times_ms); + double termination_time = avg_value(_par_last_termination_times_ms); + + double parallel_other_time; + if (!abandoned) { + MainBodySummary* body_summary = summary->main_body_summary(); + guarantee(body_summary != NULL, "should not be null!"); + + if (_satb_drain_time_set) + body_summary->record_satb_drain_time_ms(_cur_satb_drain_time_ms); + else + body_summary->record_satb_drain_time_ms(0.0); + body_summary->record_ext_root_scan_time_ms(ext_root_scan_time); + body_summary->record_mark_stack_scan_time_ms(mark_stack_scan_time); + body_summary->record_scan_only_time_ms(scan_only_time); + body_summary->record_update_rs_time_ms(update_rs_time); + body_summary->record_scan_rs_time_ms(scan_rs_time); + body_summary->record_obj_copy_time_ms(obj_copy_time); + if (parallel) { + body_summary->record_parallel_time_ms(_cur_collection_par_time_ms); + body_summary->record_clear_ct_time_ms(_cur_clear_ct_time_ms); + body_summary->record_termination_time_ms(termination_time); + parallel_other_time = _cur_collection_par_time_ms - + 
(update_rs_time + ext_root_scan_time + mark_stack_scan_time + + scan_only_time + scan_rs_time + obj_copy_time + termination_time); + body_summary->record_parallel_other_time_ms(parallel_other_time); + } + body_summary->record_mark_closure_time_ms(_mark_closure_time_ms); + } + + if (G1PolicyVerbose > 1) { + gclog_or_tty->print_cr(" ET: %10.6f ms (avg: %10.6f ms)\n" + " CH Strong: %10.6f ms (avg: %10.6f ms)\n" + " G1 Strong: %10.6f ms (avg: %10.6f ms)\n" + " Evac: %10.6f ms (avg: %10.6f ms)\n" + " ET-RS: %10.6f ms (avg: %10.6f ms)\n" + " |RS|: " SIZE_FORMAT, + elapsed_ms, recent_avg_time_for_pauses_ms(), + _cur_CH_strong_roots_dur_ms, recent_avg_time_for_CH_strong_ms(), + _cur_G1_strong_roots_dur_ms, recent_avg_time_for_G1_strong_ms(), + evac_ms, recent_avg_time_for_evac_ms(), + scan_rs_time, + recent_avg_time_for_pauses_ms() - + recent_avg_time_for_G1_strong_ms(), + rs_size); + + gclog_or_tty->print_cr(" Used at start: " SIZE_FORMAT"K" + " At end " SIZE_FORMAT "K\n" + " garbage : " SIZE_FORMAT "K" + " of " SIZE_FORMAT "K\n" + " survival : %6.2f%% (%6.2f%% avg)", + _cur_collection_pause_used_at_start_bytes/K, + _g1->used()/K, freed_bytes/K, + _collection_set_bytes_used_before/K, + survival_fraction*100.0, + recent_avg_survival_fraction()*100.0); + gclog_or_tty->print_cr(" Recent %% gc pause time: %6.2f", + recent_avg_pause_time_ratio() * 100.0); + } + + double other_time_ms = elapsed_ms; + if (popular) + other_time_ms -= _cur_popular_preamble_time_ms; + + if (!abandoned) { + if (_satb_drain_time_set) + other_time_ms -= _cur_satb_drain_time_ms; + + if (parallel) + other_time_ms -= _cur_collection_par_time_ms + _cur_clear_ct_time_ms; + else + other_time_ms -= + update_rs_time + + ext_root_scan_time + mark_stack_scan_time + scan_only_time + + scan_rs_time + obj_copy_time; + } + + if (PrintGCDetails) { + gclog_or_tty->print_cr("%s%s, %1.8lf secs]", + (popular && !abandoned) ? " (popular)" : + (!popular && abandoned) ? " (abandoned)" : + (popular && abandoned) ? 
" (popular/abandoned)" : "", + (last_pause_included_initial_mark) ? " (initial-mark)" : "", + elapsed_ms / 1000.0); + + if (!abandoned) { + if (_satb_drain_time_set) + print_stats(1, "SATB Drain Time", _cur_satb_drain_time_ms); + if (_last_satb_drain_processed_buffers >= 0) + print_stats(2, "Processed Buffers", _last_satb_drain_processed_buffers); + } + if (popular) + print_stats(1, "Popularity Preamble", _cur_popular_preamble_time_ms); + if (parallel) { + if (popular) { + print_par_stats(2, "Update RS (Start)", _pop_par_last_update_rs_start_times_ms, false); + print_par_stats(2, "Update RS", _pop_par_last_update_rs_times_ms); + if (G1RSBarrierUseQueue) + print_par_buffers(3, "Processed Buffers", + _pop_par_last_update_rs_processed_buffers, true); + print_par_stats(2, "Scan RS", _pop_par_last_scan_rs_times_ms); + print_par_stats(2, "Closure app", _pop_par_last_closure_app_times_ms); + print_stats(2, "Evacuation", _cur_popular_evac_time_ms); + print_stats(2, "Other", pop_other_time); + } + if (!abandoned) { + print_stats(1, "Parallel Time", _cur_collection_par_time_ms); + if (!popular) { + print_par_stats(2, "Update RS (Start)", _par_last_update_rs_start_times_ms, false); + print_par_stats(2, "Update RS", _par_last_update_rs_times_ms); + if (G1RSBarrierUseQueue) + print_par_buffers(3, "Processed Buffers", + _par_last_update_rs_processed_buffers, true); + } + print_par_stats(2, "Ext Root Scanning", _par_last_ext_root_scan_times_ms); + print_par_stats(2, "Mark Stack Scanning", _par_last_mark_stack_scan_times_ms); + print_par_stats(2, "Scan-Only Scanning", _par_last_scan_only_times_ms); + print_par_buffers(3, "Scan-Only Regions", + _par_last_scan_only_regions_scanned, true); + print_par_stats(2, "Scan RS", _par_last_scan_rs_times_ms); + print_par_stats(2, "Object Copy", _par_last_obj_copy_times_ms); + print_par_stats(2, "Termination", _par_last_termination_times_ms); + print_stats(2, "Other", parallel_other_time); + print_stats(1, "Clear CT", _cur_clear_ct_time_ms); + 
} + } else { + if (popular) { + print_stats(2, "Update RS", pop_update_rs_time); + if (G1RSBarrierUseQueue) + print_stats(3, "Processed Buffers", + (int)pop_update_rs_processed_buffers); + print_stats(2, "Scan RS", pop_scan_rs_time); + print_stats(2, "Closure App", pop_closure_app_time); + print_stats(2, "Evacuation", _cur_popular_evac_time_ms); + print_stats(2, "Other", pop_other_time); + } + if (!abandoned) { + if (!popular) { + print_stats(1, "Update RS", update_rs_time); + if (G1RSBarrierUseQueue) + print_stats(2, "Processed Buffers", + (int)update_rs_processed_buffers); + } + print_stats(1, "Ext Root Scanning", ext_root_scan_time); + print_stats(1, "Mark Stack Scanning", mark_stack_scan_time); + print_stats(1, "Scan-Only Scanning", scan_only_time); + print_stats(1, "Scan RS", scan_rs_time); + print_stats(1, "Object Copying", obj_copy_time); + } + } + print_stats(1, "Other", other_time_ms); + for (int i = 0; i < _aux_num; ++i) { + if (_cur_aux_times_set[i]) { + char buffer[96]; + sprintf(buffer, "Aux%d", i); + print_stats(1, buffer, _cur_aux_times_ms[i]); + } + } + } + if (PrintGCDetails) + gclog_or_tty->print(" ["); + if (PrintGC || PrintGCDetails) + _g1->print_size_transition(gclog_or_tty, + _cur_collection_pause_used_at_start_bytes, + _g1->used(), _g1->capacity()); + if (PrintGCDetails) + gclog_or_tty->print_cr("]"); + + _all_pause_times_ms->add(elapsed_ms); + summary->record_total_time_ms(elapsed_ms); + summary->record_other_time_ms(other_time_ms); + for (int i = 0; i < _aux_num; ++i) + if (_cur_aux_times_set[i]) + _all_aux_times_ms[i].add(_cur_aux_times_ms[i]); + + // Reset marks-between-pauses counter. + _n_marks_since_last_pause = 0; + + // Update the efficiency-since-mark vars. + double proc_ms = elapsed_ms * (double) _parallel_gc_threads; + if (elapsed_ms < MIN_TIMER_GRANULARITY) { + // This usually happens due to the timer not having the required + // granularity. Some Linuxes are the usual culprits. 
+ // We'll just set it to something (arbitrarily) small. + proc_ms = 1.0; + } + double cur_efficiency = (double) freed_bytes / proc_ms; + + bool new_in_marking_window = _in_marking_window; + bool new_in_marking_window_im = false; + if (_should_initiate_conc_mark) { + new_in_marking_window = true; + new_in_marking_window_im = true; + } + + if (in_young_gc_mode()) { + if (_last_full_young_gc) { + set_full_young_gcs(false); + _last_full_young_gc = false; + } + + if ( !_last_young_gc_full ) { + if ( _should_revert_to_full_young_gcs || + _known_garbage_ratio < 0.05 || + (adaptive_young_list_length() && + (get_gc_eff_factor() * cur_efficiency < predict_young_gc_eff())) ) { + set_full_young_gcs(true); + } + } + _should_revert_to_full_young_gcs = false; + + if (_last_young_gc_full && !_during_marking) + _young_gc_eff_seq->add(cur_efficiency); + } + + _short_lived_surv_rate_group->start_adding_regions(); + // do that for any other surv rate groupsx + + // + + if (!popular && !abandoned) { + double pause_time_ms = elapsed_ms; + + size_t diff = 0; + if (_max_pending_cards >= _pending_cards) + diff = _max_pending_cards - _pending_cards; + _pending_card_diff_seq->add((double) diff); + + double cost_per_card_ms = 0.0; + if (_pending_cards > 0) { + cost_per_card_ms = update_rs_time / (double) _pending_cards; + _cost_per_card_ms_seq->add(cost_per_card_ms); + } + + double cost_per_scan_only_region_ms = 0.0; + if (scan_only_regions_scanned > 0.0) { + cost_per_scan_only_region_ms = + scan_only_time / scan_only_regions_scanned; + if (_in_marking_window_im) + _cost_per_scan_only_region_ms_during_cm_seq->add(cost_per_scan_only_region_ms); + else + _cost_per_scan_only_region_ms_seq->add(cost_per_scan_only_region_ms); + } + + size_t cards_scanned = _g1->cards_scanned(); + + double cost_per_entry_ms = 0.0; + if (cards_scanned > 10) { + cost_per_entry_ms = scan_rs_time / (double) cards_scanned; + if (_last_young_gc_full) + _cost_per_entry_ms_seq->add(cost_per_entry_ms); + else + 
_partially_young_cost_per_entry_ms_seq->add(cost_per_entry_ms); + } + + if (_max_rs_lengths > 0) { + double cards_per_entry_ratio = + (double) cards_scanned / (double) _max_rs_lengths; + if (_last_young_gc_full) + _fully_young_cards_per_entry_ratio_seq->add(cards_per_entry_ratio); + else + _partially_young_cards_per_entry_ratio_seq->add(cards_per_entry_ratio); + } + + size_t rs_length_diff = _max_rs_lengths - _recorded_rs_lengths; + if (rs_length_diff >= 0) + _rs_length_diff_seq->add((double) rs_length_diff); + + size_t copied_bytes = surviving_bytes; + double cost_per_byte_ms = 0.0; + if (copied_bytes > 0) { + cost_per_byte_ms = obj_copy_time / (double) copied_bytes; + if (_in_marking_window) + _cost_per_byte_ms_during_cm_seq->add(cost_per_byte_ms); + else + _cost_per_byte_ms_seq->add(cost_per_byte_ms); + } + + double all_other_time_ms = pause_time_ms - + (update_rs_time + scan_only_time + scan_rs_time + obj_copy_time + + _mark_closure_time_ms + termination_time); + + double young_other_time_ms = 0.0; + if (_recorded_young_regions > 0) { + young_other_time_ms = + _recorded_young_cset_choice_time_ms + + _recorded_young_free_cset_time_ms; + _young_other_cost_per_region_ms_seq->add(young_other_time_ms / + (double) _recorded_young_regions); + } + double non_young_other_time_ms = 0.0; + if (_recorded_non_young_regions > 0) { + non_young_other_time_ms = + _recorded_non_young_cset_choice_time_ms + + _recorded_non_young_free_cset_time_ms; + + _non_young_other_cost_per_region_ms_seq->add(non_young_other_time_ms / + (double) _recorded_non_young_regions); + } + + double constant_other_time_ms = all_other_time_ms - + (young_other_time_ms + non_young_other_time_ms); + _constant_other_time_ms_seq->add(constant_other_time_ms); + + double survival_ratio = 0.0; + if (_bytes_in_collection_set_before_gc > 0) { + survival_ratio = (double) bytes_in_to_space_during_gc() / + (double) _bytes_in_collection_set_before_gc; + } + + _pending_cards_seq->add((double) _pending_cards); + 
_scanned_cards_seq->add((double) cards_scanned); + _rs_lengths_seq->add((double) _max_rs_lengths); + + double expensive_region_limit_ms = + (double) G1MaxPauseTimeMS - predict_constant_other_time_ms(); + if (expensive_region_limit_ms < 0.0) { + // this means that the other time was predicted to be longer than + // than the max pause time + expensive_region_limit_ms = (double) G1MaxPauseTimeMS; + } + _expensive_region_limit_ms = expensive_region_limit_ms; + + if (PREDICTIONS_VERBOSE) { + gclog_or_tty->print_cr(""); + gclog_or_tty->print_cr("PREDICTIONS %1.4lf %d " + "REGIONS %d %d %d %d " + "PENDING_CARDS %d %d " + "CARDS_SCANNED %d %d " + "RS_LENGTHS %d %d " + "SCAN_ONLY_SCAN %1.6lf %1.6lf " + "RS_UPDATE %1.6lf %1.6lf RS_SCAN %1.6lf %1.6lf " + "SURVIVAL_RATIO %1.6lf %1.6lf " + "OBJECT_COPY %1.6lf %1.6lf OTHER_CONSTANT %1.6lf %1.6lf " + "OTHER_YOUNG %1.6lf %1.6lf " + "OTHER_NON_YOUNG %1.6lf %1.6lf " + "VTIME_DIFF %1.6lf TERMINATION %1.6lf " + "ELAPSED %1.6lf %1.6lf ", + _cur_collection_start_sec, + (!_last_young_gc_full) ? 2 : + (last_pause_included_initial_mark) ? 
1 : 0, + _recorded_region_num, + _recorded_young_regions, + _recorded_scan_only_regions, + _recorded_non_young_regions, + _predicted_pending_cards, _pending_cards, + _predicted_cards_scanned, cards_scanned, + _predicted_rs_lengths, _max_rs_lengths, + _predicted_scan_only_scan_time_ms, scan_only_time, + _predicted_rs_update_time_ms, update_rs_time, + _predicted_rs_scan_time_ms, scan_rs_time, + _predicted_survival_ratio, survival_ratio, + _predicted_object_copy_time_ms, obj_copy_time, + _predicted_constant_other_time_ms, constant_other_time_ms, + _predicted_young_other_time_ms, young_other_time_ms, + _predicted_non_young_other_time_ms, + non_young_other_time_ms, + _vtime_diff_ms, termination_time, + _predicted_pause_time_ms, elapsed_ms); + } + + if (G1PolicyVerbose > 0) { + gclog_or_tty->print_cr("Pause Time, predicted: %1.4lfms (predicted %s), actual: %1.4lfms", + _predicted_pause_time_ms, + (_within_target) ? "within" : "outside", + elapsed_ms); + } + + } + + _in_marking_window = new_in_marking_window; + _in_marking_window_im = new_in_marking_window_im; + _free_regions_at_end_of_collection = _g1->free_regions(); + _scan_only_regions_at_end_of_collection = _g1->young_list_length(); + calculate_young_list_min_length(); + calculate_young_list_target_config(); + + // + + _target_pause_time_ms = -1.0; + + // TODO: calculate tenuring threshold + _tenuring_threshold = MaxTenuringThreshold; +} + +// + +double +G1CollectorPolicy:: +predict_young_collection_elapsed_time_ms(size_t adjustment) { + guarantee( adjustment == 0 || adjustment == 1, "invariant" ); + + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + size_t young_num = g1h->young_list_length(); + if (young_num == 0) + return 0.0; + + young_num += adjustment; + size_t pending_cards = predict_pending_cards(); + size_t rs_lengths = g1h->young_list_sampled_rs_lengths() + + predict_rs_length_diff(); + size_t card_num; + if (full_young_gcs()) + card_num = predict_young_card_num(rs_lengths); + else + card_num = 
predict_non_young_card_num(rs_lengths); + size_t young_byte_size = young_num * HeapRegion::GrainBytes; + double accum_yg_surv_rate = + _short_lived_surv_rate_group->accum_surv_rate(adjustment); + + size_t bytes_to_copy = + (size_t) (accum_yg_surv_rate * (double) HeapRegion::GrainBytes); + + return + predict_rs_update_time_ms(pending_cards) + + predict_rs_scan_time_ms(card_num) + + predict_object_copy_time_ms(bytes_to_copy) + + predict_young_other_time_ms(young_num) + + predict_constant_other_time_ms(); +} + +double +G1CollectorPolicy::predict_base_elapsed_time_ms(size_t pending_cards) { + size_t rs_length = predict_rs_length_diff(); + size_t card_num; + if (full_young_gcs()) + card_num = predict_young_card_num(rs_length); + else + card_num = predict_non_young_card_num(rs_length); + return predict_base_elapsed_time_ms(pending_cards, card_num); +} + +double +G1CollectorPolicy::predict_base_elapsed_time_ms(size_t pending_cards, + size_t scanned_cards) { + return + predict_rs_update_time_ms(pending_cards) + + predict_rs_scan_time_ms(scanned_cards) + + predict_constant_other_time_ms(); +} + +double +G1CollectorPolicy::predict_region_elapsed_time_ms(HeapRegion* hr, + bool young) { + size_t rs_length = hr->rem_set()->occupied(); + size_t card_num; + if (full_young_gcs()) + card_num = predict_young_card_num(rs_length); + else + card_num = predict_non_young_card_num(rs_length); + size_t bytes_to_copy = predict_bytes_to_copy(hr); + + double region_elapsed_time_ms = + predict_rs_scan_time_ms(card_num) + + predict_object_copy_time_ms(bytes_to_copy); + + if (young) + region_elapsed_time_ms += predict_young_other_time_ms(1); + else + region_elapsed_time_ms += predict_non_young_other_time_ms(1); + + return region_elapsed_time_ms; +} + +size_t +G1CollectorPolicy::predict_bytes_to_copy(HeapRegion* hr) { + size_t bytes_to_copy; + if (hr->is_marked()) + bytes_to_copy = hr->max_live_bytes(); + else { + guarantee( hr->is_young() && hr->age_in_surv_rate_group() != -1, + "invariant" ); + 
int age = hr->age_in_surv_rate_group(); + double yg_surv_rate = predict_yg_surv_rate(age); + bytes_to_copy = (size_t) ((double) hr->used() * yg_surv_rate); + } + + return bytes_to_copy; +} + +void +G1CollectorPolicy::start_recording_regions() { + _recorded_rs_lengths = 0; + _recorded_scan_only_regions = 0; + _recorded_young_regions = 0; + _recorded_non_young_regions = 0; + +#if PREDICTIONS_VERBOSE + _predicted_rs_lengths = 0; + _predicted_cards_scanned = 0; + + _recorded_marked_bytes = 0; + _recorded_young_bytes = 0; + _predicted_bytes_to_copy = 0; +#endif // PREDICTIONS_VERBOSE +} + +void +G1CollectorPolicy::record_cset_region(HeapRegion* hr, bool young) { + if (young) { + ++_recorded_young_regions; + } else { + ++_recorded_non_young_regions; + } +#if PREDICTIONS_VERBOSE + if (young) { + _recorded_young_bytes += hr->asSpace()->used(); + } else { + _recorded_marked_bytes += hr->max_live_bytes(); + } + _predicted_bytes_to_copy += predict_bytes_to_copy(hr); +#endif // PREDICTIONS_VERBOSE + + size_t rs_length = hr->rem_set()->occupied(); + _recorded_rs_lengths += rs_length; +} + +void +G1CollectorPolicy::record_scan_only_regions(size_t scan_only_length) { + _recorded_scan_only_regions = scan_only_length; +} + +void +G1CollectorPolicy::end_recording_regions() { +#if PREDICTIONS_VERBOSE + _predicted_pending_cards = predict_pending_cards(); + _predicted_rs_lengths = _recorded_rs_lengths + predict_rs_length_diff(); + if (full_young_gcs()) + _predicted_cards_scanned += predict_young_card_num(_predicted_rs_lengths); + else + _predicted_cards_scanned += + predict_non_young_card_num(_predicted_rs_lengths); + _recorded_region_num = _recorded_young_regions + _recorded_non_young_regions; + + _predicted_young_survival_ratio = 0.0; + for (int i = 0; i < _recorded_young_regions; ++i) + _predicted_young_survival_ratio += predict_yg_surv_rate(i); + _predicted_young_survival_ratio /= (double) _recorded_young_regions; + + _predicted_scan_only_scan_time_ms = + 
predict_scan_only_time_ms(_recorded_scan_only_regions); + _predicted_rs_update_time_ms = + predict_rs_update_time_ms(_g1->pending_card_num()); + _predicted_rs_scan_time_ms = + predict_rs_scan_time_ms(_predicted_cards_scanned); + _predicted_object_copy_time_ms = + predict_object_copy_time_ms(_predicted_bytes_to_copy); + _predicted_constant_other_time_ms = + predict_constant_other_time_ms(); + _predicted_young_other_time_ms = + predict_young_other_time_ms(_recorded_young_regions); + _predicted_non_young_other_time_ms = + predict_non_young_other_time_ms(_recorded_non_young_regions); + + _predicted_pause_time_ms = + _predicted_scan_only_scan_time_ms + + _predicted_rs_update_time_ms + + _predicted_rs_scan_time_ms + + _predicted_object_copy_time_ms + + _predicted_constant_other_time_ms + + _predicted_young_other_time_ms + + _predicted_non_young_other_time_ms; +#endif // PREDICTIONS_VERBOSE +} + +void G1CollectorPolicy::check_if_region_is_too_expensive(double + predicted_time_ms) { + // I don't think we need to do this when in young GC mode since + // marking will be initiated next time we hit the soft limit anyway... 
+ if (predicted_time_ms > _expensive_region_limit_ms) { + if (!in_young_gc_mode()) { + set_full_young_gcs(true); + _should_initiate_conc_mark = true; + } else + // no point in doing another partial one + _should_revert_to_full_young_gcs = true; + } +} + +// + + +void G1CollectorPolicy::update_recent_gc_times(double end_time_sec, + double elapsed_ms) { + _recent_gc_times_ms->add(elapsed_ms); + _recent_prev_end_times_for_all_gcs_sec->add(end_time_sec); + _prev_collection_pause_end_ms = end_time_sec * 1000.0; +} + +double G1CollectorPolicy::recent_avg_time_for_pauses_ms() { + if (_recent_pause_times_ms->num() == 0) return (double) G1MaxPauseTimeMS; + else return _recent_pause_times_ms->avg(); +} + +double G1CollectorPolicy::recent_avg_time_for_CH_strong_ms() { + if (_recent_CH_strong_roots_times_ms->num() == 0) + return (double)G1MaxPauseTimeMS/3.0; + else return _recent_CH_strong_roots_times_ms->avg(); +} + +double G1CollectorPolicy::recent_avg_time_for_G1_strong_ms() { + if (_recent_G1_strong_roots_times_ms->num() == 0) + return (double)G1MaxPauseTimeMS/3.0; + else return _recent_G1_strong_roots_times_ms->avg(); +} + +double G1CollectorPolicy::recent_avg_time_for_evac_ms() { + if (_recent_evac_times_ms->num() == 0) return (double)G1MaxPauseTimeMS/3.0; + else return _recent_evac_times_ms->avg(); +} + +int G1CollectorPolicy::number_of_recent_gcs() { + assert(_recent_CH_strong_roots_times_ms->num() == + _recent_G1_strong_roots_times_ms->num(), "Sequence out of sync"); + assert(_recent_G1_strong_roots_times_ms->num() == + _recent_evac_times_ms->num(), "Sequence out of sync"); + assert(_recent_evac_times_ms->num() == + _recent_pause_times_ms->num(), "Sequence out of sync"); + assert(_recent_pause_times_ms->num() == + _recent_CS_bytes_used_before->num(), "Sequence out of sync"); + assert(_recent_CS_bytes_used_before->num() == + _recent_CS_bytes_surviving->num(), "Sequence out of sync"); + return _recent_pause_times_ms->num(); +} + +double 
G1CollectorPolicy::recent_avg_survival_fraction() { + return recent_avg_survival_fraction_work(_recent_CS_bytes_surviving, + _recent_CS_bytes_used_before); +} + +double G1CollectorPolicy::last_survival_fraction() { + return last_survival_fraction_work(_recent_CS_bytes_surviving, + _recent_CS_bytes_used_before); +} + +double +G1CollectorPolicy::recent_avg_survival_fraction_work(TruncatedSeq* surviving, + TruncatedSeq* before) { + assert(surviving->num() == before->num(), "Sequence out of sync"); + if (before->sum() > 0.0) { + double recent_survival_rate = surviving->sum() / before->sum(); + // We exempt parallel collection from this check because Alloc Buffer + // fragmentation can produce negative collections. + // Further, we're now always doing parallel collection. But I'm still + // leaving this here as a placeholder for a more precise assertion later. + // (DLD, 10/05.) + assert((true || ParallelGCThreads > 0) || + _g1->evacuation_failed() || + recent_survival_rate <= 1.0, "Or bad frac"); + return recent_survival_rate; + } else { + return 1.0; // Be conservative. + } +} + +double +G1CollectorPolicy::last_survival_fraction_work(TruncatedSeq* surviving, + TruncatedSeq* before) { + assert(surviving->num() == before->num(), "Sequence out of sync"); + if (surviving->num() > 0 && before->last() > 0.0) { + double last_survival_rate = surviving->last() / before->last(); + // We exempt parallel collection from this check because Alloc Buffer + // fragmentation can produce negative collections. + // Further, we're now always doing parallel collection. But I'm still + // leaving this here as a placeholder for a more precise assertion later. + // (DLD, 10/05.) 
+ assert((true || ParallelGCThreads > 0) || + last_survival_rate <= 1.0, "Or bad frac"); + return last_survival_rate; + } else { + return 1.0; + } +} + +static const int survival_min_obs = 5; +static double survival_min_obs_limits[] = { 0.9, 0.7, 0.5, 0.3, 0.1 }; +static const double min_survival_rate = 0.1; + +double +G1CollectorPolicy::conservative_avg_survival_fraction_work(double avg, + double latest) { + double res = avg; + if (number_of_recent_gcs() < survival_min_obs) { + res = MAX2(res, survival_min_obs_limits[number_of_recent_gcs()]); + } + res = MAX2(res, latest); + res = MAX2(res, min_survival_rate); + // In the parallel case, LAB fragmentation can produce "negative + // collections"; so can evac failure. Cap at 1.0 + res = MIN2(res, 1.0); + return res; +} + +size_t G1CollectorPolicy::expansion_amount() { + if ((int)(recent_avg_pause_time_ratio() * 100.0) > G1GCPct) { + // We will double the existing space, or take G1ExpandByPctOfAvail % of + // the available expansion space, whichever is smaller, bounded below + // by a minimum expansion (unless that's all that's left.) 
+ const size_t min_expand_bytes = 1*M; + size_t reserved_bytes = _g1->g1_reserved_obj_bytes(); + size_t committed_bytes = _g1->capacity(); + size_t uncommitted_bytes = reserved_bytes - committed_bytes; + size_t expand_bytes; + size_t expand_bytes_via_pct = + uncommitted_bytes * G1ExpandByPctOfAvail / 100; + expand_bytes = MIN2(expand_bytes_via_pct, committed_bytes); + expand_bytes = MAX2(expand_bytes, min_expand_bytes); + expand_bytes = MIN2(expand_bytes, uncommitted_bytes); + if (G1PolicyVerbose > 1) { + gclog_or_tty->print("Decided to expand: ratio = %5.2f, " + "committed = %d%s, uncommited = %d%s, via pct = %d%s.\n" + " Answer = %d.\n", + recent_avg_pause_time_ratio(), + byte_size_in_proper_unit(committed_bytes), + proper_unit_for_byte_size(committed_bytes), + byte_size_in_proper_unit(uncommitted_bytes), + proper_unit_for_byte_size(uncommitted_bytes), + byte_size_in_proper_unit(expand_bytes_via_pct), + proper_unit_for_byte_size(expand_bytes_via_pct), + byte_size_in_proper_unit(expand_bytes), + proper_unit_for_byte_size(expand_bytes)); + } + return expand_bytes; + } else { + return 0; + } +} + +void G1CollectorPolicy::note_start_of_mark_thread() { + _mark_thread_startup_sec = os::elapsedTime(); +} + +class CountCSClosure: public HeapRegionClosure { + G1CollectorPolicy* _g1_policy; +public: + CountCSClosure(G1CollectorPolicy* g1_policy) : + _g1_policy(g1_policy) {} + bool doHeapRegion(HeapRegion* r) { + _g1_policy->_bytes_in_collection_set_before_gc += r->used(); + return false; + } +}; + +void G1CollectorPolicy::count_CS_bytes_used() { + CountCSClosure cs_closure(this); + _g1->collection_set_iterate(&cs_closure); +} + +static void print_indent(int level) { + for (int j = 0; j < level+1; ++j) + gclog_or_tty->print(" "); +} + +void G1CollectorPolicy::print_summary (int level, + const char* str, + NumberSeq* seq) const { + double sum = seq->sum(); + print_indent(level); + gclog_or_tty->print_cr("%-24s = %8.2lf s (avg = %8.2lf ms)", + str, sum / 1000.0, seq->avg()); 
+} + +void G1CollectorPolicy::print_summary_sd (int level, + const char* str, + NumberSeq* seq) const { + print_summary(level, str, seq); + print_indent(level + 5); + gclog_or_tty->print_cr("(num = %5d, std dev = %8.2lf ms, max = %8.2lf ms)", + seq->num(), seq->sd(), seq->maximum()); +} + +void G1CollectorPolicy::check_other_times(int level, + NumberSeq* other_times_ms, + NumberSeq* calc_other_times_ms) const { + bool should_print = false; + + double max_sum = MAX2(fabs(other_times_ms->sum()), + fabs(calc_other_times_ms->sum())); + double min_sum = MIN2(fabs(other_times_ms->sum()), + fabs(calc_other_times_ms->sum())); + double sum_ratio = max_sum / min_sum; + if (sum_ratio > 1.1) { + should_print = true; + print_indent(level + 1); + gclog_or_tty->print_cr("## CALCULATED OTHER SUM DOESN'T MATCH RECORDED ###"); + } + + double max_avg = MAX2(fabs(other_times_ms->avg()), + fabs(calc_other_times_ms->avg())); + double min_avg = MIN2(fabs(other_times_ms->avg()), + fabs(calc_other_times_ms->avg())); + double avg_ratio = max_avg / min_avg; + if (avg_ratio > 1.1) { + should_print = true; + print_indent(level + 1); + gclog_or_tty->print_cr("## CALCULATED OTHER AVG DOESN'T MATCH RECORDED ###"); + } + + if (other_times_ms->sum() < -0.01) { + print_indent(level + 1); + gclog_or_tty->print_cr("## RECORDED OTHER SUM IS NEGATIVE ###"); + } + + if (other_times_ms->avg() < -0.01) { + print_indent(level + 1); + gclog_or_tty->print_cr("## RECORDED OTHER AVG IS NEGATIVE ###"); + } + + if (calc_other_times_ms->sum() < -0.01) { + should_print = true; + print_indent(level + 1); + gclog_or_tty->print_cr("## CALCULATED OTHER SUM IS NEGATIVE ###"); + } + + if (calc_other_times_ms->avg() < -0.01) { + should_print = true; + print_indent(level + 1); + gclog_or_tty->print_cr("## CALCULATED OTHER AVG IS NEGATIVE ###"); + } + + if (should_print) + print_summary(level, "Other(Calc)", calc_other_times_ms); +} + +void G1CollectorPolicy::print_summary(PauseSummary* summary) const { + bool parallel = 
ParallelGCThreads > 0; + MainBodySummary* body_summary = summary->main_body_summary(); + PopPreambleSummary* preamble_summary = summary->pop_preamble_summary(); + + if (summary->get_total_seq()->num() > 0) { + print_summary_sd(0, + (preamble_summary == NULL) ? "Non-Popular Pauses" : + "Popular Pauses", + summary->get_total_seq()); + if (preamble_summary != NULL) { + print_summary(1, "Popularity Preamble", + preamble_summary->get_pop_preamble_seq()); + print_summary(2, "Update RS", preamble_summary->get_pop_update_rs_seq()); + print_summary(2, "Scan RS", preamble_summary->get_pop_scan_rs_seq()); + print_summary(2, "Closure App", + preamble_summary->get_pop_closure_app_seq()); + print_summary(2, "Evacuation", + preamble_summary->get_pop_evacuation_seq()); + print_summary(2, "Other", preamble_summary->get_pop_other_seq()); + { + NumberSeq* other_parts[] = { + preamble_summary->get_pop_update_rs_seq(), + preamble_summary->get_pop_scan_rs_seq(), + preamble_summary->get_pop_closure_app_seq(), + preamble_summary->get_pop_evacuation_seq() + }; + NumberSeq calc_other_times_ms(preamble_summary->get_pop_preamble_seq(), + 4, other_parts); + check_other_times(2, preamble_summary->get_pop_other_seq(), + &calc_other_times_ms); + } + } + if (body_summary != NULL) { + print_summary(1, "SATB Drain", body_summary->get_satb_drain_seq()); + if (parallel) { + print_summary(1, "Parallel Time", body_summary->get_parallel_seq()); + print_summary(2, "Update RS", body_summary->get_update_rs_seq()); + print_summary(2, "Ext Root Scanning", + body_summary->get_ext_root_scan_seq()); + print_summary(2, "Mark Stack Scanning", + body_summary->get_mark_stack_scan_seq()); + print_summary(2, "Scan-Only Scanning", + body_summary->get_scan_only_seq()); + print_summary(2, "Scan RS", body_summary->get_scan_rs_seq()); + print_summary(2, "Object Copy", body_summary->get_obj_copy_seq()); + print_summary(2, "Termination", body_summary->get_termination_seq()); + print_summary(2, "Other", 
body_summary->get_parallel_other_seq()); + { + NumberSeq* other_parts[] = { + body_summary->get_update_rs_seq(), + body_summary->get_ext_root_scan_seq(), + body_summary->get_mark_stack_scan_seq(), + body_summary->get_scan_only_seq(), + body_summary->get_scan_rs_seq(), + body_summary->get_obj_copy_seq(), + body_summary->get_termination_seq() + }; + NumberSeq calc_other_times_ms(body_summary->get_parallel_seq(), + 7, other_parts); + check_other_times(2, body_summary->get_parallel_other_seq(), + &calc_other_times_ms); + } + print_summary(1, "Mark Closure", body_summary->get_mark_closure_seq()); + print_summary(1, "Clear CT", body_summary->get_clear_ct_seq()); + } else { + print_summary(1, "Update RS", body_summary->get_update_rs_seq()); + print_summary(1, "Ext Root Scanning", + body_summary->get_ext_root_scan_seq()); + print_summary(1, "Mark Stack Scanning", + body_summary->get_mark_stack_scan_seq()); + print_summary(1, "Scan-Only Scanning", + body_summary->get_scan_only_seq()); + print_summary(1, "Scan RS", body_summary->get_scan_rs_seq()); + print_summary(1, "Object Copy", body_summary->get_obj_copy_seq()); + } + } + print_summary(1, "Other", summary->get_other_seq()); + { + NumberSeq calc_other_times_ms; + if (body_summary != NULL) { + // not abandoned + if (parallel) { + // parallel + NumberSeq* other_parts[] = { + body_summary->get_satb_drain_seq(), + (preamble_summary == NULL) ? NULL : + preamble_summary->get_pop_preamble_seq(), + body_summary->get_parallel_seq(), + body_summary->get_clear_ct_seq() + }; + calc_other_times_ms = NumberSeq (summary->get_total_seq(), + 4, other_parts); + } else { + // serial + NumberSeq* other_parts[] = { + body_summary->get_satb_drain_seq(), + (preamble_summary == NULL) ? 
NULL : + preamble_summary->get_pop_preamble_seq(), + body_summary->get_update_rs_seq(), + body_summary->get_ext_root_scan_seq(), + body_summary->get_mark_stack_scan_seq(), + body_summary->get_scan_only_seq(), + body_summary->get_scan_rs_seq(), + body_summary->get_obj_copy_seq() + }; + calc_other_times_ms = NumberSeq(summary->get_total_seq(), + 8, other_parts); + } + } else { + // abandoned + NumberSeq* other_parts[] = { + (preamble_summary == NULL) ? NULL : + preamble_summary->get_pop_preamble_seq() + }; + calc_other_times_ms = NumberSeq(summary->get_total_seq(), + 1, other_parts); + } + check_other_times(1, summary->get_other_seq(), &calc_other_times_ms); + } + } else { + print_indent(0); + gclog_or_tty->print_cr("none"); + } + gclog_or_tty->print_cr(""); +} + +void +G1CollectorPolicy::print_abandoned_summary(PauseSummary* non_pop_summary, + PauseSummary* pop_summary) const { + bool printed = false; + if (non_pop_summary->get_total_seq()->num() > 0) { + printed = true; + print_summary(non_pop_summary); + } + if (pop_summary->get_total_seq()->num() > 0) { + printed = true; + print_summary(pop_summary); + } + + if (!printed) { + print_indent(0); + gclog_or_tty->print_cr("none"); + gclog_or_tty->print_cr(""); + } +} + +void G1CollectorPolicy::print_tracing_info() const { + if (TraceGen0Time) { + gclog_or_tty->print_cr("ALL PAUSES"); + print_summary_sd(0, "Total", _all_pause_times_ms); + gclog_or_tty->print_cr(""); + gclog_or_tty->print_cr(""); + gclog_or_tty->print_cr(" Full Young GC Pauses: %8d", _full_young_pause_num); + gclog_or_tty->print_cr(" Partial Young GC Pauses: %8d", _partial_young_pause_num); + gclog_or_tty->print_cr(""); + + gclog_or_tty->print_cr("NON-POPULAR PAUSES"); + print_summary(_non_pop_summary); + + gclog_or_tty->print_cr("POPULAR PAUSES"); + print_summary(_pop_summary); + + gclog_or_tty->print_cr("ABANDONED PAUSES"); + print_abandoned_summary(_non_pop_abandoned_summary, + _pop_abandoned_summary); + + gclog_or_tty->print_cr("MISC"); + 
print_summary_sd(0, "Stop World", _all_stop_world_times_ms); + print_summary_sd(0, "Yields", _all_yield_times_ms); + for (int i = 0; i < _aux_num; ++i) { + if (_all_aux_times_ms[i].num() > 0) { + char buffer[96]; + sprintf(buffer, "Aux%d", i); + print_summary_sd(0, buffer, &_all_aux_times_ms[i]); + } + } + + size_t all_region_num = _region_num_young + _region_num_tenured; + gclog_or_tty->print_cr(" New Regions %8d, Young %8d (%6.2lf%%), " + "Tenured %8d (%6.2lf%%)", + all_region_num, + _region_num_young, + (double) _region_num_young / (double) all_region_num * 100.0, + _region_num_tenured, + (double) _region_num_tenured / (double) all_region_num * 100.0); + + if (!G1RSBarrierUseQueue) { + gclog_or_tty->print_cr("Of %d times conc refinement was enabled, %d (%7.2f%%) " + "did zero traversals.", + _conc_refine_enabled, _conc_refine_zero_traversals, + _conc_refine_enabled > 0 ? + 100.0 * (float)_conc_refine_zero_traversals/ + (float)_conc_refine_enabled : 0.0); + gclog_or_tty->print_cr(" Max # of traversals = %d.", + _conc_refine_max_traversals); + gclog_or_tty->print_cr(""); + } + } + if (TraceGen1Time) { + if (_all_full_gc_times_ms->num() > 0) { + gclog_or_tty->print("\n%4d full_gcs: total time = %8.2f s", + _all_full_gc_times_ms->num(), + _all_full_gc_times_ms->sum() / 1000.0); + gclog_or_tty->print_cr(" (avg = %8.2fms).", _all_full_gc_times_ms->avg()); + gclog_or_tty->print_cr(" [std. 
dev = %8.2f ms, max = %8.2f ms]", + _all_full_gc_times_ms->sd(), + _all_full_gc_times_ms->maximum()); + } + } +} + +void G1CollectorPolicy::print_yg_surv_rate_info() const { +#ifndef PRODUCT + _short_lived_surv_rate_group->print_surv_rate_summary(); + // add this call for any other surv rate groups +#endif // PRODUCT +} + +void G1CollectorPolicy::update_conc_refine_data() { + unsigned traversals = _g1->concurrent_g1_refine()->disable(); + if (traversals == 0) _conc_refine_zero_traversals++; + _conc_refine_max_traversals = MAX2(_conc_refine_max_traversals, + (size_t)traversals); + + if (G1PolicyVerbose > 1) + gclog_or_tty->print_cr("Did a CR traversal series: %d traversals.", traversals); + double multiplier = 1.0; + if (traversals == 0) { + multiplier = 4.0; + } else if (traversals > (size_t)G1ConcRefineTargTraversals) { + multiplier = 1.0/1.5; + } else if (traversals < (size_t)G1ConcRefineTargTraversals) { + multiplier = 1.5; + } + if (G1PolicyVerbose > 1) { + gclog_or_tty->print_cr(" Multiplier = %7.2f.", multiplier); + gclog_or_tty->print(" Delta went from %d regions to ", + _conc_refine_current_delta); + } + _conc_refine_current_delta = + MIN2(_g1->n_regions(), + (size_t)(_conc_refine_current_delta * multiplier)); + _conc_refine_current_delta = + MAX2(_conc_refine_current_delta, (size_t)1); + if (G1PolicyVerbose > 1) { + gclog_or_tty->print_cr("%d regions.", _conc_refine_current_delta); + } + _conc_refine_enabled++; +} + +void G1CollectorPolicy::set_single_region_collection_set(HeapRegion* hr) { + assert(collection_set() == NULL, "Must be no current CS."); + _collection_set_size = 0; + _collection_set_bytes_used_before = 0; + add_to_collection_set(hr); + count_CS_bytes_used(); +} + +bool +G1CollectorPolicy::should_add_next_region_to_young_list() { + assert(in_young_gc_mode(), "should be in young GC mode"); + bool ret; + size_t young_list_length = _g1->young_list_length(); + + if (young_list_length < _young_list_target_length) { + ret = true; + 
++_region_num_young; + } else { + ret = false; + ++_region_num_tenured; + } + + return ret; +} + +#ifndef PRODUCT +// for debugging, bit of a hack... +static char* +region_num_to_mbs(int length) { + static char buffer[64]; + double bytes = (double) (length * HeapRegion::GrainBytes); + double mbs = bytes / (double) (1024 * 1024); + sprintf(buffer, "%7.2lfMB", mbs); + return buffer; +} +#endif // PRODUCT + +void +G1CollectorPolicy::checkpoint_conc_overhead() { + double conc_overhead = 0.0; + if (G1AccountConcurrentOverhead) + conc_overhead = COTracker::totalPredConcOverhead(); + _mmu_tracker->update_conc_overhead(conc_overhead); +#if 0 + gclog_or_tty->print(" CO %1.4lf TARGET %1.4lf", + conc_overhead, _mmu_tracker->max_gc_time()); +#endif +} + + +uint G1CollectorPolicy::max_regions(int purpose) { + switch (purpose) { + case GCAllocForSurvived: + return G1MaxSurvivorRegions; + case GCAllocForTenured: + return UINT_MAX; + default: + return UINT_MAX; + }; +} + +void +G1CollectorPolicy_BestRegionsFirst:: +set_single_region_collection_set(HeapRegion* hr) { + G1CollectorPolicy::set_single_region_collection_set(hr); + _collectionSetChooser->removeRegion(hr); +} + + +bool +G1CollectorPolicy_BestRegionsFirst::should_do_collection_pause(size_t + word_size) { + assert(_g1->regions_accounted_for(), "Region leakage!"); + // Initiate a pause when we reach the steady-state "used" target. 
+ size_t used_hard = (_g1->capacity() / 100) * G1SteadyStateUsed; + size_t used_soft = + MAX2((_g1->capacity() / 100) * (G1SteadyStateUsed - G1SteadyStateUsedDelta), + used_hard/2); + size_t used = _g1->used(); + + double max_pause_time_ms = _mmu_tracker->max_gc_time() * 1000.0; + + size_t young_list_length = _g1->young_list_length(); + bool reached_target_length = young_list_length >= _young_list_target_length; + + if (in_young_gc_mode()) { + if (reached_target_length) { + assert( young_list_length > 0 && _g1->young_list_length() > 0, + "invariant" ); + _target_pause_time_ms = max_pause_time_ms; + return true; + } + } else { + guarantee( false, "should not reach here" ); + } + + return false; +} + +#ifndef PRODUCT +class HRSortIndexIsOKClosure: public HeapRegionClosure { + CollectionSetChooser* _chooser; +public: + HRSortIndexIsOKClosure(CollectionSetChooser* chooser) : + _chooser(chooser) {} + + bool doHeapRegion(HeapRegion* r) { + if (!r->continuesHumongous()) { + assert(_chooser->regionProperlyOrdered(r), "Ought to be."); + } + return false; + } +}; + +bool G1CollectorPolicy_BestRegionsFirst::assertMarkedBytesDataOK() { + HRSortIndexIsOKClosure cl(_collectionSetChooser); + _g1->heap_region_iterate(&cl); + return true; +} +#endif + +void +G1CollectorPolicy_BestRegionsFirst:: +record_collection_pause_start(double start_time_sec, size_t start_used) { + G1CollectorPolicy::record_collection_pause_start(start_time_sec, start_used); +} + +class NextNonCSElemFinder: public HeapRegionClosure { + HeapRegion* _res; +public: + NextNonCSElemFinder(): _res(NULL) {} + bool doHeapRegion(HeapRegion* r) { + if (!r->in_collection_set()) { + _res = r; + return true; + } else { + return false; + } + } + HeapRegion* res() { return _res; } +}; + +class KnownGarbageClosure: public HeapRegionClosure { + CollectionSetChooser* _hrSorted; + +public: + KnownGarbageClosure(CollectionSetChooser* hrSorted) : + _hrSorted(hrSorted) + {} + + bool doHeapRegion(HeapRegion* r) { + // We only 
include humongous regions in collection + // sets when concurrent mark shows that their contained object is + // unreachable. + + // Do we have any marking information for this region? + if (r->is_marked()) { + // We don't include humongous regions in collection + // sets because we collect them immediately at the end of a marking + // cycle. We also don't include young regions because we *must* + // include them in the next collection pause. + if (!r->isHumongous() && !r->is_young()) { + _hrSorted->addMarkedHeapRegion(r); + } + } + return false; + } +}; + +class ParKnownGarbageHRClosure: public HeapRegionClosure { + CollectionSetChooser* _hrSorted; + jint _marked_regions_added; + jint _chunk_size; + jint _cur_chunk_idx; + jint _cur_chunk_end; // Cur chunk [_cur_chunk_idx, _cur_chunk_end) + int _worker; + int _invokes; + + void get_new_chunk() { + _cur_chunk_idx = _hrSorted->getParMarkedHeapRegionChunk(_chunk_size); + _cur_chunk_end = _cur_chunk_idx + _chunk_size; + } + void add_region(HeapRegion* r) { + if (_cur_chunk_idx == _cur_chunk_end) { + get_new_chunk(); + } + assert(_cur_chunk_idx < _cur_chunk_end, "postcondition"); + _hrSorted->setMarkedHeapRegion(_cur_chunk_idx, r); + _marked_regions_added++; + _cur_chunk_idx++; + } + +public: + ParKnownGarbageHRClosure(CollectionSetChooser* hrSorted, + jint chunk_size, + int worker) : + _hrSorted(hrSorted), _chunk_size(chunk_size), _worker(worker), + _marked_regions_added(0), _cur_chunk_idx(0), _cur_chunk_end(0), + _invokes(0) + {} + + bool doHeapRegion(HeapRegion* r) { + // We only include humongous regions in collection + // sets when concurrent mark shows that their contained object is + // unreachable. + _invokes++; + + // Do we have any marking information for this region? + if (r->is_marked()) { + // We don't include humongous regions in collection + // sets because we collect them immediately at the end of a marking + // cycle. 
+ // We also do not include young regions in collection sets + if (!r->isHumongous() && !r->is_young()) { + add_region(r); + } + } + return false; + } + jint marked_regions_added() { return _marked_regions_added; } + int invokes() { return _invokes; } +}; + +class ParKnownGarbageTask: public AbstractGangTask { + CollectionSetChooser* _hrSorted; + jint _chunk_size; + G1CollectedHeap* _g1; +public: + ParKnownGarbageTask(CollectionSetChooser* hrSorted, jint chunk_size) : + AbstractGangTask("ParKnownGarbageTask"), + _hrSorted(hrSorted), _chunk_size(chunk_size), + _g1(G1CollectedHeap::heap()) + {} + + void work(int i) { + ParKnownGarbageHRClosure parKnownGarbageCl(_hrSorted, _chunk_size, i); + // Back to zero for the claim value. + _g1->heap_region_par_iterate_chunked(&parKnownGarbageCl, i, + HeapRegion::InitialClaimValue); + jint regions_added = parKnownGarbageCl.marked_regions_added(); + _hrSorted->incNumMarkedHeapRegions(regions_added); + if (G1PrintParCleanupStats) { + gclog_or_tty->print(" Thread %d called %d times, added %d regions to list.\n", + i, parKnownGarbageCl.invokes(), regions_added); + } + } +}; + +void +G1CollectorPolicy_BestRegionsFirst:: +record_concurrent_mark_cleanup_end(size_t freed_bytes, + size_t max_live_bytes) { + double start; + if (G1PrintParCleanupStats) start = os::elapsedTime(); + record_concurrent_mark_cleanup_end_work1(freed_bytes, max_live_bytes); + + _collectionSetChooser->clearMarkedHeapRegions(); + double clear_marked_end; + if (G1PrintParCleanupStats) { + clear_marked_end = os::elapsedTime(); + gclog_or_tty->print_cr(" clear marked regions + work1: %8.3f ms.", + (clear_marked_end - start)*1000.0); + } + if (ParallelGCThreads > 0) { + const size_t OverpartitionFactor = 4; + const size_t MinChunkSize = 8; + const size_t ChunkSize = + MAX2(_g1->n_regions() / (ParallelGCThreads * OverpartitionFactor), + MinChunkSize); + _collectionSetChooser->prepareForAddMarkedHeapRegionsPar(_g1->n_regions(), + ChunkSize); + ParKnownGarbageTask 
parKnownGarbageTask(_collectionSetChooser, + (int) ChunkSize); + _g1->workers()->run_task(&parKnownGarbageTask); + + assert(_g1->check_heap_region_claim_values(HeapRegion::InitialClaimValue), + "sanity check"); + } else { + KnownGarbageClosure knownGarbagecl(_collectionSetChooser); + _g1->heap_region_iterate(&knownGarbagecl); + } + double known_garbage_end; + if (G1PrintParCleanupStats) { + known_garbage_end = os::elapsedTime(); + gclog_or_tty->print_cr(" compute known garbage: %8.3f ms.", + (known_garbage_end - clear_marked_end)*1000.0); + } + _collectionSetChooser->sortMarkedHeapRegions(); + double sort_end; + if (G1PrintParCleanupStats) { + sort_end = os::elapsedTime(); + gclog_or_tty->print_cr(" sorting: %8.3f ms.", + (sort_end - known_garbage_end)*1000.0); + } + + record_concurrent_mark_cleanup_end_work2(); + double work2_end; + if (G1PrintParCleanupStats) { + work2_end = os::elapsedTime(); + gclog_or_tty->print_cr(" work2: %8.3f ms.", + (work2_end - sort_end)*1000.0); + } +} + +// Add the heap region to the collection set and return the conservative +// estimate of the number of live bytes. +void G1CollectorPolicy:: +add_to_collection_set(HeapRegion* hr) { + if (G1TraceRegions) { + gclog_or_tty->print_cr("added region to cset %d:["PTR_FORMAT", "PTR_FORMAT"], " + "top "PTR_FORMAT", young %s", + hr->hrs_index(), hr->bottom(), hr->end(), + hr->top(), (hr->is_young()) ? 
"YES" : "NO"); + } + + if (_g1->mark_in_progress()) + _g1->concurrent_mark()->registerCSetRegion(hr); + + assert(!hr->in_collection_set(), + "should not already be in the CSet"); + hr->set_in_collection_set(true); + hr->set_next_in_collection_set(_collection_set); + _collection_set = hr; + _collection_set_size++; + _collection_set_bytes_used_before += hr->used(); +} + +void +G1CollectorPolicy_BestRegionsFirst:: +choose_collection_set(HeapRegion* pop_region) { + double non_young_start_time_sec; + start_recording_regions(); + + if (pop_region != NULL) { + _target_pause_time_ms = (double) G1MaxPauseTimeMS; + } else { + guarantee(_target_pause_time_ms > -1.0, + "_target_pause_time_ms should have been set!"); + } + + // pop region is either null (and so is CS), or else it *is* the CS. + assert(_collection_set == pop_region, "Precondition"); + + double base_time_ms = predict_base_elapsed_time_ms(_pending_cards); + double predicted_pause_time_ms = base_time_ms; + + double target_time_ms = _target_pause_time_ms; + double time_remaining_ms = target_time_ms - base_time_ms; + + // the 10% and 50% values are arbitrary... + if (time_remaining_ms < 0.10*target_time_ms) { + time_remaining_ms = 0.50 * target_time_ms; + _within_target = false; + } else { + _within_target = true; + } + + // We figure out the number of bytes available for future to-space. + // For new regions without marking information, we must assume the + // worst-case of complete survival. If we have marking information for a + // region, we can bound the amount of live data. We can add a number of + // such regions, as long as the sum of the live data bounds does not + // exceed the available evacuation space. + size_t max_live_bytes = _g1->free_regions() * HeapRegion::GrainBytes; + + size_t expansion_bytes = + _g1->expansion_regions() * HeapRegion::GrainBytes; + + if (pop_region == NULL) { + _collection_set_bytes_used_before = 0; + _collection_set_size = 0; + } + + // Adjust for expansion and slop. 
+ max_live_bytes = max_live_bytes + expansion_bytes; + + assert(pop_region != NULL || _g1->regions_accounted_for(), "Region leakage!"); + + HeapRegion* hr; + if (in_young_gc_mode()) { + double young_start_time_sec = os::elapsedTime(); + + if (G1PolicyVerbose > 0) { + gclog_or_tty->print_cr("Adding %d young regions to the CSet", + _g1->young_list_length()); + } + _young_cset_length = 0; + _last_young_gc_full = full_young_gcs() ? true : false; + if (_last_young_gc_full) + ++_full_young_pause_num; + else + ++_partial_young_pause_num; + hr = _g1->pop_region_from_young_list(); + while (hr != NULL) { + + assert( hr->young_index_in_cset() == -1, "invariant" ); + assert( hr->age_in_surv_rate_group() != -1, "invariant" ); + hr->set_young_index_in_cset((int) _young_cset_length); + + ++_young_cset_length; + double predicted_time_ms = predict_region_elapsed_time_ms(hr, true); + time_remaining_ms -= predicted_time_ms; + predicted_pause_time_ms += predicted_time_ms; + if (hr == pop_region) { + // The popular region was young. Skip over it. + assert(hr->in_collection_set(), "It's the pop region."); + } else { + assert(!hr->in_collection_set(), "It's not the pop region."); + add_to_collection_set(hr); + record_cset_region(hr, true); + } + max_live_bytes -= MIN2(hr->max_live_bytes(), max_live_bytes); + if (G1PolicyVerbose > 0) { + gclog_or_tty->print_cr(" Added [" PTR_FORMAT ", " PTR_FORMAT") to CS.", + hr->bottom(), hr->end()); + gclog_or_tty->print_cr(" (" SIZE_FORMAT " KB left in heap.)", + max_live_bytes/K); + } + hr = _g1->pop_region_from_young_list(); + } + + record_scan_only_regions(_g1->young_list_scan_only_length()); + + double young_end_time_sec = os::elapsedTime(); + _recorded_young_cset_choice_time_ms = + (young_end_time_sec - young_start_time_sec) * 1000.0; + + non_young_start_time_sec = os::elapsedTime(); + + if (_young_cset_length > 0 && _last_young_gc_full) { + // don't bother adding more regions... 
+ goto choose_collection_set_end; + } + } else if (pop_region != NULL) { + // We're not in young mode, and we chose a popular region; don't choose + // any more. + return; + } + + if (!in_young_gc_mode() || !full_young_gcs()) { + bool should_continue = true; + NumberSeq seq; + double avg_prediction = 100000000000000000.0; // something very large + do { + hr = _collectionSetChooser->getNextMarkedRegion(time_remaining_ms, + avg_prediction); + if (hr != NULL && !hr->popular()) { + double predicted_time_ms = predict_region_elapsed_time_ms(hr, false); + time_remaining_ms -= predicted_time_ms; + predicted_pause_time_ms += predicted_time_ms; + add_to_collection_set(hr); + record_cset_region(hr, false); + max_live_bytes -= MIN2(hr->max_live_bytes(), max_live_bytes); + if (G1PolicyVerbose > 0) { + gclog_or_tty->print_cr(" (" SIZE_FORMAT " KB left in heap.)", + max_live_bytes/K); + } + seq.add(predicted_time_ms); + avg_prediction = seq.avg() + seq.sd(); + } + should_continue = + ( hr != NULL) && + ( (adaptive_young_list_length()) ? 
time_remaining_ms > 0.0 + : _collection_set_size < _young_list_fixed_length ); + } while (should_continue); + + if (!adaptive_young_list_length() && + _collection_set_size < _young_list_fixed_length) + _should_revert_to_full_young_gcs = true; + } + +choose_collection_set_end: + count_CS_bytes_used(); + + end_recording_regions(); + + double non_young_end_time_sec = os::elapsedTime(); + _recorded_non_young_cset_choice_time_ms = + (non_young_end_time_sec - non_young_start_time_sec) * 1000.0; +} + +void G1CollectorPolicy_BestRegionsFirst::record_full_collection_end() { + G1CollectorPolicy::record_full_collection_end(); + _collectionSetChooser->updateAfterFullCollection(); +} + +void G1CollectorPolicy_BestRegionsFirst:: +expand_if_possible(size_t numRegions) { + size_t expansion_bytes = numRegions * HeapRegion::GrainBytes; + _g1->expand(expansion_bytes); +} + +void G1CollectorPolicy_BestRegionsFirst:: +record_collection_pause_end(bool popular, bool abandoned) { + G1CollectorPolicy::record_collection_pause_end(popular, abandoned); + assert(assertMarkedBytesDataOK(), "Marked regions not OK at pause end."); +} + +// Local Variables: *** +// c-indentation-style: gnu *** +// End: *** diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp new file mode 100644 index 00000000000..110f81fef0e --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp @@ -0,0 +1,1199 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// A G1CollectorPolicy makes policy decisions that determine the +// characteristics of the collector. Examples include: +// * choice of collection set. +// * when to collect. + +class HeapRegion; +class CollectionSetChooser; + +// Yes, this is a bit unpleasant... but it saves replicating the same thing +// over and over again and introducing subtle problems through small typos and +// cutting and pasting mistakes. The macro below introduces a number +// sequence into the following classes and the methods that access it. 
+ +#define define_num_seq(name) \ +private: \ + NumberSeq _all_##name##_times_ms; \ +public: \ + void record_##name##_time_ms(double ms) { \ + _all_##name##_times_ms.add(ms); \ + } \ + NumberSeq* get_##name##_seq() { \ + return &_all_##name##_times_ms; \ + } + +class MainBodySummary; +class PopPreambleSummary; + +class PauseSummary { + define_num_seq(total) + define_num_seq(other) + +public: + virtual MainBodySummary* main_body_summary() { return NULL; } + virtual PopPreambleSummary* pop_preamble_summary() { return NULL; } +}; + +class MainBodySummary { + define_num_seq(satb_drain) // optional + define_num_seq(parallel) // parallel only + define_num_seq(ext_root_scan) + define_num_seq(mark_stack_scan) + define_num_seq(scan_only) + define_num_seq(update_rs) + define_num_seq(scan_rs) + define_num_seq(scan_new_refs) // Only for temp use; added to + // in parallel case. + define_num_seq(obj_copy) + define_num_seq(termination) // parallel only + define_num_seq(parallel_other) // parallel only + define_num_seq(mark_closure) + define_num_seq(clear_ct) // parallel only +}; + +class PopPreambleSummary { + define_num_seq(pop_preamble) + define_num_seq(pop_update_rs) + define_num_seq(pop_scan_rs) + define_num_seq(pop_closure_app) + define_num_seq(pop_evacuation) + define_num_seq(pop_other) +}; + +class NonPopSummary: public PauseSummary, + public MainBodySummary { +public: + virtual MainBodySummary* main_body_summary() { return this; } +}; + +class PopSummary: public PauseSummary, + public MainBodySummary, + public PopPreambleSummary { +public: + virtual MainBodySummary* main_body_summary() { return this; } + virtual PopPreambleSummary* pop_preamble_summary() { return this; } +}; + +class NonPopAbandonedSummary: public PauseSummary { +}; + +class PopAbandonedSummary: public PauseSummary, + public PopPreambleSummary { +public: + virtual PopPreambleSummary* pop_preamble_summary() { return this; } +}; + +class G1CollectorPolicy: public CollectorPolicy { +protected: + // The 
number of pauses during the execution. + long _n_pauses; + + // either equal to the number of parallel threads, if ParallelGCThreads + // has been set, or 1 otherwise + int _parallel_gc_threads; + + enum SomePrivateConstants { + NumPrevPausesForHeuristics = 10, + NumPrevGCsForHeuristics = 10, + NumAPIs = HeapRegion::MaxAge + }; + + G1MMUTracker* _mmu_tracker; + + void initialize_flags(); + + void initialize_all() { + initialize_flags(); + initialize_size_info(); + initialize_perm_generation(PermGen::MarkSweepCompact); + } + + virtual size_t default_init_heap_size() { + // Pick some reasonable default. + return 8*M; + } + + + double _cur_collection_start_sec; + size_t _cur_collection_pause_used_at_start_bytes; + size_t _cur_collection_pause_used_regions_at_start; + size_t _prev_collection_pause_used_at_end_bytes; + double _cur_collection_par_time_ms; + double _cur_satb_drain_time_ms; + double _cur_clear_ct_time_ms; + bool _satb_drain_time_set; + double _cur_popular_preamble_start_ms; + double _cur_popular_preamble_time_ms; + double _cur_popular_compute_rc_time_ms; + double _cur_popular_evac_time_ms; + + double _cur_CH_strong_roots_end_sec; + double _cur_CH_strong_roots_dur_ms; + double _cur_G1_strong_roots_end_sec; + double _cur_G1_strong_roots_dur_ms; + + // Statistics for recent GC pauses. See below for how indexed. + TruncatedSeq* _recent_CH_strong_roots_times_ms; + TruncatedSeq* _recent_G1_strong_roots_times_ms; + TruncatedSeq* _recent_evac_times_ms; + // These exclude marking times. 
+ TruncatedSeq* _recent_pause_times_ms; + TruncatedSeq* _recent_gc_times_ms; + + TruncatedSeq* _recent_CS_bytes_used_before; + TruncatedSeq* _recent_CS_bytes_surviving; + + TruncatedSeq* _recent_rs_sizes; + + TruncatedSeq* _concurrent_mark_init_times_ms; + TruncatedSeq* _concurrent_mark_remark_times_ms; + TruncatedSeq* _concurrent_mark_cleanup_times_ms; + + NonPopSummary* _non_pop_summary; + PopSummary* _pop_summary; + NonPopAbandonedSummary* _non_pop_abandoned_summary; + PopAbandonedSummary* _pop_abandoned_summary; + + NumberSeq* _all_pause_times_ms; + NumberSeq* _all_full_gc_times_ms; + double _stop_world_start; + NumberSeq* _all_stop_world_times_ms; + NumberSeq* _all_yield_times_ms; + + size_t _region_num_young; + size_t _region_num_tenured; + size_t _prev_region_num_young; + size_t _prev_region_num_tenured; + + NumberSeq* _all_mod_union_times_ms; + + int _aux_num; + NumberSeq* _all_aux_times_ms; + double* _cur_aux_start_times_ms; + double* _cur_aux_times_ms; + bool* _cur_aux_times_set; + + double* _par_last_ext_root_scan_times_ms; + double* _par_last_mark_stack_scan_times_ms; + double* _par_last_scan_only_times_ms; + double* _par_last_scan_only_regions_scanned; + double* _par_last_update_rs_start_times_ms; + double* _par_last_update_rs_times_ms; + double* _par_last_update_rs_processed_buffers; + double* _par_last_scan_rs_start_times_ms; + double* _par_last_scan_rs_times_ms; + double* _par_last_scan_new_refs_times_ms; + double* _par_last_obj_copy_times_ms; + double* _par_last_termination_times_ms; + + // there are two passes during popular pauses, so we need to store + // somewhere the results of the first pass + double* _pop_par_last_update_rs_start_times_ms; + double* _pop_par_last_update_rs_times_ms; + double* _pop_par_last_update_rs_processed_buffers; + double* _pop_par_last_scan_rs_start_times_ms; + double* _pop_par_last_scan_rs_times_ms; + double* _pop_par_last_closure_app_times_ms; + + double _pop_compute_rc_start; + double _pop_evac_start; + + //
indicates that we are in young GC mode + bool _in_young_gc_mode; + + // indicates whether we are in full young or partially young GC mode + bool _full_young_gcs; + + // if true, then it tries to dynamically adjust the length of the + // young list + bool _adaptive_young_list_length; + size_t _young_list_min_length; + size_t _young_list_target_length; + size_t _young_list_so_prefix_length; + size_t _young_list_fixed_length; + + size_t _young_cset_length; + bool _last_young_gc_full; + + double _target_pause_time_ms; + + unsigned _full_young_pause_num; + unsigned _partial_young_pause_num; + + bool _during_marking; + bool _in_marking_window; + bool _in_marking_window_im; + + SurvRateGroup* _short_lived_surv_rate_group; + SurvRateGroup* _survivor_surv_rate_group; + // add here any more surv rate groups + + bool during_marking() { + return _during_marking; + } + + // + +private: + enum PredictionConstants { + TruncatedSeqLength = 10 + }; + + TruncatedSeq* _alloc_rate_ms_seq; + double _prev_collection_pause_end_ms; + + TruncatedSeq* _pending_card_diff_seq; + TruncatedSeq* _rs_length_diff_seq; + TruncatedSeq* _cost_per_card_ms_seq; + TruncatedSeq* _cost_per_scan_only_region_ms_seq; + TruncatedSeq* _fully_young_cards_per_entry_ratio_seq; + TruncatedSeq* _partially_young_cards_per_entry_ratio_seq; + TruncatedSeq* _cost_per_entry_ms_seq; + TruncatedSeq* _partially_young_cost_per_entry_ms_seq; + TruncatedSeq* _cost_per_byte_ms_seq; + TruncatedSeq* _constant_other_time_ms_seq; + TruncatedSeq* _young_other_cost_per_region_ms_seq; + TruncatedSeq* _non_young_other_cost_per_region_ms_seq; + + TruncatedSeq* _pending_cards_seq; + TruncatedSeq* _scanned_cards_seq; + TruncatedSeq* _rs_lengths_seq; + + TruncatedSeq* _cost_per_byte_ms_during_cm_seq; + TruncatedSeq* _cost_per_scan_only_region_ms_during_cm_seq; + + TruncatedSeq* _young_gc_eff_seq; + + TruncatedSeq* _max_conc_overhead_seq; + + size_t _recorded_young_regions; + size_t _recorded_scan_only_regions; + size_t 
_recorded_non_young_regions; + size_t _recorded_region_num; + + size_t _free_regions_at_end_of_collection; + size_t _scan_only_regions_at_end_of_collection; + + size_t _recorded_rs_lengths; + size_t _max_rs_lengths; + + size_t _recorded_marked_bytes; + size_t _recorded_young_bytes; + + size_t _predicted_pending_cards; + size_t _predicted_cards_scanned; + size_t _predicted_rs_lengths; + size_t _predicted_bytes_to_copy; + + double _predicted_survival_ratio; + double _predicted_rs_update_time_ms; + double _predicted_rs_scan_time_ms; + double _predicted_scan_only_scan_time_ms; + double _predicted_object_copy_time_ms; + double _predicted_constant_other_time_ms; + double _predicted_young_other_time_ms; + double _predicted_non_young_other_time_ms; + double _predicted_pause_time_ms; + + double _vtime_diff_ms; + + double _recorded_young_free_cset_time_ms; + double _recorded_non_young_free_cset_time_ms; + + double _sigma; + double _expensive_region_limit_ms; + + size_t _rs_lengths_prediction; + + size_t _known_garbage_bytes; + double _known_garbage_ratio; + + double sigma() { + return _sigma; + } + + // A function that prevents us putting too much stock in small sample + // sets. Returns 1.0 for 5 or more samples; with fewer samples it returns + // 1.0 + sigma() * (5 - samples) / 2.0, which scales linearly from + // 1.0 + 2 * sigma() at 1 sample (2.0 when sigma() is 0.5) down to 1.0 at 5.
+ double confidence_factor(int samples) { + if (samples > 4) return 1.0; + else return 1.0 + sigma() * ((double)(5 - samples))/2.0; + } + + double get_new_neg_prediction(TruncatedSeq* seq) { + return seq->davg() - sigma() * seq->dsd(); + } + +#ifndef PRODUCT + bool verify_young_ages(HeapRegion* head, SurvRateGroup *surv_rate_group); +#endif // PRODUCT + +protected: + double _pause_time_target_ms; + double _recorded_young_cset_choice_time_ms; + double _recorded_non_young_cset_choice_time_ms; + bool _within_target; + size_t _pending_cards; + size_t _max_pending_cards; + +public: + + void set_region_short_lived(HeapRegion* hr) { + hr->install_surv_rate_group(_short_lived_surv_rate_group); + } + + void set_region_survivors(HeapRegion* hr) { + hr->install_surv_rate_group(_survivor_surv_rate_group); + } + +#ifndef PRODUCT + bool verify_young_ages(); +#endif // PRODUCT + + void tag_scan_only(size_t short_lived_scan_only_length); + + double get_new_prediction(TruncatedSeq* seq) { + return MAX2(seq->davg() + sigma() * seq->dsd(), + seq->davg() * confidence_factor(seq->num())); + } + + size_t young_cset_length() { + return _young_cset_length; + } + + void record_max_rs_lengths(size_t rs_lengths) { + _max_rs_lengths = rs_lengths; + } + + size_t predict_pending_card_diff() { + double prediction = get_new_neg_prediction(_pending_card_diff_seq); + if (prediction < 0.00001) + return 0; + else + return (size_t) prediction; + } + + size_t predict_pending_cards() { + size_t max_pending_card_num = _g1->max_pending_card_num(); + size_t diff = predict_pending_card_diff(); + size_t prediction; + if (diff > max_pending_card_num) + prediction = max_pending_card_num; + else + prediction = max_pending_card_num - diff; + + return prediction; + } + + size_t predict_rs_length_diff() { + return (size_t) get_new_prediction(_rs_length_diff_seq); + } + + double predict_alloc_rate_ms() { + return get_new_prediction(_alloc_rate_ms_seq); + } + + double predict_cost_per_card_ms() { + return 
get_new_prediction(_cost_per_card_ms_seq); + } + + double predict_rs_update_time_ms(size_t pending_cards) { + return (double) pending_cards * predict_cost_per_card_ms(); + } + + double predict_fully_young_cards_per_entry_ratio() { + return get_new_prediction(_fully_young_cards_per_entry_ratio_seq); + } + + double predict_partially_young_cards_per_entry_ratio() { + if (_partially_young_cards_per_entry_ratio_seq->num() < 2) + return predict_fully_young_cards_per_entry_ratio(); + else + return get_new_prediction(_partially_young_cards_per_entry_ratio_seq); + } + + size_t predict_young_card_num(size_t rs_length) { + return (size_t) ((double) rs_length * + predict_fully_young_cards_per_entry_ratio()); + } + + size_t predict_non_young_card_num(size_t rs_length) { + return (size_t) ((double) rs_length * + predict_partially_young_cards_per_entry_ratio()); + } + + double predict_rs_scan_time_ms(size_t card_num) { + if (full_young_gcs()) + return (double) card_num * get_new_prediction(_cost_per_entry_ms_seq); + else + return predict_partially_young_rs_scan_time_ms(card_num); + } + + double predict_partially_young_rs_scan_time_ms(size_t card_num) { + if (_partially_young_cost_per_entry_ms_seq->num() < 3) + return (double) card_num * get_new_prediction(_cost_per_entry_ms_seq); + else + return (double) card_num * + get_new_prediction(_partially_young_cost_per_entry_ms_seq); + } + + double predict_scan_only_time_ms_during_cm(size_t scan_only_region_num) { + if (_cost_per_scan_only_region_ms_during_cm_seq->num() < 3) + return 1.5 * (double) scan_only_region_num * + get_new_prediction(_cost_per_scan_only_region_ms_seq); + else + return (double) scan_only_region_num * + get_new_prediction(_cost_per_scan_only_region_ms_during_cm_seq); + } + + double predict_scan_only_time_ms(size_t scan_only_region_num) { + if (_in_marking_window_im) + return predict_scan_only_time_ms_during_cm(scan_only_region_num); + else + return (double) scan_only_region_num * + 
get_new_prediction(_cost_per_scan_only_region_ms_seq); + } + + double predict_object_copy_time_ms_during_cm(size_t bytes_to_copy) { + if (_cost_per_byte_ms_during_cm_seq->num() < 3) + return 1.1 * (double) bytes_to_copy * + get_new_prediction(_cost_per_byte_ms_seq); + else + return (double) bytes_to_copy * + get_new_prediction(_cost_per_byte_ms_during_cm_seq); + } + + double predict_object_copy_time_ms(size_t bytes_to_copy) { + if (_in_marking_window && !_in_marking_window_im) + return predict_object_copy_time_ms_during_cm(bytes_to_copy); + else + return (double) bytes_to_copy * + get_new_prediction(_cost_per_byte_ms_seq); + } + + double predict_constant_other_time_ms() { + return get_new_prediction(_constant_other_time_ms_seq); + } + + double predict_young_other_time_ms(size_t young_num) { + return + (double) young_num * + get_new_prediction(_young_other_cost_per_region_ms_seq); + } + + double predict_non_young_other_time_ms(size_t non_young_num) { + return + (double) non_young_num * + get_new_prediction(_non_young_other_cost_per_region_ms_seq); + } + + void check_if_region_is_too_expensive(double predicted_time_ms); + + double predict_young_collection_elapsed_time_ms(size_t adjustment); + double predict_base_elapsed_time_ms(size_t pending_cards); + double predict_base_elapsed_time_ms(size_t pending_cards, + size_t scanned_cards); + size_t predict_bytes_to_copy(HeapRegion* hr); + double predict_region_elapsed_time_ms(HeapRegion* hr, bool young); + + // for use by: calculate_optimal_so_length(length) + void predict_gc_eff(size_t young_region_num, + size_t so_length, + double base_time_ms, + double *gc_eff, + double *pause_time_ms); + + // for use by: calculate_young_list_target_config(rs_length) + bool predict_gc_eff(size_t young_region_num, + size_t so_length, + double base_time_with_so_ms, + size_t init_free_regions, + double target_pause_time_ms, + double* gc_eff); + + void start_recording_regions(); + void record_cset_region(HeapRegion* hr, bool young); + void 
record_scan_only_regions(size_t scan_only_length); + void end_recording_regions(); + + void record_vtime_diff_ms(double vtime_diff_ms) { + _vtime_diff_ms = vtime_diff_ms; + } + + void record_young_free_cset_time_ms(double time_ms) { + _recorded_young_free_cset_time_ms = time_ms; + } + + void record_non_young_free_cset_time_ms(double time_ms) { + _recorded_non_young_free_cset_time_ms = time_ms; + } + + double predict_young_gc_eff() { + return get_new_neg_prediction(_young_gc_eff_seq); + } + + // + +public: + void cset_regions_freed() { + bool propagate = _last_young_gc_full && !_in_marking_window; + _short_lived_surv_rate_group->all_surviving_words_recorded(propagate); + _survivor_surv_rate_group->all_surviving_words_recorded(propagate); + // also call it on any more surv rate groups + } + + void set_known_garbage_bytes(size_t known_garbage_bytes) { + _known_garbage_bytes = known_garbage_bytes; + size_t heap_bytes = _g1->capacity(); + _known_garbage_ratio = (double) _known_garbage_bytes / (double) heap_bytes; + } + + void decrease_known_garbage_bytes(size_t known_garbage_bytes) { + guarantee( _known_garbage_bytes >= known_garbage_bytes, "invariant" ); + + _known_garbage_bytes -= known_garbage_bytes; + size_t heap_bytes = _g1->capacity(); + _known_garbage_ratio = (double) _known_garbage_bytes / (double) heap_bytes; + } + + G1MMUTracker* mmu_tracker() { + return _mmu_tracker; + } + + double predict_init_time_ms() { + return get_new_prediction(_concurrent_mark_init_times_ms); + } + + double predict_remark_time_ms() { + return get_new_prediction(_concurrent_mark_remark_times_ms); + } + + double predict_cleanup_time_ms() { + return get_new_prediction(_concurrent_mark_cleanup_times_ms); + } + + // Returns an estimate of the survival rate of the region at yg-age + // "yg_age". + double predict_yg_surv_rate(int age) { + TruncatedSeq* seq = _short_lived_surv_rate_group->get_seq(age); + if (seq->num() == 0) + gclog_or_tty->print("BARF! 
age is %d", age); + guarantee( seq->num() > 0, "invariant" ); + double pred = get_new_prediction(seq); + if (pred > 1.0) + pred = 1.0; + return pred; + } + + double accum_yg_surv_rate_pred(int age) { + return _short_lived_surv_rate_group->accum_surv_rate_pred(age); + } + +protected: + void print_stats (int level, const char* str, double value); + void print_stats (int level, const char* str, int value); + void print_par_stats (int level, const char* str, double* data) { + print_par_stats(level, str, data, true); + } + void print_par_stats (int level, const char* str, double* data, bool summary); + void print_par_buffers (int level, const char* str, double* data, bool summary); + + void check_other_times(int level, + NumberSeq* other_times_ms, + NumberSeq* calc_other_times_ms) const; + + void print_summary (PauseSummary* stats) const; + void print_abandoned_summary(PauseSummary* non_pop_summary, + PauseSummary* pop_summary) const; + + void print_summary (int level, const char* str, NumberSeq* seq) const; + void print_summary_sd (int level, const char* str, NumberSeq* seq) const; + + double avg_value (double* data); + double max_value (double* data); + double sum_of_values (double* data); + double max_sum (double* data1, double* data2); + + int _last_satb_drain_processed_buffers; + int _last_update_rs_processed_buffers; + double _last_pause_time_ms; + + size_t _bytes_in_to_space_before_gc; + size_t _bytes_in_to_space_after_gc; + size_t bytes_in_to_space_during_gc() { + return + _bytes_in_to_space_after_gc - _bytes_in_to_space_before_gc; + } + size_t _bytes_in_collection_set_before_gc; + // Used to count used bytes in CS. + friend class CountCSClosure; + + // Statistics kept per GC stoppage, pause or full. + TruncatedSeq* _recent_prev_end_times_for_all_gcs_sec; + + // We track markings. + int _num_markings; + double _mark_thread_startup_sec; // Time at startup of marking thread + + // Add a new GC of the given duration and end time to the record. 
+ void update_recent_gc_times(double end_time_sec, double elapsed_ms); + + // The head of the list (via "next_in_collection_set()") representing the + // current collection set. + HeapRegion* _collection_set; + size_t _collection_set_size; + size_t _collection_set_bytes_used_before; + + // Info about marking. + int _n_marks; // Sticky at 2, so we know when we've done at least 2. + + // The number of collection pauses at the end of the last mark. + size_t _n_pauses_at_mark_end; + + // ==== This section is for stats related to starting Conc Refinement on time. + size_t _conc_refine_enabled; + size_t _conc_refine_zero_traversals; + size_t _conc_refine_max_traversals; + // In # of heap regions. + size_t _conc_refine_current_delta; + + // At the beginning of a collection pause, update the variables above, + // especially the "delta". + void update_conc_refine_data(); + // ==== + + // Stash a pointer to the g1 heap. + G1CollectedHeap* _g1; + + // The average time in ms per collection pause, averaged over recent pauses. + double recent_avg_time_for_pauses_ms(); + + // The average time in ms for processing CollectedHeap strong roots, per + // collection pause, averaged over recent pauses. + double recent_avg_time_for_CH_strong_ms(); + + // The average time in ms for processing the G1 remembered set, per + // pause, averaged over recent pauses. + double recent_avg_time_for_G1_strong_ms(); + + // The average time in ms for "evacuating followers", per pause, averaged + // over recent pauses. + double recent_avg_time_for_evac_ms(); + + // The number of "recent" GCs recorded in the number sequences + int number_of_recent_gcs(); + + // The average survival ratio, computed by the total number of bytes + // surviving / total number of bytes before collection over the last + // several recent pauses. + double recent_avg_survival_fraction(); + // The survival fraction of the most recent pause; if there have been no + // pauses, returns 1.0.
+ double last_survival_fraction(); + + // Returns a "conservative" estimate of the recent survival rate, i.e., + // one that may be higher than "recent_avg_survival_fraction". + // This is conservative in several ways: + // If there have been few pauses, it will assume a potential high + // variance, and err on the side of caution. + // It puts a lower bound (currently 0.1) on the value it will return. + // To try to detect phase changes, if the most recent pause ("latest") has a + // higher-than average ("avg") survival rate, it returns that rate. + // "work" version is a utility function; young is restricted to young regions. + double conservative_avg_survival_fraction_work(double avg, + double latest); + + // The arguments are the two sequences that keep track of the number of bytes + // surviving and the total number of bytes before collection, resp., + // over the last several recent pauses + // Returns the survival rate for the category in the most recent pause. + // If there have been no pauses, returns 1.0. + double last_survival_fraction_work(TruncatedSeq* surviving, + TruncatedSeq* before); + + // The arguments are the two sequences that keep track of the number of bytes + // surviving and the total number of bytes before collection, resp., + // over the last several recent pauses + // Returns the average survival ratio over the last several recent pauses + // If there have been no pauses, return 1.0 + double recent_avg_survival_fraction_work(TruncatedSeq* surviving, + TruncatedSeq* before); + + double conservative_avg_survival_fraction() { + double avg = recent_avg_survival_fraction(); + double latest = last_survival_fraction(); + return conservative_avg_survival_fraction_work(avg, latest); + } + + // The ratio of gc time to elapsed time, computed over recent pauses. + double _recent_avg_pause_time_ratio; + + double recent_avg_pause_time_ratio() { + return _recent_avg_pause_time_ratio; + } + + // Number of pauses between concurrent marking.
+ size_t _pauses_btwn_concurrent_mark; + + size_t _n_marks_since_last_pause; + + // True iff CM has been initiated. + bool _conc_mark_initiated; + + // True iff CM should be initiated + bool _should_initiate_conc_mark; + bool _should_revert_to_full_young_gcs; + bool _last_full_young_gc; + + // This set of variables tracks the collector efficiency, in order to + // determine whether we should initiate a new marking. + double _cur_mark_stop_world_time_ms; + double _mark_init_start_sec; + double _mark_remark_start_sec; + double _mark_cleanup_start_sec; + double _mark_closure_time_ms; + + void calculate_young_list_min_length(); + void calculate_young_list_target_config(); + void calculate_young_list_target_config(size_t rs_lengths); + size_t calculate_optimal_so_length(size_t young_list_length); + +public: + + G1CollectorPolicy(); + + virtual G1CollectorPolicy* as_g1_policy() { return this; } + + virtual CollectorPolicy::Name kind() { + return CollectorPolicy::G1CollectorPolicyKind; + } + + void check_prediction_validity(); + + size_t bytes_in_collection_set() { + return _bytes_in_collection_set_before_gc; + } + + size_t bytes_in_to_space() { + return bytes_in_to_space_during_gc(); + } + + unsigned calc_gc_alloc_time_stamp() { + return _all_pause_times_ms->num() + 1; + } + +protected: + + // Count the number of bytes used in the CS. + void count_CS_bytes_used(); + + // Together these do the base cleanup-recording work. Subclasses might + // want to put something between them. + void record_concurrent_mark_cleanup_end_work1(size_t freed_bytes, + size_t max_live_bytes); + void record_concurrent_mark_cleanup_end_work2(); + +public: + + virtual void init(); + + virtual HeapWord* mem_allocate_work(size_t size, + bool is_tlab, + bool* gc_overhead_limit_was_exceeded); + + // This method controls how a collector handles one or more + // of its generations being fully allocated. 
+ virtual HeapWord* satisfy_failed_allocation(size_t size, + bool is_tlab); + + BarrierSet::Name barrier_set_name() { return BarrierSet::G1SATBCTLogging; } + + GenRemSet::Name rem_set_name() { return GenRemSet::CardTable; } + + // The number of collection pauses so far. + long n_pauses() const { return _n_pauses; } + + // Update the heuristic info to record a collection pause of the given + // start time, where the given number of bytes were used at the start. + // This may involve changing the desired size of a collection set. + + virtual void record_stop_world_start(); + + virtual void record_collection_pause_start(double start_time_sec, + size_t start_used); + + virtual void record_popular_pause_preamble_start(); + virtual void record_popular_pause_preamble_end(); + + // Must currently be called while the world is stopped. + virtual void record_concurrent_mark_init_start(); + virtual void record_concurrent_mark_init_end(); + void record_concurrent_mark_init_end_pre(double + mark_init_elapsed_time_ms); + + void record_mark_closure_time(double mark_closure_time_ms); + + virtual void record_concurrent_mark_remark_start(); + virtual void record_concurrent_mark_remark_end(); + + virtual void record_concurrent_mark_cleanup_start(); + virtual void record_concurrent_mark_cleanup_end(size_t freed_bytes, + size_t max_live_bytes); + virtual void record_concurrent_mark_cleanup_completed(); + + virtual void record_concurrent_pause(); + virtual void record_concurrent_pause_end(); + + virtual void record_collection_pause_end_CH_strong_roots(); + virtual void record_collection_pause_end_G1_strong_roots(); + + virtual void record_collection_pause_end(bool popular, bool abandoned); + + // Record the fact that a full collection occurred. 
+ virtual void record_full_collection_start(); + virtual void record_full_collection_end(); + + void record_ext_root_scan_time(int worker_i, double ms) { + _par_last_ext_root_scan_times_ms[worker_i] = ms; + } + + void record_mark_stack_scan_time(int worker_i, double ms) { + _par_last_mark_stack_scan_times_ms[worker_i] = ms; + } + + void record_scan_only_time(int worker_i, double ms, int n) { + _par_last_scan_only_times_ms[worker_i] = ms; + _par_last_scan_only_regions_scanned[worker_i] = (double) n; + } + + void record_satb_drain_time(double ms) { + _cur_satb_drain_time_ms = ms; + _satb_drain_time_set = true; + } + + void record_satb_drain_processed_buffers (int processed_buffers) { + _last_satb_drain_processed_buffers = processed_buffers; + } + + void record_mod_union_time(double ms) { + _all_mod_union_times_ms->add(ms); + } + + void record_update_rs_start_time(int thread, double ms) { + _par_last_update_rs_start_times_ms[thread] = ms; + } + + void record_update_rs_time(int thread, double ms) { + _par_last_update_rs_times_ms[thread] = ms; + } + + void record_update_rs_processed_buffers (int thread, + double processed_buffers) { + _par_last_update_rs_processed_buffers[thread] = processed_buffers; + } + + void record_scan_rs_start_time(int thread, double ms) { + _par_last_scan_rs_start_times_ms[thread] = ms; + } + + void record_scan_rs_time(int thread, double ms) { + _par_last_scan_rs_times_ms[thread] = ms; + } + + void record_scan_new_refs_time(int thread, double ms) { + _par_last_scan_new_refs_times_ms[thread] = ms; + } + + double get_scan_new_refs_time(int thread) { + return _par_last_scan_new_refs_times_ms[thread]; + } + + void reset_obj_copy_time(int thread) { + _par_last_obj_copy_times_ms[thread] = 0.0; + } + + void reset_obj_copy_time() { + reset_obj_copy_time(0); + } + + void record_obj_copy_time(int thread, double ms) { + _par_last_obj_copy_times_ms[thread] += ms; + } + + void record_obj_copy_time(double ms) { + record_obj_copy_time(0, ms); + } + + void 
record_termination_time(int thread, double ms) { + _par_last_termination_times_ms[thread] = ms; + } + + void record_termination_time(double ms) { + record_termination_time(0, ms); + } + + void record_pause_time(double ms) { + _last_pause_time_ms = ms; + } + + void record_clear_ct_time(double ms) { + _cur_clear_ct_time_ms = ms; + } + + void record_par_time(double ms) { + _cur_collection_par_time_ms = ms; + } + + void record_aux_start_time(int i) { + guarantee(i < _aux_num, "should be within range"); + _cur_aux_start_times_ms[i] = os::elapsedTime() * 1000.0; + } + + void record_aux_end_time(int i) { + guarantee(i < _aux_num, "should be within range"); + double ms = os::elapsedTime() * 1000.0 - _cur_aux_start_times_ms[i]; + _cur_aux_times_set[i] = true; + _cur_aux_times_ms[i] += ms; + } + + void record_pop_compute_rc_start(); + void record_pop_compute_rc_end(); + + void record_pop_evac_start(); + void record_pop_evac_end(); + + // Record the fact that "bytes" bytes allocated in a region. + void record_before_bytes(size_t bytes); + void record_after_bytes(size_t bytes); + + // Returns "true" if this is a good time to do a collection pause. + // The "word_size" argument, if non-zero, indicates the size of an + // allocation request that is prompting this query. + virtual bool should_do_collection_pause(size_t word_size) = 0; + + // Choose a new collection set. Marks the chosen regions as being + // "in_collection_set", and links them together. The head and number of + // the collection set are available via access methods. + // If "pop_region" is non-NULL, it is a popular region that has already + // been added to the collection set. + virtual void choose_collection_set(HeapRegion* pop_region = NULL) = 0; + + void clear_collection_set() { _collection_set = NULL; } + + // The head of the list (via "next_in_collection_set()") representing the + // current collection set. 
+ HeapRegion* collection_set() { return _collection_set; } + + // Sets the collection set to the given single region. + virtual void set_single_region_collection_set(HeapRegion* hr); + + // The number of elements in the current collection set. + size_t collection_set_size() { return _collection_set_size; } + + // Add "hr" to the CS. + void add_to_collection_set(HeapRegion* hr); + + bool should_initiate_conc_mark() { return _should_initiate_conc_mark; } + void set_should_initiate_conc_mark() { _should_initiate_conc_mark = true; } + void unset_should_initiate_conc_mark(){ _should_initiate_conc_mark = false; } + + void checkpoint_conc_overhead(); + + // If an expansion would be appropriate, because recent GC overhead had + // exceeded the desired limit, return an amount to expand by. + virtual size_t expansion_amount(); + + // note start of mark thread + void note_start_of_mark_thread(); + + // The marked bytes of the "r" has changed; reclassify its desirability + // for marking. Also asserts that "r" is eligible for a CS. + virtual void note_change_in_marked_bytes(HeapRegion* r) = 0; + +#ifndef PRODUCT + // Check any appropriate marked bytes info, asserting false if + // something's wrong, else returning "true". + virtual bool assertMarkedBytesDataOK() = 0; +#endif + + // Print tracing information.
+ void print_tracing_info() const; + + // Print stats on young survival ratio + void print_yg_surv_rate_info() const; + + void finished_recalculating_age_indexes() { + _short_lived_surv_rate_group->finished_recalculating_age_indexes(); + // do that for any other surv rate groups + } + + bool should_add_next_region_to_young_list(); + + bool in_young_gc_mode() { + return _in_young_gc_mode; + } + void set_in_young_gc_mode(bool in_young_gc_mode) { + _in_young_gc_mode = in_young_gc_mode; + } + + bool full_young_gcs() { + return _full_young_gcs; + } + void set_full_young_gcs(bool full_young_gcs) { + _full_young_gcs = full_young_gcs; + } + + bool adaptive_young_list_length() { + return _adaptive_young_list_length; + } + void set_adaptive_young_list_length(bool adaptive_young_list_length) { + _adaptive_young_list_length = adaptive_young_list_length; + } + + inline double get_gc_eff_factor() { + double ratio = _known_garbage_ratio; + + double square = ratio * ratio; + // square = square * square; + double ret = square * 9.0 + 1.0; +#if 0 + gclog_or_tty->print_cr("ratio = %1.2lf, ret = %1.2lf", ratio, ret); +#endif // 0 + guarantee(0.0 <= ret && ret < 10.0, "invariant!"); + return ret; + } + + // + // Survivor regions policy. + // +protected: + + // Current tenuring threshold, set to 0 if the collector reaches the + // maximum amount of survivor regions. + int _tenuring_threshold; + +public: + + inline GCAllocPurpose + evacuation_destination(HeapRegion* src_region, int age, size_t word_sz) { + if (age < _tenuring_threshold && src_region->is_young()) { + return GCAllocForSurvived; + } else { + return GCAllocForTenured; + } + } + + inline bool track_object_age(GCAllocPurpose purpose) { + return purpose == GCAllocForSurvived; + } + + inline GCAllocPurpose alternative_purpose(int purpose) { + return GCAllocForTenured; + } + + uint max_regions(int purpose); + + // The limit on regions for a particular purpose is reached.
+ void note_alloc_region_limit_reached(int purpose) { + if (purpose == GCAllocForSurvived) { + _tenuring_threshold = 0; + } + } + + void note_start_adding_survivor_regions() { + _survivor_surv_rate_group->start_adding_regions(); + } + + void note_stop_adding_survivor_regions() { + _survivor_surv_rate_group->stop_adding_regions(); + } +}; + +// This encapsulates a particular strategy for a g1 Collector. +// +// Start a concurrent mark when our heap size is n bytes +// greater than our heap size was at the last concurrent +// mark. Where n is a function of the CMSTriggerRatio +// and the MinHeapFreeRatio. +// +// Start a g1 collection pause when we have allocated the +// average number of bytes currently being freed in +// a collection, but only if it is at least one region +// full +// +// Resize Heap based on desired +// allocation space, where desired allocation space is +// a function of survival rate and desired future to size. +// +// Choose collection set by first picking all older regions +// which have a survival rate which beats our projected young +// survival rate. Then fill out the number of needed regions +// with young regions. + +class G1CollectorPolicy_BestRegionsFirst: public G1CollectorPolicy { + CollectionSetChooser* _collectionSetChooser; + // If the estimate is less than desirable, resize if possible.
+ void expand_if_possible(size_t numRegions); + + virtual void choose_collection_set(HeapRegion* pop_region = NULL); + virtual void record_collection_pause_start(double start_time_sec, + size_t start_used); + virtual void record_concurrent_mark_cleanup_end(size_t freed_bytes, + size_t max_live_bytes); + virtual void record_full_collection_end(); + +public: + G1CollectorPolicy_BestRegionsFirst() { + _collectionSetChooser = new CollectionSetChooser(); + } + void record_collection_pause_end(bool popular, bool abandoned); + bool should_do_collection_pause(size_t word_size); + virtual void set_single_region_collection_set(HeapRegion* hr); + // This is not needed any more, after the CSet choosing code was + // changed to use the pause prediction work. But let's leave the + // hook in just in case. + void note_change_in_marked_bytes(HeapRegion* r) { } +#ifndef PRODUCT + bool assertMarkedBytesDataOK(); +#endif +}; + +// This should move to some place more general... + +// If we have "n" measurements, and we've kept track of their "sum" and the +// "sum_of_squares" of the measurements, this returns the variance of the +// sequence. +inline double variance(int n, double sum_of_squares, double sum) { + double n_d = (double)n; + double avg = sum/n_d; + return (sum_of_squares - 2.0 * avg * sum + n_d * avg * avg) / n_d; +} + +// Local Variables: *** +// c-indentation-style: gnu *** +// End: *** diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1MMUTracker.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1MMUTracker.cpp new file mode 100644 index 00000000000..74209dc3b79 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/g1MMUTracker.cpp @@ -0,0 +1,187 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_g1MMUTracker.cpp.incl" + +#define _DISABLE_MMU 0 + +// can't rely on comparing doubles with tolerating a small margin for error +#define SMALL_MARGIN 0.0000001 +#define is_double_leq_0(_value) ( (_value) < SMALL_MARGIN ) +#define is_double_leq(_val1, _val2) is_double_leq_0((_val1) - (_val2)) +#define is_double_geq(_val1, _val2) is_double_leq_0((_val2) - (_val1)) + +/***** ALL TIMES ARE IN SECS!!!!!!! *****/ + +G1MMUTracker::G1MMUTracker(double time_slice, double max_gc_time) : + _time_slice(time_slice), + _max_gc_time(max_gc_time), + _conc_overhead_time_sec(0.0) { } + +void +G1MMUTracker::update_conc_overhead(double conc_overhead) { + double conc_overhead_time_sec = _time_slice * conc_overhead; + if (conc_overhead_time_sec > 0.9 * _max_gc_time) { + // We are screwed, as we only seem to have <10% of the soft + // real-time goal available for pauses. Let's admit defeat and + // allow something more generous as a pause target. 
+ conc_overhead_time_sec = 0.75 * _max_gc_time; + } + + _conc_overhead_time_sec = conc_overhead_time_sec; +} + +G1MMUTrackerQueue::G1MMUTrackerQueue(double time_slice, double max_gc_time) : + G1MMUTracker(time_slice, max_gc_time), + _head_index(0), + _tail_index(trim_index(_head_index+1)), + _no_entries(0) { } + +void G1MMUTrackerQueue::remove_expired_entries(double current_time) { + double limit = current_time - _time_slice; + while (_no_entries > 0) { + if (is_double_geq(limit, _array[_tail_index].end_time())) { + _tail_index = trim_index(_tail_index + 1); + --_no_entries; + } else + return; + } + guarantee(_no_entries == 0, "should have no entries in the array"); +} + +double G1MMUTrackerQueue::calculate_gc_time(double current_time) { + double gc_time = 0.0; + double limit = current_time - _time_slice; + for (int i = 0; i < _no_entries; ++i) { + int index = trim_index(_tail_index + i); + G1MMUTrackerQueueElem *elem = &_array[index]; + if (elem->end_time() > limit) { + if (elem->start_time() > limit) + gc_time += elem->duration(); + else + gc_time += elem->end_time() - limit; + } + } + return gc_time; +} + +void G1MMUTrackerQueue::add_pause(double start, double end, bool gc_thread) { + double longest_allowed = longest_pause_internal(start); + if (longest_allowed < 0.0) + longest_allowed = 0.0; + double duration = end - start; + + remove_expired_entries(end); + if (_no_entries == QueueLength) { + // OK, right now when we fill up we bomb out + // there are a few ways of dealing with this "gracefully" + // increase the array size (:-) + // remove the oldest entry (this might allow more GC time for + // the time slice than what's allowed) + // concolidate the two entries with the minimum gap between them + // (this mighte allow less GC time than what's allowed) + guarantee(0, "array full, currently we can't recover"); + } + _head_index = trim_index(_head_index + 1); + ++_no_entries; + _array[_head_index] = G1MMUTrackerQueueElem(start, end); +} + +// basically the 
_internal call does not remove expired entries +// this is for trying things out in the future and a couple +// of other places (debugging) + +double G1MMUTrackerQueue::longest_pause(double current_time) { + if (_DISABLE_MMU) + return _max_gc_time; + + MutexLockerEx x(MMUTracker_lock, Mutex::_no_safepoint_check_flag); + remove_expired_entries(current_time); + + return longest_pause_internal(current_time); +} + +double G1MMUTrackerQueue::longest_pause_internal(double current_time) { + double target_time = _max_gc_time; + + while( 1 ) { + double gc_time = + calculate_gc_time(current_time + target_time) + _conc_overhead_time_sec; + double diff = target_time + gc_time - _max_gc_time; + if (!is_double_leq_0(diff)) { + target_time -= diff; + if (is_double_leq_0(target_time)) { + target_time = -1.0; + break; + } + } else { + break; + } + } + + return target_time; +} + +// basically the _internal call does not remove expired entries +// this is for trying things out in the future and a couple +// of other places (debugging) + +double G1MMUTrackerQueue::when_sec(double current_time, double pause_time) { + if (_DISABLE_MMU) + return 0.0; + + MutexLockerEx x(MMUTracker_lock, Mutex::_no_safepoint_check_flag); + remove_expired_entries(current_time); + + return when_internal(current_time, pause_time); +} + +double G1MMUTrackerQueue::when_internal(double current_time, + double pause_time) { + // if the pause is over the maximum, just assume that it's the maximum + double adjusted_pause_time = + (pause_time > max_gc_time()) ? 
max_gc_time() : pause_time; + double earliest_end = current_time + adjusted_pause_time; + double limit = earliest_end - _time_slice; + double gc_time = calculate_gc_time(earliest_end); + double diff = gc_time + adjusted_pause_time - max_gc_time(); + if (is_double_leq_0(diff)) + return 0.0; + + int index = _tail_index; + while ( 1 ) { + G1MMUTrackerQueueElem *elem = &_array[index]; + if (elem->end_time() > limit) { + if (elem->start_time() > limit) + diff -= elem->duration(); + else + diff -= elem->end_time() - limit; + if (is_double_leq_0(diff)) + return elem->end_time() + diff + _time_slice - adjusted_pause_time - current_time; + } + index = trim_index(index+1); + guarantee(index != trim_index(_head_index + 1), "should not go past head"); + } +} diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1MMUTracker.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1MMUTracker.hpp new file mode 100644 index 00000000000..88a3707626b --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/g1MMUTracker.hpp @@ -0,0 +1,130 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Keeps track of the GC work and decides when it is OK to do GC work
+// and for how long so that the MMU invariants are maintained.
+
+/***** ALL TIMES ARE IN SECS!!!!!!! *****/
+
+// this is the "interface"
+class G1MMUTracker {
+protected:
+  double          _time_slice;
+  double          _max_gc_time; // this is per time slice
+
+  // seconds per time slice taken by concurrent work; subtracted from the
+  // pause budget in max_gc_time()
+  double          _conc_overhead_time_sec;
+
+public:
+  G1MMUTracker(double time_slice, double max_gc_time);
+
+  // The class is used polymorphically, so destroying an implementation
+  // through a G1MMUTracker* must reach the derived destructor.
+  virtual ~G1MMUTracker() { }
+
+  void update_conc_overhead(double conc_overhead);
+
+  // Records a pause that ran from start to end.
+  virtual void add_pause(double start, double end, bool gc_thread) = 0;
+  // Longest pause that may start at current_time.
+  virtual double longest_pause(double current_time) = 0;
+  // Delay from current_time until a pause of pause_time may start.
+  virtual double when_sec(double current_time, double pause_time) = 0;
+
+  // Pause budget per time slice once the concurrent overhead is taken out.
+  double max_gc_time() const {
+    return _max_gc_time - _conc_overhead_time_sec;
+  }
+
+  // True if a maximum-length pause could start (practically) right away.
+  inline bool now_max_gc(double current_time) {
+    return when_sec(current_time, max_gc_time()) < 0.00001;
+  }
+
+  inline double when_max_gc_sec(double current_time) {
+    return when_sec(current_time, max_gc_time());
+  }
+
+  // Millisecond variants; the conversion to jlong drops the fraction.
+  inline jlong when_max_gc_ms(double current_time) {
+    double when = when_max_gc_sec(current_time);
+    return (jlong) (when * 1000.0);
+  }
+
+  inline jlong when_ms(double current_time, double pause_time) {
+    double when = when_sec(current_time, pause_time);
+    return (jlong) (when * 1000.0);
+  }
+};
+
+// One recorded pause: its start and end time.
+class G1MMUTrackerQueueElem {
+private:
+  double _start_time;
+  double _end_time;
+
+public:
+  inline double start_time() const { return _start_time; }
+  inline double end_time() const   { return _end_time; }
+  inline double duration() const   { return _end_time - _start_time; }
+
+  G1MMUTrackerQueueElem() :
+    _start_time(0.0),
+    _end_time(0.0) { }
+
+  G1MMUTrackerQueueElem(double start_time, double end_time) :
+    _start_time(start_time),
+    _end_time(end_time) { }
+};
+
+// this is an implementation of the MMUTracker using a (fixed-size)
queue
+// that keeps track of all the recent pause times
+class G1MMUTrackerQueue: public G1MMUTracker {
+private:
+  enum PrivateConstants {
+    // capacity of the circular queue below
+    QueueLength = 64
+  };
+
+  // The array keeps track of all the pauses that fall within a time
+  // slice (the last time slice during which pauses took place).
+  // The data structure implemented is a circular queue.
+  // Head "points" to the most recent addition, tail to the oldest one.
+  // The array is of fixed size and I don't think we'll need more than
+  // two or three entries with the current behaviour of G1 pauses.
+  // If the array is full, an easy fix is to look for the pauses with
+  // the shortest gap between them and consolidate them.
+
+  G1MMUTrackerQueueElem _array[QueueLength];
+  int                   _head_index;
+  int                   _tail_index;
+  int                   _no_entries;   // number of valid entries in _array
+
+  // Wraps an index back into [0, QueueLength). Adding QueueLength before
+  // taking the remainder keeps the result non-negative for any index
+  // >= -QueueLength.
+  inline int trim_index(int index) {
+    return (index + QueueLength) % QueueLength;
+  }
+
+  void remove_expired_entries(double current_time);
+  double calculate_gc_time(double current_time);
+
+  double longest_pause_internal(double current_time);
+  double when_internal(double current_time, double pause_time);
+
+public:
+  G1MMUTrackerQueue(double time_slice, double max_gc_time);
+
+  virtual void add_pause(double start, double end, bool gc_thread);
+
+  virtual double longest_pause(double current_time);
+  virtual double when_sec(double current_time, double pause_time);
+};
diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp
new file mode 100644
index 00000000000..3c5eccb4a7b
--- /dev/null
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp
@@ -0,0 +1,385 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_g1MarkSweep.cpp.incl" + +class HeapRegion; + +void G1MarkSweep::invoke_at_safepoint(ReferenceProcessor* rp, + bool clear_all_softrefs) { + assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint"); + + // hook up weak ref data so it can be used during Mark-Sweep + assert(GenMarkSweep::ref_processor() == NULL, "no stomping"); + GenMarkSweep::_ref_processor = rp; + assert(rp != NULL, "should be non-NULL"); + + // When collecting the permanent generation methodOops may be moving, + // so we either have to flush all bcp data or convert it into bci. + CodeCache::gc_prologue(); + Threads::gc_prologue(); + + // Increment the invocation count for the permanent generation, since it is + // implicitly collected whenever we do a full mark sweep collection. + SharedHeap* sh = SharedHeap::heap(); + sh->perm_gen()->stat_record()->invocations++; + + bool marked_for_unloading = false; + + allocate_stacks(); + + // We should save the marks of the currently locked biased monitors. + // The marking doesn't preserve the marks of biased objects. 
+ BiasedLocking::preserve_marks(); + + mark_sweep_phase1(marked_for_unloading, clear_all_softrefs); + + if (G1VerifyConcMark) { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + g1h->checkConcurrentMark(); + } + + mark_sweep_phase2(); + + // Don't add any more derived pointers during phase3 + COMPILER2_PRESENT(DerivedPointerTable::set_active(false)); + + mark_sweep_phase3(); + + mark_sweep_phase4(); + + GenMarkSweep::restore_marks(); + BiasedLocking::restore_marks(); + GenMarkSweep::deallocate_stacks(); + + // We must invalidate the perm-gen rs, so that it gets rebuilt. + GenRemSet* rs = sh->rem_set(); + rs->invalidate(sh->perm_gen()->used_region(), true /*whole_heap*/); + + // "free at last gc" is calculated from these. + // CHF: cheating for now!!! + // Universe::set_heap_capacity_at_last_gc(Universe::heap()->capacity()); + // Universe::set_heap_used_at_last_gc(Universe::heap()->used()); + + Threads::gc_epilogue(); + CodeCache::gc_epilogue(); + + // refs processing: clean slate + GenMarkSweep::_ref_processor = NULL; +} + + +void G1MarkSweep::allocate_stacks() { + GenMarkSweep::_preserved_count_max = 0; + GenMarkSweep::_preserved_marks = NULL; + GenMarkSweep::_preserved_count = 0; + GenMarkSweep::_preserved_mark_stack = NULL; + GenMarkSweep::_preserved_oop_stack = NULL; + + GenMarkSweep::_marking_stack = + new (ResourceObj::C_HEAP) GrowableArray(4000, true); + + size_t size = SystemDictionary::number_of_classes() * 2; + GenMarkSweep::_revisit_klass_stack = + new (ResourceObj::C_HEAP) GrowableArray((int)size, true); +} + +void G1MarkSweep::mark_sweep_phase1(bool& marked_for_unloading, + bool clear_all_softrefs) { + // Recursively traverse all live objects and mark them + EventMark m("1 mark object"); + TraceTime tm("phase 1", PrintGC && Verbose, true, gclog_or_tty); + GenMarkSweep::trace(" 1"); + + SharedHeap* sh = SharedHeap::heap(); + + sh->process_strong_roots(true, // Collecting permanent generation. 
+ SharedHeap::SO_SystemClasses, + &GenMarkSweep::follow_root_closure, + &GenMarkSweep::follow_root_closure); + + // Process reference objects found during marking + ReferencePolicy *soft_ref_policy; + if (clear_all_softrefs) { + soft_ref_policy = new AlwaysClearPolicy(); + } else { +#ifdef COMPILER2 + soft_ref_policy = new LRUMaxHeapPolicy(); +#else + soft_ref_policy = new LRUCurrentHeapPolicy(); +#endif + } + assert(soft_ref_policy != NULL,"No soft reference policy"); + GenMarkSweep::ref_processor()->process_discovered_references( + soft_ref_policy, + &GenMarkSweep::is_alive, + &GenMarkSweep::keep_alive, + &GenMarkSweep::follow_stack_closure, + NULL); + + // Follow system dictionary roots and unload classes + bool purged_class = SystemDictionary::do_unloading(&GenMarkSweep::is_alive); + assert(GenMarkSweep::_marking_stack->is_empty(), + "stack should be empty by now"); + + // Follow code cache roots (has to be done after system dictionary, + // assumes all live klasses are marked) + CodeCache::do_unloading(&GenMarkSweep::is_alive, + &GenMarkSweep::keep_alive, + purged_class); + GenMarkSweep::follow_stack(); + + // Update subklass/sibling/implementor links of live klasses + GenMarkSweep::follow_weak_klass_links(); + assert(GenMarkSweep::_marking_stack->is_empty(), + "stack should be empty by now"); + + // Visit symbol and interned string tables and delete unmarked oops + SymbolTable::unlink(&GenMarkSweep::is_alive); + StringTable::unlink(&GenMarkSweep::is_alive); + + assert(GenMarkSweep::_marking_stack->is_empty(), + "stack should be empty by now"); +} + +class G1PrepareCompactClosure: public HeapRegionClosure { + ModRefBarrierSet* _mrbs; + CompactPoint _cp; + bool _popular_only; + + void free_humongous_region(HeapRegion* hr) { + HeapWord* bot = hr->bottom(); + HeapWord* end = hr->end(); + assert(hr->startsHumongous(), + "Only the start of a humongous region should be freed."); + G1CollectedHeap::heap()->free_region(hr); + hr->prepare_for_compaction(&_cp); + // 
Also clear the part of the card table that will be unused after + // compaction. + _mrbs->clear(MemRegion(hr->compaction_top(), hr->end())); + } + +public: + G1PrepareCompactClosure(CompactibleSpace* cs, bool popular_only) : + _cp(NULL, cs, cs->initialize_threshold()), + _mrbs(G1CollectedHeap::heap()->mr_bs()), + _popular_only(popular_only) + {} + bool doHeapRegion(HeapRegion* hr) { + if (_popular_only && !hr->popular()) + return true; // terminate early + else if (!_popular_only && hr->popular()) + return false; // skip this one. + + if (hr->isHumongous()) { + if (hr->startsHumongous()) { + oop obj = oop(hr->bottom()); + if (obj->is_gc_marked()) { + obj->forward_to(obj); + } else { + free_humongous_region(hr); + } + } else { + assert(hr->continuesHumongous(), "Invalid humongous."); + } + } else { + hr->prepare_for_compaction(&_cp); + // Also clear the part of the card table that will be unused after + // compaction. + _mrbs->clear(MemRegion(hr->compaction_top(), hr->end())); + } + return false; + } +}; +// Stolen verbatim from g1CollectedHeap.cpp +class FindFirstRegionClosure: public HeapRegionClosure { + HeapRegion* _a_region; + bool _find_popular; +public: + FindFirstRegionClosure(bool find_popular) : + _a_region(NULL), _find_popular(find_popular) {} + bool doHeapRegion(HeapRegion* r) { + if (r->popular() == _find_popular) { + _a_region = r; + return true; + } else { + return false; + } + } + HeapRegion* result() { return _a_region; } +}; + +void G1MarkSweep::mark_sweep_phase2() { + // Now all live objects are marked, compute the new object addresses. + + // It is imperative that we traverse perm_gen LAST. If dead space is + // allowed a range of dead object may get overwritten by a dead int + // array. If perm_gen is not traversed last a klassOop may get + // overwritten. This is fine since it is dead, but if the class has dead + // instances we have to skip them, and in order to find their size we + // need the klassOop! 
+ // + // It is not required that we traverse spaces in the same order in + // phase2, phase3 and phase4, but the ValidateMarkSweep live oops + // tracking expects us to do so. See comment under phase4. + + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + Generation* pg = g1h->perm_gen(); + + EventMark m("2 compute new addresses"); + TraceTime tm("phase 2", PrintGC && Verbose, true, gclog_or_tty); + GenMarkSweep::trace("2"); + + // First we compact the popular regions. + if (G1NumPopularRegions > 0) { + CompactibleSpace* sp = g1h->first_compactible_space(); + FindFirstRegionClosure cl(true /*find_popular*/); + g1h->heap_region_iterate(&cl); + HeapRegion *r = cl.result(); + assert(r->popular(), "should have found a popular region."); + assert(r == sp, "first popular heap region should " + "== first compactible space"); + G1PrepareCompactClosure blk(sp, true/*popular_only*/); + g1h->heap_region_iterate(&blk); + } + + // Now we do the regular regions. + FindFirstRegionClosure cl(false /*find_popular*/); + g1h->heap_region_iterate(&cl); + HeapRegion *r = cl.result(); + assert(!r->popular(), "should have founda non-popular region."); + CompactibleSpace* sp = r; + if (r->isHumongous() && oop(r->bottom())->is_gc_marked()) { + sp = r->next_compaction_space(); + } + + G1PrepareCompactClosure blk(sp, false/*popular_only*/); + g1h->heap_region_iterate(&blk); + + CompactPoint perm_cp(pg, NULL, NULL); + pg->prepare_for_compaction(&perm_cp); +} + +class G1AdjustPointersClosure: public HeapRegionClosure { + public: + bool doHeapRegion(HeapRegion* r) { + if (r->isHumongous()) { + if (r->startsHumongous()) { + // We must adjust the pointers on the single H object. + oop obj = oop(r->bottom()); + debug_only(GenMarkSweep::track_interior_pointers(obj)); + // point all the oops to the new location + obj->adjust_pointers(); + debug_only(GenMarkSweep::check_interior_pointers()); + } + } else { + // This really ought to be "as_CompactibleSpace"... 
+ r->adjust_pointers(); + } + return false; + } +}; + +void G1MarkSweep::mark_sweep_phase3() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + Generation* pg = g1h->perm_gen(); + + // Adjust the pointers to reflect the new locations + EventMark m("3 adjust pointers"); + TraceTime tm("phase 3", PrintGC && Verbose, true, gclog_or_tty); + GenMarkSweep::trace("3"); + + SharedHeap* sh = SharedHeap::heap(); + + sh->process_strong_roots(true, // Collecting permanent generation. + SharedHeap::SO_AllClasses, + &GenMarkSweep::adjust_root_pointer_closure, + &GenMarkSweep::adjust_pointer_closure); + + g1h->ref_processor()->weak_oops_do(&GenMarkSweep::adjust_root_pointer_closure); + + // Now adjust pointers in remaining weak roots. (All of which should + // have been cleared if they pointed to non-surviving objects.) + g1h->g1_process_weak_roots(&GenMarkSweep::adjust_root_pointer_closure, + &GenMarkSweep::adjust_pointer_closure); + + GenMarkSweep::adjust_marks(); + + G1AdjustPointersClosure blk; + g1h->heap_region_iterate(&blk); + pg->adjust_pointers(); +} + +class G1SpaceCompactClosure: public HeapRegionClosure { +public: + G1SpaceCompactClosure() {} + + bool doHeapRegion(HeapRegion* hr) { + if (hr->isHumongous()) { + if (hr->startsHumongous()) { + oop obj = oop(hr->bottom()); + if (obj->is_gc_marked()) { + obj->init_mark(); + } else { + assert(hr->is_empty(), "Should have been cleared in phase 2."); + } + hr->reset_during_compaction(); + } + } else { + hr->compact(); + } + return false; + } +}; + +void G1MarkSweep::mark_sweep_phase4() { + // All pointers are now adjusted, move objects accordingly + + // It is imperative that we traverse perm_gen first in phase4. All + // classes must be allocated earlier than their instances, and traversing + // perm_gen first makes sure that all klassOops have moved to their new + // location before any instance does a dispatch through it's klass! 
+ + // The ValidateMarkSweep live oops tracking expects us to traverse spaces + // in the same order in phase2, phase3 and phase4. We don't quite do that + // here (perm_gen first rather than last), so we tell the validate code + // to use a higher index (saved from phase2) when verifying perm_gen. + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + Generation* pg = g1h->perm_gen(); + + EventMark m("4 compact heap"); + TraceTime tm("phase 4", PrintGC && Verbose, true, gclog_or_tty); + GenMarkSweep::trace("4"); + + pg->compact(); + + G1SpaceCompactClosure blk; + g1h->heap_region_iterate(&blk); + +} + +// Local Variables: *** +// c-indentation-style: gnu *** +// End: *** diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1MarkSweep.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1MarkSweep.hpp new file mode 100644 index 00000000000..a0bd370bf01 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/g1MarkSweep.hpp @@ -0,0 +1,57 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +class ReferenceProcessor; + +// G1MarkSweep takes care of global mark-compact garbage collection for a +// G1CollectedHeap using a four-phase pointer forwarding algorithm. All +// generations are assumed to support marking; those that can also support +// compaction. +// +// Class unloading will only occur when a full gc is invoked. + + +class G1MarkSweep : AllStatic { + friend class VM_G1MarkSweep; + friend class Scavenge; + + public: + + static void invoke_at_safepoint(ReferenceProcessor* rp, + bool clear_all_softrefs); + + private: + + // Mark live objects + static void mark_sweep_phase1(bool& marked_for_deopt, + bool clear_all_softrefs); + // Calculate new addresses + static void mark_sweep_phase2(); + // Update pointers + static void mark_sweep_phase3(); + // Move objects to new positions + static void mark_sweep_phase4(); + + static void allocate_stacks(); +}; diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.hpp new file mode 100644 index 00000000000..58653196a36 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.hpp @@ -0,0 +1,202 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Forward declarations (G1RemSet is listed twice; the repeat is harmless).
+class HeapRegion;
+class G1CollectedHeap;
+class G1RemSet;
+class HRInto_G1RemSet;
+class G1RemSet;
+class ConcurrentMark;
+class DirtyCardToOopClosure;
+class CMBitMap;
+class CMMarkStack;
+class G1ParScanThreadState;
+
+// A class that scans oops in a given heap region (much as OopsInGenClosure
+// scans oops in a generation.)
+class OopsInHeapRegionClosure: public OopsInGenClosure {
+protected:
+  HeapRegion* _from;
+public:
+  // Records the region the oops being scanned come from.
+  virtual void set_region(HeapRegion* from) { _from = from; }
+};
+
+
+// Only the oop* overloads are implemented; the narrowOop overloads stop
+// the VM with "NYI". The same holds for every closure in this file.
+class G1ScanAndBalanceClosure : public OopClosure {
+  G1CollectedHeap* _g1;
+  static int _nq;
+public:
+  G1ScanAndBalanceClosure(G1CollectedHeap* g1) : _g1(g1) { }
+  inline void do_oop_nv(oop* p);
+  inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
+  virtual void do_oop(oop* p);
+  virtual void do_oop(narrowOop* p)    { guarantee(false, "NYI"); }
+};
+
+// Common state of the G1Par* closures below; the constructor is defined
+// elsewhere.
+class G1ParClosureSuper : public OopsInHeapRegionClosure {
+protected:
+  G1CollectedHeap* _g1;
+  G1RemSet* _g1_rem;
+  ConcurrentMark* _cm;
+  G1ParScanThreadState* _par_scan_state;
+public:
+  G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state);
+  bool apply_to_weak_ref_discovered_field() { return true; }
+};
+
+class G1ParScanClosure : public G1ParClosureSuper {
+public:
+  G1ParScanClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
+    G1ParClosureSuper(g1, par_scan_state) { }
+  void do_oop_nv(oop* p);   // should be made inline
+
+  inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
+  virtual void do_oop(oop* p)          { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p)    { do_oop_nv(p); }
+};
+
+#define G1_PARTIAL_ARRAY_MASK 1
+
+class G1ParScanPartialArrayClosure : public G1ParClosureSuper {
+  G1ParScanClosure _scanner;
+  // NOTE(review): the template parameter list after "template" is missing
+  // in this copy of the patch - presumably stripped during extraction.
+  // Confirm against the original changeset.
+  template  void process_array_chunk(oop obj, int start, int end);
+public:
+  G1ParScanPartialArrayClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
+    G1ParClosureSuper(g1, par_scan_state), _scanner(g1, par_scan_state) { }
+  void do_oop_nv(oop* p);
+  void do_oop_nv(narrowOop* p)      { guarantee(false, "NYI"); }
+  virtual void do_oop(oop* p)       { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
+};
+
+
+// Base of G1ParCopyClosure; keeps a pointer to a scanner owned by someone
+// else.
+class G1ParCopyHelper : public G1ParClosureSuper {
+  G1ParScanClosure *_scanner;
+protected:
+  void mark_forwardee(oop* p);
+  oop copy_to_survivor_space(oop obj);
+public:
+  G1ParCopyHelper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state,
+                  G1ParScanClosure *scanner) :
+    G1ParClosureSuper(g1, par_scan_state), _scanner(scanner) { }
+};
+
+// NOTE(review): the template parameter list is missing here as well;
+// do_mark_forwardee, tested in do_oop_nv() below, is presumably one of
+// the parameters. Confirm against the original changeset.
+template
+class G1ParCopyClosure : public G1ParCopyHelper {
+  // Has the same name as the private pointer member in G1ParCopyHelper.
+  G1ParScanClosure _scanner;
+  void do_oop_work(oop* p);
+  void do_oop_work(narrowOop* p) { guarantee(false, "NYI"); }
+public:
+  // The base class is constructed before _scanner whatever the order
+  // written below, so &_scanner refers to a not yet constructed member
+  // when it is passed along; the G1ParCopyHelper constructor only stores
+  // the address.
+  G1ParCopyClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
+    _scanner(g1, par_scan_state), G1ParCopyHelper(g1, par_scan_state, &_scanner) { }
+  inline void do_oop_nv(oop* p) {
+    do_oop_work(p);
+    if (do_mark_forwardee)
+      mark_forwardee(p);
+  }
+  inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
+  virtual void do_oop(oop* p)       { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
+};
+
+// NOTE(review): every typedef below names G1ParCopyClosure without
+// template arguments, so all seven read identically here; the arguments
+// that tell them apart were presumably stripped during extraction.
+// Confirm against the original changeset.
+typedef G1ParCopyClosure G1ParScanExtRootClosure;
+typedef G1ParCopyClosure G1ParScanPermClosure;
+typedef G1ParCopyClosure G1ParScanAndMarkExtRootClosure;
+typedef G1ParCopyClosure G1ParScanAndMarkPermClosure;
+typedef G1ParCopyClosure G1ParScanHeapRSClosure;
+typedef G1ParCopyClosure G1ParScanAndMarkHeapRSClosure;
+typedef G1ParCopyClosure G1ParScanHeapEvacClosure;
+
+
+// Forwards to the wrapped closure only the references that point into the
+// collection set (see the inline do_oop_nv).
+class FilterIntoCSClosure: public OopClosure {
+  G1CollectedHeap* _g1;
+  OopClosure* _oc;
+  DirtyCardToOopClosure* _dcto_cl;
+public:
+  // Members are initialised in declaration order (_g1, _oc, _dcto_cl),
+  // not in the order written in the initializer list.
+  FilterIntoCSClosure(  DirtyCardToOopClosure* dcto_cl,
+                        G1CollectedHeap* g1, OopClosure* oc) :
+    _dcto_cl(dcto_cl), _g1(g1), _oc(oc)
+  {}
+  inline void do_oop_nv(oop* p);
+  inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
+  virtual void do_oop(oop* p);
+  virtual void do_oop(narrowOop* p)   { guarantee(false, "NYI"); }
+  bool apply_to_weak_ref_discovered_field() { return true; }
+  bool do_header() { return false; }
+};
+
+// Like FilterIntoCSClosure, but wraps an OopsInHeapRegionClosure and
+// passes set_region() through to it.
+class FilterInHeapRegionAndIntoCSClosure : public OopsInHeapRegionClosure {
+  G1CollectedHeap* _g1;
+  OopsInHeapRegionClosure* _oc;
+public:
+  FilterInHeapRegionAndIntoCSClosure(G1CollectedHeap* g1,
+                                     OopsInHeapRegionClosure* oc) :
+    _g1(g1), _oc(oc)
+  {}
+  inline void do_oop_nv(oop* p);
+  inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
+  virtual void do_oop(oop* p);
+  virtual void do_oop(narrowOop* p)   { guarantee(false, "NYI"); }
+  bool apply_to_weak_ref_discovered_field() { return true; }
+  bool do_header() { return false; }
+  void set_region(HeapRegion* from) {
+    _oc->set_region(from);
+  }
+};
+
+// As above, and additionally hands references to objects outside the
+// collection set and outside young regions to ConcurrentMark::grayRoot
+// (see the inline do_oop_nv).
+class FilterAndMarkInHeapRegionAndIntoCSClosure : public OopsInHeapRegionClosure {
+  G1CollectedHeap* _g1;
+  ConcurrentMark* _cm;
+  OopsInHeapRegionClosure* _oc;
+public:
+  FilterAndMarkInHeapRegionAndIntoCSClosure(G1CollectedHeap* g1,
+                                            OopsInHeapRegionClosure* oc,
+                                            ConcurrentMark* cm)
+  : _g1(g1), _oc(oc), _cm(cm) { }
+
+  inline void do_oop_nv(oop* p);
+  inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
+  virtual void do_oop(oop* p);
+  virtual void do_oop(narrowOop* p)   { guarantee(false, "NYI"); }
+  bool apply_to_weak_ref_discovered_field() { return true; }
+  bool do_header() { return false; }
+  void set_region(HeapRegion* from) {
+    _oc->set_region(from);
+  }
+};
+
+// Oop closure that wraps another closure ("_oc") and filters on an address
+// range.  As implemented by do_oop_nv(oop*) in g1OopClosures.inline.hpp, a
+// reference is forwarded to "_oc" only when the referenced object is
+// non-NULL and its address lies outside [_r_bottom, _r_end).
+// NOTE(review): the constructor is defined outside this header; presumably
+// it takes the bounds from "r" -- confirm against its definition.
+class FilterOutOfRegionClosure: public OopClosure {
+  HeapWord* _r_bottom;      // lower bound compared against in do_oop_nv
+  HeapWord* _r_end;         // upper (exclusive) bound compared against in do_oop_nv
+  OopClosure* _oc;          // closure that receives the forwarded references
+  // Incremented by do_oop_nv only when
+  // FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT is non-zero.
+  int _out_of_region;
+public:
+  FilterOutOfRegionClosure(HeapRegion* r, OopClosure* oc);
+  inline void do_oop_nv(oop* p);
+  // Compressed oops are not supported: this overload always fails.
+  inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
+  virtual void do_oop(oop* p);
+  // Compressed oops are not supported: this overload always fails.
+  virtual void do_oop(narrowOop* p) { guarantee(false, "NYI"); }
+  bool apply_to_weak_ref_discovered_field() { return true; }
+  bool do_header() { return false; }
+  // Current value of the out-of-region counter.
+  int out_of_region() { return _out_of_region; }
+};
diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp
new file mode 100644
index 00000000000..fdca16083fc
--- /dev/null
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+/*
+ * The counting below is selected with preprocessor switches rather than
+ * left to inline functions, because the compiler is free to ignore
+ * "inline" declarations.
+ */
+
+// The histogram counting is compiled in or out with #if because it sits
+// in a perf-critical inner loop.
+#define FILTERINTOCSCLOSURE_DOHISTOGRAMCOUNT 0
+
+// Forward "p" to the wrapped closure only when it refers to a non-NULL
+// object that is in the collection set.
+inline void FilterIntoCSClosure::do_oop_nv(oop* p) {
+  oop referent = *p;
+  if (referent == NULL || !_g1->obj_in_cs(referent)) {
+    return;
+  }
+  _oc->do_oop(p);
+#if FILTERINTOCSCLOSURE_DOHISTOGRAMCOUNT
+  _dcto_cl->incr_count();
+#endif
+}
+
+// Virtual entry point; same behaviour as do_oop_nv.
+inline void FilterIntoCSClosure::do_oop(oop* p) {
+  do_oop_nv(p);
+}
+
+#define FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT 0
+
+// Forward "p" to the wrapped closure only when it refers to a non-NULL
+// object whose address is outside [_r_bottom, _r_end).
+inline void FilterOutOfRegionClosure::do_oop_nv(oop* p) {
+  oop referent = *p;
+  HeapWord* addr = (HeapWord*)referent;
+  if (addr == NULL) {
+    return;
+  }
+  if (addr >= _r_bottom && addr < _r_end) {
+    // Points into the region itself: filtered out.
+    return;
+  }
+  _oc->do_oop(p);
+#if FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT
+  _out_of_region++;
+#endif
+}
+
+// Virtual entry point; same behaviour as do_oop_nv.
+inline void FilterOutOfRegionClosure::do_oop(oop* p) {
+  do_oop_nv(p);
+}
+
+// Forward "p" to the wrapped closure only when it refers to a non-NULL
+// object that is in the collection set.
+inline void FilterInHeapRegionAndIntoCSClosure::do_oop_nv(oop* p) {
+  oop referent = *p;
+  if (referent == NULL || !_g1->obj_in_cs(referent)) {
+    return;
+  }
+  _oc->do_oop(p);
+}
+
+// Virtual entry point; same behaviour as do_oop_nv.
+inline void FilterInHeapRegionAndIntoCSClosure::do_oop(oop* p) {
+  do_oop_nv(p);
+}
+
+
+// For a non-NULL referent located in a heap region: forward "p" to the
+// wrapped closure if that region is in the collection set; otherwise, if
+// the region is not young, gray the referent via the concurrent marker.
+inline void FilterAndMarkInHeapRegionAndIntoCSClosure::do_oop_nv(oop* p) {
+  oop referent = *p;
+  if (referent == NULL) {
+    return;
+  }
+  HeapRegion* region = _g1->heap_region_containing((HeapWord*) referent);
+  if (region == NULL) {
+    return;
+  }
+  if (region->in_collection_set()) {
+    _oc->do_oop(p);
+  } else if (!region->is_young()) {
+    _cm->grayRoot(referent);
+  }
+}
+
+// Virtual entry point; same behaviour as do_oop_nv.
+inline void FilterAndMarkInHeapRegionAndIntoCSClosure::do_oop(oop* p) {
+  do_oop_nv(p);
+}
+
+// Push "p" on a task queue.  With parallel GC threads the queue index
+// "_nq" is advanced round-robin before each push; otherwise queue 0 is
+// always used.  A failed push is fatal.
+inline void G1ScanAndBalanceClosure::do_oop_nv(oop* p) {
+  RefToScanQueue* target_queue;
+  if (ParallelGCThreads > 0) {
+    // Rotate to the next queue so the work is dealt out equally.
+    _nq = (_nq + 1) % ParallelGCThreads;
+    target_queue = _g1->task_queue(_nq);
+  } else {
+    target_queue = _g1->task_queue(0);
+  }
+  bool pushed = target_queue->push(p);
+  guarantee(pushed, "Overflow during poplularity region processing");
+}
+
+// Virtual entry point; same behaviour as do_oop_nv.
+inline void G1ScanAndBalanceClosure::do_oop(oop* p) {
+  do_oop_nv(p);
+}
diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp
new file mode 100644
index 00000000000..462be9a689e
--- /dev/null
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp
@@ -0,0 +1,1003 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ * + */ + +#include "incls/_precompiled.incl" +#include "incls/_g1RemSet.cpp.incl" + +#define CARD_REPEAT_HISTO 0 + +#if CARD_REPEAT_HISTO +static size_t ct_freq_sz; +static jbyte* ct_freq = NULL; + +void init_ct_freq_table(size_t heap_sz_bytes) { + if (ct_freq == NULL) { + ct_freq_sz = heap_sz_bytes/CardTableModRefBS::card_size; + ct_freq = new jbyte[ct_freq_sz]; + for (size_t j = 0; j < ct_freq_sz; j++) ct_freq[j] = 0; + } +} + +void ct_freq_note_card(size_t index) { + assert(0 <= index && index < ct_freq_sz, "Bounds error."); + if (ct_freq[index] < 100) { ct_freq[index]++; } +} + +static IntHistogram card_repeat_count(10, 10); + +void ct_freq_update_histo_and_reset() { + for (size_t j = 0; j < ct_freq_sz; j++) { + card_repeat_count.add_entry(ct_freq[j]); + ct_freq[j] = 0; + } + +} +#endif + + +class IntoCSOopClosure: public OopsInHeapRegionClosure { + OopsInHeapRegionClosure* _blk; + G1CollectedHeap* _g1; +public: + IntoCSOopClosure(G1CollectedHeap* g1, OopsInHeapRegionClosure* blk) : + _g1(g1), _blk(blk) {} + void set_region(HeapRegion* from) { + _blk->set_region(from); + } + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + virtual void do_oop(oop* p) { + oop obj = *p; + if (_g1->obj_in_cs(obj)) _blk->do_oop(p); + } + bool apply_to_weak_ref_discovered_field() { return true; } + bool idempotent() { return true; } +}; + +class IntoCSRegionClosure: public HeapRegionClosure { + IntoCSOopClosure _blk; + G1CollectedHeap* _g1; +public: + IntoCSRegionClosure(G1CollectedHeap* g1, OopsInHeapRegionClosure* blk) : + _g1(g1), _blk(g1, blk) {} + bool doHeapRegion(HeapRegion* r) { + if (!r->in_collection_set()) { + _blk.set_region(r); + if (r->isHumongous()) { + if (r->startsHumongous()) { + oop obj = oop(r->bottom()); + obj->oop_iterate(&_blk); + } + } else { + r->oop_before_save_marks_iterate(&_blk); + } + } + return false; + } +}; + +void +StupidG1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc, + int worker_i) { + IntoCSRegionClosure 
rc(_g1, oc); + _g1->heap_region_iterate(&rc); +} + +class UpdateRSOopClosure: public OopClosure { + HeapRegion* _from; + HRInto_G1RemSet* _rs; + int _worker_i; +public: + UpdateRSOopClosure(HRInto_G1RemSet* rs, int worker_i = 0) : + _from(NULL), _rs(rs), _worker_i(worker_i) { + guarantee(_rs != NULL, "Requires an HRIntoG1RemSet"); + } + + void set_from(HeapRegion* from) { + assert(from != NULL, "from region must be non-NULL"); + _from = from; + } + + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + virtual void do_oop(oop* p) { + assert(_from != NULL, "from region must be non-NULL"); + _rs->par_write_ref(_from, p, _worker_i); + } + // Override: this closure is idempotent. + // bool idempotent() { return true; } + bool apply_to_weak_ref_discovered_field() { return true; } +}; + +class UpdateRSOutOfRegionClosure: public HeapRegionClosure { + G1CollectedHeap* _g1h; + ModRefBarrierSet* _mr_bs; + UpdateRSOopClosure _cl; + int _worker_i; +public: + UpdateRSOutOfRegionClosure(G1CollectedHeap* g1, int worker_i = 0) : + _cl(g1->g1_rem_set()->as_HRInto_G1RemSet(), worker_i), + _mr_bs(g1->mr_bs()), + _worker_i(worker_i), + _g1h(g1) + {} + bool doHeapRegion(HeapRegion* r) { + if (!r->in_collection_set() && !r->continuesHumongous()) { + _cl.set_from(r); + r->set_next_filter_kind(HeapRegionDCTOC::OutOfRegionFilterKind); + _mr_bs->mod_oop_in_space_iterate(r, &_cl, true, true); + } + return false; + } +}; + +class VerifyRSCleanCardOopClosure: public OopClosure { + G1CollectedHeap* _g1; +public: + VerifyRSCleanCardOopClosure(G1CollectedHeap* g1) : _g1(g1) {} + + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + virtual void do_oop(oop* p) { + oop obj = *p; + HeapRegion* to = _g1->heap_region_containing(obj); + guarantee(to == NULL || !to->in_collection_set(), + "Missed a rem set member."); + } +}; + +HRInto_G1RemSet::HRInto_G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs) + : G1RemSet(g1), _ct_bs(ct_bs), _g1p(_g1->g1_policy()), + 
_cg1r(g1->concurrent_g1_refine()), + _par_traversal_in_progress(false), _new_refs(NULL), + _cards_scanned(NULL), _total_cards_scanned(0) +{ + _seq_task = new SubTasksDone(NumSeqTasks); + _new_refs = NEW_C_HEAP_ARRAY(GrowableArray*, ParallelGCThreads); +} + +HRInto_G1RemSet::~HRInto_G1RemSet() { + delete _seq_task; +} + +void CountNonCleanMemRegionClosure::do_MemRegion(MemRegion mr) { + if (_g1->is_in_g1_reserved(mr.start())) { + _n += (int) ((mr.byte_size() / CardTableModRefBS::card_size)); + if (_start_first == NULL) _start_first = mr.start(); + } +} + +class ScanRSClosure : public HeapRegionClosure { + size_t _cards_done, _cards; + G1CollectedHeap* _g1h; + OopsInHeapRegionClosure* _oc; + G1BlockOffsetSharedArray* _bot_shared; + CardTableModRefBS *_ct_bs; + int _worker_i; + bool _try_claimed; +public: + ScanRSClosure(OopsInHeapRegionClosure* oc, int worker_i) : + _oc(oc), + _cards(0), + _cards_done(0), + _worker_i(worker_i), + _try_claimed(false) + { + _g1h = G1CollectedHeap::heap(); + _bot_shared = _g1h->bot_shared(); + _ct_bs = (CardTableModRefBS*) (_g1h->barrier_set()); + } + + void set_try_claimed() { _try_claimed = true; } + + void scanCard(size_t index, HeapRegion *r) { + _cards_done++; + DirtyCardToOopClosure* cl = + r->new_dcto_closure(_oc, + CardTableModRefBS::Precise, + HeapRegionDCTOC::IntoCSFilterKind); + + // Set the "from" region in the closure. + _oc->set_region(r); + HeapWord* card_start = _bot_shared->address_for_index(index); + HeapWord* card_end = card_start + G1BlockOffsetSharedArray::N_words; + Space *sp = SharedHeap::heap()->space_containing(card_start); + MemRegion sm_region; + if (ParallelGCThreads > 0) { + // first find the used area + sm_region = sp->used_region_at_save_marks(); + } else { + // The closure is not idempotent. We shouldn't look at objects + // allocated during the GC. 
+ sm_region = sp->used_region_at_save_marks(); + } + MemRegion mr = sm_region.intersection(MemRegion(card_start,card_end)); + if (!mr.is_empty()) { + cl->do_MemRegion(mr); + } + } + + void printCard(HeapRegion* card_region, size_t card_index, + HeapWord* card_start) { + gclog_or_tty->print_cr("T %d Region [" PTR_FORMAT ", " PTR_FORMAT ") " + "RS names card %p: " + "[" PTR_FORMAT ", " PTR_FORMAT ")", + _worker_i, + card_region->bottom(), card_region->end(), + card_index, + card_start, card_start + G1BlockOffsetSharedArray::N_words); + } + + bool doHeapRegion(HeapRegion* r) { + assert(r->in_collection_set(), "should only be called on elements of CS."); + HeapRegionRemSet* hrrs = r->rem_set(); + if (hrrs->iter_is_complete()) return false; // All done. + if (!_try_claimed && !hrrs->claim_iter()) return false; + // If we didn't return above, then + // _try_claimed || r->claim_iter() + // is true: either we're supposed to work on claimed-but-not-complete + // regions, or we successfully claimed the region. + HeapRegionRemSetIterator* iter = _g1h->rem_set_iterator(_worker_i); + hrrs->init_iterator(iter); + size_t card_index; + while (iter->has_next(card_index)) { + HeapWord* card_start = _g1h->bot_shared()->address_for_index(card_index); + +#if 0 + gclog_or_tty->print("Rem set iteration yielded card [" PTR_FORMAT ", " PTR_FORMAT ").\n", + card_start, card_start + CardTableModRefBS::card_size_in_words); +#endif + + HeapRegion* card_region = _g1h->heap_region_containing(card_start); + assert(card_region != NULL, "Yielding cards not in the heap?"); + _cards++; + + if (!card_region->in_collection_set()) { + // If the card is dirty, then we will scan it during updateRS. 
+ if (!_ct_bs->is_card_claimed(card_index) && + !_ct_bs->is_card_dirty(card_index)) { + assert(_ct_bs->is_card_clean(card_index) || + _ct_bs->is_card_claimed(card_index), + "Card is either dirty, clean, or claimed"); + if (_ct_bs->claim_card(card_index)) + scanCard(card_index, card_region); + } + } + } + hrrs->set_iter_complete(); + return false; + } + // Set all cards back to clean. + void cleanup() {_g1h->cleanUpCardTable();} + size_t cards_done() { return _cards_done;} + size_t cards_looked_up() { return _cards;} +}; + +// We want the parallel threads to start their scanning at +// different collection set regions to avoid contention. +// If we have: +// n collection set regions +// p threads +// Then thread t will start at region t * floor (n/p) + +HeapRegion* HRInto_G1RemSet::calculateStartRegion(int worker_i) { + HeapRegion* result = _g1p->collection_set(); + if (ParallelGCThreads > 0) { + size_t cs_size = _g1p->collection_set_size(); + int n_workers = _g1->workers()->total_workers(); + size_t cs_spans = cs_size / n_workers; + size_t ind = cs_spans * worker_i; + for (size_t i = 0; i < ind; i++) + result = result->next_in_collection_set(); + } + return result; +} + +void HRInto_G1RemSet::scanRS(OopsInHeapRegionClosure* oc, int worker_i) { + double rs_time_start = os::elapsedTime(); + HeapRegion *startRegion = calculateStartRegion(worker_i); + + BufferingOopsInHeapRegionClosure boc(oc); + ScanRSClosure scanRScl(&boc, worker_i); + _g1->collection_set_iterate_from(startRegion, &scanRScl); + scanRScl.set_try_claimed(); + _g1->collection_set_iterate_from(startRegion, &scanRScl); + + boc.done(); + double closure_app_time_sec = boc.closure_app_seconds(); + double scan_rs_time_sec = (os::elapsedTime() - rs_time_start) - + closure_app_time_sec; + double closure_app_time_ms = closure_app_time_sec * 1000.0; + + assert( _cards_scanned != NULL, "invariant" ); + _cards_scanned[worker_i] = scanRScl.cards_done(); + + _g1p->record_scan_rs_start_time(worker_i, rs_time_start * 
1000.0); + _g1p->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0); + if (ParallelGCThreads > 0) { + // In this case, we called scanNewRefsRS and recorded the corresponding + // time. + double scan_new_refs_time_ms = _g1p->get_scan_new_refs_time(worker_i); + if (scan_new_refs_time_ms > 0.0) { + closure_app_time_ms += scan_new_refs_time_ms; + } + } + _g1p->record_obj_copy_time(worker_i, closure_app_time_ms); +} + +void HRInto_G1RemSet::updateRS(int worker_i) { + ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine(); + + double start = os::elapsedTime(); + _g1p->record_update_rs_start_time(worker_i, start * 1000.0); + + if (G1RSBarrierUseQueue && !cg1r->do_traversal()) { + // Apply the appropriate closure to all remaining log entries. + _g1->iterate_dirty_card_closure(false, worker_i); + // Now there should be no dirty cards. + if (G1RSLogCheckCardTable) { + CountNonCleanMemRegionClosure cl(_g1); + _ct_bs->mod_card_iterate(&cl); + // XXX This isn't true any more: keeping cards of young regions + // marked dirty broke it. Need some reasonable fix. + guarantee(cl.n() == 0, "Card table should be clean."); + } + } else { + UpdateRSOutOfRegionClosure update_rs(_g1, worker_i); + _g1->heap_region_iterate(&update_rs); + // We did a traversal; no further one is necessary. 
+    if (G1RSBarrierUseQueue) {
+      assert(cg1r->do_traversal(), "Or we shouldn't have gotten here.");
+      cg1r->set_pya_cancel();
+    }
+    if (_cg1r->use_cache()) {
+      _cg1r->clear_and_record_card_counts();
+      _cg1r->clear_hot_cache();
+    }
+  }
+  _g1p->record_update_rs_time(worker_i, (os::elapsedTime() - start) * 1000.0);
+}
+
+#ifndef PRODUCT
+// Region closure (non-product builds) that prints each region's remembered
+// set and accumulates the total number of occupied entries.
+class PrintRSClosure : public HeapRegionClosure {
+  int _count;
+public:
+  PrintRSClosure() : _count(0) {}
+  bool doHeapRegion(HeapRegion* r) {
+    HeapRegionRemSet* hrrs = r->rem_set();
+    _count += (int) hrrs->occupied();
+    if (hrrs->occupied() == 0) {
+      gclog_or_tty->print("Heap Region [" PTR_FORMAT ", " PTR_FORMAT ") "
+                          "has no remset entries\n",
+                          r->bottom(), r->end());
+    } else {
+      gclog_or_tty->print("Printing rem set for heap region [" PTR_FORMAT ", " PTR_FORMAT ")\n",
+                          r->bottom(), r->end());
+      r->print();
+      hrrs->print();
+      gclog_or_tty->print("\nDone printing rem set\n");
+    }
+    return false;
+  }
+  int occupied() {return _count;}
+};
+#endif
+
+// Region closure that gathers statistics on remembered-set occupancy:
+// the number of regions visited, the total and maximum occupancy, the
+// region with the maximum, and a power-of-two histogram of occupancies.
+// Regions for which continuesHumongous() is true are skipped.
+class CountRSSizeClosure: public HeapRegionClosure {
+  size_t _n;            // number of regions counted
+  size_t _tot;          // sum of occupied() over counted regions
+  size_t _max;          // largest occupied() seen
+  HeapRegion* _max_r;   // region that produced _max (NULL if none)
+  enum {
+    N = 20,             // number of histogram bins
+    MIN = 6             // bin 0 holds occupancies <= (1 << MIN)
+  };
+  int _histo[N];
+public:
+  CountRSSizeClosure() : _n(0), _tot(0), _max(0), _max_r(NULL) {
+    for (int i = 0; i < N; i++) _histo[i] = 0;
+  }
+  // Record the occupancy of "r".  Always returns false.
+  bool doHeapRegion(HeapRegion* r) {
+    if (!r->continuesHumongous()) {
+      size_t occ = r->rem_set()->occupied();
+      _n++;
+      _tot += occ;
+      if (occ > _max) {
+        _max = occ;
+        _max_r = r;
+      }
+      // Fit it into a histo bin: double the bound until it covers "occ"
+      // or the last bin is reached.
+      int s = 1 << MIN;
+      int i = 0;
+      while (occ > (size_t) s && i < (N-1)) {
+        s = s << 1;
+        i++;
+      }
+      _histo[i]++;
+    }
+    return false;
+  }
+  size_t n() { return _n; }
+  size_t tot() { return _tot; }
+  size_t mx() { return _max; }
+  HeapRegion* mxr() { return _max_r; }
+  // Print the histogram up to the highest non-empty bin.
+  void print_histo() {
+    // Find mx such that _histo[mx-1] is the highest non-empty bin.  The
+    // search stops at mx == 1: the previous "mx >= 0" bound read
+    // _histo[-1] (and then printed _histo[mx-1] with a negative index)
+    // whenever every bin was empty.
+    int mx = N;
+    while (mx > 1) {
+      if (_histo[mx-1] > 0) break;
+      mx--;
+    }
+    gclog_or_tty->print_cr("Number of regions with given RS sizes:");
+    gclog_or_tty->print_cr(" <= %8d %8d", 1 << MIN, _histo[0]);
+    for (int i = 1; i < mx-1; i++) {
+      gclog_or_tty->print_cr(" %8d - %8d %8d",
+                    (1 << (MIN + i - 1)) + 1,
+                    1 << (MIN + i),
+                    _histo[i]);
+    }
+    gclog_or_tty->print_cr(" > %8d %8d", (1 << (MIN+mx-2))+1, _histo[mx-1]);
+  }
+};
+
+void
+HRInto_G1RemSet::scanNewRefsRS(OopsInHeapRegionClosure* oc,
+                               int worker_i) {
+  double scan_new_refs_start_sec = os::elapsedTime();
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  CardTableModRefBS* ct_bs = (CardTableModRefBS*) (g1h->barrier_set());
+  while (_new_refs[worker_i]->is_nonempty()) {
+    oop* p = _new_refs[worker_i]->pop();
+    oop obj = *p;
+    // *p was in the collection set when p was pushed on "_new_refs", but
+    // another thread may have processed this location from an RS, so it
+    // might not point into the CS any longer. If so, it's obviously been
+    // processed, and we don't need to do anything further.
+    if (g1h->obj_in_cs(obj)) {
+      HeapRegion* r = g1h->heap_region_containing(p);
+
+      DEBUG_ONLY(HeapRegion* to = g1h->heap_region_containing(obj));
+      assert(ParallelGCThreads > 1
+             || to->rem_set()->contains_reference(p),
+             "Invariant: pushed after being added."
+             "(Not reliable in parallel code.)");
+      oc->set_region(r);
+      // If "p" has already been processed concurrently, this is
+      // idempotent.
+      oc->do_oop(p);
+    }
+  }
+  _g1p->record_scan_new_refs_time(worker_i,
+                                  (os::elapsedTime() - scan_new_refs_start_sec)
+                                  * 1000.0);
+}
+
+// Record the parallel-traversal flag here and in HeapRegionRemSet.
+void HRInto_G1RemSet::set_par_traversal(bool b) {
+  _par_traversal_in_progress = b;
+  HeapRegionRemSet::set_par_traversal(b);
+}
+
+void HRInto_G1RemSet::cleanupHRRS() {
+  HeapRegionRemSet::cleanup();
+}
+
+// Apply "oc" to references into the collection set on behalf of worker
+// "worker_i".  With ParallelGCThreads > 0 only worker 0 does any work
+// (update, new-refs scan, then scan); otherwise worker_i must be 0 and the
+// update and scan run directly.
+void
+HRInto_G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc,
+                                             int worker_i) {
+#if CARD_REPEAT_HISTO
+  ct_freq_update_histo_and_reset();
+#endif
+  if (worker_i == 0) {
+    _cg1r->clear_and_record_card_counts();
+  }
+
+  // Make this into a command-line flag...
+  if (G1RSCountHisto && (ParallelGCThreads == 0 || worker_i == 0)) {
+    CountRSSizeClosure count_cl;
+    _g1->heap_region_iterate(&count_cl);
+    // n() and mx() are size_t, so they are printed with SIZE_FORMAT rather
+    // than %d.  The average is reported as 0 when no region was counted
+    // instead of dividing by zero.
+    float avg_occ = 0.0f;
+    if (count_cl.n() > 0) {
+      avg_occ = (float)count_cl.tot()/(float)count_cl.n();
+    }
+    gclog_or_tty->print_cr("Avg of " SIZE_FORMAT " RS counts is %f, max is "
+                  SIZE_FORMAT ", max region is " PTR_FORMAT,
+                  count_cl.n(), avg_occ,
+                  count_cl.mx(), count_cl.mxr());
+    count_cl.print_histo();
+  }
+
+  if (ParallelGCThreads > 0) {
+    // This is a temporary change to serialize the update and scanning
+    // of remembered sets. There are some race conditions when this is
+    // done in parallel and they are causing failures. When we resolve
+    // said race conditions, we'll revert back to parallel remembered
+    // set updating and scanning. See CRs 6677707 and 6677708.
+    if (worker_i == 0) {
+      updateRS(worker_i);
+      scanNewRefsRS(oc, worker_i);
+      scanRS(oc, worker_i);
+    }
+  } else {
+    assert(worker_i == 0, "invariant");
+
+    updateRS(0);
+    scanRS(oc, 0);
+  }
+}
+
+// Set-up step run before oops_into_collection_set_do: cleans the region
+// remembered sets, turns off concurrent refinement of the card-table-entry
+// closure, concatenates the dirty card logs, and allocates the per-worker
+// "_cards_scanned" array (plus the per-thread "_new_refs" stacks when
+// ParallelGCThreads > 0).
+void HRInto_G1RemSet::
+prepare_for_oops_into_collection_set_do() {
+#if G1_REM_SET_LOGGING
+  PrintRSClosure cl;
+  _g1->collection_set_iterate(&cl);
+#endif
+  cleanupHRRS();
+  ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine();
+  _g1->set_refine_cte_cl_concurrency(false);
+  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+  dcqs.concatenate_logs();
+
+  assert(!_par_traversal_in_progress, "Invariant between iterations.");
+  if (ParallelGCThreads > 0) {
+    set_par_traversal(true);
+    int n_workers = _g1->workers()->total_workers();
+    _seq_task->set_par_threads(n_workers);
+    for (uint i = 0; i < ParallelGCThreads; i++)
+      _new_refs[i] = new (ResourceObj::C_HEAP) GrowableArray(8192,true);
+
+    if (cg1r->do_traversal()) {
+      updateRS(0);
+      // Have to do this again after updaters
+      cleanupHRRS();
+    }
+  }
+  guarantee( _cards_scanned == NULL, "invariant" );
+  _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers());
+  _total_cards_scanned = 0;
+}
+
+
+// Region closure that calls init_for_par_iteration() on every region's
+// remembered set.
+class cleanUpIteratorsClosure : public HeapRegionClosure {
+  bool doHeapRegion(HeapRegion *r) {
+    HeapRegionRemSet* hrrs = r->rem_set();
+    hrrs->init_for_par_iteration();
+    return false;
+  }
+};
+
+void HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do() {
+  guarantee( _cards_scanned != NULL, "invariant" );
+  _total_cards_scanned = 0;
+  for (uint i = 0; i < n_workers(); ++i)
+    _total_cards_scanned += _cards_scanned[i];
+  FREE_C_HEAP_ARRAY(size_t, _cards_scanned);
+  _cards_scanned = NULL;
+  // Cleanup after copy
+#if G1_REM_SET_LOGGING
+  PrintRSClosure cl;
+  _g1->heap_region_iterate(&cl);
+#endif
+  _g1->set_refine_cte_cl_concurrency(true);
+  cleanUpIteratorsClosure iterClosure;
+  _g1->collection_set_iterate(&iterClosure);
+  // Set all cards back to clean.
+ _g1->cleanUpCardTable(); + if (ParallelGCThreads > 0) { + ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine(); + if (cg1r->do_traversal()) { + cg1r->cg1rThread()->set_do_traversal(false); + } + for (uint i = 0; i < ParallelGCThreads; i++) { + delete _new_refs[i]; + } + set_par_traversal(false); + } + assert(!_par_traversal_in_progress, "Invariant between iterations."); +} + +class UpdateRSObjectClosure: public ObjectClosure { + UpdateRSOopClosure* _update_rs_oop_cl; +public: + UpdateRSObjectClosure(UpdateRSOopClosure* update_rs_oop_cl) : + _update_rs_oop_cl(update_rs_oop_cl) {} + void do_object(oop obj) { + obj->oop_iterate(_update_rs_oop_cl); + } + +}; + +class ScrubRSClosure: public HeapRegionClosure { + G1CollectedHeap* _g1h; + BitMap* _region_bm; + BitMap* _card_bm; + CardTableModRefBS* _ctbs; +public: + ScrubRSClosure(BitMap* region_bm, BitMap* card_bm) : + _g1h(G1CollectedHeap::heap()), + _region_bm(region_bm), _card_bm(card_bm), + _ctbs(NULL) + { + ModRefBarrierSet* bs = _g1h->mr_bs(); + guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition"); + _ctbs = (CardTableModRefBS*)bs; + } + + bool doHeapRegion(HeapRegion* r) { + if (!r->continuesHumongous()) { + r->rem_set()->scrub(_ctbs, _region_bm, _card_bm); + } + return false; + } +}; + +void HRInto_G1RemSet::scrub(BitMap* region_bm, BitMap* card_bm) { + ScrubRSClosure scrub_cl(region_bm, card_bm); + _g1->heap_region_iterate(&scrub_cl); +} + +void HRInto_G1RemSet::scrub_par(BitMap* region_bm, BitMap* card_bm, + int worker_num, int claim_val) { + ScrubRSClosure scrub_cl(region_bm, card_bm); + _g1->heap_region_par_iterate_chunked(&scrub_cl, worker_num, claim_val); +} + + +class ConcRefineRegionClosure: public HeapRegionClosure { + G1CollectedHeap* _g1h; + CardTableModRefBS* _ctbs; + ConcurrentGCThread* _cgc_thrd; + ConcurrentG1Refine* _cg1r; + unsigned _cards_processed; + UpdateRSOopClosure _update_rs_oop_cl; +public: + ConcRefineRegionClosure(CardTableModRefBS* ctbs, + ConcurrentG1Refine* cg1r, + 
HRInto_G1RemSet* g1rs) : + _ctbs(ctbs), _cg1r(cg1r), _cgc_thrd(cg1r->cg1rThread()), + _update_rs_oop_cl(g1rs), _cards_processed(0), + _g1h(G1CollectedHeap::heap()) + {} + + bool doHeapRegion(HeapRegion* r) { + if (!r->in_collection_set() && + !r->continuesHumongous() && + !r->is_young()) { + _update_rs_oop_cl.set_from(r); + UpdateRSObjectClosure update_rs_obj_cl(&_update_rs_oop_cl); + + // For each run of dirty card in the region: + // 1) Clear the cards. + // 2) Process the range corresponding to the run, adding any + // necessary RS entries. + // 1 must precede 2, so that a concurrent modification redirties the + // card. If a processing attempt does not succeed, because it runs + // into an unparseable region, we will do binary search to find the + // beginning of the next parseable region. + HeapWord* startAddr = r->bottom(); + HeapWord* endAddr = r->used_region().end(); + HeapWord* lastAddr; + HeapWord* nextAddr; + + for (nextAddr = lastAddr = startAddr; + nextAddr < endAddr; + nextAddr = lastAddr) { + MemRegion dirtyRegion; + + // Get and clear dirty region from card table + MemRegion next_mr(nextAddr, endAddr); + dirtyRegion = + _ctbs->dirty_card_range_after_reset( + next_mr, + true, CardTableModRefBS::clean_card_val()); + assert(dirtyRegion.start() >= nextAddr, + "returned region inconsistent?"); + + if (!dirtyRegion.is_empty()) { + HeapWord* stop_point = + r->object_iterate_mem_careful(dirtyRegion, + &update_rs_obj_cl); + if (stop_point == NULL) { + lastAddr = dirtyRegion.end(); + _cards_processed += + (int) (dirtyRegion.word_size() / CardTableModRefBS::card_size_in_words); + } else { + // We're going to skip one or more cards that we can't parse. + HeapWord* next_parseable_card = + r->next_block_start_careful(stop_point); + // Round this up to a card boundary. + next_parseable_card = + _ctbs->addr_for(_ctbs->byte_after_const(next_parseable_card)); + // Now we invalidate the intervening cards so we'll see them + // again. 
+ MemRegion remaining_dirty = + MemRegion(stop_point, dirtyRegion.end()); + MemRegion skipped = + MemRegion(stop_point, next_parseable_card); + _ctbs->invalidate(skipped.intersection(remaining_dirty)); + + // Now start up again where we can parse. + lastAddr = next_parseable_card; + + // Count how many we did completely. + _cards_processed += + (stop_point - dirtyRegion.start()) / + CardTableModRefBS::card_size_in_words; + } + // Allow interruption at regular intervals. + // (Might need to make them more regular, if we get big + // dirty regions.) + if (_cgc_thrd != NULL) { + if (_cgc_thrd->should_yield()) { + _cgc_thrd->yield(); + switch (_cg1r->get_pya()) { + case PYA_continue: + // This may have changed: re-read. + endAddr = r->used_region().end(); + continue; + case PYA_restart: case PYA_cancel: + return true; + } + } + } + } else { + break; + } + } + } + // A good yield opportunity. + if (_cgc_thrd != NULL) { + if (_cgc_thrd->should_yield()) { + _cgc_thrd->yield(); + switch (_cg1r->get_pya()) { + case PYA_restart: case PYA_cancel: + return true; + default: + break; + } + + } + } + return false; + } + + unsigned cards_processed() { return _cards_processed; } +}; + + +void HRInto_G1RemSet::concurrentRefinementPass(ConcurrentG1Refine* cg1r) { + ConcRefineRegionClosure cr_cl(ct_bs(), cg1r, this); + _g1->heap_region_iterate(&cr_cl); + _conc_refine_traversals++; + _conc_refine_cards += cr_cl.cards_processed(); +} + +static IntHistogram out_of_histo(50, 50); + + + +void HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i) { + // If the card is no longer dirty, nothing to do. + if (*card_ptr != CardTableModRefBS::dirty_card_val()) return; + + // Construct the region representing the card. + HeapWord* start = _ct_bs->addr_for(card_ptr); + // And find the region containing it. 
+ HeapRegion* r = _g1->heap_region_containing(start); + if (r == NULL) { + guarantee(_g1->is_in_permanent(start), "Or else where?"); + return; // Not in the G1 heap (might be in perm, for example.) + } + // Why do we have to check here whether a card is on a young region, + // given that we dirty young regions and, as a result, the + // post-barrier is supposed to filter them out and never to enqueue + // them? When we allocate a new region as the "allocation region" we + // actually dirty its cards after we release the lock, since card + // dirtying while holding the lock was a performance bottleneck. So, + // as a result, it is possible for other threads to actually + // allocate objects in the region (after the acquire the lock) + // before all the cards on the region are dirtied. This is unlikely, + // and it doesn't happen often, but it can happen. So, the extra + // check below filters out those cards. + if (r->is_young()) { + return; + } + // While we are processing RSet buffers during the collection, we + // actually don't want to scan any cards on the collection set, + // since we don't want to update remebered sets with entries that + // point into the collection set, given that live objects from the + // collection set are about to move and such entries will be stale + // very soon. This change also deals with a reliability issue which + // involves scanning a card in the collection set and coming across + // an array that was being chunked and looking malformed. Note, + // however, that if evacuation fails, we have to scan any objects + // that were not moved and create any missing entries. + if (r->in_collection_set()) { + return; + } + + // Should we defer it? + if (_cg1r->use_cache()) { + card_ptr = _cg1r->cache_insert(card_ptr); + // If it was not an eviction, nothing to do. + if (card_ptr == NULL) return; + + // OK, we have to reset the card start, region, etc. 
+ start = _ct_bs->addr_for(card_ptr); + r = _g1->heap_region_containing(start); + if (r == NULL) { + guarantee(_g1->is_in_permanent(start), "Or else where?"); + return; // Not in the G1 heap (might be in perm, for example.) + } + guarantee(!r->is_young(), "It was evicted in the current minor cycle."); + } + + HeapWord* end = _ct_bs->addr_for(card_ptr + 1); + MemRegion dirtyRegion(start, end); + +#if CARD_REPEAT_HISTO + init_ct_freq_table(_g1->g1_reserved_obj_bytes()); + ct_freq_note_card(_ct_bs->index_for(start)); +#endif + + UpdateRSOopClosure update_rs_oop_cl(this, worker_i); + update_rs_oop_cl.set_from(r); + FilterOutOfRegionClosure filter_then_update_rs_oop_cl(r, &update_rs_oop_cl); + + // Undirty the card. + *card_ptr = CardTableModRefBS::clean_card_val(); + // We must complete this write before we do any of the reads below. + OrderAccess::storeload(); + // And process it, being careful of unallocated portions of TLAB's. + HeapWord* stop_point = + r->oops_on_card_seq_iterate_careful(dirtyRegion, + &filter_then_update_rs_oop_cl); + // If stop_point is non-null, then we encountered an unallocated region + // (perhaps the unfilled portion of a TLAB.) For now, we'll dirty the + // card and re-enqueue: if we put off the card until a GC pause, then the + // unallocated portion will be filled in. Alternatively, we might try + // the full complexity of the technique used in "regular" precleaning. + if (stop_point != NULL) { + // The card might have gotten re-dirtied and re-enqueued while we + // worked. (In fact, it's pretty likely.) 
+    if (*card_ptr != CardTableModRefBS::dirty_card_val()) {
+      *card_ptr = CardTableModRefBS::dirty_card_val();
+      MutexLockerEx x(Shared_DirtyCardQ_lock,
+                      Mutex::_no_safepoint_check_flag);
+      DirtyCardQueue* sdcq =
+        JavaThread::dirty_card_queue_set().shared_dirty_card_queue();
+      sdcq->enqueue(card_ptr);
+    }
+  } else {
+    out_of_histo.add_entry(filter_then_update_rs_oop_cl.out_of_region());
+    _conc_refine_cards++;
+  }
+}
+
+class HRRSStatsIter: public HeapRegionClosure {  // Totals rem-set size and occupancy over the regions it visits.
+  size_t _occupied;                // Sum of rem_set()->occupied() over counted regions.
+  size_t _total_mem_sz;            // Sum of rem_set()->mem_size() over counted regions.
+  size_t _max_mem_sz;              // Largest single rem_set()->mem_size() seen so far.
+  HeapRegion* _max_mem_sz_region;  // Region that produced _max_mem_sz; NULL until a region is counted.
+public:
+  HRRSStatsIter() :
+    _occupied(0),
+    _total_mem_sz(0),
+    _max_mem_sz(0),
+    _max_mem_sz_region(NULL)
+  {}
+
+  bool doHeapRegion(HeapRegion* r) {  // Fold one region's rem-set statistics into the totals.
+    if (r->continuesHumongous()) return false;  // Such regions are not counted.
+    size_t mem_sz = r->rem_set()->mem_size();
+    if (_max_mem_sz_region == NULL || mem_sz > _max_mem_sz) {  // The first counted region always qualifies, even at size 0.
+      _max_mem_sz = mem_sz;
+      _max_mem_sz_region = r;
+    }
+    _total_mem_sz += mem_sz;
+    size_t occ = r->rem_set()->occupied();
+    _occupied += occ;
+    return false;  // Always false; presumably "keep iterating" -- TODO confirm against HeapRegionClosure.
+  }
+  size_t total_mem_sz() { return _total_mem_sz; }
+  size_t max_mem_sz() { return _max_mem_sz; }
+  size_t occupied() { return _occupied; }
+  HeapRegion* max_mem_sz_region() { return _max_mem_sz_region; }
+};
+
+void HRInto_G1RemSet::print_summary_info() {  // Print refinement and rem-set statistics to gclog_or_tty.
+  G1CollectedHeap* g1 = G1CollectedHeap::heap();
+  ConcurrentG1RefineThread* cg1r_thrd =
+    g1->concurrent_g1_refine()->cg1rThread();
+
+#if CARD_REPEAT_HISTO
+  gclog_or_tty->print_cr("\nG1 card_repeat count histogram: ");
+  gclog_or_tty->print_cr(" # of repeats --> # of cards with that number.");
+  card_repeat_count.print_on(gclog_or_tty);
+#endif
+
+  if (FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT) {
+    gclog_or_tty->print_cr("\nG1 rem-set out-of-region histogram: ");
+    gclog_or_tty->print_cr(" # of CS ptrs --> # of cards with that number.");
+    out_of_histo.print_on(gclog_or_tty);
+  }
+  gclog_or_tty->print_cr("\n Concurrent RS processed %u cards in "  // %u: the counter is declared unsigned.
+                         "%5.2fs.",
+                         _conc_refine_cards, cg1r_thrd->vtime_accum());
+
+  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+  jint tot_processed_buffers =
+    dcqs.processed_buffers_mut() + dcqs.processed_buffers_rs_thread();
+  gclog_or_tty->print_cr(" Of %d completed buffers:", tot_processed_buffers);
+  gclog_or_tty->print_cr(" %8d (%5.1f%%) by conc RS thread.",
+                         dcqs.processed_buffers_rs_thread(),
+                         100.0*(float)dcqs.processed_buffers_rs_thread()/
+                         (float)(tot_processed_buffers > 0 ? tot_processed_buffers : 1));  // Avoid 0/0 (NaN) when nothing was processed.
+  gclog_or_tty->print_cr(" %8d (%5.1f%%) by mutator threads.",
+                         dcqs.processed_buffers_mut(),
+                         100.0*(float)dcqs.processed_buffers_mut()/
+                         (float)(tot_processed_buffers > 0 ? tot_processed_buffers : 1));  // Same guard as above.
+  gclog_or_tty->print_cr(" Did %u concurrent refinement traversals.",  // %u: the counter is declared unsigned.
+                         _conc_refine_traversals);
+  if (!G1RSBarrierUseQueue) {
+    gclog_or_tty->print_cr(" Scanned %8.2f cards/traversal.",
+                           _conc_refine_traversals > 0 ?
+                           (float)_conc_refine_cards/(float)_conc_refine_traversals :
+                           0);
+  }
+  gclog_or_tty->print_cr("");
+  if (G1UseHRIntoRS) {
+    HRRSStatsIter blk;
+    g1->heap_region_iterate(&blk);
+    gclog_or_tty->print_cr(" Total heap region rem set sizes = " SIZE_FORMAT "K."
+                           " Max = " SIZE_FORMAT "K.",
+                           blk.total_mem_sz()/K, blk.max_mem_sz()/K);
+    gclog_or_tty->print_cr(" Static structures = " SIZE_FORMAT "K,"
+                           " free_lists = " SIZE_FORMAT "K.",
+                           HeapRegionRemSet::static_mem_size()/K,
+                           HeapRegionRemSet::fl_mem_size()/K);
+    gclog_or_tty->print_cr(" " SIZE_FORMAT " occupied cards represented.",  // occupied() is a size_t; %d was the wrong width on LP64.
+                           blk.occupied());
+    gclog_or_tty->print_cr(" Max sz region = [" PTR_FORMAT ", " PTR_FORMAT " )"
+                           " %s, cap = " SIZE_FORMAT "K, occ = " SIZE_FORMAT "K.",
+                           blk.max_mem_sz_region()->bottom(), blk.max_mem_sz_region()->end(),  // NOTE(review): still NULL if no region at all was counted -- confirm that cannot happen.
+                           (blk.max_mem_sz_region()->popular() ? "POP" : ""),
+                           (blk.max_mem_sz_region()->rem_set()->mem_size() + K - 1)/K,
+                           (blk.max_mem_sz_region()->rem_set()->occupied() + K - 1)/K);
+    gclog_or_tty->print_cr(" Did %d coarsenings.",
+                           HeapRegionRemSet::n_coarsenings());
+
+  }
+}
+void HRInto_G1RemSet::prepare_for_verify() {  // Flush pending card logs into the rem sets ahead of pre-GC verification.
+  if (G1HRRSFlushLogBuffersOnVerify && VerifyBeforeGC && !_g1->full_collection()) {
+    cleanupHRRS();
+    _g1->set_refine_cte_cl_concurrency(false);
+    if (SafepointSynchronize::is_at_safepoint()) {
+      DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+      dcqs.concatenate_logs();
+    }
+    bool cg1r_use_cache = _cg1r->use_cache();  // Saved so that it can be restored after updateRS().
+    _cg1r->set_use_cache(false);
+    updateRS(0);  // Runs with the refinement cache switched off.
+    _cg1r->set_use_cache(cg1r_use_cache);
+  }
+}
diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp
new file mode 100644
index 00000000000..a07ba882a2c
--- /dev/null
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp
@@ -0,0 +1,216 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// A G1RemSet provides ways of iterating over pointers into a selected
+// collection set.
+
+class G1CollectedHeap;
+class CardTableModRefBarrierSet;
+class HRInto_G1RemSet;
+class ConcurrentG1Refine;
+
+class G1RemSet {
+protected:
+  G1CollectedHeap* _g1;  // The heap handed to the constructor.
+
+  unsigned _conc_refine_traversals;  // Starts at 0; not updated anywhere in this class.
+  unsigned _conc_refine_cards;       // Starts at 0; read back through conc_refine_cards().
+
+  size_t n_workers();
+
+public:
+  G1RemSet(G1CollectedHeap* g1) :
+    _g1(g1), _conc_refine_traversals(0), _conc_refine_cards(0)
+  {}
+
+  // Invoke "blk->do_oop" on all pointers into the CS in objects in regions
+  // outside the CS (having invoked "blk->set_region" to set the "from"
+  // region correctly beforehand.) The "worker_i" param is for the
+  // parallel case where the number of the worker thread calling this
+  // function can be helpful in partitioning the work to be done. It
+  // should be the same as the "i" passed to the calling thread's
+  // work(i) function. In the sequential case this param will be ignored.
+  virtual void oops_into_collection_set_do(OopsInHeapRegionClosure* blk,
+                                           int worker_i) = 0;
+
+  // Prepare for and cleanup after an oops_into_collection_set_do
+  // call. Must call each of these once before and after (in sequential
+  // code) any threads call oops_into_collection_set_do. (This offers an
+  // opportunity for sequential setup and teardown of structures needed by a
+  // parallel iteration over the CS's RS.)
+  virtual void prepare_for_oops_into_collection_set_do() = 0;
+  virtual void cleanup_after_oops_into_collection_set_do() = 0;
+
+  // If "this" is of the given subtype, return "this", else "NULL".
+  virtual HRInto_G1RemSet* as_HRInto_G1RemSet() { return NULL; }
+
+  // Record, if necessary, the fact that *p (where "p" is in region "from")
+  // has changed to its new value.
+  virtual void write_ref(HeapRegion* from, oop* p) = 0;
+  virtual void par_write_ref(HeapRegion* from, oop* p, int tid) = 0;
+
+  // Requires "region_bm" and "card_bm" to be bitmaps with 1 bit per region
+  // or card, respectively, such that a region or card with a corresponding
+  // 0 bit contains no part of any live object. Eliminates any remembered
+  // set entries that correspond to dead heap ranges.
+  virtual void scrub(BitMap* region_bm, BitMap* card_bm) = 0;
+  // Like the above, but assumes it is called in parallel: "worker_num" is the
+  // parallel thread id of the current thread, and "claim_val" is the
+  // value that should be used to claim heap regions.
+  virtual void scrub_par(BitMap* region_bm, BitMap* card_bm,
+                         int worker_num, int claim_val) = 0;
+
+  // Do any "refinement" activity that might be appropriate to the given
+  // G1RemSet. If "refinement" has iterative "passes", do one pass.
+  // NOTE(review): this text used to describe a thread "t"; the argument is "cg1r".
+  // Default implementation does nothing.
+  virtual void concurrentRefinementPass(ConcurrentG1Refine* cg1r) {}
+
+  // Refine the card corresponding to "card_ptr"; "worker_i" names the caller.
+  // NOTE(review): this text used to describe an "sts" argument that the
+  // signature does not have. Default implementation does nothing.
+  virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {}
+
+  unsigned conc_refine_cards() { return _conc_refine_cards; }
+
+  // Print any relevant summary info.
+  virtual void print_summary_info() {}
+
+  // Prepare remembered set for verification.
+  virtual void prepare_for_verify() {};
+};
+
+
+// The simplest possible G1RemSet: iterates over all objects in non-CS
+// regions, searching for pointers into the CS.
+class StupidG1RemSet: public G1RemSet {
+public:
+  StupidG1RemSet(G1CollectedHeap* g1) : G1RemSet(g1) {}
+
+  void oops_into_collection_set_do(OopsInHeapRegionClosure* blk,
+                                   int worker_i);
+
+  void prepare_for_oops_into_collection_set_do() {}
+  void cleanup_after_oops_into_collection_set_do() {}
+
+  // This rem set keeps no per-write state, so the write hooks are empty.
+  void write_ref(HeapRegion* from, oop* p) {}
+  void par_write_ref(HeapRegion* from, oop* p, int tid) {}
+
+  void scrub(BitMap* region_bm, BitMap* card_bm) {}  // Nothing to scrub.
+  void scrub_par(BitMap* region_bm, BitMap* card_bm,
+                 int worker_num, int claim_val) {}
+
+};
+
+// A G1RemSet in which each heap region has a rem set that records the
+// external heap references into it. Uses a mod ref bs to track updates,
+// so that they can be used to update the individual region remsets.
+
+class HRInto_G1RemSet: public G1RemSet {
+protected:
+  enum SomePrivateConstants {
+    UpdateRStoMergeSync = 0,
+    MergeRStoDoDirtySync = 1,
+    DoDirtySync = 2,
+    LastSync = 3,
+
+    SeqTask = 0,
+    NumSeqTasks = 1
+  };
+
+  CardTableModRefBS* _ct_bs;  // Returned by ct_bs().
+  SubTasksDone* _seq_task;
+  G1CollectorPolicy* _g1p;
+
+  ConcurrentG1Refine* _cg1r;
+
+  size_t* _cards_scanned;  // presumably one counter per worker -- TODO confirm in g1RemSet.cpp.
+  size_t _total_cards_scanned;  // Returned by cardsScanned().
+
+  // _par_traversal_in_progress is "true" iff a parallel traversal is in
+  // progress. If so, then cards added to remembered sets should also have
+  // their references into the collection summarized in "_new_refs".
+  bool _par_traversal_in_progress;
+  void set_par_traversal(bool b);
+  GrowableArray<oop*>** _new_refs;  // Indexed by "tid" in par_write_ref(), which pushes oop* values.
+
+public:
+  // This is called to reset dual hash tables after the gc pause
+  // is finished and the initial hash table is no longer being
+  // scanned.
+  void cleanupHRRS();
+
+  HRInto_G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs);
+  ~HRInto_G1RemSet();
+
+  void oops_into_collection_set_do(OopsInHeapRegionClosure* blk,
+                                   int worker_i);
+
+  void prepare_for_oops_into_collection_set_do();
+  void cleanup_after_oops_into_collection_set_do();
+  void scanRS(OopsInHeapRegionClosure* oc, int worker_i);
+  void scanNewRefsRS(OopsInHeapRegionClosure* oc, int worker_i);
+  void updateRS(int worker_i);
+  HeapRegion* calculateStartRegion(int i);
+
+  HRInto_G1RemSet* as_HRInto_G1RemSet() { return this; }
+
+  CardTableModRefBS* ct_bs() { return _ct_bs; }
+  size_t cardsScanned() { return _total_cards_scanned; }
+
+  // Record, if necessary, the fact that *p (where "p" is in region "from",
+  // which is required to be non-NULL) has changed to a new non-NULL value.
+  inline void write_ref(HeapRegion* from, oop* p);
+  // The "_nv" version is the same; it exists just so that it is not virtual.
+  inline void write_ref_nv(HeapRegion* from, oop* p);
+
+  inline bool self_forwarded(oop obj);  // True iff "obj" is forwarded to itself.
+  inline void par_write_ref(HeapRegion* from, oop* p, int tid);
+
+  void scrub(BitMap* region_bm, BitMap* card_bm);
+  void scrub_par(BitMap* region_bm, BitMap* card_bm,
+                 int worker_num, int claim_val);
+
+  virtual void concurrentRefinementPass(ConcurrentG1Refine* t);
+  virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i);
+
+  virtual void print_summary_info();
+  virtual void prepare_for_verify();
+};
+
+#define G1_REM_SET_LOGGING 0
+
+class CountNonCleanMemRegionClosure: public MemRegionClosure {
+  G1CollectedHeap* _g1;
+  int _n;                  // Starts at 0; returned by n().
+  HeapWord* _start_first;  // Starts as NULL; returned by start_first().
+public:
+  CountNonCleanMemRegionClosure(G1CollectedHeap* g1) :
+    _g1(g1), _n(0), _start_first(NULL)
+  {}
+  void do_MemRegion(MemRegion mr);
+  int n() { return _n; }
+  HeapWord* start_first() { return _start_first; }
+};
diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp
new file mode
100644 index 00000000000..e3f1b5cc81d --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp @@ -0,0 +1,104 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ *
+ */
+
+inline size_t G1RemSet::n_workers() {  // Worker count, or 1 when _g1->workers() is NULL.
+  if (_g1->workers() != NULL) {
+    return _g1->workers()->total_workers();
+  } else {
+    return 1;
+  }
+}
+
+inline void HRInto_G1RemSet::write_ref_nv(HeapRegion* from, oop* p) {  // Non-virtual body of write_ref().
+  oop obj = *p;  // The value currently stored in the field.
+  assert(from != NULL && from->is_in_reserved(p),
+         "p is not in a from");
+  HeapRegion* to = _g1->heap_region_containing(obj);
+  if (from != to && to != NULL) {  // Only a reference that crosses into another known region is recorded.
+    if (!to->popular() && !from->is_survivor()) {
+#if G1_REM_SET_LOGGING
+      gclog_or_tty->print_cr("Adding " PTR_FORMAT " (" PTR_FORMAT ") to RS"
+                             " for region [" PTR_FORMAT ", " PTR_FORMAT ")",
+                             p, obj,
+                             to->bottom(), to->end());
+#endif
+      assert(to->rem_set() != NULL, "Need per-region 'into' remsets.");
+      if (to->rem_set()->add_reference(p)) {  // A true result schedules "to" for popular-region evacuation.
+        _g1->schedule_popular_region_evac(to);
+      }
+    }
+  }
+}
+
+inline void HRInto_G1RemSet::write_ref(HeapRegion* from, oop* p) {  // Virtual entry point; forwards to write_ref_nv().
+  write_ref_nv(from, p);
+}
+
+inline bool HRInto_G1RemSet::self_forwarded(oop obj) {  // True iff "obj" is forwarded and its forwardee is itself.
+  bool result = (obj->is_forwarded() && (obj->forwardee()== obj));
+  return result;
+}
+
+inline void HRInto_G1RemSet::par_write_ref(HeapRegion* from, oop* p, int tid) {  // Like write_ref_nv(), but "from" may be NULL and "tid" picks the per-thread state.
+  oop obj = *p;
+#ifdef ASSERT
+  // The full is_oop() check cannot be used here because of races:
+  // assert(obj == NULL || obj->is_oop(), "expected an oop");
+
+  // Do the safe subset of is_oop
+  if (obj != NULL) {
+#ifdef CHECK_UNHANDLED_OOPS
+    oopDesc* o = obj.obj();
+#else
+    oopDesc* o = obj;
+#endif // CHECK_UNHANDLED_OOPS
+    assert((intptr_t)o % MinObjAlignmentInBytes == 0, "not oop aligned");
+    assert(Universe::heap()->is_in_reserved(obj), "must be in heap");
+  }
+#endif // ASSERT
+  assert(from == NULL || from->is_in_reserved(p),
+         "p is not in from");
+  HeapRegion* to = _g1->heap_region_containing(obj);
+  // The test below could be optimized by applying a bit op to "to" and "from".
+  if (to != NULL && from != NULL && from != to) {
+    if (!to->popular() && !from->is_survivor()) {
+#if G1_REM_SET_LOGGING
+      gclog_or_tty->print_cr("Adding " PTR_FORMAT " (" PTR_FORMAT ") to RS"
+                             " for region [" PTR_FORMAT ", " PTR_FORMAT ")",
+                             p, obj,
+                             to->bottom(), to->end());
+#endif
+      assert(to->rem_set() != NULL, "Need per-region 'into' remsets.");
+      if (to->rem_set()->add_reference(p, tid)) {  // A true result schedules "to" for popular-region evacuation.
+        _g1->schedule_popular_region_evac(to);
+      }
+    }
+    // There is a tricky infinite loop if we keep pushing
+    // self forwarding pointers onto our _new_refs list.
+    if (_par_traversal_in_progress &&
+        to->in_collection_set() && !self_forwarded(obj)) {
+      _new_refs[tid]->push(p);  // Remembered on the list that belongs to "tid".
+    }
+  }
+}
diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp
new file mode 100644
index 00000000000..37414989eb9
--- /dev/null
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp
@@ -0,0 +1,150 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_g1SATBCardTableModRefBS.cpp.incl"
+
+G1SATBCardTableModRefBS::G1SATBCardTableModRefBS(MemRegion whole_heap,
+                                                 int max_covered_regions) :
+    CardTableModRefBSForCTRS(whole_heap, max_covered_regions)
+{
+  _kind = G1SATBCT;  // The kind that is_a() tests for.
+}
+
+
+void G1SATBCardTableModRefBS::enqueue(oop pre_val) {  // Log "pre_val" in an SATB mark queue.
+  if (!JavaThread::satb_mark_queue_set().active()) return;  // Nothing is logged while the queue set is inactive.
+  Thread* thr = Thread::current();
+  if (thr->is_Java_thread()) {  // A Java thread logs to its own queue.
+    JavaThread* jt = (JavaThread*)thr;
+    jt->satb_mark_queue().enqueue(pre_val);
+  } else {  // Any other thread uses the shared queue, under Shared_SATB_Q_lock.
+    MutexLocker x(Shared_SATB_Q_lock);
+    JavaThread::satb_mark_queue_set().shared_satb_queue()->enqueue(pre_val);
+  }
+}
+
+// Pre-barrier for when we know the current java thread ("jt"):
+void
+G1SATBCardTableModRefBS::write_ref_field_pre_static(void* field,
+                                                    oop newVal,
+                                                    JavaThread* jt) {
+  if (!JavaThread::satb_mark_queue_set().active()) return;
+  assert(!UseCompressedOops, "Else will need to modify this to deal with narrowOop");
+  oop preVal = *(oop*)field;  // The value about to be overwritten.
+  if (preVal != NULL) {  // A NULL previous value is not logged.
+    jt->satb_mark_queue().enqueue(preVal);
+  }
+}
+
+void
+G1SATBCardTableModRefBS::write_ref_array_pre(MemRegion mr) {  // Log every non-NULL oop currently stored in "mr".
+  if (!JavaThread::satb_mark_queue_set().active()) return;
+  assert(!UseCompressedOops, "Else will need to modify this to deal with narrowOop");
+  oop* elem_ptr = (oop*)mr.start();
+  while ((HeapWord*)elem_ptr < mr.end()) {
+    oop elem = *elem_ptr;
+    if (elem != NULL) enqueue(elem);
+    elem_ptr++;
+  }
+}
+
+
+
+G1SATBCardTableLoggingModRefBS::
+G1SATBCardTableLoggingModRefBS(MemRegion whole_heap,
+                               int max_covered_regions) :
+  G1SATBCardTableModRefBS(whole_heap, max_covered_regions),
+  _dcqs(JavaThread::dirty_card_queue_set())
+{
+  _kind = G1SATBCTLogging;  // The kind that is_a() tests for.
+}
+
+void
+G1SATBCardTableLoggingModRefBS::write_ref_field_work(void* field,
+                                                     oop new_val) {
+  jbyte* byte = byte_for(field);
+  if (*byte != dirty_card) {  // An already-dirty card is left alone and is not enqueued again.
+    *byte = dirty_card;
+    Thread* thr = Thread::current();
+    if (thr->is_Java_thread()) {  // A Java thread logs to its own dirty card queue.
+      JavaThread* jt = (JavaThread*)thr;
+      jt->dirty_card_queue().enqueue(byte);
+    } else {  // Any other thread uses the shared queue, under Shared_DirtyCardQ_lock.
+      MutexLockerEx x(Shared_DirtyCardQ_lock,
+                      Mutex::_no_safepoint_check_flag);
+      _dcqs.shared_dirty_card_queue()->enqueue(byte);
+    }
+  }
+}
+
+void
+G1SATBCardTableLoggingModRefBS::write_ref_field_static(void* field,
+                                                       oop new_val) {
+  uintptr_t field_uint = (uintptr_t)field;
+  uintptr_t new_val_uint = (uintptr_t)new_val;
+  uintptr_t comb = field_uint ^ new_val_uint;  // Bits in which the two addresses differ.
+  comb = comb >> HeapRegion::LogOfHRGrainBytes;  // Drop the low LogOfHRGrainBytes bits.
+  if (comb == 0) return;  // Same high bits: presumably the same heap region -- TODO confirm regions are grain-aligned.
+  if (new_val == NULL) return;  // A NULL store is not logged.
+  // Otherwise, log it through the heap's barrier set.
+  G1SATBCardTableLoggingModRefBS* g1_bs =
+    (G1SATBCardTableLoggingModRefBS*)Universe::heap()->barrier_set();
+  g1_bs->write_ref_field_work(field, new_val);
+}
+
+void
+G1SATBCardTableLoggingModRefBS::invalidate(MemRegion mr, bool whole_heap) {  // Dirty every card from mr.start() to mr.last().
+  jbyte* byte = byte_for(mr.start());
+  jbyte* last_byte = byte_for(mr.last());
+  Thread* thr = Thread::current();
+  if (whole_heap) {  // Cards are dirtied without being enqueued.
+    while (byte <= last_byte) {
+      *byte = dirty_card;
+      byte++;
+    }
+  } else {
+    // Dirty and enqueue each card that is not already dirty.
+    if (thr->is_Java_thread()) {
+      JavaThread* jt = (JavaThread*)thr;
+      while (byte <= last_byte) {
+        if (*byte != dirty_card) {
+          *byte = dirty_card;
+          jt->dirty_card_queue().enqueue(byte);
+        }
+        byte++;
+      }
+    } else {
+      MutexLockerEx x(Shared_DirtyCardQ_lock,
+                      Mutex::_no_safepoint_check_flag);  // Held across the whole loop.
+      while (byte <= last_byte) {
+        if (*byte != dirty_card) {
+          *byte = dirty_card;
+          _dcqs.shared_dirty_card_queue()->enqueue(byte);
+        }
+        byte++;
+      }
+    }
+  }
+}
diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp
new file mode 100644
index 00000000000..86f8283d449
--- /dev/null
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp
@@ -0,0 +1,107 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#ifndef SERIALGC
+
+class DirtyCardQueueSet;
+
+// This barrier is specialized to use a logging barrier to support
+// snapshot-at-the-beginning marking.
+
+class G1SATBCardTableModRefBS: public CardTableModRefBSForCTRS {
+private:
+  // Add "pre_val" to a set of objects that may have been disconnected from the
+  // pre-marking object graph.
+  static void enqueue(oop pre_val);
+
+public:
+  G1SATBCardTableModRefBS(MemRegion whole_heap,
+                          int max_covered_regions);
+
+  bool is_a(BarrierSet::Name bsn) {  // NOTE(review): delegates to CardTableModRefBS, not the direct base CardTableModRefBSForCTRS -- confirm intended.
+    return bsn == BarrierSet::G1SATBCT || CardTableModRefBS::is_a(bsn);
+  }
+
+  virtual bool has_write_ref_pre_barrier() { return true; }
+
+  // This pre-barrier does not need to access any BarrierSet data
+  // structures, so it can be called from a static context.
+  static void write_ref_field_pre_static(void* field, oop newVal) {
+    assert(!UseCompressedOops, "Else needs to be templatized");
+    oop preVal = *((oop*)field);  // The value about to be overwritten.
+    if (preVal != NULL) {  // A NULL previous value is not logged.
+      enqueue(preVal);
+    }
+  }
+
+  // When we know the current java thread:
+  static void write_ref_field_pre_static(void* field, oop newVal,
+                                         JavaThread* jt);
+
+  // We export this to make it available in cases where the static
+  // type of the barrier set is known. Note that it is non-virtual.
+  inline void inline_write_ref_field_pre(void* field, oop newVal) {
+    write_ref_field_pre_static(field, newVal);
+  }
+
+  // This is the more general virtual version.
+  void write_ref_field_pre_work(void* field, oop new_val) {
+    inline_write_ref_field_pre(field, new_val);
+  }
+
+  virtual void write_ref_array_pre(MemRegion mr);
+
+};
+
+// Adds card-table logging to the post-barrier.
+// Usual invariant: all dirty cards are logged in the DirtyCardQueueSet.
+class G1SATBCardTableLoggingModRefBS: public G1SATBCardTableModRefBS {
+ private:
+  DirtyCardQueueSet& _dcqs;  // Bound in the constructor to JavaThread::dirty_card_queue_set().
+ public:
+  G1SATBCardTableLoggingModRefBS(MemRegion whole_heap,
+                                 int max_covered_regions);
+
+  bool is_a(BarrierSet::Name bsn) {
+    return bsn == BarrierSet::G1SATBCTLogging ||
+      G1SATBCardTableModRefBS::is_a(bsn);
+  }
+
+  void write_ref_field_work(void* field, oop new_val);
+
+  // Can be called from static contexts.
+  static void write_ref_field_static(void* field, oop new_val);
+
+  // NB: if you do a whole-heap invalidation, the "usual invariant" defined
+  // above no longer applies.
+  void invalidate(MemRegion mr, bool whole_heap = false);
+
+  void write_region_work(MemRegion mr) { invalidate(mr); }     // whole_heap takes its default, false.
+  void write_ref_array_work(MemRegion mr) { invalidate(mr); }  // whole_heap takes its default, false.
+
+
+};
+
+
+#endif // SERIALGC
diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1_globals.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1_globals.cpp
new file mode 100644
index 00000000000..065ad42fc78
--- /dev/null
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1_globals.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_g1_globals.cpp.incl"
+// Expand the G1 flag table once with the MATERIALIZE_* macros (presumably defining each flag's storage -- TODO confirm).
+G1_FLAGS(MATERIALIZE_DEVELOPER_FLAG, MATERIALIZE_PD_DEVELOPER_FLAG, \
+         MATERIALIZE_PRODUCT_FLAG, MATERIALIZE_PD_PRODUCT_FLAG, \
+         MATERIALIZE_DIAGNOSTIC_FLAG, MATERIALIZE_EXPERIMENTAL_FLAG, \
+         MATERIALIZE_NOTPRODUCT_FLAG, \
+         MATERIALIZE_MANAGEABLE_FLAG, MATERIALIZE_PRODUCT_RW_FLAG)
diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp
new file mode 100644
index 00000000000..72a684812d5
--- /dev/null
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp
@@ -0,0 +1,287 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+//
+// Defines all global flags used by the garbage-first collector.
+//
+
+#define G1_FLAGS(develop, develop_pd, product, product_pd, diagnostic, experimental, notproduct, manageable, product_rw) \
+ \
+  product(intx, ParallelGCG1AllocBufferSize, 4*K, \
+          "Size of parallel G1 allocation buffers in to-space.") \
+ \
+  product(intx, G1TimeSliceMS, 500, \
+          "Time slice for MMU specification") \
+ \
+  product(intx, G1MaxPauseTimeMS, 200, \
+          "Max GC time per MMU time slice") \
+ \
+  product(intx, G1ConfidencePerc, 50, \
+          "Confidence level for MMU/pause predictions") \
+ \
+  product(intx, G1MarkingOverheadPerc, 0, \
+          "Overhead of concurrent marking") \
+ \
+  product(bool, G1AccountConcurrentOverhead, false, \
+          "Whether soft real-time compliance in G1 will take into account " \
+          "concurrent overhead") \
+ \
+  product(intx, G1YoungGenSize, 0, \
+          "Size of the G1 young generation, 0 is the adaptive policy") \
+ \
+  product(bool, G1Gen, true, \
+          "If true, it will enable the generational G1") \
+ \
+  develop(intx, G1GCPct, 10, \
+          "The desired percent time spent on GC") \
+ \
+  product(intx, G1PolicyVerbose, 0, \
+          "The verbosity level on G1 policy decisions") \
+ \
+  develop(bool, G1UseHRIntoRS, true, \
+          "Determines whether the 'advanced' HR Into rem set is used.") \
+ \
+  product(bool, G1VerifyRemSet, false, \
+          "If true, verify the rem set functioning at each GC") \
+ \
+  product(bool, G1VerifyConcMark, false, \
+          "If true, verify the conc marking code at full GC time") \
+ \
+  develop(intx, G1MarkingVerboseLevel, 0, \
+          "Level (0-4) of verboseness of the marking code") \
+ \
+  develop(bool, G1VerifyConcMarkPrintReachable, true, \
+          "If conc mark verification fails, print reachable objects") \
+ \
+  develop(bool, G1TraceMarkStackOverflow, false, \
+          "If true, extra debugging code for CM restart for ovflw.") \
+ \
+  product(bool, G1VerifyMarkingInEvac, false, \
+          "If true, verify marking info during evacuation") \
+ \
+  develop(intx, G1PausesBtwnConcMark, -1, \
+          "If positive, fixed number of pauses between conc markings") \
+ \
+  product(intx, G1EfficiencyPctCausesMark, 80, \
+          "The cum gc efficiency since mark fall-off that causes " \
+          "new marking") \
+ \
+  product(bool, TraceConcurrentMark, false, \
+          "Trace concurrent mark") \
+ \
+  product(bool, SummarizeG1ConcMark, false, \
+          "Summarize concurrent mark info") \
+ \
+  product(bool, SummarizeG1RSStats, false, \
+          "Summarize remembered set processing info") \
+ \
+  product(bool, SummarizeG1ZFStats, false, \
+          "Summarize zero-filling info") \
+ \
+  product(bool, TraceG1Refine, false, \
+          "Trace G1 concurrent refinement") \
+ \
+  develop(bool, G1ConcMark, true, \
+          "If true, run concurrent marking for G1") \
+ \
+  product(intx, G1CMStackSize, 2 * 1024 * 1024, \
+          "Size of the mark stack for concurrent marking.") \
+ \
+  product(intx, G1CMRegionStackSize, 1024 * 1024, \
+          "Size of the region stack for concurrent marking.") \
+ \
+  develop(bool, G1ConcRefine, true, \
+          "If true, run concurrent rem set refinement for G1") \
+ \
+  develop(intx, G1ConcRefineTargTraversals, 4, \
+          "Number of concurrent refinement we try to achieve") \
+ \
+  develop(intx, G1ConcRefineInitialDelta, 4, \
+          "Number of heap regions of alloc ahead of starting collection " \
+          "pause to start concurrent refinement (initially)") \
+ \
+  product(bool, G1SmoothConcRefine, true, \
+          "Attempts to smooth out the overhead of concurrent refinement") \
+ \
+  develop(bool, G1ConcZeroFill, true, \
+          "If true, run concurrent zero-filling thread") \
+ \
+  develop(intx, G1ConcZFMaxRegions, 1, \
+          "Stop zero-filling when # of zf'd regions reaches") \
+ \
+  product(intx, G1SteadyStateUsed, 90, \
+          "If non-0, try to maintain 'used' at this pct (of max)") \
+ \
+  product(intx, G1SteadyStateUsedDelta, 30, \
+          "If G1SteadyStateUsed is non-0, then do pause this number of " \
+          "percentage points earlier if no marking is in progress.") \
+ \
+  develop(bool, G1SATBBarrierPrintNullPreVals, false, \
+          "If true, count frac of ptr writes with null pre-vals.") \
+ \
+  product(intx, G1SATBLogBufferSize, 1*K, \
+          "Number of entries in an SATB log buffer.") \
+ \
+  product(intx, G1SATBProcessCompletedThreshold, 20, \
+          "Number of completed buffers that triggers log processing.") \
+ \
+  develop(intx, G1ExtraRegionSurvRate, 33, \
+          "If the young survival rate is S, and there's room left in " \
+          "to-space, we will allow regions whose survival rate is up to " \
+          "S + (1 - S)*X, where X is this parameter (as a fraction.)") \
+ \
+  develop(intx, G1InitYoungSurvRatio, 50, \
+          "Expected Survival Rate for newly allocated bytes") \
+ \
+  develop(bool, G1SATBPrintStubs, false, \
+          "If true, print generated stubs for the SATB barrier") \
+ \
+  product(intx, G1ExpandByPctOfAvail, 20, \
+          "When expanding, % of uncommitted space to claim.") \
+ \
+  develop(bool, G1RSBarrierRegionFilter, true, \
+          "If true, generate region filtering code in RS barrier") \
+ \
+  develop(bool, G1RSBarrierNullFilter, true, \
+          "If true, generate null-pointer filtering code in RS barrier") \
+ \
+  develop(bool, G1PrintCTFilterStats, false, \
+          "If true, print stats on RS filtering effectiveness") \
+ \
+  develop(bool, G1RSBarrierUseQueue, true, \
+          "If true, use queueing RS barrier") \
+ \
+  develop(bool, G1RSLogCheckCardTable, false, \
+          "If true, verify that no dirty cards remain after RS log " \
+          "processing.") \
+ \
+  product(intx, G1MinPausesBetweenMarks, 2, \
+          "Number of inefficient pauses necessary to trigger marking.") \
+ \
+  product(intx, G1InefficientPausePct, 80, \
+          "Threshold of an 'inefficient' pause (as % of cum efficiency).") \
+ \
+  product(intx, G1RSPopLimit, 32768, \
+          "Limit that defines popularity. Should go away! XXX") \
+ \
+  develop(bool, G1RSCountHisto, false, \
+          "If true, print a histogram of RS occupancies after each pause") \
+ \
+  product(intx, G1ObjPopLimit, 256, \
+          "Limit that defines popularity for an object.") \
+ \
+  product(bool, G1TraceFileOverwrite, false, \
+          "Allow the trace file to be overwritten") \
+ \
+  develop(intx, G1PrintRegionLivenessInfo, 0, \
+          "When > 0, print the occupancies of the best and worst " \
+          "regions.") \
+ \
+  develop(bool, G1TracePopularity, false, \
+          "When true, provide detailed tracing of popularity.") \
+ \
+  product(bool, G1SummarizePopularity, false, \
+          "When true, provide end-of-run-summarization of popularity.") \
+ \
+  product(intx, G1NumPopularRegions, 1, \
+          "Number of regions reserved to hold popular objects. " \
+          "Should go away later.") \
+ \
+  develop(bool, G1PrintParCleanupStats, false, \
+          "When true, print extra stats about parallel cleanup.") \
+ \
+  product(bool, G1DoAgeCohortChecks, false, \
+          "When true, check well-formedness of age cohort structures.") \
+ \
+  develop(bool, G1DisablePreBarrier, false, \
+          "Disable generation of pre-barrier (i.e., marking barrier) ") \
+ \
+  develop(bool, G1DisablePostBarrier, false, \
+          "Disable generation of post-barrier (i.e., RS barrier) ") \
+ \
+  product(intx, G1DirtyCardQueueMax, 30, \
+          "Maximum number of completed RS buffers before mutator threads " \
+          "start processing them.") \
+ \
+  develop(intx, G1ConcRSLogCacheSize, 10, \
+          "Log base 2 of the length of conc RS hot-card cache.") \
+ \
+  product(bool, G1ConcRSCountTraversals, false, \
+          "If true, gather data about the number of times CR traverses " \
+          "cards ") \
+ \
+  product(intx, G1ConcRSHotCardLimit, 4, \
+          "The threshold that defines (>=) a hot card.") \
+ \
+  develop(bool, G1PrintOopAppls, false, \
+          "When true, print applications of closures to external locs.") \
+ \
+  product(intx, G1LogRSRegionEntries, 7, \
+          "Log_2 of max number of regions for which we keep bitmaps.") \
+ \
+  develop(bool, G1RecordHRRSOops, false, \
+          "When true, record recent calls to rem set operations.") \
+ \
+  develop(bool, G1RecordHRRSEvents, false, \
+          "When true, record recent calls to rem set operations.") \
+ \
+  develop(intx, G1MaxVerifyFailures, -1, \
+          "The maximum number of verification failures to print. " \
+          "-1 means print all.") \
+ \
+  develop(bool, G1ScrubRemSets, true, \
+          "When true, do RS scrubbing after cleanup.") \
+ \
+  develop(bool, G1RSScrubVerbose, false, \
+          "When true, do RS scrubbing with verbose output.") \
+ \
+  develop(bool, G1YoungSurvRateVerbose, false, \
+          "print out the survival rate of young regions according to age.") \
+ \
+  develop(intx, G1YoungSurvRateNumRegionsSummary, 0, \
+          "the number of regions for which we'll print a surv rate " \
+          "summary.") \
+ \
+  product(bool, G1UseScanOnlyPrefix, false, \
+          "It determines whether the system will calculate an optimum " \
+          "scan-only set.") \
+ \
+  product(intx, G1MinReservePerc, 10, \
+          "It determines the minimum reserve we should have in the heap " \
+          "to minimize the probability of promotion failure.") \
+ \
+  product(bool, G1TraceRegions, false, \
+          "If set G1 will print information on which regions are being " \
+          "allocated and which are reclaimed.") \
+ \
+  develop(bool, G1HRRSUseSparseTable, true, \
+          "When true, use sparse table to save space.") \
+ \
+  develop(bool, G1HRRSFlushLogBuffersOnVerify, false, \
+          "Forces flushing of log buffers before verification.") \
+ \
+  product(intx, G1MaxSurvivorRegions, 0, \
+          "The maximum number of survivor regions")
+
+G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG)
diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp
new file mode 100644
index
00000000000..39f72a616b7 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp @@ -0,0 +1,64 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// The following OopClosure types get specialized versions of +// "oop_oop_iterate" that invoke the closures' do_oop methods +// non-virtually, using a mechanism defined in this file. Extend these +// macros in the obvious way to add specializations for new closures. + +// Forward declarations. 
+enum G1Barrier { + G1BarrierNone, G1BarrierRS, G1BarrierEvac +}; + +template +class G1ParCopyClosure; +class G1ParScanClosure; + +typedef G1ParCopyClosure G1ParScanHeapEvacClosure; + +class FilterIntoCSClosure; +class FilterOutOfRegionClosure; +class FilterInHeapRegionAndIntoCSClosure; +class FilterAndMarkInHeapRegionAndIntoCSClosure; +class G1ScanAndBalanceClosure; + +#ifdef FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES +#error "FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES already defined." +#endif + +#define FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES(f) \ + f(G1ParScanHeapEvacClosure,_nv) \ + f(G1ParScanClosure,_nv) \ + f(FilterIntoCSClosure,_nv) \ + f(FilterOutOfRegionClosure,_nv) \ + f(FilterInHeapRegionAndIntoCSClosure,_nv) \ + f(FilterAndMarkInHeapRegionAndIntoCSClosure,_nv) \ + f(G1ScanAndBalanceClosure,_nv) + +#ifdef FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES +#error "FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES already defined." +#endif + +#define FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(f) diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp new file mode 100644 index 00000000000..e5105cc81f0 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp @@ -0,0 +1,873 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_heapRegion.cpp.incl" + +HeapRegionDCTOC::HeapRegionDCTOC(G1CollectedHeap* g1, + HeapRegion* hr, OopClosure* cl, + CardTableModRefBS::PrecisionStyle precision, + FilterKind fk) : + ContiguousSpaceDCTOC(hr, cl, precision, NULL), + _hr(hr), _fk(fk), _g1(g1) +{} + +FilterOutOfRegionClosure::FilterOutOfRegionClosure(HeapRegion* r, + OopClosure* oc) : + _r_bottom(r->bottom()), _r_end(r->end()), + _oc(oc), _out_of_region(0) +{} + +class VerifyLiveClosure: public OopClosure { + G1CollectedHeap* _g1h; + CardTableModRefBS* _bs; + oop _containing_obj; + bool _failures; + int _n_failures; +public: + VerifyLiveClosure(G1CollectedHeap* g1h) : + _g1h(g1h), _bs(NULL), _containing_obj(NULL), + _failures(false), _n_failures(0) + { + BarrierSet* bs = _g1h->barrier_set(); + if (bs->is_a(BarrierSet::CardTableModRef)) + _bs = (CardTableModRefBS*)bs; + } + + void set_containing_obj(oop obj) { + _containing_obj = obj; + } + + bool failures() { return _failures; } + int n_failures() { return _n_failures; } + + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + + void do_oop(oop* p) { + assert(_containing_obj != NULL, "Precondition"); + assert(!_g1h->is_obj_dead(_containing_obj), "Precondition"); + oop obj = *p; + if (obj != NULL) { + bool failed = false; + if (!_g1h->is_in_closed_subset(obj) || _g1h->is_obj_dead(obj)) { + if (!_failures) { + gclog_or_tty->print_cr(""); + gclog_or_tty->print_cr("----------"); + } + if (!_g1h->is_in_closed_subset(obj)) { + gclog_or_tty->print_cr("Field "PTR_FORMAT + " of live obj 
"PTR_FORMAT + " points to obj "PTR_FORMAT + " not in the heap.", + p, (void*) _containing_obj, (void*) obj); + } else { + gclog_or_tty->print_cr("Field "PTR_FORMAT + " of live obj "PTR_FORMAT + " points to dead obj "PTR_FORMAT".", + p, (void*) _containing_obj, (void*) obj); + } + gclog_or_tty->print_cr("Live obj:"); + _containing_obj->print_on(gclog_or_tty); + gclog_or_tty->print_cr("Bad referent:"); + obj->print_on(gclog_or_tty); + gclog_or_tty->print_cr("----------"); + _failures = true; + failed = true; + _n_failures++; + } + + if (!_g1h->full_collection()) { + HeapRegion* from = _g1h->heap_region_containing(p); + HeapRegion* to = _g1h->heap_region_containing(*p); + if (from != NULL && to != NULL && + from != to && + !to->popular() && + !to->isHumongous()) { + jbyte cv_obj = *_bs->byte_for_const(_containing_obj); + jbyte cv_field = *_bs->byte_for_const(p); + const jbyte dirty = CardTableModRefBS::dirty_card_val(); + + bool is_bad = !(from->is_young() + || to->rem_set()->contains_reference(p) + || !G1HRRSFlushLogBuffersOnVerify && // buffers were not flushed + (_containing_obj->is_objArray() ? 
+ cv_field == dirty + : cv_obj == dirty || cv_field == dirty)); + if (is_bad) { + if (!_failures) { + gclog_or_tty->print_cr(""); + gclog_or_tty->print_cr("----------"); + } + gclog_or_tty->print_cr("Missing rem set entry:"); + gclog_or_tty->print_cr("Field "PTR_FORMAT + " of obj "PTR_FORMAT + ", in region %d ["PTR_FORMAT + ", "PTR_FORMAT"),", + p, (void*) _containing_obj, + from->hrs_index(), + from->bottom(), + from->end()); + _containing_obj->print_on(gclog_or_tty); + gclog_or_tty->print_cr("points to obj "PTR_FORMAT + " in region %d ["PTR_FORMAT + ", "PTR_FORMAT").", + (void*) obj, to->hrs_index(), + to->bottom(), to->end()); + obj->print_on(gclog_or_tty); + gclog_or_tty->print_cr("Obj head CTE = %d, field CTE = %d.", + cv_obj, cv_field); + gclog_or_tty->print_cr("----------"); + _failures = true; + if (!failed) _n_failures++; + } + } + } + } + } +}; + +// Walks the objects starting at "cur" for as long as the following object +// starts below "top", applying "cl" (via oop_iterate) to every object that +// is_obj_dead() does not report as dead in "hr". Returns the address of the +// first object whose successor starts at or beyond "top"; that object is +// not iterated here and is left to the caller. +template<class ClosureType> +HeapWord* walk_mem_region_loop(ClosureType* cl, G1CollectedHeap* g1h, + HeapRegion* hr, + HeapWord* cur, HeapWord* top) { + oop cur_oop = oop(cur); + int oop_size = cur_oop->size(); + HeapWord* next_obj = cur + oop_size; + while (next_obj < top) { + // Keep filtering the remembered set. + if (!g1h->is_obj_dead(cur_oop, hr)) { + // Bottom lies entirely below top, so we can call the + // non-memRegion version of oop_iterate below. 
+#ifndef PRODUCT + if (G1VerifyMarkingInEvac) { + VerifyLiveClosure vl_cl(g1h); + cur_oop->oop_iterate(&vl_cl); + } +#endif + cur_oop->oop_iterate(cl); + } + cur = next_obj; + cur_oop = oop(cur); + oop_size = cur_oop->size(); + next_obj = cur + oop_size; + } + return cur; +} + +void HeapRegionDCTOC::walk_mem_region_with_cl(MemRegion mr, + HeapWord* bottom, + HeapWord* top, + OopClosure* cl) { + G1CollectedHeap* g1h = _g1; + + int oop_size; + + OopClosure* cl2 = cl; + FilterIntoCSClosure intoCSFilt(this, g1h, cl); + FilterOutOfRegionClosure outOfRegionFilt(_hr, cl); + switch (_fk) { + case IntoCSFilterKind: cl2 = &intoCSFilt; break; + case OutOfRegionFilterKind: cl2 = &outOfRegionFilt; break; + } + + // Start filtering what we add to the remembered set. If the object is + // not considered dead, either because it is marked (in the mark bitmap) + // or it was allocated after marking finished, then we add it. Otherwise + // we can safely ignore the object. + if (!g1h->is_obj_dead(oop(bottom), _hr)) { +#ifndef PRODUCT + if (G1VerifyMarkingInEvac) { + VerifyLiveClosure vl_cl(g1h); + oop(bottom)->oop_iterate(&vl_cl, mr); + } +#endif + oop_size = oop(bottom)->oop_iterate(cl2, mr); + } else { + oop_size = oop(bottom)->size(); + } + + bottom += oop_size; + + if (bottom < top) { + // We replicate the loop below for several kinds of possible filters. + switch (_fk) { + case NoFilterKind: + bottom = walk_mem_region_loop(cl, g1h, _hr, bottom, top); + break; + case IntoCSFilterKind: { + FilterIntoCSClosure filt(this, g1h, cl); + bottom = walk_mem_region_loop(&filt, g1h, _hr, bottom, top); + break; + } + case OutOfRegionFilterKind: { + FilterOutOfRegionClosure filt(_hr, cl); + bottom = walk_mem_region_loop(&filt, g1h, _hr, bottom, top); + break; + } + default: + ShouldNotReachHere(); + } + + // Last object. Need to do dead-obj filtering here too. 
+ if (!g1h->is_obj_dead(oop(bottom), _hr)) { +#ifndef PRODUCT + if (G1VerifyMarkingInEvac) { + VerifyLiveClosure vl_cl(g1h); + oop(bottom)->oop_iterate(&vl_cl, mr); + } +#endif + oop(bottom)->oop_iterate(cl2, mr); + } + } +} + +void HeapRegion::reset_after_compaction() { + G1OffsetTableContigSpace::reset_after_compaction(); + // After a compaction the mark bitmap is invalid, so we must + // treat all objects as being inside the unmarked area. + zero_marked_bytes(); + init_top_at_mark_start(); +} + +DirtyCardToOopClosure* +HeapRegion::new_dcto_closure(OopClosure* cl, + CardTableModRefBS::PrecisionStyle precision, + HeapRegionDCTOC::FilterKind fk) { + return new HeapRegionDCTOC(G1CollectedHeap::heap(), + this, cl, precision, fk); +} + +void HeapRegion::hr_clear(bool par, bool clear_space) { + _humongous_type = NotHumongous; + _humongous_start_region = NULL; + _in_collection_set = false; + _is_gc_alloc_region = false; + + // Age stuff (if parallel, this will be done separately, since it needs + // to be sequential). + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + set_young_index_in_cset(-1); + uninstall_surv_rate_group(); + set_young_type(NotYoung); + + // In case it had been the start of a humongous sequence, reset its end. + set_end(_orig_end); + + if (!par) { + // If this is parallel, this will be done later. 
+ HeapRegionRemSet* hrrs = rem_set(); + if (hrrs != NULL) hrrs->clear(); + _claimed = InitialClaimValue; + } + zero_marked_bytes(); + set_sort_index(-1); + if ((uintptr_t)bottom() >= (uintptr_t)g1h->popular_object_boundary()) + set_popular(false); + + _offsets.resize(HeapRegion::GrainWords); + init_top_at_mark_start(); + if (clear_space) clear(SpaceDecorator::Mangle); +} + +// +void HeapRegion::calc_gc_efficiency() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + _gc_efficiency = (double) garbage_bytes() / + g1h->predict_region_elapsed_time_ms(this, false); +} +// + +void HeapRegion::set_startsHumongous() { + _humongous_type = StartsHumongous; + _humongous_start_region = this; + assert(end() == _orig_end, "Should be normal before alloc."); +} + +bool HeapRegion::claimHeapRegion(jint claimValue) { + jint current = _claimed; + if (current != claimValue) { + jint res = Atomic::cmpxchg(claimValue, &_claimed, current); + if (res == current) { + return true; + } + } + return false; +} + +HeapWord* HeapRegion::next_block_start_careful(HeapWord* addr) { + HeapWord* low = addr; + HeapWord* high = end(); + while (low < high) { + size_t diff = pointer_delta(high, low); + // Must add one below to bias toward the high amount. Otherwise, if + // "high" were at the desired value, and "low" were one less, we + // would not converge on "high". This is not symmetric, because + // we set "high" to a block start, which might be the right one, + // which we don't do for "low". 
+ HeapWord* middle = low + (diff+1)/2; + if (middle == high) return high; + HeapWord* mid_bs = block_start_careful(middle); + if (mid_bs < addr) { + low = middle; + } else { + high = mid_bs; + } + } + assert(low == high && low >= addr, "Didn't work."); + return low; +} + +void HeapRegion::set_next_on_unclean_list(HeapRegion* r) { + assert(r == NULL || r->is_on_unclean_list(), "Malformed unclean list."); + _next_in_special_set = r; +} + +void HeapRegion::set_on_unclean_list(bool b) { + _is_on_unclean_list = b; +} + +void HeapRegion::initialize(MemRegion mr, bool clear_space, bool mangle_space) { + G1OffsetTableContigSpace::initialize(mr, false, mangle_space); + hr_clear(false/*par*/, clear_space); +} +#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away +#pragma warning( disable:4355 ) // 'this' : used in base member initializer list +#endif // _MSC_VER + + +HeapRegion:: +HeapRegion(G1BlockOffsetSharedArray* sharedOffsetArray, + MemRegion mr, bool is_zeroed) + : G1OffsetTableContigSpace(sharedOffsetArray, mr, is_zeroed), + _next_fk(HeapRegionDCTOC::NoFilterKind), + _hrs_index(-1), + _humongous_type(NotHumongous), _humongous_start_region(NULL), + _in_collection_set(false), _is_gc_alloc_region(false), + _is_on_free_list(false), _is_on_unclean_list(false), + _next_in_special_set(NULL), _orig_end(NULL), + _claimed(InitialClaimValue), _evacuation_failed(false), + _prev_marked_bytes(0), _next_marked_bytes(0), _sort_index(-1), + _popularity(NotPopular), + _young_type(NotYoung), _next_young_region(NULL), + _young_index_in_cset(-1), _surv_rate_group(NULL), _age_index(-1), + _rem_set(NULL), _zfs(NotZeroFilled) +{ + _orig_end = mr.end(); + // Note that initialize() will set the start of the unmarked area of the + // region. 
+ this->initialize(mr, !is_zeroed, SpaceDecorator::Mangle); + set_top(bottom()); + set_saved_mark(); + + _rem_set = new HeapRegionRemSet(sharedOffsetArray, this); + + assert(HeapRegionRemSet::num_par_rem_sets() > 0, "Invariant."); + // In case the region is allocated during a pause, note the top. + // We haven't done any counting on a brand new region. + _top_at_conc_mark_count = bottom(); +} + +class NextCompactionHeapRegionClosure: public HeapRegionClosure { + const HeapRegion* _target; + bool _target_seen; + HeapRegion* _last; + CompactibleSpace* _res; +public: + NextCompactionHeapRegionClosure(const HeapRegion* target) : + _target(target), _target_seen(false), _res(NULL) {} + bool doHeapRegion(HeapRegion* cur) { + if (_target_seen) { + if (!cur->isHumongous()) { + _res = cur; + return true; + } + } else if (cur == _target) { + _target_seen = true; + } + return false; + } + CompactibleSpace* result() { return _res; } +}; + +CompactibleSpace* HeapRegion::next_compaction_space() const { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + // cast away const-ness + HeapRegion* r = (HeapRegion*) this; + NextCompactionHeapRegionClosure blk(r); + g1h->heap_region_iterate_from(r, &blk); + return blk.result(); +} + +void HeapRegion::set_continuesHumongous(HeapRegion* start) { + // The order is important here. + start->add_continuingHumongousRegion(this); + _humongous_type = ContinuesHumongous; + _humongous_start_region = start; +} + +void HeapRegion::add_continuingHumongousRegion(HeapRegion* cont) { + // Must join the blocks of the current H region seq with the block of the + // added region. 
+ offsets()->join_blocks(bottom(), cont->bottom()); + arrayOop obj = (arrayOop)(bottom()); + obj->set_length((int) (obj->length() + cont->capacity()/jintSize)); + set_end(cont->end()); + set_top(cont->end()); +} + +void HeapRegion::save_marks() { + set_saved_mark(); +} + +void HeapRegion::oops_in_mr_iterate(MemRegion mr, OopClosure* cl) { + HeapWord* p = mr.start(); + HeapWord* e = mr.end(); + oop obj; + while (p < e) { + obj = oop(p); + p += obj->oop_iterate(cl); + } + assert(p == e, "bad memregion: doesn't end on obj boundary"); +} + +#define HeapRegion_OOP_SINCE_SAVE_MARKS_DEFN(OopClosureType, nv_suffix) \ +void HeapRegion::oop_since_save_marks_iterate##nv_suffix(OopClosureType* cl) { \ + ContiguousSpace::oop_since_save_marks_iterate##nv_suffix(cl); \ +} +SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(HeapRegion_OOP_SINCE_SAVE_MARKS_DEFN) + + +void HeapRegion::oop_before_save_marks_iterate(OopClosure* cl) { + oops_in_mr_iterate(MemRegion(bottom(), saved_mark_word()), cl); +} + +#ifdef DEBUG +HeapWord* HeapRegion::allocate(size_t size) { + jint state = zero_fill_state(); + assert(!G1CollectedHeap::heap()->allocs_are_zero_filled() || + zero_fill_is_allocated(), + "When ZF is on, only alloc in ZF'd regions"); + return G1OffsetTableContigSpace::allocate(size); +} +#endif + +void HeapRegion::set_zero_fill_state_work(ZeroFillState zfs) { + assert(top() == bottom() || zfs == Allocated, + "Region must be empty, or we must be setting it to allocated."); + assert(ZF_mon->owned_by_self() || + Universe::heap()->is_gc_active(), + "Must hold the lock or be a full GC to modify."); + _zfs = zfs; +} + +void HeapRegion::set_zero_fill_complete() { + set_zero_fill_state_work(ZeroFilled); + if (ZF_mon->owned_by_self()) { + ZF_mon->notify_all(); + } +} + + +void HeapRegion::ensure_zero_filled() { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + ensure_zero_filled_locked(); +} + +void HeapRegion::ensure_zero_filled_locked() { + assert(ZF_mon->owned_by_self(), "Precondition"); + 
bool should_ignore_zf = SafepointSynchronize::is_at_safepoint(); + assert(should_ignore_zf || Heap_lock->is_locked(), + "Either we're in a GC or we're allocating a region."); + switch (zero_fill_state()) { + case HeapRegion::NotZeroFilled: + set_zero_fill_in_progress(Thread::current()); + { + // The monitor is dropped while the words are filled and re-taken + // afterwards. + ZF_mon->unlock(); + Copy::fill_to_words(bottom(), capacity()/HeapWordSize); + ZF_mon->lock_without_safepoint_check(); + } + // A trap. + guarantee(zero_fill_state() == HeapRegion::ZeroFilling + && zero_filler() == Thread::current(), + "AHA! Tell Dave D if you see this..."); + set_zero_fill_complete(); + // gclog_or_tty->print_cr("Did sync ZF."); + ConcurrentZFThread::note_sync_zfs(); + break; + case HeapRegion::ZeroFilling: + if (should_ignore_zf) { + // We can "break" the lock and take over the work. + Copy::fill_to_words(bottom(), capacity()/HeapWordSize); + set_zero_fill_complete(); + ConcurrentZFThread::note_sync_zfs(); + break; + } else { + ConcurrentZFThread::wait_for_ZF_completed(this); + // Nothing further to do here; the post-condition below checks that + // the region ended up ZeroFilled. Explicit break instead of silently + // falling through into the next case. + break; + } + case HeapRegion::ZeroFilled: + // Nothing to do. + break; + case HeapRegion::Allocated: + guarantee(false, "Should not call on allocated regions."); + break; + } + assert(zero_fill_state() == HeapRegion::ZeroFilled, "Post"); +} + +HeapWord* +HeapRegion::object_iterate_mem_careful(MemRegion mr, + ObjectClosure* cl) { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + // We used to use "block_start_careful" here. But we're actually happy + // to update the BOT while we do this... + HeapWord* cur = block_start(mr.start()); + mr = mr.intersection(used_region()); + if (mr.is_empty()) return NULL; + // Otherwise, find the obj that extends onto mr.start(). + + assert(cur <= mr.start() + && (oop(cur)->klass() == NULL || + cur + oop(cur)->size() > mr.start()), + "postcondition of block_start"); + oop obj; + while (cur < mr.end()) { + obj = oop(cur); + if (obj->klass() == NULL) { + // Ran into an unparseable point. 
+ return cur; + } else if (!g1h->is_obj_dead(obj)) { + cl->do_object(obj); + } + if (cl->abort()) return cur; + // The check above must occur before the operation below, since an + // abort might invalidate the "size" operation. + cur += obj->size(); + } + return NULL; +} + +HeapWord* +HeapRegion:: +oops_on_card_seq_iterate_careful(MemRegion mr, + FilterOutOfRegionClosure* cl) { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + // If we're within a stop-world GC, then we might look at a card in a + // GC alloc region that extends onto a GC LAB, which may not be + // parseable. Stop such at the "saved_mark" of the region. + if (G1CollectedHeap::heap()->is_gc_active()) { + mr = mr.intersection(used_region_at_save_marks()); + } else { + mr = mr.intersection(used_region()); + } + if (mr.is_empty()) return NULL; + // Otherwise, find the obj that extends onto mr.start(). + + // We used to use "block_start_careful" here. But we're actually happy + // to update the BOT while we do this... + HeapWord* cur = block_start(mr.start()); + assert(cur <= mr.start(), "Postcondition"); + + while (cur <= mr.start()) { + if (oop(cur)->klass() == NULL) { + // Ran into an unparseable point. + return cur; + } + // Otherwise... + int sz = oop(cur)->size(); + if (cur + sz > mr.start()) break; + // Otherwise, go on. + cur = cur + sz; + } + oop obj; + obj = oop(cur); + // If we finish this loop... + assert(cur <= mr.start() + && obj->klass() != NULL + && cur + obj->size() > mr.start(), + "Loop postcondition"); + if (!g1h->is_obj_dead(obj)) { + obj->oop_iterate(cl, mr); + } + + HeapWord* next; + while (cur < mr.end()) { + obj = oop(cur); + if (obj->klass() == NULL) { + // Ran into an unparseable point. + return cur; + }; + // Otherwise: + next = (cur + obj->size()); + if (!g1h->is_obj_dead(obj)) { + if (next < mr.end()) { + obj->oop_iterate(cl); + } else { + // this obj spans the boundary. If it's an array, stop at the + // boundary. 
+ if (obj->is_objArray()) { + obj->oop_iterate(cl, mr); + } else { + obj->oop_iterate(cl); + } + } + } + cur = next; + } + return NULL; +} + +void HeapRegion::print() const { print_on(gclog_or_tty); } +void HeapRegion::print_on(outputStream* st) const { + if (isHumongous()) { + if (startsHumongous()) + st->print(" HS"); + else + st->print(" HC"); + } else { + st->print(" "); + } + if (in_collection_set()) + st->print(" CS"); + else if (is_gc_alloc_region()) + st->print(" A "); + else + st->print(" "); + if (is_young()) + st->print(is_scan_only() ? " SO" : (is_survivor() ? " SU" : " Y ")); + else + st->print(" "); + if (is_empty()) + st->print(" F"); + else + st->print(" "); + st->print(" %d", _gc_time_stamp); + G1OffsetTableContigSpace::print_on(st); +} + +#define OBJ_SAMPLE_INTERVAL 0 +#define BLOCK_SAMPLE_INTERVAL 100 + +// This really ought to be commoned up into OffsetTableContigSpace somehow. +// We would need a mechanism to make that code skip dead objects. + +void HeapRegion::verify(bool allow_dirty) const { + G1CollectedHeap* g1 = G1CollectedHeap::heap(); + HeapWord* p = bottom(); + HeapWord* prev_p = NULL; + int objs = 0; + int blocks = 0; + VerifyLiveClosure vl_cl(g1); + while (p < top()) { + size_t size = oop(p)->size(); + if (blocks == BLOCK_SAMPLE_INTERVAL) { + guarantee(p == block_start_const(p + (size/2)), + "check offset computation"); + blocks = 0; + } else { + blocks++; + } + if (objs == OBJ_SAMPLE_INTERVAL) { + oop obj = oop(p); + if (!g1->is_obj_dead(obj, this)) { + obj->verify(); + vl_cl.set_containing_obj(obj); + obj->oop_iterate(&vl_cl); + if (G1MaxVerifyFailures >= 0 + && vl_cl.n_failures() >= G1MaxVerifyFailures) break; + } + objs = 0; + } else { + objs++; + } + prev_p = p; + p += size; + } + HeapWord* rend = end(); + HeapWord* rtop = top(); + if (rtop < rend) { + guarantee(block_start_const(rtop + (rend - rtop) / 2) == rtop, + "check offset computation"); + } + if (vl_cl.failures()) { + gclog_or_tty->print_cr("Heap:"); + 
G1CollectedHeap::heap()->print(); + gclog_or_tty->print_cr(""); + } + if (G1VerifyConcMark && + G1VerifyConcMarkPrintReachable && + vl_cl.failures()) { + g1->concurrent_mark()->print_prev_bitmap_reachable(); + } + guarantee(!vl_cl.failures(), "should not have had any failures"); + guarantee(p == top(), "end of last object must match end of space"); +} + +// G1OffsetTableContigSpace code; copied from space.cpp. Hope this can go +// away eventually. + +void G1OffsetTableContigSpace::initialize(MemRegion mr, bool clear_space, bool mangle_space) { + // false ==> we'll do the clearing if there's clearing to be done. + ContiguousSpace::initialize(mr, false, mangle_space); + _offsets.zero_bottom_entry(); + _offsets.initialize_threshold(); + if (clear_space) clear(mangle_space); +} + +void G1OffsetTableContigSpace::clear(bool mangle_space) { + ContiguousSpace::clear(mangle_space); + _offsets.zero_bottom_entry(); + _offsets.initialize_threshold(); +} + +void G1OffsetTableContigSpace::set_bottom(HeapWord* new_bottom) { + Space::set_bottom(new_bottom); + _offsets.set_bottom(new_bottom); +} + +void G1OffsetTableContigSpace::set_end(HeapWord* new_end) { + Space::set_end(new_end); + _offsets.resize(new_end - bottom()); +} + +void G1OffsetTableContigSpace::print() const { + print_short(); + gclog_or_tty->print_cr(" [" INTPTR_FORMAT ", " INTPTR_FORMAT ", " + INTPTR_FORMAT ", " INTPTR_FORMAT ")", + bottom(), top(), _offsets.threshold(), end()); +} + +HeapWord* G1OffsetTableContigSpace::initialize_threshold() { + return _offsets.initialize_threshold(); +} + +HeapWord* G1OffsetTableContigSpace::cross_threshold(HeapWord* start, + HeapWord* end) { + _offsets.alloc_block(start, end); + return _offsets.threshold(); +} + +HeapWord* G1OffsetTableContigSpace::saved_mark_word() const { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + assert( _gc_time_stamp <= g1h->get_gc_time_stamp(), "invariant" ); + if (_gc_time_stamp < g1h->get_gc_time_stamp()) + return top(); + else + return 
ContiguousSpace::saved_mark_word(); +} + +// Records the saved mark for this space and stamps the space with the heap's +// current GC time stamp. Does nothing if the space already carries the +// current time stamp. +void G1OffsetTableContigSpace::set_saved_mark() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + unsigned curr_gc_time_stamp = g1h->get_gc_time_stamp(); + + if (_gc_time_stamp < curr_gc_time_stamp) { + // The order of these is important, as another thread might be + // about to start scanning this region. If it does so after + // set_saved_mark and before _gc_time_stamp = ..., then the latter + // will be false, and it will pick up top() as the high water mark + // of region. If it does so after _gc_time_stamp = ..., then it + // will pick up the right saved_mark_word() as the high water mark + // of the region. Either way, the behaviour will be correct. + ContiguousSpace::set_saved_mark(); + // The saved mark must become visible before the new time stamp does; + // a barrier placed only after both stores does not order them with + // respect to each other. + OrderAccess::storestore(); + _gc_time_stamp = curr_gc_time_stamp; + OrderAccess::fence(); + } +} + +G1OffsetTableContigSpace:: +G1OffsetTableContigSpace(G1BlockOffsetSharedArray* sharedOffsetArray, + MemRegion mr, bool is_zeroed) : + _offsets(sharedOffsetArray, mr), + _par_alloc_lock(Mutex::leaf, "OffsetTableContigSpace par alloc lock", true), + _gc_time_stamp(0) +{ + _offsets.set_space(this); + initialize(mr, !is_zeroed, SpaceDecorator::Mangle); +} + +size_t RegionList::length() { + size_t len = 0; + HeapRegion* cur = hd(); + DEBUG_ONLY(HeapRegion* last = NULL); + while (cur != NULL) { + len++; + DEBUG_ONLY(last = cur); + cur = get_next(cur); + } + assert(last == tl(), "Invariant"); + return len; +} + +void RegionList::insert_before_head(HeapRegion* r) { + assert(well_formed(), "Inv"); + set_next(r, hd()); + _hd = r; + _sz++; + if (tl() == NULL) _tl = r; + assert(well_formed(), "Inv"); +} + +void RegionList::prepend_list(RegionList* new_list) { + assert(well_formed(), "Precondition"); + assert(new_list->well_formed(), "Precondition"); + HeapRegion* new_tl = new_list->tl(); + if (new_tl != NULL) { + set_next(new_tl, hd()); + _hd = new_list->hd(); + _sz += new_list->sz(); + if (tl() == NULL) _tl = new_list->tl(); + } else { + assert(new_list->hd() == NULL && 
new_list->sz() == 0, "Inv"); + } + assert(well_formed(), "Inv"); +} + +void RegionList::delete_after(HeapRegion* r) { + assert(well_formed(), "Precondition"); + HeapRegion* next = get_next(r); + assert(r != NULL, "Precondition"); + HeapRegion* next_tl = get_next(next); + set_next(r, next_tl); + dec_sz(); + if (next == tl()) { + assert(next_tl == NULL, "Inv"); + _tl = r; + } + assert(well_formed(), "Inv"); +} + +HeapRegion* RegionList::pop() { + assert(well_formed(), "Inv"); + HeapRegion* res = hd(); + if (res != NULL) { + _hd = get_next(res); + _sz--; + set_next(res, NULL); + if (sz() == 0) _tl = NULL; + } + assert(well_formed(), "Inv"); + return res; +} diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp new file mode 100644 index 00000000000..eee90ecb9bb --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp @@ -0,0 +1,937 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#ifndef SERIALGC
+
+// A HeapRegion is the smallest piece of a G1CollectedHeap that
+// can be collected independently.
+
+// NOTE: Although a HeapRegion is a Space, its
+// Space::initDirtyCardClosure method must not be called.
+// The problem is that the existence of this method breaks
+// the independence of barrier sets from remembered sets.
+// The solution is to remove this method from the definition
+// of a Space.
+
+class CompactibleSpace;
+class ContiguousSpace;
+class HeapRegionRemSet;
+class HeapRegionRemSetIterator;
+class HeapRegion;
+
+// A dirty card to oop closure for heap regions. It
+// knows how to get the G1 heap and how to use the bitmap
+// in the concurrent marker used by G1 to filter remembered
+// sets.
+
+class HeapRegionDCTOC : public ContiguousSpaceDCTOC {
+public:
+  // Specification of possible DirtyCardToOopClosure filtering.
+  enum FilterKind {
+    NoFilterKind,
+    IntoCSFilterKind,
+    OutOfRegionFilterKind
+  };
+
+protected:
+  // NOTE(review): presumably the region, filter kind and heap handed to
+  // the constructor below; its definition is not in this header.
+  HeapRegion* _hr;
+  FilterKind _fk;
+  G1CollectedHeap* _g1;
+
+  // OopClosure overload, defined out of line. The FilteringClosure
+  // overload below forwards to this one.
+  void walk_mem_region_with_cl(MemRegion mr,
+                               HeapWord* bottom, HeapWord* top,
+                               OopClosure* cl);
+
+  // We don't specialize this for FilteringClosure; filtering is handled by
+  // the "FilterKind" mechanism. But we provide this to avoid a compiler
+  // warning.
+  void walk_mem_region_with_cl(MemRegion mr,
+                               HeapWord* bottom, HeapWord* top,
+                               FilteringClosure* cl) {
+    HeapRegionDCTOC::walk_mem_region_with_cl(mr, bottom, top,
+                                             (OopClosure*)cl);
+  }
+
+  // Get the actual top of the area on which the closure will
+  // operate, given where the top is assumed to be (the end of the
+  // memory region passed to do_MemRegion) and where the object
+  // at the top is assumed to start. For example, an object may
+  // start at the top but actually extend past the assumed top,
+  // in which case the top becomes the end of the object.
+  // This version simply defers to ContiguousSpaceDCTOC.
+  HeapWord* get_actual_top(HeapWord* top, HeapWord* top_obj) {
+    return ContiguousSpaceDCTOC::get_actual_top(top, top_obj);
+  }
+
+  // Walk the given memory region from bottom to (actual) top
+  // looking for objects and applying the oop closure (_cl) to
+  // them. The base implementation of this treats the area as
+  // blocks, where a block may or may not be an object. Sub-
+  // classes should override this to provide more accurate
+  // or possibly more efficient walking.
+  // This version simply defers to Filtering_DCTOC.
+  void walk_mem_region(MemRegion mr, HeapWord* bottom, HeapWord* top) {
+    Filtering_DCTOC::walk_mem_region(mr, bottom, top);
+  }
+
+public:
+  // Defined out of line. "fk" selects one of the FilterKind values above.
+  HeapRegionDCTOC(G1CollectedHeap* g1,
+                  HeapRegion* hr, OopClosure* cl,
+                  CardTableModRefBS::PrecisionStyle precision,
+                  FilterKind fk);
+};
+
+
+// The complicating factor is that BlockOffsetTable diverged
+// significantly, and we need functionality that is only in the G1 version.
+// So I copied that code, which led to an alternate G1 version of
+// OffsetTableContigSpace. If the two versions of BlockOffsetTable could
+// be reconciled, then G1OffsetTableContigSpace could go away.
+
+// The idea behind time stamps is the following. Doing a save_marks on
+// all regions at every GC pause is time consuming (if I remember
+// well, 10ms or so). So, we would like to do that only for regions
+// that are GC alloc regions. To achieve this, we use time
+// stamps. For every evacuation pause, G1CollectedHeap generates a
+// unique time stamp (essentially a counter that gets
+// incremented). Every time we want to call save_marks on a region,
+// we set the saved_mark_word to top and also copy the current GC
+// time stamp to the time stamp field of the space. Reading the
+// saved_mark_word involves checking the time stamp of the
+// region.
If it is the same as the current GC time stamp, then we
+// can safely read the saved_mark_word field, as it is valid. If the
+// time stamp of the region is not the same as the current GC time
+// stamp, then we instead read top, as the saved_mark_word field is
+// invalid. Time stamps (on the regions and also on the
+// G1CollectedHeap) are reset at every cleanup (we iterate over
+// the regions anyway) and at the end of a Full GC. The current scheme
+// that uses sequential unsigned ints will fail only if we have 4b
+// evacuation pauses between two cleanups, which is _highly_ unlikely.
+
+class G1OffsetTableContigSpace: public ContiguousSpace {
+  friend class VMStructs;
+ protected:
+  // Block offset table for this space; allocate() and par_allocate()
+  // record every successful allocation in it.
+  G1BlockOffsetArrayContigSpace _offsets;
+  // Taken by par_allocate() so that allocation and the offset table
+  // update happen as one step.
+  Mutex _par_alloc_lock;
+  // GC time stamp copied from the heap by set_saved_mark(); 0 after
+  // construction and after reset_gc_time_stamp().
+  volatile unsigned _gc_time_stamp;
+
+ public:
+  // Constructor. If "is_zeroed" is true, the MemRegion "mr" may be
+  // assumed to contain zeros.
+  G1OffsetTableContigSpace(G1BlockOffsetSharedArray* sharedOffsetArray,
+                           MemRegion mr, bool is_zeroed = false);
+
+  void set_bottom(HeapWord* value);
+  void set_end(HeapWord* value);
+
+  // Time-stamp aware versions of the saved mark accessors; see the
+  // discussion of time stamps above this class.
+  virtual HeapWord* saved_mark_word() const;
+  virtual void set_saved_mark();
+  void reset_gc_time_stamp() { _gc_time_stamp = 0; }
+
+  virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space);
+  virtual void clear(bool mangle_space);
+
+  // Block start queries, answered by the block offset table.
+  HeapWord* block_start(const void* p);
+  HeapWord* block_start_const(const void* p) const;
+
+  // Add offset table update.
+  virtual HeapWord* allocate(size_t word_size);
+  HeapWord* par_allocate(size_t word_size);
+
+  // MarkSweep support phase3
+  virtual HeapWord* initialize_threshold();
+  virtual HeapWord* cross_threshold(HeapWord* start, HeapWord* end);
+
+  virtual void print() const;
+};
+
+class HeapRegion: public G1OffsetTableContigSpace {
+  friend class VMStructs;
+ private:
+
+  enum HumongousType {
+    NotHumongous = 0,
+    StartsHumongous,
+    ContinuesHumongous
+  };
+
+  // The next filter kind that should be used for a "new_dcto_cl" call with
+  // the "traditional" signature.
+  HeapRegionDCTOC::FilterKind _next_fk;
+
+  // Requires that the region "mr" be dense with objects, and begin and end
+  // with an object.
+  void oops_in_mr_iterate(MemRegion mr, OopClosure* cl);
+
+  // The remembered set for this region.
+  // (Might want to make this "inline" later, to avoid some alloc failure
+  // issues.)
+  HeapRegionRemSet* _rem_set;
+
+  G1BlockOffsetArrayContigSpace* offsets() { return &_offsets; }
+
+ protected:
+  // If this region is a member of a HeapRegionSeq, the index in that
+  // sequence, otherwise -1.
+  int _hrs_index;
+
+  HumongousType _humongous_type;
+  // For a humongous region, region in which it starts.
+  HeapRegion* _humongous_start_region;
+  // For the start region of a humongous sequence, its original end().
+  HeapWord* _orig_end;
+
+  // True iff the region is in current collection_set.
+  bool _in_collection_set;
+
+  // True iff the region is on the unclean list, waiting to be zero filled.
+  bool _is_on_unclean_list;
+
+  // True iff the region is on the free list, ready for allocation.
+  bool _is_on_free_list;
+
+  // Is this or has it been an allocation region in the current collection
+  // pause.
+  bool _is_gc_alloc_region;
+
+  // True iff an attempt to evacuate an object in the region failed.
+  bool _evacuation_failed;
+
+  // A heap region may be a member of one of a number of special subsets, each
+  // represented as linked lists through the field below.
Currently, these + // sets include: + // The collection set. + // The set of allocation regions used in a collection pause. + // Spaces that may contain gray objects. + HeapRegion* _next_in_special_set; + + // next region in the young "generation" region set + HeapRegion* _next_young_region; + + // For parallel heapRegion traversal. + jint _claimed; + + // We use concurrent marking to determine the amount of live data + // in each heap region. + size_t _prev_marked_bytes; // Bytes known to be live via last completed marking. + size_t _next_marked_bytes; // Bytes known to be live via in-progress marking. + + // See "sort_index" method. -1 means is not in the array. + int _sort_index; + + // Means it has (or at least had) a very large RS, and should not be + // considered for membership in a collection set. + enum PopularityState { + NotPopular, + PopularPending, + Popular + }; + PopularityState _popularity; + + // + double _gc_efficiency; + // + + enum YoungType { + NotYoung, // a region is not young + ScanOnly, // a region is young and scan-only + Young, // a region is young + Survivor // a region is young and it contains + // survivor + }; + + YoungType _young_type; + int _young_index_in_cset; + SurvRateGroup* _surv_rate_group; + int _age_index; + + // The start of the unmarked area. The unmarked area extends from this + // word until the top and/or end of the region, and is the part + // of the region for which no marking was done, i.e. objects may + // have been allocated in this part since the last mark phase. + // "prev" is the top at the start of the last completed marking. + // "next" is the top at the start of the in-progress marking (if any.) + HeapWord* _prev_top_at_mark_start; + HeapWord* _next_top_at_mark_start; + // If a collection pause is in progress, this is the top at the start + // of that pause. + + // We've counted the marked bytes of objects below here. 
+ HeapWord* _top_at_conc_mark_count; + + void init_top_at_mark_start() { + assert(_prev_marked_bytes == 0 && + _next_marked_bytes == 0, + "Must be called after zero_marked_bytes."); + HeapWord* bot = bottom(); + _prev_top_at_mark_start = bot; + _next_top_at_mark_start = bot; + _top_at_conc_mark_count = bot; + } + + jint _zfs; // A member of ZeroFillState. Protected by ZF_lock. + Thread* _zero_filler; // If _zfs is ZeroFilling, the thread that (last) + // made it so. + + void set_young_type(YoungType new_type) { + //assert(_young_type != new_type, "setting the same type" ); + // TODO: add more assertions here + _young_type = new_type; + } + + public: + // If "is_zeroed" is "true", the region "mr" can be assumed to contain zeros. + HeapRegion(G1BlockOffsetSharedArray* sharedOffsetArray, + MemRegion mr, bool is_zeroed); + + enum SomePublicConstants { + // HeapRegions are GrainBytes-aligned + // and have sizes that are multiples of GrainBytes. + LogOfHRGrainBytes = 20, + LogOfHRGrainWords = LogOfHRGrainBytes - LogHeapWordSize, + GrainBytes = 1 << LogOfHRGrainBytes, + GrainWords = 1 <= marked_bytes(), + "Can't mark more than we have."); + return used_at_mark_start_bytes - marked_bytes(); + } + + // An upper bound on the number of live bytes in the region. + size_t max_live_bytes() { return used() - garbage_bytes(); } + + void add_to_marked_bytes(size_t incr_bytes) { + _next_marked_bytes = _next_marked_bytes + incr_bytes; + guarantee( _next_marked_bytes <= used(), "invariant" ); + } + + void zero_marked_bytes() { + _prev_marked_bytes = _next_marked_bytes = 0; + } + + bool isHumongous() const { return _humongous_type != NotHumongous; } + bool startsHumongous() const { return _humongous_type == StartsHumongous; } + bool continuesHumongous() const { return _humongous_type == ContinuesHumongous; } + // For a humongous region, region in which it starts. 
+ HeapRegion* humongous_start_region() const { + return _humongous_start_region; + } + + // Causes the current region to represent a humongous object spanning "n" + // regions. + virtual void set_startsHumongous(); + + // The regions that continue a humongous sequence should be added using + // this method, in increasing address order. + void set_continuesHumongous(HeapRegion* start); + + void add_continuingHumongousRegion(HeapRegion* cont); + + // If the region has a remembered set, return a pointer to it. + HeapRegionRemSet* rem_set() const { + return _rem_set; + } + + // True iff the region is in current collection_set. + bool in_collection_set() const { + return _in_collection_set; + } + void set_in_collection_set(bool b) { + _in_collection_set = b; + } + HeapRegion* next_in_collection_set() { + assert(in_collection_set(), "should only invoke on member of CS."); + assert(_next_in_special_set == NULL || + _next_in_special_set->in_collection_set(), + "Malformed CS."); + return _next_in_special_set; + } + void set_next_in_collection_set(HeapRegion* r) { + assert(in_collection_set(), "should only invoke on member of CS."); + assert(r == NULL || r->in_collection_set(), "Malformed CS."); + _next_in_special_set = r; + } + + // True iff it is or has been an allocation region in the current + // collection pause. 
+  bool is_gc_alloc_region() const {
+    return _is_gc_alloc_region;
+  }
+  void set_is_gc_alloc_region(bool b) {
+    _is_gc_alloc_region = b;
+  }
+  // Returns the next region on the GC alloc region list, which is
+  // threaded through _next_in_special_set, or NULL if there is none.
+  HeapRegion* next_gc_alloc_region() {
+    assert(is_gc_alloc_region(),
+           "should only invoke on a GC alloc region.");
+    assert(_next_in_special_set == NULL ||
+           _next_in_special_set->is_gc_alloc_region(),
+           "Malformed GC alloc region list.");
+    return _next_in_special_set;
+  }
+  // Links "r" (which may be NULL) after this region on the GC alloc
+  // region list.
+  void set_next_gc_alloc_region(HeapRegion* r) {
+    assert(is_gc_alloc_region(),
+           "should only invoke on a GC alloc region.");
+    assert(r == NULL || r->is_gc_alloc_region(),
+           "Malformed GC alloc region list.");
+    _next_in_special_set = r;
+  }
+
+  // A region counts as reserved exactly when it is popular.
+  bool is_reserved() {
+    return popular();
+  }
+
+  // True iff the region is on the free list.
+  bool is_on_free_list() {
+    return _is_on_free_list;
+  }
+
+  void set_on_free_list(bool b) {
+    _is_on_free_list = b;
+  }
+
+  // Returns the next region on the free list, which is threaded through
+  // _next_in_special_set, or NULL if there is none.
+  HeapRegion* next_from_free_list() {
+    assert(is_on_free_list(),
+           "Should only invoke on free space.");
+    assert(_next_in_special_set == NULL ||
+           _next_in_special_set->is_on_free_list(),
+           "Malformed Free List.");
+    return _next_in_special_set;
+  }
+
+  // Links "r" (which may be NULL) after this region on the free list.
+  // Unlike the other setters, this one does not assert that this region
+  // itself is already flagged as being on the list.
+  void set_next_on_free_list(HeapRegion* r) {
+    assert(r == NULL || r->is_on_free_list(), "Malformed free list.");
+    _next_in_special_set = r;
+  }
+
+  // True iff the region is on the unclean list.
+  bool is_on_unclean_list() {
+    return _is_on_unclean_list;
+  }
+
+  void set_on_unclean_list(bool b);
+
+  // Returns the next region on the unclean list, which is threaded
+  // through _next_in_special_set, or NULL if there is none.
+  HeapRegion* next_from_unclean_list() {
+    assert(is_on_unclean_list(),
+           "Should only invoke on unclean space.");
+    assert(_next_in_special_set == NULL ||
+           _next_in_special_set->is_on_unclean_list(),
+           "Malformed unclean List.");
+    return _next_in_special_set;
+  }
+
+  void set_next_on_unclean_list(HeapRegion* r);
+
+  // The young region list uses its own link field, so a region can be
+  // on it and on one of the special sets above at the same time.
+  HeapRegion* get_next_young_region() { return _next_young_region; }
+  void set_next_young_region(HeapRegion* hr) {
+    _next_young_region = hr;
+  }
+
+  // Allows logical separation between objects allocated before and after.
+  void save_marks();
+
+  // Reset HR stuff to default values.
+ void hr_clear(bool par, bool clear_space); + + void initialize(MemRegion mr, bool clear_space, bool mangle_space); + + // Ensure that "this" is zero-filled. + void ensure_zero_filled(); + // This one requires that the calling thread holds ZF_mon. + void ensure_zero_filled_locked(); + + // Get the start of the unmarked area in this region. + HeapWord* prev_top_at_mark_start() const { return _prev_top_at_mark_start; } + HeapWord* next_top_at_mark_start() const { return _next_top_at_mark_start; } + + // Apply "cl->do_oop" to (the addresses of) all reference fields in objects + // allocated in the current region before the last call to "save_mark". + void oop_before_save_marks_iterate(OopClosure* cl); + + // This call determines the "filter kind" argument that will be used for + // the next call to "new_dcto_cl" on this region with the "traditional" + // signature (i.e., the call below.) The default, in the absence of a + // preceding call to this method, is "NoFilterKind", and a call to this + // method is necessary for each such call, or else it reverts to the + // default. + // (This is really ugly, but all other methods I could think of changed a + // lot of main-line code for G1.) + void set_next_filter_kind(HeapRegionDCTOC::FilterKind nfk) { + _next_fk = nfk; + } + + DirtyCardToOopClosure* + new_dcto_closure(OopClosure* cl, + CardTableModRefBS::PrecisionStyle precision, + HeapRegionDCTOC::FilterKind fk); + +#if WHASSUP + DirtyCardToOopClosure* + new_dcto_closure(OopClosure* cl, + CardTableModRefBS::PrecisionStyle precision, + HeapWord* boundary) { + assert(boundary == NULL, "This arg doesn't make sense here."); + DirtyCardToOopClosure* res = new_dcto_closure(cl, precision, _next_fk); + _next_fk = HeapRegionDCTOC::NoFilterKind; + return res; + } +#endif + + // + // Note the start or end of marking. This tells the heap region + // that the collector is about to start or has finished (concurrently) + // marking the heap. 
+ // + + // Note the start of a marking phase. Record the + // start of the unmarked area of the region here. + void note_start_of_marking(bool during_initial_mark) { + init_top_at_conc_mark_count(); + _next_marked_bytes = 0; + if (during_initial_mark && is_young() && !is_survivor()) + _next_top_at_mark_start = bottom(); + else + _next_top_at_mark_start = top(); + } + + // Note the end of a marking phase. Install the start of + // the unmarked area that was captured at start of marking. + void note_end_of_marking() { + _prev_top_at_mark_start = _next_top_at_mark_start; + _prev_marked_bytes = _next_marked_bytes; + _next_marked_bytes = 0; + + guarantee(_prev_marked_bytes <= + (size_t) (prev_top_at_mark_start() - bottom()) * HeapWordSize, + "invariant"); + } + + // After an evacuation, we need to update _next_top_at_mark_start + // to be the current top. Note this is only valid if we have only + // ever evacuated into this region. If we evacuate, allocate, and + // then evacuate we are in deep doodoo. + void note_end_of_copying() { + assert(top() >= _next_top_at_mark_start, + "Increase only"); + _next_top_at_mark_start = top(); + } + + // Returns "false" iff no object in the region was allocated when the + // last mark phase ended. + bool is_marked() { return _prev_top_at_mark_start != bottom(); } + + // If "is_marked()" is true, then this is the index of the region in + // an array constructed at the end of marking of the regions in a + // "desirability" order. 
+ int sort_index() { + return _sort_index; + } + void set_sort_index(int i) { + _sort_index = i; + } + + void init_top_at_conc_mark_count() { + _top_at_conc_mark_count = bottom(); + } + + void set_top_at_conc_mark_count(HeapWord *cur) { + assert(bottom() <= cur && cur <= end(), "Sanity."); + _top_at_conc_mark_count = cur; + } + + HeapWord* top_at_conc_mark_count() { + return _top_at_conc_mark_count; + } + + void reset_during_compaction() { + guarantee( isHumongous() && startsHumongous(), + "should only be called for humongous regions"); + + zero_marked_bytes(); + init_top_at_mark_start(); + } + + bool popular() { return _popularity == Popular; } + void set_popular(bool b) { + if (b) { + _popularity = Popular; + } else { + _popularity = NotPopular; + } + } + bool popular_pending() { return _popularity == PopularPending; } + void set_popular_pending(bool b) { + if (b) { + _popularity = PopularPending; + } else { + _popularity = NotPopular; + } + } + + // + void calc_gc_efficiency(void); + double gc_efficiency() { return _gc_efficiency;} + // + + bool is_young() const { return _young_type != NotYoung; } + bool is_scan_only() const { return _young_type == ScanOnly; } + bool is_survivor() const { return _young_type == Survivor; } + + int young_index_in_cset() const { return _young_index_in_cset; } + void set_young_index_in_cset(int index) { + assert( (index == -1) || is_young(), "pre-condition" ); + _young_index_in_cset = index; + } + + int age_in_surv_rate_group() { + assert( _surv_rate_group != NULL, "pre-condition" ); + assert( _age_index > -1, "pre-condition" ); + return _surv_rate_group->age_in_group(_age_index); + } + + void recalculate_age_in_surv_rate_group() { + assert( _surv_rate_group != NULL, "pre-condition" ); + assert( _age_index > -1, "pre-condition" ); + _age_index = _surv_rate_group->recalculate_age_index(_age_index); + } + + void record_surv_words_in_group(size_t words_survived) { + assert( _surv_rate_group != NULL, "pre-condition" ); + assert( 
_age_index > -1, "pre-condition" ); + int age_in_group = age_in_surv_rate_group(); + _surv_rate_group->record_surviving_words(age_in_group, words_survived); + } + + int age_in_surv_rate_group_cond() { + if (_surv_rate_group != NULL) + return age_in_surv_rate_group(); + else + return -1; + } + + SurvRateGroup* surv_rate_group() { + return _surv_rate_group; + } + + void install_surv_rate_group(SurvRateGroup* surv_rate_group) { + assert( surv_rate_group != NULL, "pre-condition" ); + assert( _surv_rate_group == NULL, "pre-condition" ); + assert( is_young(), "pre-condition" ); + + _surv_rate_group = surv_rate_group; + _age_index = surv_rate_group->next_age_index(); + } + + void uninstall_surv_rate_group() { + if (_surv_rate_group != NULL) { + assert( _age_index > -1, "pre-condition" ); + assert( is_young(), "pre-condition" ); + + _surv_rate_group = NULL; + _age_index = -1; + } else { + assert( _age_index == -1, "pre-condition" ); + } + } + + void set_young() { set_young_type(Young); } + + void set_scan_only() { set_young_type(ScanOnly); } + + void set_survivor() { set_young_type(Survivor); } + + void set_not_young() { set_young_type(NotYoung); } + + // Determine if an object has been allocated since the last + // mark performed by the collector. This returns true iff the object + // is within the unmarked area of the region. + bool obj_allocated_since_prev_marking(oop obj) const { + return (HeapWord *) obj >= prev_top_at_mark_start(); + } + bool obj_allocated_since_next_marking(oop obj) const { + return (HeapWord *) obj >= next_top_at_mark_start(); + } + + // For parallel heapRegion traversal. + bool claimHeapRegion(int claimValue); + jint claim_value() { return _claimed; } + // Use this carefully: only when you're sure no one is claiming... + void set_claim_value(int claimValue) { _claimed = claimValue; } + + // Returns the "evacuation_failed" property of the region. 
+ bool evacuation_failed() { return _evacuation_failed; } + + // Sets the "evacuation_failed" property of the region. + void set_evacuation_failed(bool b) { + _evacuation_failed = b; + + if (b) { + init_top_at_conc_mark_count(); + _next_marked_bytes = 0; + } + } + + // Requires that "mr" be entirely within the region. + // Apply "cl->do_object" to all objects that intersect with "mr". + // If the iteration encounters an unparseable portion of the region, + // or if "cl->abort()" is true after a closure application, + // terminate the iteration and return the address of the start of the + // subregion that isn't done. (The two can be distinguished by querying + // "cl->abort()".) Return of "NULL" indicates that the iteration + // completed. + HeapWord* + object_iterate_mem_careful(MemRegion mr, ObjectClosure* cl); + + HeapWord* + oops_on_card_seq_iterate_careful(MemRegion mr, + FilterOutOfRegionClosure* cl); + + // The region "mr" is entirely in "this", and starts and ends at block + // boundaries. The caller declares that all the contained blocks are + // coalesced into one. + void declare_filled_region_to_BOT(MemRegion mr) { + _offsets.single_block(mr.start(), mr.end()); + } + + // A version of block start that is guaranteed to find *some* block + // boundary at or before "p", but does not object iteration, and may + // therefore be used safely when the heap is unparseable. + HeapWord* block_start_careful(const void* p) const { + return _offsets.block_start_careful(p); + } + + // Requires that "addr" is within the region. Returns the start of the + // first ("careful") block that starts at or after "addr", or else the + // "end" of the region if there is no such block. + HeapWord* next_block_start_careful(HeapWord* addr); + + // Returns the zero-fill-state of the current region. 
+ ZeroFillState zero_fill_state() { return (ZeroFillState)_zfs; } + bool zero_fill_is_allocated() { return _zfs == Allocated; } + Thread* zero_filler() { return _zero_filler; } + + // Indicate that the contents of the region are unknown, and therefore + // might require zero-filling. + void set_zero_fill_needed() { + set_zero_fill_state_work(NotZeroFilled); + } + void set_zero_fill_in_progress(Thread* t) { + set_zero_fill_state_work(ZeroFilling); + _zero_filler = t; + } + void set_zero_fill_complete(); + void set_zero_fill_allocated() { + set_zero_fill_state_work(Allocated); + } + + void set_zero_fill_state_work(ZeroFillState zfs); + + // This is called when a full collection shrinks the heap. + // We want to set the heap region to a value which says + // it is no longer part of the heap. For now, we'll let "NotZF" fill + // that role. + void reset_zero_fill() { + set_zero_fill_state_work(NotZeroFilled); + _zero_filler = NULL; + } + +#define HeapRegion_OOP_SINCE_SAVE_MARKS_DECL(OopClosureType, nv_suffix) \ + virtual void oop_since_save_marks_iterate##nv_suffix(OopClosureType* cl); + SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(HeapRegion_OOP_SINCE_SAVE_MARKS_DECL) + + CompactibleSpace* next_compaction_space() const; + + virtual void reset_after_compaction(); + + void print() const; + void print_on(outputStream* st) const; + + // Override + virtual void verify(bool allow_dirty) const; + +#ifdef DEBUG + HeapWord* allocate(size_t size); +#endif +}; + +// HeapRegionClosure is used for iterating over regions. +// Terminates the iteration when the "doHeapRegion" method returns "true". +class HeapRegionClosure : public StackObj { + friend class HeapRegionSeq; + friend class G1CollectedHeap; + + bool _complete; + void incomplete() { _complete = false; } + + public: + HeapRegionClosure(): _complete(true) {} + + // Typically called on each region until it returns true. 
+  virtual bool doHeapRegion(HeapRegion* r) = 0;
+
+  // True after iteration if the closure was applied to all heap regions
+  // and returned "false" in all cases.
+  bool complete() { return _complete; }
+};
+
+// A singly linked list of heap regions with a cached element count.
+// Which field of HeapRegion serves as the "next" link is left to
+// subtypes, through get_next/set_next.
+class RegionList {
+protected:
+  virtual HeapRegion* get_next(HeapRegion* chr) = 0;
+  virtual void set_next(HeapRegion* chr,
+                        HeapRegion* new_next) = 0;
+
+  HeapRegion* _hd;
+  HeapRegion* _tl;
+  size_t _sz;
+
+  // Only subtypes may construct a list, since the type is unusable
+  // until get_next/set_next have been defined.
+  RegionList() : _hd(NULL), _tl(NULL), _sz(0) {}
+public:
+  // Forgets all elements; the regions themselves are not touched.
+  void reset() {
+    _hd = _tl = NULL;
+    _sz = 0;
+  }
+  HeapRegion* hd() { return _hd; }
+  HeapRegion* tl() { return _tl; }
+  size_t sz() { return _sz; }
+  size_t length();
+
+  // The list is well formed if head, tail and count agree on whether
+  // the list is empty, and the count matches the links actually walked.
+  bool well_formed() {
+    const bool looks_empty = hd() == NULL && tl() == NULL && sz() == 0;
+    const bool looks_populated = hd() != NULL && tl() != NULL && sz() > 0;
+    return (looks_empty || looks_populated) && (sz() == length());
+  }
+  virtual void insert_before_head(HeapRegion* r);
+  void prepend_list(RegionList* new_list);
+  virtual HeapRegion* pop();
+  void dec_sz() { _sz--; }
+  // Requires that "r" is an element of the list, and is not the tail.
+  void delete_after(HeapRegion* r);
+};
+
+// A region list whose insertion paths assert that no humongous region
+// is added.
+class EmptyNonHRegionList: public RegionList {
+protected:
+  // Only subtypes may construct a list, since the type is unusable
+  // until get_next/set_next have been defined.
+  EmptyNonHRegionList() : RegionList() {}
+
+public:
+  void insert_before_head(HeapRegion* r) {
+    // assert(r->is_empty(), "Better be empty");
+    assert(!r->isHumongous(), "Better not be humongous.");
+    RegionList::insert_before_head(r);
+  }
+  // Only the two ends of "new_list" are checked before splicing.
+  void prepend_list(EmptyNonHRegionList* new_list) {
+    // assert(new_list->hd() == NULL || new_list->hd()->is_empty(),
+    //        "Better be empty");
+    assert(new_list->hd() == NULL || !new_list->hd()->isHumongous(),
+           "Better not be humongous.");
+    // assert(new_list->tl() == NULL || new_list->tl()->is_empty(),
+    //        "Better be empty");
+    assert(new_list->tl() == NULL || !new_list->tl()->isHumongous(),
+           "Better not be humongous.");
+    RegionList::prepend_list(new_list);
+  }
+};
+
+// The list of unclean regions, linked through the regions' unclean
+// list accessors.
+class UncleanRegionList: public EmptyNonHRegionList {
+public:
+  HeapRegion* get_next(HeapRegion* hr) {
+    return hr->next_from_unclean_list();
+  }
+  void set_next(HeapRegion* hr, HeapRegion* new_next) {
+    hr->set_next_on_unclean_list(new_next);
+  }
+
+  UncleanRegionList() : EmptyNonHRegionList() {}
+
+  // Flags "r" as needing zero fill and as being on the unclean list,
+  // then links it in at the head.
+  void insert_before_head(HeapRegion* r) {
+    assert(!r->is_on_free_list(),
+           "Better not already be on free list");
+    assert(!r->is_on_unclean_list(),
+           "Better not already be on unclean list");
+    r->set_zero_fill_needed();
+    r->set_on_unclean_list(true);
+    EmptyNonHRegionList::insert_before_head(r);
+  }
+  // The regions of "new_list" must already carry the unclean flag;
+  // only its two ends are checked.
+  void prepend_list(UncleanRegionList* new_list) {
+    assert(new_list->tl() == NULL || !new_list->tl()->is_on_free_list(),
+           "Better not already be on free list");
+    assert(new_list->tl() == NULL || new_list->tl()->is_on_unclean_list(),
+           "Better already be marked as on unclean list");
+    assert(new_list->hd() == NULL || !new_list->hd()->is_on_free_list(),
+           "Better not already be on free list");
+    assert(new_list->hd() == NULL || new_list->hd()->is_on_unclean_list(),
+           "Better already be marked as on unclean list");
+    EmptyNonHRegionList::prepend_list(new_list);
+  }
+  // Removes the head, if any, and clears its unclean flag.
+  HeapRegion* pop() {
+    HeapRegion* popped = RegionList::pop();
+    if (popped == NULL) {
+      return NULL;
+    }
+    popped->set_on_unclean_list(false);
+    return popped;
+  }
+};
+
+// Local Variables: ***
+// c-indentation-style: gnu ***
+// End: ***
+
+#endif // SERIALGC
diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp
new file mode 100644
index 00000000000..0e4970ed1c1
--- /dev/null
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Allocates from the underlying contiguous space and, on success,
+// records the new block in the offset table.
+inline HeapWord* G1OffsetTableContigSpace::allocate(size_t size) {
+  HeapWord* const block = ContiguousSpace::allocate(size);
+  if (block == NULL) {
+    return NULL;
+  }
+  _offsets.alloc_block(block, size);
+  return block;
+}
+
+// Because of the requirement of keeping "_offsets" up to date with the
+// allocations, we sequentialize these with a lock. Therefore, best if
+// this is used for larger LAB allocations only.
+inline HeapWord* G1OffsetTableContigSpace::par_allocate(size_t size) {
+  // Held for the whole call, so the offset table update below stays in
+  // step with the allocation it describes.
+  MutexLocker ml(&_par_alloc_lock);
+  // With the lock held, a plain "allocate" ought to be enough. However,
+  // ContiguousSpace::allocate asserts that either the allocating thread
+  // holds the heap lock or it is the VM thread and we're at a safepoint.
+  // The best I (dld) could figure was to put a field in ContiguousSpace
+  // meaning "locking at safepoint taken care of", and set/reset that
+  // here. Calling the parallel variant will do for now, especially in
+  // light of the comment above. Perhaps in the future some lock-free
+  // manner of keeping the coordination.
+  HeapWord* const block = ContiguousSpace::par_allocate(size);
+  if (block == NULL) {
+    return NULL;
+  }
+  _offsets.alloc_block(block, size);
+  return block;
+}
+
+// Start of the block containing "p", as answered by the offset table.
+inline HeapWord* G1OffsetTableContigSpace::block_start(const void* p) {
+  HeapWord* const start = _offsets.block_start(p);
+  return start;
+}
+
+// Const flavour of block_start(); uses the offset table's const query.
+inline HeapWord*
+G1OffsetTableContigSpace::block_start_const(const void* p) const {
+  HeapWord* const start = _offsets.block_start_const(p);
+  return start;
+}
diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp
new file mode 100644
index 00000000000..f2d262ebfb5
--- /dev/null
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp
@@ -0,0 +1,1443 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_heapRegionRemSet.cpp.incl" + +#define HRRS_VERBOSE 0 + +#define PRT_COUNT_OCCUPIED 1 + +// OtherRegionsTable + +class PerRegionTable: public CHeapObj { + friend class OtherRegionsTable; + friend class HeapRegionRemSetIterator; + + HeapRegion* _hr; + BitMap _bm; +#if PRT_COUNT_OCCUPIED + jint _occupied; +#endif + PerRegionTable* _next_free; + + PerRegionTable* next_free() { return _next_free; } + void set_next_free(PerRegionTable* prt) { _next_free = prt; } + + + static PerRegionTable* _free_list; + +#ifdef _MSC_VER + // For some reason even though the classes are marked as friend they are unable + // to access CardsPerRegion when private/protected. Only the windows c++ compiler + // says this Sun CC and linux gcc don't have a problem with access when private + + public: + +#endif // _MSC_VER + + enum SomePrivateConstants { + CardsPerRegion = HeapRegion::GrainBytes >> CardTableModRefBS::card_shift + }; + +protected: + // We need access in order to union things into the base table. 
+ BitMap* bm() { return &_bm; } + + void recount_occupied() { + _occupied = (jint) bm()->count_one_bits(); + } + + PerRegionTable(HeapRegion* hr) : + _hr(hr), +#if PRT_COUNT_OCCUPIED + _occupied(0), +#endif + _bm(CardsPerRegion, false /* in-resource-area */) + {} + + static void free(PerRegionTable* prt) { + while (true) { + PerRegionTable* fl = _free_list; + prt->set_next_free(fl); + PerRegionTable* res = + (PerRegionTable*) + Atomic::cmpxchg_ptr(prt, &_free_list, fl); + if (res == fl) return; + } + ShouldNotReachHere(); + } + + static PerRegionTable* alloc(HeapRegion* hr) { + PerRegionTable* fl = _free_list; + while (fl != NULL) { + PerRegionTable* nxt = fl->next_free(); + PerRegionTable* res = + (PerRegionTable*) + Atomic::cmpxchg_ptr(nxt, &_free_list, fl); + if (res == fl) { + fl->init(hr); + return fl; + } else { + fl = _free_list; + } + } + assert(fl == NULL, "Loop condition."); + return new PerRegionTable(hr); + } + + void add_card_work(short from_card, bool par) { + if (!_bm.at(from_card)) { + if (par) { + if (_bm.par_at_put(from_card, 1)) { +#if PRT_COUNT_OCCUPIED + Atomic::inc(&_occupied); +#endif + } + } else { + _bm.at_put(from_card, 1); +#if PRT_COUNT_OCCUPIED + _occupied++; +#endif + } + } + } + + void add_reference_work(oop* from, bool par) { + // Must make this robust in case "from" is not in "_hr", because of + // concurrency. + +#if HRRS_VERBOSE + gclog_or_tty->print_cr(" PRT::Add_reference_work(" PTR_FORMAT "->" PTR_FORMAT").", + from, *from); +#endif + + HeapRegion* loc_hr = hr(); + // If the test below fails, then this table was reused concurrently + // with this operation. This is OK, since the old table was coarsened, + // and adding a bit to the new table is never incorrect. 
+ if (loc_hr->is_in_reserved(from)) { + size_t hw_offset = pointer_delta((HeapWord*)from, loc_hr->bottom()); + size_t from_card = + hw_offset >> + (CardTableModRefBS::card_shift - LogHeapWordSize); + + add_card_work((short) from_card, par); + } + } + +public: + + HeapRegion* hr() const { return _hr; } + +#if PRT_COUNT_OCCUPIED + jint occupied() const { + // Overkill, but if we ever need it... + // guarantee(_occupied == _bm.count_one_bits(), "Check"); + return _occupied; + } +#else + jint occupied() const { + return _bm.count_one_bits(); + } +#endif + + void init(HeapRegion* hr) { + _hr = hr; +#if PRT_COUNT_OCCUPIED + _occupied = 0; +#endif + _bm.clear(); + } + + void add_reference(oop* from) { + add_reference_work(from, /*parallel*/ true); + } + + void seq_add_reference(oop* from) { + add_reference_work(from, /*parallel*/ false); + } + + void scrub(CardTableModRefBS* ctbs, BitMap* card_bm) { + HeapWord* hr_bot = hr()->bottom(); + int hr_first_card_index = ctbs->index_for(hr_bot); + bm()->set_intersection_at_offset(*card_bm, hr_first_card_index); +#if PRT_COUNT_OCCUPIED + recount_occupied(); +#endif + } + + void add_card(short from_card_index) { + add_card_work(from_card_index, /*parallel*/ true); + } + + void seq_add_card(short from_card_index) { + add_card_work(from_card_index, /*parallel*/ false); + } + + // (Destructively) union the bitmap of the current table into the given + // bitmap (which is assumed to be of the same size.) + void union_bitmap_into(BitMap* bm) { + bm->set_union(_bm); + } + + // Mem size in bytes. + size_t mem_size() const { + return sizeof(this) + _bm.size_in_words() * HeapWordSize; + } + + static size_t fl_mem_size() { + PerRegionTable* cur = _free_list; + size_t res = 0; + while (cur != NULL) { + res += sizeof(PerRegionTable); + cur = cur->next_free(); + } + return res; + } + + // Requires "from" to be in "hr()". 
+ bool contains_reference(oop* from) const { + assert(hr()->is_in_reserved(from), "Precondition."); + size_t card_ind = pointer_delta(from, hr()->bottom(), + CardTableModRefBS::card_size); + return _bm.at(card_ind); + } +}; + +PerRegionTable* PerRegionTable::_free_list = NULL; + + +#define COUNT_PAR_EXPANDS 0 + +#if COUNT_PAR_EXPANDS +static jint n_par_expands = 0; +static jint n_par_contracts = 0; +static jint par_expand_list_len = 0; +static jint max_par_expand_list_len = 0; + +static void print_par_expand() { + Atomic::inc(&n_par_expands); + Atomic::inc(&par_expand_list_len); + if (par_expand_list_len > max_par_expand_list_len) { + max_par_expand_list_len = par_expand_list_len; + } + if ((n_par_expands % 10) == 0) { + gclog_or_tty->print_cr("\n\n%d par expands: %d contracts, " + "len = %d, max_len = %d\n.", + n_par_expands, n_par_contracts, par_expand_list_len, + max_par_expand_list_len); + } +} +#endif + +class PosParPRT: public PerRegionTable { + PerRegionTable** _par_tables; + + enum SomePrivateConstants { + ReserveParTableExpansion = 1 + }; + + void par_expand() { + int n = HeapRegionRemSet::num_par_rem_sets()-1; + if (n <= 0) return; + if (_par_tables == NULL) { + PerRegionTable* res = + (PerRegionTable*) + Atomic::cmpxchg_ptr((PerRegionTable*)ReserveParTableExpansion, + &_par_tables, NULL); + if (res != NULL) return; + // Otherwise, we reserved the right to do the expansion. + + PerRegionTable** ptables = NEW_C_HEAP_ARRAY(PerRegionTable*, n); + for (int i = 0; i < n; i++) { + PerRegionTable* ptable = PerRegionTable::alloc(hr()); + ptables[i] = ptable; + } + // Here we do not need an atomic. + _par_tables = ptables; +#if COUNT_PAR_EXPANDS + print_par_expand(); +#endif + // We must put this table on the expanded list. + PosParPRT* exp_head = _par_expanded_list; + while (true) { + set_next_par_expanded(exp_head); + PosParPRT* res = + (PosParPRT*) + Atomic::cmpxchg_ptr(this, &_par_expanded_list, exp_head); + if (res == exp_head) return; + // Otherwise. 
+ exp_head = res; + } + ShouldNotReachHere(); + } + } + + void par_contract() { + assert(_par_tables != NULL, "Precondition."); + int n = HeapRegionRemSet::num_par_rem_sets()-1; + for (int i = 0; i < n; i++) { + _par_tables[i]->union_bitmap_into(bm()); + PerRegionTable::free(_par_tables[i]); + _par_tables[i] = NULL; + } +#if PRT_COUNT_OCCUPIED + // We must recount the "occupied." + recount_occupied(); +#endif + FREE_C_HEAP_ARRAY(PerRegionTable*, _par_tables); + _par_tables = NULL; +#if COUNT_PAR_EXPANDS + Atomic::inc(&n_par_contracts); + Atomic::dec(&par_expand_list_len); +#endif + } + + static PerRegionTable** _par_table_fl; + + PosParPRT* _next; + + static PosParPRT* _free_list; + + PerRegionTable** par_tables() const { + assert(uintptr_t(NULL) == 0, "Assumption."); + if (uintptr_t(_par_tables) <= ReserveParTableExpansion) + return NULL; + else + return _par_tables; + } + + PosParPRT* _next_par_expanded; + PosParPRT* next_par_expanded() { return _next_par_expanded; } + void set_next_par_expanded(PosParPRT* ppprt) { _next_par_expanded = ppprt; } + static PosParPRT* _par_expanded_list; + +public: + + PosParPRT(HeapRegion* hr) : PerRegionTable(hr), _par_tables(NULL) {} + + jint occupied() const { + jint res = PerRegionTable::occupied(); + if (par_tables() != NULL) { + for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) { + res += par_tables()[i]->occupied(); + } + } + return res; + } + + void init(HeapRegion* hr) { + PerRegionTable::init(hr); + _next = NULL; + if (par_tables() != NULL) { + for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) { + par_tables()[i]->init(hr); + } + } + } + + static void free(PosParPRT* prt) { + while (true) { + PosParPRT* fl = _free_list; + prt->set_next(fl); + PosParPRT* res = + (PosParPRT*) + Atomic::cmpxchg_ptr(prt, &_free_list, fl); + if (res == fl) return; + } + ShouldNotReachHere(); + } + + static PosParPRT* alloc(HeapRegion* hr) { + PosParPRT* fl = _free_list; + while (fl != NULL) { + PosParPRT* nxt = 
fl->next(); + PosParPRT* res = + (PosParPRT*) + Atomic::cmpxchg_ptr(nxt, &_free_list, fl); + if (res == fl) { + fl->init(hr); + return fl; + } else { + fl = _free_list; + } + } + assert(fl == NULL, "Loop condition."); + return new PosParPRT(hr); + } + + PosParPRT* next() const { return _next; } + void set_next(PosParPRT* nxt) { _next = nxt; } + PosParPRT** next_addr() { return &_next; } + + void add_reference(oop* from, int tid) { + // Expand if necessary. + PerRegionTable** pt = par_tables(); + if (par_tables() == NULL && tid > 0 && hr()->is_gc_alloc_region()) { + par_expand(); + pt = par_tables(); + } + if (pt != NULL) { + // We always have to assume that mods to table 0 are in parallel, + // because of the claiming scheme in parallel expansion. A thread + // with tid != 0 that finds the table to be NULL, but doesn't succeed + // in claiming the right of expanding it, will end up in the else + // clause of the above if test. That thread could be delayed, and a + // thread 0 add reference could see the table expanded, and come + // here. Both threads would be adding in parallel. But we get to + // not use atomics for tids > 0. + if (tid == 0) { + PerRegionTable::add_reference(from); + } else { + pt[tid-1]->seq_add_reference(from); + } + } else { + // Not expanded -- add to the base table. 
+ PerRegionTable::add_reference(from); + } + } + + void scrub(CardTableModRefBS* ctbs, BitMap* card_bm) { + assert(_par_tables == NULL, "Precondition"); + PerRegionTable::scrub(ctbs, card_bm); + } + + size_t mem_size() const { + size_t res = + PerRegionTable::mem_size() + sizeof(this) - sizeof(PerRegionTable); + if (_par_tables != NULL) { + for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) { + res += _par_tables[i]->mem_size(); + } + } + return res; + } + + static size_t fl_mem_size() { + PosParPRT* cur = _free_list; + size_t res = 0; + while (cur != NULL) { + res += sizeof(PosParPRT); + cur = cur->next(); + } + return res; + } + + bool contains_reference(oop* from) const { + if (PerRegionTable::contains_reference(from)) return true; + if (_par_tables != NULL) { + for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) { + if (_par_tables[i]->contains_reference(from)) return true; + } + } + return false; + } + + static void par_contract_all(); + +}; + +void PosParPRT::par_contract_all() { + PosParPRT* hd = _par_expanded_list; + while (hd != NULL) { + PosParPRT* nxt = hd->next_par_expanded(); + PosParPRT* res = + (PosParPRT*) + Atomic::cmpxchg_ptr(nxt, &_par_expanded_list, hd); + if (res == hd) { + // We claimed the right to contract this table. 
+ hd->set_next_par_expanded(NULL); + hd->par_contract(); + hd = _par_expanded_list; + } else { + hd = res; + } + } +} + +PosParPRT* PosParPRT::_free_list = NULL; +PosParPRT* PosParPRT::_par_expanded_list = NULL; + +jint OtherRegionsTable::_cache_probes = 0; +jint OtherRegionsTable::_cache_hits = 0; + +size_t OtherRegionsTable::_max_fine_entries = 0; +size_t OtherRegionsTable::_mod_max_fine_entries_mask = 0; +#if SAMPLE_FOR_EVICTION +size_t OtherRegionsTable::_fine_eviction_stride = 0; +size_t OtherRegionsTable::_fine_eviction_sample_size = 0; +#endif + +OtherRegionsTable::OtherRegionsTable(HeapRegion* hr) : + _g1h(G1CollectedHeap::heap()), + _m(Mutex::leaf, "An OtherRegionsTable lock", true), + _hr(hr), + _coarse_map(G1CollectedHeap::heap()->max_regions(), + false /* in-resource-area */), + _fine_grain_regions(NULL), + _n_fine_entries(0), _n_coarse_entries(0), +#if SAMPLE_FOR_EVICTION + _fine_eviction_start(0), +#endif + _sparse_table(hr) +{ + typedef PosParPRT* PosParPRTPtr; + if (_max_fine_entries == 0) { + assert(_mod_max_fine_entries_mask == 0, "Both or none."); + _max_fine_entries = (1 << G1LogRSRegionEntries); + _mod_max_fine_entries_mask = _max_fine_entries - 1; +#if SAMPLE_FOR_EVICTION + assert(_fine_eviction_sample_size == 0 + && _fine_eviction_stride == 0, "All init at same time."); + _fine_eviction_sample_size = MAX2((size_t)4, (size_t)G1LogRSRegionEntries); + _fine_eviction_stride = _max_fine_entries / _fine_eviction_sample_size; +#endif + } + _fine_grain_regions = new PosParPRTPtr[_max_fine_entries]; + if (_fine_grain_regions == NULL) + vm_exit_out_of_memory(sizeof(void*)*_max_fine_entries, + "Failed to allocate _fine_grain_entries."); + for (size_t i = 0; i < _max_fine_entries; i++) { + _fine_grain_regions[i] = NULL; + } +} + +int** OtherRegionsTable::_from_card_cache = NULL; +size_t OtherRegionsTable::_from_card_cache_max_regions = 0; +size_t OtherRegionsTable::_from_card_cache_mem_size = 0; + +void OtherRegionsTable::init_from_card_cache(size_t 
max_regions) { + _from_card_cache_max_regions = max_regions; + + int n_par_rs = HeapRegionRemSet::num_par_rem_sets(); + _from_card_cache = NEW_C_HEAP_ARRAY(int*, n_par_rs); + for (int i = 0; i < n_par_rs; i++) { + _from_card_cache[i] = NEW_C_HEAP_ARRAY(int, max_regions); + for (size_t j = 0; j < max_regions; j++) { + _from_card_cache[i][j] = -1; // An invalid value. + } + } + _from_card_cache_mem_size = n_par_rs * max_regions * sizeof(int); +} + +void OtherRegionsTable::shrink_from_card_cache(size_t new_n_regs) { + for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) { + assert(new_n_regs <= _from_card_cache_max_regions, "Must be within max."); + for (size_t j = new_n_regs; j < _from_card_cache_max_regions; j++) { + _from_card_cache[i][j] = -1; // An invalid value. + } + } +} + +#ifndef PRODUCT +void OtherRegionsTable::print_from_card_cache() { + for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) { + for (size_t j = 0; j < _from_card_cache_max_regions; j++) { + gclog_or_tty->print_cr("_from_card_cache[%d][%d] = %d.", + i, j, _from_card_cache[i][j]); + } + } +} +#endif + +void OtherRegionsTable::add_reference(oop* from, int tid) { + size_t cur_hrs_ind = hr()->hrs_index(); + +#if HRRS_VERBOSE + gclog_or_tty->print_cr("ORT::add_reference_work(" PTR_FORMAT "->" PTR_FORMAT ").", + from, *from); +#endif + + int from_card = (int)(uintptr_t(from) >> CardTableModRefBS::card_shift); + +#if HRRS_VERBOSE + gclog_or_tty->print_cr("Table for [" PTR_FORMAT "...): card %d (cache = %d)", + hr()->bottom(), from_card, + _from_card_cache[tid][cur_hrs_ind]); +#endif + +#define COUNT_CACHE 0 +#if COUNT_CACHE + jint p = Atomic::add(1, &_cache_probes); + if ((p % 10000) == 0) { + jint hits = _cache_hits; + gclog_or_tty->print_cr("%d/%d = %5.2f%% RS cache hits.", + _cache_hits, p, 100.0* (float)hits/(float)p); + } +#endif + if (from_card == _from_card_cache[tid][cur_hrs_ind]) { +#if HRRS_VERBOSE + gclog_or_tty->print_cr(" from-card cache hit."); +#endif +#if 
COUNT_CACHE + Atomic::inc(&_cache_hits); +#endif + assert(contains_reference(from), "We just added it!"); + return; + } else { + _from_card_cache[tid][cur_hrs_ind] = from_card; + } + + // Note that this may be a continued H region. + HeapRegion* from_hr = _g1h->heap_region_containing_raw(from); + size_t from_hrs_ind = (size_t)from_hr->hrs_index(); + + // If the region is already coarsened, return. + if (_coarse_map.at(from_hrs_ind)) { +#if HRRS_VERBOSE + gclog_or_tty->print_cr(" coarse map hit."); +#endif + assert(contains_reference(from), "We just added it!"); + return; + } + + // Otherwise find a per-region table to add it to. + size_t ind = from_hrs_ind & _mod_max_fine_entries_mask; + PosParPRT* prt = find_region_table(ind, from_hr); + if (prt == NULL) { + MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag); + // Confirm that it's really not there... + prt = find_region_table(ind, from_hr); + if (prt == NULL) { + + uintptr_t from_hr_bot_card_index = + uintptr_t(from_hr->bottom()) + >> CardTableModRefBS::card_shift; + int card_index = from_card - from_hr_bot_card_index; + assert(0 <= card_index && card_index < PosParPRT::CardsPerRegion, + "Must be in range."); + if (G1HRRSUseSparseTable && + _sparse_table.add_card((short) from_hrs_ind, card_index)) { + if (G1RecordHRRSOops) { + HeapRegionRemSet::record(hr(), from); +#if HRRS_VERBOSE + gclog_or_tty->print(" Added card " PTR_FORMAT " to region " + "[" PTR_FORMAT "...) for ref " PTR_FORMAT ".\n", + align_size_down(uintptr_t(from), + CardTableModRefBS::card_size), + hr()->bottom(), from); +#endif + } +#if HRRS_VERBOSE + gclog_or_tty->print_cr(" added card to sparse table."); +#endif + assert(contains_reference_locked(from), "We just added it!"); + return; + } else { +#if HRRS_VERBOSE + gclog_or_tty->print_cr(" [tid %d] sparse table entry " + "overflow(f: %d, t: %d)", + tid, from_hrs_ind, cur_hrs_ind); +#endif + } + + // Otherwise, transfer from sparse to fine-grain. 
+ short cards[SparsePRTEntry::CardsPerEntry]; + if (G1HRRSUseSparseTable) { + bool res = _sparse_table.get_cards((short) from_hrs_ind, &cards[0]); + assert(res, "There should have been an entry"); + } + + if (_n_fine_entries == _max_fine_entries) { + prt = delete_region_table(); + } else { + prt = PosParPRT::alloc(from_hr); + } + prt->init(from_hr); + // Record the outgoing pointer in the from_region's outgoing bitmap. + from_hr->rem_set()->add_outgoing_reference(hr()); + + PosParPRT* first_prt = _fine_grain_regions[ind]; + prt->set_next(first_prt); // XXX Maybe move to init? + _fine_grain_regions[ind] = prt; + _n_fine_entries++; + + // Add in the cards from the sparse table. + if (G1HRRSUseSparseTable) { + for (int i = 0; i < SparsePRTEntry::CardsPerEntry; i++) { + short c = cards[i]; + if (c != SparsePRTEntry::NullEntry) { + prt->add_card(c); + } + } + // Now we can delete the sparse entry. + bool res = _sparse_table.delete_entry((short) from_hrs_ind); + assert(res, "It should have been there."); + } + } + assert(prt != NULL && prt->hr() == from_hr, "consequence"); + } + // Note that we can't assert "prt->hr() == from_hr", because of the + // possibility of concurrent reuse. But see head comment of + // OtherRegionsTable for why this is OK. + assert(prt != NULL, "Inv"); + + prt->add_reference(from, tid); + if (G1RecordHRRSOops) { + HeapRegionRemSet::record(hr(), from); +#if HRRS_VERBOSE + gclog_or_tty->print("Added card " PTR_FORMAT " to region " + "[" PTR_FORMAT "...) 
for ref " PTR_FORMAT ".\n", + align_size_down(uintptr_t(from), + CardTableModRefBS::card_size), + hr()->bottom(), from); +#endif + } + assert(contains_reference(from), "We just added it!"); +} + +PosParPRT* +OtherRegionsTable::find_region_table(size_t ind, HeapRegion* hr) const { + assert(0 <= ind && ind < _max_fine_entries, "Preconditions."); + PosParPRT* prt = _fine_grain_regions[ind]; + while (prt != NULL && prt->hr() != hr) { + prt = prt->next(); + } + // Loop postcondition is the method postcondition. + return prt; +} + + +#define DRT_CENSUS 0 + +#if DRT_CENSUS +static const int HistoSize = 6; +static int global_histo[HistoSize] = { 0, 0, 0, 0, 0, 0 }; +static int coarsenings = 0; +static int occ_sum = 0; +#endif + +jint OtherRegionsTable::_n_coarsenings = 0; + +PosParPRT* OtherRegionsTable::delete_region_table() { +#if DRT_CENSUS + int histo[HistoSize] = { 0, 0, 0, 0, 0, 0 }; + const int histo_limits[] = { 1, 4, 16, 64, 256, 2048 }; +#endif + + assert(_m.owned_by_self(), "Precondition"); + assert(_n_fine_entries == _max_fine_entries, "Precondition"); + PosParPRT* max = NULL; + jint max_occ = 0; + PosParPRT** max_prev; + size_t max_ind; + +#if SAMPLE_FOR_EVICTION + size_t i = _fine_eviction_start; + for (size_t k = 0; k < _fine_eviction_sample_size; k++) { + size_t ii = i; + // Make sure we get a non-NULL sample. 
+ while (_fine_grain_regions[ii] == NULL) { + ii++; + if (ii == _max_fine_entries) ii = 0; + guarantee(ii != i, "We must find one."); + } + PosParPRT** prev = &_fine_grain_regions[ii]; + PosParPRT* cur = *prev; + while (cur != NULL) { + jint cur_occ = cur->occupied(); + if (max == NULL || cur_occ > max_occ) { + max = cur; + max_prev = prev; + max_ind = i; + max_occ = cur_occ; + } + prev = cur->next_addr(); + cur = cur->next(); + } + i = i + _fine_eviction_stride; + if (i >= _n_fine_entries) i = i - _n_fine_entries; + } + _fine_eviction_start++; + if (_fine_eviction_start >= _n_fine_entries) + _fine_eviction_start -= _n_fine_entries; +#else + for (int i = 0; i < _max_fine_entries; i++) { + PosParPRT** prev = &_fine_grain_regions[i]; + PosParPRT* cur = *prev; + while (cur != NULL) { + jint cur_occ = cur->occupied(); +#if DRT_CENSUS + for (int k = 0; k < HistoSize; k++) { + if (cur_occ <= histo_limits[k]) { + histo[k]++; global_histo[k]++; break; + } + } +#endif + if (max == NULL || cur_occ > max_occ) { + max = cur; + max_prev = prev; + max_ind = i; + max_occ = cur_occ; + } + prev = cur->next_addr(); + cur = cur->next(); + } + } +#endif + // XXX + guarantee(max != NULL, "Since _n_fine_entries > 0"); +#if DRT_CENSUS + gclog_or_tty->print_cr("In a coarsening: histo of occs:"); + for (int k = 0; k < HistoSize; k++) { + gclog_or_tty->print_cr(" <= %4d: %5d.", histo_limits[k], histo[k]); + } + coarsenings++; + occ_sum += max_occ; + if ((coarsenings % 100) == 0) { + gclog_or_tty->print_cr("\ncoarsenings = %d; global summary:", coarsenings); + for (int k = 0; k < HistoSize; k++) { + gclog_or_tty->print_cr(" <= %4d: %5d.", histo_limits[k], global_histo[k]); + } + gclog_or_tty->print_cr("Avg occ of deleted region = %6.2f.", + (float)occ_sum/(float)coarsenings); + } +#endif + + // Set the corresponding coarse bit. 
+ int max_hrs_index = max->hr()->hrs_index(); + if (!_coarse_map.at(max_hrs_index)) { + _coarse_map.at_put(max_hrs_index, true); + _n_coarse_entries++; +#if 0 + gclog_or_tty->print("Coarsened entry in region [" PTR_FORMAT "...] " + "for region [" PTR_FORMAT "...] (%d coarse entries).\n", + hr()->bottom(), + max->hr()->bottom(), + _n_coarse_entries); +#endif + } + + // Unsplice. + *max_prev = max->next(); + Atomic::inc(&_n_coarsenings); + _n_fine_entries--; + return max; +} + + +// At present, this must be called stop-world single-threaded. +void OtherRegionsTable::scrub(CardTableModRefBS* ctbs, + BitMap* region_bm, BitMap* card_bm) { + // First eliminated garbage regions from the coarse map. + if (G1RSScrubVerbose) + gclog_or_tty->print_cr("Scrubbing region %d:", hr()->hrs_index()); + + assert(_coarse_map.size() == region_bm->size(), "Precondition"); + if (G1RSScrubVerbose) + gclog_or_tty->print(" Coarse map: before = %d...", _n_coarse_entries); + _coarse_map.set_intersection(*region_bm); + _n_coarse_entries = _coarse_map.count_one_bits(); + if (G1RSScrubVerbose) + gclog_or_tty->print_cr(" after = %d.", _n_coarse_entries); + + // Now do the fine-grained maps. + for (size_t i = 0; i < _max_fine_entries; i++) { + PosParPRT* cur = _fine_grain_regions[i]; + PosParPRT** prev = &_fine_grain_regions[i]; + while (cur != NULL) { + PosParPRT* nxt = cur->next(); + // If the entire region is dead, eliminate. + if (G1RSScrubVerbose) + gclog_or_tty->print_cr(" For other region %d:", cur->hr()->hrs_index()); + if (!region_bm->at(cur->hr()->hrs_index())) { + *prev = nxt; + cur->set_next(NULL); + _n_fine_entries--; + if (G1RSScrubVerbose) + gclog_or_tty->print_cr(" deleted via region map."); + PosParPRT::free(cur); + } else { + // Do fine-grain elimination. 
+ if (G1RSScrubVerbose) + gclog_or_tty->print(" occ: before = %4d.", cur->occupied()); + cur->scrub(ctbs, card_bm); + if (G1RSScrubVerbose) + gclog_or_tty->print_cr(" after = %4d.", cur->occupied()); + // Did that empty the table completely? + if (cur->occupied() == 0) { + *prev = nxt; + cur->set_next(NULL); + _n_fine_entries--; + PosParPRT::free(cur); + } else { + prev = cur->next_addr(); + } + } + cur = nxt; + } + } + // Since we may have deleted a from_card_cache entry from the RS, clear + // the FCC. + clear_fcc(); +} + + +size_t OtherRegionsTable::occupied() const { + // Cast away const in this case. + MutexLockerEx x((Mutex*)&_m, Mutex::_no_safepoint_check_flag); + size_t sum = occ_fine(); + sum += occ_sparse(); + sum += occ_coarse(); + return sum; +} + +size_t OtherRegionsTable::occ_fine() const { + size_t sum = 0; + for (size_t i = 0; i < _max_fine_entries; i++) { + PosParPRT* cur = _fine_grain_regions[i]; + while (cur != NULL) { + sum += cur->occupied(); + cur = cur->next(); + } + } + return sum; +} + +size_t OtherRegionsTable::occ_coarse() const { + return (_n_coarse_entries * PosParPRT::CardsPerRegion); +} + +size_t OtherRegionsTable::occ_sparse() const { + return _sparse_table.occupied(); +} + +size_t OtherRegionsTable::mem_size() const { + // Cast away const in this case. + MutexLockerEx x((Mutex*)&_m, Mutex::_no_safepoint_check_flag); + size_t sum = 0; + for (size_t i = 0; i < _max_fine_entries; i++) { + PosParPRT* cur = _fine_grain_regions[i]; + while (cur != NULL) { + sum += cur->mem_size(); + cur = cur->next(); + } + } + sum += (sizeof(PosParPRT*) * _max_fine_entries); + sum += (_coarse_map.size_in_words() * HeapWordSize); + sum += (_sparse_table.mem_size()); + sum += sizeof(*this) - sizeof(_sparse_table); // Avoid double counting above. 
+ return sum; +} + +size_t OtherRegionsTable::static_mem_size() { + return _from_card_cache_mem_size; +} + +size_t OtherRegionsTable::fl_mem_size() { + return PerRegionTable::fl_mem_size() + PosParPRT::fl_mem_size(); +} + +void OtherRegionsTable::clear_fcc() { + for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) { + _from_card_cache[i][hr()->hrs_index()] = -1; + } +} + +void OtherRegionsTable::clear() { + MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag); + for (size_t i = 0; i < _max_fine_entries; i++) { + PosParPRT* cur = _fine_grain_regions[i]; + while (cur != NULL) { + PosParPRT* nxt = cur->next(); + PosParPRT::free(cur); + cur = nxt; + } + _fine_grain_regions[i] = NULL; + } + _sparse_table.clear(); + _coarse_map.clear(); + _n_fine_entries = 0; + _n_coarse_entries = 0; + + clear_fcc(); +} + +void OtherRegionsTable::clear_incoming_entry(HeapRegion* from_hr) { + MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag); + size_t hrs_ind = (size_t)from_hr->hrs_index(); + size_t ind = hrs_ind & _mod_max_fine_entries_mask; + if (del_single_region_table(ind, from_hr)) { + assert(!_coarse_map.at(hrs_ind), "Inv"); + } else { + _coarse_map.par_at_put(hrs_ind, 0); + } + // Check to see if any of the fcc entries come from here. + int hr_ind = hr()->hrs_index(); + for (int tid = 0; tid < HeapRegionRemSet::num_par_rem_sets(); tid++) { + int fcc_ent = _from_card_cache[tid][hr_ind]; + if (fcc_ent != -1) { + HeapWord* card_addr = (HeapWord*) + (uintptr_t(fcc_ent) << CardTableModRefBS::card_shift); + if (hr()->is_in_reserved(card_addr)) { + // Clear the from card cache. 
+ _from_card_cache[tid][hr_ind] = -1; + } + } + } +} + +bool OtherRegionsTable::del_single_region_table(size_t ind, + HeapRegion* hr) { + assert(0 <= ind && ind < _max_fine_entries, "Preconditions."); + PosParPRT** prev_addr = &_fine_grain_regions[ind]; + PosParPRT* prt = *prev_addr; + while (prt != NULL && prt->hr() != hr) { + prev_addr = prt->next_addr(); + prt = prt->next(); + } + if (prt != NULL) { + assert(prt->hr() == hr, "Loop postcondition."); + *prev_addr = prt->next(); + PosParPRT::free(prt); + _n_fine_entries--; + return true; + } else { + return false; + } +} + +bool OtherRegionsTable::contains_reference(oop* from) const { + // Cast away const in this case. + MutexLockerEx x((Mutex*)&_m, Mutex::_no_safepoint_check_flag); + return contains_reference_locked(from); +} + +bool OtherRegionsTable::contains_reference_locked(oop* from) const { + HeapRegion* hr = _g1h->heap_region_containing_raw(from); + if (hr == NULL) return false; + size_t hr_ind = hr->hrs_index(); + // Is this region in the coarse map? + if (_coarse_map.at(hr_ind)) return true; + + PosParPRT* prt = find_region_table(hr_ind & _mod_max_fine_entries_mask, + hr); + if (prt != NULL) { + return prt->contains_reference(from); + + } else { + uintptr_t from_card = + (uintptr_t(from) >> CardTableModRefBS::card_shift); + uintptr_t hr_bot_card_index = + uintptr_t(hr->bottom()) >> CardTableModRefBS::card_shift; + assert(from_card >= hr_bot_card_index, "Inv"); + int card_index = from_card - hr_bot_card_index; + return _sparse_table.contains_card((short)hr_ind, card_index); + } + + +} + + +bool HeapRegionRemSet::_par_traversal = false; + +void HeapRegionRemSet::set_par_traversal(bool b) { + assert(_par_traversal != b, "Proper alternation..."); + _par_traversal = b; +} + +int HeapRegionRemSet::num_par_rem_sets() { + // We always have at least two, so that a mutator thread can claim an + // id and add to a rem set. 
+ return (int) MAX2(ParallelGCThreads, (size_t)2); +} + +HeapRegionRemSet::HeapRegionRemSet(G1BlockOffsetSharedArray* bosa, + HeapRegion* hr) + : _bosa(bosa), _other_regions(hr), + _outgoing_region_map(G1CollectedHeap::heap()->max_regions(), + false /* in-resource-area */), + _iter_state(Unclaimed) +{} + + +void HeapRegionRemSet::init_for_par_iteration() { + _iter_state = Unclaimed; +} + +bool HeapRegionRemSet::claim_iter() { + if (_iter_state != Unclaimed) return false; + jint res = Atomic::cmpxchg(Claimed, (jint*)(&_iter_state), Unclaimed); + return (res == Unclaimed); +} + +void HeapRegionRemSet::set_iter_complete() { + _iter_state = Complete; +} + +bool HeapRegionRemSet::iter_is_complete() { + return _iter_state == Complete; +} + + +void HeapRegionRemSet::init_iterator(HeapRegionRemSetIterator* iter) const { + iter->initialize(this); +} + +#ifndef PRODUCT +void HeapRegionRemSet::print() const { + HeapRegionRemSetIterator iter; + init_iterator(&iter); + size_t card_index; + while (iter.has_next(card_index)) { + HeapWord* card_start = + G1CollectedHeap::heap()->bot_shared()->address_for_index(card_index); + gclog_or_tty->print_cr(" Card " PTR_FORMAT ".", card_start); + } + // XXX + if (iter.n_yielded() != occupied()) { + gclog_or_tty->print_cr("Yielded disagrees with occupied:"); + gclog_or_tty->print_cr(" %6d yielded (%6d coarse, %6d fine).", + iter.n_yielded(), + iter.n_yielded_coarse(), iter.n_yielded_fine()); + gclog_or_tty->print_cr(" %6d occ (%6d coarse, %6d fine).", + occupied(), occ_coarse(), occ_fine()); + } + guarantee(iter.n_yielded() == occupied(), + "We should have yielded all the represented cards."); +} +#endif + +void HeapRegionRemSet::cleanup() { + SparsePRT::cleanup_all(); +} + +void HeapRegionRemSet::par_cleanup() { + PosParPRT::par_contract_all(); +} + +void HeapRegionRemSet::add_outgoing_reference(HeapRegion* to_hr) { + _outgoing_region_map.par_at_put(to_hr->hrs_index(), 1); +} + +void HeapRegionRemSet::clear() { + clear_outgoing_entries(); + 
_outgoing_region_map.clear(); + _other_regions.clear(); + assert(occupied() == 0, "Should be clear."); +} + +void HeapRegionRemSet::clear_outgoing_entries() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + size_t i = _outgoing_region_map.get_next_one_offset(0); + while (i < _outgoing_region_map.size()) { + HeapRegion* to_region = g1h->region_at(i); + to_region->rem_set()->clear_incoming_entry(hr()); + i = _outgoing_region_map.get_next_one_offset(i+1); + } +} + + +void HeapRegionRemSet::scrub(CardTableModRefBS* ctbs, + BitMap* region_bm, BitMap* card_bm) { + _other_regions.scrub(ctbs, region_bm, card_bm); +} + +//-------------------- Iteration -------------------- + +HeapRegionRemSetIterator:: +HeapRegionRemSetIterator() : + _hrrs(NULL), + _g1h(G1CollectedHeap::heap()), + _bosa(NULL), + _sparse_iter(size_t(G1CollectedHeap::heap()->reserved_region().start()) + >> CardTableModRefBS::card_shift) +{} + +void HeapRegionRemSetIterator::initialize(const HeapRegionRemSet* hrrs) { + _hrrs = hrrs; + _coarse_map = &_hrrs->_other_regions._coarse_map; + _fine_grain_regions = _hrrs->_other_regions._fine_grain_regions; + _bosa = _hrrs->bosa(); + + _is = Sparse; + // Set these values so that we increment to the first region. + _coarse_cur_region_index = -1; + _coarse_cur_region_cur_card = (PosParPRT::CardsPerRegion-1);; + + _cur_region_cur_card = 0; + + _fine_array_index = -1; + _fine_cur_prt = NULL; + + _n_yielded_coarse = 0; + _n_yielded_fine = 0; + _n_yielded_sparse = 0; + + _sparse_iter.init(&hrrs->_other_regions._sparse_table); +} + +bool HeapRegionRemSetIterator::coarse_has_next(size_t& card_index) { + if (_hrrs->_other_regions._n_coarse_entries == 0) return false; + // Go to the next card. + _coarse_cur_region_cur_card++; + // Was the last the last card in the current region? + if (_coarse_cur_region_cur_card == PosParPRT::CardsPerRegion) { + // Yes: find the next region. 
This may leave _coarse_cur_region_index + // Set to the last index, in which case there are no more coarse + // regions. + _coarse_cur_region_index = + (int) _coarse_map->get_next_one_offset(_coarse_cur_region_index + 1); + if ((size_t)_coarse_cur_region_index < _coarse_map->size()) { + _coarse_cur_region_cur_card = 0; + HeapWord* r_bot = + _g1h->region_at(_coarse_cur_region_index)->bottom(); + _cur_region_card_offset = _bosa->index_for(r_bot); + } else { + return false; + } + } + // If we didn't return false above, then we can yield a card. + card_index = _cur_region_card_offset + _coarse_cur_region_cur_card; + return true; +} + +void HeapRegionRemSetIterator::fine_find_next_non_null_prt() { + // Otherwise, find the next bucket list in the array. + _fine_array_index++; + while (_fine_array_index < (int) OtherRegionsTable::_max_fine_entries) { + _fine_cur_prt = _fine_grain_regions[_fine_array_index]; + if (_fine_cur_prt != NULL) return; + else _fine_array_index++; + } + assert(_fine_cur_prt == NULL, "Loop post"); +} + +bool HeapRegionRemSetIterator::fine_has_next(size_t& card_index) { + if (fine_has_next()) { + _cur_region_cur_card = + _fine_cur_prt->_bm.get_next_one_offset(_cur_region_cur_card + 1); + } + while (!fine_has_next()) { + if (_cur_region_cur_card == PosParPRT::CardsPerRegion) { + _cur_region_cur_card = 0; + _fine_cur_prt = _fine_cur_prt->next(); + } + if (_fine_cur_prt == NULL) { + fine_find_next_non_null_prt(); + if (_fine_cur_prt == NULL) return false; + } + assert(_fine_cur_prt != NULL && _cur_region_cur_card == 0, + "inv."); + HeapWord* r_bot = + _fine_cur_prt->hr()->bottom(); + _cur_region_card_offset = _bosa->index_for(r_bot); + _cur_region_cur_card = _fine_cur_prt->_bm.get_next_one_offset(0); + } + assert(fine_has_next(), "Or else we exited the loop via the return."); + card_index = _cur_region_card_offset + _cur_region_cur_card; + return true; +} + +bool HeapRegionRemSetIterator::fine_has_next() { + return + _fine_cur_prt != NULL && + 
_cur_region_cur_card < PosParPRT::CardsPerRegion; +} + +bool HeapRegionRemSetIterator::has_next(size_t& card_index) { + switch (_is) { + case Sparse: + if (_sparse_iter.has_next(card_index)) { + _n_yielded_sparse++; + return true; + } + // Otherwise, deliberate fall-through + _is = Fine; + case Fine: + if (fine_has_next(card_index)) { + _n_yielded_fine++; + return true; + } + // Otherwise, deliberate fall-through + _is = Coarse; + case Coarse: + if (coarse_has_next(card_index)) { + _n_yielded_coarse++; + return true; + } + // Otherwise... + break; + } + assert(ParallelGCThreads > 1 || + n_yielded() == _hrrs->occupied(), + "Should have yielded all the cards in the rem set " + "(in the non-par case)."); + return false; +} + + + +oop** HeapRegionRemSet::_recorded_oops = NULL; +HeapWord** HeapRegionRemSet::_recorded_cards = NULL; +HeapRegion** HeapRegionRemSet::_recorded_regions = NULL; +int HeapRegionRemSet::_n_recorded = 0; + +HeapRegionRemSet::Event* HeapRegionRemSet::_recorded_events = NULL; +int* HeapRegionRemSet::_recorded_event_index = NULL; +int HeapRegionRemSet::_n_recorded_events = 0; + +void HeapRegionRemSet::record(HeapRegion* hr, oop* f) { + if (_recorded_oops == NULL) { + assert(_n_recorded == 0 + && _recorded_cards == NULL + && _recorded_regions == NULL, + "Inv"); + _recorded_oops = NEW_C_HEAP_ARRAY(oop*, MaxRecorded); + _recorded_cards = NEW_C_HEAP_ARRAY(HeapWord*, MaxRecorded); + _recorded_regions = NEW_C_HEAP_ARRAY(HeapRegion*, MaxRecorded); + } + if (_n_recorded == MaxRecorded) { + gclog_or_tty->print_cr("Filled up 'recorded' (%d).", MaxRecorded); + } else { + _recorded_cards[_n_recorded] = + (HeapWord*)align_size_down(uintptr_t(f), + CardTableModRefBS::card_size); + _recorded_oops[_n_recorded] = f; + _recorded_regions[_n_recorded] = hr; + _n_recorded++; + } +} + +void HeapRegionRemSet::record_event(Event evnt) { + if (!G1RecordHRRSEvents) return; + + if (_recorded_events == NULL) { + assert(_n_recorded_events == 0 + && _recorded_event_index == NULL, 
+ "Inv"); + _recorded_events = NEW_C_HEAP_ARRAY(Event, MaxRecordedEvents); + _recorded_event_index = NEW_C_HEAP_ARRAY(int, MaxRecordedEvents); + } + if (_n_recorded_events == MaxRecordedEvents) { + gclog_or_tty->print_cr("Filled up 'recorded_events' (%d).", MaxRecordedEvents); + } else { + _recorded_events[_n_recorded_events] = evnt; + _recorded_event_index[_n_recorded_events] = _n_recorded; + _n_recorded_events++; + } +} + +void HeapRegionRemSet::print_event(outputStream* str, Event evnt) { + switch (evnt) { + case Event_EvacStart: + str->print("Evac Start"); + break; + case Event_EvacEnd: + str->print("Evac End"); + break; + case Event_RSUpdateEnd: + str->print("RS Update End"); + break; + } +} + +void HeapRegionRemSet::print_recorded() { + int cur_evnt = 0; + Event cur_evnt_kind; + int cur_evnt_ind = 0; + if (_n_recorded_events > 0) { + cur_evnt_kind = _recorded_events[cur_evnt]; + cur_evnt_ind = _recorded_event_index[cur_evnt]; + } + + for (int i = 0; i < _n_recorded; i++) { + while (cur_evnt < _n_recorded_events && i == cur_evnt_ind) { + gclog_or_tty->print("Event: "); + print_event(gclog_or_tty, cur_evnt_kind); + gclog_or_tty->print_cr(""); + cur_evnt++; + if (cur_evnt < MaxRecordedEvents) { + cur_evnt_kind = _recorded_events[cur_evnt]; + cur_evnt_ind = _recorded_event_index[cur_evnt]; + } + } + gclog_or_tty->print("Added card " PTR_FORMAT " to region [" PTR_FORMAT "...]" + " for ref " PTR_FORMAT ".\n", + _recorded_cards[i], _recorded_regions[i]->bottom(), + _recorded_oops[i]); + } +} + +#ifndef PRODUCT +void HeapRegionRemSet::test() { + os::sleep(Thread::current(), (jlong)5000, false); + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + // Run with "-XX:G1LogRSRegionEntries=2", so that 1 and 5 end up in same + // hash bucket. 
+ HeapRegion* hr0 = g1h->region_at(0); + HeapRegion* hr1 = g1h->region_at(1); + HeapRegion* hr2 = g1h->region_at(5); + HeapRegion* hr3 = g1h->region_at(6); + HeapRegion* hr4 = g1h->region_at(7); + HeapRegion* hr5 = g1h->region_at(8); + + HeapWord* hr1_start = hr1->bottom(); + HeapWord* hr1_mid = hr1_start + HeapRegion::GrainWords/2; + HeapWord* hr1_last = hr1->end() - 1; + + HeapWord* hr2_start = hr2->bottom(); + HeapWord* hr2_mid = hr2_start + HeapRegion::GrainWords/2; + HeapWord* hr2_last = hr2->end() - 1; + + HeapWord* hr3_start = hr3->bottom(); + HeapWord* hr3_mid = hr3_start + HeapRegion::GrainWords/2; + HeapWord* hr3_last = hr3->end() - 1; + + HeapRegionRemSet* hrrs = hr0->rem_set(); + + // Make three references from region 0x101... + hrrs->add_reference((oop*)hr1_start); + hrrs->add_reference((oop*)hr1_mid); + hrrs->add_reference((oop*)hr1_last); + + hrrs->add_reference((oop*)hr2_start); + hrrs->add_reference((oop*)hr2_mid); + hrrs->add_reference((oop*)hr2_last); + + hrrs->add_reference((oop*)hr3_start); + hrrs->add_reference((oop*)hr3_mid); + hrrs->add_reference((oop*)hr3_last); + + // Now cause a coarsening. + hrrs->add_reference((oop*)hr4->bottom()); + hrrs->add_reference((oop*)hr5->bottom()); + + // Now, does iteration yield these three? 
+ HeapRegionRemSetIterator iter; + hrrs->init_iterator(&iter); + size_t sum = 0; + size_t card_index; + while (iter.has_next(card_index)) { + HeapWord* card_start = + G1CollectedHeap::heap()->bot_shared()->address_for_index(card_index); + gclog_or_tty->print_cr(" Card " PTR_FORMAT ".", card_start); + sum++; + } + guarantee(sum == 11 - 3 + 2048, "Failure"); + guarantee(sum == hrrs->occupied(), "Failure"); +} +#endif diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp new file mode 100644 index 00000000000..bad558a6c46 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp @@ -0,0 +1,470 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// Remembered set for a heap region. Represent a set of "cards" that +// contain pointers into the owner heap region. 
Cards are defined somewhat +// abstractly, in terms of what the "BlockOffsetTable" in use can parse. + +class G1CollectedHeap; +class G1BlockOffsetSharedArray; +class HeapRegion; +class HeapRegionRemSetIterator; +class PosParPRT; +class SparsePRT; + + +// The "_coarse_map" is a bitmap with one bit for each region, where set +// bits indicate that the corresponding region may contain some pointer +// into the owning region. + +// The "_fine_grain_entries" array is an open hash table of PerRegionTables +// (PRTs), indicating regions for which we're keeping the RS as a set of +// cards. The strategy is to cap the size of the fine-grain table, +// deleting an entry and setting the corresponding coarse-grained bit when +// we would overflow this cap. + +// We use a mixture of locking and lock-free techniques here. We allow +// threads to locate PRTs without locking, but threads attempting to alter +// a bucket list obtain a lock. This means that any failing attempt to +// find a PRT must be retried with the lock. It might seem dangerous that +// a read can find a PRT that is concurrently deleted. This is all right, +// because: +// +// 1) We only actually free PRT's at safe points (though we reuse them at +// other times). +// 2) We find PRT's in an attempt to add entries. If a PRT is deleted, +// it's _coarse_map bit is set, so the that we were attempting to add +// is represented. If a deleted PRT is re-used, a thread adding a bit, +// thinking the PRT is for a different region, does no harm. + +class OtherRegionsTable: public CHeapObj { + friend class HeapRegionRemSetIterator; + + G1CollectedHeap* _g1h; + Mutex _m; + HeapRegion* _hr; + + // These are protected by "_m". 
+ BitMap _coarse_map; + size_t _n_coarse_entries; + static jint _n_coarsenings; + + PosParPRT** _fine_grain_regions; + size_t _n_fine_entries; + +#define SAMPLE_FOR_EVICTION 1 +#if SAMPLE_FOR_EVICTION + size_t _fine_eviction_start; + static size_t _fine_eviction_stride; + static size_t _fine_eviction_sample_size; +#endif + + SparsePRT _sparse_table; + + // These are static after init. + static size_t _max_fine_entries; + static size_t _mod_max_fine_entries_mask; + + // Requires "prt" to be the first element of the bucket list appropriate + // for "hr". If this list contains an entry for "hr", return it, + // otherwise return "NULL". + PosParPRT* find_region_table(size_t ind, HeapRegion* hr) const; + + // Find, delete, and return a candidate PosParPRT, if any exists, + // adding the deleted region to the coarse bitmap. Requires the caller + // to hold _m, and the fine-grain table to be full. + PosParPRT* delete_region_table(); + + // If a PRT for "hr" is in the bucket list indicated by "ind" (which must + // be the correct index for "hr"), delete it and return true; else return + // false. + bool del_single_region_table(size_t ind, HeapRegion* hr); + + static jint _cache_probes; + static jint _cache_hits; + + // Indexed by thread X heap region, to minimize thread contention. + static int** _from_card_cache; + static size_t _from_card_cache_max_regions; + static size_t _from_card_cache_mem_size; + +public: + OtherRegionsTable(HeapRegion* hr); + + HeapRegion* hr() const { return _hr; } + + // For now. Could "expand" some tables in the future, so that this made + // sense. + void add_reference(oop* from, int tid); + + void add_reference(oop* from) { + return add_reference(from, 0); + } + + // Removes any entries shown by the given bitmaps to contain only dead + // objects. + void scrub(CardTableModRefBS* ctbs, BitMap* region_bm, BitMap* card_bm); + + // Not const because it takes a lock. 
+ size_t occupied() const; + size_t occ_fine() const; + size_t occ_coarse() const; + size_t occ_sparse() const; + + static jint n_coarsenings() { return _n_coarsenings; } + + // Returns size in bytes. + // Not const because it takes a lock. + size_t mem_size() const; + static size_t static_mem_size(); + static size_t fl_mem_size(); + + bool contains_reference(oop* from) const; + bool contains_reference_locked(oop* from) const; + + void clear(); + + // Specifically clear the from_card_cache. + void clear_fcc(); + + // "from_hr" is being cleared; remove any entries from it. + void clear_incoming_entry(HeapRegion* from_hr); + + // Declare the heap size (in # of regions) to the OtherRegionsTable. + // (Uses it to initialize from_card_cache). + static void init_from_card_cache(size_t max_regions); + + // Declares that only regions i s.t. 0 <= i < new_n_regs are in use. + // Make sure any entries for higher regions are invalid. + static void shrink_from_card_cache(size_t new_n_regs); + + static void print_from_card_cache(); + +}; + + +class HeapRegionRemSet : public CHeapObj { + friend class VMStructs; + friend class HeapRegionRemSetIterator; + +public: + enum Event { + Event_EvacStart, Event_EvacEnd, Event_RSUpdateEnd + }; + +private: + G1BlockOffsetSharedArray* _bosa; + G1BlockOffsetSharedArray* bosa() const { return _bosa; } + + static bool _par_traversal; + + OtherRegionsTable _other_regions; + + // One set bit for every region that has an entry for this one. + BitMap _outgoing_region_map; + + // Clear entries for the current region in any rem sets named in + // the _outgoing_region_map. + void clear_outgoing_entries(); + +#if MAYBE + // Audit the given card index. + void audit_card(size_t card_num, HeapRegion* hr, u2* rc_arr, + HeapRegionRemSet* empty_cards, size_t* one_obj_cards); + + // Assumes that "audit_stage1" has been called for "hr", to set up + // "shadow" and "new_rs" appropriately. 
Identifies individual popular + // objects; returns "true" if any are found. + bool audit_find_pop(HeapRegion* hr, u2* rc_arr); + + // Assumes that "audit_stage1" has been called for "hr", to set up + // "shadow" and "new_rs" appropriately. Identifies individual popular + // objects, and determines the number of entries in "new_rs" if any such + // popular objects are ignored. If this is sufficiently small, returns + // "false" to indicate that a constraint should not be introduced. + // Otherwise, returns "true" to indicate that we should go ahead with + // adding the constraint. + bool audit_stag(HeapRegion* hr, u2* rc_arr); + + + u2* alloc_rc_array(); + + SeqHeapRegionRemSet* audit_post(u2* rc_arr, size_t multi_obj_crds, + SeqHeapRegionRemSet* empty_cards); +#endif + + enum ParIterState { Unclaimed, Claimed, Complete }; + ParIterState _iter_state; + + // Unused unless G1RecordHRRSOops is true. + + static const int MaxRecorded = 1000000; + static oop** _recorded_oops; + static HeapWord** _recorded_cards; + static HeapRegion** _recorded_regions; + static int _n_recorded; + + static const int MaxRecordedEvents = 1000; + static Event* _recorded_events; + static int* _recorded_event_index; + static int _n_recorded_events; + + static void print_event(outputStream* str, Event evnt); + +public: + HeapRegionRemSet(G1BlockOffsetSharedArray* bosa, + HeapRegion* hr); + + static int num_par_rem_sets(); + static bool par_traversal() { return _par_traversal; } + static void set_par_traversal(bool b); + + HeapRegion* hr() const { + return _other_regions.hr(); + } + + size_t occupied() const { + return _other_regions.occupied(); + } + size_t occ_fine() const { + return _other_regions.occ_fine(); + } + size_t occ_coarse() const { + return _other_regions.occ_coarse(); + } + size_t occ_sparse() const { + return _other_regions.occ_sparse(); + } + + static jint n_coarsenings() { return OtherRegionsTable::n_coarsenings(); } + + /* Used in the sequential case. 
Returns "true" iff this addition causes + the size limit to be reached. */ + bool add_reference(oop* from) { + _other_regions.add_reference(from); + return false; + } + + /* Used in the parallel case. Returns "true" iff this addition causes + the size limit to be reached. */ + bool add_reference(oop* from, int tid) { + _other_regions.add_reference(from, tid); + return false; + } + + // Records the fact that the current region contains an outgoing + // reference into "to_hr". + void add_outgoing_reference(HeapRegion* to_hr); + + // Removes any entries shown by the given bitmaps to contain only dead + // objects. + void scrub(CardTableModRefBS* ctbs, BitMap* region_bm, BitMap* card_bm); + + // The region is being reclaimed; clear its remset, and any mention of + // entries for this region in other remsets. + void clear(); + + // Forget any entries due to pointers from "from_hr". + void clear_incoming_entry(HeapRegion* from_hr) { + _other_regions.clear_incoming_entry(from_hr); + } + +#if 0 + virtual void cleanup() = 0; +#endif + + // Should be called from single-threaded code. + void init_for_par_iteration(); + // Attempt to claim the region. Returns true iff this call caused an + // atomic transition from Unclaimed to Claimed. + bool claim_iter(); + // Sets the iteration state to "complete". + void set_iter_complete(); + // Returns "true" iff the region's iteration is complete. + bool iter_is_complete(); + + // Initialize the given iterator to iterate over this rem set. + void init_iterator(HeapRegionRemSetIterator* iter) const; + +#if 0 + // Apply the "do_card" method to the start address of every card in the + // rem set. Returns false if some application of the closure aborted. + virtual bool card_iterate(CardClosure* iter) = 0; +#endif + + // The actual # of bytes this hr_remset takes up. + size_t mem_size() { + return _other_regions.mem_size() + // This correction is necessary because the above includes the second + // part. 
+ + sizeof(this) - sizeof(OtherRegionsTable); + } + + // Returns the memory occupancy of all static data structures associated + // with remembered sets. + static size_t static_mem_size() { + return OtherRegionsTable::static_mem_size(); + } + + // Returns the memory occupancy of all free_list data structures associated + // with remembered sets. + static size_t fl_mem_size() { + return OtherRegionsTable::fl_mem_size(); + } + + bool contains_reference(oop* from) const { + return _other_regions.contains_reference(from); + } + void print() const; + +#if MAYBE + // We are about to introduce a constraint, requiring the collection time + // of the region owning this RS to be <= "hr", and forgetting pointers + // from the owning region to "hr." Before doing so, examines this rem + // set for pointers to "hr", possibly identifying some popular objects., + // and possibly finding some cards to no longer contain pointers to "hr", + // + // These steps may prevent the the constraint from being necessary; in + // which case returns a set of cards now thought to contain no pointers + // into HR. In the normal (I assume) case, returns NULL, indicating that + // we should go ahead and add the constraint. + virtual SeqHeapRegionRemSet* audit(HeapRegion* hr) = 0; +#endif + + // Called during a stop-world phase to perform any deferred cleanups. + // The second version may be called by parallel threads after then finish + // collection work. + static void cleanup(); + static void par_cleanup(); + + // Declare the heap size (in # of regions) to the HeapRegionRemSet(s). + // (Uses it to initialize from_card_cache). + static void init_heap(size_t max_regions) { + OtherRegionsTable::init_from_card_cache(max_regions); + } + + // Declares that only regions i s.t. 0 <= i < new_n_regs are in use. 
+ static void shrink_heap(size_t new_n_regs) { + OtherRegionsTable::shrink_from_card_cache(new_n_regs); + } + +#ifndef PRODUCT + static void print_from_card_cache() { + OtherRegionsTable::print_from_card_cache(); + } +#endif + + static void record(HeapRegion* hr, oop* f); + static void print_recorded(); + static void record_event(Event evnt); + + // Run unit tests. +#ifndef PRODUCT + static void test(); +#endif + +}; + +class HeapRegionRemSetIterator : public CHeapObj { + + // The region over which we're iterating. + const HeapRegionRemSet* _hrrs; + + // Local caching of HRRS fields. + const BitMap* _coarse_map; + PosParPRT** _fine_grain_regions; + + G1BlockOffsetSharedArray* _bosa; + G1CollectedHeap* _g1h; + + // The number yielded since initialization. + size_t _n_yielded_fine; + size_t _n_yielded_coarse; + size_t _n_yielded_sparse; + + // If true we're iterating over the coarse table; if false the fine + // table. + enum IterState { + Sparse, + Fine, + Coarse + }; + IterState _is; + + // In both kinds of iteration, heap offset of first card of current + // region. + size_t _cur_region_card_offset; + // Card offset within cur region. + size_t _cur_region_cur_card; + + // Coarse table iteration fields: + + // Current region index; + int _coarse_cur_region_index; + int _coarse_cur_region_cur_card; + + bool coarse_has_next(size_t& card_index); + + // Fine table iteration fields: + + // Index of bucket-list we're working on. + int _fine_array_index; + // Per Region Table we're doing within current bucket list. + PosParPRT* _fine_cur_prt; + + /* SparsePRT::*/ SparsePRTIter _sparse_iter; + + void fine_find_next_non_null_prt(); + + bool fine_has_next(); + bool fine_has_next(size_t& card_index); + +public: + // We require an iterator to be initialized before use, so the + // constructor does little. 
+ HeapRegionRemSetIterator(); + + void initialize(const HeapRegionRemSet* hrrs); + + // If there remains one or more cards to be yielded, returns true and + // sets "card_index" to one of those cards (which is then considered + // yielded.) Otherwise, returns false (and leaves "card_index" + // undefined.) + bool has_next(size_t& card_index); + + size_t n_yielded_fine() { return _n_yielded_fine; } + size_t n_yielded_coarse() { return _n_yielded_coarse; } + size_t n_yielded_sparse() { return _n_yielded_sparse; } + size_t n_yielded() { + return n_yielded_fine() + n_yielded_coarse() + n_yielded_sparse(); + } +}; + +#if 0 +class CardClosure: public Closure { +public: + virtual void do_card(HeapWord* card_start) = 0; +}; + +#endif diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegionSeq.cpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegionSeq.cpp new file mode 100644 index 00000000000..315e4a351b3 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionSeq.cpp @@ -0,0 +1,345 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_heapRegionSeq.cpp.incl"
+
+// Local to this file.
+
+// Comparison function used to sort the region array by address.
+// Returns -1 if *hr1p ends at or before the bottom of *hr2p, 1 if *hr2p
+// ends at or before the bottom of *hr1p, and 0 if both point at the same
+// region.  Two distinct regions that overlap trip the assert; if the
+// assert does not stop execution the function falls through to return 0.
+static int orderRegions(HeapRegion** hr1p, HeapRegion** hr2p) {
+  if ((*hr1p)->end() <= (*hr2p)->bottom()) return -1;
+  else if ((*hr2p)->end() <= (*hr1p)->bottom()) return 1;
+  else if (*hr1p == *hr2p) return 0;
+  else {
+    assert(false, "We should never compare distinct overlapping regions.");
+  }
+  return 0;
+}
+
+// Builds an empty sequence whose region array is created with an initial
+// capacity of max_size elements.
+HeapRegionSeq::HeapRegionSeq(const size_t max_size) :
+  _alloc_search_start(0),
+  // The line below is the worst bit of C++ hackery I've ever written
+  // (Detlefs, 11/23).  You should think of it as equivalent to
+  // "_regions(100, true)": initialize the growable array and inform it
+  // that it should allocate its elem array(s) on the C heap.  The first
+  // argument, however, is actually a comma expression (new-expr, 100).
+  // The purpose of the new_expr is to inform the growable array that it
+  // is *already* allocated on the C heap: it uses the placement syntax to
+  // keep it from actually doing any allocation.
+  //
+  // GrowableArray is a class template, so sizeof needs the instantiated
+  // type; the elements of _regions are HeapRegion*.
+  _regions((ResourceObj::operator new (sizeof(GrowableArray<HeapRegion*>),
+                                       (void*)&_regions,
+                                       ResourceObj::C_HEAP),
+            (int)max_size),
+           true),
+  _next_rr_candidate(0),
+  _seq_bottom(NULL)
+{}
+
+// Private methods.
+
+// Tries to place a humongous object of word_size words in a run of
+// consecutive regions, scanning forward from index "ind".
+//
+// A region can join the run only if it is empty, not reserved, and (unless
+// it is the first region of the run) starts exactly where the previous
+// region in the array ends.  The scan stops as soon as the run's combined
+// capacity, in words, reaches word_size, or the end of the array is hit.
+//
+// On success every region of the run is claimed, the first is tagged as
+// starting the humongous object and the others as continuing it, and the
+// bottom of the first region is returned.  Returns NULL if no suitable
+// run exists between "ind" and the end of the sequence.
+HeapWord*
+HeapRegionSeq::alloc_obj_from_region_index(int ind, size_t word_size) {
+  assert(G1CollectedHeap::isHumongous(word_size),
+         "Allocation size should be humongous");
+  int cur = ind;
+  int first = cur;
+  size_t sumSizes = 0;
+  while (cur < _regions.length() && sumSizes < word_size) {
+    // Loop invariant:
+    //  For all i in [first, cur):
+    //       _regions.at(i)->is_empty()
+    //    && _regions.at(i) is contiguous with its predecessor, if any
+    //  && sumSizes is the sum of the sizes of the regions in the interval
+    //       [first, cur)
+    HeapRegion* curhr = _regions.at(cur);
+    if (curhr->is_empty()
+        && !curhr->is_reserved()
+        && (first == cur
+            || (_regions.at(cur-1)->end() ==
+                curhr->bottom()))) {
+      sumSizes += curhr->capacity() / HeapWordSize;
+    } else {
+      // The run is broken: restart it at the region after this one.
+      first = cur + 1;
+      sumSizes = 0;
+    }
+    cur++;
+  }
+  if (sumSizes >= word_size) {
+    // The next search starts just past the run we are about to claim.
+    _alloc_search_start = cur;
+    // Mark the allocated regions as allocated.
+    bool zf = G1CollectedHeap::heap()->allocs_are_zero_filled();
+    HeapRegion* first_hr = _regions.at(first);
+    for (int i = first; i < cur; i++) {
+      HeapRegion* hr = _regions.at(i);
+      if (zf)
+        hr->ensure_zero_filled();
+      {
+        // The zero-fill state is only changed while holding ZF_mon.
+        MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
+        hr->set_zero_fill_allocated();
+      }
+      // Claim the region's whole capacity and cover it with a filler
+      // object that is also declared to the block offset table.
+      size_t sz = hr->capacity() / HeapWordSize;
+      HeapWord* tmp = hr->allocate(sz);
+      assert(tmp != NULL, "Humongous allocation failure");
+      MemRegion mr = MemRegion(tmp, sz);
+      SharedHeap::fill_region_with_object(mr);
+      hr->declare_filled_region_to_BOT(mr);
+      if (i == first) {
+        first_hr->set_startsHumongous();
+      } else {
+        assert(i > first, "sanity");
+        hr->set_continuesHumongous(first_hr);
+      }
+    }
+    // The object starts at the bottom of the first region; that region's
+    // top is set to the end of the object.
+    HeapWord* first_hr_bot = first_hr->bottom();
+    HeapWord* obj_end = first_hr_bot + word_size;
+    first_hr->set_top(obj_end);
+    return first_hr_bot;
+  } else {
+    // If we started from the beginning, we want to know why we can't alloc.
+    return NULL;
+  }
+}
+
+// Prints every maximal run of consecutive empty regions as
+// "<start index>:<run length>", followed by the total number of empty
+// regions.  Reserved regions count as empty only when reserved_are_empty
+// is true, in which case a run containing at least one reserved region is
+// suffixed with "(R)".  Regions that continue a humongous object are
+// skipped: they neither extend nor end a run.
+void HeapRegionSeq::print_empty_runs(bool reserved_are_empty) {
+  int empty_run = 0;
+  int n_empty = 0;
+  bool at_least_one_reserved = false;
+  // Only read while empty_run > 0, i.e. after it has been assigned below.
+  int empty_run_start;
+  for (int i = 0; i < _regions.length(); i++) {
+    HeapRegion* r = _regions.at(i);
+    if (r->continuesHumongous()) continue;
+    if (r->is_empty() && (reserved_are_empty || !r->is_reserved())) {
+      assert(!r->isHumongous(), "H regions should not be empty.");
+      if (empty_run == 0) empty_run_start = i;
+      empty_run++;
+      n_empty++;
+      if (r->is_reserved()) {
+        at_least_one_reserved = true;
+      }
+    } else {
+      // A non-empty region ends the current run, if any: report it.
+      if (empty_run > 0) {
+        gclog_or_tty->print(" %d:%d", empty_run_start, empty_run);
+        if (reserved_are_empty && at_least_one_reserved)
+          gclog_or_tty->print("(R)");
+        empty_run = 0;
+        at_least_one_reserved = false;
+      }
+    }
+  }
+  // Report a run that extends to the end of the sequence.
+  if (empty_run > 0) {
+    gclog_or_tty->print(" %d:%d", empty_run_start, empty_run);
+    if (reserved_are_empty && at_least_one_reserved) gclog_or_tty->print("(R)");
+  }
+  gclog_or_tty->print_cr(" [tot = %d]", n_empty);
+}
+
+// Returns the index that "hr" records for itself (hrs_index()), which is
+// -1 when the region is not in the sequence.  For any other value, asserts
+// that the array really holds "hr" at that index.
+int HeapRegionSeq::find(HeapRegion* hr) {
+  // FIXME: optimized for adjacent regions of fixed size.
+  int ind = hr->hrs_index();
+  if (ind != -1) {
+    assert(_regions.at(ind) == hr, "Mismatch");
+  }
+  return ind;
+}
+
+
+// Public methods.
+ +void HeapRegionSeq::insert(HeapRegion* hr) { + assert(!_regions.is_full(), "Too many elements in HeapRegionSeq"); + if (_regions.length() == 0 + || _regions.top()->end() <= hr->bottom()) { + hr->set_hrs_index(_regions.length()); + _regions.append(hr); + } else { + _regions.append(hr); + _regions.sort(orderRegions); + for (int i = 0; i < _regions.length(); i++) { + _regions.at(i)->set_hrs_index(i); + } + } + char* bot = (char*)_regions.at(0)->bottom(); + if (_seq_bottom == NULL || bot < _seq_bottom) _seq_bottom = bot; +} + +size_t HeapRegionSeq::length() { + return _regions.length(); +} + +size_t HeapRegionSeq::free_suffix() { + size_t res = 0; + int first = _regions.length() - 1; + int cur = first; + while (cur >= 0 && + (_regions.at(cur)->is_empty() + && !_regions.at(cur)->is_reserved() + && (first == cur + || (_regions.at(cur+1)->bottom() == + _regions.at(cur)->end())))) { + res++; + cur--; + } + return res; +} + +HeapWord* HeapRegionSeq::obj_allocate(size_t word_size) { + int cur = _alloc_search_start; + // Make sure "cur" is a valid index. + assert(cur >= 0, "Invariant."); + HeapWord* res = alloc_obj_from_region_index(cur, word_size); + if (res == NULL) + res = alloc_obj_from_region_index(0, word_size); + return res; +} + +void HeapRegionSeq::iterate(HeapRegionClosure* blk) { + iterate_from((HeapRegion*)NULL, blk); +} + +// The first argument r is the heap region at which iteration begins. +// This operation runs fastest when r is NULL, or the heap region for +// which a HeapRegionClosure most recently returned true, or the +// heap region immediately to its right in the sequence. In all +// other cases a linear search is required to find the index of r. + +void HeapRegionSeq::iterate_from(HeapRegion* r, HeapRegionClosure* blk) { + + // :::: FIXME :::: + // Static cache value is bad, especially when we start doing parallel + // remembered set update. For now just don't cache anything (the + // code in the def'd out blocks). 
+ +#if 0 + static int cached_j = 0; +#endif + int len = _regions.length(); + int j = 0; + // Find the index of r. + if (r != NULL) { +#if 0 + assert(cached_j >= 0, "Invariant."); + if ((cached_j < len) && (r == _regions.at(cached_j))) { + j = cached_j; + } else if ((cached_j + 1 < len) && (r == _regions.at(cached_j + 1))) { + j = cached_j + 1; + } else { + j = find(r); +#endif + if (j < 0) { + j = 0; + } +#if 0 + } +#endif + } + int i; + for (i = j; i < len; i += 1) { + int res = blk->doHeapRegion(_regions.at(i)); + if (res) { +#if 0 + cached_j = i; +#endif + blk->incomplete(); + return; + } + } + for (i = 0; i < j; i += 1) { + int res = blk->doHeapRegion(_regions.at(i)); + if (res) { +#if 0 + cached_j = i; +#endif + blk->incomplete(); + return; + } + } +} + +void HeapRegionSeq::iterate_from(int idx, HeapRegionClosure* blk) { + int len = _regions.length(); + int i; + for (i = idx; i < len; i++) { + if (blk->doHeapRegion(_regions.at(i))) { + blk->incomplete(); + return; + } + } + for (i = 0; i < idx; i++) { + if (blk->doHeapRegion(_regions.at(i))) { + blk->incomplete(); + return; + } + } +} + +MemRegion HeapRegionSeq::shrink_by(size_t shrink_bytes, + size_t& num_regions_deleted) { + assert(shrink_bytes % os::vm_page_size() == 0, "unaligned"); + assert(shrink_bytes % HeapRegion::GrainBytes == 0, "unaligned"); + + if (_regions.length() == 0) { + num_regions_deleted = 0; + return MemRegion(); + } + int j = _regions.length() - 1; + HeapWord* end = _regions.at(j)->end(); + HeapWord* last_start = end; + while (j >= 0 && shrink_bytes > 0) { + HeapRegion* cur = _regions.at(j); + // We have to leave humongous regions where they are, + // and work around them. 
+ if (cur->isHumongous()) { + return MemRegion(last_start, end); + } + cur->reset_zero_fill(); + assert(cur == _regions.top(), "Should be top"); + if (!cur->is_empty()) break; + shrink_bytes -= cur->capacity(); + num_regions_deleted++; + _regions.pop(); + last_start = cur->bottom(); + // We need to delete these somehow, but can't currently do so here: if + // we do, the ZF thread may still access the deleted region. We'll + // leave this here as a reminder that we have to do something about + // this. + // delete cur; + j--; + } + return MemRegion(last_start, end); +} + + +class PrintHeapRegionClosure : public HeapRegionClosure { +public: + bool doHeapRegion(HeapRegion* r) { + gclog_or_tty->print(PTR_FORMAT ":", r); + r->print(); + return false; + } +}; + +void HeapRegionSeq::print() { + PrintHeapRegionClosure cl; + iterate(&cl); +} diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegionSeq.hpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegionSeq.hpp new file mode 100644 index 00000000000..a79ff99f4a2 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionSeq.hpp @@ -0,0 +1,111 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +class HeapRegion; +class HeapRegionClosure; + +class HeapRegionSeq: public CHeapObj { + + // _regions is kept sorted by start address order, and no two regions are + // overlapping. + GrowableArray _regions; + + // The index in "_regions" at which to start the next allocation search. + // (For efficiency only; private to obj_allocate after initialization.) + int _alloc_search_start; + + // Attempts to allocate a block of the (assumed humongous) word_size, + // starting at the region "ind". + HeapWord* alloc_obj_from_region_index(int ind, size_t word_size); + + // Currently, we're choosing collection sets in a round-robin fashion, + // starting here. + int _next_rr_candidate; + + // The bottom address of the bottom-most region, or else NULL if there + // are no regions in the sequence. + char* _seq_bottom; + + public: + // Initializes "this" to the empty sequence of regions. + HeapRegionSeq(const size_t max_size); + + // Adds "hr" to "this" sequence. Requires "hr" not to overlap with + // any region already in "this". (Will perform better if regions are + // inserted in ascending address order.) + void insert(HeapRegion* hr); + + // Given a HeapRegion*, returns its index within _regions, + // or returns -1 if not found. + int find(HeapRegion* hr); + + // Requires the index to be valid, and return the region at the index. + HeapRegion* at(size_t i) { return _regions.at((int)i); } + + // Return the number of regions in the sequence. 
+ size_t length(); + + // Returns the number of contiguous regions at the end of the sequence + // that are available for allocation. + size_t free_suffix(); + + // Requires "word_size" to be humongous (in the technical sense). If + // possible, allocates a contiguous subsequence of the heap regions to + // satisfy the allocation, and returns the address of the beginning of + // that sequence, otherwise returns NULL. + HeapWord* obj_allocate(size_t word_size); + + // Apply the "doHeapRegion" method of "blk" to all regions in "this", + // in address order, terminating the iteration early + // if the "doHeapRegion" method returns "true". + void iterate(HeapRegionClosure* blk); + + // Apply the "doHeapRegion" method of "blk" to all regions in "this", + // starting at "r" (or first region, if "r" is NULL), in a circular + // manner, terminating the iteration early if the "doHeapRegion" method + // returns "true". + void iterate_from(HeapRegion* r, HeapRegionClosure* blk); + + // As above, but start from a given index in the sequence + // instead of a given heap region. + void iterate_from(int idx, HeapRegionClosure* blk); + + // Requires "shrink_bytes" to be a multiple of the page size and heap + // region granularity. Deletes as many "rightmost" completely free heap + // regions from the sequence as comprise shrink_bytes bytes. Returns the + // MemRegion indicating the region those regions comprised, and sets + // "num_regions_deleted" to the number of regions deleted. + MemRegion shrink_by(size_t shrink_bytes, size_t& num_regions_deleted); + + // If "addr" falls within a region in the sequence, return that region, + // or else NULL. + HeapRegion* addr_to_region(const void* addr); + + void print(); + + // Prints out runs of empty regions. If the arg is "true" reserved + // (popular regions are considered "empty". 
+ void print_empty_runs(bool reserved_are_empty); + +}; diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegionSeq.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegionSeq.inline.hpp new file mode 100644 index 00000000000..31f89d1440c --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionSeq.inline.hpp @@ -0,0 +1,40 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +inline HeapRegion* HeapRegionSeq::addr_to_region(const void* addr) { + assert(_seq_bottom != NULL, "bad _seq_bottom in addr_to_region"); + if ((char*) addr >= _seq_bottom) { + size_t diff = (size_t) pointer_delta((HeapWord*) addr, + (HeapWord*) _seq_bottom); + int index = (int) (diff >> HeapRegion::LogOfHRGrainWords); + assert(index >= 0, "invariant / paranoia"); + if (index < _regions.length()) { + HeapRegion* hr = _regions.at(index); + assert(hr->is_in_reserved(addr), + "addr_to_region is wrong..."); + return hr; + } + } + return NULL; +} diff --git a/hotspot/src/share/vm/gc_implementation/g1/ptrQueue.cpp b/hotspot/src/share/vm/gc_implementation/g1/ptrQueue.cpp new file mode 100644 index 00000000000..7d1092bdc2a --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/ptrQueue.cpp @@ -0,0 +1,208 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +# include "incls/_precompiled.incl" +# include "incls/_ptrQueue.cpp.incl" + +PtrQueue::PtrQueue(PtrQueueSet* qset_, bool perm) : + _qset(qset_), _buf(NULL), _index(0), _active(false), + _perm(perm), _lock(NULL) +{} + +PtrQueue::~PtrQueue() { + if (!_perm && _buf != NULL) { + if (_index == _sz) { + // No work to do. + qset()->deallocate_buffer(_buf); + } else { + // We must NULL out the unused entries, then enqueue. + for (size_t i = 0; i < _index; i += oopSize) { + _buf[byte_index_to_index((int)i)] = NULL; + } + qset()->enqueue_complete_buffer(_buf); + _buf = NULL; + } + } +} + + +static int byte_index_to_index(int ind) { + assert((ind % oopSize) == 0, "Invariant."); + return ind / oopSize; +} + +static int index_to_byte_index(int byte_ind) { + return byte_ind * oopSize; +} + +void PtrQueue::enqueue_known_active(void* ptr) { + assert(0 <= _index && _index <= _sz, "Invariant."); + assert(_index == 0 || _buf != NULL, "invariant"); + + while (_index == 0) { + handle_zero_index(); + } + assert(_index > 0, "postcondition"); + + _index -= oopSize; + _buf[byte_index_to_index((int)_index)] = ptr; + assert(0 <= _index && _index <= _sz, "Invariant."); +} + +void PtrQueue::locking_enqueue_completed_buffer(void** buf) { + assert(_lock->owned_by_self(), "Required."); + _lock->unlock(); + qset()->enqueue_complete_buffer(buf); + // We must relock only because the caller will unlock, for the normal + // case. 
+ _lock->lock_without_safepoint_check(); +} + + +PtrQueueSet::PtrQueueSet(bool notify_when_complete) : + _max_completed_queue(0), + _cbl_mon(NULL), _fl_lock(NULL), + _notify_when_complete(notify_when_complete), + _sz(0), + _completed_buffers_head(NULL), + _completed_buffers_tail(NULL), + _n_completed_buffers(0), + _process_completed_threshold(0), _process_completed(false), + _buf_free_list(NULL), _buf_free_list_sz(0) +{} + +void** PtrQueueSet::allocate_buffer() { + assert(_sz > 0, "Didn't set a buffer size."); + MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag); + if (_buf_free_list != NULL) { + void** res = _buf_free_list; + _buf_free_list = (void**)_buf_free_list[0]; + _buf_free_list_sz--; + // Just override the next pointer with NULL, just in case we scan this part + // of the buffer. + res[0] = NULL; + return res; + } else { + return NEW_C_HEAP_ARRAY(void*, _sz); + } +} + +void PtrQueueSet::deallocate_buffer(void** buf) { + assert(_sz > 0, "Didn't set a buffer size."); + MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag); + buf[0] = (void*)_buf_free_list; + _buf_free_list = buf; + _buf_free_list_sz++; +} + +void PtrQueueSet::reduce_free_list() { + // For now we'll adopt the strategy of deleting half. + MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag); + size_t n = _buf_free_list_sz / 2; + while (n > 0) { + assert(_buf_free_list != NULL, "_buf_free_list_sz must be wrong."); + void** head = _buf_free_list; + _buf_free_list = (void**)_buf_free_list[0]; + FREE_C_HEAP_ARRAY(void*,head); + n--; + } +} + +void PtrQueueSet::enqueue_complete_buffer(void** buf, size_t index, bool ignore_max_completed) { + // I use explicit locking here because there's a bailout in the middle. 
+ _cbl_mon->lock_without_safepoint_check(); + + Thread* thread = Thread::current(); + assert( ignore_max_completed || + thread->is_Java_thread() || + SafepointSynchronize::is_at_safepoint(), + "invariant" ); + ignore_max_completed = ignore_max_completed || !thread->is_Java_thread(); + + if (!ignore_max_completed && _max_completed_queue > 0 && + _n_completed_buffers >= (size_t) _max_completed_queue) { + _cbl_mon->unlock(); + bool b = mut_process_buffer(buf); + if (b) { + deallocate_buffer(buf); + return; + } + + // Otherwise, go ahead and enqueue the buffer. Must reaquire the lock. + _cbl_mon->lock_without_safepoint_check(); + } + + // Here we still hold the _cbl_mon. + CompletedBufferNode* cbn = new CompletedBufferNode; + cbn->buf = buf; + cbn->next = NULL; + cbn->index = index; + if (_completed_buffers_tail == NULL) { + assert(_completed_buffers_head == NULL, "Well-formedness"); + _completed_buffers_head = cbn; + _completed_buffers_tail = cbn; + } else { + _completed_buffers_tail->next = cbn; + _completed_buffers_tail = cbn; + } + _n_completed_buffers++; + + if (!_process_completed && + _n_completed_buffers == _process_completed_threshold) { + _process_completed = true; + if (_notify_when_complete) + _cbl_mon->notify_all(); + } + debug_only(assert_completed_buffer_list_len_correct_locked()); + _cbl_mon->unlock(); +} + +int PtrQueueSet::completed_buffers_list_length() { + int n = 0; + CompletedBufferNode* cbn = _completed_buffers_head; + while (cbn != NULL) { + n++; + cbn = cbn->next; + } + return n; +} + +void PtrQueueSet::assert_completed_buffer_list_len_correct() { + MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); + assert_completed_buffer_list_len_correct_locked(); +} + +void PtrQueueSet::assert_completed_buffer_list_len_correct_locked() { + guarantee((size_t)completed_buffers_list_length() == _n_completed_buffers, + "Completed buffer length is wrong."); +} + +void PtrQueueSet::set_buffer_size(size_t sz) { + assert(_sz == 0 && sz > 0, "Should be 
called only once."); + _sz = sz * oopSize; +} + +void PtrQueueSet::set_process_completed_threshold(size_t sz) { + _process_completed_threshold = sz; +} diff --git a/hotspot/src/share/vm/gc_implementation/g1/ptrQueue.hpp b/hotspot/src/share/vm/gc_implementation/g1/ptrQueue.hpp new file mode 100644 index 00000000000..3079200615f --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/ptrQueue.hpp @@ -0,0 +1,229 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// There are various techniques that require threads to be able to log +// addresses. For example, a generational write barrier might log +// the addresses of modified old-generation objects. This type supports +// this operation. + +class PtrQueueSet; + +class PtrQueue: public CHeapObj { + +protected: + // The ptr queue set to which this queue belongs. + PtrQueueSet* _qset; + + // Whether updates should be logged. + bool _active; + + // The buffer. 
+ void** _buf; + // The index at which an object was last enqueued. Starts at "_sz" + // (indicating an empty buffer) and goes towards zero. + size_t _index; + + // The size of the buffer. + size_t _sz; + + // If true, the queue is permanent, and doesn't need to deallocate + // its buffer in the destructor (since that obtains a lock which may not + // be legally locked by then. + bool _perm; + + // If there is a lock associated with this buffer, this is that lock. + Mutex* _lock; + + PtrQueueSet* qset() { return _qset; } + +public: + // Initialize this queue to contain a null buffer, and be part of the + // given PtrQueueSet. + PtrQueue(PtrQueueSet*, bool perm = false); + // Release any contained resources. + ~PtrQueue(); + + // Associate a lock with a ptr queue. + void set_lock(Mutex* lock) { _lock = lock; } + + void reset() { if (_buf != NULL) _index = _sz; } + + // Enqueues the given "obj". + void enqueue(void* ptr) { + if (!_active) return; + else enqueue_known_active(ptr); + } + + inline void handle_zero_index(); + void locking_enqueue_completed_buffer(void** buf); + + void enqueue_known_active(void* ptr); + + size_t size() { + assert(_sz >= _index, "Invariant."); + return _buf == NULL ? 0 : _sz - _index; + } + + // Set the "active" property of the queue to "b". An enqueue to an + // inactive thread is a no-op. Setting a queue to inactive resets its + // log to the empty state. + void set_active(bool b) { + _active = b; + if (!b && _buf != NULL) { + _index = _sz; + } else if (b && _buf != NULL) { + assert(_index == _sz, "invariant: queues are empty when activated."); + } + } + + static int byte_index_to_index(int ind) { + assert((ind % oopSize) == 0, "Invariant."); + return ind / oopSize; + } + + static int index_to_byte_index(int byte_ind) { + return byte_ind * oopSize; + } + + // To support compiler. 
+ static ByteSize byte_offset_of_index() { + return byte_offset_of(PtrQueue, _index); + } + static ByteSize byte_width_of_index() { return in_ByteSize(sizeof(size_t)); } + + static ByteSize byte_offset_of_buf() { + return byte_offset_of(PtrQueue, _buf); + } + static ByteSize byte_width_of_buf() { return in_ByteSize(sizeof(void*)); } + + static ByteSize byte_offset_of_active() { + return byte_offset_of(PtrQueue, _active); + } + static ByteSize byte_width_of_active() { return in_ByteSize(sizeof(bool)); } + +}; + +// A PtrQueueSet represents resources common to a set of pointer queues. +// In particular, the individual queues allocate buffers from this shared +// set, and return completed buffers to the set. +// All these variables are are protected by the TLOQ_CBL_mon. XXX ??? +class PtrQueueSet: public CHeapObj { + +protected: + + class CompletedBufferNode: public CHeapObj { + public: + void** buf; + size_t index; + CompletedBufferNode* next; + CompletedBufferNode() : buf(NULL), + index(0), next(NULL){ } + }; + + Monitor* _cbl_mon; // Protects the fields below. + CompletedBufferNode* _completed_buffers_head; + CompletedBufferNode* _completed_buffers_tail; + size_t _n_completed_buffers; + size_t _process_completed_threshold; + volatile bool _process_completed; + + // This (and the interpretation of the first element as a "next" + // pointer) are protected by the TLOQ_FL_lock. + Mutex* _fl_lock; + void** _buf_free_list; + size_t _buf_free_list_sz; + + // The size of all buffers in the set. + size_t _sz; + + bool _all_active; + + // If true, notify_all on _cbl_mon when the threshold is reached. + bool _notify_when_complete; + + // Maximum number of elements allowed on completed queue: after that, + // enqueuer does the work itself. Zero indicates no maximum. 
+ int _max_completed_queue; + + int completed_buffers_list_length(); + void assert_completed_buffer_list_len_correct_locked(); + void assert_completed_buffer_list_len_correct(); + +protected: + // A mutator thread does the the work of processing a buffer. + // Returns "true" iff the work is complete (and the buffer may be + // deallocated). + virtual bool mut_process_buffer(void** buf) { + ShouldNotReachHere(); + return false; + } + +public: + // Create an empty ptr queue set. + PtrQueueSet(bool notify_when_complete = false); + + // Because of init-order concerns, we can't pass these as constructor + // arguments. + void initialize(Monitor* cbl_mon, Mutex* fl_lock, + int max_completed_queue = 0) { + _max_completed_queue = max_completed_queue; + assert(cbl_mon != NULL && fl_lock != NULL, "Init order issue?"); + _cbl_mon = cbl_mon; _fl_lock = fl_lock; + } + + // Return an empty oop array of size _sz (required to be non-zero). + void** allocate_buffer(); + + // Return an empty buffer to the free list. The "buf" argument is + // required to be a pointer to the head of an array of length "_sz". + void deallocate_buffer(void** buf); + + // Declares that "buf" is a complete buffer. + void enqueue_complete_buffer(void** buf, size_t index = 0, + bool ignore_max_completed = false); + + bool completed_buffers_exist_dirty() { + return _n_completed_buffers > 0; + } + + bool process_completed_buffers() { return _process_completed; } + + bool active() { return _all_active; } + + // Set the buffer size. Should be called before any "enqueue" operation + // can be called. And should only be called once. + void set_buffer_size(size_t sz); + + // Get the buffer size. + size_t buffer_size() { return _sz; } + + // Set the number of completed buffers that triggers log processing. + void set_process_completed_threshold(size_t sz); + + // Must only be called at a safe point. Indicates that the buffer free + // list size may be reduced, if that is deemed desirable. 
+ void reduce_free_list(); + + size_t completed_buffers_num() { return _n_completed_buffers; } +}; diff --git a/hotspot/src/share/vm/gc_implementation/g1/ptrQueue.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/ptrQueue.inline.hpp new file mode 100644 index 00000000000..c3ff2260ffd --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/ptrQueue.inline.hpp @@ -0,0 +1,41 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +void PtrQueue::handle_zero_index() { + assert(0 == _index, "Precondition."); + // This thread records the full buffer and allocates a new one (while + // holding the lock if there is one). 
+ void** buf = _buf; + _buf = qset()->allocate_buffer(); + _sz = qset()->buffer_size(); + _index = _sz; + assert(0 <= _index && _index <= _sz, "Invariant."); + if (buf != NULL) { + if (_lock) { + locking_enqueue_completed_buffer(buf); + } else { + qset()->enqueue_complete_buffer(buf); + } + } +} diff --git a/hotspot/src/share/vm/gc_implementation/g1/satbQueue.cpp b/hotspot/src/share/vm/gc_implementation/g1/satbQueue.cpp new file mode 100644 index 00000000000..a3e74a41ce0 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/satbQueue.cpp @@ -0,0 +1,160 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_satbQueue.cpp.incl"
+
+// Applies "cl" to the entries currently in this queue's buffer, then marks
+// the buffer empty. Does nothing if the queue has no buffer.
+void ObjPtrQueue::apply_closure(ObjectClosure* cl) {
+  if (_buf != NULL) {
+    apply_closure_to_buffer(cl, _buf, _index, _sz);
+    _index = _sz;
+  }
+}
+
+// Applies "cl" to each non-NULL entry of "buf" between byte index "index"
+// (inclusive) and "sz" (exclusive). A NULL closure makes this a no-op.
+void ObjPtrQueue::apply_closure_to_buffer(ObjectClosure* cl,
+                                          void** buf, size_t index, size_t sz) {
+  if (cl == NULL) return;
+  for (size_t i = index; i < sz; i += oopSize) {
+    oop obj = (oop)buf[byte_index_to_index((int)i)];
+    // There can be NULL entries because of destructors.
+    if (obj != NULL) {
+      cl->do_object(obj);
+    }
+  }
+}
+#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
+#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
+#endif // _MSC_VER
+
+
+// Creates a set with no closures registered; the shared queue is created
+// as a permanent queue belonging to this set.
+SATBMarkQueueSet::SATBMarkQueueSet() :
+  PtrQueueSet(),
+  _closure(NULL), _par_closures(NULL),
+  _shared_satb_queue(this, true /*perm*/)
+{}
+
+// Second-stage initialization: forwards the monitor, free-list lock and
+// limit to the base class, attaches "lock" to the shared queue, and
+// allocates the per-worker closure array when ParallelGCThreads > 0.
+// The array's entries are not initialized here.
+void SATBMarkQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock,
+                                  int max_completed_queue,
+                                  Mutex* lock) {
+  PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue);
+  _shared_satb_queue.set_lock(lock);
+  if (ParallelGCThreads > 0) {
+    _par_closures = NEW_C_HEAP_ARRAY(ObjectClosure*, ParallelGCThreads);
+  }
+}
+
+
+// Forwards to handle_zero_index() on the given thread's SATB queue.
+void SATBMarkQueueSet::handle_zero_index_for_thread(JavaThread* t) {
+  t->satb_mark_queue().handle_zero_index();
+}
+
+// Records "b" as the set-wide active state and applies it to the SATB
+// queue of every Java thread.
+void SATBMarkQueueSet::set_active_all_threads(bool b) {
+  _all_active = b;
+  for(JavaThread* t = Threads::first(); t; t = t->next()) {
+    t->satb_mark_queue().set_active(b);
+  }
+}
+
+void SATBMarkQueueSet::set_closure(ObjectClosure* closure) {
+  _closure = closure;
+}
+
+// Registers the closure for parallel worker "i". Requires that the
+// per-worker array was allocated by initialize().
+void SATBMarkQueueSet::set_par_closure(int i, ObjectClosure* par_closure) {
+  assert(ParallelGCThreads > 0 && _par_closures != NULL, "Precondition");
+  _par_closures[i] = par_closure;
+}
+
+// Applies the registered closure to the partially filled buffer of every
+// Java thread, and then to the shared queue.
+void SATBMarkQueueSet::iterate_closure_all_threads() {
+  for(JavaThread* t = Threads::first(); t; t = t->next()) {
+    t->satb_mark_queue().apply_closure(_closure);
+  }
+  shared_satb_queue()->apply_closure(_closure);
+}
+
+// Parallel variant: a thread's queue is processed only by the worker whose
+// claim_oops_do() call on that thread succeeds.
+void SATBMarkQueueSet::par_iterate_closure_all_threads(int worker) {
+  SharedHeap* sh = SharedHeap::heap();
+  int parity = sh->strong_roots_parity();
+
+  for(JavaThread* t = Threads::first(); t; t = t->next()) {
+    if (t->claim_oops_do(true, parity)) {
+      t->satb_mark_queue().apply_closure(_par_closures[worker]);
+    }
+  }
+  // We'll have worker 0 do this one.
+  if (worker == 0) {
+    shared_satb_queue()->apply_closure(_par_closures[0]);
+  }
+}
+
+// Pops the head of the completed-buffer list under _cbl_mon, then, with
+// the monitor released, applies the selected closure to the whole buffer
+// and recycles it. Returns false if the list was empty.
+bool SATBMarkQueueSet::apply_closure_to_completed_buffer_work(bool par,
+                                                              int worker) {
+  CompletedBufferNode* nd = NULL;
+  {
+    MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
+    if (_completed_buffers_head != NULL) {
+      nd = _completed_buffers_head;
+      _completed_buffers_head = nd->next;
+      if (_completed_buffers_head == NULL) _completed_buffers_tail = NULL;
+      _n_completed_buffers--;
+      if (_n_completed_buffers == 0) _process_completed = false;
+    }
+  }
+  ObjectClosure* cl = (par ? _par_closures[worker] : _closure);
+  if (nd != NULL) {
+    // The scan starts at byte index 0 rather than at nd->index.
+    ObjPtrQueue::apply_closure_to_buffer(cl, nd->buf, 0, _sz);
+    deallocate_buffer(nd->buf);
+    delete nd;
+    return true;
+  } else {
+    return false;
+  }
+}
+
+// Discards all completed buffers without processing them and empties every
+// thread's queue and the shared queue. The list is detached under _cbl_mon
+// and the buffers are recycled after the monitor has been released.
+// NOTE(review): _process_completed is not cleared here even though the
+// count drops to zero -- confirm that this is intended.
+void SATBMarkQueueSet::abandon_partial_marking() {
+  CompletedBufferNode* buffers_to_delete = NULL;
+  {
+    MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
+    while (_completed_buffers_head != NULL) {
+      CompletedBufferNode* nd = _completed_buffers_head;
+      _completed_buffers_head = nd->next;
+      nd->next = buffers_to_delete;
+      buffers_to_delete = nd;
+    }
+    _completed_buffers_tail = NULL;
+    _n_completed_buffers = 0;
+    debug_only(assert_completed_buffer_list_len_correct_locked());
+  }
+  while (buffers_to_delete != NULL) {
+    CompletedBufferNode* nd = buffers_to_delete;
+    buffers_to_delete = nd->next;
+    deallocate_buffer(nd->buf);
+    delete nd;
+  }
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
+  // So we can safely manipulate these queues.
+  for (JavaThread* t = Threads::first(); t; t = t->next()) {
+    t->satb_mark_queue().reset();
+  }
+  shared_satb_queue()->reset();
+}
diff --git a/hotspot/src/share/vm/gc_implementation/g1/satbQueue.hpp b/hotspot/src/share/vm/gc_implementation/g1/satbQueue.hpp
new file mode 100644
index 00000000000..ab8bf5fa4ab
--- /dev/null
+++ b/hotspot/src/share/vm/gc_implementation/g1/satbQueue.hpp
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class ObjectClosure;
+class JavaThread;
+
+// A ptrQueue whose elements are "oops", pointers to object heads.
+class ObjPtrQueue: public PtrQueue {
+public:
+  ObjPtrQueue(PtrQueueSet* qset_, bool perm = false) :
+    PtrQueue(qset_, perm)
+  {}
+  // Apply the closure to all elements, and reset the index to make the
+  // buffer empty.
+  void apply_closure(ObjectClosure* cl);
+
+  // Apply the closure to all elements of "buf", down to "index" (inclusive.)
+  static void apply_closure_to_buffer(ObjectClosure* cl,
+                                      void** buf, size_t index, size_t sz);
+
+};
+
+
+
+// The PtrQueueSet for SATB marking queues: holds the registered closure(s)
+// and a shared queue in addition to the base-class buffer management.
+class SATBMarkQueueSet: public PtrQueueSet {
+  ObjectClosure* _closure;
+  ObjectClosure** _par_closures; // One per ParGCThread.
+
+  ObjPtrQueue _shared_satb_queue;
+
+  // Utility function to support sequential and parallel versions. If
+  // "par" is true, then "worker" is the par thread id; if "false", worker
+  // is ignored.
+  bool apply_closure_to_completed_buffer_work(bool par, int worker);
+
+
+public:
+  SATBMarkQueueSet();
+
+  void initialize(Monitor* cbl_mon, Mutex* fl_lock,
+                  int max_completed_queue = 0,
+                  Mutex* lock = NULL);
+
+  static void handle_zero_index_for_thread(JavaThread* t);
+
+  // Apply "set_active(b)" to all thread tloq's. Should be called only
+  // with the world stopped.
+  void set_active_all_threads(bool b);
+
+  // Register "blk" as "the closure" for all queues. Only one such closure
+  // is allowed. The "apply_closure_to_completed_buffer" method will apply
+  // this closure to a completed buffer, and "iterate_closure_all_threads"
+  // applies it to partially-filled buffers (the latter should only be done
+  // with the world stopped).
+  void set_closure(ObjectClosure* closure);
+  // Set the parallel closures: pointer is an array of pointers to
+  // closures, one for each parallel GC thread.
+  void set_par_closure(int i, ObjectClosure* closure);
+
+  // If there is a registered closure for buffers, apply it to all entries
+  // in all currently-active buffers. This should only be applied at a
+  // safepoint. (Currently must not be called in parallel; this should
+  // change in the future.)
+  void iterate_closure_all_threads();
+  // Parallel version of the above.
+  void par_iterate_closure_all_threads(int worker);
+
+  // If there exists some completed buffer, pop it, then apply the
+  // registered closure to all its elements, and return true. If no
+  // completed buffers exist, return false.
+  bool apply_closure_to_completed_buffer() {
+    return apply_closure_to_completed_buffer_work(false, 0);
+  }
+  // Parallel version of the above.
+  bool par_apply_closure_to_completed_buffer(int worker) {
+    return apply_closure_to_completed_buffer_work(true, worker);
+  }
+
+  ObjPtrQueue* shared_satb_queue() { return &_shared_satb_queue; }
+
+  // If a marking is being abandoned, reset any unprocessed log buffers.
+  void abandon_partial_marking();
+
+};
diff --git a/hotspot/src/share/vm/gc_implementation/g1/sparsePRT.cpp b/hotspot/src/share/vm/gc_implementation/g1/sparsePRT.cpp
new file mode 100644
index 00000000000..af25662f603
--- /dev/null
+++ b/hotspot/src/share/vm/gc_implementation/g1/sparsePRT.cpp
@@ -0,0 +1,530 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_sparsePRT.cpp.incl"
+
+#define SPARSE_PRT_VERBOSE 0
+
+#define UNROLL_CARD_LOOPS 1
+
+void SparsePRT::init_iterator(SparsePRTIter* sprt_iter) {  // Point "sprt_iter" at this table.
+  sprt_iter->init(this);
+}
+
+void SparsePRTEntry::init(short region_ind) {  // Bind the entry to "region_ind" and empty every card slot.
+  _region_ind = region_ind;
+  _next_index = NullEntry;
+#if UNROLL_CARD_LOOPS
+  assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll.");
+  _cards[0] = NullEntry;
+  _cards[1] = NullEntry;
+  _cards[2] = NullEntry;
+  _cards[3] = NullEntry;
+#else
+  for (int i = 0; i < CardsPerEntry; i++) _cards[i] = NullEntry;
+#endif
+}
+
+bool SparsePRTEntry::contains_card(short card_index) const {  // True iff some slot holds "card_index".
+#if UNROLL_CARD_LOOPS
+  assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll.");
+  if (_cards[0] == card_index) return true;
+  if (_cards[1] == card_index) return true;
+  if (_cards[2] == card_index) return true;
+  if (_cards[3] == card_index) return true;
+#else
+  for (int i = 0; i < CardsPerEntry; i++) {
+    if (_cards[i] == card_index) return true;
+  }
+#endif
+  // Otherwise, the card is not in this entry.
+  return false;
+}
+
+int SparsePRTEntry::num_valid_cards() const {  // Number of slots that hold a card.
+  int sum = 0;
+#if UNROLL_CARD_LOOPS
+  assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll.");
+  if (_cards[0] != NullEntry) sum++;
+  if (_cards[1] != NullEntry) sum++;
+  if (_cards[2] != NullEntry) sum++;
+  if (_cards[3] != NullEntry) sum++;
+#else
+  for (int i = 0; i < CardsPerEntry; i++) {
+    if (_cards[i] != NullEntry) sum++;
+  }
+#endif
+  // "sum" now counts the non-NullEntry slots.
+  return sum;
+}
+
+SparsePRTEntry::AddCardResult SparsePRTEntry::add_card(short card_index) {  // Returns found, added or overflow.
+#if UNROLL_CARD_LOOPS
+  assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll.");
+  short c = _cards[0];
+  if (c == card_index) return found;
+  if (c == NullEntry) { _cards[0] = card_index; return added; }
+  c = _cards[1];
+  if (c == card_index) return found;
+  if (c == NullEntry) { _cards[1] = card_index; return added; }
+  c = _cards[2];
+  if (c == card_index) return found;
+  if (c == NullEntry) { _cards[2] = card_index; return added; }
+  c = _cards[3];
+  if (c == card_index) return found;
+  if (c == NullEntry) { _cards[3] = card_index; return added; }
+#else
+  for (int i = 0; i < CardsPerEntry; i++) {
+    short c = _cards[i];
+    if (c == card_index) return found;
+    if (c == NullEntry) { _cards[i] = card_index; return added; }
+  }
+#endif
+  // Otherwise, we're full.
+  return overflow;
+}
+
+void SparsePRTEntry::copy_cards(short* cards) const {  // Copies all CardsPerEntry slots, NullEntry ones included.
+#if UNROLL_CARD_LOOPS
+  assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll.");
+  cards[0] = _cards[0];
+  cards[1] = _cards[1];
+  cards[2] = _cards[2];
+  cards[3] = _cards[3];
+#else
+  for (int i = 0; i < CardsPerEntry; i++) {
+    cards[i] = _cards[i];
+  }
+#endif
+}
+
+void SparsePRTEntry::copy_cards(SparsePRTEntry* e) const {  // Copies this entry's cards into "e".
+  copy_cards(&e->_cards[0]);
+}
+
+// ----------------------------------------------------------------------
+
+RSHashTable::RSHashTable(size_t capacity) :  // The mask is capacity-1; this file only passes 16 and doublings of it.
+  _capacity(capacity), _capacity_mask(capacity-1),
+  _occupied_entries(0), _occupied_cards(0),
+  _entries(NEW_C_HEAP_ARRAY(SparsePRTEntry, capacity)),
+  _buckets(NEW_C_HEAP_ARRAY(short, capacity)),
+  _next_deleted(NULL), _deleted(false),
+  _free_list(NullEntry), _free_region(0)
+{
+  clear();
+}
+
+RSHashTable::~RSHashTable() {
+  if (_entries != NULL) {
+    FREE_C_HEAP_ARRAY(SparsePRTEntry, _entries);
+    _entries = NULL;
+  }
+  if (_buckets != NULL) {
+    FREE_C_HEAP_ARRAY(short, _buckets);
+    _buckets = NULL;
+  }
+}
+
+void RSHashTable::clear() {  // Empties the table and resets the free-entry bookkeeping.
+  _occupied_entries = 0;
+  _occupied_cards = 0;
+  guarantee(_entries != NULL, "INV");
+  guarantee(_buckets != NULL, "INV");
+  // This will put -1 == NullEntry in the key field of all entries.
+  memset(_entries, -1, _capacity * sizeof(SparsePRTEntry));
+  memset(_buckets, -1, _capacity * sizeof(short));
+  _free_list = NullEntry;
+  _free_region = 0;
+}
+
+bool RSHashTable::add_card(short region_ind, short card_index) {  // Returns false only when the region's entry is full.
+  SparsePRTEntry* e = entry_for_region_ind_create(region_ind);
+  assert(e != NULL && e->r_ind() == region_ind,
+         "Postcondition of call above.");
+  SparsePRTEntry::AddCardResult res = e->add_card(card_index);
+  if (res == SparsePRTEntry::added) _occupied_cards++;
+#if SPARSE_PRT_VERBOSE
+  gclog_or_tty->print_cr(" after add_card[%d]: valid-cards = %d.",
+                         pointer_delta(e, _entries, sizeof(SparsePRTEntry)),
+                         e->num_valid_cards());
+#endif
+  assert(e->num_valid_cards() > 0, "Postcondition");
+  return res != SparsePRTEntry::overflow;
+}
+
+bool RSHashTable::get_cards(short region_ind, short* cards) {  // Copies the region's cards into "cards"; false if no entry.
+  short ind = (short) (region_ind & capacity_mask());
+  short cur_ind = _buckets[ind];
+  SparsePRTEntry* cur;
+  while (cur_ind != NullEntry &&
+         (cur = entry(cur_ind))->r_ind() != region_ind) {
+    cur_ind = cur->next_index();
+  }
+
+  if (cur_ind == NullEntry) return false;
+  // Otherwise...
+  assert(cur->r_ind() == region_ind, "Postcondition of loop + test above.");
+  assert(cur->num_valid_cards() > 0, "Inv");
+  cur->copy_cards(cards);
+  return true;
+}
+
+bool RSHashTable::delete_entry(short region_ind) {  // Unlinks and frees the region's entry; false if there is none.
+  short ind = (short) (region_ind & capacity_mask());
+  short* prev_loc = &_buckets[ind];
+  short cur_ind = *prev_loc;
+  SparsePRTEntry* cur;
+  while (cur_ind != NullEntry &&
+         (cur = entry(cur_ind))->r_ind() != region_ind) {
+    prev_loc = cur->next_index_addr();
+    cur_ind = *prev_loc;
+  }
+
+  if (cur_ind == NullEntry) return false;
+  // Otherwise, splice out "cur".
+  *prev_loc = cur->next_index();
+  _occupied_cards -= cur->num_valid_cards();
+  free_entry(cur_ind);
+  _occupied_entries--;
+  return true;
+}
+
+SparsePRTEntry* RSHashTable::entry_for_region_ind(short region_ind) const {  // Returns NULL when the region has no entry.
+  assert(occupied_entries() < capacity(), "Precondition");
+  short ind = (short) (region_ind & capacity_mask());
+  short cur_ind = _buckets[ind];
+  SparsePRTEntry* cur;
+  // XXX
+  // int k = 0;
+  while (cur_ind != NullEntry &&
+         (cur = entry(cur_ind))->r_ind() != region_ind) {
+    /*
+    k++;
+    if (k > 10) {
+      gclog_or_tty->print_cr("RSHashTable::entry_for_region_ind(%d): "
+                    "k = %d, cur_ind = %d.", region_ind, k, cur_ind);
+      if (k >= 1000) {
+        while (1) ;
+      }
+    }
+    */
+    cur_ind = cur->next_index();
+  }
+
+  if (cur_ind != NullEntry) {
+    assert(cur->r_ind() == region_ind, "Loop postcondition + test");
+    return cur;
+  } else {
+    return NULL;
+  }
+}
+
+SparsePRTEntry* RSHashTable::entry_for_region_ind_create(short region_ind) {  // Finds the entry, or allocates and links a new one.
+  SparsePRTEntry* res = entry_for_region_ind(region_ind);
+  if (res == NULL) {
+    short new_ind = alloc_entry();
+    assert(0 <= new_ind && (size_t)new_ind < capacity(), "There should be room.");
+    res = entry(new_ind);
+    res->init(region_ind);
+    // Insert at front.
+    short ind = (short) (region_ind & capacity_mask());
+    res->set_next_index(_buckets[ind]);
+    _buckets[ind] = new_ind;
+    _occupied_entries++;
+  }
+  return res;
+}
+
+short RSHashTable::alloc_entry() {  // Returns a free index in "_entries", or NullEntry if none is left.
+  short res;
+  if (_free_list != NullEntry) {
+    res = _free_list;
+    _free_list = entry(res)->next_index();
+    return res;
+  } else if ((size_t) _free_region+1 < capacity()) {  // NOTE(review): the +1 leaves the last slot unused -- confirm intent.
+    res = _free_region;
+    _free_region++;
+    return res;
+  } else {
+    return NullEntry;
+  }
+}
+
+
+void RSHashTable::free_entry(short fi) {  // Pushes "fi" onto the free list.
+  entry(fi)->set_next_index(_free_list);
+  _free_list = fi;
+}
+
+
+void RSHashTable::add_entry(SparsePRTEntry* e) {  // Copies "e" into this table's entry for the same region.
+  assert(e->num_valid_cards() > 0, "Precondition.");
+  SparsePRTEntry* e2 = entry_for_region_ind_create(e->r_ind());
+  e->copy_cards(e2);
+  _occupied_cards += e2->num_valid_cards();
+  assert(e2->num_valid_cards() > 0, "Postcondition.");
+}
+
+RSHashTable* RSHashTable::_head_deleted_list = NULL;
+
+void RSHashTable::add_to_deleted_list(RSHashTable* rsht) {  // Pushes "rsht" with a compare-and-swap retry loop.
+  assert(!rsht->deleted(), "Should delete only once.");
+  rsht->set_deleted(true);
+  RSHashTable* hd = _head_deleted_list;
+  while (true) {
+    rsht->_next_deleted = hd;
+    RSHashTable* res =
+      (RSHashTable*)
+      Atomic::cmpxchg_ptr(rsht, &_head_deleted_list, hd);
+    if (res == hd) return;
+    else hd = res;
+  }
+}
+
+RSHashTable* RSHashTable::get_from_deleted_list() {  // Pops one table, or returns NULL when the list is empty.
+  RSHashTable* hd = _head_deleted_list;
+  while (hd != NULL) {
+    RSHashTable* next = hd->next_deleted();
+    RSHashTable* res =
+      (RSHashTable*)
+      Atomic::cmpxchg_ptr(next, &_head_deleted_list, hd);
+    if (res == hd) {
+      hd->set_next_deleted(NULL);
+      hd->set_deleted(false);
+      return hd;
+    } else {
+      hd = res;
+    }
+  }
+  return NULL;
+}
+
+short /* RSHashTable:: */ RSHashTableIter::find_first_card_in_list() {
+  short res;
+  while (_bl_ind != RSHashTable::NullEntry) {
+    res = _rsht->entry(_bl_ind)->card(0);
+    if (res != SparsePRTEntry::NullEntry) {
+      return res;
+    } else {
+      _bl_ind = _rsht->entry(_bl_ind)->next_index();
+    }
+  }
+  // Otherwise, none found:
+  return SparsePRTEntry::NullEntry;
+}
+
+size_t /* RSHashTable:: */ RSHashTableIter::compute_card_ind(short ci) {
+  return
+    _heap_bot_card_ind
+    + (_rsht->entry(_bl_ind)->r_ind() * CardsPerRegion)
+    + ci;
+}
+
+bool /* RSHashTable:: */ RSHashTableIter::has_next(size_t& card_index) {  // On true, "card_index" holds the next card.
+  _card_ind++;
+  short ci;
+  if (_card_ind < SparsePRTEntry::CardsPerEntry &&
+      ((ci = _rsht->entry(_bl_ind)->card(_card_ind)) !=
+       SparsePRTEntry::NullEntry)) {
+    card_index = compute_card_ind(ci);
+    return true;
+  }
+  // Otherwise, must find the next valid entry.
+  _card_ind = 0;
+
+  if (_bl_ind != RSHashTable::NullEntry) {
+    _bl_ind = _rsht->entry(_bl_ind)->next_index();
+    ci = find_first_card_in_list();
+    if (ci != SparsePRTEntry::NullEntry) {
+      card_index = compute_card_ind(ci);
+      return true;
+    }
+  }
+  // If we didn't return above, must go to the next non-null table index.
+  _tbl_ind++;
+  while ((size_t)_tbl_ind < _rsht->capacity()) {
+    _bl_ind = _rsht->_buckets[_tbl_ind];
+    ci = find_first_card_in_list();
+    if (ci != SparsePRTEntry::NullEntry) {
+      card_index = compute_card_ind(ci);
+      return true;
+    }
+    // Otherwise, try next entry.
+    _tbl_ind++;
+  }
+  // Otherwise, there was no entry.
+  return false;
+}
+
+bool RSHashTable::contains_card(short region_index, short card_index) const {
+  SparsePRTEntry* e = entry_for_region_ind(region_index);
+  return (e != NULL && e->contains_card(card_index));
+}
+
+size_t RSHashTable::mem_size() const {  // Size of this object plus its two arrays.
+  return sizeof(*this) + capacity() * (sizeof(SparsePRTEntry) + sizeof(short));
+}
+
+
+// ----------------------------------------------------------------------
+
+SparsePRT* SparsePRT::_head_expanded_list = NULL;
+
+void SparsePRT::add_to_expanded_list(SparsePRT* sprt) {
+  // We could expand multiple times in a pause -- only put on list once.
+  if (sprt->expanded()) return;
+  sprt->set_expanded(true);
+  SparsePRT* hd = _head_expanded_list;
+  while (true) {
+    sprt->_next_expanded = hd;
+    SparsePRT* res =
+      (SparsePRT*)
+      Atomic::cmpxchg_ptr(sprt, &_head_expanded_list, hd);
+    if (res == hd) return;
+    else hd = res;
+  }
+}
+
+SparsePRT* SparsePRT::get_from_expanded_list() {  // Pops one table, or returns NULL when the list is empty.
+  SparsePRT* hd = _head_expanded_list;
+  while (hd != NULL) {
+    SparsePRT* next = hd->next_expanded();
+    SparsePRT* res =
+      (SparsePRT*)
+      Atomic::cmpxchg_ptr(next, &_head_expanded_list, hd);
+    if (res == hd) {
+      hd->set_next_expanded(NULL);
+      return hd;
+    } else {
+      hd = res;
+    }
+  }
+  return NULL;
+}
+
+
+void SparsePRT::cleanup_all() {
+  // First clean up all expanded tables so they agree on next and cur.
+  SparsePRT* sprt = get_from_expanded_list();
+  while (sprt != NULL) {
+    sprt->cleanup();
+    sprt = get_from_expanded_list();
+  }
+  // Now delete all deleted RSHashTables.
+  RSHashTable* rsht = RSHashTable::get_from_deleted_list();
+  while (rsht != NULL) {
+#if SPARSE_PRT_VERBOSE
+    gclog_or_tty->print_cr("About to delete RSHT " PTR_FORMAT ".", rsht);
+#endif
+    delete rsht;
+    rsht = RSHashTable::get_from_deleted_list();
+  }
+}
+
+
+SparsePRT::SparsePRT(HeapRegion* hr) :
+  _hr(hr), _expanded(false), _next_expanded(NULL)  // "_hr" was left uninitialized; the verbose logging dereferences it.
+{
+  _cur = new RSHashTable(InitialCapacity);
+  _next = _cur;
+}
+
+SparsePRT::~SparsePRT() {
+  assert(_next != NULL && _cur != NULL, "Inv");
+  if (_cur != _next) { delete _cur; }
+  delete _next;
+}
+
+
+size_t SparsePRT::mem_size() const {
+  // Only "_next" is counted: "_cur" either equals "_next" or is a
+  // superseded table that is retired at the next cleanup().
+  return sizeof(*this) + _next->mem_size();
+}
+
+bool SparsePRT::add_card(short region_id, short card_index) {  // Expands first once "_next" is more than half full.
+#if SPARSE_PRT_VERBOSE
+  gclog_or_tty->print_cr(" Adding card %d from region %d to region %d sparse.",
+                         card_index, region_id, _hr->hrs_index());
+#endif
+  if (_next->occupied_entries() * 2 > _next->capacity()) {
+    expand();
+  }
+  return _next->add_card(region_id, card_index);
+}
+
+bool SparsePRT::get_cards(short region_id, short* cards) {
+  return _next->get_cards(region_id, cards);
+}
+
+bool SparsePRT::delete_entry(short region_id) {
+  return _next->delete_entry(region_id);
+}
+
+void SparsePRT::clear() {
+  // If they differ, _next is bigger than cur, so next has no chance of
+  // being the initial size.
+  if (_next != _cur) {
+    delete _next;
+  }
+
+  if (_cur->capacity() != InitialCapacity) {
+    delete _cur;
+    _cur = new RSHashTable(InitialCapacity);
+  } else {
+    _cur->clear();
+  }
+  _next = _cur;
+}
+
+void SparsePRT::cleanup() {
+  if (_cur != _next) RSHashTable::add_to_deleted_list(_cur);  // expand() never queues "_cur"; retire it here so it is not leaked.
+  _cur = _next;  // Make the current and next tables agree.
+  set_expanded(false);  // No longer on the expanded list, so a later expand() can register this table again.
+}
+
+void SparsePRT::expand() {  // Replaces "_next" with a table of twice the capacity and copies the valid entries over.
+  RSHashTable* last = _next;
+  _next = new RSHashTable(last->capacity() * 2);
+
+#if SPARSE_PRT_VERBOSE
+  gclog_or_tty->print_cr(" Expanded sparse table for %d to %d.",
+                         _hr->hrs_index(), _next->capacity());
+#endif
+  for (size_t i = 0; i < last->capacity(); i++) {
+    SparsePRTEntry* e = last->entry((int)i);
+    if (e->valid_entry()) {
+#if SPARSE_PRT_VERBOSE
+      gclog_or_tty->print_cr(" During expansion, transferred entry for %d.",
+                             e->r_ind());
+#endif
+      _next->add_entry(e);
+    }
+  }
+  if (last != _cur)
+    RSHashTable::add_to_deleted_list(last);
+  add_to_expanded_list(this);
+}
diff --git a/hotspot/src/share/vm/gc_implementation/g1/sparsePRT.hpp b/hotspot/src/share/vm/gc_implementation/g1/sparsePRT.hpp
new file mode 100644
index 00000000000..e8fa9b00be1
--- /dev/null
+++ b/hotspot/src/share/vm/gc_implementation/g1/sparsePRT.hpp
@@ -0,0 +1,308 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Sparse remembered set for a heap region (the "owning" region). Maps
+// indices of other regions to short sequences of cards in the other region
+// that might contain pointers into the owner region.
+
+// These tables only expand while they are accessed in parallel --
+// deletions may be done in single-threaded code. This allows us to allow
+// unsynchronized reads/iterations, as long as expansions caused by
+// insertions only enqueue old versions for deletions, but do not delete
+// old versions synchronously.
+
+
+class SparsePRTEntry {
+public:
+  enum SomePublicConstants {
+    CardsPerEntry = (short)4,
+    NullEntry = (short)-1,
+    DeletedEntry = (short)-2
+  };
+
+private:
+  short _region_ind;
+  short _next_index;
+  short _cards[CardsPerEntry];
+
+public:
+
+  // Set the region_ind to the given value, and delete all cards.
+  inline void init(short region_ind);
+
+  short r_ind() const { return _region_ind; }
+  bool valid_entry() const { return r_ind() >= 0; }
+  void set_r_ind(short rind) { _region_ind = rind; }
+
+  short next_index() const { return _next_index; }
+  short* next_index_addr() { return &_next_index; }
+  void set_next_index(short ni) { _next_index = ni; }
+
+  // Returns "true" iff the entry contains the given card index.
+  inline bool contains_card(short card_index) const;
+
+  // Returns the number of card slots that are not NullEntry.
+  inline int num_valid_cards() const;
+
+  // Adds the given card index to the entry. Returns "found" if the card
+  // was already present, "added" if it was stored in a free slot, and
+  // "overflow" if the entry is full and the card could not be added.
+  enum AddCardResult {
+    overflow,
+    found,
+    added
+  };
+  inline AddCardResult add_card(short card_index);
+
+  // Copy the current entry's cards into "cards".
+  inline void copy_cards(short* cards) const;
+  // Copy the current entry's cards into the "_cards" array of "e".
+  inline void copy_cards(SparsePRTEntry* e) const;
+
+  inline short card(int i) const { return _cards[i]; }
+};
+
+
+class RSHashTable : public CHeapObj {
+
+  friend class RSHashTableIter;
+
+  enum SomePrivateConstants {
+    NullEntry = -1
+  };
+
+  size_t _capacity;
+  size_t _capacity_mask;
+  size_t _occupied_entries;
+  size_t _occupied_cards;
+
+  SparsePRTEntry* _entries;
+  short* _buckets;
+  short _free_region;
+  short _free_list;
+
+  static RSHashTable* _head_deleted_list;
+  RSHashTable* _next_deleted;
+  RSHashTable* next_deleted() { return _next_deleted; }
+  void set_next_deleted(RSHashTable* rsht) { _next_deleted = rsht; }
+  bool _deleted;
+  void set_deleted(bool b) { _deleted = b; }
+
+  // Requires that the caller hold a lock preventing parallel modifying
+  // operations, and that the table be less than completely full. If
+  // an entry for "region_ind" is already in the table, finds it and
+  // returns its address; otherwise returns "NULL."
+  SparsePRTEntry* entry_for_region_ind(short region_ind) const;
+
+  // Requires that the caller hold a lock preventing parallel modifying
+  // operations, and that the table be less than completely full. If
+  // an entry for "region_ind" is already in the table, finds it and
+  // returns its address; otherwise allocates, initializes, inserts and
+  // returns a new entry for "region_ind".
+  SparsePRTEntry* entry_for_region_ind_create(short region_ind);
+
+  // Returns the index of the next free entry in "_entries", or NullEntry if there is none.
+  short alloc_entry();
+  // Declares the entry "fi" to be free. (It must have already been
+  // deleted from any bucket lists.)
+  void free_entry(short fi);
+
+public:
+  RSHashTable(size_t capacity);
+  ~RSHashTable();
+
+  // Attempts to ensure that the given card_index in the given region is in
+  // the sparse table. If successful (because the card was already
+  // present, or because it was successfully added) returns "true".
+  // Otherwise, returns "false" to indicate that the addition would
+  // overflow the entry for the region. The caller must transfer these
+  // entries to a larger-capacity representation.
+  bool add_card(short region_id, short card_index);
+
+  bool get_cards(short region_id, short* cards);
+  bool delete_entry(short region_id);
+
+  bool contains_card(short region_id, short card_index) const;
+
+  void add_entry(SparsePRTEntry* e);
+
+  void clear();
+
+  size_t capacity() const { return _capacity; }
+  size_t capacity_mask() const { return _capacity_mask; }
+  size_t occupied_entries() const { return _occupied_entries; }
+  size_t occupied_cards() const { return _occupied_cards; }
+  size_t mem_size() const;
+  bool deleted() { return _deleted; }
+
+  SparsePRTEntry* entry(int i) const { return &_entries[i]; }
+
+  void print();
+
+  static void add_to_deleted_list(RSHashTable* rsht);
+  static RSHashTable* get_from_deleted_list();
+
+
+};
+
+  // ValueObj because will be embedded in HRRS iterator. NOTE(review): the class below derives from CHeapObj -- confirm which is intended.
+class RSHashTableIter: public CHeapObj {
+    short _tbl_ind;
+    short _bl_ind;
+    short _card_ind;
+    RSHashTable* _rsht;
+    size_t _heap_bot_card_ind;
+
+    enum SomePrivateConstants {
+      CardsPerRegion = HeapRegion::GrainBytes >> CardTableModRefBS::card_shift
+    };
+
+    // If the bucket list pointed to by _bl_ind contains a card, sets
+    // _bl_ind to the index of that entry, and returns the card.
+    // Otherwise, returns SparsePRTEntry::NullEntry.
+    short find_first_card_in_list();
+    // Computes the proper card index for the card whose offset in the
+    // current region (as indicated by _bl_ind) is "ci".
+    // This is subject to errors when there is iteration concurrent with
+    // modification, but these errors should be benign.
+    size_t compute_card_ind(short ci);
+
+ public:
+    RSHashTableIter(size_t heap_bot_card_ind) :
+      _tbl_ind(RSHashTable::NullEntry),
+      _bl_ind(RSHashTable::NullEntry),
+      _card_ind((SparsePRTEntry::CardsPerEntry-1)),
+      _rsht(NULL),
+      _heap_bot_card_ind(heap_bot_card_ind)
+    {}
+
+    void init(RSHashTable* rsht) {
+      _rsht = rsht;
+      _tbl_ind = -1; // So that first increment gets to 0.
+      _bl_ind = RSHashTable::NullEntry;
+      _card_ind = (SparsePRTEntry::CardsPerEntry-1);
+    }
+
+    bool has_next(size_t& card_index);
+
+  };
+
+// Concurrent access to a SparsePRT must be serialized by some external
+// mutex.
+
+class SparsePRTIter;
+
+class SparsePRT : public CHeapObj {
+  // Iterations are done on the _cur hash table, since they only need to
+  // see entries visible at the start of a collection pause.
+  // All other operations are done using the _next hash table.
+  RSHashTable* _cur;
+  RSHashTable* _next;
+
+  HeapRegion* _hr;
+
+  enum SomeAdditionalPrivateConstants {
+    InitialCapacity = 16
+  };
+
+  void expand();
+
+  bool _expanded;
+
+  bool expanded() { return _expanded; }
+  void set_expanded(bool b) { _expanded = b; }
+
+  SparsePRT* _next_expanded;
+
+  SparsePRT* next_expanded() { return _next_expanded; }
+  void set_next_expanded(SparsePRT* nxt) { _next_expanded = nxt; }
+
+
+  static SparsePRT* _head_expanded_list;
+
+public:
+  SparsePRT(HeapRegion* hr);
+
+  ~SparsePRT();
+
+  size_t occupied() const { return _next->occupied_cards(); }
+  size_t mem_size() const;
+
+  // Attempts to ensure that the given card_index in the given region is in
+  // the sparse table. If successful (because the card was already
+  // present, or because it was successfully added) returns "true".
+  // Otherwise, returns "false" to indicate that the addition would
+  // overflow the entry for the region. The caller must transfer these
+  // entries to a larger-capacity representation.
+  bool add_card(short region_id, short card_index);
+
+  // If the table holds an entry for "region_ind", copies its
+  // cards into "cards", which must be an array of length at least
+  // "CardsPerEntry", and returns "true"; otherwise, returns "false".
+  bool get_cards(short region_ind, short* cards);
+
+  // If there is an entry for "region_ind", removes it and returns "true";
+  // otherwise returns "false."
+  bool delete_entry(short region_ind);
+
+  // Clear the table, and reinitialize to initial capacity.
+  void clear();
+
+  // Ensure that "_cur" and "_next" point to the same table.
+  void cleanup();
+
+  // Clean up all tables on the expanded list. Called single threaded.
+  static void cleanup_all();
+  RSHashTable* next() const { return _next; }
+
+
+  void init_iterator(SparsePRTIter* sprt_iter);
+
+  static void add_to_expanded_list(SparsePRT* sprt);
+  static SparsePRT* get_from_expanded_list();
+
+  bool contains_card(short region_id, short card_index) const {
+    return _next->contains_card(region_id, card_index);
+  }
+
+#if 0
+  void verify_is_cleared();
+  void print();
+#endif
+};
+
+
+class SparsePRTIter: public /* RSHashTable:: */RSHashTableIter {
+public:
+  SparsePRTIter(size_t heap_bot_card_ind) :
+    /* RSHashTable:: */RSHashTableIter(heap_bot_card_ind)
+  {}
+
+  void init(const SparsePRT* sprt) {
+    RSHashTableIter::init(sprt->next());  // NOTE(review): iterates "_next", while the SparsePRT class comment says iteration uses "_cur" -- confirm.
+  }
+  bool has_next(size_t& card_index) {
+    return RSHashTableIter::has_next(card_index);
+  }
+};
diff --git a/hotspot/src/share/vm/gc_implementation/g1/survRateGroup.cpp b/hotspot/src/share/vm/gc_implementation/g1/survRateGroup.cpp
new file mode 100644
index 00000000000..0f2a5c95bc1
--- /dev/null
+++ b/hotspot/src/share/vm/gc_implementation/g1/survRateGroup.cpp
@@ -0,0 +1,264 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_survRateGroup.cpp.incl"
+
+SurvRateGroup::SurvRateGroup(G1CollectorPolicy* g1p,
+                             const char* name,
+                             size_t summary_surv_rates_len) :
+    _g1p(g1p), _name(name),
+    _all_regions_allocated(0),
+    _curr_length(0), _scan_only_prefix(0), _setup_seq_num(0),
+    _array_length(0), _surv_rate(NULL), _accum_surv_rate_pred(NULL),
+    _accum_surv_rate(0.0), _surv_rate_pred(NULL), _last_pred(0.0),
+    _summary_surv_rates_len(summary_surv_rates_len),
+    _summary_surv_rates_max_len(0),
+    _summary_surv_rates(NULL) {
+
+  // the following will set up the arrays with length 1
+  _curr_length = 1;
+  stop_adding_regions();
+  guarantee( _array_length == 1, "invariant" );
+  guarantee( _surv_rate_pred[0] != NULL, "invariant" );
+  _surv_rate_pred[0]->add(0.4);  // Seed the first predictor with an initial sample.
+  all_surviving_words_recorded(false);
+  _curr_length = 0;
+
+  if (summary_surv_rates_len > 0) {
+    size_t length = summary_surv_rates_len;
+    _summary_surv_rates = NEW_C_HEAP_ARRAY(NumberSeq*, length);
+    if (_summary_surv_rates == NULL) {
+      vm_exit_out_of_memory(sizeof(NumberSeq*) * length,
+                            "Not enough space for surv rate summary");
+    }
+    for (size_t i = 0; i < length; ++i)
+      _summary_surv_rates[i] = new NumberSeq();
+  }
+
+  start_adding_regions();
+}
+
+void
+SurvRateGroup::start_adding_regions() {  // Restart the length at the scan-only prefix and zero the accumulated rate.
+  _setup_seq_num = _array_length;
+  _curr_length = _scan_only_prefix;
+  _accum_surv_rate = 0.0;
+
+#if 0
+  gclog_or_tty->print_cr("start adding regions, seq num %d, length %d",
+                         _setup_seq_num, _curr_length);
+#endif // 0
+}
+
+void
+SurvRateGroup::stop_adding_regions() {  // Grow the arrays to "_curr_length" if needed, then zero "_surv_rate".
+  size_t length = _curr_length;
+
+#if 0
+  gclog_or_tty->print_cr("stop adding regions, length %d", length);
+#endif // 0
+
+  if (length > _array_length) {
+    double* old_surv_rate = _surv_rate;
+    double* old_accum_surv_rate_pred = _accum_surv_rate_pred;
+    TruncatedSeq** old_surv_rate_pred = _surv_rate_pred;
+
+    _surv_rate = NEW_C_HEAP_ARRAY(double, length);
+    if (_surv_rate == NULL) {
+      vm_exit_out_of_memory(sizeof(double) * length,
+                            "Not enough space for surv rate array.");
+    }
+    _accum_surv_rate_pred = NEW_C_HEAP_ARRAY(double, length);
+    if (_accum_surv_rate_pred == NULL) {
+      vm_exit_out_of_memory(sizeof(double) * length,
+                     "Not enough space for accum surv rate pred array.");
+    }
+    _surv_rate_pred = NEW_C_HEAP_ARRAY(TruncatedSeq*, length);
+    if (_surv_rate_pred == NULL) {  // Test the array just allocated; this used to re-test "_surv_rate".
+      vm_exit_out_of_memory(sizeof(TruncatedSeq*) * length,
+                            "Not enough space for surv rate pred array.");
+    }
+
+    for (size_t i = 0; i < _array_length; ++i)  // Keep the existing predictors; only the pointer array is new.
+      _surv_rate_pred[i] = old_surv_rate_pred[i];
+
+#if 0
+    gclog_or_tty->print_cr("stop adding regions, new seqs %d to %d",
+                  _array_length, length - 1);
+#endif // 0
+
+    for (size_t i = _array_length; i < length; ++i) {
+      _surv_rate_pred[i] = new TruncatedSeq(10);
+      // _surv_rate_pred[i]->add(last_pred);
+    }
+
+    _array_length = length;
+
+    if (old_surv_rate != NULL)
+      FREE_C_HEAP_ARRAY(double, old_surv_rate);
+    if (old_accum_surv_rate_pred != NULL)
+      FREE_C_HEAP_ARRAY(double, old_accum_surv_rate_pred);
+    if (old_surv_rate_pred != NULL)
+      FREE_C_HEAP_ARRAY(TruncatedSeq*, old_surv_rate_pred);  // Element type now matches the allocation above.
+  }
+
+  for (size_t i = 0; i < _array_length; ++i)
+    _surv_rate[i] = 0.0;
+}
+
+double
+SurvRateGroup::accum_surv_rate(size_t adjustment) {  // Accumulated rate, plus one more prediction when "adjustment" is 1.
+  // we might relax this one in the future...
+  guarantee( adjustment == 0 || adjustment == 1, "pre-condition" );
+
+  double ret = _accum_surv_rate;
+  if (adjustment > 0) {
+    TruncatedSeq* seq = get_seq(_curr_length+1);  // NOTE(review): next_age_index() uses get_seq(_curr_length); confirm the +1 is intended.
+    double surv_rate = _g1p->get_new_prediction(seq);
+    ret += surv_rate;
+  }
+
+  return ret;
+}
+
+int
+SurvRateGroup::next_age_index() {  // Account for one more region and return the new allocated-region count.
+  TruncatedSeq* seq = get_seq(_curr_length);
+  double surv_rate = _g1p->get_new_prediction(seq);
+  _accum_surv_rate += surv_rate;
+
+  ++_curr_length;
+  return (int) ++_all_regions_allocated;
+}
+
+void
+SurvRateGroup::record_scan_only_prefix(size_t scan_only_prefix) {
+  guarantee( scan_only_prefix <= _curr_length, "pre-condition" );
+  _scan_only_prefix = scan_only_prefix;
+}
+
+void
+SurvRateGroup::record_surviving_words(int age_in_group, size_t surv_words) {  // Record one region's survival rate.
+  guarantee( 0 <= age_in_group && (size_t) age_in_group < _curr_length,
+             "pre-condition" );
+  guarantee( _surv_rate[age_in_group] <= 0.00001,
+             "should only update each slot once" );
+
+  double surv_rate = (double) surv_words / (double) HeapRegion::GrainWords;
+  _surv_rate[age_in_group] = surv_rate;
+  _surv_rate_pred[age_in_group]->add(surv_rate);
+  if ((size_t)age_in_group < _summary_surv_rates_len) {
+    _summary_surv_rates[age_in_group]->add(surv_rate);
+    if ((size_t)(age_in_group+1) > _summary_surv_rates_max_len)
+      _summary_surv_rates_max_len = age_in_group+1;
+  }
+}
+
+void
+SurvRateGroup::all_surviving_words_recorded(bool propagate) {  // Recompute the accumulated predictions for every age.
+  if (propagate && _curr_length > 0) { // conservative
+    double surv_rate = _surv_rate_pred[_curr_length-1]->last();
+
+#if 0
+    gclog_or_tty->print_cr("propagating %1.2lf from %d to %d",
+                  surv_rate, _curr_length, _array_length - 1);
+#endif // 0
+
+    for (size_t i = _curr_length; i < _array_length; ++i) {
+      guarantee( _surv_rate[i] <= 0.00001,
+                 "the slot should not have been updated" );
+      _surv_rate_pred[i]->add(surv_rate);
+    }
+  }
+
+  double accum = 0.0;
+  double pred = 0.0;
+  for (size_t i = 0; i < _array_length; ++i) {
+    pred = _g1p->get_new_prediction(_surv_rate_pred[i]);
+    if (pred > 1.0) pred = 1.0;
+    accum += pred;
+    _accum_surv_rate_pred[i] = accum;
+    // gclog_or_tty->print_cr("age %3d, accum %10.2lf", i, accum);
+  }
+  _last_pred = pred;
+}
+
+#ifndef PRODUCT
+void
+SurvRateGroup::print() {  // size_t values are cast to int because the formats use %d.
+  gclog_or_tty->print_cr("Surv Rate Group: %s (%d entries, %d scan-only)",
+                _name, (int) _curr_length, (int) _scan_only_prefix);
+  for (size_t i = 0; i < _curr_length; ++i) {
+    gclog_or_tty->print_cr(" age %4d surv rate %6.2lf %% pred %6.2lf %%%s",
+                  (int) i, _surv_rate[i] * 100.0,
+                  _g1p->get_new_prediction(_surv_rate_pred[i]) * 100.0,
+                  (i < _scan_only_prefix) ? " S-O" : " ");
+  }
+}
+
+void
+SurvRateGroup::print_surv_rate_summary() {  // First ten ages one per row, the rest averaged in groups of ten.
+  size_t length = _summary_surv_rates_max_len;
+  if (length == 0)
+    return;
+
+  gclog_or_tty->print_cr("");
+  gclog_or_tty->print_cr("%s Rate Summary (for up to age %d)", _name, (int) (length-1));
+  gclog_or_tty->print_cr(" age range survival rate (avg) samples (avg)");
+  gclog_or_tty->print_cr(" ---------------------------------------------------------");
+
+  size_t index = 0;
+  size_t limit = MIN2((int) length, 10);
+  while (index < limit) {
+    gclog_or_tty->print_cr(" %4d %6.2lf%% %6.2lf",
+                  (int) index, _summary_surv_rates[index]->avg() * 100.0,
+                  (double) _summary_surv_rates[index]->num());
+    ++index;
+  }
+
+  gclog_or_tty->print_cr(" ---------------------------------------------------------");
+
+  int num = 0;
+  double sum = 0.0;
+  int samples = 0;
+  while (index < length) {
+    ++num;
+    sum += _summary_surv_rates[index]->avg() * 100.0;
+    samples += _summary_surv_rates[index]->num();
+    ++index;
+
+    if (index == length || num % 10 == 0) {
+      gclog_or_tty->print_cr(" %4d .. %4d %6.2lf%% %6.2lf",
+                    (int) ((index-1) / 10 * 10), (int) (index-1), sum / (double) num,
+                    (double) samples / (double) num);
+      sum = 0.0;
+      num = 0;
+      samples = 0;
+    }
+  }
+
+  gclog_or_tty->print_cr(" ---------------------------------------------------------");
+}
+#endif // PRODUCT
diff --git a/hotspot/src/share/vm/gc_implementation/g1/survRateGroup.hpp b/hotspot/src/share/vm/gc_implementation/g1/survRateGroup.hpp
new file mode 100644
index 00000000000..ce69e65f08f
--- /dev/null
+++ b/hotspot/src/share/vm/gc_implementation/g1/survRateGroup.hpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ * + */ + +class G1CollectorPolicy; + +class SurvRateGroup : public CHeapObj { +private: + G1CollectorPolicy* _g1p; + const char* _name; + + size_t _array_length; + double* _surv_rate; + double* _accum_surv_rate_pred; + double _last_pred; + double _accum_surv_rate; + TruncatedSeq** _surv_rate_pred; + NumberSeq** _summary_surv_rates; + size_t _summary_surv_rates_len; + size_t _summary_surv_rates_max_len; + + int _all_regions_allocated; + size_t _curr_length; + size_t _scan_only_prefix; + size_t _setup_seq_num; + +public: + SurvRateGroup(G1CollectorPolicy* g1p, + const char* name, + size_t summary_surv_rates_len); + void start_adding_regions(); + void stop_adding_regions(); + void record_scan_only_prefix(size_t scan_only_prefix); + void record_surviving_words(int age_in_group, size_t surv_words); + void all_surviving_words_recorded(bool propagate); + const char* name() { return _name; } + + size_t region_num() { return _curr_length; } + size_t scan_only_length() { return _scan_only_prefix; } + double accum_surv_rate_pred(int age) { + assert(age >= 0, "must be"); + if ((size_t)age < _array_length) + return _accum_surv_rate_pred[age]; + else { + double diff = (double) (age - _array_length + 1); + return _accum_surv_rate_pred[_array_length-1] + diff * _last_pred; + } + } + + double accum_surv_rate(size_t adjustment); + + TruncatedSeq* get_seq(size_t age) { + guarantee( 0 <= age, "pre-condition" ); + if (age >= _setup_seq_num) { + guarantee( _setup_seq_num > 0, "invariant" ); + age = _setup_seq_num-1; + } + TruncatedSeq* seq = _surv_rate_pred[age]; + guarantee( seq != NULL, "invariant" ); + return seq; + } + + int next_age_index(); + int age_in_group(int age_index) { + int ret = (int) (_all_regions_allocated - age_index); + assert( ret >= 0, "invariant" ); + return ret; + } + int recalculate_age_index(int age_index) { + int new_age_index = (int) _scan_only_prefix - age_in_group(age_index); + guarantee( new_age_index >= 0, "invariant" ); + return new_age_index; + } + 
void finished_recalculating_age_indexes() { + _all_regions_allocated = (int) _scan_only_prefix; + } + +#ifndef PRODUCT + void print(); + void print_surv_rate_summary(); +#endif // PRODUCT +}; diff --git a/hotspot/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp b/hotspot/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp new file mode 100644 index 00000000000..e5753d53b42 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp @@ -0,0 +1,79 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +#include "incls/_precompiled.incl" +#include "incls/_vm_operations_g1.cpp.incl" + +void VM_G1CollectForAllocation::doit() { + JvmtiGCForAllocationMarker jgcm; + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + _res = g1h->satisfy_failed_allocation(_size); + assert(g1h->is_in_or_null(_res), "result not in heap"); +} + +void VM_G1CollectFull::doit() { + JvmtiGCFullMarker jgcm; + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + GCCauseSetter x(g1h, _gc_cause); + g1h->do_full_collection(false /* clear_all_soft_refs */); +} + +void VM_G1IncCollectionPause::doit() { + JvmtiGCForAllocationMarker jgcm; + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + GCCauseSetter x(g1h, GCCause::_g1_inc_collection_pause); + g1h->do_collection_pause_at_safepoint(NULL); +} + +void VM_G1PopRegionCollectionPause::doit() { + JvmtiGCForAllocationMarker jgcm; + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + g1h->do_collection_pause_at_safepoint(_pop_region); +} + + +void VM_CGC_Operation::doit() { + gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps); + TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty); + TraceTime t(_printGCMessage, PrintGC, true, gclog_or_tty); + SharedHeap* sh = SharedHeap::heap(); + // This could go away if CollectedHeap gave access to _gc_is_active... 
+ if (sh != NULL) { + IsGCActiveMark x; + _cl->do_void(); + } else { + _cl->do_void(); + } +} + +bool VM_CGC_Operation::doit_prologue() { + Heap_lock->lock(); + SharedHeap::heap()->_thread_holds_heap_lock_for_gc = true; + return true; +} + +void VM_CGC_Operation::doit_epilogue() { + SharedHeap::heap()->_thread_holds_heap_lock_for_gc = false; + Heap_lock->unlock(); +} diff --git a/hotspot/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp b/hotspot/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp new file mode 100644 index 00000000000..a914cea3b48 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp @@ -0,0 +1,114 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// VM_operations for the G1 collector. 
+// VM_GC_Operation: +// - VM_CGC_Operation +// - VM_G1CollectFull +// - VM_G1CollectForAllocation +// - VM_G1IncCollectionPause +// - VM_G1PopRegionCollectionPause + +class VM_G1CollectFull: public VM_GC_Operation { + private: + public: + VM_G1CollectFull(int gc_count_before, + GCCause::Cause gc_cause) + : VM_GC_Operation(gc_count_before) + { + _gc_cause = gc_cause; + } + ~VM_G1CollectFull() {} + virtual VMOp_Type type() const { return VMOp_G1CollectFull; } + virtual void doit(); + virtual const char* name() const { + return "full garbage-first collection"; + } +}; + +class VM_G1CollectForAllocation: public VM_GC_Operation { + private: + HeapWord* _res; + size_t _size; // size of object to be allocated + public: + VM_G1CollectForAllocation(size_t size, int gc_count_before) + : VM_GC_Operation(gc_count_before) { + _size = size; + _res = NULL; + } + ~VM_G1CollectForAllocation() {} + virtual VMOp_Type type() const { return VMOp_G1CollectForAllocation; } + virtual void doit(); + virtual const char* name() const { + return "garbage-first collection to satisfy allocation"; + } + HeapWord* result() { return _res; } +}; + +class VM_G1IncCollectionPause: public VM_GC_Operation { + public: + VM_G1IncCollectionPause(int gc_count_before) : + VM_GC_Operation(gc_count_before) {} + virtual VMOp_Type type() const { return VMOp_G1IncCollectionPause; } + virtual void doit(); + virtual const char* name() const { + return "garbage-first incremental collection pause"; + } +}; + +class VM_G1PopRegionCollectionPause: public VM_GC_Operation { + HeapRegion* _pop_region; + public: + VM_G1PopRegionCollectionPause(int gc_count_before, HeapRegion* pop_region) : + VM_GC_Operation(gc_count_before), + _pop_region(pop_region) + {} + virtual VMOp_Type type() const { return VMOp_G1PopRegionCollectionPause; } + virtual void doit(); + virtual const char* name() const { + return "garbage-first popular region collection pause"; + } +}; + +// Concurrent GC stop-the-world operations such as initial and 
final mark; +// consider sharing these with CMS's counterparts. +class VM_CGC_Operation: public VM_Operation { + VoidClosure* _cl; + const char* _printGCMessage; + public: + VM_CGC_Operation(VoidClosure* cl, const char *printGCMsg) : + _cl(cl), + _printGCMessage(printGCMsg) + {} + + ~VM_CGC_Operation() {} + + virtual VMOp_Type type() const { return VMOp_CGC_Operation; } + virtual void doit(); + virtual bool doit_prologue(); + virtual void doit_epilogue(); + virtual const char* name() const { + return "concurrent gc"; + } +}; diff --git a/hotspot/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep b/hotspot/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep index 505db4ad490..d751d2d1f6a 100644 --- a/hotspot/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep +++ b/hotspot/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep @@ -125,17 +125,6 @@ compactibleFreeListSpace.hpp space.hpp compactingPermGenGen.cpp concurrentMarkSweepGeneration.inline.hpp -concurrentGCThread.cpp concurrentGCThread.hpp -concurrentGCThread.cpp init.hpp -concurrentGCThread.cpp instanceRefKlass.hpp -concurrentGCThread.cpp interfaceSupport.hpp -concurrentGCThread.cpp java.hpp -concurrentGCThread.cpp javaCalls.hpp -concurrentGCThread.cpp oop.inline.hpp -concurrentGCThread.cpp systemDictionary.hpp - -concurrentGCThread.hpp thread.hpp - concurrentMarkSweepGeneration.cpp cardTableRS.hpp concurrentMarkSweepGeneration.cpp cmsAdaptiveSizePolicy.hpp concurrentMarkSweepGeneration.cpp cmsCollectorPolicy.hpp @@ -167,7 +156,7 @@ concurrentMarkSweepGeneration.cpp systemDictionary.hpp concurrentMarkSweepGeneration.cpp vmCMSOperations.hpp concurrentMarkSweepGeneration.cpp vmThread.hpp -concurrentMarkSweepGeneration.hpp bitMap.hpp +concurrentMarkSweepGeneration.hpp bitMap.inline.hpp concurrentMarkSweepGeneration.hpp freeBlockDictionary.hpp concurrentMarkSweepGeneration.hpp gSpaceCounters.hpp concurrentMarkSweepGeneration.hpp gcStats.hpp diff --git 
a/hotspot/src/share/vm/gc_implementation/includeDB_gc_g1 b/hotspot/src/share/vm/gc_implementation/includeDB_gc_g1 new file mode 100644 index 00000000000..53770855458 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/includeDB_gc_g1 @@ -0,0 +1,351 @@ +// +// Copyright 2004-2006 Sun Microsystems, Inc. All Rights Reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, +// CA 95054 USA or visit www.sun.com if you need additional information or +// have any questions. +// +// + +// NOTE: DO NOT CHANGE THIS COPYRIGHT TO NEW STYLE - IT WILL BREAK makeDeps! 
+ +bufferingOopClosure.hpp genOopClosures.hpp +bufferingOopClosure.hpp generation.hpp +bufferingOopClosure.hpp os.hpp + +cardTableRS.cpp concurrentMark.hpp +cardTableRS.cpp g1SATBCardTableModRefBS.hpp + +collectionSetChooser.cpp g1CollectedHeap.hpp +collectionSetChooser.cpp g1CollectorPolicy.hpp +collectionSetChooser.cpp collectionSetChooser.hpp + +collectionSetChooser.hpp heapRegion.hpp +collectionSetChooser.hpp growableArray.hpp + +concurrentG1Refine.cpp atomic.hpp +concurrentG1Refine.cpp concurrentG1Refine.hpp +concurrentG1Refine.cpp concurrentG1RefineThread.hpp +concurrentG1Refine.cpp copy.hpp +concurrentG1Refine.cpp g1CollectedHeap.hpp +concurrentG1Refine.cpp g1RemSet.hpp + +concurrentG1Refine.hpp globalDefinitions.hpp + +concurrentG1RefineThread.cpp concurrentG1Refine.hpp +concurrentG1RefineThread.cpp concurrentG1RefineThread.hpp +concurrentG1RefineThread.cpp g1CollectedHeap.hpp +concurrentG1RefineThread.cpp g1CollectorPolicy.hpp +concurrentG1RefineThread.cpp handles.inline.hpp +concurrentG1RefineThread.cpp mutexLocker.hpp +concurrentG1RefineThread.cpp resourceArea.hpp + +concurrentG1RefineThread.hpp concurrentGCThread.hpp +concurrentG1RefineThread.hpp coTracker.hpp + +concurrentMark.cpp concurrentMark.hpp +concurrentMark.cpp concurrentMarkThread.inline.hpp +concurrentMark.cpp g1CollectedHeap.inline.hpp +concurrentMark.cpp g1CollectorPolicy.hpp +concurrentMark.cpp g1RemSet.hpp +concurrentMark.cpp gcOverheadReporter.hpp +concurrentMark.cpp genOopClosures.inline.hpp +concurrentMark.cpp heapRegionRemSet.hpp +concurrentMark.cpp heapRegionSeq.inline.hpp +concurrentMark.cpp handles.inline.hpp +concurrentMark.cpp java.hpp +concurrentMark.cpp oop.inline.hpp +concurrentMark.cpp referencePolicy.hpp +concurrentMark.cpp resourceArea.hpp +concurrentMark.cpp symbolTable.hpp + +concurrentMark.hpp coTracker.hpp +concurrentMark.hpp heapRegion.hpp +concurrentMark.hpp taskqueue.hpp + +concurrentMarkThread.cpp concurrentMarkThread.inline.hpp +concurrentMarkThread.cpp 
g1CollectedHeap.inline.hpp +concurrentMarkThread.cpp g1CollectorPolicy.hpp +concurrentMarkThread.cpp g1MMUTracker.hpp +concurrentMarkThread.cpp resourceArea.hpp +concurrentMarkThread.cpp vm_operations_g1.hpp +concurrentMarkThread.cpp vmThread.hpp + +concurrentMarkThread.hpp concurrentGCThread.hpp + +concurrentMarkThread.inline.hpp concurrentMark.hpp +concurrentMarkThread.inline.hpp concurrentMarkThread.hpp + +concurrentZFThread.cpp concurrentZFThread.hpp +concurrentZFThread.cpp heapRegion.hpp +concurrentZFThread.cpp g1CollectedHeap.inline.hpp +concurrentZFThread.cpp copy.hpp +concurrentZFThread.cpp mutexLocker.hpp +concurrentZFThread.cpp space.inline.hpp + +concurrentZFThread.hpp concurrentGCThread.hpp +concurrentZFThread.hpp coTracker.hpp + +dirtyCardQueue.cpp atomic.hpp +dirtyCardQueue.cpp dirtyCardQueue.hpp +dirtyCardQueue.cpp heapRegionRemSet.hpp +dirtyCardQueue.cpp mutexLocker.hpp +dirtyCardQueue.cpp ptrQueue.inline.hpp +dirtyCardQueue.cpp safepoint.hpp +dirtyCardQueue.cpp thread.hpp +dirtyCardQueue.cpp thread_.inline.hpp +dirtyCardQueue.cpp workgroup.hpp + +dirtyCardQueue.hpp allocation.hpp +dirtyCardQueue.hpp ptrQueue.hpp + +g1BlockOffsetTable.cpp g1BlockOffsetTable.inline.hpp +g1BlockOffsetTable.cpp java.hpp +g1BlockOffsetTable.cpp oop.inline.hpp +g1BlockOffsetTable.cpp space.hpp + +g1BlockOffsetTable.hpp globalDefinitions.hpp +g1BlockOffsetTable.hpp memRegion.hpp +g1BlockOffsetTable.hpp virtualspace.hpp + +g1BlockOffsetTable.inline.hpp g1BlockOffsetTable.hpp +g1BlockOffsetTable.inline.hpp space.hpp + +g1CollectedHeap.cpp aprofiler.hpp +g1CollectedHeap.cpp bufferingOopClosure.hpp +g1CollectedHeap.cpp concurrentG1Refine.hpp +g1CollectedHeap.cpp concurrentG1RefineThread.hpp +g1CollectedHeap.cpp concurrentMarkThread.inline.hpp +g1CollectedHeap.cpp concurrentZFThread.hpp +g1CollectedHeap.cpp g1CollectedHeap.inline.hpp +g1CollectedHeap.cpp g1CollectorPolicy.hpp +g1CollectedHeap.cpp g1MarkSweep.hpp +g1CollectedHeap.cpp g1RemSet.hpp +g1CollectedHeap.cpp 
g1OopClosures.inline.hpp +g1CollectedHeap.cpp genOopClosures.inline.hpp +g1CollectedHeap.cpp gcLocker.inline.hpp +g1CollectedHeap.cpp gcOverheadReporter.hpp +g1CollectedHeap.cpp generationSpec.hpp +g1CollectedHeap.cpp heapRegionRemSet.hpp +g1CollectedHeap.cpp heapRegionSeq.inline.hpp +g1CollectedHeap.cpp icBuffer.hpp +g1CollectedHeap.cpp isGCActiveMark.hpp +g1CollectedHeap.cpp oop.inline.hpp +g1CollectedHeap.cpp oop.pcgc.inline.hpp +g1CollectedHeap.cpp parGCAllocBuffer.hpp +g1CollectedHeap.cpp vm_operations_g1.hpp +g1CollectedHeap.cpp vmThread.hpp + +g1CollectedHeap.hpp barrierSet.hpp +g1CollectedHeap.hpp heapRegion.hpp +g1CollectedHeap.hpp memRegion.hpp +g1CollectedHeap.hpp sharedHeap.hpp + +g1CollectedHeap.inline.hpp concurrentMark.hpp +g1CollectedHeap.inline.hpp g1CollectedHeap.hpp +g1CollectedHeap.inline.hpp heapRegionSeq.hpp +g1CollectedHeap.inline.hpp taskqueue.hpp + +g1CollectorPolicy.cpp concurrentG1Refine.hpp +g1CollectorPolicy.cpp concurrentMark.hpp +g1CollectorPolicy.cpp concurrentMarkThread.inline.hpp +g1CollectorPolicy.cpp debug.hpp +g1CollectorPolicy.cpp java.hpp +g1CollectorPolicy.cpp g1CollectedHeap.hpp +g1CollectorPolicy.cpp g1CollectorPolicy.hpp +g1CollectorPolicy.cpp heapRegionRemSet.hpp +g1CollectorPolicy.cpp mutexLocker.hpp + +g1CollectorPolicy.hpp collectorPolicy.hpp +g1CollectorPolicy.hpp collectionSetChooser.hpp +g1CollectorPolicy.hpp g1MMUTracker.hpp + +g1_globals.cpp g1_globals.hpp + +g1_globals.hpp globals.hpp + +globals.cpp g1_globals.hpp +top.hpp g1_globals.hpp + +g1MarkSweep.cpp aprofiler.hpp +g1MarkSweep.cpp biasedLocking.hpp +g1MarkSweep.cpp codeCache.hpp +g1MarkSweep.cpp events.hpp +g1MarkSweep.cpp fprofiler.hpp +g1MarkSweep.hpp g1CollectedHeap.hpp +g1MarkSweep.cpp g1MarkSweep.hpp +g1MarkSweep.cpp gcLocker.hpp +g1MarkSweep.cpp genCollectedHeap.hpp +g1MarkSweep.hpp heapRegion.hpp +g1MarkSweep.cpp icBuffer.hpp +g1MarkSweep.cpp instanceRefKlass.hpp +g1MarkSweep.cpp javaClasses.hpp +g1MarkSweep.cpp jvmtiExport.hpp +g1MarkSweep.cpp 
copy.hpp +g1MarkSweep.cpp modRefBarrierSet.hpp +g1MarkSweep.cpp oop.inline.hpp +g1MarkSweep.cpp referencePolicy.hpp +g1MarkSweep.cpp space.hpp +g1MarkSweep.cpp symbolTable.hpp +g1MarkSweep.cpp synchronizer.hpp +g1MarkSweep.cpp systemDictionary.hpp +g1MarkSweep.cpp thread.hpp +g1MarkSweep.cpp vmSymbols.hpp +g1MarkSweep.cpp vmThread.hpp + +g1MarkSweep.hpp generation.hpp +g1MarkSweep.hpp growableArray.hpp +g1MarkSweep.hpp markOop.hpp +g1MarkSweep.hpp genMarkSweep.hpp +g1MarkSweep.hpp oop.hpp +g1MarkSweep.hpp timer.hpp +g1MarkSweep.hpp universe.hpp + +g1OopClosures.inline.hpp concurrentMark.hpp +g1OopClosures.inline.hpp g1OopClosures.hpp +g1OopClosures.inline.hpp g1CollectedHeap.hpp +g1OopClosures.inline.hpp g1RemSet.hpp + +g1MMUTracker.cpp g1MMUTracker.hpp +g1MMUTracker.cpp ostream.hpp +g1MMUTracker.cpp mutexLocker.hpp + +g1MMUTracker.hpp debug.hpp + +g1RemSet.cpp bufferingOopClosure.hpp +g1RemSet.cpp concurrentG1Refine.hpp +g1RemSet.cpp concurrentG1RefineThread.hpp +g1RemSet.cpp g1BlockOffsetTable.inline.hpp +g1RemSet.cpp g1CollectedHeap.inline.hpp +g1RemSet.cpp g1CollectorPolicy.hpp +g1RemSet.cpp g1RemSet.inline.hpp +g1RemSet.cpp g1OopClosures.inline.hpp +g1RemSet.cpp heapRegionSeq.inline.hpp +g1RemSet.cpp intHisto.hpp +g1RemSet.cpp iterator.hpp +g1RemSet.cpp oop.inline.hpp + +g1RemSet.inline.hpp g1RemSet.hpp +g1RemSet.inline.hpp heapRegionRemSet.hpp + +g1SATBCardTableModRefBS.cpp g1SATBCardTableModRefBS.hpp +g1SATBCardTableModRefBS.cpp heapRegion.hpp +g1SATBCardTableModRefBS.cpp mutexLocker.hpp +g1SATBCardTableModRefBS.cpp thread.hpp +g1SATBCardTableModRefBS.cpp thread_.inline.hpp +g1SATBCardTableModRefBS.cpp satbQueue.hpp + +g1SATBCardTableModRefBS.hpp cardTableModRefBS.hpp +g1SATBCardTableModRefBS.hpp memRegion.hpp + +heapRegion.cpp concurrentZFThread.hpp +heapRegion.cpp g1BlockOffsetTable.inline.hpp +heapRegion.cpp g1CollectedHeap.inline.hpp +heapRegion.cpp g1OopClosures.inline.hpp +heapRegion.cpp genOopClosures.inline.hpp +heapRegion.cpp heapRegion.inline.hpp 
+heapRegion.cpp heapRegionRemSet.hpp +heapRegion.cpp heapRegionSeq.inline.hpp +heapRegion.cpp iterator.hpp +heapRegion.cpp oop.inline.hpp + +heapRegion.hpp space.hpp +heapRegion.hpp spaceDecorator.hpp +heapRegion.hpp g1BlockOffsetTable.inline.hpp +heapRegion.hpp watermark.hpp +heapRegion.hpp g1_specialized_oop_closures.hpp +heapRegion.hpp survRateGroup.hpp + +heapRegionRemSet.hpp sparsePRT.hpp + +heapRegionRemSet.cpp allocation.hpp +heapRegionRemSet.cpp bitMap.inline.hpp +heapRegionRemSet.cpp g1BlockOffsetTable.inline.hpp +heapRegionRemSet.cpp g1CollectedHeap.inline.hpp +heapRegionRemSet.cpp heapRegionRemSet.hpp +heapRegionRemSet.cpp heapRegionSeq.inline.hpp +heapRegionRemSet.cpp globalDefinitions.hpp +heapRegionRemSet.cpp space.inline.hpp + +heapRegionSeq.cpp allocation.hpp +heapRegionSeq.cpp g1CollectedHeap.hpp +heapRegionSeq.cpp heapRegionSeq.hpp + +heapRegionSeq.hpp growableArray.hpp +heapRegionSeq.hpp heapRegion.hpp + +heapRegionSeq.inline.hpp heapRegionSeq.hpp + +klass.hpp g1OopClosures.hpp + +ptrQueue.cpp allocation.hpp +ptrQueue.cpp allocation.inline.hpp +ptrQueue.cpp mutex.hpp +ptrQueue.cpp mutexLocker.hpp +ptrQueue.cpp ptrQueue.hpp +ptrQueue.cpp ptrQueue.inline.hpp +ptrQueue.cpp thread_.inline.hpp + +ptrQueue.hpp allocation.hpp +ptrQueue.hpp sizes.hpp + +ptrQueue.inline.hpp ptrQueue.hpp + +satbQueue.cpp allocation.inline.hpp +satbQueue.cpp mutexLocker.hpp +satbQueue.cpp ptrQueue.inline.hpp +satbQueue.cpp satbQueue.hpp +satbQueue.cpp sharedHeap.hpp +satbQueue.cpp thread.hpp + +satbQueue.hpp ptrQueue.hpp + +sparsePRT.cpp allocation.inline.hpp +sparsePRT.cpp cardTableModRefBS.hpp +sparsePRT.cpp heapRegion.hpp +sparsePRT.cpp heapRegionRemSet.hpp +sparsePRT.cpp mutexLocker.hpp +sparsePRT.cpp sparsePRT.hpp +sparsePRT.cpp space.inline.hpp + +sparsePRT.hpp allocation.hpp +sparsePRT.hpp cardTableModRefBS.hpp +sparsePRT.hpp globalDefinitions.hpp +sparsePRT.hpp heapRegion.hpp +sparsePRT.hpp mutex.hpp + +specialized_oop_closures.hpp g1_specialized_oop_closures.hpp + 
+survRateGroup.hpp numberSeq.hpp + +survRateGroup.cpp allocation.hpp +survRateGroup.cpp g1CollectedHeap.hpp +survRateGroup.cpp g1CollectorPolicy.hpp +survRateGroup.cpp heapRegion.hpp +survRateGroup.cpp survRateGroup.hpp + +thread.cpp concurrentMarkThread.inline.hpp + +universe.cpp g1CollectedHeap.hpp +universe.cpp g1CollectorPolicy.hpp + +vm_operations_g1.hpp vmGCOperations.hpp + +vm_operations_g1.cpp vm_operations_g1.hpp +vm_operations_g1.cpp g1CollectedHeap.hpp +vm_operations_g1.cpp isGCActiveMark.hpp diff --git a/hotspot/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge b/hotspot/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge index 574f8dc3d7b..13ba87c8391 100644 --- a/hotspot/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge +++ b/hotspot/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge @@ -150,7 +150,6 @@ parallelScavengeHeap.hpp psPermGen.hpp parallelScavengeHeap.hpp psYoungGen.hpp parallelScavengeHeap.hpp ostream.hpp -parMarkBitMap.cpp bitMap.hpp parMarkBitMap.cpp bitMap.inline.hpp parMarkBitMap.cpp oop.inline.hpp parMarkBitMap.cpp os.hpp @@ -159,7 +158,6 @@ parMarkBitMap.cpp parMarkBitMap.hpp parMarkBitMap.cpp parMarkBitMap.inline.hpp parMarkBitMap.cpp psParallelCompact.hpp -parMarkBitMap.hpp bitMap.hpp parMarkBitMap.hpp bitMap.inline.hpp parMarkBitMap.hpp psVirtualspace.hpp diff --git a/hotspot/src/share/vm/gc_implementation/includeDB_gc_shared b/hotspot/src/share/vm/gc_implementation/includeDB_gc_shared index 75a87ce7369..7ea2265898d 100644 --- a/hotspot/src/share/vm/gc_implementation/includeDB_gc_shared +++ b/hotspot/src/share/vm/gc_implementation/includeDB_gc_shared @@ -24,6 +24,23 @@ // NOTE: DO NOT CHANGE THIS COPYRIGHT TO NEW STYLE - IT WILL BREAK makeDeps! 
+concurrentGCThread.cpp concurrentGCThread.hpp +concurrentGCThread.cpp init.hpp +concurrentGCThread.cpp instanceRefKlass.hpp +concurrentGCThread.cpp interfaceSupport.hpp +concurrentGCThread.cpp java.hpp +concurrentGCThread.cpp javaCalls.hpp +concurrentGCThread.cpp oop.inline.hpp +concurrentGCThread.cpp systemDictionary.hpp + +concurrentGCThread.hpp thread.hpp + +coTracker.hpp globalDefinitions.hpp +coTracker.hpp numberSeq.hpp + +coTracker.cpp coTracker.hpp +coTracker.cpp os.hpp + allocationStats.cpp allocationStats.hpp allocationStats.cpp ostream.hpp @@ -37,6 +54,13 @@ gcAdaptivePolicyCounters.hpp gcPolicyCounters.hpp gcAdaptivePolicyCounters.cpp resourceArea.hpp gcAdaptivePolicyCounters.cpp gcAdaptivePolicyCounters.hpp +gcOverheadReporter.cpp allocation.inline.hpp +gcOverheadReporter.cpp concurrentGCThread.hpp +gcOverheadReporter.cpp coTracker.hpp +gcOverheadReporter.cpp gcOverheadReporter.hpp +gcOverheadReporter.cpp ostream.hpp +gcOverheadReporter.cpp thread_.inline.hpp + gSpaceCounters.cpp generation.hpp gSpaceCounters.cpp resourceArea.hpp gSpaceCounters.cpp gSpaceCounters.hpp @@ -75,3 +99,5 @@ spaceCounters.hpp immutableSpace.hpp spaceCounters.hpp mutableSpace.hpp spaceCounters.hpp perfData.hpp spaceCounters.hpp generationCounters.hpp + +vmGCOperations.cpp g1CollectedHeap.hpp diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.hpp index 3363a01155d..ca2ea6f9d9d 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.hpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.hpp @@ -192,16 +192,16 @@ private: }; inline ParMarkBitMap::ParMarkBitMap(): - _beg_bits(NULL, 0), - _end_bits(NULL, 0) + _beg_bits(), + _end_bits() { _region_start = 0; _virtual_space = 0; } inline ParMarkBitMap::ParMarkBitMap(MemRegion covered_region): - _beg_bits(NULL, 0), - _end_bits(NULL, 0) + _beg_bits(), + _end_bits() { 
initialize(covered_region); } @@ -325,7 +325,7 @@ ParMarkBitMap::obj_size(HeapWord* beg_addr, HeapWord* end_addr) const inline size_t ParMarkBitMap::obj_size(idx_t beg_bit) const { - const idx_t end_bit = _end_bits.find_next_one_bit(beg_bit, size()); + const idx_t end_bit = _end_bits.get_next_one_offset_inline(beg_bit, size()); assert(is_marked(beg_bit), "obj not marked"); assert(end_bit < size(), "end bit missing"); return obj_size(beg_bit, end_bit); @@ -384,13 +384,13 @@ ParMarkBitMap::bit_to_addr(idx_t bit) const inline ParMarkBitMap::idx_t ParMarkBitMap::find_obj_beg(idx_t beg, idx_t end) const { - return _beg_bits.find_next_one_bit(beg, end); + return _beg_bits.get_next_one_offset_inline_aligned_right(beg, end); } inline ParMarkBitMap::idx_t ParMarkBitMap::find_obj_end(idx_t beg, idx_t end) const { - return _end_bits.find_next_one_bit(beg, end); + return _end_bits.get_next_one_offset_inline_aligned_right(beg, end); } inline HeapWord* diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp index 5936c776a47..2d5334b780c 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp @@ -210,10 +210,6 @@ void ParallelScavengeHeap::post_initialize() { PSScavenge::initialize(); if (UseParallelOldGC) { PSParallelCompact::post_initialize(); - if (VerifyParallelOldWithMarkSweep) { - // Will be used for verification of par old. 
- PSMarkSweep::initialize(); - } } else { PSMarkSweep::initialize(); } @@ -402,7 +398,7 @@ HeapWord* ParallelScavengeHeap::mem_allocate( return result; } if (!is_tlab && - size >= (young_gen()->eden_space()->capacity_in_words() / 2)) { + size >= (young_gen()->eden_space()->capacity_in_words(Thread::current()) / 2)) { result = old_gen()->allocate(size, is_tlab); if (result != NULL) { return result; diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp index 0fb2cb217fc..087cdf93c01 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp @@ -184,6 +184,20 @@ class ParallelScavengeHeap : public CollectedHeap { size_t tlab_capacity(Thread* thr) const; size_t unsafe_max_tlab_alloc(Thread* thr) const; + // Can a compiler initialize a new object without store barriers? + // This permission only extends from the creation of a new object + // via a TLAB up to the first subsequent safepoint. + virtual bool can_elide_tlab_store_barriers() const { + return true; + } + + // Can a compiler elide a store barrier when it writes + // a permanent oop into the heap? Applies when the compiler + // is storing x to the heap, where x->is_perm() is true. 
+ virtual bool can_elide_permanent_oop_store_barriers() const { + return true; + } + void oop_iterate(OopClosure* cl); void object_iterate(ObjectClosure* cl); void permanent_oop_iterate(OopClosure* cl); diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp index 2237542f523..7a3ebaf9fd3 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp @@ -146,7 +146,7 @@ void RefProcTaskExecutor::execute(ProcessTask& task) { ParallelScavengeHeap* heap = PSParallelCompact::gc_heap(); uint parallel_gc_threads = heap->gc_task_manager()->workers(); - ChunkTaskQueueSet* qset = ParCompactionManager::chunk_array(); + RegionTaskQueueSet* qset = ParCompactionManager::region_array(); ParallelTaskTerminator terminator(parallel_gc_threads, qset); GCTaskQueue* q = GCTaskQueue::create(); for(uint i=0; iis_gc_active(), "called outside gc"); - NOT_PRODUCT(TraceTime tm("StealChunkCompactionTask", + NOT_PRODUCT(TraceTime tm("StealRegionCompactionTask", PrintGCDetails && TraceParallelOldGCTasks, true, gclog_or_tty)); ParCompactionManager* cm = ParCompactionManager::gc_thread_compaction_manager(which); - // Has to drain stacks first because there may be chunks on + // Has to drain stacks first because there may be regions on // preloaded onto the stack and this thread may never have // done a draining task. Are the draining tasks needed? 
- cm->drain_chunk_stacks(); + cm->drain_region_stacks(); - size_t chunk_index = 0; + size_t region_index = 0; int random_seed = 17; // If we're the termination task, try 10 rounds of stealing before // setting the termination flag while(true) { - if (ParCompactionManager::steal(which, &random_seed, chunk_index)) { - PSParallelCompact::fill_and_update_chunk(cm, chunk_index); - cm->drain_chunk_stacks(); + if (ParCompactionManager::steal(which, &random_seed, region_index)) { + PSParallelCompact::fill_and_update_region(cm, region_index); + cm->drain_region_stacks(); } else { if (terminator()->offer_termination()) { break; @@ -249,11 +249,10 @@ void StealChunkCompactionTask::do_it(GCTaskManager* manager, uint which) { UpdateDensePrefixTask::UpdateDensePrefixTask( PSParallelCompact::SpaceId space_id, - size_t chunk_index_start, - size_t chunk_index_end) : - _space_id(space_id), _chunk_index_start(chunk_index_start), - _chunk_index_end(chunk_index_end) -{} + size_t region_index_start, + size_t region_index_end) : + _space_id(space_id), _region_index_start(region_index_start), + _region_index_end(region_index_end) {} void UpdateDensePrefixTask::do_it(GCTaskManager* manager, uint which) { @@ -265,8 +264,8 @@ void UpdateDensePrefixTask::do_it(GCTaskManager* manager, uint which) { PSParallelCompact::update_and_deadwood_in_dense_prefix(cm, _space_id, - _chunk_index_start, - _chunk_index_end); + _region_index_start, + _region_index_end); } void DrainStacksCompactionTask::do_it(GCTaskManager* manager, uint which) { @@ -278,6 +277,6 @@ void DrainStacksCompactionTask::do_it(GCTaskManager* manager, uint which) { ParCompactionManager* cm = ParCompactionManager::gc_thread_compaction_manager(which); - // Process any chunks already in the compaction managers stacks. - cm->drain_chunk_stacks(); + // Process any regions already in the compaction managers stacks. 
+ cm->drain_region_stacks(); } diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/pcTasks.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/pcTasks.hpp index 69b23ac5b3c..f41b77c3d04 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/pcTasks.hpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/pcTasks.hpp @@ -188,18 +188,18 @@ class StealMarkingTask : public GCTask { }; // -// StealChunkCompactionTask +// StealRegionCompactionTask // // This task is used to distribute work to idle threads. // -class StealChunkCompactionTask : public GCTask { +class StealRegionCompactionTask : public GCTask { private: ParallelTaskTerminator* const _terminator; public: - StealChunkCompactionTask(ParallelTaskTerminator* t); + StealRegionCompactionTask(ParallelTaskTerminator* t); - char* name() { return (char *)"steal-chunk-task"; } + char* name() { return (char *)"steal-region-task"; } ParallelTaskTerminator* terminator() { return _terminator; } virtual void do_it(GCTaskManager* manager, uint which); @@ -215,15 +215,15 @@ class StealChunkCompactionTask : public GCTask { class UpdateDensePrefixTask : public GCTask { private: PSParallelCompact::SpaceId _space_id; - size_t _chunk_index_start; - size_t _chunk_index_end; + size_t _region_index_start; + size_t _region_index_end; public: char* name() { return (char *)"update-dense_prefix-task"; } UpdateDensePrefixTask(PSParallelCompact::SpaceId space_id, - size_t chunk_index_start, - size_t chunk_index_end); + size_t region_index_start, + size_t region_index_end); virtual void do_it(GCTaskManager* manager, uint which); }; @@ -231,17 +231,17 @@ class UpdateDensePrefixTask : public GCTask { // // DrainStacksCompactionTask // -// This task processes chunks that have been added to the stacks of each +// This task processes regions that have been added to the stacks of each // compaction manager. 
// // Trying to use one draining thread does not work because there are no // guarantees about which task will be picked up by which thread. For example, -// if thread A gets all the preloaded chunks, thread A may not get a draining +// if thread A gets all the preloaded regions, thread A may not get a draining // task (they may all be done by other threads). // class DrainStacksCompactionTask : public GCTask { public: - char* name() { return (char *)"drain-chunk-task"; } + char* name() { return (char *)"drain-region-task"; } virtual void do_it(GCTaskManager* manager, uint which); }; diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp index ba3684a1977..651cd420f26 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp @@ -30,7 +30,7 @@ ParCompactionManager** ParCompactionManager::_manager_array = NULL; OopTaskQueueSet* ParCompactionManager::_stack_array = NULL; ObjectStartArray* ParCompactionManager::_start_array = NULL; ParMarkBitMap* ParCompactionManager::_mark_bitmap = NULL; -ChunkTaskQueueSet* ParCompactionManager::_chunk_array = NULL; +RegionTaskQueueSet* ParCompactionManager::_region_array = NULL; ParCompactionManager::ParCompactionManager() : _action(CopyAndUpdate) { @@ -46,13 +46,13 @@ ParCompactionManager::ParCompactionManager() : // We want the overflow stack to be permanent _overflow_stack = new (ResourceObj::C_HEAP) GrowableArray(10, true); -#ifdef USE_ChunkTaskQueueWithOverflow - chunk_stack()->initialize(); +#ifdef USE_RegionTaskQueueWithOverflow + region_stack()->initialize(); #else - chunk_stack()->initialize(); + region_stack()->initialize(); // We want the overflow stack to be permanent - _chunk_overflow_stack = + _region_overflow_stack = new (ResourceObj::C_HEAP) GrowableArray(10, true); #endif @@ 
-86,18 +86,18 @@ void ParCompactionManager::initialize(ParMarkBitMap* mbm) { _stack_array = new OopTaskQueueSet(parallel_gc_threads); guarantee(_stack_array != NULL, "Count not initialize promotion manager"); - _chunk_array = new ChunkTaskQueueSet(parallel_gc_threads); - guarantee(_chunk_array != NULL, "Count not initialize promotion manager"); + _region_array = new RegionTaskQueueSet(parallel_gc_threads); + guarantee(_region_array != NULL, "Count not initialize promotion manager"); // Create and register the ParCompactionManager(s) for the worker threads. for(uint i=0; iregister_queue(i, _manager_array[i]->marking_stack()); -#ifdef USE_ChunkTaskQueueWithOverflow - chunk_array()->register_queue(i, _manager_array[i]->chunk_stack()->task_queue()); +#ifdef USE_RegionTaskQueueWithOverflow + region_array()->register_queue(i, _manager_array[i]->region_stack()->task_queue()); #else - chunk_array()->register_queue(i, _manager_array[i]->chunk_stack()); + region_array()->register_queue(i, _manager_array[i]->region_stack()); #endif } @@ -153,31 +153,31 @@ oop ParCompactionManager::retrieve_for_scanning() { return NULL; } -// Save chunk on a stack -void ParCompactionManager::save_for_processing(size_t chunk_index) { +// Save region on a stack +void ParCompactionManager::save_for_processing(size_t region_index) { #ifdef ASSERT const ParallelCompactData& sd = PSParallelCompact::summary_data(); - ParallelCompactData::ChunkData* const chunk_ptr = sd.chunk(chunk_index); - assert(chunk_ptr->claimed(), "must be claimed"); - assert(chunk_ptr->_pushed++ == 0, "should only be pushed once"); + ParallelCompactData::RegionData* const region_ptr = sd.region(region_index); + assert(region_ptr->claimed(), "must be claimed"); + assert(region_ptr->_pushed++ == 0, "should only be pushed once"); #endif - chunk_stack_push(chunk_index); + region_stack_push(region_index); } -void ParCompactionManager::chunk_stack_push(size_t chunk_index) { +void ParCompactionManager::region_stack_push(size_t 
region_index) { -#ifdef USE_ChunkTaskQueueWithOverflow - chunk_stack()->save(chunk_index); +#ifdef USE_RegionTaskQueueWithOverflow + region_stack()->save(region_index); #else - if(!chunk_stack()->push(chunk_index)) { - chunk_overflow_stack()->push(chunk_index); + if(!region_stack()->push(region_index)) { + region_overflow_stack()->push(region_index); } #endif } -bool ParCompactionManager::retrieve_for_processing(size_t& chunk_index) { -#ifdef USE_ChunkTaskQueueWithOverflow - return chunk_stack()->retrieve(chunk_index); +bool ParCompactionManager::retrieve_for_processing(size_t& region_index) { +#ifdef USE_RegionTaskQueueWithOverflow + return region_stack()->retrieve(region_index); #else // Should not be used in the parallel case ShouldNotReachHere(); @@ -230,14 +230,14 @@ void ParCompactionManager::drain_marking_stacks(OopClosure* blk) { assert(overflow_stack()->length() == 0, "Sanity"); } -void ParCompactionManager::drain_chunk_overflow_stack() { - size_t chunk_index = (size_t) -1; - while(chunk_stack()->retrieve_from_overflow(chunk_index)) { - PSParallelCompact::fill_and_update_chunk(this, chunk_index); +void ParCompactionManager::drain_region_overflow_stack() { + size_t region_index = (size_t) -1; + while(region_stack()->retrieve_from_overflow(region_index)) { + PSParallelCompact::fill_and_update_region(this, region_index); } } -void ParCompactionManager::drain_chunk_stacks() { +void ParCompactionManager::drain_region_stacks() { #ifdef ASSERT ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap(); assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity"); @@ -249,42 +249,42 @@ void ParCompactionManager::drain_chunk_stacks() { #if 1 // def DO_PARALLEL - the serial code hasn't been updated do { -#ifdef USE_ChunkTaskQueueWithOverflow +#ifdef USE_RegionTaskQueueWithOverflow // Drain overflow stack first, so other threads can steal from // claimed stack while we work. 
- size_t chunk_index = (size_t) -1; - while(chunk_stack()->retrieve_from_overflow(chunk_index)) { - PSParallelCompact::fill_and_update_chunk(this, chunk_index); + size_t region_index = (size_t) -1; + while(region_stack()->retrieve_from_overflow(region_index)) { + PSParallelCompact::fill_and_update_region(this, region_index); } - while (chunk_stack()->retrieve_from_stealable_queue(chunk_index)) { - PSParallelCompact::fill_and_update_chunk(this, chunk_index); + while (region_stack()->retrieve_from_stealable_queue(region_index)) { + PSParallelCompact::fill_and_update_region(this, region_index); } - } while (!chunk_stack()->is_empty()); + } while (!region_stack()->is_empty()); #else // Drain overflow stack first, so other threads can steal from // claimed stack while we work. - while(!chunk_overflow_stack()->is_empty()) { - size_t chunk_index = chunk_overflow_stack()->pop(); - PSParallelCompact::fill_and_update_chunk(this, chunk_index); + while(!region_overflow_stack()->is_empty()) { + size_t region_index = region_overflow_stack()->pop(); + PSParallelCompact::fill_and_update_region(this, region_index); } - size_t chunk_index = -1; + size_t region_index = -1; // obj is a reference!!! - while (chunk_stack()->pop_local(chunk_index)) { + while (region_stack()->pop_local(region_index)) { // It would be nice to assert about the type of objects we might // pop, but they can come from anywhere, unfortunately. 
- PSParallelCompact::fill_and_update_chunk(this, chunk_index); + PSParallelCompact::fill_and_update_region(this, region_index); } - } while((chunk_stack()->size() != 0) || - (chunk_overflow_stack()->length() != 0)); + } while((region_stack()->size() != 0) || + (region_overflow_stack()->length() != 0)); #endif -#ifdef USE_ChunkTaskQueueWithOverflow - assert(chunk_stack()->is_empty(), "Sanity"); +#ifdef USE_RegionTaskQueueWithOverflow + assert(region_stack()->is_empty(), "Sanity"); #else - assert(chunk_stack()->size() == 0, "Sanity"); - assert(chunk_overflow_stack()->length() == 0, "Sanity"); + assert(region_stack()->size() == 0, "Sanity"); + assert(region_overflow_stack()->length() == 0, "Sanity"); #endif #else oop obj; diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp index d09266c5434..68268d39d34 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp @@ -52,7 +52,7 @@ class ParCompactionManager : public CHeapObj { friend class ParallelTaskTerminator; friend class ParMarkBitMap; friend class PSParallelCompact; - friend class StealChunkCompactionTask; + friend class StealRegionCompactionTask; friend class UpdateAndFillClosure; friend class RefProcTaskExecutor; @@ -72,27 +72,27 @@ class ParCompactionManager : public CHeapObj { // ------------------------ End don't putback if not needed private: - static ParCompactionManager** _manager_array; - static OopTaskQueueSet* _stack_array; - static ObjectStartArray* _start_array; - static ChunkTaskQueueSet* _chunk_array; - static PSOldGen* _old_gen; + static ParCompactionManager** _manager_array; + static OopTaskQueueSet* _stack_array; + static ObjectStartArray* _start_array; + static RegionTaskQueueSet* _region_array; + static PSOldGen* _old_gen; - OopTaskQueue _marking_stack; - 
GrowableArray* _overflow_stack; + OopTaskQueue _marking_stack; + GrowableArray* _overflow_stack; // Is there a way to reuse the _marking_stack for the - // saving empty chunks? For now just create a different + // saving empty regions? For now just create a different // type of TaskQueue. -#ifdef USE_ChunkTaskQueueWithOverflow - ChunkTaskQueueWithOverflow _chunk_stack; +#ifdef USE_RegionTaskQueueWithOverflow + RegionTaskQueueWithOverflow _region_stack; #else - ChunkTaskQueue _chunk_stack; - GrowableArray* _chunk_overflow_stack; + RegionTaskQueue _region_stack; + GrowableArray* _region_overflow_stack; #endif #if 1 // does this happen enough to need a per thread stack? - GrowableArray* _revisit_klass_stack; + GrowableArray* _revisit_klass_stack; #endif static ParMarkBitMap* _mark_bitmap; @@ -100,21 +100,22 @@ class ParCompactionManager : public CHeapObj { static PSOldGen* old_gen() { return _old_gen; } static ObjectStartArray* start_array() { return _start_array; } - static OopTaskQueueSet* stack_array() { return _stack_array; } + static OopTaskQueueSet* stack_array() { return _stack_array; } static void initialize(ParMarkBitMap* mbm); protected: // Array of tasks. Needed by the ParallelTaskTerminator. 
- static ChunkTaskQueueSet* chunk_array() { return _chunk_array; } - - OopTaskQueue* marking_stack() { return &_marking_stack; } - GrowableArray* overflow_stack() { return _overflow_stack; } -#ifdef USE_ChunkTaskQueueWithOverflow - ChunkTaskQueueWithOverflow* chunk_stack() { return &_chunk_stack; } + static RegionTaskQueueSet* region_array() { return _region_array; } + OopTaskQueue* marking_stack() { return &_marking_stack; } + GrowableArray* overflow_stack() { return _overflow_stack; } +#ifdef USE_RegionTaskQueueWithOverflow + RegionTaskQueueWithOverflow* region_stack() { return &_region_stack; } #else - ChunkTaskQueue* chunk_stack() { return &_chunk_stack; } - GrowableArray* chunk_overflow_stack() { return _chunk_overflow_stack; } + RegionTaskQueue* region_stack() { return &_region_stack; } + GrowableArray* region_overflow_stack() { + return _region_overflow_stack; + } #endif // Pushes onto the marking stack. If the marking stack is full, @@ -123,9 +124,9 @@ class ParCompactionManager : public CHeapObj { // Do not implement an equivalent stack_pop. Deal with the // marking stack and overflow stack directly. - // Pushes onto the chunk stack. If the chunk stack is full, - // pushes onto the chunk overflow stack. - void chunk_stack_push(size_t chunk_index); + // Pushes onto the region stack. If the region stack is full, + // pushes onto the region overflow stack. + void region_stack_push(size_t region_index); public: Action action() { return _action; } @@ -160,10 +161,10 @@ class ParCompactionManager : public CHeapObj { // Get a oop for scanning. If returns null, no oop were found. oop retrieve_for_scanning(); - // Save chunk for later processing. Must not fail. - void save_for_processing(size_t chunk_index); - // Get a chunk for processing. If returns null, no chunk were found. - bool retrieve_for_processing(size_t& chunk_index); + // Save region for later processing. Must not fail. + void save_for_processing(size_t region_index); + // Get a region for processing. 
If returns null, no region were found. + bool retrieve_for_processing(size_t& region_index); // Access function for compaction managers static ParCompactionManager* gc_thread_compaction_manager(int index); @@ -172,18 +173,18 @@ class ParCompactionManager : public CHeapObj { return stack_array()->steal(queue_num, seed, t); } - static bool steal(int queue_num, int* seed, ChunkTask& t) { - return chunk_array()->steal(queue_num, seed, t); + static bool steal(int queue_num, int* seed, RegionTask& t) { + return region_array()->steal(queue_num, seed, t); } // Process tasks remaining on any stack void drain_marking_stacks(OopClosure *blk); // Process tasks remaining on any stack - void drain_chunk_stacks(); + void drain_region_stacks(); // Process tasks remaining on any stack - void drain_chunk_overflow_stack(); + void drain_region_overflow_stack(); // Debugging support #ifdef ASSERT diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp index 40a1d9a6631..97d9e31b7e3 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp @@ -35,9 +35,7 @@ void PSMarkSweep::initialize() { _ref_processor = new ReferenceProcessor(mr, true, // atomic_discovery false); // mt_discovery - if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) { - _counters = new CollectorCounters("PSMarkSweep", 1); - } + _counters = new CollectorCounters("PSMarkSweep", 1); } // This method contains all heap specific policy for invoking mark sweep. @@ -518,9 +516,6 @@ void PSMarkSweep::mark_sweep_phase1(bool clear_all_softrefs) { follow_stack(); // Process reference objects found during marking - - // Skipping the reference processing for VerifyParallelOldWithMarkSweep - // affects the marking (makes it different). 
{ ReferencePolicy *soft_ref_policy; if (clear_all_softrefs) { diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweepDecorator.cpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweepDecorator.cpp index 489cdea7272..40fb6a1b1b6 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweepDecorator.cpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweepDecorator.cpp @@ -152,20 +152,15 @@ void PSMarkSweepDecorator::precompact() { oop(q)->forward_to(oop(compact_top)); assert(oop(q)->is_gc_marked(), "encoding the pointer should preserve the mark"); } else { - // Don't clear the mark since it's confuses parallel old - // verification. - if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) { - // if the object isn't moving we can just set the mark to the default - // mark and handle it specially later on. - oop(q)->init_mark(); - } + // if the object isn't moving we can just set the mark to the default + // mark and handle it specially later on. + oop(q)->init_mark(); assert(oop(q)->forwardee() == NULL, "should be forwarded to NULL"); } // Update object start array - if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) { - if (start_array) - start_array->allocate_block(compact_top); + if (start_array) { + start_array->allocate_block(compact_top); } VALIDATE_MARK_SWEEP_ONLY(MarkSweep::register_live_oop(oop(q), size)); @@ -219,19 +214,14 @@ void PSMarkSweepDecorator::precompact() { assert(oop(q)->is_gc_marked(), "encoding the pointer should preserve the mark"); } else { // if the object isn't moving we can just set the mark to the default - // Don't clear the mark since it's confuses parallel old - // verification. - if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) { - // mark and handle it specially later on. - oop(q)->init_mark(); - } + // mark and handle it specially later on. 
+ oop(q)->init_mark(); assert(oop(q)->forwardee() == NULL, "should be forwarded to NULL"); } - if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) { - // Update object start array - if (start_array) - start_array->allocate_block(compact_top); + // Update object start array + if (start_array) { + start_array->allocate_block(compact_top); } VALIDATE_MARK_SWEEP_ONLY(MarkSweep::register_live_oop(oop(q), sz)); diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp index b5cf4d31c09..2335a20e183 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp @@ -152,9 +152,7 @@ void PSOldGen::precompact() { assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity"); // Reset start array first. - debug_only(if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {) start_array()->reset(); - debug_only(}) object_mark_sweep()->precompact(); diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp index 1abdd88ac65..a84a955f0d3 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp @@ -28,43 +28,31 @@ #include // All sizes are in HeapWords. 
-const size_t ParallelCompactData::Log2ChunkSize = 9; // 512 words -const size_t ParallelCompactData::ChunkSize = (size_t)1 << Log2ChunkSize; -const size_t ParallelCompactData::ChunkSizeBytes = ChunkSize << LogHeapWordSize; -const size_t ParallelCompactData::ChunkSizeOffsetMask = ChunkSize - 1; -const size_t ParallelCompactData::ChunkAddrOffsetMask = ChunkSizeBytes - 1; -const size_t ParallelCompactData::ChunkAddrMask = ~ChunkAddrOffsetMask; +const size_t ParallelCompactData::Log2RegionSize = 9; // 512 words +const size_t ParallelCompactData::RegionSize = (size_t)1 << Log2RegionSize; +const size_t ParallelCompactData::RegionSizeBytes = + RegionSize << LogHeapWordSize; +const size_t ParallelCompactData::RegionSizeOffsetMask = RegionSize - 1; +const size_t ParallelCompactData::RegionAddrOffsetMask = RegionSizeBytes - 1; +const size_t ParallelCompactData::RegionAddrMask = ~RegionAddrOffsetMask; -// 32-bit: 128 words covers 4 bitmap words -// 64-bit: 128 words covers 2 bitmap words -const size_t ParallelCompactData::Log2BlockSize = 7; // 128 words -const size_t ParallelCompactData::BlockSize = (size_t)1 << Log2BlockSize; -const size_t ParallelCompactData::BlockOffsetMask = BlockSize - 1; -const size_t ParallelCompactData::BlockMask = ~BlockOffsetMask; +const ParallelCompactData::RegionData::region_sz_t +ParallelCompactData::RegionData::dc_shift = 27; -const size_t ParallelCompactData::BlocksPerChunk = ChunkSize / BlockSize; +const ParallelCompactData::RegionData::region_sz_t +ParallelCompactData::RegionData::dc_mask = ~0U << dc_shift; -const ParallelCompactData::ChunkData::chunk_sz_t -ParallelCompactData::ChunkData::dc_shift = 27; +const ParallelCompactData::RegionData::region_sz_t +ParallelCompactData::RegionData::dc_one = 0x1U << dc_shift; -const ParallelCompactData::ChunkData::chunk_sz_t -ParallelCompactData::ChunkData::dc_mask = ~0U << dc_shift; +const ParallelCompactData::RegionData::region_sz_t +ParallelCompactData::RegionData::los_mask = ~dc_mask; -const 
ParallelCompactData::ChunkData::chunk_sz_t -ParallelCompactData::ChunkData::dc_one = 0x1U << dc_shift; +const ParallelCompactData::RegionData::region_sz_t +ParallelCompactData::RegionData::dc_claimed = 0x8U << dc_shift; -const ParallelCompactData::ChunkData::chunk_sz_t -ParallelCompactData::ChunkData::los_mask = ~dc_mask; - -const ParallelCompactData::ChunkData::chunk_sz_t -ParallelCompactData::ChunkData::dc_claimed = 0x8U << dc_shift; - -const ParallelCompactData::ChunkData::chunk_sz_t -ParallelCompactData::ChunkData::dc_completed = 0xcU << dc_shift; - -#ifdef ASSERT -short ParallelCompactData::BlockData::_cur_phase = 0; -#endif +const ParallelCompactData::RegionData::region_sz_t +ParallelCompactData::RegionData::dc_completed = 0xcU << dc_shift; SpaceInfo PSParallelCompact::_space_info[PSParallelCompact::last_space_id]; bool PSParallelCompact::_print_phases = false; @@ -100,99 +88,12 @@ GrowableArray* PSParallelCompact::_last_gc_live_oops_moved_to = NULL; GrowableArray * PSParallelCompact::_last_gc_live_oops_size = NULL; #endif -// XXX beg - verification code; only works while we also mark in object headers -static void -verify_mark_bitmap(ParMarkBitMap& _mark_bitmap) -{ - ParallelScavengeHeap* heap = PSParallelCompact::gc_heap(); - - PSPermGen* perm_gen = heap->perm_gen(); - PSOldGen* old_gen = heap->old_gen(); - PSYoungGen* young_gen = heap->young_gen(); - - MutableSpace* perm_space = perm_gen->object_space(); - MutableSpace* old_space = old_gen->object_space(); - MutableSpace* eden_space = young_gen->eden_space(); - MutableSpace* from_space = young_gen->from_space(); - MutableSpace* to_space = young_gen->to_space(); - - // 'from_space' here is the survivor space at the lower address. 
- if (to_space->bottom() < from_space->bottom()) { - from_space = to_space; - to_space = young_gen->from_space(); - } - - HeapWord* boundaries[12]; - unsigned int bidx = 0; - const unsigned int bidx_max = sizeof(boundaries) / sizeof(boundaries[0]); - - boundaries[0] = perm_space->bottom(); - boundaries[1] = perm_space->top(); - boundaries[2] = old_space->bottom(); - boundaries[3] = old_space->top(); - boundaries[4] = eden_space->bottom(); - boundaries[5] = eden_space->top(); - boundaries[6] = from_space->bottom(); - boundaries[7] = from_space->top(); - boundaries[8] = to_space->bottom(); - boundaries[9] = to_space->top(); - boundaries[10] = to_space->end(); - boundaries[11] = to_space->end(); - - BitMap::idx_t beg_bit = 0; - BitMap::idx_t end_bit; - BitMap::idx_t tmp_bit; - const BitMap::idx_t last_bit = _mark_bitmap.size(); - do { - HeapWord* addr = _mark_bitmap.bit_to_addr(beg_bit); - if (_mark_bitmap.is_marked(beg_bit)) { - oop obj = (oop)addr; - assert(obj->is_gc_marked(), "obj header is not marked"); - end_bit = _mark_bitmap.find_obj_end(beg_bit, last_bit); - const size_t size = _mark_bitmap.obj_size(beg_bit, end_bit); - assert(size == (size_t)obj->size(), "end bit wrong?"); - beg_bit = _mark_bitmap.find_obj_beg(beg_bit + 1, last_bit); - assert(beg_bit > end_bit, "bit set in middle of an obj"); - } else { - if (addr >= boundaries[bidx] && addr < boundaries[bidx + 1]) { - // a dead object in the current space. - oop obj = (oop)addr; - end_bit = _mark_bitmap.addr_to_bit(addr + obj->size()); - assert(!obj->is_gc_marked(), "obj marked in header, not in bitmap"); - tmp_bit = beg_bit + 1; - beg_bit = _mark_bitmap.find_obj_beg(tmp_bit, end_bit); - assert(beg_bit == end_bit, "beg bit set in unmarked obj"); - beg_bit = _mark_bitmap.find_obj_end(tmp_bit, end_bit); - assert(beg_bit == end_bit, "end bit set in unmarked obj"); - } else if (addr < boundaries[bidx + 2]) { - // addr is between top in the current space and bottom in the next. 
- end_bit = beg_bit + pointer_delta(boundaries[bidx + 2], addr); - tmp_bit = beg_bit; - beg_bit = _mark_bitmap.find_obj_beg(tmp_bit, end_bit); - assert(beg_bit == end_bit, "beg bit set above top"); - beg_bit = _mark_bitmap.find_obj_end(tmp_bit, end_bit); - assert(beg_bit == end_bit, "end bit set above top"); - bidx += 2; - } else if (bidx < bidx_max - 2) { - bidx += 2; // ??? - } else { - tmp_bit = beg_bit; - beg_bit = _mark_bitmap.find_obj_beg(tmp_bit, last_bit); - assert(beg_bit == last_bit, "beg bit set outside heap"); - beg_bit = _mark_bitmap.find_obj_end(tmp_bit, last_bit); - assert(beg_bit == last_bit, "end bit set outside heap"); - } - } - } while (beg_bit < last_bit); -} -// XXX end - verification code; only works while we also mark in object headers - #ifndef PRODUCT const char* PSParallelCompact::space_names[] = { "perm", "old ", "eden", "from", "to " }; -void PSParallelCompact::print_chunk_ranges() +void PSParallelCompact::print_region_ranges() { tty->print_cr("space bottom top end new_top"); tty->print_cr("------ ---------- ---------- ---------- ----------"); @@ -203,31 +104,31 @@ void PSParallelCompact::print_chunk_ranges() SIZE_FORMAT_W(10) " " SIZE_FORMAT_W(10) " " SIZE_FORMAT_W(10) " " SIZE_FORMAT_W(10) " ", id, space_names[id], - summary_data().addr_to_chunk_idx(space->bottom()), - summary_data().addr_to_chunk_idx(space->top()), - summary_data().addr_to_chunk_idx(space->end()), - summary_data().addr_to_chunk_idx(_space_info[id].new_top())); + summary_data().addr_to_region_idx(space->bottom()), + summary_data().addr_to_region_idx(space->top()), + summary_data().addr_to_region_idx(space->end()), + summary_data().addr_to_region_idx(_space_info[id].new_top())); } } void -print_generic_summary_chunk(size_t i, const ParallelCompactData::ChunkData* c) +print_generic_summary_region(size_t i, const ParallelCompactData::RegionData* c) { -#define CHUNK_IDX_FORMAT SIZE_FORMAT_W(7) -#define CHUNK_DATA_FORMAT SIZE_FORMAT_W(5) +#define REGION_IDX_FORMAT 
SIZE_FORMAT_W(7) +#define REGION_DATA_FORMAT SIZE_FORMAT_W(5) ParallelCompactData& sd = PSParallelCompact::summary_data(); - size_t dci = c->destination() ? sd.addr_to_chunk_idx(c->destination()) : 0; - tty->print_cr(CHUNK_IDX_FORMAT " " PTR_FORMAT " " - CHUNK_IDX_FORMAT " " PTR_FORMAT " " - CHUNK_DATA_FORMAT " " CHUNK_DATA_FORMAT " " - CHUNK_DATA_FORMAT " " CHUNK_IDX_FORMAT " %d", + size_t dci = c->destination() ? sd.addr_to_region_idx(c->destination()) : 0; + tty->print_cr(REGION_IDX_FORMAT " " PTR_FORMAT " " + REGION_IDX_FORMAT " " PTR_FORMAT " " + REGION_DATA_FORMAT " " REGION_DATA_FORMAT " " + REGION_DATA_FORMAT " " REGION_IDX_FORMAT " %d", i, c->data_location(), dci, c->destination(), c->partial_obj_size(), c->live_obj_size(), - c->data_size(), c->source_chunk(), c->destination_count()); + c->data_size(), c->source_region(), c->destination_count()); -#undef CHUNK_IDX_FORMAT -#undef CHUNK_DATA_FORMAT +#undef REGION_IDX_FORMAT +#undef REGION_DATA_FORMAT } void @@ -236,14 +137,14 @@ print_generic_summary_data(ParallelCompactData& summary_data, HeapWord* const end_addr) { size_t total_words = 0; - size_t i = summary_data.addr_to_chunk_idx(beg_addr); - const size_t last = summary_data.addr_to_chunk_idx(end_addr); + size_t i = summary_data.addr_to_region_idx(beg_addr); + const size_t last = summary_data.addr_to_region_idx(end_addr); HeapWord* pdest = 0; while (i <= last) { - ParallelCompactData::ChunkData* c = summary_data.chunk(i); + ParallelCompactData::RegionData* c = summary_data.region(i); if (c->data_size() != 0 || c->destination() != pdest) { - print_generic_summary_chunk(i, c); + print_generic_summary_region(i, c); total_words += c->data_size(); pdest = c->destination(); } @@ -265,16 +166,16 @@ print_generic_summary_data(ParallelCompactData& summary_data, } void -print_initial_summary_chunk(size_t i, - const ParallelCompactData::ChunkData* c, - bool newline = true) +print_initial_summary_region(size_t i, + const ParallelCompactData::RegionData* c, + bool 
newline = true) { tty->print(SIZE_FORMAT_W(5) " " PTR_FORMAT " " SIZE_FORMAT_W(5) " " SIZE_FORMAT_W(5) " " SIZE_FORMAT_W(5) " " SIZE_FORMAT_W(5) " %d", i, c->destination(), c->partial_obj_size(), c->live_obj_size(), - c->data_size(), c->source_chunk(), c->destination_count()); + c->data_size(), c->source_region(), c->destination_count()); if (newline) tty->cr(); } @@ -285,47 +186,48 @@ print_initial_summary_data(ParallelCompactData& summary_data, return; } - const size_t chunk_size = ParallelCompactData::ChunkSize; - HeapWord* const top_aligned_up = summary_data.chunk_align_up(space->top()); - const size_t end_chunk = summary_data.addr_to_chunk_idx(top_aligned_up); - const ParallelCompactData::ChunkData* c = summary_data.chunk(end_chunk - 1); + const size_t region_size = ParallelCompactData::RegionSize; + typedef ParallelCompactData::RegionData RegionData; + HeapWord* const top_aligned_up = summary_data.region_align_up(space->top()); + const size_t end_region = summary_data.addr_to_region_idx(top_aligned_up); + const RegionData* c = summary_data.region(end_region - 1); HeapWord* end_addr = c->destination() + c->data_size(); const size_t live_in_space = pointer_delta(end_addr, space->bottom()); - // Print (and count) the full chunks at the beginning of the space. - size_t full_chunk_count = 0; - size_t i = summary_data.addr_to_chunk_idx(space->bottom()); - while (i < end_chunk && summary_data.chunk(i)->data_size() == chunk_size) { - print_initial_summary_chunk(i, summary_data.chunk(i)); - ++full_chunk_count; + // Print (and count) the full regions at the beginning of the space. 
+ size_t full_region_count = 0; + size_t i = summary_data.addr_to_region_idx(space->bottom()); + while (i < end_region && summary_data.region(i)->data_size() == region_size) { + print_initial_summary_region(i, summary_data.region(i)); + ++full_region_count; ++i; } - size_t live_to_right = live_in_space - full_chunk_count * chunk_size; + size_t live_to_right = live_in_space - full_region_count * region_size; double max_reclaimed_ratio = 0.0; - size_t max_reclaimed_ratio_chunk = 0; + size_t max_reclaimed_ratio_region = 0; size_t max_dead_to_right = 0; size_t max_live_to_right = 0; - // Print the 'reclaimed ratio' for chunks while there is something live in the - // chunk or to the right of it. The remaining chunks are empty (and + // Print the 'reclaimed ratio' for regions while there is something live in + // the region or to the right of it. The remaining regions are empty (and // uninteresting), and computing the ratio will result in division by 0. - while (i < end_chunk && live_to_right > 0) { - c = summary_data.chunk(i); - HeapWord* const chunk_addr = summary_data.chunk_to_addr(i); - const size_t used_to_right = pointer_delta(space->top(), chunk_addr); + while (i < end_region && live_to_right > 0) { + c = summary_data.region(i); + HeapWord* const region_addr = summary_data.region_to_addr(i); + const size_t used_to_right = pointer_delta(space->top(), region_addr); const size_t dead_to_right = used_to_right - live_to_right; const double reclaimed_ratio = double(dead_to_right) / live_to_right; if (reclaimed_ratio > max_reclaimed_ratio) { max_reclaimed_ratio = reclaimed_ratio; - max_reclaimed_ratio_chunk = i; + max_reclaimed_ratio_region = i; max_dead_to_right = dead_to_right; max_live_to_right = live_to_right; } - print_initial_summary_chunk(i, c, false); + print_initial_summary_region(i, c, false); tty->print_cr(" %12.10f " SIZE_FORMAT_W(10) " " SIZE_FORMAT_W(10), reclaimed_ratio, dead_to_right, live_to_right); @@ -333,14 +235,14 @@ 
print_initial_summary_data(ParallelCompactData& summary_data, ++i; } - // Any remaining chunks are empty. Print one more if there is one. - if (i < end_chunk) { - print_initial_summary_chunk(i, summary_data.chunk(i)); + // Any remaining regions are empty. Print one more if there is one. + if (i < end_region) { + print_initial_summary_region(i, summary_data.region(i)); } tty->print_cr("max: " SIZE_FORMAT_W(4) " d2r=" SIZE_FORMAT_W(10) " " "l2r=" SIZE_FORMAT_W(10) " max_ratio=%14.12f", - max_reclaimed_ratio_chunk, max_dead_to_right, + max_reclaimed_ratio_region, max_dead_to_right, max_live_to_right, max_reclaimed_ratio); } @@ -372,13 +274,9 @@ ParallelCompactData::ParallelCompactData() { _region_start = 0; - _chunk_vspace = 0; - _chunk_data = 0; - _chunk_count = 0; - - _block_vspace = 0; - _block_data = 0; - _block_count = 0; + _region_vspace = 0; + _region_data = 0; + _region_count = 0; } bool ParallelCompactData::initialize(MemRegion covered_region) @@ -387,18 +285,12 @@ bool ParallelCompactData::initialize(MemRegion covered_region) const size_t region_size = covered_region.word_size(); DEBUG_ONLY(_region_end = _region_start + region_size;) - assert(chunk_align_down(_region_start) == _region_start, + assert(region_align_down(_region_start) == _region_start, "region start not aligned"); - assert((region_size & ChunkSizeOffsetMask) == 0, - "region size not a multiple of ChunkSize"); + assert((region_size & RegionSizeOffsetMask) == 0, + "region size not a multiple of RegionSize"); - bool result = initialize_chunk_data(region_size); - - // Initialize the block data if it will be used for updating pointers, or if - // this is a debug build. 
- if (!UseParallelOldGCChunkPointerCalc || trueInDebug) { - result = result && initialize_block_data(region_size); - } + bool result = initialize_region_data(region_size); return result; } @@ -429,25 +321,13 @@ ParallelCompactData::create_vspace(size_t count, size_t element_size) return 0; } -bool ParallelCompactData::initialize_chunk_data(size_t region_size) +bool ParallelCompactData::initialize_region_data(size_t region_size) { - const size_t count = (region_size + ChunkSizeOffsetMask) >> Log2ChunkSize; - _chunk_vspace = create_vspace(count, sizeof(ChunkData)); - if (_chunk_vspace != 0) { - _chunk_data = (ChunkData*)_chunk_vspace->reserved_low_addr(); - _chunk_count = count; - return true; - } - return false; -} - -bool ParallelCompactData::initialize_block_data(size_t region_size) -{ - const size_t count = (region_size + BlockOffsetMask) >> Log2BlockSize; - _block_vspace = create_vspace(count, sizeof(BlockData)); - if (_block_vspace != 0) { - _block_data = (BlockData*)_block_vspace->reserved_low_addr(); - _block_count = count; + const size_t count = (region_size + RegionSizeOffsetMask) >> Log2RegionSize; + _region_vspace = create_vspace(count, sizeof(RegionData)); + if (_region_vspace != 0) { + _region_data = (RegionData*)_region_vspace->reserved_low_addr(); + _region_count = count; return true; } return false; @@ -455,38 +335,27 @@ bool ParallelCompactData::initialize_block_data(size_t region_size) void ParallelCompactData::clear() { - if (_block_data) { - memset(_block_data, 0, _block_vspace->committed_size()); - } - memset(_chunk_data, 0, _chunk_vspace->committed_size()); + memset(_region_data, 0, _region_vspace->committed_size()); } -void ParallelCompactData::clear_range(size_t beg_chunk, size_t end_chunk) { - assert(beg_chunk <= _chunk_count, "beg_chunk out of range"); - assert(end_chunk <= _chunk_count, "end_chunk out of range"); - assert(ChunkSize % BlockSize == 0, "ChunkSize not a multiple of BlockSize"); +void ParallelCompactData::clear_range(size_t 
beg_region, size_t end_region) { + assert(beg_region <= _region_count, "beg_region out of range"); + assert(end_region <= _region_count, "end_region out of range"); - const size_t chunk_cnt = end_chunk - beg_chunk; - - if (_block_data) { - const size_t blocks_per_chunk = ChunkSize / BlockSize; - const size_t beg_block = beg_chunk * blocks_per_chunk; - const size_t block_cnt = chunk_cnt * blocks_per_chunk; - memset(_block_data + beg_block, 0, block_cnt * sizeof(BlockData)); - } - memset(_chunk_data + beg_chunk, 0, chunk_cnt * sizeof(ChunkData)); + const size_t region_cnt = end_region - beg_region; + memset(_region_data + beg_region, 0, region_cnt * sizeof(RegionData)); } -HeapWord* ParallelCompactData::partial_obj_end(size_t chunk_idx) const +HeapWord* ParallelCompactData::partial_obj_end(size_t region_idx) const { - const ChunkData* cur_cp = chunk(chunk_idx); - const ChunkData* const end_cp = chunk(chunk_count() - 1); + const RegionData* cur_cp = region(region_idx); + const RegionData* const end_cp = region(region_count() - 1); - HeapWord* result = chunk_to_addr(chunk_idx); + HeapWord* result = region_to_addr(region_idx); if (cur_cp < end_cp) { do { result += cur_cp->partial_obj_size(); - } while (cur_cp->partial_obj_size() == ChunkSize && ++cur_cp < end_cp); + } while (cur_cp->partial_obj_size() == RegionSize && ++cur_cp < end_cp); } return result; } @@ -494,56 +363,56 @@ HeapWord* ParallelCompactData::partial_obj_end(size_t chunk_idx) const void ParallelCompactData::add_obj(HeapWord* addr, size_t len) { const size_t obj_ofs = pointer_delta(addr, _region_start); - const size_t beg_chunk = obj_ofs >> Log2ChunkSize; - const size_t end_chunk = (obj_ofs + len - 1) >> Log2ChunkSize; + const size_t beg_region = obj_ofs >> Log2RegionSize; + const size_t end_region = (obj_ofs + len - 1) >> Log2RegionSize; DEBUG_ONLY(Atomic::inc_ptr(&add_obj_count);) DEBUG_ONLY(Atomic::add_ptr(len, &add_obj_size);) - if (beg_chunk == end_chunk) { - // All in one chunk. 
- _chunk_data[beg_chunk].add_live_obj(len); + if (beg_region == end_region) { + // All in one region. + _region_data[beg_region].add_live_obj(len); return; } - // First chunk. - const size_t beg_ofs = chunk_offset(addr); - _chunk_data[beg_chunk].add_live_obj(ChunkSize - beg_ofs); + // First region. + const size_t beg_ofs = region_offset(addr); + _region_data[beg_region].add_live_obj(RegionSize - beg_ofs); klassOop klass = ((oop)addr)->klass(); - // Middle chunks--completely spanned by this object. - for (size_t chunk = beg_chunk + 1; chunk < end_chunk; ++chunk) { - _chunk_data[chunk].set_partial_obj_size(ChunkSize); - _chunk_data[chunk].set_partial_obj_addr(addr); + // Middle regions--completely spanned by this object. + for (size_t region = beg_region + 1; region < end_region; ++region) { + _region_data[region].set_partial_obj_size(RegionSize); + _region_data[region].set_partial_obj_addr(addr); } - // Last chunk. - const size_t end_ofs = chunk_offset(addr + len - 1); - _chunk_data[end_chunk].set_partial_obj_size(end_ofs + 1); - _chunk_data[end_chunk].set_partial_obj_addr(addr); + // Last region. 
+ const size_t end_ofs = region_offset(addr + len - 1); + _region_data[end_region].set_partial_obj_size(end_ofs + 1); + _region_data[end_region].set_partial_obj_addr(addr); } void ParallelCompactData::summarize_dense_prefix(HeapWord* beg, HeapWord* end) { - assert(chunk_offset(beg) == 0, "not ChunkSize aligned"); - assert(chunk_offset(end) == 0, "not ChunkSize aligned"); + assert(region_offset(beg) == 0, "not RegionSize aligned"); + assert(region_offset(end) == 0, "not RegionSize aligned"); - size_t cur_chunk = addr_to_chunk_idx(beg); - const size_t end_chunk = addr_to_chunk_idx(end); + size_t cur_region = addr_to_region_idx(beg); + const size_t end_region = addr_to_region_idx(end); HeapWord* addr = beg; - while (cur_chunk < end_chunk) { - _chunk_data[cur_chunk].set_destination(addr); - _chunk_data[cur_chunk].set_destination_count(0); - _chunk_data[cur_chunk].set_source_chunk(cur_chunk); - _chunk_data[cur_chunk].set_data_location(addr); + while (cur_region < end_region) { + _region_data[cur_region].set_destination(addr); + _region_data[cur_region].set_destination_count(0); + _region_data[cur_region].set_source_region(cur_region); + _region_data[cur_region].set_data_location(addr); - // Update live_obj_size so the chunk appears completely full. - size_t live_size = ChunkSize - _chunk_data[cur_chunk].partial_obj_size(); - _chunk_data[cur_chunk].set_live_obj_size(live_size); + // Update live_obj_size so the region appears completely full. + size_t live_size = RegionSize - _region_data[cur_region].partial_obj_size(); + _region_data[cur_region].set_live_obj_size(live_size); - ++cur_chunk; - addr += ChunkSize; + ++cur_region; + addr += RegionSize; } } @@ -552,7 +421,7 @@ bool ParallelCompactData::summarize(HeapWord* target_beg, HeapWord* target_end, HeapWord** target_next, HeapWord** source_next) { // This is too strict. 
- // assert(chunk_offset(source_beg) == 0, "not ChunkSize aligned"); + // assert(region_offset(source_beg) == 0, "not RegionSize aligned"); if (TraceParallelOldGCSummaryPhase) { tty->print_cr("tb=" PTR_FORMAT " te=" PTR_FORMAT " " @@ -564,125 +433,93 @@ bool ParallelCompactData::summarize(HeapWord* target_beg, HeapWord* target_end, source_next != 0 ? *source_next : (HeapWord*) 0); } - size_t cur_chunk = addr_to_chunk_idx(source_beg); - const size_t end_chunk = addr_to_chunk_idx(chunk_align_up(source_end)); + size_t cur_region = addr_to_region_idx(source_beg); + const size_t end_region = addr_to_region_idx(region_align_up(source_end)); HeapWord *dest_addr = target_beg; - while (cur_chunk < end_chunk) { - size_t words = _chunk_data[cur_chunk].data_size(); + while (cur_region < end_region) { + size_t words = _region_data[cur_region].data_size(); #if 1 assert(pointer_delta(target_end, dest_addr) >= words, "source region does not fit into target region"); #else - // XXX - need some work on the corner cases here. If the chunk does not - // fit, then must either make sure any partial_obj from the chunk fits, or - // 'undo' the initial part of the partial_obj that is in the previous chunk. + // XXX - need some work on the corner cases here. If the region does not + // fit, then must either make sure any partial_obj from the region fits, or + // "undo" the initial part of the partial_obj that is in the previous + // region. if (dest_addr + words >= target_end) { // Let the caller know where to continue. *target_next = dest_addr; - *source_next = chunk_to_addr(cur_chunk); + *source_next = region_to_addr(cur_region); return false; } #endif // #if 1 - _chunk_data[cur_chunk].set_destination(dest_addr); + _region_data[cur_region].set_destination(dest_addr); - // Set the destination_count for cur_chunk, and if necessary, update - // source_chunk for a destination chunk. 
The source_chunk field is updated - // if cur_chunk is the first (left-most) chunk to be copied to a destination - // chunk. + // Set the destination_count for cur_region, and if necessary, update + // source_region for a destination region. The source_region field is + // updated if cur_region is the first (left-most) region to be copied to a + // destination region. // - // The destination_count calculation is a bit subtle. A chunk that has data - // that compacts into itself does not count itself as a destination. This - // maintains the invariant that a zero count means the chunk is available - // and can be claimed and then filled. + // The destination_count calculation is a bit subtle. A region that has + // data that compacts into itself does not count itself as a destination. + // This maintains the invariant that a zero count means the region is + // available and can be claimed and then filled. if (words > 0) { HeapWord* const last_addr = dest_addr + words - 1; - const size_t dest_chunk_1 = addr_to_chunk_idx(dest_addr); - const size_t dest_chunk_2 = addr_to_chunk_idx(last_addr); + const size_t dest_region_1 = addr_to_region_idx(dest_addr); + const size_t dest_region_2 = addr_to_region_idx(last_addr); #if 0 - // Initially assume that the destination chunks will be the same and + // Initially assume that the destination regions will be the same and // adjust the value below if necessary. Under this assumption, if - // cur_chunk == dest_chunk_2, then cur_chunk will be compacted completely - // into itself. - uint destination_count = cur_chunk == dest_chunk_2 ? 0 : 1; - if (dest_chunk_1 != dest_chunk_2) { - // Destination chunks differ; adjust destination_count. + // cur_region == dest_region_2, then cur_region will be compacted + // completely into itself. + uint destination_count = cur_region == dest_region_2 ? 0 : 1; + if (dest_region_1 != dest_region_2) { + // Destination regions differ; adjust destination_count. 
destination_count += 1; - // Data from cur_chunk will be copied to the start of dest_chunk_2. - _chunk_data[dest_chunk_2].set_source_chunk(cur_chunk); - } else if (chunk_offset(dest_addr) == 0) { - // Data from cur_chunk will be copied to the start of the destination - // chunk. - _chunk_data[dest_chunk_1].set_source_chunk(cur_chunk); + // Data from cur_region will be copied to the start of dest_region_2. + _region_data[dest_region_2].set_source_region(cur_region); + } else if (region_offset(dest_addr) == 0) { + // Data from cur_region will be copied to the start of the destination + // region. + _region_data[dest_region_1].set_source_region(cur_region); } #else - // Initially assume that the destination chunks will be different and + // Initially assume that the destination regions will be different and // adjust the value below if necessary. Under this assumption, if - // cur_chunk == dest_chunk2, then cur_chunk will be compacted partially - // into dest_chunk_1 and partially into itself. - uint destination_count = cur_chunk == dest_chunk_2 ? 1 : 2; - if (dest_chunk_1 != dest_chunk_2) { - // Data from cur_chunk will be copied to the start of dest_chunk_2. - _chunk_data[dest_chunk_2].set_source_chunk(cur_chunk); + // cur_region == dest_region2, then cur_region will be compacted partially + // into dest_region_1 and partially into itself. + uint destination_count = cur_region == dest_region_2 ? 1 : 2; + if (dest_region_1 != dest_region_2) { + // Data from cur_region will be copied to the start of dest_region_2. + _region_data[dest_region_2].set_source_region(cur_region); } else { - // Destination chunks are the same; adjust destination_count. + // Destination regions are the same; adjust destination_count. destination_count -= 1; - if (chunk_offset(dest_addr) == 0) { - // Data from cur_chunk will be copied to the start of the destination - // chunk. 
- _chunk_data[dest_chunk_1].set_source_chunk(cur_chunk); + if (region_offset(dest_addr) == 0) { + // Data from cur_region will be copied to the start of the destination + // region. + _region_data[dest_region_1].set_source_region(cur_region); } } #endif // #if 0 - _chunk_data[cur_chunk].set_destination_count(destination_count); - _chunk_data[cur_chunk].set_data_location(chunk_to_addr(cur_chunk)); + _region_data[cur_region].set_destination_count(destination_count); + _region_data[cur_region].set_data_location(region_to_addr(cur_region)); dest_addr += words; } - ++cur_chunk; + ++cur_region; } *target_next = dest_addr; return true; } -bool ParallelCompactData::partial_obj_ends_in_block(size_t block_index) { - HeapWord* block_addr = block_to_addr(block_index); - HeapWord* block_end_addr = block_addr + BlockSize; - size_t chunk_index = addr_to_chunk_idx(block_addr); - HeapWord* partial_obj_end_addr = partial_obj_end(chunk_index); - - // An object that ends at the end of the block, ends - // in the block (the last word of the object is to - // the left of the end). - if ((block_addr < partial_obj_end_addr) && - (partial_obj_end_addr <= block_end_addr)) { - return true; - } - - return false; -} - HeapWord* ParallelCompactData::calc_new_pointer(HeapWord* addr) { - HeapWord* result = NULL; - if (UseParallelOldGCChunkPointerCalc) { - result = chunk_calc_new_pointer(addr); - } else { - result = block_calc_new_pointer(addr); - } - return result; -} - -// This method is overly complicated (expensive) to be called -// for every reference. -// Try to restructure this so that a NULL is returned if -// the object is dead. But don't wast the cycles to explicitly check -// that it is dead since only live objects should be passed in. 
- -HeapWord* ParallelCompactData::chunk_calc_new_pointer(HeapWord* addr) { assert(addr != NULL, "Should detect NULL oop earlier"); assert(PSParallelCompact::gc_heap()->is_in(addr), "addr not in heap"); #ifdef ASSERT @@ -692,30 +529,30 @@ HeapWord* ParallelCompactData::chunk_calc_new_pointer(HeapWord* addr) { #endif assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "obj not marked"); - // Chunk covering the object. - size_t chunk_index = addr_to_chunk_idx(addr); - const ChunkData* const chunk_ptr = chunk(chunk_index); - HeapWord* const chunk_addr = chunk_align_down(addr); + // Region covering the object. + size_t region_index = addr_to_region_idx(addr); + const RegionData* const region_ptr = region(region_index); + HeapWord* const region_addr = region_align_down(addr); - assert(addr < chunk_addr + ChunkSize, "Chunk does not cover object"); - assert(addr_to_chunk_ptr(chunk_addr) == chunk_ptr, "sanity check"); + assert(addr < region_addr + RegionSize, "Region does not cover object"); + assert(addr_to_region_ptr(region_addr) == region_ptr, "sanity check"); - HeapWord* result = chunk_ptr->destination(); + HeapWord* result = region_ptr->destination(); - // If all the data in the chunk is live, then the new location of the object - // can be calculated from the destination of the chunk plus the offset of the - // object in the chunk. - if (chunk_ptr->data_size() == ChunkSize) { - result += pointer_delta(addr, chunk_addr); + // If all the data in the region is live, then the new location of the object + // can be calculated from the destination of the region plus the offset of the + // object in the region. 
+ if (region_ptr->data_size() == RegionSize) { + result += pointer_delta(addr, region_addr); return result; } // The new location of the object is - // chunk destination + - // size of the partial object extending onto the chunk + - // sizes of the live objects in the Chunk that are to the left of addr - const size_t partial_obj_size = chunk_ptr->partial_obj_size(); - HeapWord* const search_start = chunk_addr + partial_obj_size; + // region destination + + // size of the partial object extending onto the region + + // sizes of the live objects in the Region that are to the left of addr + const size_t partial_obj_size = region_ptr->partial_obj_size(); + HeapWord* const search_start = region_addr + partial_obj_size; const ParMarkBitMap* bitmap = PSParallelCompact::mark_bitmap(); size_t live_to_left = bitmap->live_words_in_range(search_start, oop(addr)); @@ -725,50 +562,6 @@ HeapWord* ParallelCompactData::chunk_calc_new_pointer(HeapWord* addr) { return result; } -HeapWord* ParallelCompactData::block_calc_new_pointer(HeapWord* addr) { - assert(addr != NULL, "Should detect NULL oop earlier"); - assert(PSParallelCompact::gc_heap()->is_in(addr), "addr not in heap"); -#ifdef ASSERT - if (PSParallelCompact::mark_bitmap()->is_unmarked(addr)) { - gclog_or_tty->print_cr("calc_new_pointer:: addr " PTR_FORMAT, addr); - } -#endif - assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "obj not marked"); - - // Chunk covering the object. - size_t chunk_index = addr_to_chunk_idx(addr); - const ChunkData* const chunk_ptr = chunk(chunk_index); - HeapWord* const chunk_addr = chunk_align_down(addr); - - assert(addr < chunk_addr + ChunkSize, "Chunk does not cover object"); - assert(addr_to_chunk_ptr(chunk_addr) == chunk_ptr, "sanity check"); - - HeapWord* result = chunk_ptr->destination(); - - // If all the data in the chunk is live, then the new location of the object - // can be calculated from the destination of the chunk plus the offset of the - // object in the chunk. 
- if (chunk_ptr->data_size() == ChunkSize) { - result += pointer_delta(addr, chunk_addr); - return result; - } - - // The new location of the object is - // chunk destination + - // block offset + - // sizes of the live objects in the Block that are to the left of addr - const size_t block_offset = addr_to_block_ptr(addr)->offset(); - HeapWord* const search_start = chunk_addr + block_offset; - - const ParMarkBitMap* bitmap = PSParallelCompact::mark_bitmap(); - size_t live_to_left = bitmap->live_words_in_range(search_start, oop(addr)); - - result += block_offset + live_to_left; - assert(result <= addr, "object cannot move to the right"); - assert(result == chunk_calc_new_pointer(addr), "Should match"); - return result; -} - klassOop ParallelCompactData::calc_new_klass(klassOop old_klass) { klassOop updated_klass; if (PSParallelCompact::should_update_klass(old_klass)) { @@ -792,15 +585,14 @@ void ParallelCompactData::verify_clear(const PSVirtualSpace* vspace) void ParallelCompactData::verify_clear() { - verify_clear(_chunk_vspace); - verify_clear(_block_vspace); + verify_clear(_region_vspace); } #endif // #ifdef ASSERT #ifdef NOT_PRODUCT -ParallelCompactData::ChunkData* debug_chunk(size_t chunk_index) { +ParallelCompactData::RegionData* debug_region(size_t region_index) { ParallelCompactData& sd = PSParallelCompact::summary_data(); - return sd.chunk(chunk_index); + return sd.region(region_index); } #endif @@ -953,10 +745,10 @@ PSParallelCompact::clear_data_covering_space(SpaceId id) const idx_t end_bit = BitMap::word_align_up(_mark_bitmap.addr_to_bit(top)); _mark_bitmap.clear_range(beg_bit, end_bit); - const size_t beg_chunk = _summary_data.addr_to_chunk_idx(bot); - const size_t end_chunk = - _summary_data.addr_to_chunk_idx(_summary_data.chunk_align_up(max_top)); - _summary_data.clear_range(beg_chunk, end_chunk); + const size_t beg_region = _summary_data.addr_to_region_idx(bot); + const size_t end_region = + 
_summary_data.addr_to_region_idx(_summary_data.region_align_up(max_top)); + _summary_data.clear_range(beg_region, end_region); } void PSParallelCompact::pre_compact(PreGCValues* pre_gc_values) @@ -1072,19 +864,19 @@ HeapWord* PSParallelCompact::compute_dense_prefix_via_density(const SpaceId id, bool maximum_compaction) { - const size_t chunk_size = ParallelCompactData::ChunkSize; + const size_t region_size = ParallelCompactData::RegionSize; const ParallelCompactData& sd = summary_data(); const MutableSpace* const space = _space_info[id].space(); - HeapWord* const top_aligned_up = sd.chunk_align_up(space->top()); - const ChunkData* const beg_cp = sd.addr_to_chunk_ptr(space->bottom()); - const ChunkData* const end_cp = sd.addr_to_chunk_ptr(top_aligned_up); + HeapWord* const top_aligned_up = sd.region_align_up(space->top()); + const RegionData* const beg_cp = sd.addr_to_region_ptr(space->bottom()); + const RegionData* const end_cp = sd.addr_to_region_ptr(top_aligned_up); - // Skip full chunks at the beginning of the space--they are necessarily part + // Skip full regions at the beginning of the space--they are necessarily part // of the dense prefix. size_t full_count = 0; - const ChunkData* cp; - for (cp = beg_cp; cp < end_cp && cp->data_size() == chunk_size; ++cp) { + const RegionData* cp; + for (cp = beg_cp; cp < end_cp && cp->data_size() == region_size; ++cp) { ++full_count; } @@ -1093,7 +885,7 @@ PSParallelCompact::compute_dense_prefix_via_density(const SpaceId id, const bool interval_ended = gcs_since_max > HeapMaximumCompactionInterval; if (maximum_compaction || cp == end_cp || interval_ended) { _maximum_compaction_gc_num = total_invocations(); - return sd.chunk_to_addr(cp); + return sd.region_to_addr(cp); } HeapWord* const new_top = _space_info[id].new_top(); @@ -1116,52 +908,53 @@ PSParallelCompact::compute_dense_prefix_via_density(const SpaceId id, } // XXX - Use binary search? 
- HeapWord* dense_prefix = sd.chunk_to_addr(cp); - const ChunkData* full_cp = cp; - const ChunkData* const top_cp = sd.addr_to_chunk_ptr(space->top() - 1); + HeapWord* dense_prefix = sd.region_to_addr(cp); + const RegionData* full_cp = cp; + const RegionData* const top_cp = sd.addr_to_region_ptr(space->top() - 1); while (cp < end_cp) { - HeapWord* chunk_destination = cp->destination(); - const size_t cur_deadwood = pointer_delta(dense_prefix, chunk_destination); + HeapWord* region_destination = cp->destination(); + const size_t cur_deadwood = pointer_delta(dense_prefix, region_destination); if (TraceParallelOldGCDensePrefix && Verbose) { tty->print_cr("c#=" SIZE_FORMAT_W(4) " dst=" PTR_FORMAT " " "dp=" SIZE_FORMAT_W(8) " " "cdw=" SIZE_FORMAT_W(8), - sd.chunk(cp), chunk_destination, + sd.region(cp), region_destination, dense_prefix, cur_deadwood); } if (cur_deadwood >= deadwood_goal) { - // Found the chunk that has the correct amount of deadwood to the left. - // This typically occurs after crossing a fairly sparse set of chunks, so - // iterate backwards over those sparse chunks, looking for the chunk that - // has the lowest density of live objects 'to the right.' - size_t space_to_left = sd.chunk(cp) * chunk_size; + // Found the region that has the correct amount of deadwood to the left. + // This typically occurs after crossing a fairly sparse set of regions, so + // iterate backwards over those sparse regions, looking for the region + // that has the lowest density of live objects 'to the right.' 
+ size_t space_to_left = sd.region(cp) * region_size; size_t live_to_left = space_to_left - cur_deadwood; size_t space_to_right = space_capacity - space_to_left; size_t live_to_right = space_live - live_to_left; double density_to_right = double(live_to_right) / space_to_right; while (cp > full_cp) { --cp; - const size_t prev_chunk_live_to_right = live_to_right - cp->data_size(); - const size_t prev_chunk_space_to_right = space_to_right + chunk_size; - double prev_chunk_density_to_right = - double(prev_chunk_live_to_right) / prev_chunk_space_to_right; - if (density_to_right <= prev_chunk_density_to_right) { + const size_t prev_region_live_to_right = live_to_right - + cp->data_size(); + const size_t prev_region_space_to_right = space_to_right + region_size; + double prev_region_density_to_right = + double(prev_region_live_to_right) / prev_region_space_to_right; + if (density_to_right <= prev_region_density_to_right) { return dense_prefix; } if (TraceParallelOldGCDensePrefix && Verbose) { tty->print_cr("backing up from c=" SIZE_FORMAT_W(4) " d2r=%10.8f " - "pc_d2r=%10.8f", sd.chunk(cp), density_to_right, - prev_chunk_density_to_right); + "pc_d2r=%10.8f", sd.region(cp), density_to_right, + prev_region_density_to_right); } - dense_prefix -= chunk_size; - live_to_right = prev_chunk_live_to_right; - space_to_right = prev_chunk_space_to_right; - density_to_right = prev_chunk_density_to_right; + dense_prefix -= region_size; + live_to_right = prev_region_live_to_right; + space_to_right = prev_region_space_to_right; + density_to_right = prev_region_density_to_right; } return dense_prefix; } - dense_prefix += chunk_size; + dense_prefix += region_size; ++cp; } @@ -1174,8 +967,8 @@ void PSParallelCompact::print_dense_prefix_stats(const char* const algorithm, const bool maximum_compaction, HeapWord* const addr) { - const size_t chunk_idx = summary_data().addr_to_chunk_idx(addr); - ChunkData* const cp = summary_data().chunk(chunk_idx); + const size_t region_idx = 
summary_data().addr_to_region_idx(addr); + RegionData* const cp = summary_data().region(region_idx); const MutableSpace* const space = _space_info[id].space(); HeapWord* const new_top = _space_info[id].new_top(); @@ -1191,7 +984,7 @@ void PSParallelCompact::print_dense_prefix_stats(const char* const algorithm, "d2l=" SIZE_FORMAT " d2l%%=%6.4f " "d2r=" SIZE_FORMAT " l2r=" SIZE_FORMAT " ratio=%10.8f", - algorithm, addr, chunk_idx, + algorithm, addr, region_idx, space_live, dead_to_left, dead_to_left_pct, dead_to_right, live_to_right, @@ -1253,52 +1046,52 @@ double PSParallelCompact::dead_wood_limiter(double density, size_t min_percent) return MAX2(limit, 0.0); } -ParallelCompactData::ChunkData* -PSParallelCompact::first_dead_space_chunk(const ChunkData* beg, - const ChunkData* end) +ParallelCompactData::RegionData* +PSParallelCompact::first_dead_space_region(const RegionData* beg, + const RegionData* end) { - const size_t chunk_size = ParallelCompactData::ChunkSize; + const size_t region_size = ParallelCompactData::RegionSize; ParallelCompactData& sd = summary_data(); - size_t left = sd.chunk(beg); - size_t right = end > beg ? sd.chunk(end) - 1 : left; + size_t left = sd.region(beg); + size_t right = end > beg ? sd.region(end) - 1 : left; // Binary search. while (left < right) { // Equivalent to (left + right) / 2, but does not overflow. 
const size_t middle = left + (right - left) / 2; - ChunkData* const middle_ptr = sd.chunk(middle); + RegionData* const middle_ptr = sd.region(middle); HeapWord* const dest = middle_ptr->destination(); - HeapWord* const addr = sd.chunk_to_addr(middle); + HeapWord* const addr = sd.region_to_addr(middle); assert(dest != NULL, "sanity"); assert(dest <= addr, "must move left"); if (middle > left && dest < addr) { right = middle - 1; - } else if (middle < right && middle_ptr->data_size() == chunk_size) { + } else if (middle < right && middle_ptr->data_size() == region_size) { left = middle + 1; } else { return middle_ptr; } } - return sd.chunk(left); + return sd.region(left); } -ParallelCompactData::ChunkData* -PSParallelCompact::dead_wood_limit_chunk(const ChunkData* beg, - const ChunkData* end, - size_t dead_words) +ParallelCompactData::RegionData* +PSParallelCompact::dead_wood_limit_region(const RegionData* beg, + const RegionData* end, + size_t dead_words) { ParallelCompactData& sd = summary_data(); - size_t left = sd.chunk(beg); - size_t right = end > beg ? sd.chunk(end) - 1 : left; + size_t left = sd.region(beg); + size_t right = end > beg ? sd.region(end) - 1 : left; // Binary search. while (left < right) { // Equivalent to (left + right) / 2, but does not overflow. const size_t middle = left + (right - left) / 2; - ChunkData* const middle_ptr = sd.chunk(middle); + RegionData* const middle_ptr = sd.region(middle); HeapWord* const dest = middle_ptr->destination(); - HeapWord* const addr = sd.chunk_to_addr(middle); + HeapWord* const addr = sd.region_to_addr(middle); assert(dest != NULL, "sanity"); assert(dest <= addr, "must move left"); @@ -1311,13 +1104,13 @@ PSParallelCompact::dead_wood_limit_chunk(const ChunkData* beg, return middle_ptr; } } - return sd.chunk(left); + return sd.region(left); } // The result is valid during the summary phase, after the initial summarization // of each space into itself, and before final summarization. 
inline double -PSParallelCompact::reclaimed_ratio(const ChunkData* const cp, +PSParallelCompact::reclaimed_ratio(const RegionData* const cp, HeapWord* const bottom, HeapWord* const top, HeapWord* const new_top) @@ -1331,12 +1124,13 @@ PSParallelCompact::reclaimed_ratio(const ChunkData* const cp, assert(top >= new_top, "summary data problem?"); assert(new_top > bottom, "space is empty; should not be here"); assert(new_top >= cp->destination(), "sanity"); - assert(top >= sd.chunk_to_addr(cp), "sanity"); + assert(top >= sd.region_to_addr(cp), "sanity"); HeapWord* const destination = cp->destination(); const size_t dense_prefix_live = pointer_delta(destination, bottom); const size_t compacted_region_live = pointer_delta(new_top, destination); - const size_t compacted_region_used = pointer_delta(top, sd.chunk_to_addr(cp)); + const size_t compacted_region_used = pointer_delta(top, + sd.region_to_addr(cp)); const size_t reclaimable = compacted_region_used - compacted_region_live; const double divisor = dense_prefix_live + 1.25 * compacted_region_live; @@ -1344,39 +1138,40 @@ PSParallelCompact::reclaimed_ratio(const ChunkData* const cp, } // Return the address of the end of the dense prefix, a.k.a. the start of the -// compacted region. The address is always on a chunk boundary. +// compacted region. The address is always on a region boundary. // -// Completely full chunks at the left are skipped, since no compaction can occur -// in those chunks. Then the maximum amount of dead wood to allow is computed, -// based on the density (amount live / capacity) of the generation; the chunk -// with approximately that amount of dead space to the left is identified as the -// limit chunk. Chunks between the last completely full chunk and the limit -// chunk are scanned and the one that has the best (maximum) reclaimed_ratio() -// is selected. +// Completely full regions at the left are skipped, since no compaction can +// occur in those regions. 
Then the maximum amount of dead wood to allow is +// computed, based on the density (amount live / capacity) of the generation; +// the region with approximately that amount of dead space to the left is +// identified as the limit region. Regions between the last completely full +// region and the limit region are scanned and the one that has the best +// (maximum) reclaimed_ratio() is selected. HeapWord* PSParallelCompact::compute_dense_prefix(const SpaceId id, bool maximum_compaction) { - const size_t chunk_size = ParallelCompactData::ChunkSize; + const size_t region_size = ParallelCompactData::RegionSize; const ParallelCompactData& sd = summary_data(); const MutableSpace* const space = _space_info[id].space(); HeapWord* const top = space->top(); - HeapWord* const top_aligned_up = sd.chunk_align_up(top); + HeapWord* const top_aligned_up = sd.region_align_up(top); HeapWord* const new_top = _space_info[id].new_top(); - HeapWord* const new_top_aligned_up = sd.chunk_align_up(new_top); + HeapWord* const new_top_aligned_up = sd.region_align_up(new_top); HeapWord* const bottom = space->bottom(); - const ChunkData* const beg_cp = sd.addr_to_chunk_ptr(bottom); - const ChunkData* const top_cp = sd.addr_to_chunk_ptr(top_aligned_up); - const ChunkData* const new_top_cp = sd.addr_to_chunk_ptr(new_top_aligned_up); + const RegionData* const beg_cp = sd.addr_to_region_ptr(bottom); + const RegionData* const top_cp = sd.addr_to_region_ptr(top_aligned_up); + const RegionData* const new_top_cp = + sd.addr_to_region_ptr(new_top_aligned_up); - // Skip full chunks at the beginning of the space--they are necessarily part + // Skip full regions at the beginning of the space--they are necessarily part // of the dense prefix. 
- const ChunkData* const full_cp = first_dead_space_chunk(beg_cp, new_top_cp); - assert(full_cp->destination() == sd.chunk_to_addr(full_cp) || + const RegionData* const full_cp = first_dead_space_region(beg_cp, new_top_cp); + assert(full_cp->destination() == sd.region_to_addr(full_cp) || space->is_empty(), "no dead space allowed to the left"); - assert(full_cp->data_size() < chunk_size || full_cp == new_top_cp - 1, - "chunk must have dead space"); + assert(full_cp->data_size() < region_size || full_cp == new_top_cp - 1, + "region must have dead space"); // The gc number is saved whenever a maximum compaction is done, and used to // determine when the maximum compaction interval has expired. This avoids @@ -1387,7 +1182,7 @@ PSParallelCompact::compute_dense_prefix(const SpaceId id, total_invocations() == HeapFirstMaximumCompactionCount; if (maximum_compaction || full_cp == top_cp || interval_ended) { _maximum_compaction_gc_num = total_invocations(); - return sd.chunk_to_addr(full_cp); + return sd.region_to_addr(full_cp); } const size_t space_live = pointer_delta(new_top, bottom); @@ -1413,15 +1208,15 @@ PSParallelCompact::compute_dense_prefix(const SpaceId id, dead_wood_max, dead_wood_limit); } - // Locate the chunk with the desired amount of dead space to the left. - const ChunkData* const limit_cp = - dead_wood_limit_chunk(full_cp, top_cp, dead_wood_limit); + // Locate the region with the desired amount of dead space to the left. + const RegionData* const limit_cp = + dead_wood_limit_region(full_cp, top_cp, dead_wood_limit); - // Scan from the first chunk with dead space to the limit chunk and find the + // Scan from the first region with dead space to the limit region and find the // one with the best (largest) reclaimed ratio. 
double best_ratio = 0.0; - const ChunkData* best_cp = full_cp; - for (const ChunkData* cp = full_cp; cp < limit_cp; ++cp) { + const RegionData* best_cp = full_cp; + for (const RegionData* cp = full_cp; cp < limit_cp; ++cp) { double tmp_ratio = reclaimed_ratio(cp, bottom, top, new_top); if (tmp_ratio > best_ratio) { best_cp = cp; @@ -1430,18 +1225,18 @@ PSParallelCompact::compute_dense_prefix(const SpaceId id, } #if 0 - // Something to consider: if the chunk with the best ratio is 'close to' the - // first chunk w/free space, choose the first chunk with free space - // ("first-free"). The first-free chunk is usually near the start of the + // Something to consider: if the region with the best ratio is 'close to' the + // first region w/free space, choose the first region with free space + // ("first-free"). The first-free region is usually near the start of the // heap, which means we are copying most of the heap already, so copy a bit // more to get complete compaction. - if (pointer_delta(best_cp, full_cp, sizeof(ChunkData)) < 4) { + if (pointer_delta(best_cp, full_cp, sizeof(RegionData)) < 4) { _maximum_compaction_gc_num = total_invocations(); best_cp = full_cp; } #endif // #if 0 - return sd.chunk_to_addr(best_cp); + return sd.region_to_addr(best_cp); } void PSParallelCompact::summarize_spaces_quick() @@ -1459,9 +1254,9 @@ void PSParallelCompact::summarize_spaces_quick() void PSParallelCompact::fill_dense_prefix_end(SpaceId id) { HeapWord* const dense_prefix_end = dense_prefix(id); - const ChunkData* chunk = _summary_data.addr_to_chunk_ptr(dense_prefix_end); + const RegionData* region = _summary_data.addr_to_region_ptr(dense_prefix_end); const idx_t dense_prefix_bit = _mark_bitmap.addr_to_bit(dense_prefix_end); - if (dead_space_crosses_boundary(chunk, dense_prefix_bit)) { + if (dead_space_crosses_boundary(region, dense_prefix_bit)) { // Only enough dead space is filled so that any remaining dead space to the // left is larger than the minimum filler object. 
(The remainder is filled // during the copy/update phase.) @@ -1552,7 +1347,7 @@ PSParallelCompact::summarize_space(SpaceId id, bool maximum_compaction) fill_dense_prefix_end(id); } - // Compute the destination of each Chunk, and thus each object. + // Compute the destination of each Region, and thus each object. _summary_data.summarize_dense_prefix(space->bottom(), dense_prefix_end); _summary_data.summarize(dense_prefix_end, space->end(), dense_prefix_end, space->top(), @@ -1560,19 +1355,19 @@ PSParallelCompact::summarize_space(SpaceId id, bool maximum_compaction) } if (TraceParallelOldGCSummaryPhase) { - const size_t chunk_size = ParallelCompactData::ChunkSize; + const size_t region_size = ParallelCompactData::RegionSize; HeapWord* const dense_prefix_end = _space_info[id].dense_prefix(); - const size_t dp_chunk = _summary_data.addr_to_chunk_idx(dense_prefix_end); + const size_t dp_region = _summary_data.addr_to_region_idx(dense_prefix_end); const size_t dp_words = pointer_delta(dense_prefix_end, space->bottom()); HeapWord* const new_top = _space_info[id].new_top(); - const HeapWord* nt_aligned_up = _summary_data.chunk_align_up(new_top); + const HeapWord* nt_aligned_up = _summary_data.region_align_up(new_top); const size_t cr_words = pointer_delta(nt_aligned_up, dense_prefix_end); tty->print_cr("id=%d cap=" SIZE_FORMAT " dp=" PTR_FORMAT " " - "dp_chunk=" SIZE_FORMAT " " "dp_count=" SIZE_FORMAT " " + "dp_region=" SIZE_FORMAT " " "dp_count=" SIZE_FORMAT " " "cr_count=" SIZE_FORMAT " " "nt=" PTR_FORMAT, id, space->capacity_in_words(), dense_prefix_end, - dp_chunk, dp_words / chunk_size, - cr_words / chunk_size, new_top); + dp_region, dp_words / region_size, + cr_words / region_size, new_top); } } @@ -1584,11 +1379,6 @@ void PSParallelCompact::summary_phase(ParCompactionManager* cm, // trace("2"); #ifdef ASSERT - if (VerifyParallelOldWithMarkSweep && - (PSParallelCompact::total_invocations() % - VerifyParallelOldWithMarkSweepInterval) == 0) { - 
verify_mark_bitmap(_mark_bitmap); - } if (TraceParallelOldGCMarkingPhase) { tty->print_cr("add_obj_count=" SIZE_FORMAT " " "add_obj_bytes=" SIZE_FORMAT, @@ -1605,7 +1395,7 @@ void PSParallelCompact::summary_phase(ParCompactionManager* cm, if (TraceParallelOldGCSummaryPhase) { tty->print_cr("summary_phase: after summarizing each space to self"); Universe::print(); - NOT_PRODUCT(print_chunk_ranges()); + NOT_PRODUCT(print_region_ranges()); if (Verbose) { NOT_PRODUCT(print_initial_summary_data(_summary_data, _space_info)); } @@ -1651,14 +1441,15 @@ void PSParallelCompact::summary_phase(ParCompactionManager* cm, space->bottom(), space->top(), new_top_addr); - // Clear the source_chunk field for each chunk in the space. + // Clear the source_region field for each region in the space. HeapWord* const new_top = _space_info[id].new_top(); - HeapWord* const clear_end = _summary_data.chunk_align_up(new_top); - ChunkData* beg_chunk = _summary_data.addr_to_chunk_ptr(space->bottom()); - ChunkData* end_chunk = _summary_data.addr_to_chunk_ptr(clear_end); - while (beg_chunk < end_chunk) { - beg_chunk->set_source_chunk(0); - ++beg_chunk; + HeapWord* const clear_end = _summary_data.region_align_up(new_top); + RegionData* beg_region = + _summary_data.addr_to_region_ptr(space->bottom()); + RegionData* end_region = _summary_data.addr_to_region_ptr(clear_end); + while (beg_region < end_region) { + beg_region->set_source_region(0); + ++beg_region; } // Reset the new_top value for the space. @@ -1666,243 +1457,16 @@ void PSParallelCompact::summary_phase(ParCompactionManager* cm, } } - // Fill in the block data after any changes to the chunks have - // been made. 
-#ifdef ASSERT - summarize_blocks(cm, perm_space_id); - summarize_blocks(cm, old_space_id); -#else - if (!UseParallelOldGCChunkPointerCalc) { - summarize_blocks(cm, perm_space_id); - summarize_blocks(cm, old_space_id); - } -#endif - if (TraceParallelOldGCSummaryPhase) { tty->print_cr("summary_phase: after final summarization"); Universe::print(); - NOT_PRODUCT(print_chunk_ranges()); + NOT_PRODUCT(print_region_ranges()); if (Verbose) { NOT_PRODUCT(print_generic_summary_data(_summary_data, _space_info)); } } } -// Fill in the BlockData. -// Iterate over the spaces and within each space iterate over -// the chunks and fill in the BlockData for each chunk. - -void PSParallelCompact::summarize_blocks(ParCompactionManager* cm, - SpaceId first_compaction_space_id) { -#if 0 - DEBUG_ONLY(ParallelCompactData::BlockData::set_cur_phase(1);) - for (SpaceId cur_space_id = first_compaction_space_id; - cur_space_id != last_space_id; - cur_space_id = next_compaction_space_id(cur_space_id)) { - // Iterate over the chunks in the space - size_t start_chunk_index = - _summary_data.addr_to_chunk_idx(space(cur_space_id)->bottom()); - BitBlockUpdateClosure bbu(mark_bitmap(), - cm, - start_chunk_index); - // Iterate over blocks. - for (size_t chunk_index = start_chunk_index; - chunk_index < _summary_data.chunk_count() && - _summary_data.chunk_to_addr(chunk_index) < space(cur_space_id)->top(); - chunk_index++) { - - // Reset the closure for the new chunk. Note that the closure - // maintains some data that does not get reset for each chunk - // so a new instance of the closure is no appropriate. - bbu.reset_chunk(chunk_index); - - // Start the iteration with the first live object. This - // may return the end of the chunk. That is acceptable since - // it will properly limit the iterations. - ParMarkBitMap::idx_t left_offset = mark_bitmap()->addr_to_bit( - _summary_data.first_live_or_end_in_chunk(chunk_index)); - - // End the iteration at the end of the chunk. 
- HeapWord* chunk_addr = _summary_data.chunk_to_addr(chunk_index); - HeapWord* chunk_end = chunk_addr + ParallelCompactData::ChunkSize; - ParMarkBitMap::idx_t right_offset = - mark_bitmap()->addr_to_bit(chunk_end); - - // Blocks that have not objects starting in them can be - // skipped because their data will never be used. - if (left_offset < right_offset) { - - // Iterate through the objects in the chunk. - ParMarkBitMap::idx_t last_offset = - mark_bitmap()->pair_iterate(&bbu, left_offset, right_offset); - - // If last_offset is less than right_offset, then the iterations - // terminated while it was looking for an end bit. "last_offset" - // is then the offset for the last start bit. In this situation - // the "offset" field for the next block to the right (_cur_block + 1) - // will not have been update although there may be live data - // to the left of the chunk. - - size_t cur_block_plus_1 = bbu.cur_block() + 1; - HeapWord* cur_block_plus_1_addr = - _summary_data.block_to_addr(bbu.cur_block()) + - ParallelCompactData::BlockSize; - HeapWord* last_offset_addr = mark_bitmap()->bit_to_addr(last_offset); - #if 1 // This code works. The else doesn't but should. Why does it? - // The current block (cur_block()) has already been updated. - // The last block that may need to be updated is either the - // next block (current block + 1) or the block where the - // last object starts (which can be greater than the - // next block if there were no objects found in intervening - // blocks). - size_t last_block = - MAX2(bbu.cur_block() + 1, - _summary_data.addr_to_block_idx(last_offset_addr)); - #else - // The current block has already been updated. The only block - // that remains to be updated is the block where the last - // object in the chunk starts. 
- size_t last_block = _summary_data.addr_to_block_idx(last_offset_addr); - #endif - assert_bit_is_start(last_offset); - assert((last_block == _summary_data.block_count()) || - (_summary_data.block(last_block)->raw_offset() == 0), - "Should not have been set"); - // Is the last block still in the current chunk? If still - // in this chunk, update the last block (the counting that - // included the current block is meant for the offset of the last - // block). If not in this chunk, do nothing. Should not - // update a block in the next chunk. - if (ParallelCompactData::chunk_contains_block(bbu.chunk_index(), - last_block)) { - if (last_offset < right_offset) { - // The last object started in this chunk but ends beyond - // this chunk. Update the block for this last object. - assert(mark_bitmap()->is_marked(last_offset), "Should be marked"); - // No end bit was found. The closure takes care of - // the cases where - // an objects crosses over into the next block - // an objects starts and ends in the next block - // It does not handle the case where an object is - // the first object in a later block and extends - // past the end of the chunk (i.e., the closure - // only handles complete objects that are in the range - // it is given). That object is handed back here - // for any special consideration necessary. - // - // Is the first bit in the last block a start or end bit? - // - // If the partial object ends in the last block L, - // then the 1st bit in L may be an end bit. - // - // Else does the last object start in a block after the current - // block? A block AA will already have been updated if an - // object ends in the next block AA+1. An object found to end in - // the AA+1 is the trigger that updates AA. Objects are being - // counted in the current block for updaing a following - // block. An object may start in later block - // block but may extend beyond the last block in the chunk. 
- // Updates are only done when the end of an object has been - // found. If the last object (covered by block L) starts - // beyond the current block, then no object ends in L (otherwise - // L would be the current block). So the first bit in L is - // a start bit. - // - // Else the last objects start in the current block and ends - // beyond the chunk. The current block has already been - // updated and there is no later block (with an object - // starting in it) that needs to be updated. - // - if (_summary_data.partial_obj_ends_in_block(last_block)) { - _summary_data.block(last_block)->set_end_bit_offset( - bbu.live_data_left()); - } else if (last_offset_addr >= cur_block_plus_1_addr) { - // The start of the object is on a later block - // (to the right of the current block and there are no - // complete live objects to the left of this last object - // within the chunk. - // The first bit in the block is for the start of the - // last object. - _summary_data.block(last_block)->set_start_bit_offset( - bbu.live_data_left()); - } else { - // The start of the last object was found in - // the current chunk (which has already - // been updated). - assert(bbu.cur_block() == - _summary_data.addr_to_block_idx(last_offset_addr), - "Should be a block already processed"); - } -#ifdef ASSERT - // Is there enough block information to find this object? - // The destination of the chunk has not been set so the - // values returned by calc_new_pointer() and - // block_calc_new_pointer() will only be - // offsets. But they should agree. - HeapWord* moved_obj_with_chunks = - _summary_data.chunk_calc_new_pointer(last_offset_addr); - HeapWord* moved_obj_with_blocks = - _summary_data.calc_new_pointer(last_offset_addr); - assert(moved_obj_with_chunks == moved_obj_with_blocks, - "Block calculation is wrong"); -#endif - } else if (last_block < _summary_data.block_count()) { - // Iterations ended looking for a start bit (but - // did not run off the end of the block table). 
- _summary_data.block(last_block)->set_start_bit_offset( - bbu.live_data_left()); - } - } -#ifdef ASSERT - // Is there enough block information to find this object? - HeapWord* left_offset_addr = mark_bitmap()->bit_to_addr(left_offset); - HeapWord* moved_obj_with_chunks = - _summary_data.calc_new_pointer(left_offset_addr); - HeapWord* moved_obj_with_blocks = - _summary_data.calc_new_pointer(left_offset_addr); - assert(moved_obj_with_chunks == moved_obj_with_blocks, - "Block calculation is wrong"); -#endif - - // Is there another block after the end of this chunk? -#ifdef ASSERT - if (last_block < _summary_data.block_count()) { - // No object may have been found in a block. If that - // block is at the end of the chunk, the iteration will - // terminate without incrementing the current block so - // that the current block is not the last block in the - // chunk. That situation precludes asserting that the - // current block is the last block in the chunk. Assert - // the lesser condition that the current block does not - // exceed the chunk. - assert(_summary_data.block_to_addr(last_block) <= - (_summary_data.chunk_to_addr(chunk_index) + - ParallelCompactData::ChunkSize), - "Chunk and block inconsistency"); - assert(last_offset <= right_offset, "Iteration over ran end"); - } -#endif - } -#ifdef ASSERT - if (PrintGCDetails && Verbose) { - if (_summary_data.chunk(chunk_index)->partial_obj_size() == 1) { - size_t first_block = - chunk_index / ParallelCompactData::BlocksPerChunk; - gclog_or_tty->print_cr("first_block " PTR_FORMAT - " _offset " PTR_FORMAT - "_first_is_start_bit %d", - first_block, - _summary_data.block(first_block)->raw_offset(), - _summary_data.block(first_block)->first_is_start_bit()); - } - } -#endif - } - } - DEBUG_ONLY(ParallelCompactData::BlockData::set_cur_phase(16);) -#endif // #if 0 -} - // This method should contain all heap-specific policy for invoking a full // collection. 
invoke_no_policy() will only attempt to compact the heap; it // will do nothing further. If we need to bail out for policy reasons, scavenge @@ -1937,18 +1501,9 @@ void PSParallelCompact::invoke(bool maximum_heap_compaction) { } } -bool ParallelCompactData::chunk_contains(size_t chunk_index, HeapWord* addr) { - size_t addr_chunk_index = addr_to_chunk_idx(addr); - return chunk_index == addr_chunk_index; -} - -bool ParallelCompactData::chunk_contains_block(size_t chunk_index, - size_t block_index) { - size_t first_block_in_chunk = chunk_index * BlocksPerChunk; - size_t last_block_in_chunk = (chunk_index + 1) * BlocksPerChunk - 1; - - return (first_block_in_chunk <= block_index) && - (block_index <= last_block_in_chunk); +bool ParallelCompactData::region_contains(size_t region_index, HeapWord* addr) { + size_t addr_region_index = addr_to_region_idx(addr); + return region_index == addr_region_index; } // This method contains no policy. You should probably @@ -2038,39 +1593,9 @@ void PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) { } #endif // #ifndef PRODUCT -#ifdef ASSERT - if (VerifyParallelOldWithMarkSweep && - (PSParallelCompact::total_invocations() % - VerifyParallelOldWithMarkSweepInterval) == 0) { - gclog_or_tty->print_cr("Verify marking with mark_sweep_phase1()"); - if (PrintGCDetails && Verbose) { - gclog_or_tty->print_cr("mark_sweep_phase1:"); - } - // Clear the discovered lists so that discovered objects - // don't look like they have been discovered twice. 
- ref_processor()->clear_discovered_references(); - - PSMarkSweep::allocate_stacks(); - MemRegion mr = Universe::heap()->reserved_region(); - PSMarkSweep::ref_processor()->enable_discovery(); - PSMarkSweep::mark_sweep_phase1(maximum_heap_compaction); - } -#endif - bool max_on_system_gc = UseMaximumCompactionOnSystemGC && is_system_gc; summary_phase(vmthread_cm, maximum_heap_compaction || max_on_system_gc); -#ifdef ASSERT - if (VerifyParallelOldWithMarkSweep && - (PSParallelCompact::total_invocations() % - VerifyParallelOldWithMarkSweepInterval) == 0) { - if (PrintGCDetails && Verbose) { - gclog_or_tty->print_cr("mark_sweep_phase2:"); - } - PSMarkSweep::mark_sweep_phase2(); - } -#endif - COMPILER2_PRESENT(assert(DerivedPointerTable::is_active(), "Sanity")); COMPILER2_PRESENT(DerivedPointerTable::set_active(false)); @@ -2078,28 +1603,6 @@ void PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) { // needed by the compaction for filling holes in the dense prefix. adjust_roots(); -#ifdef ASSERT - if (VerifyParallelOldWithMarkSweep && - (PSParallelCompact::total_invocations() % - VerifyParallelOldWithMarkSweepInterval) == 0) { - // Do a separate verify phase so that the verify - // code can use the the forwarding pointers to - // check the new pointer calculation. The restore_marks() - // has to be done before the real compact. - vmthread_cm->set_action(ParCompactionManager::VerifyUpdate); - compact_perm(vmthread_cm); - compact_serial(vmthread_cm); - vmthread_cm->set_action(ParCompactionManager::ResetObjects); - compact_perm(vmthread_cm); - compact_serial(vmthread_cm); - vmthread_cm->set_action(ParCompactionManager::UpdateAndCopy); - - // For debugging only - PSMarkSweep::restore_marks(); - PSMarkSweep::deallocate_stacks(); - } -#endif - compaction_start.update(); // Does the perm gen always have to be done serially because // klasses are used in the update of an object? 
@@ -2349,7 +1852,7 @@ void PSParallelCompact::marking_phase(ParCompactionManager* cm, ParallelScavengeHeap* heap = gc_heap(); uint parallel_gc_threads = heap->gc_task_manager()->workers(); - TaskQueueSetSuper* qset = ParCompactionManager::chunk_array(); + TaskQueueSetSuper* qset = ParCompactionManager::region_array(); ParallelTaskTerminator terminator(parallel_gc_threads, qset); PSParallelCompact::MarkAndPushClosure mark_and_push_closure(cm); @@ -2487,8 +1990,9 @@ void PSParallelCompact::compact_perm(ParCompactionManager* cm) { move_and_update(cm, perm_space_id); } -void PSParallelCompact::enqueue_chunk_draining_tasks(GCTaskQueue* q, - uint parallel_gc_threads) { +void PSParallelCompact::enqueue_region_draining_tasks(GCTaskQueue* q, + uint parallel_gc_threads) +{ TraceTime tm("drain task setup", print_phases(), true, gclog_or_tty); const unsigned int task_count = MAX2(parallel_gc_threads, 1U); @@ -2496,13 +2000,13 @@ void PSParallelCompact::enqueue_chunk_draining_tasks(GCTaskQueue* q, q->enqueue(new DrainStacksCompactionTask()); } - // Find all chunks that are available (can be filled immediately) and + // Find all regions that are available (can be filled immediately) and // distribute them to the thread stacks. The iteration is done in reverse - // order (high to low) so the chunks will be removed in ascending order. + // order (high to low) so the regions will be removed in ascending order. const ParallelCompactData& sd = PSParallelCompact::summary_data(); - size_t fillable_chunks = 0; // A count for diagnostic purposes. + size_t fillable_regions = 0; // A count for diagnostic purposes. unsigned int which = 0; // The worker thread number. 
for (unsigned int id = to_space_id; id > perm_space_id; --id) { @@ -2510,25 +2014,26 @@ void PSParallelCompact::enqueue_chunk_draining_tasks(GCTaskQueue* q, MutableSpace* const space = space_info->space(); HeapWord* const new_top = space_info->new_top(); - const size_t beg_chunk = sd.addr_to_chunk_idx(space_info->dense_prefix()); - const size_t end_chunk = sd.addr_to_chunk_idx(sd.chunk_align_up(new_top)); - assert(end_chunk > 0, "perm gen cannot be empty"); + const size_t beg_region = sd.addr_to_region_idx(space_info->dense_prefix()); + const size_t end_region = + sd.addr_to_region_idx(sd.region_align_up(new_top)); + assert(end_region > 0, "perm gen cannot be empty"); - for (size_t cur = end_chunk - 1; cur >= beg_chunk; --cur) { - if (sd.chunk(cur)->claim_unsafe()) { + for (size_t cur = end_region - 1; cur >= beg_region; --cur) { + if (sd.region(cur)->claim_unsafe()) { ParCompactionManager* cm = ParCompactionManager::manager_array(which); cm->save_for_processing(cur); if (TraceParallelOldGCCompactionPhase && Verbose) { - const size_t count_mod_8 = fillable_chunks & 7; + const size_t count_mod_8 = fillable_regions & 7; if (count_mod_8 == 0) gclog_or_tty->print("fillable: "); gclog_or_tty->print(" " SIZE_FORMAT_W(7), cur); if (count_mod_8 == 7) gclog_or_tty->cr(); } - NOT_PRODUCT(++fillable_chunks;) + NOT_PRODUCT(++fillable_regions;) - // Assign chunks to threads in round-robin fashion. + // Assign regions to threads in round-robin fashion. 
if (++which == task_count) { which = 0; } @@ -2537,8 +2042,8 @@ void PSParallelCompact::enqueue_chunk_draining_tasks(GCTaskQueue* q, } if (TraceParallelOldGCCompactionPhase) { - if (Verbose && (fillable_chunks & 7) != 0) gclog_or_tty->cr(); - gclog_or_tty->print_cr("%u initially fillable chunks", fillable_chunks); + if (Verbose && (fillable_regions & 7) != 0) gclog_or_tty->cr(); + gclog_or_tty->print_cr("%u initially fillable regions", fillable_regions); } } @@ -2551,7 +2056,7 @@ void PSParallelCompact::enqueue_dense_prefix_tasks(GCTaskQueue* q, ParallelCompactData& sd = PSParallelCompact::summary_data(); // Iterate over all the spaces adding tasks for updating - // chunks in the dense prefix. Assume that 1 gc thread + // regions in the dense prefix. Assume that 1 gc thread // will work on opening the gaps and the remaining gc threads // will work on the dense prefix. SpaceId space_id = old_space_id; @@ -2565,30 +2070,31 @@ void PSParallelCompact::enqueue_dense_prefix_tasks(GCTaskQueue* q, continue; } - // The dense prefix is before this chunk. - size_t chunk_index_end_dense_prefix = - sd.addr_to_chunk_idx(dense_prefix_end); - ChunkData* const dense_prefix_cp = sd.chunk(chunk_index_end_dense_prefix); + // The dense prefix is before this region. + size_t region_index_end_dense_prefix = + sd.addr_to_region_idx(dense_prefix_end); + RegionData* const dense_prefix_cp = + sd.region(region_index_end_dense_prefix); assert(dense_prefix_end == space->end() || dense_prefix_cp->available() || dense_prefix_cp->claimed(), - "The chunk after the dense prefix should always be ready to fill"); + "The region after the dense prefix should always be ready to fill"); - size_t chunk_index_start = sd.addr_to_chunk_idx(space->bottom()); + size_t region_index_start = sd.addr_to_region_idx(space->bottom()); // Is there dense prefix work? 
- size_t total_dense_prefix_chunks = - chunk_index_end_dense_prefix - chunk_index_start; - // How many chunks of the dense prefix should be given to + size_t total_dense_prefix_regions = + region_index_end_dense_prefix - region_index_start; + // How many regions of the dense prefix should be given to // each thread? - if (total_dense_prefix_chunks > 0) { + if (total_dense_prefix_regions > 0) { uint tasks_for_dense_prefix = 1; if (UseParallelDensePrefixUpdate) { - if (total_dense_prefix_chunks <= + if (total_dense_prefix_regions <= (parallel_gc_threads * PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING)) { // Don't over partition. This assumes that // PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING is a small integer value - // so there are not many chunks to process. + // so there are not many regions to process. tasks_for_dense_prefix = parallel_gc_threads; } else { // Over partition @@ -2596,50 +2102,50 @@ void PSParallelCompact::enqueue_dense_prefix_tasks(GCTaskQueue* q, PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING; } } - size_t chunks_per_thread = total_dense_prefix_chunks / + size_t regions_per_thread = total_dense_prefix_regions / tasks_for_dense_prefix; - // Give each thread at least 1 chunk. - if (chunks_per_thread == 0) { - chunks_per_thread = 1; + // Give each thread at least 1 region. 
+ if (regions_per_thread == 0) { + regions_per_thread = 1; } for (uint k = 0; k < tasks_for_dense_prefix; k++) { - if (chunk_index_start >= chunk_index_end_dense_prefix) { + if (region_index_start >= region_index_end_dense_prefix) { break; } - // chunk_index_end is not processed - size_t chunk_index_end = MIN2(chunk_index_start + chunks_per_thread, - chunk_index_end_dense_prefix); + // region_index_end is not processed + size_t region_index_end = MIN2(region_index_start + regions_per_thread, + region_index_end_dense_prefix); q->enqueue(new UpdateDensePrefixTask( space_id, - chunk_index_start, - chunk_index_end)); - chunk_index_start = chunk_index_end; + region_index_start, + region_index_end)); + region_index_start = region_index_end; } } // This gets any part of the dense prefix that did not // fit evenly. - if (chunk_index_start < chunk_index_end_dense_prefix) { + if (region_index_start < region_index_end_dense_prefix) { q->enqueue(new UpdateDensePrefixTask( space_id, - chunk_index_start, - chunk_index_end_dense_prefix)); + region_index_start, + region_index_end_dense_prefix)); } space_id = next_compaction_space_id(space_id); } // End tasks for dense prefix } -void PSParallelCompact::enqueue_chunk_stealing_tasks( +void PSParallelCompact::enqueue_region_stealing_tasks( GCTaskQueue* q, ParallelTaskTerminator* terminator_ptr, uint parallel_gc_threads) { TraceTime tm("steal task setup", print_phases(), true, gclog_or_tty); - // Once a thread has drained it's stack, it should try to steal chunks from + // Once a thread has drained it's stack, it should try to steal regions from // other threads. 
if (parallel_gc_threads > 1) { for (uint j = 0; j < parallel_gc_threads; j++) { - q->enqueue(new StealChunkCompactionTask(terminator_ptr)); + q->enqueue(new StealRegionCompactionTask(terminator_ptr)); } } } @@ -2654,13 +2160,13 @@ void PSParallelCompact::compact() { PSOldGen* old_gen = heap->old_gen(); old_gen->start_array()->reset(); uint parallel_gc_threads = heap->gc_task_manager()->workers(); - TaskQueueSetSuper* qset = ParCompactionManager::chunk_array(); + TaskQueueSetSuper* qset = ParCompactionManager::region_array(); ParallelTaskTerminator terminator(parallel_gc_threads, qset); GCTaskQueue* q = GCTaskQueue::create(); - enqueue_chunk_draining_tasks(q, parallel_gc_threads); + enqueue_region_draining_tasks(q, parallel_gc_threads); enqueue_dense_prefix_tasks(q, parallel_gc_threads); - enqueue_chunk_stealing_tasks(q, &terminator, parallel_gc_threads); + enqueue_region_stealing_tasks(q, &terminator, parallel_gc_threads); { TraceTime tm_pc("par compact", print_phases(), true, gclog_or_tty); @@ -2676,9 +2182,9 @@ void PSParallelCompact::compact() { WaitForBarrierGCTask::destroy(fin); #ifdef ASSERT - // Verify that all chunks have been processed before the deferred updates. + // Verify that all regions have been processed before the deferred updates. // Note that perm_space_id is skipped; this type of verification is not - // valid until the perm gen is compacted by chunks. + // valid until the perm gen is compacted by regions. 
for (unsigned int id = old_space_id; id < last_space_id; ++id) { verify_complete(SpaceId(id)); } @@ -2697,42 +2203,42 @@ void PSParallelCompact::compact() { #ifdef ASSERT void PSParallelCompact::verify_complete(SpaceId space_id) { - // All Chunks between space bottom() to new_top() should be marked as filled - // and all Chunks between new_top() and top() should be available (i.e., + // All Regions between space bottom() to new_top() should be marked as filled + // and all Regions between new_top() and top() should be available (i.e., // should have been emptied). ParallelCompactData& sd = summary_data(); SpaceInfo si = _space_info[space_id]; - HeapWord* new_top_addr = sd.chunk_align_up(si.new_top()); - HeapWord* old_top_addr = sd.chunk_align_up(si.space()->top()); - const size_t beg_chunk = sd.addr_to_chunk_idx(si.space()->bottom()); - const size_t new_top_chunk = sd.addr_to_chunk_idx(new_top_addr); - const size_t old_top_chunk = sd.addr_to_chunk_idx(old_top_addr); + HeapWord* new_top_addr = sd.region_align_up(si.new_top()); + HeapWord* old_top_addr = sd.region_align_up(si.space()->top()); + const size_t beg_region = sd.addr_to_region_idx(si.space()->bottom()); + const size_t new_top_region = sd.addr_to_region_idx(new_top_addr); + const size_t old_top_region = sd.addr_to_region_idx(old_top_addr); bool issued_a_warning = false; - size_t cur_chunk; - for (cur_chunk = beg_chunk; cur_chunk < new_top_chunk; ++cur_chunk) { - const ChunkData* const c = sd.chunk(cur_chunk); + size_t cur_region; + for (cur_region = beg_region; cur_region < new_top_region; ++cur_region) { + const RegionData* const c = sd.region(cur_region); if (!c->completed()) { - warning("chunk " SIZE_FORMAT " not filled: " + warning("region " SIZE_FORMAT " not filled: " "destination_count=" SIZE_FORMAT, - cur_chunk, c->destination_count()); + cur_region, c->destination_count()); issued_a_warning = true; } } - for (cur_chunk = new_top_chunk; cur_chunk < old_top_chunk; ++cur_chunk) { - const ChunkData* 
const c = sd.chunk(cur_chunk); + for (cur_region = new_top_region; cur_region < old_top_region; ++cur_region) { + const RegionData* const c = sd.region(cur_region); if (!c->available()) { - warning("chunk " SIZE_FORMAT " not empty: " + warning("region " SIZE_FORMAT " not empty: " "destination_count=" SIZE_FORMAT, - cur_chunk, c->destination_count()); + cur_region, c->destination_count()); issued_a_warning = true; } } if (issued_a_warning) { - print_chunk_ranges(); + print_region_ranges(); } } #endif // #ifdef ASSERT @@ -2933,46 +2439,47 @@ void PSParallelCompact::print_new_location_of_heap_address(HeapWord* q) { } #endif //VALIDATE_MARK_SWEEP -// Update interior oops in the ranges of chunks [beg_chunk, end_chunk). +// Update interior oops in the ranges of regions [beg_region, end_region). void PSParallelCompact::update_and_deadwood_in_dense_prefix(ParCompactionManager* cm, SpaceId space_id, - size_t beg_chunk, - size_t end_chunk) { + size_t beg_region, + size_t end_region) { ParallelCompactData& sd = summary_data(); ParMarkBitMap* const mbm = mark_bitmap(); - HeapWord* beg_addr = sd.chunk_to_addr(beg_chunk); - HeapWord* const end_addr = sd.chunk_to_addr(end_chunk); - assert(beg_chunk <= end_chunk, "bad chunk range"); + HeapWord* beg_addr = sd.region_to_addr(beg_region); + HeapWord* const end_addr = sd.region_to_addr(end_region); + assert(beg_region <= end_region, "bad region range"); assert(end_addr <= dense_prefix(space_id), "not in the dense prefix"); #ifdef ASSERT - // Claim the chunks to avoid triggering an assert when they are marked as + // Claim the regions to avoid triggering an assert when they are marked as // filled. 
- for (size_t claim_chunk = beg_chunk; claim_chunk < end_chunk; ++claim_chunk) { - assert(sd.chunk(claim_chunk)->claim_unsafe(), "claim() failed"); + for (size_t claim_region = beg_region; claim_region < end_region; ++claim_region) { + assert(sd.region(claim_region)->claim_unsafe(), "claim() failed"); } #endif // #ifdef ASSERT if (beg_addr != space(space_id)->bottom()) { // Find the first live object or block of dead space that *starts* in this - // range of chunks. If a partial object crosses onto the chunk, skip it; it - // will be marked for 'deferred update' when the object head is processed. - // If dead space crosses onto the chunk, it is also skipped; it will be - // filled when the prior chunk is processed. If neither of those apply, the - // first word in the chunk is the start of a live object or dead space. + // range of regions. If a partial object crosses onto the region, skip it; + // it will be marked for 'deferred update' when the object head is + // processed. If dead space crosses onto the region, it is also skipped; it + // will be filled when the prior region is processed. If neither of those + // apply, the first word in the region is the start of a live object or dead + // space. assert(beg_addr > space(space_id)->bottom(), "sanity"); - const ChunkData* const cp = sd.chunk(beg_chunk); + const RegionData* const cp = sd.region(beg_region); if (cp->partial_obj_size() != 0) { - beg_addr = sd.partial_obj_end(beg_chunk); + beg_addr = sd.partial_obj_end(beg_region); } else if (dead_space_crosses_boundary(cp, mbm->addr_to_bit(beg_addr))) { beg_addr = mbm->find_obj_beg(beg_addr, end_addr); } } if (beg_addr < end_addr) { - // A live object or block of dead space starts in this range of Chunks. + // A live object or block of dead space starts in this range of Regions. HeapWord* const dense_prefix_end = dense_prefix(space_id); // Create closures and iterate. 
@@ -2986,10 +2493,10 @@ PSParallelCompact::update_and_deadwood_in_dense_prefix(ParCompactionManager* cm, } } - // Mark the chunks as filled. - ChunkData* const beg_cp = sd.chunk(beg_chunk); - ChunkData* const end_cp = sd.chunk(end_chunk); - for (ChunkData* cp = beg_cp; cp < end_cp; ++cp) { + // Mark the regions as filled. + RegionData* const beg_cp = sd.region(beg_region); + RegionData* const end_cp = sd.region(end_region); + for (RegionData* cp = beg_cp; cp < end_cp; ++cp) { cp->set_completed(); } } @@ -3021,13 +2528,13 @@ void PSParallelCompact::update_deferred_objects(ParCompactionManager* cm, const MutableSpace* const space = space_info->space(); assert(space_info->dense_prefix() >= space->bottom(), "dense_prefix not set"); HeapWord* const beg_addr = space_info->dense_prefix(); - HeapWord* const end_addr = sd.chunk_align_up(space_info->new_top()); + HeapWord* const end_addr = sd.region_align_up(space_info->new_top()); - const ChunkData* const beg_chunk = sd.addr_to_chunk_ptr(beg_addr); - const ChunkData* const end_chunk = sd.addr_to_chunk_ptr(end_addr); - const ChunkData* cur_chunk; - for (cur_chunk = beg_chunk; cur_chunk < end_chunk; ++cur_chunk) { - HeapWord* const addr = cur_chunk->deferred_obj_addr(); + const RegionData* const beg_region = sd.addr_to_region_ptr(beg_addr); + const RegionData* const end_region = sd.addr_to_region_ptr(end_addr); + const RegionData* cur_region; + for (cur_region = beg_region; cur_region < end_region; ++cur_region) { + HeapWord* const addr = cur_region->deferred_obj_addr(); if (addr != NULL) { if (start_array != NULL) { start_array->allocate_block(addr); @@ -3073,45 +2580,45 @@ PSParallelCompact::skip_live_words(HeapWord* beg, HeapWord* end, size_t count) HeapWord* PSParallelCompact::first_src_addr(HeapWord* const dest_addr, - size_t src_chunk_idx) + size_t src_region_idx) { ParMarkBitMap* const bitmap = mark_bitmap(); const ParallelCompactData& sd = summary_data(); - const size_t ChunkSize = ParallelCompactData::ChunkSize; + 
const size_t RegionSize = ParallelCompactData::RegionSize; - assert(sd.is_chunk_aligned(dest_addr), "not aligned"); + assert(sd.is_region_aligned(dest_addr), "not aligned"); - const ChunkData* const src_chunk_ptr = sd.chunk(src_chunk_idx); - const size_t partial_obj_size = src_chunk_ptr->partial_obj_size(); - HeapWord* const src_chunk_destination = src_chunk_ptr->destination(); + const RegionData* const src_region_ptr = sd.region(src_region_idx); + const size_t partial_obj_size = src_region_ptr->partial_obj_size(); + HeapWord* const src_region_destination = src_region_ptr->destination(); - assert(dest_addr >= src_chunk_destination, "wrong src chunk"); - assert(src_chunk_ptr->data_size() > 0, "src chunk cannot be empty"); + assert(dest_addr >= src_region_destination, "wrong src region"); + assert(src_region_ptr->data_size() > 0, "src region cannot be empty"); - HeapWord* const src_chunk_beg = sd.chunk_to_addr(src_chunk_idx); - HeapWord* const src_chunk_end = src_chunk_beg + ChunkSize; + HeapWord* const src_region_beg = sd.region_to_addr(src_region_idx); + HeapWord* const src_region_end = src_region_beg + RegionSize; - HeapWord* addr = src_chunk_beg; - if (dest_addr == src_chunk_destination) { - // Return the first live word in the source chunk. + HeapWord* addr = src_region_beg; + if (dest_addr == src_region_destination) { + // Return the first live word in the source region. if (partial_obj_size == 0) { - addr = bitmap->find_obj_beg(addr, src_chunk_end); - assert(addr < src_chunk_end, "no objects start in src chunk"); + addr = bitmap->find_obj_beg(addr, src_region_end); + assert(addr < src_region_end, "no objects start in src region"); } return addr; } // Must skip some live data. 
- size_t words_to_skip = dest_addr - src_chunk_destination; - assert(src_chunk_ptr->data_size() > words_to_skip, "wrong src chunk"); + size_t words_to_skip = dest_addr - src_region_destination; + assert(src_region_ptr->data_size() > words_to_skip, "wrong src region"); if (partial_obj_size >= words_to_skip) { // All the live words to skip are part of the partial object. addr += words_to_skip; if (partial_obj_size == words_to_skip) { // Find the first live word past the partial object. - addr = bitmap->find_obj_beg(addr, src_chunk_end); - assert(addr < src_chunk_end, "wrong src chunk"); + addr = bitmap->find_obj_beg(addr, src_region_end); + assert(addr < src_region_end, "wrong src region"); } return addr; } @@ -3122,63 +2629,64 @@ PSParallelCompact::first_src_addr(HeapWord* const dest_addr, addr += partial_obj_size; } - // Skip over live words due to objects that start in the chunk. - addr = skip_live_words(addr, src_chunk_end, words_to_skip); - assert(addr < src_chunk_end, "wrong src chunk"); + // Skip over live words due to objects that start in the region. 
+ addr = skip_live_words(addr, src_region_end, words_to_skip); + assert(addr < src_region_end, "wrong src region"); return addr; } void PSParallelCompact::decrement_destination_counts(ParCompactionManager* cm, - size_t beg_chunk, + size_t beg_region, HeapWord* end_addr) { ParallelCompactData& sd = summary_data(); - ChunkData* const beg = sd.chunk(beg_chunk); - HeapWord* const end_addr_aligned_up = sd.chunk_align_up(end_addr); - ChunkData* const end = sd.addr_to_chunk_ptr(end_addr_aligned_up); - size_t cur_idx = beg_chunk; - for (ChunkData* cur = beg; cur < end; ++cur, ++cur_idx) { - assert(cur->data_size() > 0, "chunk must have live data"); + RegionData* const beg = sd.region(beg_region); + HeapWord* const end_addr_aligned_up = sd.region_align_up(end_addr); + RegionData* const end = sd.addr_to_region_ptr(end_addr_aligned_up); + size_t cur_idx = beg_region; + for (RegionData* cur = beg; cur < end; ++cur, ++cur_idx) { + assert(cur->data_size() > 0, "region must have live data"); cur->decrement_destination_count(); - if (cur_idx <= cur->source_chunk() && cur->available() && cur->claim()) { + if (cur_idx <= cur->source_region() && cur->available() && cur->claim()) { cm->save_for_processing(cur_idx); } } } -size_t PSParallelCompact::next_src_chunk(MoveAndUpdateClosure& closure, - SpaceId& src_space_id, - HeapWord*& src_space_top, - HeapWord* end_addr) +size_t PSParallelCompact::next_src_region(MoveAndUpdateClosure& closure, + SpaceId& src_space_id, + HeapWord*& src_space_top, + HeapWord* end_addr) { - typedef ParallelCompactData::ChunkData ChunkData; + typedef ParallelCompactData::RegionData RegionData; ParallelCompactData& sd = PSParallelCompact::summary_data(); - const size_t chunk_size = ParallelCompactData::ChunkSize; + const size_t region_size = ParallelCompactData::RegionSize; - size_t src_chunk_idx = 0; + size_t src_region_idx = 0; - // Skip empty chunks (if any) up to the top of the space. 
- HeapWord* const src_aligned_up = sd.chunk_align_up(end_addr); - ChunkData* src_chunk_ptr = sd.addr_to_chunk_ptr(src_aligned_up); - HeapWord* const top_aligned_up = sd.chunk_align_up(src_space_top); - const ChunkData* const top_chunk_ptr = sd.addr_to_chunk_ptr(top_aligned_up); - while (src_chunk_ptr < top_chunk_ptr && src_chunk_ptr->data_size() == 0) { - ++src_chunk_ptr; + // Skip empty regions (if any) up to the top of the space. + HeapWord* const src_aligned_up = sd.region_align_up(end_addr); + RegionData* src_region_ptr = sd.addr_to_region_ptr(src_aligned_up); + HeapWord* const top_aligned_up = sd.region_align_up(src_space_top); + const RegionData* const top_region_ptr = + sd.addr_to_region_ptr(top_aligned_up); + while (src_region_ptr < top_region_ptr && src_region_ptr->data_size() == 0) { + ++src_region_ptr; } - if (src_chunk_ptr < top_chunk_ptr) { - // The next source chunk is in the current space. Update src_chunk_idx and - // the source address to match src_chunk_ptr. - src_chunk_idx = sd.chunk(src_chunk_ptr); - HeapWord* const src_chunk_addr = sd.chunk_to_addr(src_chunk_idx); - if (src_chunk_addr > closure.source()) { - closure.set_source(src_chunk_addr); + if (src_region_ptr < top_region_ptr) { + // The next source region is in the current space. Update src_region_idx + // and the source address to match src_region_ptr. + src_region_idx = sd.region(src_region_ptr); + HeapWord* const src_region_addr = sd.region_to_addr(src_region_idx); + if (src_region_addr > closure.source()) { + closure.set_source(src_region_addr); } - return src_chunk_idx; + return src_region_idx; } - // Switch to a new source space and find the first non-empty chunk. + // Switch to a new source space and find the first non-empty region. 
unsigned int space_id = src_space_id + 1; assert(space_id < last_space_id, "not enough spaces"); @@ -3187,14 +2695,14 @@ size_t PSParallelCompact::next_src_chunk(MoveAndUpdateClosure& closure, do { MutableSpace* space = _space_info[space_id].space(); HeapWord* const bottom = space->bottom(); - const ChunkData* const bottom_cp = sd.addr_to_chunk_ptr(bottom); + const RegionData* const bottom_cp = sd.addr_to_region_ptr(bottom); // Iterate over the spaces that do not compact into themselves. if (bottom_cp->destination() != bottom) { - HeapWord* const top_aligned_up = sd.chunk_align_up(space->top()); - const ChunkData* const top_cp = sd.addr_to_chunk_ptr(top_aligned_up); + HeapWord* const top_aligned_up = sd.region_align_up(space->top()); + const RegionData* const top_cp = sd.addr_to_region_ptr(top_aligned_up); - for (const ChunkData* src_cp = bottom_cp; src_cp < top_cp; ++src_cp) { + for (const RegionData* src_cp = bottom_cp; src_cp < top_cp; ++src_cp) { if (src_cp->live_obj_size() > 0) { // Found it. 
assert(src_cp->destination() == destination, @@ -3204,9 +2712,9 @@ size_t PSParallelCompact::next_src_chunk(MoveAndUpdateClosure& closure, src_space_id = SpaceId(space_id); src_space_top = space->top(); - const size_t src_chunk_idx = sd.chunk(src_cp); - closure.set_source(sd.chunk_to_addr(src_chunk_idx)); - return src_chunk_idx; + const size_t src_region_idx = sd.region(src_cp); + closure.set_source(sd.region_to_addr(src_region_idx)); + return src_region_idx; } else { assert(src_cp->data_size() == 0, "sanity"); } @@ -3214,38 +2722,38 @@ size_t PSParallelCompact::next_src_chunk(MoveAndUpdateClosure& closure, } } while (++space_id < last_space_id); - assert(false, "no source chunk was found"); + assert(false, "no source region was found"); return 0; } -void PSParallelCompact::fill_chunk(ParCompactionManager* cm, size_t chunk_idx) +void PSParallelCompact::fill_region(ParCompactionManager* cm, size_t region_idx) { typedef ParMarkBitMap::IterationStatus IterationStatus; - const size_t ChunkSize = ParallelCompactData::ChunkSize; + const size_t RegionSize = ParallelCompactData::RegionSize; ParMarkBitMap* const bitmap = mark_bitmap(); ParallelCompactData& sd = summary_data(); - ChunkData* const chunk_ptr = sd.chunk(chunk_idx); + RegionData* const region_ptr = sd.region(region_idx); // Get the items needed to construct the closure. - HeapWord* dest_addr = sd.chunk_to_addr(chunk_idx); + HeapWord* dest_addr = sd.region_to_addr(region_idx); SpaceId dest_space_id = space_id(dest_addr); ObjectStartArray* start_array = _space_info[dest_space_id].start_array(); HeapWord* new_top = _space_info[dest_space_id].new_top(); assert(dest_addr < new_top, "sanity"); - const size_t words = MIN2(pointer_delta(new_top, dest_addr), ChunkSize); + const size_t words = MIN2(pointer_delta(new_top, dest_addr), RegionSize); - // Get the source chunk and related info. 
- size_t src_chunk_idx = chunk_ptr->source_chunk(); - SpaceId src_space_id = space_id(sd.chunk_to_addr(src_chunk_idx)); + // Get the source region and related info. + size_t src_region_idx = region_ptr->source_region(); + SpaceId src_space_id = space_id(sd.region_to_addr(src_region_idx)); HeapWord* src_space_top = _space_info[src_space_id].space()->top(); MoveAndUpdateClosure closure(bitmap, cm, start_array, dest_addr, words); - closure.set_source(first_src_addr(dest_addr, src_chunk_idx)); + closure.set_source(first_src_addr(dest_addr, src_region_idx)); - // Adjust src_chunk_idx to prepare for decrementing destination counts (the - // destination count is not decremented when a chunk is copied to itself). - if (src_chunk_idx == chunk_idx) { - src_chunk_idx += 1; + // Adjust src_region_idx to prepare for decrementing destination counts (the + // destination count is not decremented when a region is copied to itself). + if (src_region_idx == region_idx) { + src_region_idx += 1; } if (bitmap->is_unmarked(closure.source())) { @@ -3255,32 +2763,33 @@ void PSParallelCompact::fill_chunk(ParCompactionManager* cm, size_t chunk_idx) HeapWord* const old_src_addr = closure.source(); closure.copy_partial_obj(); if (closure.is_full()) { - decrement_destination_counts(cm, src_chunk_idx, closure.source()); - chunk_ptr->set_deferred_obj_addr(NULL); - chunk_ptr->set_completed(); + decrement_destination_counts(cm, src_region_idx, closure.source()); + region_ptr->set_deferred_obj_addr(NULL); + region_ptr->set_completed(); return; } - HeapWord* const end_addr = sd.chunk_align_down(closure.source()); - if (sd.chunk_align_down(old_src_addr) != end_addr) { - // The partial object was copied from more than one source chunk. - decrement_destination_counts(cm, src_chunk_idx, end_addr); + HeapWord* const end_addr = sd.region_align_down(closure.source()); + if (sd.region_align_down(old_src_addr) != end_addr) { + // The partial object was copied from more than one source region. 
+ decrement_destination_counts(cm, src_region_idx, end_addr); - // Move to the next source chunk, possibly switching spaces as well. All + // Move to the next source region, possibly switching spaces as well. All // args except end_addr may be modified. - src_chunk_idx = next_src_chunk(closure, src_space_id, src_space_top, - end_addr); + src_region_idx = next_src_region(closure, src_space_id, src_space_top, + end_addr); } } do { HeapWord* const cur_addr = closure.source(); - HeapWord* const end_addr = MIN2(sd.chunk_align_up(cur_addr + 1), + HeapWord* const end_addr = MIN2(sd.region_align_up(cur_addr + 1), src_space_top); IterationStatus status = bitmap->iterate(&closure, cur_addr, end_addr); if (status == ParMarkBitMap::incomplete) { - // The last obj that starts in the source chunk does not end in the chunk. + // The last obj that starts in the source region does not end in the + // region. assert(closure.source() < end_addr, "sanity") HeapWord* const obj_beg = closure.source(); HeapWord* const range_end = MIN2(obj_beg + closure.words_remaining(), @@ -3299,28 +2808,28 @@ void PSParallelCompact::fill_chunk(ParCompactionManager* cm, size_t chunk_idx) if (status == ParMarkBitMap::would_overflow) { // The last object did not fit. Note that interior oop updates were - // deferred, then copy enough of the object to fill the chunk. - chunk_ptr->set_deferred_obj_addr(closure.destination()); + // deferred, then copy enough of the object to fill the region. 
+ region_ptr->set_deferred_obj_addr(closure.destination()); status = closure.copy_until_full(); // copies from closure.source() - decrement_destination_counts(cm, src_chunk_idx, closure.source()); - chunk_ptr->set_completed(); + decrement_destination_counts(cm, src_region_idx, closure.source()); + region_ptr->set_completed(); return; } if (status == ParMarkBitMap::full) { - decrement_destination_counts(cm, src_chunk_idx, closure.source()); - chunk_ptr->set_deferred_obj_addr(NULL); - chunk_ptr->set_completed(); + decrement_destination_counts(cm, src_region_idx, closure.source()); + region_ptr->set_deferred_obj_addr(NULL); + region_ptr->set_completed(); return; } - decrement_destination_counts(cm, src_chunk_idx, end_addr); + decrement_destination_counts(cm, src_region_idx, end_addr); - // Move to the next source chunk, possibly switching spaces as well. All + // Move to the next source region, possibly switching spaces as well. All // args except end_addr may be modified. - src_chunk_idx = next_src_chunk(closure, src_space_id, src_space_top, - end_addr); + src_region_idx = next_src_region(closure, src_space_id, src_space_top, + end_addr); } while (true); } @@ -3352,15 +2861,15 @@ PSParallelCompact::move_and_update(ParCompactionManager* cm, SpaceId space_id) { } #endif - const size_t beg_chunk = sd.addr_to_chunk_idx(beg_addr); - const size_t dp_chunk = sd.addr_to_chunk_idx(dp_addr); - if (beg_chunk < dp_chunk) { - update_and_deadwood_in_dense_prefix(cm, space_id, beg_chunk, dp_chunk); + const size_t beg_region = sd.addr_to_region_idx(beg_addr); + const size_t dp_region = sd.addr_to_region_idx(dp_addr); + if (beg_region < dp_region) { + update_and_deadwood_in_dense_prefix(cm, space_id, beg_region, dp_region); } - // The destination of the first live object that starts in the chunk is one - // past the end of the partial object entering the chunk (if any). 
- HeapWord* const dest_addr = sd.partial_obj_end(dp_chunk); + // The destination of the first live object that starts in the region is one + // past the end of the partial object entering the region (if any). + HeapWord* const dest_addr = sd.partial_obj_end(dp_region); HeapWord* const new_top = _space_info[space_id].new_top(); assert(new_top >= dest_addr, "bad new_top value"); const size_t words = pointer_delta(new_top, dest_addr); @@ -3469,172 +2978,6 @@ UpdateOnlyClosure::do_addr(HeapWord* addr, size_t words) { return ParMarkBitMap::incomplete; } -BitBlockUpdateClosure::BitBlockUpdateClosure(ParMarkBitMap* mbm, - ParCompactionManager* cm, - size_t chunk_index) : - ParMarkBitMapClosure(mbm, cm), - _live_data_left(0), - _cur_block(0) { - _chunk_start = - PSParallelCompact::summary_data().chunk_to_addr(chunk_index); - _chunk_end = - PSParallelCompact::summary_data().chunk_to_addr(chunk_index) + - ParallelCompactData::ChunkSize; - _chunk_index = chunk_index; - _cur_block = - PSParallelCompact::summary_data().addr_to_block_idx(_chunk_start); -} - -bool BitBlockUpdateClosure::chunk_contains_cur_block() { - return ParallelCompactData::chunk_contains_block(_chunk_index, _cur_block); -} - -void BitBlockUpdateClosure::reset_chunk(size_t chunk_index) { - DEBUG_ONLY(ParallelCompactData::BlockData::set_cur_phase(7);) - ParallelCompactData& sd = PSParallelCompact::summary_data(); - _chunk_index = chunk_index; - _live_data_left = 0; - _chunk_start = sd.chunk_to_addr(chunk_index); - _chunk_end = sd.chunk_to_addr(chunk_index) + ParallelCompactData::ChunkSize; - - // The first block in this chunk - size_t first_block = sd.addr_to_block_idx(_chunk_start); - size_t partial_live_size = sd.chunk(chunk_index)->partial_obj_size(); - - // Set the offset to 0. By definition it should have that value - // but it may have been written while processing an earlier chunk. - if (partial_live_size == 0) { - // No live object extends onto the chunk. 
The first bit - // in the bit map for the first chunk must be a start bit. - // Although there may not be any marked bits, it is safe - // to set it as a start bit. - sd.block(first_block)->set_start_bit_offset(0); - sd.block(first_block)->set_first_is_start_bit(true); - } else if (sd.partial_obj_ends_in_block(first_block)) { - sd.block(first_block)->set_end_bit_offset(0); - sd.block(first_block)->set_first_is_start_bit(false); - } else { - // The partial object extends beyond the first block. - // There is no object starting in the first block - // so the offset and bit parity are not needed. - // Set the the bit parity to start bit so assertions - // work when not bit is found. - sd.block(first_block)->set_end_bit_offset(0); - sd.block(first_block)->set_first_is_start_bit(false); - } - _cur_block = first_block; -#ifdef ASSERT - if (sd.block(first_block)->first_is_start_bit()) { - assert(!sd.partial_obj_ends_in_block(first_block), - "Partial object cannot end in first block"); - } - - if (PrintGCDetails && Verbose) { - if (partial_live_size == 1) { - gclog_or_tty->print_cr("first_block " PTR_FORMAT - " _offset " PTR_FORMAT - " _first_is_start_bit %d", - first_block, - sd.block(first_block)->raw_offset(), - sd.block(first_block)->first_is_start_bit()); - } - } -#endif - DEBUG_ONLY(ParallelCompactData::BlockData::set_cur_phase(17);) -} - -// This method is called when a object has been found (both beginning -// and end of the object) in the range of iteration. This method is -// calculating the words of live data to the left of a block. That live -// data includes any object starting to the left of the block (i.e., -// the live-data-to-the-left of block AAA will include the full size -// of any object entering AAA). - -ParMarkBitMapClosure::IterationStatus -BitBlockUpdateClosure::do_addr(HeapWord* addr, size_t words) { - // add the size to the block data. 
- HeapWord* obj = addr; - ParallelCompactData& sd = PSParallelCompact::summary_data(); - - assert(bitmap()->obj_size(obj) == words, "bad size"); - assert(_chunk_start <= obj, "object is not in chunk"); - assert(obj + words <= _chunk_end, "object is not in chunk"); - - // Update the live data to the left - size_t prev_live_data_left = _live_data_left; - _live_data_left = _live_data_left + words; - - // Is this object in the current block. - size_t block_of_obj = sd.addr_to_block_idx(obj); - size_t block_of_obj_last = sd.addr_to_block_idx(obj + words - 1); - HeapWord* block_of_obj_last_addr = sd.block_to_addr(block_of_obj_last); - if (_cur_block < block_of_obj) { - - // - // No object crossed the block boundary and this object was found - // on the other side of the block boundary. Update the offset for - // the new block with the data size that does not include this object. - // - // The first bit in block_of_obj is a start bit except in the - // case where the partial object for the chunk extends into - // this block. - if (sd.partial_obj_ends_in_block(block_of_obj)) { - sd.block(block_of_obj)->set_end_bit_offset(prev_live_data_left); - } else { - sd.block(block_of_obj)->set_start_bit_offset(prev_live_data_left); - } - - // Does this object pass beyond the its block? - if (block_of_obj < block_of_obj_last) { - // Object crosses block boundary. Two blocks need to be udpated: - // the current block where the object started - // the block where the object ends - // - // The offset for blocks with no objects starting in them - // (e.g., blocks between _cur_block and block_of_obj_last) - // should not be needed. - // Note that block_of_obj_last may be in another chunk. If so, - // it should be overwritten later. This is a problem (writting - // into a block in a later chunk) for parallel execution. - assert(obj < block_of_obj_last_addr, - "Object should start in previous block"); - - // obj is crossing into block_of_obj_last so the first bit - // is and end bit. 
- sd.block(block_of_obj_last)->set_end_bit_offset(_live_data_left); - - _cur_block = block_of_obj_last; - } else { - // _first_is_start_bit has already been set correctly - // in the if-then-else above so don't reset it here. - _cur_block = block_of_obj; - } - } else { - // The current block only changes if the object extends beyound - // the block it starts in. - // - // The object starts in the current block. - // Does this object pass beyond the end of it? - if (block_of_obj < block_of_obj_last) { - // Object crosses block boundary. - // See note above on possible blocks between block_of_obj and - // block_of_obj_last - assert(obj < block_of_obj_last_addr, - "Object should start in previous block"); - - sd.block(block_of_obj_last)->set_end_bit_offset(_live_data_left); - - _cur_block = block_of_obj_last; - } - } - - // Return incomplete if there are more blocks to be done. - if (chunk_contains_cur_block()) { - return ParMarkBitMap::incomplete; - } - return ParMarkBitMap::complete; -} - // Verify the new location using the forwarding pointer // from MarkSweep::mark_sweep_phase2(). Set the mark_word // to the initial value. @@ -3707,12 +3050,3 @@ PSParallelCompact::next_compaction_space_id(SpaceId id) { return last_space_id; } } - -// Here temporarily for debugging -#ifdef ASSERT - size_t ParallelCompactData::block_idx(BlockData* block) { - size_t index = pointer_delta(block, - PSParallelCompact::summary_data()._block_data, sizeof(BlockData)); - return index; - } -#endif diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp index c3f1619db75..7ca899956f4 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp @@ -76,87 +76,80 @@ class ParallelCompactData { public: // Sizes are in HeapWords, unless indicated otherwise. 
- static const size_t Log2ChunkSize; - static const size_t ChunkSize; - static const size_t ChunkSizeBytes; + static const size_t Log2RegionSize; + static const size_t RegionSize; + static const size_t RegionSizeBytes; - // Mask for the bits in a size_t to get an offset within a chunk. - static const size_t ChunkSizeOffsetMask; - // Mask for the bits in a pointer to get an offset within a chunk. - static const size_t ChunkAddrOffsetMask; - // Mask for the bits in a pointer to get the address of the start of a chunk. - static const size_t ChunkAddrMask; + // Mask for the bits in a size_t to get an offset within a region. + static const size_t RegionSizeOffsetMask; + // Mask for the bits in a pointer to get an offset within a region. + static const size_t RegionAddrOffsetMask; + // Mask for the bits in a pointer to get the address of the start of a region. + static const size_t RegionAddrMask; - static const size_t Log2BlockSize; - static const size_t BlockSize; - static const size_t BlockOffsetMask; - static const size_t BlockMask; - - static const size_t BlocksPerChunk; - - class ChunkData + class RegionData { public: - // Destination address of the chunk. + // Destination address of the region. HeapWord* destination() const { return _destination; } - // The first chunk containing data destined for this chunk. - size_t source_chunk() const { return _source_chunk; } + // The first region containing data destined for this region. + size_t source_region() const { return _source_region; } - // The object (if any) starting in this chunk and ending in a different - // chunk that could not be updated during the main (parallel) compaction + // The object (if any) starting in this region and ending in a different + // region that could not be updated during the main (parallel) compaction // phase. This is different from _partial_obj_addr, which is an object that - // extends onto a source chunk. However, the two uses do not overlap in + // extends onto a source region. 
However, the two uses do not overlap in // time, so the same field is used to save space. HeapWord* deferred_obj_addr() const { return _partial_obj_addr; } - // The starting address of the partial object extending onto the chunk. + // The starting address of the partial object extending onto the region. HeapWord* partial_obj_addr() const { return _partial_obj_addr; } - // Size of the partial object extending onto the chunk (words). + // Size of the partial object extending onto the region (words). size_t partial_obj_size() const { return _partial_obj_size; } - // Size of live data that lies within this chunk due to objects that start - // in this chunk (words). This does not include the partial object - // extending onto the chunk (if any), or the part of an object that extends - // onto the next chunk (if any). + // Size of live data that lies within this region due to objects that start + // in this region (words). This does not include the partial object + // extending onto the region (if any), or the part of an object that extends + // onto the next region (if any). size_t live_obj_size() const { return _dc_and_los & los_mask; } - // Total live data that lies within the chunk (words). + // Total live data that lies within the region (words). size_t data_size() const { return partial_obj_size() + live_obj_size(); } - // The destination_count is the number of other chunks to which data from - // this chunk will be copied. At the end of the summary phase, the valid + // The destination_count is the number of other regions to which data from + // this region will be copied. At the end of the summary phase, the valid // values of destination_count are // - // 0 - data from the chunk will be compacted completely into itself, or the - // chunk is empty. The chunk can be claimed and then filled. - // 1 - data from the chunk will be compacted into 1 other chunk; some - // data from the chunk may also be compacted into the chunk itself. 
- // 2 - data from the chunk will be copied to 2 other chunks. + // 0 - data from the region will be compacted completely into itself, or the + // region is empty. The region can be claimed and then filled. + // 1 - data from the region will be compacted into 1 other region; some + // data from the region may also be compacted into the region itself. + // 2 - data from the region will be copied to 2 other regions. // - // During compaction as chunks are emptied, the destination_count is + // During compaction as regions are emptied, the destination_count is // decremented (atomically) and when it reaches 0, it can be claimed and // then filled. // - // A chunk is claimed for processing by atomically changing the - // destination_count to the claimed value (dc_claimed). After a chunk has + // A region is claimed for processing by atomically changing the + // destination_count to the claimed value (dc_claimed). After a region has // been filled, the destination_count should be set to the completed value // (dc_completed). inline uint destination_count() const; inline uint destination_count_raw() const; - // The location of the java heap data that corresponds to this chunk. + // The location of the java heap data that corresponds to this region. inline HeapWord* data_location() const; - // The highest address referenced by objects in this chunk. + // The highest address referenced by objects in this region. inline HeapWord* highest_ref() const; - // Whether this chunk is available to be claimed, has been claimed, or has + // Whether this region is available to be claimed, has been claimed, or has // been completed. // - // Minor subtlety: claimed() returns true if the chunk is marked - // completed(), which is desirable since a chunk must be claimed before it + // Minor subtlety: claimed() returns true if the region is marked + // completed(), which is desirable since a region must be claimed before it // can be completed. 
bool available() const { return _dc_and_los < dc_one; } bool claimed() const { return _dc_and_los >= dc_claimed; } @@ -164,11 +157,11 @@ public: // These are not atomic. void set_destination(HeapWord* addr) { _destination = addr; } - void set_source_chunk(size_t chunk) { _source_chunk = chunk; } + void set_source_region(size_t region) { _source_region = region; } void set_deferred_obj_addr(HeapWord* addr) { _partial_obj_addr = addr; } void set_partial_obj_addr(HeapWord* addr) { _partial_obj_addr = addr; } void set_partial_obj_size(size_t words) { - _partial_obj_size = (chunk_sz_t) words; + _partial_obj_size = (region_sz_t) words; } inline void set_destination_count(uint count); @@ -184,129 +177,57 @@ public: inline bool claim(); private: - // The type used to represent object sizes within a chunk. - typedef uint chunk_sz_t; + // The type used to represent object sizes within a region. + typedef uint region_sz_t; // Constants for manipulating the _dc_and_los field, which holds both the // destination count and live obj size. The live obj size lives at the // least significant end so no masking is necessary when adding. - static const chunk_sz_t dc_shift; // Shift amount. - static const chunk_sz_t dc_mask; // Mask for destination count. - static const chunk_sz_t dc_one; // 1, shifted appropriately. - static const chunk_sz_t dc_claimed; // Chunk has been claimed. - static const chunk_sz_t dc_completed; // Chunk has been completed. - static const chunk_sz_t los_mask; // Mask for live obj size. + static const region_sz_t dc_shift; // Shift amount. + static const region_sz_t dc_mask; // Mask for destination count. + static const region_sz_t dc_one; // 1, shifted appropriately. + static const region_sz_t dc_claimed; // Region has been claimed. + static const region_sz_t dc_completed; // Region has been completed. + static const region_sz_t los_mask; // Mask for live obj size. 
- HeapWord* _destination; - size_t _source_chunk; - HeapWord* _partial_obj_addr; - chunk_sz_t _partial_obj_size; - chunk_sz_t volatile _dc_and_los; + HeapWord* _destination; + size_t _source_region; + HeapWord* _partial_obj_addr; + region_sz_t _partial_obj_size; + region_sz_t volatile _dc_and_los; #ifdef ASSERT // These enable optimizations that are only partially implemented. Use // debug builds to prevent the code fragments from breaking. - HeapWord* _data_location; - HeapWord* _highest_ref; + HeapWord* _data_location; + HeapWord* _highest_ref; #endif // #ifdef ASSERT #ifdef ASSERT public: - uint _pushed; // 0 until chunk is pushed onto a worker's stack + uint _pushed; // 0 until region is pushed onto a worker's stack private: #endif }; - // 'Blocks' allow shorter sections of the bitmap to be searched. Each Block - // holds an offset, which is the amount of live data in the Chunk to the left - // of the first live object in the Block. This amount of live data will - // include any object extending into the block. The first block in - // a chunk does not include any partial object extending into the - // the chunk. - // - // The offset also encodes the - // 'parity' of the first 1 bit in the Block: a positive offset means the - // first 1 bit marks the start of an object, a negative offset means the first - // 1 bit marks the end of an object. - class BlockData - { - public: - typedef short int blk_ofs_t; - - blk_ofs_t offset() const { return _offset >= 0 ? _offset : -_offset; } - blk_ofs_t raw_offset() const { return _offset; } - void set_first_is_start_bit(bool v) { _first_is_start_bit = v; } - -#if 0 - // The need for this method was anticipated but it is - // never actually used. Do not include it for now. If - // it is needed, consider the problem of what is passed - // as "v". To avoid warning errors the method set_start_bit_offset() - // was changed to take a size_t as the parameter and to do the - // check for the possible overflow. 
Doing the cast in these - // methods better limits the potential problems because of - // the size of the field to this class. - void set_raw_offset(blk_ofs_t v) { _offset = v; } -#endif - void set_start_bit_offset(size_t val) { - assert(val >= 0, "sanity"); - _offset = (blk_ofs_t) val; - assert(val == (size_t) _offset, "Value is too large"); - _first_is_start_bit = true; - } - void set_end_bit_offset(size_t val) { - assert(val >= 0, "sanity"); - _offset = (blk_ofs_t) val; - assert(val == (size_t) _offset, "Value is too large"); - _offset = - _offset; - _first_is_start_bit = false; - } - bool first_is_start_bit() { - assert(_set_phase > 0, "Not initialized"); - return _first_is_start_bit; - } - bool first_is_end_bit() { - assert(_set_phase > 0, "Not initialized"); - return !_first_is_start_bit; - } - - private: - blk_ofs_t _offset; - // This is temporary until the mark_bitmap is separated into - // a start bit array and an end bit array. - bool _first_is_start_bit; -#ifdef ASSERT - short _set_phase; - static short _cur_phase; - public: - static void set_cur_phase(short v) { _cur_phase = v; } -#endif - }; - public: ParallelCompactData(); bool initialize(MemRegion covered_region); - size_t chunk_count() const { return _chunk_count; } + size_t region_count() const { return _region_count; } - // Convert chunk indices to/from ChunkData pointers. - inline ChunkData* chunk(size_t chunk_idx) const; - inline size_t chunk(const ChunkData* const chunk_ptr) const; + // Convert region indices to/from RegionData pointers. + inline RegionData* region(size_t region_idx) const; + inline size_t region(const RegionData* const region_ptr) const; - // Returns true if the given address is contained within the chunk - bool chunk_contains(size_t chunk_index, HeapWord* addr); - - size_t block_count() const { return _block_count; } - inline BlockData* block(size_t n) const; - - // Returns true if the given block is in the given chunk. 
- static bool chunk_contains_block(size_t chunk_index, size_t block_index); + // Returns true if the given address is contained within the region + bool region_contains(size_t region_index, HeapWord* addr); void add_obj(HeapWord* addr, size_t len); void add_obj(oop p, size_t len) { add_obj((HeapWord*)p, len); } - // Fill in the chunks covering [beg, end) so that no data moves; i.e., the - // destination of chunk n is simply the start of chunk n. The argument beg - // must be chunk-aligned; end need not be. + // Fill in the regions covering [beg, end) so that no data moves; i.e., the + // destination of region n is simply the start of region n. The argument beg + // must be region-aligned; end need not be. void summarize_dense_prefix(HeapWord* beg, HeapWord* end); bool summarize(HeapWord* target_beg, HeapWord* target_end, @@ -314,48 +235,33 @@ public: HeapWord** target_next, HeapWord** source_next = 0); void clear(); - void clear_range(size_t beg_chunk, size_t end_chunk); + void clear_range(size_t beg_region, size_t end_region); void clear_range(HeapWord* beg, HeapWord* end) { - clear_range(addr_to_chunk_idx(beg), addr_to_chunk_idx(end)); + clear_range(addr_to_region_idx(beg), addr_to_region_idx(end)); } - // Return the number of words between addr and the start of the chunk + // Return the number of words between addr and the start of the region // containing addr. - inline size_t chunk_offset(const HeapWord* addr) const; + inline size_t region_offset(const HeapWord* addr) const; - // Convert addresses to/from a chunk index or chunk pointer. - inline size_t addr_to_chunk_idx(const HeapWord* addr) const; - inline ChunkData* addr_to_chunk_ptr(const HeapWord* addr) const; - inline HeapWord* chunk_to_addr(size_t chunk) const; - inline HeapWord* chunk_to_addr(size_t chunk, size_t offset) const; - inline HeapWord* chunk_to_addr(const ChunkData* chunk) const; + // Convert addresses to/from a region index or region pointer. 
+ inline size_t addr_to_region_idx(const HeapWord* addr) const; + inline RegionData* addr_to_region_ptr(const HeapWord* addr) const; + inline HeapWord* region_to_addr(size_t region) const; + inline HeapWord* region_to_addr(size_t region, size_t offset) const; + inline HeapWord* region_to_addr(const RegionData* region) const; - inline HeapWord* chunk_align_down(HeapWord* addr) const; - inline HeapWord* chunk_align_up(HeapWord* addr) const; - inline bool is_chunk_aligned(HeapWord* addr) const; - - // Analogous to chunk_offset() for blocks. - size_t block_offset(const HeapWord* addr) const; - size_t addr_to_block_idx(const HeapWord* addr) const; - size_t addr_to_block_idx(const oop obj) const { - return addr_to_block_idx((HeapWord*) obj); - } - inline BlockData* addr_to_block_ptr(const HeapWord* addr) const; - inline HeapWord* block_to_addr(size_t block) const; + inline HeapWord* region_align_down(HeapWord* addr) const; + inline HeapWord* region_align_up(HeapWord* addr) const; + inline bool is_region_aligned(HeapWord* addr) const; // Return the address one past the end of the partial object. - HeapWord* partial_obj_end(size_t chunk_idx) const; + HeapWord* partial_obj_end(size_t region_idx) const; // Return the new location of the object p after the // the compaction. HeapWord* calc_new_pointer(HeapWord* addr); - // Same as calc_new_pointer() using blocks. - HeapWord* block_calc_new_pointer(HeapWord* addr); - - // Same as calc_new_pointer() using chunks. - HeapWord* chunk_calc_new_pointer(HeapWord* addr); - HeapWord* calc_new_pointer(oop p) { return calc_new_pointer((HeapWord*) p); } @@ -363,22 +269,13 @@ public: // Return the updated address for the given klass klassOop calc_new_klass(klassOop); - // Given a block returns true if the partial object for the - // corresponding chunk ends in the block. Returns false, otherwise - // If there is no partial object, returns false. 
- bool partial_obj_ends_in_block(size_t block_index); - - // Returns the block index for the block - static size_t block_idx(BlockData* block); - #ifdef ASSERT void verify_clear(const PSVirtualSpace* vspace); void verify_clear(); #endif // #ifdef ASSERT private: - bool initialize_block_data(size_t region_size); - bool initialize_chunk_data(size_t region_size); + bool initialize_region_data(size_t region_size); PSVirtualSpace* create_vspace(size_t count, size_t element_size); private: @@ -387,74 +284,70 @@ private: HeapWord* _region_end; #endif // #ifdef ASSERT - PSVirtualSpace* _chunk_vspace; - ChunkData* _chunk_data; - size_t _chunk_count; - - PSVirtualSpace* _block_vspace; - BlockData* _block_data; - size_t _block_count; + PSVirtualSpace* _region_vspace; + RegionData* _region_data; + size_t _region_count; }; inline uint -ParallelCompactData::ChunkData::destination_count_raw() const +ParallelCompactData::RegionData::destination_count_raw() const { return _dc_and_los & dc_mask; } inline uint -ParallelCompactData::ChunkData::destination_count() const +ParallelCompactData::RegionData::destination_count() const { return destination_count_raw() >> dc_shift; } inline void -ParallelCompactData::ChunkData::set_destination_count(uint count) +ParallelCompactData::RegionData::set_destination_count(uint count) { assert(count <= (dc_completed >> dc_shift), "count too large"); - const chunk_sz_t live_sz = (chunk_sz_t) live_obj_size(); + const region_sz_t live_sz = (region_sz_t) live_obj_size(); _dc_and_los = (count << dc_shift) | live_sz; } -inline void ParallelCompactData::ChunkData::set_live_obj_size(size_t words) +inline void ParallelCompactData::RegionData::set_live_obj_size(size_t words) { assert(words <= los_mask, "would overflow"); - _dc_and_los = destination_count_raw() | (chunk_sz_t)words; + _dc_and_los = destination_count_raw() | (region_sz_t)words; } -inline void ParallelCompactData::ChunkData::decrement_destination_count() +inline void 
ParallelCompactData::RegionData::decrement_destination_count() { assert(_dc_and_los < dc_claimed, "already claimed"); assert(_dc_and_los >= dc_one, "count would go negative"); Atomic::add((int)dc_mask, (volatile int*)&_dc_and_los); } -inline HeapWord* ParallelCompactData::ChunkData::data_location() const +inline HeapWord* ParallelCompactData::RegionData::data_location() const { DEBUG_ONLY(return _data_location;) NOT_DEBUG(return NULL;) } -inline HeapWord* ParallelCompactData::ChunkData::highest_ref() const +inline HeapWord* ParallelCompactData::RegionData::highest_ref() const { DEBUG_ONLY(return _highest_ref;) NOT_DEBUG(return NULL;) } -inline void ParallelCompactData::ChunkData::set_data_location(HeapWord* addr) +inline void ParallelCompactData::RegionData::set_data_location(HeapWord* addr) { DEBUG_ONLY(_data_location = addr;) } -inline void ParallelCompactData::ChunkData::set_completed() +inline void ParallelCompactData::RegionData::set_completed() { assert(claimed(), "must be claimed first"); - _dc_and_los = dc_completed | (chunk_sz_t) live_obj_size(); + _dc_and_los = dc_completed | (region_sz_t) live_obj_size(); } -// MT-unsafe claiming of a chunk. Should only be used during single threaded +// MT-unsafe claiming of a region. Should only be used during single threaded // execution. 
-inline bool ParallelCompactData::ChunkData::claim_unsafe() +inline bool ParallelCompactData::RegionData::claim_unsafe() { if (available()) { _dc_and_los |= dc_claimed; @@ -463,13 +356,13 @@ inline bool ParallelCompactData::ChunkData::claim_unsafe() return false; } -inline void ParallelCompactData::ChunkData::add_live_obj(size_t words) +inline void ParallelCompactData::RegionData::add_live_obj(size_t words) { assert(words <= (size_t)los_mask - live_obj_size(), "overflow"); Atomic::add((int) words, (volatile int*) &_dc_and_los); } -inline void ParallelCompactData::ChunkData::set_highest_ref(HeapWord* addr) +inline void ParallelCompactData::RegionData::set_highest_ref(HeapWord* addr) { #ifdef ASSERT HeapWord* tmp = _highest_ref; @@ -479,7 +372,7 @@ inline void ParallelCompactData::ChunkData::set_highest_ref(HeapWord* addr) #endif // #ifdef ASSERT } -inline bool ParallelCompactData::ChunkData::claim() +inline bool ParallelCompactData::RegionData::claim() { const int los = (int) live_obj_size(); const int old = Atomic::cmpxchg(dc_claimed | los, @@ -487,119 +380,85 @@ inline bool ParallelCompactData::ChunkData::claim() return old == los; } -inline ParallelCompactData::ChunkData* -ParallelCompactData::chunk(size_t chunk_idx) const +inline ParallelCompactData::RegionData* +ParallelCompactData::region(size_t region_idx) const { - assert(chunk_idx <= chunk_count(), "bad arg"); - return _chunk_data + chunk_idx; + assert(region_idx <= region_count(), "bad arg"); + return _region_data + region_idx; } inline size_t -ParallelCompactData::chunk(const ChunkData* const chunk_ptr) const +ParallelCompactData::region(const RegionData* const region_ptr) const { - assert(chunk_ptr >= _chunk_data, "bad arg"); - assert(chunk_ptr <= _chunk_data + chunk_count(), "bad arg"); - return pointer_delta(chunk_ptr, _chunk_data, sizeof(ChunkData)); -} - -inline ParallelCompactData::BlockData* -ParallelCompactData::block(size_t n) const { - assert(n < block_count(), "bad arg"); - return _block_data + 
n; + assert(region_ptr >= _region_data, "bad arg"); + assert(region_ptr <= _region_data + region_count(), "bad arg"); + return pointer_delta(region_ptr, _region_data, sizeof(RegionData)); } inline size_t -ParallelCompactData::chunk_offset(const HeapWord* addr) const +ParallelCompactData::region_offset(const HeapWord* addr) const { assert(addr >= _region_start, "bad addr"); assert(addr <= _region_end, "bad addr"); - return (size_t(addr) & ChunkAddrOffsetMask) >> LogHeapWordSize; + return (size_t(addr) & RegionAddrOffsetMask) >> LogHeapWordSize; } inline size_t -ParallelCompactData::addr_to_chunk_idx(const HeapWord* addr) const +ParallelCompactData::addr_to_region_idx(const HeapWord* addr) const { assert(addr >= _region_start, "bad addr"); assert(addr <= _region_end, "bad addr"); - return pointer_delta(addr, _region_start) >> Log2ChunkSize; + return pointer_delta(addr, _region_start) >> Log2RegionSize; } -inline ParallelCompactData::ChunkData* -ParallelCompactData::addr_to_chunk_ptr(const HeapWord* addr) const +inline ParallelCompactData::RegionData* +ParallelCompactData::addr_to_region_ptr(const HeapWord* addr) const { - return chunk(addr_to_chunk_idx(addr)); + return region(addr_to_region_idx(addr)); } inline HeapWord* -ParallelCompactData::chunk_to_addr(size_t chunk) const +ParallelCompactData::region_to_addr(size_t region) const { - assert(chunk <= _chunk_count, "chunk out of range"); - return _region_start + (chunk << Log2ChunkSize); + assert(region <= _region_count, "region out of range"); + return _region_start + (region << Log2RegionSize); } inline HeapWord* -ParallelCompactData::chunk_to_addr(const ChunkData* chunk) const +ParallelCompactData::region_to_addr(const RegionData* region) const { - return chunk_to_addr(pointer_delta(chunk, _chunk_data, sizeof(ChunkData))); + return region_to_addr(pointer_delta(region, _region_data, + sizeof(RegionData))); } inline HeapWord* -ParallelCompactData::chunk_to_addr(size_t chunk, size_t offset) const 
+ParallelCompactData::region_to_addr(size_t region, size_t offset) const { - assert(chunk <= _chunk_count, "chunk out of range"); - assert(offset < ChunkSize, "offset too big"); // This may be too strict. - return chunk_to_addr(chunk) + offset; + assert(region <= _region_count, "region out of range"); + assert(offset < RegionSize, "offset too big"); // This may be too strict. + return region_to_addr(region) + offset; } inline HeapWord* -ParallelCompactData::chunk_align_down(HeapWord* addr) const +ParallelCompactData::region_align_down(HeapWord* addr) const { assert(addr >= _region_start, "bad addr"); - assert(addr < _region_end + ChunkSize, "bad addr"); - return (HeapWord*)(size_t(addr) & ChunkAddrMask); + assert(addr < _region_end + RegionSize, "bad addr"); + return (HeapWord*)(size_t(addr) & RegionAddrMask); } inline HeapWord* -ParallelCompactData::chunk_align_up(HeapWord* addr) const +ParallelCompactData::region_align_up(HeapWord* addr) const { assert(addr >= _region_start, "bad addr"); assert(addr <= _region_end, "bad addr"); - return chunk_align_down(addr + ChunkSizeOffsetMask); + return region_align_down(addr + RegionSizeOffsetMask); } inline bool -ParallelCompactData::is_chunk_aligned(HeapWord* addr) const +ParallelCompactData::is_region_aligned(HeapWord* addr) const { - return chunk_offset(addr) == 0; -} - -inline size_t -ParallelCompactData::block_offset(const HeapWord* addr) const -{ - assert(addr >= _region_start, "bad addr"); - assert(addr <= _region_end, "bad addr"); - return pointer_delta(addr, _region_start) & BlockOffsetMask; -} - -inline size_t -ParallelCompactData::addr_to_block_idx(const HeapWord* addr) const -{ - assert(addr >= _region_start, "bad addr"); - assert(addr <= _region_end, "bad addr"); - return pointer_delta(addr, _region_start) >> Log2BlockSize; -} - -inline ParallelCompactData::BlockData* -ParallelCompactData::addr_to_block_ptr(const HeapWord* addr) const -{ - return block(addr_to_block_idx(addr)); -} - -inline HeapWord* 
-ParallelCompactData::block_to_addr(size_t block) const -{ - assert(block < _block_count, "block out of range"); - return _region_start + (block << Log2BlockSize); + return region_offset(addr) == 0; } // Abstract closure for use with ParMarkBitMap::iterate(), which will invoke the @@ -687,45 +546,15 @@ inline void ParMarkBitMapClosure::decrement_words_remaining(size_t words) { _words_remaining -= words; } -// Closure for updating the block data during the summary phase. -class BitBlockUpdateClosure: public ParMarkBitMapClosure { - // ParallelCompactData::BlockData::blk_ofs_t _live_data_left; - size_t _live_data_left; - size_t _cur_block; - HeapWord* _chunk_start; - HeapWord* _chunk_end; - size_t _chunk_index; - - public: - BitBlockUpdateClosure(ParMarkBitMap* mbm, - ParCompactionManager* cm, - size_t chunk_index); - - size_t cur_block() { return _cur_block; } - size_t chunk_index() { return _chunk_index; } - size_t live_data_left() { return _live_data_left; } - // Returns true the first bit in the current block (cur_block) is - // a start bit. - // Returns true if the current block is within the chunk for the closure; - bool chunk_contains_cur_block(); - - // Set the chunk index and related chunk values for - // a new chunk. - void reset_chunk(size_t chunk_index); - - virtual IterationStatus do_addr(HeapWord* addr, size_t words); -}; - -// The UseParallelOldGC collector is a stop-the-world garbage -// collector that does parts of the collection using parallel threads. -// The collection includes the tenured generation and the young -// generation. The permanent generation is collected at the same -// time as the other two generations but the permanent generation -// is collect by a single GC thread. The permanent generation is -// collected serially because of the requirement that during the -// processing of a klass AAA, any objects reference by AAA must -// already have been processed. 
This requirement is enforced by -// a left (lower address) to right (higher address) sliding compaction. +// The UseParallelOldGC collector is a stop-the-world garbage collector that +// does parts of the collection using parallel threads. The collection includes +// the tenured generation and the young generation. The permanent generation is +// collected at the same time as the other two generations but the permanent +// generation is collect by a single GC thread. The permanent generation is +// collected serially because of the requirement that during the processing of a +// klass AAA, any objects reference by AAA must already have been processed. +// This requirement is enforced by a left (lower address) to right (higher +// address) sliding compaction. // // There are four phases of the collection. // @@ -740,81 +569,75 @@ class BitBlockUpdateClosure: public ParMarkBitMapClosure { // - move the objects to their destination // - update some references and reinitialize some variables // -// These three phases are invoked in PSParallelCompact::invoke_no_policy(). -// The marking phase is implemented in PSParallelCompact::marking_phase() -// and does a complete marking of the heap. -// The summary phase is implemented in PSParallelCompact::summary_phase(). -// The move and update phase is implemented in PSParallelCompact::compact(). +// These three phases are invoked in PSParallelCompact::invoke_no_policy(). The +// marking phase is implemented in PSParallelCompact::marking_phase() and does a +// complete marking of the heap. The summary phase is implemented in +// PSParallelCompact::summary_phase(). The move and update phase is implemented +// in PSParallelCompact::compact(). // -// A space that is being collected is divided into chunks and with -// each chunk is associated an object of type ParallelCompactData. 
-// Each chunk is of a fixed size and typically will contain more than -// 1 object and may have parts of objects at the front and back of the -// chunk. +// A space that is being collected is divided into regions and with each region +// is associated an object of type ParallelCompactData. Each region is of a +// fixed size and typically will contain more than 1 object and may have parts +// of objects at the front and back of the region. // -// chunk -----+---------------------+---------- +// region -----+---------------------+---------- // objects covered [ AAA )[ BBB )[ CCC )[ DDD ) // -// The marking phase does a complete marking of all live objects in the -// heap. The marking also compiles the size of the data for -// all live objects covered by the chunk. This size includes the -// part of any live object spanning onto the chunk (part of AAA -// if it is live) from the front, all live objects contained in the chunk -// (BBB and/or CCC if they are live), and the part of any live objects -// covered by the chunk that extends off the chunk (part of DDD if it is -// live). The marking phase uses multiple GC threads and marking is -// done in a bit array of type ParMarkBitMap. The marking of the -// bit map is done atomically as is the accumulation of the size of the -// live objects covered by a chunk. +// The marking phase does a complete marking of all live objects in the heap. +// The marking also compiles the size of the data for all live objects covered +// by the region. This size includes the part of any live object spanning onto +// the region (part of AAA if it is live) from the front, all live objects +// contained in the region (BBB and/or CCC if they are live), and the part of +// any live objects covered by the region that extends off the region (part of +// DDD if it is live). The marking phase uses multiple GC threads and marking +// is done in a bit array of type ParMarkBitMap. 
The marking of the bit map is +// done atomically as is the accumulation of the size of the live objects +// covered by a region. // -// The summary phase calculates the total live data to the left of -// each chunk XXX. Based on that total and the bottom of the space, -// it can calculate the starting location of the live data in XXX. -// The summary phase calculates for each chunk XXX quantites such as +// The summary phase calculates the total live data to the left of each region +// XXX. Based on that total and the bottom of the space, it can calculate the +// starting location of the live data in XXX. The summary phase calculates for +// each region XXX quantites such as // -// - the amount of live data at the beginning of a chunk from an object -// entering the chunk. -// - the location of the first live data on the chunk -// - a count of the number of chunks receiving live data from XXX. +// - the amount of live data at the beginning of a region from an object +// entering the region. +// - the location of the first live data on the region +// - a count of the number of regions receiving live data from XXX. // // See ParallelCompactData for precise details. The summary phase also -// calculates the dense prefix for the compaction. The dense prefix -// is a portion at the beginning of the space that is not moved. The -// objects in the dense prefix do need to have their object references -// updated. See method summarize_dense_prefix(). +// calculates the dense prefix for the compaction. The dense prefix is a +// portion at the beginning of the space that is not moved. The objects in the +// dense prefix do need to have their object references updated. See method +// summarize_dense_prefix(). // // The summary phase is done using 1 GC thread. // -// The compaction phase moves objects to their new location and updates -// all references in the object. +// The compaction phase moves objects to their new location and updates all +// references in the object. 
// -// A current exception is that objects that cross a chunk boundary -// are moved but do not have their references updated. References are -// not updated because it cannot easily be determined if the klass -// pointer KKK for the object AAA has been updated. KKK likely resides -// in a chunk to the left of the chunk containing AAA. These AAA's -// have there references updated at the end in a clean up phase. -// See the method PSParallelCompact::update_deferred_objects(). An -// alternate strategy is being investigated for this deferral of updating. -// -// Compaction is done on a chunk basis. A chunk that is ready to be -// filled is put on a ready list and GC threads take chunk off the list -// and fill them. A chunk is ready to be filled if it -// empty of live objects. Such a chunk may have been initially -// empty (only contained -// dead objects) or may have had all its live objects copied out already. -// A chunk that compacts into itself is also ready for filling. The -// ready list is initially filled with empty chunks and chunks compacting -// into themselves. There is always at least 1 chunk that can be put on -// the ready list. The chunks are atomically added and removed from -// the ready list. +// A current exception is that objects that cross a region boundary are moved +// but do not have their references updated. References are not updated because +// it cannot easily be determined if the klass pointer KKK for the object AAA +// has been updated. KKK likely resides in a region to the left of the region +// containing AAA. These AAA's have their references updated at the end in a +// clean up phase. See the method PSParallelCompact::update_deferred_objects(). +// An alternate strategy is being investigated for this deferral of updating. // +// Compaction is done on a region basis. A region that is ready to be filled is +// put on a ready list and GC threads take regions off the list and fill them.
A +// region is ready to be filled if it empty of live objects. Such a region may +// have been initially empty (only contained dead objects) or may have had all +// its live objects copied out already. A region that compacts into itself is +// also ready for filling. The ready list is initially filled with empty +// regions and regions compacting into themselves. There is always at least 1 +// region that can be put on the ready list. The regions are atomically added +// and removed from the ready list. + class PSParallelCompact : AllStatic { public: // Convenient access to type names. typedef ParMarkBitMap::idx_t idx_t; - typedef ParallelCompactData::ChunkData ChunkData; - typedef ParallelCompactData::BlockData BlockData; + typedef ParallelCompactData::RegionData RegionData; typedef enum { perm_space_id, old_space_id, eden_space_id, @@ -977,26 +800,26 @@ class PSParallelCompact : AllStatic { // not reclaimed). static double dead_wood_limiter(double density, size_t min_percent); - // Find the first (left-most) chunk in the range [beg, end) that has at least + // Find the first (left-most) region in the range [beg, end) that has at least // dead_words of dead space to the left. The argument beg must be the first - // chunk in the space that is not completely live. - static ChunkData* dead_wood_limit_chunk(const ChunkData* beg, - const ChunkData* end, - size_t dead_words); + // region in the space that is not completely live. + static RegionData* dead_wood_limit_region(const RegionData* beg, + const RegionData* end, + size_t dead_words); - // Return a pointer to the first chunk in the range [beg, end) that is not + // Return a pointer to the first region in the range [beg, end) that is not // completely full. 
- static ChunkData* first_dead_space_chunk(const ChunkData* beg, - const ChunkData* end); + static RegionData* first_dead_space_region(const RegionData* beg, + const RegionData* end); // Return a value indicating the benefit or 'yield' if the compacted region // were to start (or equivalently if the dense prefix were to end) at the - // candidate chunk. Higher values are better. + // candidate region. Higher values are better. // // The value is based on the amount of space reclaimed vs. the costs of (a) // updating references in the dense prefix plus (b) copying objects and // updating references in the compacted region. - static inline double reclaimed_ratio(const ChunkData* const candidate, + static inline double reclaimed_ratio(const RegionData* const candidate, HeapWord* const bottom, HeapWord* const top, HeapWord* const new_top); @@ -1005,9 +828,9 @@ class PSParallelCompact : AllStatic { static HeapWord* compute_dense_prefix(const SpaceId id, bool maximum_compaction); - // Return true if dead space crosses onto the specified Chunk; bit must be the - // bit index corresponding to the first word of the Chunk. - static inline bool dead_space_crosses_boundary(const ChunkData* chunk, + // Return true if dead space crosses onto the specified Region; bit must be + // the bit index corresponding to the first word of the Region. + static inline bool dead_space_crosses_boundary(const RegionData* region, idx_t bit); // Summary phase utility routine to fill dead space (if any) at the dense @@ -1019,12 +842,6 @@ class PSParallelCompact : AllStatic { static void summarize_space(SpaceId id, bool maximum_compaction); static void summary_phase(ParCompactionManager* cm, bool maximum_compaction); - static bool block_first_offset(size_t block_index, idx_t* block_offset_ptr); - - // Fill in the BlockData - static void summarize_blocks(ParCompactionManager* cm, - SpaceId first_compaction_space_id); - // The space that is compacted after space_id. 
static SpaceId next_compaction_space_id(SpaceId space_id); @@ -1038,16 +855,16 @@ class PSParallelCompact : AllStatic { static void compact_perm(ParCompactionManager* cm); static void compact(); - // Add available chunks to the stack and draining tasks to the task queue. - static void enqueue_chunk_draining_tasks(GCTaskQueue* q, - uint parallel_gc_threads); + // Add available regions to the stack and draining tasks to the task queue. + static void enqueue_region_draining_tasks(GCTaskQueue* q, + uint parallel_gc_threads); // Add dense prefix update tasks to the task queue. static void enqueue_dense_prefix_tasks(GCTaskQueue* q, uint parallel_gc_threads); - // Add chunk stealing tasks to the task queue. - static void enqueue_chunk_stealing_tasks( + // Add region stealing tasks to the task queue. + static void enqueue_region_stealing_tasks( GCTaskQueue* q, ParallelTaskTerminator* terminator_ptr, uint parallel_gc_threads); @@ -1154,56 +971,56 @@ class PSParallelCompact : AllStatic { // Move and update the live objects in the specified space. static void move_and_update(ParCompactionManager* cm, SpaceId space_id); - // Process the end of the given chunk range in the dense prefix. + // Process the end of the given region range in the dense prefix. // This includes saving any object not updated. - static void dense_prefix_chunks_epilogue(ParCompactionManager* cm, - size_t chunk_start_index, - size_t chunk_end_index, - idx_t exiting_object_offset, - idx_t chunk_offset_start, - idx_t chunk_offset_end); + static void dense_prefix_regions_epilogue(ParCompactionManager* cm, + size_t region_start_index, + size_t region_end_index, + idx_t exiting_object_offset, + idx_t region_offset_start, + idx_t region_offset_end); - // Update a chunk in the dense prefix. For each live object - // in the chunk, update it's interior references. For each + // Update a region in the dense prefix. For each live object + // in the region, update it's interior references. 
For each // dead object, fill it with deadwood. Dead space at the end - // of a chunk range will be filled to the start of the next - // live object regardless of the chunk_index_end. None of the + // of a region range will be filled to the start of the next + // live object regardless of the region_index_end. None of the // objects in the dense prefix move and dead space is dead // (holds only dead objects that don't need any processing), so // dead space can be filled in any order. static void update_and_deadwood_in_dense_prefix(ParCompactionManager* cm, SpaceId space_id, - size_t chunk_index_start, - size_t chunk_index_end); + size_t region_index_start, + size_t region_index_end); // Return the address of the count + 1st live word in the range [beg, end). static HeapWord* skip_live_words(HeapWord* beg, HeapWord* end, size_t count); // Return the address of the word to be copied to dest_addr, which must be - // aligned to a chunk boundary. + // aligned to a region boundary. static HeapWord* first_src_addr(HeapWord* const dest_addr, - size_t src_chunk_idx); + size_t src_region_idx); - // Determine the next source chunk, set closure.source() to the start of the - // new chunk return the chunk index. Parameter end_addr is the address one + // Determine the next source region, set closure.source() to the start of the + // new region return the region index. Parameter end_addr is the address one // beyond the end of source range just processed. If necessary, switch to a // new source space and set src_space_id (in-out parameter) and src_space_top // (out parameter) accordingly. 
- static size_t next_src_chunk(MoveAndUpdateClosure& closure, - SpaceId& src_space_id, - HeapWord*& src_space_top, - HeapWord* end_addr); + static size_t next_src_region(MoveAndUpdateClosure& closure, + SpaceId& src_space_id, + HeapWord*& src_space_top, + HeapWord* end_addr); - // Decrement the destination count for each non-empty source chunk in the - // range [beg_chunk, chunk(chunk_align_up(end_addr))). + // Decrement the destination count for each non-empty source region in the + // range [beg_region, region(region_align_up(end_addr))). static void decrement_destination_counts(ParCompactionManager* cm, - size_t beg_chunk, + size_t beg_region, HeapWord* end_addr); - // Fill a chunk, copying objects from one or more source chunks. - static void fill_chunk(ParCompactionManager* cm, size_t chunk_idx); - static void fill_and_update_chunk(ParCompactionManager* cm, size_t chunk) { - fill_chunk(cm, chunk); + // Fill a region, copying objects from one or more source regions. + static void fill_region(ParCompactionManager* cm, size_t region_idx); + static void fill_and_update_region(ParCompactionManager* cm, size_t region) { + fill_region(cm, region); } // Update the deferred objects in the space. @@ -1259,7 +1076,7 @@ class PSParallelCompact : AllStatic { #ifndef PRODUCT // Debugging support. static const char* space_names[last_space_id]; - static void print_chunk_ranges(); + static void print_region_ranges(); static void print_dense_prefix_stats(const char* const algorithm, const SpaceId id, const bool maximum_compaction, @@ -1267,7 +1084,7 @@ class PSParallelCompact : AllStatic { #endif // #ifndef PRODUCT #ifdef ASSERT - // Verify that all the chunks have been emptied. + // Verify that all the regions have been emptied. 
static void verify_complete(SpaceId space_id); #endif // #ifdef ASSERT }; @@ -1376,17 +1193,17 @@ inline double PSParallelCompact::normal_distribution(double density) { } inline bool -PSParallelCompact::dead_space_crosses_boundary(const ChunkData* chunk, +PSParallelCompact::dead_space_crosses_boundary(const RegionData* region, idx_t bit) { - assert(bit > 0, "cannot call this for the first bit/chunk"); - assert(_summary_data.chunk_to_addr(chunk) == _mark_bitmap.bit_to_addr(bit), + assert(bit > 0, "cannot call this for the first bit/region"); + assert(_summary_data.region_to_addr(region) == _mark_bitmap.bit_to_addr(bit), "sanity check"); // Dead space crosses the boundary if (1) a partial object does not extend - // onto the chunk, (2) an object does not start at the beginning of the chunk, - // and (3) an object does not end at the end of the prior chunk. - return chunk->partial_obj_size() == 0 && + // onto the region, (2) an object does not start at the beginning of the + // region, and (3) an object does not end at the end of the prior region. + return region->partial_obj_size() == 0 && !_mark_bitmap.is_obj_beg(bit) && !_mark_bitmap.is_obj_end(bit - 1); } diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPermGen.cpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPermGen.cpp index 53b3010ecee..5fd5f5539d1 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPermGen.cpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPermGen.cpp @@ -123,8 +123,6 @@ void PSPermGen::move_and_update(ParCompactionManager* cm) { void PSPermGen::precompact() { // Reset start array first. 
- debug_only(if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {) _start_array.reset(); - debug_only(}) object_mark_sweep()->precompact(); } diff --git a/hotspot/src/share/vm/gc_implementation/shared/coTracker.cpp b/hotspot/src/share/vm/gc_implementation/shared/coTracker.cpp new file mode 100644 index 00000000000..c15fcc010e2 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/shared/coTracker.cpp @@ -0,0 +1,189 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +# include "incls/_precompiled.incl" +# include "incls/_coTracker.cpp.incl" + +COTracker* COTracker::_head = NULL; +double COTracker::_cpu_number = -1.0; + +void +COTracker::resetPeriod(double now_sec, double vnow_sec) { + guarantee( _enabled, "invariant" ); + _period_start_time_sec = now_sec; + _period_start_vtime_sec = vnow_sec; +} + +void +COTracker::setConcOverhead(double time_stamp_sec, + double conc_overhead) { + guarantee( _enabled, "invariant" ); + _conc_overhead = conc_overhead; + _time_stamp_sec = time_stamp_sec; + if (conc_overhead > 0.001) + _conc_overhead_seq.add(conc_overhead); +} + +void +COTracker::reset(double starting_conc_overhead) { + guarantee( _enabled, "invariant" ); + double now_sec = os::elapsedTime(); + setConcOverhead(now_sec, starting_conc_overhead); +} + +void +COTracker::start() { + guarantee( _enabled, "invariant" ); + resetPeriod(os::elapsedTime(), os::elapsedVTime()); +} + +void +COTracker::update(bool force_end) { + assert( _enabled, "invariant" ); + double end_time_sec = os::elapsedTime(); + double elapsed_time_sec = end_time_sec - _period_start_time_sec; + if (force_end || elapsed_time_sec > _update_period_sec) { + // reached the end of the period + double end_vtime_sec = os::elapsedVTime(); + double elapsed_vtime_sec = end_vtime_sec - _period_start_vtime_sec; + + double conc_overhead = elapsed_vtime_sec / elapsed_time_sec; + + setConcOverhead(end_time_sec, conc_overhead); + resetPeriod(end_time_sec, end_vtime_sec); + } +} + +void +COTracker::updateForSTW(double start_sec, double end_sec) { + if (!_enabled) + return; + + // During a STW pause, no concurrent GC thread has done any + // work. So, we can safely adjust the start of the current period by + // adding the duration of the STW pause to it, so that the STW pause + // doesn't affect the reading of the concurrent overhead (it's + // basically like excluding the time of the STW pause from the + // concurrent overhead calculation). 
+ + double stw_duration_sec = end_sec - start_sec; + guarantee( stw_duration_sec > 0.0, "invariant" ); + + if (outOfDate(start_sec)) + _conc_overhead = 0.0; + else + _time_stamp_sec = end_sec; + _period_start_time_sec += stw_duration_sec; + _conc_overhead_seq = NumberSeq(); + + guarantee( os::elapsedTime() > _period_start_time_sec, "invariant" ); +} + +double +COTracker::predConcOverhead() { + if (_enabled) { + // tty->print(" %1.2lf", _conc_overhead_seq.maximum()); + return _conc_overhead_seq.maximum(); + } else { + // tty->print(" DD"); + return 0.0; + } +} + +void +COTracker::resetPred() { + _conc_overhead_seq = NumberSeq(); +} + +COTracker::COTracker(int group) + : _enabled(false), + _group(group), + _period_start_time_sec(-1.0), + _period_start_vtime_sec(-1.0), + _conc_overhead(-1.0), + _time_stamp_sec(-1.0), + _next(NULL) { + // GCOverheadReportingPeriodMS indicates how frequently the + // concurrent overhead will be recorded by the GC Overhead + // Reporter. We want to take readings less often than that. If we + // took readings more often, then some of them might be lost.
+ _update_period_sec = ((double) GCOverheadReportingPeriodMS) / 1000.0 * 1.25; + _next = _head; + _head = this; + + if (_cpu_number < 0.0) + _cpu_number = (double) os::processor_count(); +} + +// statics + +void +COTracker::updateAllForSTW(double start_sec, double end_sec) { + for (COTracker* curr = _head; curr != NULL; curr = curr->_next) { + curr->updateForSTW(start_sec, end_sec); + } +} + +double +COTracker::totalConcOverhead(double now_sec) { + double total_conc_overhead = 0.0; + + for (COTracker* curr = _head; curr != NULL; curr = curr->_next) { + double conc_overhead = curr->concOverhead(now_sec); + total_conc_overhead += conc_overhead; + } + + return total_conc_overhead; +} + +double +COTracker::totalConcOverhead(double now_sec, + size_t group_num, + double* co_per_group) { + double total_conc_overhead = 0.0; + + for (size_t i = 0; i < group_num; ++i) + co_per_group[i] = 0.0; + + for (COTracker* curr = _head; curr != NULL; curr = curr->_next) { + size_t group = curr->_group; + assert( 0 <= group && group < group_num, "invariant" ); + double conc_overhead = curr->concOverhead(now_sec); + + co_per_group[group] += conc_overhead; + total_conc_overhead += conc_overhead; + } + + return total_conc_overhead; +} + +double +COTracker::totalPredConcOverhead() { + double total_pred_conc_overhead = 0.0; + for (COTracker* curr = _head; curr != NULL; curr = curr->_next) { + total_pred_conc_overhead += curr->predConcOverhead(); + curr->resetPred(); + } + return total_pred_conc_overhead / _cpu_number; +} diff --git a/hotspot/src/share/vm/gc_implementation/shared/coTracker.hpp b/hotspot/src/share/vm/gc_implementation/shared/coTracker.hpp new file mode 100644 index 00000000000..3c9fa012e57 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/shared/coTracker.hpp @@ -0,0 +1,181 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// COTracker keeps track of the concurrent overhead of a GC thread. + +// A thread that needs to be tracked must, itself, start up its +// tracker with the start() method and then call the update() method +// at regular intervals. What the tracker does is to calculate the +// concurrent overhead of a process at a given update period. The +// tracker starts and when it detects that it has exceeded the given +// period, it calculates the duration of the period in wall-clock time +// and the duration of the period in vtime (i.e. how much time the +// concurrent processes really took up during this period). The ratio +// of the latter over the former is the concurrent overhead of that +// process for that period over a single CPU. This overhead is stored +// on the tracker, "timestamped" with the wall-clock time of the end +// of the period.
When the concurrent overhead of this process needs +// to be queried, this last "reading" provides a good approximation +// (we assume that the concurrent overhead of a particular thread +// stays largely constant over time). The timestamp is necessary to +// detect when the process has stopped working and the recorded +// reading hasn't been updated for some time. + +// Each concurrent GC thread is considered to be part of a "group" +// (i.e. any available concurrent marking threads are part of the +// "concurrent marking thread group"). A COTracker is associated with +// a single group at construction-time. It's up to each collector to +// decide how groups will be mapped to such an id (ids should start +// from 0 and be consecutive; there's a hardcoded max group num +// defined on the GCOverheadTracker class). The notion of a group has +// been introduced to be able to identify how much overhead was +// imposed by each group, instead of getting a single value that +// covers all concurrent overhead. + +class COTracker { +private: + // It indicates whether this tracker is enabled or not. When the + // tracker is disabled, then it returns 0.0 as the latest concurrent + // overhead and several methods (reset, start, and update) are not + // supposed to be called on it. This enabling / disabling facility + // is really provided to make it a bit more explicit in the code when a + // particular tracker of a process that doesn't run all the time + // (e.g. concurrent marking) is supposed to be used and when it's not. + bool _enabled; + + // The ID of the group associated with this tracker. + int _group; + + // The update period of the tracker. A new value for the concurrent + // overhead of the associated process will be made at intervals no + // smaller than this. + double _update_period_sec; + + // The start times (both wall-clock time and vtime) of the current + // interval.
+ double _period_start_time_sec; + double _period_start_vtime_sec; + + // Number seq of the concurrent overhead readings within a period + NumberSeq _conc_overhead_seq; + + // The latest reading of the concurrent overhead (over a single CPU) + // imposed by the associated concurrent thread, made available at + // the indicated wall-clock time. + double _conc_overhead; + double _time_stamp_sec; + + // The number of CPUs that the host machine has (for convenience + // really, as we'd have to keep translating it into a double) + static double _cpu_number; + + // Fields that keep a list of all trackers created. This is useful, + // since it allows us to sum up the concurrent overhead without + // having to write code for a specific collector to broadcast a + // request to all its concurrent processes. + COTracker* _next; + static COTracker* _head; + + // It indicates that a new period is starting by updating the + // _period_start_time_sec and _period_start_vtime_sec fields. + void resetPeriod(double now_sec, double vnow_sec); + // It updates the latest concurrent overhead reading, taken at a + // given wall-clock time. + void setConcOverhead(double time_stamp_sec, double conc_overhead); + + // It determines whether the time stamp of the latest concurrent + // overhead reading is out of date or not. + bool outOfDate(double now_sec) { + // The latest reading is considered out of date, if it was taken + // 1.2x the update period. + return (now_sec - _time_stamp_sec) > 1.2 * _update_period_sec; + } + +public: + // The constructor which associates the tracker with a group ID. + COTracker(int group); + + // Methods to enable / disable the tracker and query whether it is enabled. + void enable() { _enabled = true; } + void disable() { _enabled = false; } + bool enabled() { return _enabled; } + + // It resets the tracker and sets concurrent overhead reading to be + // the given parameter and the associated time stamp to be now. 
+ void reset(double starting_conc_overhead = 0.0); + // The tracker starts tracking. It should only be called from the + // concurrent thread that is tracked by this tracker. + void start(); + // It updates the tracker and, if the current period is longer than + // the update period, the concurrent overhead reading will be + // updated. force_end being true indicates that it's the last call + // to update() by this process before the tracker is disabled (the + // tracker can be re-enabled later if necessary). It should only be + // called from the concurrent thread that is tracked by this tracker + // and while the thread has joined the STS. + void update(bool force_end = false); + // It adjusts the contents of the tracker to take into account a STW + // pause. + void updateForSTW(double start_sec, double end_sec); + + // It returns the last concurrent overhead reading over a single + // CPU. If the reading is out of date, or the tracker is disabled, + // it returns 0.0. + double concCPUOverhead(double now_sec) { + if (!_enabled || outOfDate(now_sec)) + return 0.0; + else + return _conc_overhead; + } + + // It returns the last concurrent overhead reading over all CPUs + // that the host machine has. If the reading is out of date, or the + // tracker is disabled, it returns 0.0. + double concOverhead(double now_sec) { + return concCPUOverhead(now_sec) / _cpu_number; + } + + double predConcOverhead(); + + void resetPred(); + + // statics + + // It notifies all trackers about a STW pause. + static void updateAllForSTW(double start_sec, double end_sec); + + // It returns the sum of the concurrent overhead readings of all + // available (and enabled) trackers for the given time stamp. The + // overhead is over all the CPUs of the host machine. + + static double totalConcOverhead(double now_sec); + // Like the previous method, but it also sums up the overheads per + // group number.
The length of the co_per_group array must be at + // least as large as group_num + static double totalConcOverhead(double now_sec, + size_t group_num, + double* co_per_group); + + static double totalPredConcOverhead(); +}; diff --git a/hotspot/src/share/vm/gc_implementation/shared/concurrentGCThread.cpp b/hotspot/src/share/vm/gc_implementation/shared/concurrentGCThread.cpp new file mode 100644 index 00000000000..8ed8b809e2a --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/shared/concurrentGCThread.cpp @@ -0,0 +1,314 @@ +/* + * Copyright 2001-2005 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions.
+ * + */ + +// CopyrightVersion 1.2 + +# include "incls/_precompiled.incl" +# include "incls/_concurrentGCThread.cpp.incl" + +bool ConcurrentGCThread::_should_terminate = false; +bool ConcurrentGCThread::_has_terminated = false; +int ConcurrentGCThread::_CGC_flag = CGC_nil; + +SuspendibleThreadSet ConcurrentGCThread::_sts; + +ConcurrentGCThread::ConcurrentGCThread() { + _sts.initialize(); +}; + +void ConcurrentGCThread::stopWorldAndDo(VoidClosure* op) { + MutexLockerEx x(Heap_lock, + Mutex::_no_safepoint_check_flag); + // warning("CGC: about to try stopping world"); + SafepointSynchronize::begin(); + // warning("CGC: successfully stopped world"); + op->do_void(); + SafepointSynchronize::end(); + // warning("CGC: successfully restarted world"); +} + +void ConcurrentGCThread::safepoint_synchronize() { + _sts.suspend_all(); +} + +void ConcurrentGCThread::safepoint_desynchronize() { + _sts.resume_all(); +} + +void ConcurrentGCThread::create_and_start() { + if (os::create_thread(this, os::cgc_thread)) { + // XXX: need to set this to low priority + // unless "agressive mode" set; priority + // should be just less than that of VMThread. + os::set_priority(this, NearMaxPriority); + if (!_should_terminate && !DisableStartThread) { + os::start_thread(this); + } + } +} + +void ConcurrentGCThread::initialize_in_thread() { + this->record_stack_base_and_size(); + this->initialize_thread_local_storage(); + this->set_active_handles(JNIHandleBlock::allocate_block()); + // From this time Thread::current() should be working. 
+ assert(this == Thread::current(), "just checking"); +} + +void ConcurrentGCThread::wait_for_universe_init() { + MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag); + while (!is_init_completed() && !_should_terminate) { + CGC_lock->wait(Mutex::_no_safepoint_check_flag, 200); + } +} + +void ConcurrentGCThread::terminate() { + // Signal that it is terminated + { + MutexLockerEx mu(Terminator_lock, + Mutex::_no_safepoint_check_flag); + _has_terminated = true; + Terminator_lock->notify(); + } + + // Thread destructor usually does this.. + ThreadLocalStorage::set_thread(NULL); +} + + +void SuspendibleThreadSet::initialize_work() { + MutexLocker x(STS_init_lock); + if (!_initialized) { + _m = new Monitor(Mutex::leaf, + "SuspendibleThreadSetLock", true); + _async = 0; + _async_stop = false; + _async_stopped = 0; + _initialized = true; + } +} + +void SuspendibleThreadSet::join() { + initialize(); + MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag); + while (_async_stop) _m->wait(Mutex::_no_safepoint_check_flag); + _async++; + assert(_async > 0, "Huh."); +} + +void SuspendibleThreadSet::leave() { + assert(_initialized, "Must be initialized."); + MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag); + _async--; + assert(_async >= 0, "Huh."); + if (_async_stop) _m->notify_all(); +} + +void SuspendibleThreadSet::yield(const char* id) { + assert(_initialized, "Must be initialized."); + if (_async_stop) { + MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag); + if (_async_stop) { + _async_stopped++; + assert(_async_stopped > 0, "Huh."); + if (_async_stopped == _async) { + if (ConcGCYieldTimeout > 0) { + double now = os::elapsedTime(); + guarantee((now - _suspend_all_start) * 1000.0 < + (double)ConcGCYieldTimeout, + "Long delay; whodunit?"); + } + } + _m->notify_all(); + while (_async_stop) _m->wait(Mutex::_no_safepoint_check_flag); + _async_stopped--; + assert(_async >= 0, "Huh"); + _m->notify_all(); + } + } +} + +void SuspendibleThreadSet::suspend_all() { + 
initialize(); // If necessary. + if (ConcGCYieldTimeout > 0) { + _suspend_all_start = os::elapsedTime(); + } + MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag); + assert(!_async_stop, "Only one at a time."); + _async_stop = true; + while (_async_stopped < _async) _m->wait(Mutex::_no_safepoint_check_flag); +} + +void SuspendibleThreadSet::resume_all() { + assert(_initialized, "Must be initialized."); + MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag); + assert(_async_stopped == _async, "Huh."); + _async_stop = false; + _m->notify_all(); +} + +static void _sltLoop(JavaThread* thread, TRAPS) { + SurrogateLockerThread* slt = (SurrogateLockerThread*)thread; + slt->loop(); +} + +SurrogateLockerThread::SurrogateLockerThread() : + JavaThread(&_sltLoop), + _monitor(Mutex::nonleaf, "SLTMonitor"), + _buffer(empty) +{} + +SurrogateLockerThread* SurrogateLockerThread::make(TRAPS) { + klassOop k = + SystemDictionary::resolve_or_fail(vmSymbolHandles::java_lang_Thread(), + true, CHECK_NULL); + instanceKlassHandle klass (THREAD, k); + instanceHandle thread_oop = klass->allocate_instance_handle(CHECK_NULL); + + const char thread_name[] = "Surrogate Locker Thread (CMS)"; + Handle string = java_lang_String::create_from_str(thread_name, CHECK_NULL); + + // Initialize thread_oop to put it into the system threadGroup + Handle thread_group (THREAD, Universe::system_thread_group()); + JavaValue result(T_VOID); + JavaCalls::call_special(&result, thread_oop, + klass, + vmSymbolHandles::object_initializer_name(), + vmSymbolHandles::threadgroup_string_void_signature(), + thread_group, + string, + CHECK_NULL); + + SurrogateLockerThread* res; + { + MutexLocker mu(Threads_lock); + res = new SurrogateLockerThread(); + + // At this point it may be possible that no osthread was created for the + // JavaThread due to lack of memory. We would have to throw an exception + // in that case. However, since this must work and we do not allow + // exceptions anyway, check and abort if this fails. 
+ if (res == NULL || res->osthread() == NULL) { + vm_exit_during_initialization("java.lang.OutOfMemoryError", + "unable to create new native thread"); + } + java_lang_Thread::set_thread(thread_oop(), res); + java_lang_Thread::set_priority(thread_oop(), NearMaxPriority); + java_lang_Thread::set_daemon(thread_oop()); + + res->set_threadObj(thread_oop()); + Threads::add(res); + Thread::start(res); + } + os::yield(); // This seems to help with initial start-up of SLT + return res; +} + +void SurrogateLockerThread::manipulatePLL(SLT_msg_type msg) { + MutexLockerEx x(&_monitor, Mutex::_no_safepoint_check_flag); + assert(_buffer == empty, "Should be empty"); + assert(msg != empty, "empty message"); + _buffer = msg; + while (_buffer != empty) { + _monitor.notify(); + _monitor.wait(Mutex::_no_safepoint_check_flag); + } +} + +// ======= Surrogate Locker Thread ============= + +void SurrogateLockerThread::loop() { + BasicLock pll_basic_lock; + SLT_msg_type msg; + debug_only(unsigned int owned = 0;) + + while (/* !isTerminated() */ 1) { + { + MutexLocker x(&_monitor); + // Since we are a JavaThread, we can't be here at a safepoint. + assert(!SafepointSynchronize::is_at_safepoint(), + "SLT is a JavaThread"); + // wait for msg buffer to become non-empty + while (_buffer == empty) { + _monitor.notify(); + _monitor.wait(); + } + msg = _buffer; + } + switch(msg) { + case acquirePLL: { + instanceRefKlass::acquire_pending_list_lock(&pll_basic_lock); + debug_only(owned++;) + break; + } + case releaseAndNotifyPLL: { + assert(owned > 0, "Don't have PLL"); + instanceRefKlass::release_and_notify_pending_list_lock(&pll_basic_lock); + debug_only(owned--;) + break; + } + case empty: + default: { + guarantee(false,"Unexpected message in _buffer"); + break; + } + } + { + MutexLocker x(&_monitor); + // Since we are a JavaThread, we can't be here at a safepoint. 
+ assert(!SafepointSynchronize::is_at_safepoint(), + "SLT is a JavaThread"); + _buffer = empty; + _monitor.notify(); + } + } + assert(!_monitor.owned_by_self(), "Should unlock before exit."); +} + + +// ===== STS Access From Outside CGCT ===== + +void ConcurrentGCThread::stsYield(const char* id) { + assert( Thread::current()->is_ConcurrentGC_thread(), + "only a conc GC thread can call this" ); + _sts.yield(id); +} + +bool ConcurrentGCThread::stsShouldYield() { + assert( Thread::current()->is_ConcurrentGC_thread(), + "only a conc GC thread can call this" ); + return _sts.should_yield(); +} + +void ConcurrentGCThread::stsJoin() { + assert( Thread::current()->is_ConcurrentGC_thread(), + "only a conc GC thread can call this" ); + _sts.join(); +} + +void ConcurrentGCThread::stsLeave() { + assert( Thread::current()->is_ConcurrentGC_thread(), + "only a conc GC thread can call this" ); + _sts.leave(); +} diff --git a/hotspot/src/share/vm/gc_implementation/shared/concurrentGCThread.hpp b/hotspot/src/share/vm/gc_implementation/shared/concurrentGCThread.hpp new file mode 100644 index 00000000000..db6cc903ddf --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/shared/concurrentGCThread.hpp @@ -0,0 +1,167 @@ +/* + * Copyright 2001-2005 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +class VoidClosure; + +// A SuspendibleThreadSet is (obviously) a set of threads that can be +// suspended. A thread can join and later leave the set, and periodically +// yield. If some thread (not in the set) requests, via suspend_all, that +// the threads be suspended, then the requesting thread is blocked until +// all the threads in the set have yielded or left the set. (Threads may +// not enter the set when an attempted suspension is in progress.) The +// suspending thread later calls resume_all, allowing the suspended threads +// to continue. + +class SuspendibleThreadSet { + Monitor* _m; + int _async; + bool _async_stop; + int _async_stopped; + bool _initialized; + double _suspend_all_start; + + void initialize_work(); + + public: + SuspendibleThreadSet() : _initialized(false) {} + + // Add the current thread to the set. May block if a suspension + // is in progress. + void join(); + // Removes the current thread from the set. + void leave(); + // Returns "true" iff a suspension is in progress. + bool should_yield() { return _async_stop; } + // Suspends the current thread if a suspension is in progress (for + // the duration of the suspension.) + void yield(const char* id); + // Return when all threads in the set are suspended. + void suspend_all(); + // Allow suspended threads to resume. + void resume_all(); + // Redundant initializations okay. + void initialize() { + // Double-check dirty read idiom. 
+ if (!_initialized) initialize_work(); + } +}; + + +class ConcurrentGCThread: public NamedThread { + friend class VMStructs; + +protected: + static bool _should_terminate; + static bool _has_terminated; + + enum CGC_flag_type { + CGC_nil = 0x0, + CGC_dont_suspend = 0x1, + CGC_CGC_safepoint = 0x2, + CGC_VM_safepoint = 0x4 + }; + + static int _CGC_flag; + + static bool CGC_flag_is_set(int b) { return (_CGC_flag & b) != 0; } + static int set_CGC_flag(int b) { return _CGC_flag |= b; } + static int reset_CGC_flag(int b) { return _CGC_flag &= ~b; } + + void stopWorldAndDo(VoidClosure* op); + + // All instances share this one set. + static SuspendibleThreadSet _sts; + + // Create and start the thread (setting its priority high.) + void create_and_start(); + + // Do initialization steps in the thread: record stack base and size, + // init thread local storage, set JNI handle block. + void initialize_in_thread(); + + // Wait until is_init_completed() holds or termination is requested. + void wait_for_universe_init(); + + // Record that the current thread is terminating, and will do no more + // concurrent work. + void terminate(); + +public: + // Constructor + + ConcurrentGCThread(); + ~ConcurrentGCThread() {} // Exists to call NamedThread destructor. + + // Tester + bool is_ConcurrentGC_thread() const { return true; } + + static void safepoint_synchronize(); + static void safepoint_desynchronize(); + + // All overridings should probably do _sts::yield, but we allow + // overriding for distinguished debugging messages. Default is to do + // nothing. + virtual void yield() {} + + bool should_yield() { return _sts.should_yield(); } + + // they are prefixed by sts since there are already yield() and + // should_yield() (non-static) methods in this class and it was an + // easy way to differentiate them. 
+ static void stsYield(const char* id); + static bool stsShouldYield(); + static void stsJoin(); + static void stsLeave(); + +}; + +// The SurrogateLockerThread is used by concurrent GC threads for +// manipulating Java monitors, in particular, currently for +// manipulating the pending_list_lock. XXX +class SurrogateLockerThread: public JavaThread { + friend class VMStructs; + public: + enum SLT_msg_type { + empty = 0, // no message + acquirePLL, // acquire pending list lock + releaseAndNotifyPLL // notify and release pending list lock + }; + private: + // the following are shared with the CMSThread + SLT_msg_type _buffer; // communication buffer + Monitor _monitor; // monitor controlling buffer + BasicLock _basicLock; // used for PLL locking + + public: + static SurrogateLockerThread* make(TRAPS); + + SurrogateLockerThread(); + + bool is_hidden_from_external_view() const { return true; } + + void loop(); // main method + + void manipulatePLL(SLT_msg_type msg); + +}; diff --git a/hotspot/src/share/vm/gc_implementation/shared/gcOverheadReporter.cpp b/hotspot/src/share/vm/gc_implementation/shared/gcOverheadReporter.cpp new file mode 100644 index 00000000000..02a2edb755a --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/shared/gcOverheadReporter.cpp @@ -0,0 +1,179 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +# include "incls/_precompiled.incl" +# include "incls/_gcOverheadReporter.cpp.incl" + +class COReportingThread : public ConcurrentGCThread { +private: + GCOverheadReporter* _reporter; + +public: + COReportingThread(GCOverheadReporter* reporter) : _reporter(reporter) { + guarantee( _reporter != NULL, "precondition" ); + create_and_start(); + } + + virtual void run() { + initialize_in_thread(); + wait_for_universe_init(); + + int period_ms = GCOverheadReportingPeriodMS; + + while ( true ) { + os::sleep(Thread::current(), period_ms, false); + + _sts.join(); + double now_sec = os::elapsedTime(); + _reporter->collect_and_record_conc_overhead(now_sec); + _sts.leave(); + } + + terminate(); + } +}; + +GCOverheadReporter* GCOverheadReporter::_reporter = NULL; + +GCOverheadReporter::GCOverheadReporter(size_t group_num, + const char* group_names[], + size_t length) + : _group_num(group_num), _prev_end_sec(0.0) { + guarantee( 0 <= group_num && group_num <= MaxGCOverheadGroupNum, + "precondition" ); + + _base = NEW_C_HEAP_ARRAY(GCOverheadReporterEntry, length); + _top = _base + length; + _curr = _base; + + for (size_t i = 0; i < group_num; ++i) { + guarantee( group_names[i] != NULL, "precondition" ); + _group_names[i] = group_names[i]; + } +} + +void +GCOverheadReporter::add(double start_sec, double end_sec, + double* conc_overhead, + double stw_overhead) { + assert( _curr <= _top, "invariant" ); + + if (_curr == _top) { + guarantee( false, "trace full" ); + return; + } + + _curr->_start_sec = start_sec; + _curr->_end_sec = end_sec; + for (size_t i = 0; i < _group_num; 
++i) { + _curr->_conc_overhead[i] = + (conc_overhead != NULL) ? conc_overhead[i] : 0.0; + } + _curr->_stw_overhead = stw_overhead; + + ++_curr; +} + +void +GCOverheadReporter::collect_and_record_conc_overhead(double end_sec) { + double start_sec = _prev_end_sec; + guarantee( end_sec > start_sec, "invariant" ); + + double conc_overhead[MaxGCOverheadGroupNum]; + COTracker::totalConcOverhead(end_sec, _group_num, conc_overhead); + add_conc_overhead(start_sec, end_sec, conc_overhead); + _prev_end_sec = end_sec; +} + +void +GCOverheadReporter::record_stw_start(double start_sec) { + guarantee( start_sec > _prev_end_sec, "invariant" ); + collect_and_record_conc_overhead(start_sec); +} + +void +GCOverheadReporter::record_stw_end(double end_sec) { + double start_sec = _prev_end_sec; + COTracker::updateAllForSTW(start_sec, end_sec); + add_stw_overhead(start_sec, end_sec, 1.0); + + _prev_end_sec = end_sec; +} + +void +GCOverheadReporter::print() const { + tty->print_cr(""); + tty->print_cr("GC Overhead (%d entries)", _curr - _base); + tty->print_cr(""); + GCOverheadReporterEntry* curr = _base; + while (curr < _curr) { + double total = curr->_stw_overhead; + for (size_t i = 0; i < _group_num; ++i) + total += curr->_conc_overhead[i]; + + tty->print("OVERHEAD %12.8lf %12.8lf ", + curr->_start_sec, curr->_end_sec); + + for (size_t i = 0; i < _group_num; ++i) + tty->print("%s %12.8lf ", _group_names[i], curr->_conc_overhead[i]); + + tty->print_cr("STW %12.8lf TOT %12.8lf", curr->_stw_overhead, total); + ++curr; + } + tty->print_cr(""); +} + +// statics + +void +GCOverheadReporter::initGCOverheadReporter(size_t group_num, + const char* group_names[]) { + guarantee( _reporter == NULL, "should only be called once" ); + guarantee( 0 <= group_num && group_num <= MaxGCOverheadGroupNum, + "precondition" ); + guarantee( group_names != NULL, "pre-condition" ); + + if (GCOverheadReporting) { + _reporter = new GCOverheadReporter(group_num, group_names); + new COReportingThread(_reporter); + } 
+} + +void +GCOverheadReporter::recordSTWStart(double start_sec) { + if (_reporter != NULL) + _reporter->record_stw_start(start_sec); +} + +void +GCOverheadReporter::recordSTWEnd(double end_sec) { + if (_reporter != NULL) + _reporter->record_stw_end(end_sec); +} + +void +GCOverheadReporter::printGCOverhead() { + if (_reporter != NULL) + _reporter->print(); +} diff --git a/hotspot/src/share/vm/gc_implementation/shared/gcOverheadReporter.hpp b/hotspot/src/share/vm/gc_implementation/shared/gcOverheadReporter.hpp new file mode 100644 index 00000000000..c3483a68b00 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/shared/gcOverheadReporter.hpp @@ -0,0 +1,141 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// Keeps track of the GC overhead (both concurrent and STW). It stores +// it in a large array and then prints it to tty at the end of the +// execution. + +// See coTracker.hpp for the explanation on what groups are. 
+ +// Let's set a maximum number of concurrent overhead groups, to +// statically allocate any arrays we need and not to have to +// malloc/free them. This is just a bit more convenient. +enum { + MaxGCOverheadGroupNum = 4 +}; + +typedef struct { + double _start_sec; + double _end_sec; + + double _conc_overhead[MaxGCOverheadGroupNum]; + double _stw_overhead; +} GCOverheadReporterEntry; + +class GCOverheadReporter { + friend class COReportingThread; + +private: + enum PrivateConstants { + DefaultReporterLength = 128 * 1024 + }; + + // Reference to the single instance of this class. + static GCOverheadReporter* _reporter; + + // These three references point to the array that contains the GC + // overhead entries (_base is the base of the array, _top is the + // address just past the last entry of the array, _curr is the next + // entry to be used). + GCOverheadReporterEntry* _base; + GCOverheadReporterEntry* _top; + GCOverheadReporterEntry* _curr; + + // The number of concurrent overhead groups. + size_t _group_num; + + // The wall-clock time of the end of the last recorded period of GC + // overhead. + double _prev_end_sec; + + // Names for the concurrent overhead groups. + const char* _group_names[MaxGCOverheadGroupNum]; + + // Add a new entry to the large array. conc_overhead being NULL is + // equivalent to an array full of 0.0s. conc_overhead should have a + // length of at least _group_num. + void add(double start_sec, double end_sec, + double* conc_overhead, + double stw_overhead); + + // Add an entry that represents concurrent GC overhead. + // conc_overhead must be at least of length _group_num. + // conc_overhead being NULL is equivalent to an array full of 0.0s. + void add_conc_overhead(double start_sec, double end_sec, + double* conc_overhead) { + add(start_sec, end_sec, conc_overhead, 0.0); + } + + // Add an entry that represents STW GC overhead. 
+ void add_stw_overhead(double start_sec, double end_sec, + double stw_overhead) { + add(start_sec, end_sec, NULL, stw_overhead); + } + + // It records the start of a STW pause (i.e. it records the + // concurrent overhead up to that point) + void record_stw_start(double start_sec); + + // It records the end of a STW pause (i.e. it records the overhead + // associated with the pause and adjusts all the trackers to reflect + // the pause) + void record_stw_end(double end_sec); + + // It queries all the trackers for their concurrent overhead and + // records it. + void collect_and_record_conc_overhead(double end_sec); + + // It prints the contents of the GC overhead array + void print() const; + + + // Constructor. The same preconditions for group_num and group_names + // from initGCOverheadReporter apply here too. + GCOverheadReporter(size_t group_num, + const char* group_names[], + size_t length = DefaultReporterLength); + +public: + + // statics + + // It initialises the GCOverheadReporter and launches the concurrent + // overhead reporting thread. Both actions happen only if the + // GCOverheadReporting parameter is set. The length of the + // group_names array should be >= group_num and group_num should be + // <= MaxGCOverheadGroupNum. Entries group_names[0..group_num-1] + // should not be NULL. + static void initGCOverheadReporter(size_t group_num, + const char* group_names[]); + + // The following three are provided for convenience and they are + // wrappers around record_stw_start(start_sec), record_stw_end(end_sec), + // and print(). Each of these checks whether GC overhead reporting + // is on (i.e. _reporter != NULL) and, if it is, calls the + // corresponding method. Saves from repeating this pattern again and + // again from the places where they need to be called. 
+ static void recordSTWStart(double start_sec); + static void recordSTWEnd(double end_sec); + static void printGCOverhead(); +}; diff --git a/hotspot/src/share/vm/gc_implementation/shared/immutableSpace.hpp b/hotspot/src/share/vm/gc_implementation/shared/immutableSpace.hpp index 3f28ec710e4..39166412d8b 100644 --- a/hotspot/src/share/vm/gc_implementation/shared/immutableSpace.hpp +++ b/hotspot/src/share/vm/gc_implementation/shared/immutableSpace.hpp @@ -50,7 +50,8 @@ class ImmutableSpace: public CHeapObj { size_t capacity_in_bytes() const { return capacity_in_words() * HeapWordSize; } // Size computations. Sizes are in heapwords. - size_t capacity_in_words() const { return pointer_delta(end(), bottom()); } + size_t capacity_in_words() const { return pointer_delta(end(), bottom()); } + virtual size_t capacity_in_words(Thread*) const { return capacity_in_words(); } // Iteration. virtual void oop_iterate(OopClosure* cl); diff --git a/hotspot/src/share/vm/gc_implementation/shared/markSweep.inline.hpp b/hotspot/src/share/vm/gc_implementation/shared/markSweep.inline.hpp index fdcf48db842..7596433cc27 100644 --- a/hotspot/src/share/vm/gc_implementation/shared/markSweep.inline.hpp +++ b/hotspot/src/share/vm/gc_implementation/shared/markSweep.inline.hpp @@ -23,13 +23,6 @@ */ inline void MarkSweep::mark_object(oop obj) { -#ifndef SERIALGC - if (UseParallelOldGC && VerifyParallelOldWithMarkSweep) { - assert(PSParallelCompact::mark_bitmap()->is_marked(obj), - "Should be marked in the marking bitmap"); - } -#endif // SERIALGC - // some marks may contain information we need to preserve so we store them away // and overwrite the mark. We'll restore it at the end of markSweep. 
markOop mark = obj->mark(); diff --git a/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp b/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp index 344c6add09a..697e00de0ab 100644 --- a/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp +++ b/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp @@ -181,6 +181,25 @@ size_t MutableNUMASpace::unsafe_max_tlab_alloc(Thread *thr) const { return lgrp_spaces()->at(i)->space()->free_in_bytes(); } + +size_t MutableNUMASpace::capacity_in_words(Thread* thr) const { + guarantee(thr != NULL, "No thread"); + int lgrp_id = thr->lgrp_id(); + if (lgrp_id == -1) { + if (lgrp_spaces()->length() > 0) { + return capacity_in_words() / lgrp_spaces()->length(); + } else { + assert(false, "There should be at least one locality group"); + return 0; + } + } + int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals); + if (i == -1) { + return 0; + } + return lgrp_spaces()->at(i)->space()->capacity_in_words(); +} + // Check if the NUMA topology has changed. Add and remove spaces if needed. // The update can be forced by setting the force parameter equal to true. bool MutableNUMASpace::update_layout(bool force) { @@ -372,6 +391,8 @@ size_t MutableNUMASpace::default_chunk_size() { } // Produce a new chunk size. page_size() aligned. +// This function is expected to be called on sequence of i's from 0 to +// lgrp_spaces()->length(). 
size_t MutableNUMASpace::adaptive_chunk_size(int i, size_t limit) { size_t pages_available = base_space_size(); for (int j = 0; j < i; j++) { @@ -386,7 +407,7 @@ size_t MutableNUMASpace::adaptive_chunk_size(int i, size_t limit) { size_t chunk_size = 0; if (alloc_rate > 0) { LGRPSpace *ls = lgrp_spaces()->at(i); - chunk_size = (size_t)(ls->alloc_rate()->average() * pages_available / alloc_rate) * page_size(); + chunk_size = (size_t)(ls->alloc_rate()->average() / alloc_rate * pages_available) * page_size(); } chunk_size = MAX2(chunk_size, page_size()); @@ -722,7 +743,8 @@ HeapWord* MutableNUMASpace::allocate(size_t size) { i = os::random() % lgrp_spaces()->length(); } - MutableSpace *s = lgrp_spaces()->at(i)->space(); + LGRPSpace* ls = lgrp_spaces()->at(i); + MutableSpace *s = ls->space(); HeapWord *p = s->allocate(size); if (p != NULL) { @@ -743,6 +765,9 @@ HeapWord* MutableNUMASpace::allocate(size_t size) { *(int*)i = 0; } } + if (p == NULL) { + ls->set_allocation_failed(); + } return p; } @@ -761,7 +786,8 @@ HeapWord* MutableNUMASpace::cas_allocate(size_t size) { if (i == -1) { i = os::random() % lgrp_spaces()->length(); } - MutableSpace *s = lgrp_spaces()->at(i)->space(); + LGRPSpace *ls = lgrp_spaces()->at(i); + MutableSpace *s = ls->space(); HeapWord *p = s->cas_allocate(size); if (p != NULL) { size_t remainder = pointer_delta(s->end(), p + size); @@ -790,6 +816,9 @@ HeapWord* MutableNUMASpace::cas_allocate(size_t size) { *(int*)i = 0; } } + if (p == NULL) { + ls->set_allocation_failed(); + } return p; } diff --git a/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp b/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp index e3d824481f4..f7ca2462545 100644 --- a/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp +++ b/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp @@ -60,6 +60,7 @@ class MutableNUMASpace : public MutableSpace { MutableSpace* _space; MemRegion _invalid_region; 
AdaptiveWeightedAverage *_alloc_rate; + bool _allocation_failed; struct SpaceStats { size_t _local_space, _remote_space, _unbiased_space, _uncommited_space; @@ -81,7 +82,7 @@ class MutableNUMASpace : public MutableSpace { char* last_page_scanned() { return _last_page_scanned; } void set_last_page_scanned(char* p) { _last_page_scanned = p; } public: - LGRPSpace(int l) : _lgrp_id(l), _last_page_scanned(NULL) { + LGRPSpace(int l) : _lgrp_id(l), _last_page_scanned(NULL), _allocation_failed(false) { _space = new MutableSpace(); _alloc_rate = new AdaptiveWeightedAverage(NUMAChunkResizeWeight); } @@ -103,8 +104,21 @@ class MutableNUMASpace : public MutableSpace { return *(int*)lgrp_id_value == p->lgrp_id(); } + // Report a failed allocation. + void set_allocation_failed() { _allocation_failed = true; } + void sample() { - alloc_rate()->sample(space()->used_in_bytes()); + // If there was a failed allocation make allocation rate equal + // to the size of the whole chunk. This ensures the progress of + // the adaptation process. 
+ size_t alloc_rate_sample; + if (_allocation_failed) { + alloc_rate_sample = space()->capacity_in_bytes(); + _allocation_failed = false; + } else { + alloc_rate_sample = space()->used_in_bytes(); + } + alloc_rate()->sample(alloc_rate_sample); } MemRegion invalid_region() const { return _invalid_region; } @@ -190,6 +204,9 @@ class MutableNUMASpace : public MutableSpace { virtual void ensure_parsability(); virtual size_t used_in_words() const; virtual size_t free_in_words() const; + + using MutableSpace::capacity_in_words; + virtual size_t capacity_in_words(Thread* thr) const; virtual size_t tlab_capacity(Thread* thr) const; virtual size_t unsafe_max_tlab_alloc(Thread* thr) const; diff --git a/hotspot/src/share/vm/gc_implementation/shared/vmGCOperations.cpp b/hotspot/src/share/vm/gc_implementation/shared/vmGCOperations.cpp index eee135f67df..7b4b76696a0 100644 --- a/hotspot/src/share/vm/gc_implementation/shared/vmGCOperations.cpp +++ b/hotspot/src/share/vm/gc_implementation/shared/vmGCOperations.cpp @@ -74,6 +74,7 @@ bool VM_GC_Operation::doit_prologue() { // If the GC count has changed someone beat us to the collection // Get the Heap_lock after the pending_list_lock. Heap_lock->lock(); + // Check invocations if (skip_operation()) { // skip collection @@ -82,6 +83,8 @@ bool VM_GC_Operation::doit_prologue() { _prologue_succeeded = false; } else { _prologue_succeeded = true; + SharedHeap* sh = SharedHeap::heap(); + if (sh != NULL) sh->_thread_holds_heap_lock_for_gc = true; } return _prologue_succeeded; } @@ -90,6 +93,8 @@ bool VM_GC_Operation::doit_prologue() { void VM_GC_Operation::doit_epilogue() { assert(Thread::current()->is_Java_thread(), "just checking"); // Release the Heap_lock first. 
+ SharedHeap* sh = SharedHeap::heap(); + if (sh != NULL) sh->_thread_holds_heap_lock_for_gc = false; Heap_lock->unlock(); release_and_notify_pending_list_lock(); } @@ -148,12 +153,27 @@ void VM_GenCollectFull::doit() { void VM_GenCollectForPermanentAllocation::doit() { JvmtiGCForAllocationMarker jgcm; notify_gc_begin(true); - GenCollectedHeap* gch = GenCollectedHeap::heap(); - GCCauseSetter gccs(gch, _gc_cause); - gch->do_full_collection(gch->must_clear_all_soft_refs(), - gch->n_gens() - 1); - _res = gch->perm_gen()->allocate(_size, false); - assert(gch->is_in_reserved_or_null(_res), "result not in heap"); + SharedHeap* heap = (SharedHeap*)Universe::heap(); + GCCauseSetter gccs(heap, _gc_cause); + switch (heap->kind()) { + case (CollectedHeap::GenCollectedHeap): { + GenCollectedHeap* gch = (GenCollectedHeap*)heap; + gch->do_full_collection(gch->must_clear_all_soft_refs(), + gch->n_gens() - 1); + break; + } +#ifndef SERIALGC + case (CollectedHeap::G1CollectedHeap): { + G1CollectedHeap* g1h = (G1CollectedHeap*)heap; + g1h->do_full_collection(_gc_cause == GCCause::_last_ditch_collection); + break; + } +#endif // SERIALGC + default: + ShouldNotReachHere(); + } + _res = heap->perm_gen()->allocate(_size, false); + assert(heap->is_in_reserved_or_null(_res), "result not in heap"); if (_res == NULL && GC_locker::is_active_and_needs_gc()) { set_gc_locked(); } diff --git a/hotspot/src/share/vm/gc_interface/collectedHeap.cpp b/hotspot/src/share/vm/gc_interface/collectedHeap.cpp index 8ddf46a48e2..df8ef081632 100644 --- a/hotspot/src/share/vm/gc_interface/collectedHeap.cpp +++ b/hotspot/src/share/vm/gc_interface/collectedHeap.cpp @@ -138,13 +138,6 @@ oop CollectedHeap::new_store_barrier(oop new_obj) { return new_obj; } -bool CollectedHeap::can_elide_permanent_oop_store_barriers() const { - // %%% This needs refactoring. (It was gating logic from the server compiler.) 
- guarantee(kind() < CollectedHeap::G1CollectedHeap, ""); - return !UseConcMarkSweepGC; -} - - HeapWord* CollectedHeap::allocate_new_tlab(size_t size) { guarantee(false, "thread-local allocation buffers not supported"); return NULL; diff --git a/hotspot/src/share/vm/gc_interface/collectedHeap.hpp b/hotspot/src/share/vm/gc_interface/collectedHeap.hpp index 492801e983e..69369e3f7d4 100644 --- a/hotspot/src/share/vm/gc_interface/collectedHeap.hpp +++ b/hotspot/src/share/vm/gc_interface/collectedHeap.hpp @@ -364,10 +364,8 @@ class CollectedHeap : public CHeapObj { // Can a compiler initialize a new object without store barriers? // This permission only extends from the creation of a new object // via a TLAB up to the first subsequent safepoint. - virtual bool can_elide_tlab_store_barriers() const { - guarantee(kind() < CollectedHeap::G1CollectedHeap, "else change or refactor this"); - return true; - } + virtual bool can_elide_tlab_store_barriers() const = 0; + // If a compiler is eliding store barriers for TLAB-allocated objects, // there is probably a corresponding slow path which can produce // an object allocated anywhere. The compiler's runtime support @@ -379,12 +377,10 @@ class CollectedHeap : public CHeapObj { // Can a compiler elide a store barrier when it writes // a permanent oop into the heap? Applies when the compiler // is storing x to the heap, where x->is_perm() is true. - virtual bool can_elide_permanent_oop_store_barriers() const; + virtual bool can_elide_permanent_oop_store_barriers() const = 0; // Does this heap support heap inspection (+PrintClassHistogram?) - virtual bool supports_heap_inspection() const { - return false; // Until RFE 5023697 is implemented - } + virtual bool supports_heap_inspection() const = 0; // Perform a collection of the heap; intended for use in implementing // "System.gc". 
This probably implies as full a collection as the diff --git a/hotspot/src/share/vm/gc_interface/collectedHeap.inline.hpp b/hotspot/src/share/vm/gc_interface/collectedHeap.inline.hpp index c2f3caeb507..efc3210bf0a 100644 --- a/hotspot/src/share/vm/gc_interface/collectedHeap.inline.hpp +++ b/hotspot/src/share/vm/gc_interface/collectedHeap.inline.hpp @@ -122,7 +122,7 @@ HeapWord* CollectedHeap::common_mem_allocate_noinit(size_t size, bool is_noref, return result; } } - bool gc_overhead_limit_was_exceeded; + bool gc_overhead_limit_was_exceeded = false; result = Universe::heap()->mem_allocate(size, is_noref, false, diff --git a/hotspot/src/share/vm/gc_interface/gcCause.hpp b/hotspot/src/share/vm/gc_interface/gcCause.hpp index e512b0265e8..bae001c9ca7 100644 --- a/hotspot/src/share/vm/gc_interface/gcCause.hpp +++ b/hotspot/src/share/vm/gc_interface/gcCause.hpp @@ -60,6 +60,8 @@ class GCCause : public AllStatic { _old_generation_too_full_to_scavenge, _adaptive_size_policy, + _g1_inc_collection_pause, _g1_pop_region_collection_pause, + _last_ditch_collection, _last_gc_cause }; @@ -68,12 +70,14 @@ class GCCause : public AllStatic { return (cause == GCCause::_java_lang_system_gc || cause == GCCause::_jvmti_force_gc); } + inline static bool is_serviceability_requested_gc(GCCause::Cause cause) { return (cause == GCCause::_jvmti_force_gc || cause == GCCause::_heap_inspection || cause == GCCause::_heap_dump); } + // Return a string describing the GCCause. static const char* to_string(GCCause::Cause cause); // Return true if the GCCause is for a full collection. 
diff --git a/hotspot/src/share/vm/includeDB_compiler1 b/hotspot/src/share/vm/includeDB_compiler1 index d3006b094a0..3ea09fd8ad1 100644 --- a/hotspot/src/share/vm/includeDB_compiler1 +++ b/hotspot/src/share/vm/includeDB_compiler1 @@ -36,6 +36,9 @@ c1_CFGPrinter.cpp c1_ValueStack.hpp c1_CFGPrinter.hpp c1_Compilation.hpp c1_CFGPrinter.hpp c1_Instruction.hpp +cardTableModRefBS.cpp c1_LIR.hpp +cardTableModRefBS.cpp c1_LIRGenerator.hpp + c1_Canonicalizer.cpp c1_Canonicalizer.hpp c1_Canonicalizer.cpp c1_InstructionPrinter.hpp c1_Canonicalizer.cpp ciArray.hpp @@ -55,6 +58,7 @@ c1_CodeStubs_.cpp c1_FrameMap.hpp c1_CodeStubs_.cpp c1_LIRAssembler.hpp c1_CodeStubs_.cpp c1_MacroAssembler.hpp c1_CodeStubs_.cpp c1_Runtime1.hpp +c1_CodeStubs_.cpp g1SATBCardTableModRefBS.hpp c1_CodeStubs_.cpp nativeInst_.hpp c1_CodeStubs_.cpp sharedRuntime.hpp c1_CodeStubs_.cpp vmreg_.inline.hpp @@ -141,6 +145,7 @@ c1_globals_.hpp macros.hpp c1_globals_.hpp globalDefinitions.hpp c1_globals_.hpp macros.hpp +c1_GraphBuilder.cpp bitMap.inline.hpp c1_GraphBuilder.cpp bytecode.hpp c1_GraphBuilder.cpp c1_CFGPrinter.hpp c1_GraphBuilder.cpp c1_Canonicalizer.hpp @@ -158,6 +163,7 @@ c1_GraphBuilder.hpp c1_ValueStack.hpp c1_GraphBuilder.hpp ciMethodData.hpp c1_GraphBuilder.hpp ciStreams.hpp +c1_IR.cpp bitMap.inline.hpp c1_IR.cpp c1_Compilation.hpp c1_IR.cpp c1_FrameMap.hpp c1_IR.cpp c1_GraphBuilder.hpp @@ -232,20 +238,22 @@ c1_LIRAssembler_.cpp sharedRuntime.hpp c1_LIRAssembler_.hpp generate_platform_dependent_include -c1_LIRGenerator.cpp c1_Compilation.hpp -c1_LIRGenerator.cpp c1_FrameMap.hpp -c1_LIRGenerator.cpp c1_Instruction.hpp -c1_LIRGenerator.cpp c1_LIRAssembler.hpp -c1_LIRGenerator.cpp c1_LIRGenerator.hpp -c1_LIRGenerator.cpp c1_ValueStack.hpp -c1_LIRGenerator.cpp ciArrayKlass.hpp -c1_LIRGenerator.cpp ciInstance.hpp -c1_LIRGenerator.cpp sharedRuntime.hpp +c1_LIRGenerator.cpp bitMap.inline.hpp +c1_LIRGenerator.cpp c1_Compilation.hpp +c1_LIRGenerator.cpp c1_FrameMap.hpp +c1_LIRGenerator.cpp 
c1_Instruction.hpp +c1_LIRGenerator.cpp c1_LIRAssembler.hpp +c1_LIRGenerator.cpp c1_LIRGenerator.hpp +c1_LIRGenerator.cpp c1_ValueStack.hpp +c1_LIRGenerator.cpp ciArrayKlass.hpp +c1_LIRGenerator.cpp ciInstance.hpp +c1_LIRGenerator.cpp heapRegion.hpp +c1_LIRGenerator.cpp sharedRuntime.hpp -c1_LIRGenerator.hpp c1_Instruction.hpp -c1_LIRGenerator.hpp c1_LIR.hpp -c1_LIRGenerator.hpp ciMethodData.hpp -c1_LIRGenerator.hpp sizes.hpp +c1_LIRGenerator.hpp c1_Instruction.hpp +c1_LIRGenerator.hpp c1_LIR.hpp +c1_LIRGenerator.hpp ciMethodData.hpp +c1_LIRGenerator.hpp sizes.hpp c1_LIRGenerator_.cpp c1_Compilation.hpp c1_LIRGenerator_.cpp c1_FrameMap.hpp @@ -260,6 +268,7 @@ c1_LIRGenerator_.cpp ciTypeArrayKlass.hpp c1_LIRGenerator_.cpp sharedRuntime.hpp c1_LIRGenerator_.cpp vmreg_.inline.hpp +c1_LinearScan.cpp bitMap.inline.hpp c1_LinearScan.cpp c1_CFGPrinter.hpp c1_LinearScan.cpp c1_Compilation.hpp c1_LinearScan.cpp c1_FrameMap.hpp @@ -276,6 +285,7 @@ c1_LinearScan.hpp c1_Instruction.hpp c1_LinearScan.hpp c1_LIR.hpp c1_LinearScan.hpp c1_LIRGenerator.hpp +c1_LinearScan_.cpp bitMap.inline.hpp c1_LinearScan_.cpp c1_Instruction.hpp c1_LinearScan_.cpp c1_LinearScan.hpp @@ -298,6 +308,7 @@ c1_MacroAssembler_.cpp systemDictionary.hpp c1_MacroAssembler_.hpp generate_platform_dependent_include +c1_Optimizer.cpp bitMap.inline.hpp c1_Optimizer.cpp c1_Canonicalizer.hpp c1_Optimizer.cpp c1_Optimizer.hpp c1_Optimizer.cpp c1_ValueMap.hpp @@ -363,6 +374,7 @@ c1_Runtime1_.cpp signature.hpp c1_Runtime1_.cpp vframeArray.hpp c1_Runtime1_.cpp vmreg_.inline.hpp +c1_ValueMap.cpp bitMap.inline.hpp c1_ValueMap.cpp c1_Canonicalizer.hpp c1_ValueMap.cpp c1_IR.hpp c1_ValueMap.cpp c1_ValueMap.hpp @@ -433,4 +445,3 @@ thread.cpp c1_Compiler.hpp top.hpp c1_globals.hpp vmStructs.hpp c1_Runtime1.hpp - diff --git a/hotspot/src/share/vm/includeDB_compiler2 b/hotspot/src/share/vm/includeDB_compiler2 index 07ae8a87096..ea3b88cc172 100644 --- a/hotspot/src/share/vm/includeDB_compiler2 +++ 
b/hotspot/src/share/vm/includeDB_compiler2 @@ -461,10 +461,13 @@ globals.cpp c2_globals.hpp graphKit.cpp addnode.hpp graphKit.cpp barrierSet.hpp graphKit.cpp cardTableModRefBS.hpp +graphKit.cpp g1SATBCardTableModRefBS.hpp graphKit.cpp collectedHeap.hpp graphKit.cpp compileLog.hpp graphKit.cpp deoptimization.hpp graphKit.cpp graphKit.hpp +graphKit.cpp heapRegion.hpp +graphKit.cpp idealKit.hpp graphKit.cpp locknode.hpp graphKit.cpp machnode.hpp graphKit.cpp parse.hpp @@ -484,6 +487,7 @@ idealKit.cpp addnode.hpp idealKit.cpp callnode.hpp idealKit.cpp cfgnode.hpp idealKit.cpp idealKit.hpp +idealKit.cpp runtime.hpp idealKit.hpp connode.hpp idealKit.hpp mulnode.hpp @@ -582,6 +586,7 @@ locknode.hpp subnode.hpp loopTransform.cpp addnode.hpp loopTransform.cpp allocation.inline.hpp loopTransform.cpp connode.hpp +loopTransform.cpp compileLog.hpp loopTransform.cpp divnode.hpp loopTransform.cpp loopnode.hpp loopTransform.cpp mulnode.hpp @@ -597,6 +602,7 @@ loopnode.cpp addnode.hpp loopnode.cpp allocation.inline.hpp loopnode.cpp callnode.hpp loopnode.cpp ciMethodData.hpp +loopnode.cpp compileLog.hpp loopnode.cpp connode.hpp loopnode.cpp divnode.hpp loopnode.cpp loopnode.hpp @@ -915,9 +921,11 @@ runtime.cpp compilerOracle.hpp runtime.cpp connode.hpp runtime.cpp copy.hpp runtime.cpp fprofiler.hpp +runtime.cpp g1SATBCardTableModRefBS.hpp runtime.cpp gcLocker.inline.hpp runtime.cpp graphKit.hpp runtime.cpp handles.inline.hpp +runtime.cpp heapRegion.hpp runtime.cpp icBuffer.hpp runtime.cpp interfaceSupport.hpp runtime.cpp interpreter.hpp diff --git a/hotspot/src/share/vm/includeDB_core b/hotspot/src/share/vm/includeDB_core index 5b2045617d2..3b4dd2e38fe 100644 --- a/hotspot/src/share/vm/includeDB_core +++ b/hotspot/src/share/vm/includeDB_core @@ -288,6 +288,10 @@ attachListener.hpp allocation.hpp attachListener.hpp debug.hpp attachListener.hpp ostream.hpp +barrierSet.cpp barrierSet.hpp +barrierSet.cpp collectedHeap.hpp +barrierSet.cpp universe.hpp + barrierSet.hpp memRegion.hpp 
barrierSet.hpp oopsHierarchy.hpp @@ -295,7 +299,7 @@ barrierSet.inline.hpp barrierSet.hpp barrierSet.inline.hpp cardTableModRefBS.hpp bcEscapeAnalyzer.cpp bcEscapeAnalyzer.hpp -bcEscapeAnalyzer.cpp bitMap.hpp +bcEscapeAnalyzer.cpp bitMap.inline.hpp bcEscapeAnalyzer.cpp bytecode.hpp bcEscapeAnalyzer.cpp ciConstant.hpp bcEscapeAnalyzer.cpp ciField.hpp @@ -320,13 +324,12 @@ biasedLocking.cpp vm_operations.hpp biasedLocking.hpp growableArray.hpp biasedLocking.hpp handles.hpp -bitMap.cpp bitMap.hpp +bitMap.cpp allocation.inline.hpp bitMap.cpp bitMap.inline.hpp bitMap.cpp copy.hpp bitMap.cpp os_.inline.hpp bitMap.hpp allocation.hpp -bitMap.hpp ostream.hpp bitMap.hpp top.hpp bitMap.inline.hpp atomic.hpp @@ -645,6 +648,7 @@ ciKlassKlass.hpp ciSymbol.hpp ciMethod.cpp abstractCompiler.hpp ciMethod.cpp allocation.inline.hpp ciMethod.cpp bcEscapeAnalyzer.hpp +ciMethod.cpp bitMap.inline.hpp ciMethod.cpp ciCallProfile.hpp ciMethod.cpp ciExceptionHandler.hpp ciMethod.cpp ciInstanceKlass.hpp @@ -1759,7 +1763,7 @@ genRemSet.cpp genRemSet.hpp genRemSet.hpp oop.hpp -generateOopMap.cpp bitMap.hpp +generateOopMap.cpp bitMap.inline.hpp generateOopMap.cpp bytecodeStream.hpp generateOopMap.cpp generateOopMap.hpp generateOopMap.cpp handles.inline.hpp @@ -1808,6 +1812,8 @@ generation.inline.hpp genCollectedHeap.hpp generation.inline.hpp generation.hpp generation.inline.hpp space.hpp +genOopClosures.hpp oop.hpp + generationSpec.cpp compactPermGen.hpp generationSpec.cpp defNewGeneration.hpp generationSpec.cpp filemap.hpp @@ -2219,6 +2225,11 @@ invocationCounter.hpp allocation.hpp invocationCounter.hpp exceptions.hpp invocationCounter.hpp handles.hpp +intHisto.cpp intHisto.hpp + +intHisto.hpp allocation.hpp +intHisto.hpp growableArray.hpp + iterator.cpp iterator.hpp iterator.cpp oop.inline.hpp @@ -2818,6 +2829,7 @@ methodKlass.hpp klassOop.hpp methodKlass.hpp methodOop.hpp methodLiveness.cpp allocation.inline.hpp +methodLiveness.cpp bitMap.inline.hpp methodLiveness.cpp bytecode.hpp 
methodLiveness.cpp bytecodes.hpp methodLiveness.cpp ciMethod.hpp @@ -2964,6 +2976,11 @@ nmethod.cpp xmlstream.hpp nmethod.hpp codeBlob.hpp nmethod.hpp pcDesc.hpp +numberSeq.cpp debug.hpp +numberSeq.cpp numberSeq.hpp +numberSeq.cpp globalDefinitions.hpp +numberSeq.cpp allocation.inline.hpp + objArrayKlass.cpp collectedHeap.inline.hpp objArrayKlass.cpp copy.hpp objArrayKlass.cpp genOopClosures.inline.hpp @@ -3406,8 +3423,6 @@ referencePolicy.cpp javaClasses.hpp referencePolicy.cpp referencePolicy.hpp referencePolicy.cpp universe.hpp -referencePolicy.hpp oop.hpp - referenceProcessor.cpp collectedHeap.hpp referenceProcessor.cpp collectedHeap.inline.hpp referenceProcessor.cpp java.hpp @@ -3758,6 +3773,8 @@ spaceDecorator.cpp spaceDecorator.hpp specialized_oop_closures.cpp ostream.hpp specialized_oop_closures.cpp specialized_oop_closures.hpp +specialized_oop_closures.hpp atomic.hpp + stackMapFrame.cpp globalDefinitions.hpp stackMapFrame.cpp handles.inline.hpp stackMapFrame.cpp oop.inline.hpp @@ -4000,7 +4017,6 @@ taskqueue.cpp thread_.inline.hpp taskqueue.hpp allocation.hpp taskqueue.hpp allocation.inline.hpp -taskqueue.hpp debug.hpp taskqueue.hpp mutex.hpp taskqueue.hpp orderAccess_.inline.hpp @@ -4038,6 +4054,7 @@ templateInterpreter_.hpp generate_platform_dependent_include templateInterpreterGenerator_.hpp generate_platform_dependent_include +templateTable.cpp collectedHeap.hpp templateTable.cpp templateTable.hpp templateTable.cpp timer.hpp @@ -4542,6 +4559,7 @@ vm_operations.cpp compileBroker.hpp vm_operations.cpp compilerOracle.hpp vm_operations.cpp deoptimization.hpp vm_operations.cpp interfaceSupport.hpp +vm_operations.cpp isGCActiveMark.hpp vm_operations.cpp resourceArea.hpp vm_operations.cpp threadService.hpp vm_operations.cpp thread_.inline.hpp diff --git a/hotspot/src/share/vm/includeDB_features b/hotspot/src/share/vm/includeDB_features index 7db5dc0c26f..ddf2389a453 100644 --- a/hotspot/src/share/vm/includeDB_features +++ 
b/hotspot/src/share/vm/includeDB_features @@ -99,6 +99,7 @@ heapDumper.cpp ostream.hpp heapDumper.cpp reflectionUtils.hpp heapDumper.cpp symbolTable.hpp heapDumper.cpp systemDictionary.hpp +heapDumper.cpp threadService.hpp heapDumper.cpp universe.hpp heapDumper.cpp vframe.hpp heapDumper.cpp vmGCOperations.hpp diff --git a/hotspot/src/share/vm/includeDB_gc_parallel b/hotspot/src/share/vm/includeDB_gc_parallel index 31939d06567..1120e9b6ce1 100644 --- a/hotspot/src/share/vm/includeDB_gc_parallel +++ b/hotspot/src/share/vm/includeDB_gc_parallel @@ -21,6 +21,10 @@ // have any questions. // +assembler_.cpp g1SATBCardTableModRefBS.hpp +assembler_.cpp g1CollectedHeap.inline.hpp +assembler_.cpp heapRegion.hpp + collectorPolicy.cpp cmsAdaptiveSizePolicy.hpp collectorPolicy.cpp cmsGCAdaptivePolicyCounters.hpp @@ -37,6 +41,9 @@ heapDumper.cpp parallelScavengeHeap.hpp heapInspection.cpp parallelScavengeHeap.hpp +instanceKlass.cpp heapRegionSeq.inline.hpp +instanceKlass.cpp g1CollectedHeap.inline.hpp +instanceKlass.cpp g1OopClosures.inline.hpp instanceKlass.cpp oop.pcgc.inline.hpp instanceKlass.cpp psPromotionManager.inline.hpp instanceKlass.cpp psScavenge.inline.hpp @@ -48,6 +55,9 @@ instanceKlassKlass.cpp psPromotionManager.inline.hpp instanceKlassKlass.cpp psScavenge.inline.hpp instanceKlassKlass.cpp parOopClosures.inline.hpp +instanceRefKlass.cpp heapRegionSeq.inline.hpp +instanceRefKlass.cpp g1CollectedHeap.inline.hpp +instanceRefKlass.cpp g1OopClosures.inline.hpp instanceRefKlass.cpp oop.pcgc.inline.hpp instanceRefKlass.cpp psPromotionManager.inline.hpp instanceRefKlass.cpp psScavenge.inline.hpp @@ -70,6 +80,7 @@ memoryPool.hpp compactibleFreeListSpace.hpp memoryService.cpp cmsPermGen.hpp memoryService.cpp concurrentMarkSweepGeneration.hpp +memoryService.cpp g1CollectedHeap.inline.hpp memoryService.cpp parNewGeneration.hpp memoryService.cpp parallelScavengeHeap.hpp memoryService.cpp psMemoryPool.hpp @@ -80,6 +91,9 @@ memoryService.cpp psYoungGen.hpp methodDataKlass.cpp 
oop.pcgc.inline.hpp methodDataKlass.cpp psScavenge.inline.hpp +objArrayKlass.cpp heapRegionSeq.inline.hpp +objArrayKlass.cpp g1CollectedHeap.inline.hpp +objArrayKlass.cpp g1OopClosures.inline.hpp objArrayKlass.cpp oop.pcgc.inline.hpp objArrayKlass.cpp psPromotionManager.inline.hpp objArrayKlass.cpp psScavenge.inline.hpp @@ -122,6 +136,9 @@ safepoint.cpp concurrentMarkSweepThread.hpp thread.cpp concurrentMarkSweepThread.hpp thread.cpp pcTasks.hpp +thread.hpp dirtyCardQueue.hpp +thread.hpp satbQueue.hpp + universe.cpp parallelScavengeHeap.hpp universe.cpp cmsCollectorPolicy.hpp universe.cpp cmsAdaptiveSizePolicy.hpp diff --git a/hotspot/src/share/vm/includeDB_jvmti b/hotspot/src/share/vm/includeDB_jvmti index 7110bb1bf4e..b6b4ee5e960 100644 --- a/hotspot/src/share/vm/includeDB_jvmti +++ b/hotspot/src/share/vm/includeDB_jvmti @@ -209,6 +209,7 @@ jvmtiManageCapabilities.cpp jvmtiManageCapabilities.hpp jvmtiManageCapabilities.hpp allocation.hpp jvmtiManageCapabilities.hpp jvmti.h +jvmtiRedefineClasses.cpp bitMap.inline.hpp jvmtiRedefineClasses.cpp codeCache.hpp jvmtiRedefineClasses.cpp deoptimization.hpp jvmtiRedefineClasses.cpp gcLocker.hpp diff --git a/hotspot/src/share/vm/interpreter/templateTable.cpp b/hotspot/src/share/vm/interpreter/templateTable.cpp index c302af297e1..756b3f66516 100644 --- a/hotspot/src/share/vm/interpreter/templateTable.cpp +++ b/hotspot/src/share/vm/interpreter/templateTable.cpp @@ -172,6 +172,7 @@ Template TemplateTable::_template_table_wide[Bytecodes::number Template* TemplateTable::_desc; InterpreterMacroAssembler* TemplateTable::_masm; +BarrierSet* TemplateTable::_bs; void TemplateTable::def(Bytecodes::Code code, int flags, TosState in, TosState out, void (*gen)(), char filler) { @@ -244,6 +245,8 @@ void TemplateTable::initialize() { // Initialize table TraceTime timer("TemplateTable initialization", TraceStartupTime); + _bs = Universe::heap()->barrier_set(); + // For better readability const char _ = ' '; const int ____ = 0; diff --git 
a/hotspot/src/share/vm/interpreter/templateTable.hpp b/hotspot/src/share/vm/interpreter/templateTable.hpp index 4f40a7ac291..af6bf870db9 100644 --- a/hotspot/src/share/vm/interpreter/templateTable.hpp +++ b/hotspot/src/share/vm/interpreter/templateTable.hpp @@ -82,6 +82,7 @@ class TemplateTable: AllStatic { static Template* _desc; // the current template to be generated static Bytecodes::Code bytecode() { return _desc->bytecode(); } + static BarrierSet* _bs; // Cache the barrier set. public: //%note templates_1 static InterpreterMacroAssembler* _masm; // the assembler used when generating templates diff --git a/hotspot/src/share/vm/memory/allocation.hpp b/hotspot/src/share/vm/memory/allocation.hpp index 4bdcbc1ccfa..26482ebf792 100644 --- a/hotspot/src/share/vm/memory/allocation.hpp +++ b/hotspot/src/share/vm/memory/allocation.hpp @@ -338,6 +338,12 @@ class ResourceObj ALLOCATION_SUPER_CLASS_SPEC { DEBUG_ONLY(((ResourceObj *)res)->_allocation = RESOURCE_AREA;) return res; } + void* operator new(size_t size, void* where, allocation_type type) { + void* res = where; + // Set allocation type in the resource object + DEBUG_ONLY(((ResourceObj *)res)->_allocation = type;) + return res; + } void operator delete(void* p); }; diff --git a/hotspot/src/share/vm/memory/barrierSet.cpp b/hotspot/src/share/vm/memory/barrierSet.cpp new file mode 100644 index 00000000000..23308805a60 --- /dev/null +++ b/hotspot/src/share/vm/memory/barrierSet.cpp @@ -0,0 +1,36 @@ +/* + * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +# include "incls/_precompiled.incl" +# include "incls/_barrierSet.cpp.incl" + +// count is in HeapWord's +void BarrierSet::static_write_ref_array_pre(HeapWord* start, size_t count) { + Universe::heap()->barrier_set()->write_ref_array_pre(MemRegion(start, start + count)); +} + +// count is in HeapWord's +void BarrierSet::static_write_ref_array_post(HeapWord* start, size_t count) { + Universe::heap()->barrier_set()->write_ref_array_work(MemRegion(start, start + count)); +} diff --git a/hotspot/src/share/vm/memory/barrierSet.hpp b/hotspot/src/share/vm/memory/barrierSet.hpp index 5d04d673c17..bff929b2c8b 100644 --- a/hotspot/src/share/vm/memory/barrierSet.hpp +++ b/hotspot/src/share/vm/memory/barrierSet.hpp @@ -32,6 +32,8 @@ public: ModRef, CardTableModRef, CardTableExtension, + G1SATBCT, + G1SATBCTLogging, Other, Uninit }; @@ -42,14 +44,16 @@ protected: public: + BarrierSet() { _kind = Uninit; } // To get around prohibition on RTTI. - virtual BarrierSet::Name kind() { return _kind; } + BarrierSet::Name kind() { return _kind; } virtual bool is_a(BarrierSet::Name bsn) = 0; // These operations indicate what kind of barriers the BarrierSet has. 
virtual bool has_read_ref_barrier() = 0; virtual bool has_read_prim_barrier() = 0; virtual bool has_write_ref_barrier() = 0; + virtual bool has_write_ref_pre_barrier() = 0; virtual bool has_write_prim_barrier() = 0; // These functions indicate whether a particular access of the given @@ -57,7 +61,8 @@ public: virtual bool read_ref_needs_barrier(void* field) = 0; virtual bool read_prim_needs_barrier(HeapWord* field, size_t bytes) = 0; virtual bool write_ref_needs_barrier(void* field, oop new_val) = 0; - virtual bool write_prim_needs_barrier(HeapWord* field, size_t bytes, juint val1, juint val2) = 0; + virtual bool write_prim_needs_barrier(HeapWord* field, size_t bytes, + juint val1, juint val2) = 0; // The first four operations provide a direct implementation of the // barrier set. An interpreter loop, for example, could call these @@ -75,6 +80,13 @@ public: // (For efficiency reasons, this operation is specialized for certain // barrier types. Semantically, it should be thought of as a call to the // virtual "_work" function below, which must implement the barrier.) + // First the pre-write versions... + inline void write_ref_field_pre(void* field, oop new_val); +protected: + virtual void write_ref_field_pre_work(void* field, oop new_val) {}; +public: + + // ...then the post-write version. inline void write_ref_field(void* field, oop new_val); protected: virtual void write_ref_field_work(void* field, oop new_val) = 0; @@ -92,6 +104,7 @@ public: // the particular barrier. 
virtual bool has_read_ref_array_opt() = 0; virtual bool has_read_prim_array_opt() = 0; + virtual bool has_write_ref_array_pre_opt() { return true; } virtual bool has_write_ref_array_opt() = 0; virtual bool has_write_prim_array_opt() = 0; @@ -104,7 +117,13 @@ public: virtual void read_ref_array(MemRegion mr) = 0; virtual void read_prim_array(MemRegion mr) = 0; + virtual void write_ref_array_pre(MemRegion mr) {} inline void write_ref_array(MemRegion mr); + + // Static versions, suitable for calling from generated code. + static void static_write_ref_array_pre(HeapWord* start, size_t count); + static void static_write_ref_array_post(HeapWord* start, size_t count); + protected: virtual void write_ref_array_work(MemRegion mr) = 0; public: @@ -120,33 +139,6 @@ protected: virtual void write_region_work(MemRegion mr) = 0; public: - // The remaining sets of operations are called by compilers or other code - // generators to insert barriers into generated code. There may be - // several such code generators; the signatures of these - // barrier-generating functions may differ from generator to generator. - // There will be a set of four function signatures for each code - // generator, which accomplish the generation of barriers of the four - // kinds listed above. - -#ifdef TBD - // Generates code to invoke the barrier, if any, necessary when reading - // the ref field at "offset" in "obj". - virtual void gen_read_ref_field() = 0; - - // Generates code to invoke the barrier, if any, necessary when reading - // the primitive field of "bytes" bytes at offset" in "obj". - virtual void gen_read_prim_field() = 0; - - // Generates code to invoke the barrier, if any, necessary when writing - // "new_val" into the ref field at "offset" in "obj". - virtual void gen_write_ref_field() = 0; - - // Generates code to invoke the barrier, if any, necessary when writing - // the "bytes"-byte value "new_val" into the primitive field at "offset" - // in "obj". 
- virtual void gen_write_prim_field() = 0; -#endif - // Some barrier sets create tables whose elements correspond to parts of // the heap; the CardTableModRefBS is an example. Such barrier sets will // normally reserve space for such tables, and commit parts of the table diff --git a/hotspot/src/share/vm/memory/barrierSet.inline.hpp b/hotspot/src/share/vm/memory/barrierSet.inline.hpp index c6ddc55a5bb..50382c994bb 100644 --- a/hotspot/src/share/vm/memory/barrierSet.inline.hpp +++ b/hotspot/src/share/vm/memory/barrierSet.inline.hpp @@ -26,6 +26,14 @@ // performance-critical calls when when the barrier is the most common // card-table kind. +void BarrierSet::write_ref_field_pre(void* field, oop new_val) { + if (kind() == CardTableModRef) { + ((CardTableModRefBS*)this)->inline_write_ref_field_pre(field, new_val); + } else { + write_ref_field_pre_work(field, new_val); + } +} + void BarrierSet::write_ref_field(void* field, oop new_val) { if (kind() == CardTableModRef) { ((CardTableModRefBS*)this)->inline_write_ref_field(field, new_val); diff --git a/hotspot/src/share/vm/memory/blockOffsetTable.cpp b/hotspot/src/share/vm/memory/blockOffsetTable.cpp index b1d0c62b898..078dc413537 100644 --- a/hotspot/src/share/vm/memory/blockOffsetTable.cpp +++ b/hotspot/src/share/vm/memory/blockOffsetTable.cpp @@ -184,7 +184,7 @@ BlockOffsetArray::set_remainder_to_point_to_start_incl(size_t start_card, size_t "Offset card has an unexpected value"); size_t start_card_for_region = start_card; u_char offset = max_jubyte; - for (int i = 0; i <= N_powers-1; i++) { + for (int i = 0; i < N_powers; i++) { // -1 so that the the card with the actual offset is counted. Another -1 // so that the reach ends in this region and not at the start // of the next. 
diff --git a/hotspot/src/share/vm/memory/blockOffsetTable.hpp b/hotspot/src/share/vm/memory/blockOffsetTable.hpp index 3d271aaf86e..670dd86d79b 100644 --- a/hotspot/src/share/vm/memory/blockOffsetTable.hpp +++ b/hotspot/src/share/vm/memory/blockOffsetTable.hpp @@ -214,6 +214,7 @@ public: ////////////////////////////////////////////////////////////////////////// class BlockOffsetArray: public BlockOffsetTable { friend class VMStructs; + friend class G1BlockOffsetArray; // temp. until we restructure and cleanup protected: // The following enums are used by do_block_internal() below enum Action { diff --git a/hotspot/src/share/vm/memory/cardTableModRefBS.cpp b/hotspot/src/share/vm/memory/cardTableModRefBS.cpp index 5f32640497e..8a006a4042b 100644 --- a/hotspot/src/share/vm/memory/cardTableModRefBS.cpp +++ b/hotspot/src/share/vm/memory/cardTableModRefBS.cpp @@ -344,6 +344,17 @@ void CardTableModRefBS::write_ref_field_work(void* field, oop newVal) { } +bool CardTableModRefBS::claim_card(size_t card_index) { + jbyte val = _byte_map[card_index]; + if (val != claimed_card_val()) { + jbyte res = Atomic::cmpxchg((jbyte) claimed_card_val(), &_byte_map[card_index], val); + if (res == val) + return true; + else return false; + } + return false; +} + void CardTableModRefBS::non_clean_card_iterate(Space* sp, MemRegion mr, DirtyCardToOopClosure* dcto_cl, @@ -443,7 +454,7 @@ void CardTableModRefBS::dirty_MemRegion(MemRegion mr) { } } -void CardTableModRefBS::invalidate(MemRegion mr) { +void CardTableModRefBS::invalidate(MemRegion mr, bool whole_heap) { for (int i = 0; i < _cur_covered_regions; i++) { MemRegion mri = mr.intersection(_covered[i]); if (!mri.is_empty()) dirty_MemRegion(mri); @@ -471,11 +482,15 @@ void CardTableModRefBS::clear(MemRegion mr) { } } +void CardTableModRefBS::dirty(MemRegion mr) { + jbyte* first = byte_for(mr.start()); + jbyte* last = byte_after(mr.last()); + memset(first, dirty_card, last-first); +} + // NOTES: // (1) Unlike mod_oop_in_space_iterate() 
above, dirty_card_iterate() // iterates over dirty cards ranges in increasing address order. -// (2) Unlike, e.g., dirty_card_range_after_preclean() below, -// this method does not make the dirty cards prelceaned. void CardTableModRefBS::dirty_card_iterate(MemRegion mr, MemRegionClosure* cl) { for (int i = 0; i < _cur_covered_regions; i++) { @@ -501,7 +516,9 @@ void CardTableModRefBS::dirty_card_iterate(MemRegion mr, } } -MemRegion CardTableModRefBS::dirty_card_range_after_preclean(MemRegion mr) { +MemRegion CardTableModRefBS::dirty_card_range_after_reset(MemRegion mr, + bool reset, + int reset_val) { for (int i = 0; i < _cur_covered_regions; i++) { MemRegion mri = mr.intersection(_covered[i]); if (!mri.is_empty()) { @@ -518,8 +535,10 @@ MemRegion CardTableModRefBS::dirty_card_range_after_preclean(MemRegion mr) { dirty_cards++, next_entry++); MemRegion cur_cards(addr_for(cur_entry), dirty_cards*card_size_in_words); - for (size_t i = 0; i < dirty_cards; i++) { - cur_entry[i] = precleaned_card; + if (reset) { + for (size_t i = 0; i < dirty_cards; i++) { + cur_entry[i] = reset_val; + } } return cur_cards; } diff --git a/hotspot/src/share/vm/memory/cardTableModRefBS.hpp b/hotspot/src/share/vm/memory/cardTableModRefBS.hpp index fa1db5f74b7..440d42c6ac1 100644 --- a/hotspot/src/share/vm/memory/cardTableModRefBS.hpp +++ b/hotspot/src/share/vm/memory/cardTableModRefBS.hpp @@ -54,6 +54,7 @@ class CardTableModRefBS: public ModRefBarrierSet { clean_card = -1, dirty_card = 0, precleaned_card = 1, + claimed_card = 3, last_card = 4, CT_MR_BS_last_reserved = 10 }; @@ -150,17 +151,6 @@ class CardTableModRefBS: public ModRefBarrierSet { return byte_for(p) + 1; } - // Mapping from card marking array entry to address of first word - HeapWord* addr_for(const jbyte* p) const { - assert(p >= _byte_map && p < _byte_map + _byte_map_size, - "out of bounds access to card marking array"); - size_t delta = pointer_delta(p, byte_map_base, sizeof(jbyte)); - HeapWord* result = (HeapWord*) (delta 
<< card_shift); - assert(_whole_heap.contains(result), - "out of bounds accessor from card marking array"); - return result; - } - // Iterate over the portion of the card-table which covers the given // region mr in the given space and apply cl to any dirty sub-regions // of mr. cl and dcto_cl must either be the same closure or cl must @@ -263,16 +253,22 @@ public: card_size_in_words = card_size / sizeof(HeapWord) }; + static int clean_card_val() { return clean_card; } + static int dirty_card_val() { return dirty_card; } + static int claimed_card_val() { return claimed_card; } + static int precleaned_card_val() { return precleaned_card; } + // For RTTI simulation. - BarrierSet::Name kind() { return BarrierSet::CardTableModRef; } bool is_a(BarrierSet::Name bsn) { - return bsn == BarrierSet::CardTableModRef || bsn == BarrierSet::ModRef; + return bsn == BarrierSet::CardTableModRef || ModRefBarrierSet::is_a(bsn); } CardTableModRefBS(MemRegion whole_heap, int max_covered_regions); // *** Barrier set functions. + bool has_write_ref_pre_barrier() { return false; } + inline bool write_ref_needs_barrier(void* field, oop new_val) { // Note that this assumes the perm gen is the highest generation // in the address space @@ -315,11 +311,33 @@ public: // *** Card-table-barrier-specific things. + inline void inline_write_ref_field_pre(void* field, oop newVal) {} + inline void inline_write_ref_field(void* field, oop newVal) { jbyte* byte = byte_for(field); *byte = dirty_card; } + // These are used by G1, when it uses the card table as a temporary data + // structure for card claiming. 
+ bool is_card_dirty(size_t card_index) { + return _byte_map[card_index] == dirty_card_val(); + } + + void mark_card_dirty(size_t card_index) { + _byte_map[card_index] = dirty_card_val(); + } + + bool is_card_claimed(size_t card_index) { + return _byte_map[card_index] == claimed_card_val(); + } + + bool claim_card(size_t card_index); + + bool is_card_clean(size_t card_index) { + return _byte_map[card_index] == clean_card_val(); + } + // Card marking array base (adjusted for heap low boundary) // This would be the 0th element of _byte_map, if the heap started at 0x0. // But since the heap starts at some higher address, this points to somewhere @@ -344,8 +362,9 @@ public: } // ModRefBS functions. - void invalidate(MemRegion mr); + virtual void invalidate(MemRegion mr, bool whole_heap = false); void clear(MemRegion mr); + void dirty(MemRegion mr); void mod_oop_in_space_iterate(Space* sp, OopClosure* cl, bool clear = false, bool before_save_marks = false); @@ -375,18 +394,39 @@ public: static uintx ct_max_alignment_constraint(); - // Apply closure cl to the dirty cards lying completely - // within MemRegion mr, setting the cards to precleaned. - void dirty_card_iterate(MemRegion mr, MemRegionClosure* cl); + // Apply closure "cl" to the dirty cards containing some part of + // MemRegion "mr". + void dirty_card_iterate(MemRegion mr, MemRegionClosure* cl); // Return the MemRegion corresponding to the first maximal run - // of dirty cards lying completely within MemRegion mr, after - // marking those cards precleaned. - MemRegion dirty_card_range_after_preclean(MemRegion mr); + // of dirty cards lying completely within MemRegion mr. + // If reset is "true", then sets those card table entries to the given + // value. + MemRegion dirty_card_range_after_reset(MemRegion mr, bool reset, + int reset_val); // Set all the dirty cards in the given region to precleaned state. void preclean_dirty_cards(MemRegion mr); + // Provide read-only access to the card table array. 
+ const jbyte* byte_for_const(const void* p) const { + return byte_for(p); + } + const jbyte* byte_after_const(const void* p) const { + return byte_after(p); + } + + // Mapping from card marking array entry to address of first word + HeapWord* addr_for(const jbyte* p) const { + assert(p >= _byte_map && p < _byte_map + _byte_map_size, + "out of bounds access to card marking array"); + size_t delta = pointer_delta(p, byte_map_base, sizeof(jbyte)); + HeapWord* result = (HeapWord*) (delta << card_shift); + assert(_whole_heap.contains(result), + "out of bounds accessor from card marking array"); + return result; + } + // Mapping from address to card marking array index. int index_for(void* p) { assert(_whole_heap.contains(p), @@ -402,6 +442,7 @@ public: static size_t par_chunk_heapword_alignment() { return CardsPerStrideChunk * card_size_in_words; } + }; class CardTableRS; diff --git a/hotspot/src/share/vm/memory/cardTableRS.cpp b/hotspot/src/share/vm/memory/cardTableRS.cpp index 287cbd85751..133e3d28ded 100644 --- a/hotspot/src/share/vm/memory/cardTableRS.cpp +++ b/hotspot/src/share/vm/memory/cardTableRS.cpp @@ -27,10 +27,25 @@ CardTableRS::CardTableRS(MemRegion whole_heap, int max_covered_regions) : - GenRemSet(&_ct_bs), - _ct_bs(whole_heap, max_covered_regions), - _cur_youngergen_card_val(youngergenP1_card) + GenRemSet(), + _cur_youngergen_card_val(youngergenP1_card), + _regions_to_iterate(max_covered_regions - 1) { +#ifndef SERIALGC + if (UseG1GC) { + if (G1RSBarrierUseQueue) { + _ct_bs = new G1SATBCardTableLoggingModRefBS(whole_heap, + max_covered_regions); + } else { + _ct_bs = new G1SATBCardTableModRefBS(whole_heap, max_covered_regions); + } + } else { + _ct_bs = new CardTableModRefBSForCTRS(whole_heap, max_covered_regions); + } +#else + _ct_bs = new CardTableModRefBSForCTRS(whole_heap, max_covered_regions); +#endif + set_bs(_ct_bs); _last_cur_val_in_gen = new jbyte[GenCollectedHeap::max_gens + 1]; if (_last_cur_val_in_gen == NULL) { 
vm_exit_during_initialization("Could not last_cur_val_in_gen array."); @@ -38,20 +53,19 @@ CardTableRS::CardTableRS(MemRegion whole_heap, for (int i = 0; i < GenCollectedHeap::max_gens + 1; i++) { _last_cur_val_in_gen[i] = clean_card_val(); } - _ct_bs.set_CTRS(this); + _ct_bs->set_CTRS(this); } void CardTableRS::resize_covered_region(MemRegion new_region) { - _ct_bs.resize_covered_region(new_region); + _ct_bs->resize_covered_region(new_region); } jbyte CardTableRS::find_unused_youngergenP_card_value() { - GenCollectedHeap* gch = GenCollectedHeap::heap(); for (jbyte v = youngergenP1_card; v < cur_youngergen_and_prev_nonclean_card; v++) { bool seen = false; - for (int g = 0; g < gch->n_gens()+1; g++) { + for (int g = 0; g < _regions_to_iterate; g++) { if (_last_cur_val_in_gen[g] == v) { seen = true; break; @@ -221,11 +235,11 @@ void CardTableRS::write_ref_field_gc_par(void* field, oop new_val) { void CardTableRS::younger_refs_in_space_iterate(Space* sp, OopsInGenClosure* cl) { - DirtyCardToOopClosure* dcto_cl = sp->new_dcto_cl(cl, _ct_bs.precision(), + DirtyCardToOopClosure* dcto_cl = sp->new_dcto_cl(cl, _ct_bs->precision(), cl->gen_boundary()); ClearNoncleanCardWrapper clear_cl(dcto_cl, this); - _ct_bs.non_clean_card_iterate(sp, sp->used_region_at_save_marks(), + _ct_bs->non_clean_card_iterate(sp, sp->used_region_at_save_marks(), dcto_cl, &clear_cl, false); } @@ -549,7 +563,7 @@ void CardTableRS::verify() { if (ch->kind() == CollectedHeap::GenCollectedHeap) { GenCollectedHeap::heap()->generation_iterate(&blk, false); - _ct_bs.verify(); + _ct_bs->verify(); // If the old gen collections also collect perm, then we are only // interested in perm-to-young pointers, not perm-to-old pointers. 
diff --git a/hotspot/src/share/vm/memory/cardTableRS.hpp b/hotspot/src/share/vm/memory/cardTableRS.hpp index dab96d3991b..3f7ed40abe6 100644 --- a/hotspot/src/share/vm/memory/cardTableRS.hpp +++ b/hotspot/src/share/vm/memory/cardTableRS.hpp @@ -44,7 +44,7 @@ class CardTableRS: public GenRemSet { return CardTableModRefBS::card_is_dirty_wrt_gen_iter(cv); } - CardTableModRefBSForCTRS _ct_bs; + CardTableModRefBSForCTRS* _ct_bs; virtual void younger_refs_in_space_iterate(Space* sp, OopsInGenClosure* cl); @@ -73,6 +73,8 @@ class CardTableRS: public GenRemSet { jbyte _cur_youngergen_card_val; + int _regions_to_iterate; + jbyte cur_youngergen_card_val() { return _cur_youngergen_card_val; } @@ -96,7 +98,7 @@ public: CardTableRS* as_CardTableRS() { return this; } - CardTableModRefBS* ct_bs() { return &_ct_bs; } + CardTableModRefBS* ct_bs() { return _ct_bs; } // Override. void prepare_for_younger_refs_iterate(bool parallel); @@ -107,7 +109,7 @@ public: void younger_refs_iterate(Generation* g, OopsInGenClosure* blk); void inline_write_ref_field_gc(void* field, oop new_val) { - jbyte* byte = _ct_bs.byte_for(field); + jbyte* byte = _ct_bs->byte_for(field); *byte = youngergen_card; } void write_ref_field_gc_work(void* field, oop new_val) { @@ -122,25 +124,27 @@ public: void resize_covered_region(MemRegion new_region); bool is_aligned(HeapWord* addr) { - return _ct_bs.is_card_aligned(addr); + return _ct_bs->is_card_aligned(addr); } void verify(); void verify_aligned_region_empty(MemRegion mr); - void clear(MemRegion mr) { _ct_bs.clear(mr); } + void clear(MemRegion mr) { _ct_bs->clear(mr); } void clear_into_younger(Generation* gen, bool clear_perm); - void invalidate(MemRegion mr) { _ct_bs.invalidate(mr); } + void invalidate(MemRegion mr, bool whole_heap = false) { + _ct_bs->invalidate(mr, whole_heap); + } void invalidate_or_clear(Generation* gen, bool younger, bool perm); static uintx ct_max_alignment_constraint() { return CardTableModRefBS::ct_max_alignment_constraint(); } - 
jbyte* byte_for(void* p) { return _ct_bs.byte_for(p); } - jbyte* byte_after(void* p) { return _ct_bs.byte_after(p); } - HeapWord* addr_for(jbyte* p) { return _ct_bs.addr_for(p); } + jbyte* byte_for(void* p) { return _ct_bs->byte_for(p); } + jbyte* byte_after(void* p) { return _ct_bs->byte_after(p); } + HeapWord* addr_for(jbyte* p) { return _ct_bs->addr_for(p); } bool is_prev_nonclean_card_val(jbyte v) { return diff --git a/hotspot/src/share/vm/memory/collectorPolicy.cpp b/hotspot/src/share/vm/memory/collectorPolicy.cpp index 2d86d6466d3..3f885d9ecba 100644 --- a/hotspot/src/share/vm/memory/collectorPolicy.cpp +++ b/hotspot/src/share/vm/memory/collectorPolicy.cpp @@ -31,11 +31,11 @@ void CollectorPolicy::initialize_flags() { if (PermSize > MaxPermSize) { MaxPermSize = PermSize; } - PermSize = align_size_down(PermSize, min_alignment()); + PermSize = MAX2(min_alignment(), align_size_down_(PermSize, min_alignment())); MaxPermSize = align_size_up(MaxPermSize, max_alignment()); - MinPermHeapExpansion = align_size_down(MinPermHeapExpansion, min_alignment()); - MaxPermHeapExpansion = align_size_down(MaxPermHeapExpansion, min_alignment()); + MinPermHeapExpansion = MAX2(min_alignment(), align_size_down_(MinPermHeapExpansion, min_alignment())); + MaxPermHeapExpansion = MAX2(min_alignment(), align_size_down_(MaxPermHeapExpansion, min_alignment())); MinHeapDeltaBytes = align_size_up(MinHeapDeltaBytes, min_alignment()); @@ -55,25 +55,21 @@ void CollectorPolicy::initialize_flags() { void CollectorPolicy::initialize_size_info() { // User inputs from -mx and ms are aligned - _initial_heap_byte_size = align_size_up(Arguments::initial_heap_size(), - min_alignment()); - set_min_heap_byte_size(align_size_up(Arguments::min_heap_size(), - min_alignment())); - set_max_heap_byte_size(align_size_up(MaxHeapSize, max_alignment())); - - // Check validity of heap parameters from launcher + set_initial_heap_byte_size(Arguments::initial_heap_size()); if (initial_heap_byte_size() == 0) { 
set_initial_heap_byte_size(NewSize + OldSize); - } else { - Universe::check_alignment(initial_heap_byte_size(), min_alignment(), - "initial heap"); } + set_initial_heap_byte_size(align_size_up(_initial_heap_byte_size, + min_alignment())); + + set_min_heap_byte_size(Arguments::min_heap_size()); if (min_heap_byte_size() == 0) { set_min_heap_byte_size(NewSize + OldSize); - } else { - Universe::check_alignment(min_heap_byte_size(), min_alignment(), - "initial heap"); } + set_min_heap_byte_size(align_size_up(_min_heap_byte_size, + min_alignment())); + + set_max_heap_byte_size(align_size_up(MaxHeapSize, max_alignment())); // Check heap parameter properties if (initial_heap_byte_size() < M) { @@ -121,8 +117,6 @@ GenRemSet* CollectorPolicy::create_rem_set(MemRegion whole_heap, int max_covered_regions) { switch (rem_set_name()) { case GenRemSet::CardTable: { - if (barrier_set_name() != BarrierSet::CardTableModRef) - vm_exit_during_initialization("Mismatch between RS and BS."); CardTableRS* res = new CardTableRS(whole_heap, max_covered_regions); return res; } @@ -345,7 +339,7 @@ void GenCollectorPolicy::initialize_size_info() { // At this point all three sizes have been checked against the // maximum sizes but have not been checked for consistency - // amoung the three. + // among the three. // Final check min <= initial <= max set_min_gen0_size(MIN2(_min_gen0_size, _max_gen0_size)); diff --git a/hotspot/src/share/vm/memory/collectorPolicy.hpp b/hotspot/src/share/vm/memory/collectorPolicy.hpp index a1cea1544bc..2bc7d45524d 100644 --- a/hotspot/src/share/vm/memory/collectorPolicy.hpp +++ b/hotspot/src/share/vm/memory/collectorPolicy.hpp @@ -39,10 +39,12 @@ // Forward declarations. 
class GenCollectorPolicy; class TwoGenerationCollectorPolicy; +class AdaptiveSizePolicy; #ifndef SERIALGC class ConcurrentMarkSweepPolicy; +class G1CollectorPolicy; #endif // SERIALGC -class AdaptiveSizePolicy; + class GCPolicyCounters; class PermanentGenerationSpec; class MarkSweepPolicy; @@ -55,7 +57,7 @@ class CollectorPolicy : public CHeapObj { // Requires that the concrete subclass sets the alignment constraints // before calling. virtual void initialize_flags(); - virtual void initialize_size_info() = 0; + virtual void initialize_size_info(); // Initialize "_permanent_generation" to a spec for the given kind of // Perm Gen. void initialize_perm_generation(PermGen::Name pgnm); @@ -91,17 +93,18 @@ class CollectorPolicy : public CHeapObj { enum Name { CollectorPolicyKind, TwoGenerationCollectorPolicyKind, - TrainPolicyKind, ConcurrentMarkSweepPolicyKind, - ASConcurrentMarkSweepPolicyKind + ASConcurrentMarkSweepPolicyKind, + G1CollectorPolicyKind }; // Identification methods. - virtual GenCollectorPolicy* as_generation_policy() { return NULL; } + virtual GenCollectorPolicy* as_generation_policy() { return NULL; } virtual TwoGenerationCollectorPolicy* as_two_generation_policy() { return NULL; } virtual MarkSweepPolicy* as_mark_sweep_policy() { return NULL; } #ifndef SERIALGC virtual ConcurrentMarkSweepPolicy* as_concurrent_mark_sweep_policy() { return NULL; } + virtual G1CollectorPolicy* as_g1_policy() { return NULL; } #endif // SERIALGC // Note that these are not virtual. 
bool is_generation_policy() { return as_generation_policy() != NULL; } @@ -109,10 +112,13 @@ class CollectorPolicy : public CHeapObj { bool is_mark_sweep_policy() { return as_mark_sweep_policy() != NULL; } #ifndef SERIALGC bool is_concurrent_mark_sweep_policy() { return as_concurrent_mark_sweep_policy() != NULL; } + bool is_g1_policy() { return as_g1_policy() != NULL; } #else // SERIALGC bool is_concurrent_mark_sweep_policy() { return false; } + bool is_g1_policy() { return false; } #endif // SERIALGC + virtual PermanentGenerationSpec *permanent_generation() { assert(_permanent_generation != NULL, "Sanity check"); return _permanent_generation; diff --git a/hotspot/src/share/vm/memory/compactingPermGenGen.hpp b/hotspot/src/share/vm/memory/compactingPermGenGen.hpp index 913f1728f8c..4f2788a32f8 100644 --- a/hotspot/src/share/vm/memory/compactingPermGenGen.hpp +++ b/hotspot/src/share/vm/memory/compactingPermGenGen.hpp @@ -100,7 +100,7 @@ public: enum { vtbl_list_size = 16, // number of entries in the shared space vtable list. - num_virtuals = 100 // number of virtual methods in Klass (or + num_virtuals = 200 // number of virtual methods in Klass (or // subclass) objects, or greater. }; diff --git a/hotspot/src/share/vm/memory/dump.cpp b/hotspot/src/share/vm/memory/dump.cpp index cfe22633d15..cce5036d0b9 100644 --- a/hotspot/src/share/vm/memory/dump.cpp +++ b/hotspot/src/share/vm/memory/dump.cpp @@ -818,6 +818,40 @@ static void print_contents() { // across the space while doing this, as that causes the vtables to be // patched, undoing our useful work. Instead, iterate to make a list, // then use the list to do the fixing. +// +// Our constructed vtables: +// Dump time: +// 1. init_self_patching_vtbl_list: table of pointers to current virtual method addrs +// 2. generate_vtable_methods: create jump table, appended to above vtbl_list +// 3. 
PatchKlassVtables: for Klass list, patch the vtable entry to point to jump table +// rather than to current vtbl +// Table layout: NOTE FIXED SIZE +// 1. vtbl pointers +// 2. #Klass X #virtual methods per Klass +// 1 entry for each, in the order: +// Klass1:method1 entry, Klass1:method2 entry, ... Klass1:method entry +// Klass2:method1 entry, Klass2:method2 entry, ... Klass2:method entry +// ... +// Klass:method1 entry, Klass:method2 entry, +// ... Klass:method entry +// Sample entry: (Sparc): +// save(sp, -256, sp) +// ba,pt common_code +// mov XXX, %L0 %L0 gets: Klass index <<8 + method index (note: max method index 255) +// +// Restore time: +// 1. initialize_oops: reserve space for table +// 2. init_self_patching_vtbl_list: update pointers to NEW virtual method addrs in text +// +// Execution time: +// First virtual method call for any object of these Klass types: +// 1. object->klass->klass_part +// 2. vtable entry for that klass_part points to the jump table entries +// 3. branches to common_code with %O0/klass_part, %L0: Klass index <<8 + method index +// 4. common_code: +// Get address of new vtbl pointer for this Klass from updated table +// Update new vtbl pointer in the Klass: future virtual calls go direct +// Jump to method, using new vtbl pointer and method index class PatchKlassVtables: public ObjectClosure { private: diff --git a/hotspot/src/share/vm/memory/genCollectedHeap.hpp b/hotspot/src/share/vm/memory/genCollectedHeap.hpp index f6b0c408f20..f9a4d508753 100644 --- a/hotspot/src/share/vm/memory/genCollectedHeap.hpp +++ b/hotspot/src/share/vm/memory/genCollectedHeap.hpp @@ -252,6 +252,21 @@ public: virtual size_t unsafe_max_tlab_alloc(Thread* thr) const; virtual HeapWord* allocate_new_tlab(size_t size); + // Can a compiler initialize a new object without store barriers? + // This permission only extends from the creation of a new object + // via a TLAB up to the first subsequent safepoint. 
+ virtual bool can_elide_tlab_store_barriers() const { + return true; + } + + // Can a compiler elide a store barrier when it writes + // a permanent oop into the heap? Applies when the compiler + // is storing x to the heap, where x->is_perm() is true. + virtual bool can_elide_permanent_oop_store_barriers() const { + // CMS needs to see all, even intra-generational, ref updates. + return !UseConcMarkSweepGC; + } + // The "requestor" generation is performing some garbage collection // action for which it would be useful to have scratch space. The // requestor promises to allocate no more than "max_alloc_words" in any diff --git a/hotspot/src/share/vm/memory/genMarkSweep.cpp b/hotspot/src/share/vm/memory/genMarkSweep.cpp index bc46afba59c..50562ab2e99 100644 --- a/hotspot/src/share/vm/memory/genMarkSweep.cpp +++ b/hotspot/src/share/vm/memory/genMarkSweep.cpp @@ -191,8 +191,10 @@ void GenMarkSweep::allocate_stacks() { void GenMarkSweep::deallocate_stacks() { - GenCollectedHeap* gch = GenCollectedHeap::heap(); - gch->release_scratch(); + if (!UseG1GC) { + GenCollectedHeap* gch = GenCollectedHeap::heap(); + gch->release_scratch(); + } if (_preserved_oop_stack) { delete _preserved_mark_stack; diff --git a/hotspot/src/share/vm/memory/genMarkSweep.hpp b/hotspot/src/share/vm/memory/genMarkSweep.hpp index c2c43704fcc..cbedd85c70c 100644 --- a/hotspot/src/share/vm/memory/genMarkSweep.hpp +++ b/hotspot/src/share/vm/memory/genMarkSweep.hpp @@ -24,6 +24,7 @@ class GenMarkSweep : public MarkSweep { friend class VM_MarkSweep; + friend class G1MarkSweep; public: static void invoke_at_safepoint(int level, ReferenceProcessor* rp, bool clear_all_softrefs); diff --git a/hotspot/src/share/vm/memory/genOopClosures.hpp b/hotspot/src/share/vm/memory/genOopClosures.hpp index 85121ee0b63..4cb2ca6c202 100644 --- a/hotspot/src/share/vm/memory/genOopClosures.hpp +++ b/hotspot/src/share/vm/memory/genOopClosures.hpp @@ -56,6 +56,9 @@ class OopsInGenClosure : public OopClosure { // pointers must 
call the method below. template void do_barrier(T* p); + // Version for use by closures that may be called in parallel code. + void par_do_barrier(oop* p); + public: OopsInGenClosure() : OopClosure(NULL), _orig_gen(NULL), _gen(NULL), _gen_boundary(NULL), _rs(NULL) {}; diff --git a/hotspot/src/share/vm/memory/genOopClosures.inline.hpp b/hotspot/src/share/vm/memory/genOopClosures.inline.hpp index 6746c8804f0..5f13668e9c4 100644 --- a/hotspot/src/share/vm/memory/genOopClosures.inline.hpp +++ b/hotspot/src/share/vm/memory/genOopClosures.inline.hpp @@ -48,6 +48,16 @@ template inline void OopsInGenClosure::do_barrier(T* p) { } } +inline void OopsInGenClosure::par_do_barrier(oop* p) { + assert(generation()->is_in_reserved(p), "expected ref in generation"); + oop obj = *p; + assert(obj != NULL, "expected non-null object"); + // If p points to a younger generation, mark the card. + if ((HeapWord*)obj < gen_boundary()) { + rs()->write_ref_field_gc_par(p, obj); + } +} + // NOTE! Any changes made here should also be made // in FastScanClosure::do_oop_work() template inline void ScanClosure::do_oop_work(T* p) { diff --git a/hotspot/src/share/vm/memory/genRemSet.hpp b/hotspot/src/share/vm/memory/genRemSet.hpp index bf06e3647dc..f7a0614cdb7 100644 --- a/hotspot/src/share/vm/memory/genRemSet.hpp +++ b/hotspot/src/share/vm/memory/genRemSet.hpp @@ -42,6 +42,7 @@ public: }; GenRemSet(BarrierSet * bs) : _bs(bs) {} + GenRemSet() : _bs(NULL) {} virtual Name rs_kind() = 0; @@ -53,6 +54,9 @@ public: // Return the barrier set associated with "this." BarrierSet* bs() { return _bs; } + // Set the barrier set. + void set_bs(BarrierSet* bs) { _bs = bs; } + // Do any (sequential) processing necessary to prepare for (possibly // "parallel", if that arg is true) calls to younger_refs_iterate. 
virtual void prepare_for_younger_refs_iterate(bool parallel) = 0; @@ -116,7 +120,10 @@ public: // Informs the RS that refs in the given "mr" may have changed // arbitrarily, and therefore may contain old-to-young pointers. - virtual void invalidate(MemRegion mr) = 0; + // If "whole heap" is true, then this invalidation is part of an + // invalidation of the whole heap, which an implementation might + // handle differently than that of a sub-part of the heap. + virtual void invalidate(MemRegion mr, bool whole_heap = false) = 0; // Informs the RS that refs in this generation // may have changed arbitrarily, and therefore may contain diff --git a/hotspot/src/share/vm/memory/heapInspection.cpp b/hotspot/src/share/vm/memory/heapInspection.cpp index 28e68260831..e3d6fbd7b5a 100644 --- a/hotspot/src/share/vm/memory/heapInspection.cpp +++ b/hotspot/src/share/vm/memory/heapInspection.cpp @@ -238,11 +238,14 @@ void HeapInspection::heap_inspection(outputStream* st) { HeapWord* ref; CollectedHeap* heap = Universe::heap(); + bool is_shared_heap = false; switch (heap->kind()) { + case CollectedHeap::G1CollectedHeap: case CollectedHeap::GenCollectedHeap: { - GenCollectedHeap* gch = (GenCollectedHeap*)heap; - gch->gc_prologue(false /* !full */); // get any necessary locks - ref = gch->perm_gen()->used_region().start(); + is_shared_heap = true; + SharedHeap* sh = (SharedHeap*)heap; + sh->gc_prologue(false /* !full */); // get any necessary locks, etc. + ref = sh->perm_gen()->used_region().start(); break; } #ifndef SERIALGC @@ -284,9 +287,9 @@ void HeapInspection::heap_inspection(outputStream* st) { } st->flush(); - if (Universe::heap()->kind() == CollectedHeap::GenCollectedHeap) { - GenCollectedHeap* gch = GenCollectedHeap::heap(); - gch->gc_epilogue(false /* !full */); // release all acquired locks + if (is_shared_heap) { + SharedHeap* sh = (SharedHeap*)heap; + sh->gc_epilogue(false /* !full */); // release all acquired locks, etc. 
} } diff --git a/hotspot/src/share/vm/memory/iterator.hpp b/hotspot/src/share/vm/memory/iterator.hpp index 47b9b28d2b7..07915f11ffd 100644 --- a/hotspot/src/share/vm/memory/iterator.hpp +++ b/hotspot/src/share/vm/memory/iterator.hpp @@ -26,9 +26,23 @@ class ReferenceProcessor; +// Closure provides abortability. + +class Closure : public StackObj { + protected: + bool _abort; + void set_abort() { _abort = true; } + public: + Closure() : _abort(false) {} + // A subtype can use this mechanism to indicate to some iterator mapping + // functions that the iteration should cease. + bool abort() { return _abort; } + void clear_abort() { _abort = false; } +}; + // OopClosure is used for iterating through roots (oop*) -class OopClosure : public StackObj { +class OopClosure : public Closure { public: ReferenceProcessor* _ref_processor; OopClosure(ReferenceProcessor* rp) : _ref_processor(rp) { } @@ -55,11 +69,16 @@ class OopClosure : public StackObj { Prefetch::style prefetch_style() { // Note that this is non-virtual. return Prefetch::do_none; } + + // True iff this closure may be safely applied more than once to an oop + // location without an intervening "major reset" (like the end of a GC). + virtual bool idempotent() { return false; } + virtual bool apply_to_weak_ref_discovered_field() { return false; } }; // ObjectClosure is used for iterating through an object space -class ObjectClosure : public StackObj { +class ObjectClosure : public Closure { public: // Called for each object. 
virtual void do_object(oop obj) = 0; diff --git a/hotspot/src/share/vm/memory/modRefBarrierSet.hpp b/hotspot/src/share/vm/memory/modRefBarrierSet.hpp index 62255e11680..a463aac35db 100644 --- a/hotspot/src/share/vm/memory/modRefBarrierSet.hpp +++ b/hotspot/src/share/vm/memory/modRefBarrierSet.hpp @@ -31,6 +31,13 @@ class Generation; class ModRefBarrierSet: public BarrierSet { public: + + ModRefBarrierSet() { _kind = BarrierSet::ModRef; } + + bool is_a(BarrierSet::Name bsn) { + return bsn == BarrierSet::ModRef; + } + // Barriers only on ref writes. bool has_read_ref_barrier() { return false; } bool has_read_prim_barrier() { return false; } @@ -85,8 +92,10 @@ public: bool clear = false, bool before_save_marks = false) = 0; - // Causes all refs in "mr" to be assumed to be modified. - virtual void invalidate(MemRegion mr) = 0; + // Causes all refs in "mr" to be assumed to be modified. If "whole_heap" + // is true, the caller asserts that the entire heap is being invalidated, + // which may admit an optimized implementation for some barriers. + virtual void invalidate(MemRegion mr, bool whole_heap = false) = 0; // The caller guarantees that "mr" contains no references. (Perhaps it's // objects have been moved elsewhere.) 
diff --git a/hotspot/src/share/vm/memory/referenceProcessor.cpp b/hotspot/src/share/vm/memory/referenceProcessor.cpp index 11edba7f7f4..ba024b91730 100644 --- a/hotspot/src/share/vm/memory/referenceProcessor.cpp +++ b/hotspot/src/share/vm/memory/referenceProcessor.cpp @@ -91,7 +91,8 @@ ReferenceProcessor::create_ref_processor(MemRegion span, bool mt_discovery, BoolObjectClosure* is_alive_non_header, int parallel_gc_threads, - bool mt_processing) { + bool mt_processing, + bool dl_needs_barrier) { int mt_degree = 1; if (parallel_gc_threads > 1) { mt_degree = parallel_gc_threads; @@ -99,7 +100,8 @@ ReferenceProcessor::create_ref_processor(MemRegion span, ReferenceProcessor* rp = new ReferenceProcessor(span, atomic_discovery, mt_discovery, mt_degree, - mt_processing && (parallel_gc_threads > 0)); + mt_processing && (parallel_gc_threads > 0), + dl_needs_barrier); if (rp == NULL) { vm_exit_during_initialization("Could not allocate ReferenceProcessor object"); } @@ -111,10 +113,13 @@ ReferenceProcessor::ReferenceProcessor(MemRegion span, bool atomic_discovery, bool mt_discovery, int mt_degree, - bool mt_processing) : + bool mt_processing, + bool discovered_list_needs_barrier) : _discovering_refs(false), _enqueuing_is_done(false), _is_alive_non_header(NULL), + _discovered_list_needs_barrier(discovered_list_needs_barrier), + _bs(NULL), _processing_is_mt(mt_processing), _next_id(0) { @@ -135,6 +140,10 @@ ReferenceProcessor::ReferenceProcessor(MemRegion span, _discoveredSoftRefs[i].set_head(sentinel_ref()); _discoveredSoftRefs[i].set_length(0); } + // If we do barreirs, cache a copy of the barrier set. 
+ if (discovered_list_needs_barrier) { + _bs = Universe::heap()->barrier_set(); + } } #ifndef PRODUCT @@ -727,10 +736,15 @@ ReferenceProcessor::abandon_partial_discovered_list(DiscoveredList& refs_list) { refs_list.set_length(0); } -void -ReferenceProcessor::abandon_partial_discovered_list_arr(DiscoveredList refs_lists[]) { - for (int i = 0; i < _num_q; i++) { - abandon_partial_discovered_list(refs_lists[i]); +void ReferenceProcessor::abandon_partial_discovery() { + // loop over the lists + for (int i = 0; i < _num_q * subclasses_of_ref; i++) { + if (TraceReferenceGC && PrintGCDetails && ((i % _num_q) == 0)) { + gclog_or_tty->print_cr( + "\nAbandoning %s discovered list", + list_name(i)); + } + abandon_partial_discovered_list(_discoveredSoftRefs[i]); } } @@ -994,7 +1008,16 @@ ReferenceProcessor::add_to_discovered_list_mt(DiscoveredList& refs_list, assert(_discovery_is_mt, "!_discovery_is_mt should have been handled by caller"); // First we must make sure this object is only enqueued once. CAS in a non null // discovered_addr. - oop retest = oopDesc::atomic_compare_exchange_oop(refs_list.head(), discovered_addr, + oop current_head = refs_list.head(); + + // Note: In the case of G1, this pre-barrier is strictly + // not necessary because the only case we are interested in + // here is when *discovered_addr is NULL, so this will expand to + // nothing. As a result, I am just manually eliding this out for G1. + if (_discovered_list_needs_barrier && !UseG1GC) { + _bs->write_ref_field_pre((void*)discovered_addr, current_head); guarantee(false, "Needs to be fixed: YSR"); + } + oop retest = oopDesc::atomic_compare_exchange_oop(current_head, discovered_addr, NULL); if (retest == NULL) { // This thread just won the right to enqueue the object. @@ -1002,6 +1025,10 @@ ReferenceProcessor::add_to_discovered_list_mt(DiscoveredList& refs_list, // is necessary. 
refs_list.set_head(obj); refs_list.set_length(refs_list.length() + 1); + if (_discovered_list_needs_barrier) { + _bs->write_ref_field((void*)discovered_addr, current_head); guarantee(false, "Needs to be fixed: YSR"); + } + } else { // If retest was non NULL, another thread beat us to it: // The reference has already been discovered... @@ -1073,8 +1100,8 @@ bool ReferenceProcessor::discover_reference(oop obj, ReferenceType rt) { } } - HeapWord* discovered_addr = java_lang_ref_Reference::discovered_addr(obj); - oop discovered = java_lang_ref_Reference::discovered(obj); + HeapWord* const discovered_addr = java_lang_ref_Reference::discovered_addr(obj); + const oop discovered = java_lang_ref_Reference::discovered(obj); assert(discovered->is_oop_or_null(), "bad discovered field"); if (discovered != NULL) { // The reference has already been discovered... @@ -1094,7 +1121,7 @@ bool ReferenceProcessor::discover_reference(oop obj, ReferenceType rt) { // discovered twice except by concurrent collectors that potentially // trace the same Reference object twice. assert(UseConcMarkSweepGC, - "Only possible with a concurrent collector"); + "Only possible with an incremental-update concurrent collector"); return true; } } @@ -1122,12 +1149,24 @@ bool ReferenceProcessor::discover_reference(oop obj, ReferenceType rt) { return false; // nothing special needs to be done } - // We do a raw store here, the field will be visited later when - // processing the discovered references. if (_discovery_is_mt) { add_to_discovered_list_mt(*list, obj, discovered_addr); } else { - oop_store_raw(discovered_addr, list->head()); + // If "_discovered_list_needs_barrier", we do write barriers when + // updating the discovered reference list. Otherwise, we do a raw store + // here: the field will be visited later when processing the discovered + // references. 
+ oop current_head = list->head(); + // As in the case further above, since we are over-writing a NULL + // pre-value, we can safely elide the pre-barrier here for the case of G1. + assert(discovered == NULL, "control point invariant"); + if (_discovered_list_needs_barrier && !UseG1GC) { // safe to elide for G1 + _bs->write_ref_field_pre((oop*)discovered_addr, current_head); + } + oop_store_raw(discovered_addr, current_head); + if (_discovered_list_needs_barrier) { + _bs->write_ref_field((oop*)discovered_addr, current_head); + } list->set_head(obj); list->set_length(list->length() + 1); } diff --git a/hotspot/src/share/vm/memory/referenceProcessor.hpp b/hotspot/src/share/vm/memory/referenceProcessor.hpp index 7ef57bdee00..c07c4e34388 100644 --- a/hotspot/src/share/vm/memory/referenceProcessor.hpp +++ b/hotspot/src/share/vm/memory/referenceProcessor.hpp @@ -54,6 +54,14 @@ class ReferenceProcessor : public CHeapObj { bool _discovery_is_atomic; // if discovery is atomic wrt // other collectors in configuration bool _discovery_is_mt; // true if reference discovery is MT. + // If true, setting "next" field of a discovered refs list requires + // write barrier(s). (Must be true if used in a collector in which + // elements of a discovered list may be moved during discovery: for + // example, a collector like Garbage-First that moves objects during a + // long-term concurrent marking phase that does weak reference + // discovery.) + bool _discovered_list_needs_barrier; + BarrierSet* _bs; // Cached copy of BarrierSet. bool _enqueuing_is_done; // true if all weak references enqueued bool _processing_is_mt; // true during phases when // reference processing is MT. @@ -196,7 +204,6 @@ class ReferenceProcessor : public CHeapObj { void verify_ok_to_handle_reflists() PRODUCT_RETURN; void abandon_partial_discovered_list(DiscoveredList& refs_list); - void abandon_partial_discovered_list_arr(DiscoveredList refs_lists[]); // Calculate the number of jni handles. 
unsigned int count_jni_refs(); @@ -217,6 +224,8 @@ class ReferenceProcessor : public CHeapObj { _discovery_is_atomic(true), _enqueuing_is_done(false), _discovery_is_mt(false), + _discovered_list_needs_barrier(false), + _bs(NULL), _is_alive_non_header(NULL), _num_q(0), _processing_is_mt(false), @@ -224,8 +233,10 @@ class ReferenceProcessor : public CHeapObj { {} ReferenceProcessor(MemRegion span, bool atomic_discovery, - bool mt_discovery, int mt_degree = 1, - bool mt_processing = false); + bool mt_discovery, + int mt_degree = 1, + bool mt_processing = false, + bool discovered_list_needs_barrier = false); // Allocates and initializes a reference processor. static ReferenceProcessor* create_ref_processor( @@ -234,8 +245,8 @@ class ReferenceProcessor : public CHeapObj { bool mt_discovery, BoolObjectClosure* is_alive_non_header = NULL, int parallel_gc_threads = 1, - bool mt_processing = false); - + bool mt_processing = false, + bool discovered_list_needs_barrier = false); // RefDiscoveryPolicy values enum { ReferenceBasedDiscovery = 0, @@ -296,6 +307,11 @@ class ReferenceProcessor : public CHeapObj { // Enqueue references at end of GC (called by the garbage collector) bool enqueue_discovered_references(AbstractRefProcTaskExecutor* task_executor = NULL); + // If a discovery is in process that is being superceded, abandon it: all + // the discovered lists will be empty, and all the objects on them will + // have NULL discovered fields. Must be called only at a safepoint. + void abandon_partial_discovery(); + // debugging void verify_no_references_recorded() PRODUCT_RETURN; static void verify(); diff --git a/hotspot/src/share/vm/memory/sharedHeap.cpp b/hotspot/src/share/vm/memory/sharedHeap.cpp index 14bd6d47ba3..853b0ebb310 100644 --- a/hotspot/src/share/vm/memory/sharedHeap.cpp +++ b/hotspot/src/share/vm/memory/sharedHeap.cpp @@ -57,15 +57,24 @@ SharedHeap::SharedHeap(CollectorPolicy* policy_) : } _sh = this; // ch is static, should be set only once. 
if ((UseParNewGC || - (UseConcMarkSweepGC && CMSParallelRemarkEnabled)) && + (UseConcMarkSweepGC && CMSParallelRemarkEnabled) || + UseG1GC) && ParallelGCThreads > 0) { - _workers = new WorkGang("Parallel GC Threads", ParallelGCThreads, true); + _workers = new WorkGang("Parallel GC Threads", ParallelGCThreads, + /* are_GC_task_threads */true, + /* are_ConcurrentGC_threads */false); if (_workers == NULL) { vm_exit_during_initialization("Failed necessary allocation."); } } } +bool SharedHeap::heap_lock_held_for_gc() { + Thread* t = Thread::current(); + return Heap_lock->owned_by_self() + || ( (t->is_GC_task_thread() || t->is_VM_thread()) + && _thread_holds_heap_lock_for_gc); +} void SharedHeap::set_par_threads(int t) { _n_par_threads = t; @@ -280,10 +289,11 @@ void SharedHeap::fill_region_with_object(MemRegion mr) { } // Some utilities. -void SharedHeap::print_size_transition(size_t bytes_before, +void SharedHeap::print_size_transition(outputStream* out, + size_t bytes_before, size_t bytes_after, size_t capacity) { - tty->print(" %d%s->%d%s(%d%s)", + out->print(" %d%s->%d%s(%d%s)", byte_size_in_proper_unit(bytes_before), proper_unit_for_byte_size(bytes_before), byte_size_in_proper_unit(bytes_after), diff --git a/hotspot/src/share/vm/memory/sharedHeap.hpp b/hotspot/src/share/vm/memory/sharedHeap.hpp index caf9e171f7a..662f011c7ab 100644 --- a/hotspot/src/share/vm/memory/sharedHeap.hpp +++ b/hotspot/src/share/vm/memory/sharedHeap.hpp @@ -44,6 +44,9 @@ class KlassHandle; class SharedHeap : public CollectedHeap { friend class VMStructs; + friend class VM_GC_Operation; + friend class VM_CGC_Operation; + private: // For claiming strong_roots tasks. SubTasksDone* _process_strong_tasks; @@ -82,6 +85,14 @@ protected: // function. SharedHeap(CollectorPolicy* policy_); + // Returns true if the calling thread holds the heap lock, + // or the calling thread is a par gc thread and the heap_lock is held + // by the vm thread doing a gc operation. 
+ bool heap_lock_held_for_gc(); + // True if the heap_lock is held by the a non-gc thread invoking a gc + // operation. + bool _thread_holds_heap_lock_for_gc; + public: static SharedHeap* heap() { return _sh; } @@ -97,8 +108,8 @@ public: void set_perm(PermGen* perm_gen) { _perm_gen = perm_gen; } - // A helper function that fills an allocated-but-not-yet-initialized - // region with a garbage object. + // A helper function that fills a region of the heap with + // with a single object. static void fill_region_with_object(MemRegion mr); // Minimum garbage fill object size @@ -214,13 +225,12 @@ public: // "SharedHeap" can use in the implementation of its virtual // functions. -protected: +public: // Do anything common to GC's. virtual void gc_prologue(bool full) = 0; virtual void gc_epilogue(bool full) = 0; -public: // // New methods from CollectedHeap // @@ -266,7 +276,8 @@ public: } // Some utilities. - void print_size_transition(size_t bytes_before, + void print_size_transition(outputStream* out, + size_t bytes_before, size_t bytes_after, size_t capacity); }; diff --git a/hotspot/src/share/vm/memory/space.cpp b/hotspot/src/share/vm/memory/space.cpp index 59f42bbbd9c..37e6f61d2cc 100644 --- a/hotspot/src/share/vm/memory/space.cpp +++ b/hotspot/src/share/vm/memory/space.cpp @@ -105,7 +105,7 @@ void DirtyCardToOopClosure::do_MemRegion(MemRegion mr) { "Only ones we deal with for now."); assert(_precision != CardTableModRefBS::ObjHeadPreciseArray || - _last_bottom == NULL || + _cl->idempotent() || _last_bottom == NULL || top <= _last_bottom, "Not decreasing"); NOT_PRODUCT(_last_bottom = mr.start()); @@ -144,7 +144,14 @@ void DirtyCardToOopClosure::do_MemRegion(MemRegion mr) { walk_mem_region(mr, bottom_obj, top); } - _min_done = bottom; + // An idempotent closure might be applied in any order, so we don't + // record a _min_done for it. 
+ if (!_cl->idempotent()) { + _min_done = bottom; + } else { + assert(_min_done == _last_explicit_min_done, + "Don't update _min_done for idempotent cl"); + } } DirtyCardToOopClosure* Space::new_dcto_cl(OopClosure* cl, @@ -250,7 +257,8 @@ void Space::clear(bool mangle_space) { } } -ContiguousSpace::ContiguousSpace(): CompactibleSpace(), _top(NULL) { +ContiguousSpace::ContiguousSpace(): CompactibleSpace(), _top(NULL), + _concurrent_iteration_safe_limit(NULL) { _mangler = new GenSpaceMangler(this); } @@ -263,17 +271,17 @@ void ContiguousSpace::initialize(MemRegion mr, bool mangle_space) { CompactibleSpace::initialize(mr, clear_space, mangle_space); - _concurrent_iteration_safe_limit = top(); + set_concurrent_iteration_safe_limit(top()); } void ContiguousSpace::clear(bool mangle_space) { set_top(bottom()); set_saved_mark(); - Space::clear(mangle_space); + CompactibleSpace::clear(mangle_space); } bool Space::is_in(const void* p) const { - HeapWord* b = block_start(p); + HeapWord* b = block_start_const(p); return b != NULL && block_is_obj(b); } @@ -342,10 +350,15 @@ void CompactibleSpace::initialize(MemRegion mr, bool clear_space, bool mangle_space) { Space::initialize(mr, clear_space, mangle_space); - _compaction_top = bottom(); + set_compaction_top(bottom()); _next_compaction_space = NULL; } +void CompactibleSpace::clear(bool mangle_space) { + Space::clear(mangle_space); + _compaction_top = bottom(); +} + HeapWord* CompactibleSpace::forward(oop q, size_t size, CompactPoint* cp, HeapWord* compact_top) { // q is alive @@ -520,8 +533,8 @@ void ContiguousSpace::verify(bool allow_dirty) const { } guarantee(p == top(), "end of last object must match end of space"); if (top() != end()) { - guarantee(top() == block_start(end()-1) && - top() == block_start(top()), + guarantee(top() == block_start_const(end()-1) && + top() == block_start_const(top()), "top should be start of unallocated block, if it exists"); } } @@ -753,7 +766,7 @@ 
ALL_SINCE_SAVE_MARKS_CLOSURES(ContigSpace_OOP_SINCE_SAVE_MARKS_DEFN) #undef ContigSpace_OOP_SINCE_SAVE_MARKS_DEFN // Very general, slow implementation. -HeapWord* ContiguousSpace::block_start(const void* p) const { +HeapWord* ContiguousSpace::block_start_const(const void* p) const { assert(MemRegion(bottom(), end()).contains(p), "p not in space"); if (p >= top()) { return top(); @@ -957,7 +970,8 @@ void OffsetTableContigSpace::verify(bool allow_dirty) const { // For a sampling of objects in the space, find it using the // block offset table. if (blocks == BLOCK_SAMPLE_INTERVAL) { - guarantee(p == block_start(p + (size/2)), "check offset computation"); + guarantee(p == block_start_const(p + (size/2)), + "check offset computation"); blocks = 0; } else { blocks++; diff --git a/hotspot/src/share/vm/memory/space.hpp b/hotspot/src/share/vm/memory/space.hpp index de28ba87a3c..8edfd49e444 100644 --- a/hotspot/src/share/vm/memory/space.hpp +++ b/hotspot/src/share/vm/memory/space.hpp @@ -105,7 +105,7 @@ class Space: public CHeapObj { virtual void set_bottom(HeapWord* value) { _bottom = value; } virtual void set_end(HeapWord* value) { _end = value; } - HeapWord* saved_mark_word() const { return _saved_mark_word; } + virtual HeapWord* saved_mark_word() const { return _saved_mark_word; } void set_saved_mark_word(HeapWord* p) { _saved_mark_word = p; } MemRegionClosure* preconsumptionDirtyCardClosure() const { @@ -131,9 +131,15 @@ class Space: public CHeapObj { return MemRegion(bottom(), saved_mark_word()); } - // Initialization. These may be run to reset an existing - // Space. + // Initialization. + // "initialize" should be called once on a space, before it is used for + // any purpose. The "mr" arguments gives the bounds of the space, and + // the "clear_space" argument should be true unless the memory in "mr" is + // known to be zeroed. 
virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space); + + // The "clear" method must be called on a region that may have + // had allocation performed in it, but is now to be considered empty. virtual void clear(bool mangle_space); // For detecting GC bugs. Should only be called at GC boundaries, since @@ -218,7 +224,13 @@ class Space: public CHeapObj { // "block" that contains "p". We say "block" instead of "object" since // some heaps may not pack objects densely; a chunk may either be an // object or a non-object. If "p" is not in the space, return NULL. - virtual HeapWord* block_start(const void* p) const = 0; + virtual HeapWord* block_start_const(const void* p) const = 0; + + // The non-const version may have benevolent side effects on the data + // structure supporting these calls, possibly speeding up future calls. + // The default implementation, however, is simply to call the const + // version. + inline virtual HeapWord* block_start(const void* p); // Requires "addr" to be the start of a chunk, and returns its size. // "addr + size" is required to be the start of a new chunk, or the end @@ -284,12 +296,13 @@ protected: CardTableModRefBS::PrecisionStyle _precision; HeapWord* _boundary; // If non-NULL, process only non-NULL oops // pointing below boundary. - HeapWord* _min_done; // ObjHeadPreciseArray precision requires + HeapWord* _min_done; // ObjHeadPreciseArray precision requires // a downwards traversal; this is the // lowest location already done (or, // alternatively, the lowest address that // shouldn't be done again. NULL means infinity.) 
NOT_PRODUCT(HeapWord* _last_bottom;) + NOT_PRODUCT(HeapWord* _last_explicit_min_done;) // Get the actual top of the area on which the closure will // operate, given where the top is assumed to be (the end of the @@ -313,13 +326,15 @@ public: HeapWord* boundary) : _sp(sp), _cl(cl), _precision(precision), _boundary(boundary), _min_done(NULL) { - NOT_PRODUCT(_last_bottom = NULL;) + NOT_PRODUCT(_last_bottom = NULL); + NOT_PRODUCT(_last_explicit_min_done = NULL); } void do_MemRegion(MemRegion mr); void set_min_done(HeapWord* min_done) { _min_done = min_done; + NOT_PRODUCT(_last_explicit_min_done = _min_done); } #ifndef PRODUCT void set_last_bottom(HeapWord* last_bottom) { @@ -356,7 +371,11 @@ private: CompactibleSpace* _next_compaction_space; public: + CompactibleSpace() : + _compaction_top(NULL), _next_compaction_space(NULL) {} + virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space); + virtual void clear(bool mangle_space); // Used temporarily during a compaction phase to hold the value // top should have when compaction is complete. @@ -513,7 +532,7 @@ protected: /* prefetch beyond q */ \ Prefetch::write(q, interval); \ /* size_t size = oop(q)->size(); changing this for cms for perm gen */\ - size_t size = block_size(q); \ + size_t size = block_size(q); \ compact_top = cp->space->forward(oop(q), size, cp, compact_top); \ q += size; \ end_of_live = q; \ @@ -577,156 +596,158 @@ protected: cp->space->set_compaction_top(compact_top); \ } -#define SCAN_AND_ADJUST_POINTERS(adjust_obj_size) { \ - /* adjust all the interior pointers to point at the new locations of objects \ - * Used by MarkSweep::mark_sweep_phase3() */ \ +#define SCAN_AND_ADJUST_POINTERS(adjust_obj_size) { \ + /* adjust all the interior pointers to point at the new locations of objects \ + * Used by MarkSweep::mark_sweep_phase3() */ \ \ - HeapWord* q = bottom(); \ - HeapWord* t = _end_of_live; /* Established by "prepare_for_compaction". 
*/ \ + HeapWord* q = bottom(); \ + HeapWord* t = _end_of_live; /* Established by "prepare_for_compaction". */ \ \ - assert(_first_dead <= _end_of_live, "Stands to reason, no?"); \ + assert(_first_dead <= _end_of_live, "Stands to reason, no?"); \ \ - if (q < t && _first_dead > q && \ + if (q < t && _first_dead > q && \ !oop(q)->is_gc_marked()) { \ /* we have a chunk of the space which hasn't moved and we've \ * reinitialized the mark word during the previous pass, so we can't \ - * use is_gc_marked for the traversal. */ \ + * use is_gc_marked for the traversal. */ \ HeapWord* end = _first_dead; \ \ - while (q < end) { \ - /* I originally tried to conjoin "block_start(q) == q" to the \ - * assertion below, but that doesn't work, because you can't \ - * accurately traverse previous objects to get to the current one \ - * after their pointers (including pointers into permGen) have been \ - * updated, until the actual compaction is done. dld, 4/00 */ \ - assert(block_is_obj(q), \ - "should be at block boundaries, and should be looking at objs"); \ + while (q < end) { \ + /* I originally tried to conjoin "block_start(q) == q" to the \ + * assertion below, but that doesn't work, because you can't \ + * accurately traverse previous objects to get to the current one \ + * after their pointers (including pointers into permGen) have been \ + * updated, until the actual compaction is done. 
dld, 4/00 */ \ + assert(block_is_obj(q), \ + "should be at block boundaries, and should be looking at objs"); \ \ VALIDATE_MARK_SWEEP_ONLY(MarkSweep::track_interior_pointers(oop(q))); \ \ - /* point all the oops to the new location */ \ - size_t size = oop(q)->adjust_pointers(); \ - size = adjust_obj_size(size); \ + /* point all the oops to the new location */ \ + size_t size = oop(q)->adjust_pointers(); \ + size = adjust_obj_size(size); \ \ VALIDATE_MARK_SWEEP_ONLY(MarkSweep::check_interior_pointers()); \ - \ - VALIDATE_MARK_SWEEP_ONLY(MarkSweep::validate_live_oop(oop(q), size)); \ - \ - q += size; \ - } \ \ - if (_first_dead == t) { \ - q = t; \ - } else { \ - /* $$$ This is funky. Using this to read the previously written \ - * LiveRange. See also use below. */ \ + VALIDATE_MARK_SWEEP_ONLY(MarkSweep::validate_live_oop(oop(q), size)); \ + \ + q += size; \ + } \ + \ + if (_first_dead == t) { \ + q = t; \ + } else { \ + /* $$$ This is funky. Using this to read the previously written \ + * LiveRange. See also use below. 
*/ \ q = (HeapWord*)oop(_first_dead)->mark()->decode_pointer(); \ - } \ - } \ + } \ + } \ \ const intx interval = PrefetchScanIntervalInBytes; \ \ - debug_only(HeapWord* prev_q = NULL); \ - while (q < t) { \ - /* prefetch beyond q */ \ + debug_only(HeapWord* prev_q = NULL); \ + while (q < t) { \ + /* prefetch beyond q */ \ Prefetch::write(q, interval); \ - if (oop(q)->is_gc_marked()) { \ - /* q is alive */ \ + if (oop(q)->is_gc_marked()) { \ + /* q is alive */ \ VALIDATE_MARK_SWEEP_ONLY(MarkSweep::track_interior_pointers(oop(q))); \ - /* point all the oops to the new location */ \ - size_t size = oop(q)->adjust_pointers(); \ - size = adjust_obj_size(size); \ - VALIDATE_MARK_SWEEP_ONLY(MarkSweep::check_interior_pointers()); \ + /* point all the oops to the new location */ \ + size_t size = oop(q)->adjust_pointers(); \ + size = adjust_obj_size(size); \ + VALIDATE_MARK_SWEEP_ONLY(MarkSweep::check_interior_pointers()); \ VALIDATE_MARK_SWEEP_ONLY(MarkSweep::validate_live_oop(oop(q), size)); \ - debug_only(prev_q = q); \ + debug_only(prev_q = q); \ q += size; \ - } else { \ - /* q is not a live object, so its mark should point at the next \ - * live object */ \ - debug_only(prev_q = q); \ - q = (HeapWord*) oop(q)->mark()->decode_pointer(); \ - assert(q > prev_q, "we should be moving forward through memory"); \ - } \ - } \ + } else { \ + /* q is not a live object, so its mark should point at the next \ + * live object */ \ + debug_only(prev_q = q); \ + q = (HeapWord*) oop(q)->mark()->decode_pointer(); \ + assert(q > prev_q, "we should be moving forward through memory"); \ + } \ + } \ \ - assert(q == t, "just checking"); \ + assert(q == t, "just checking"); \ } -#define SCAN_AND_COMPACT(obj_size) { \ +#define SCAN_AND_COMPACT(obj_size) { \ /* Copy all live objects to their new location \ - * Used by MarkSweep::mark_sweep_phase4() */ \ + * Used by MarkSweep::mark_sweep_phase4() */ \ \ - HeapWord* q = bottom(); \ - HeapWord* const t = _end_of_live; \ - debug_only(HeapWord* 
prev_q = NULL); \ + HeapWord* q = bottom(); \ + HeapWord* const t = _end_of_live; \ + debug_only(HeapWord* prev_q = NULL); \ \ - if (q < t && _first_dead > q && \ + if (q < t && _first_dead > q && \ !oop(q)->is_gc_marked()) { \ - debug_only( \ + debug_only( \ /* we have a chunk of the space which hasn't moved and we've reinitialized \ * the mark word during the previous pass, so we can't use is_gc_marked for \ * the traversal. */ \ - HeapWord* const end = _first_dead; \ - \ - while (q < end) { \ + HeapWord* const end = _first_dead; \ + \ + while (q < end) { \ size_t size = obj_size(q); \ assert(!oop(q)->is_gc_marked(), \ "should be unmarked (special dense prefix handling)"); \ - VALIDATE_MARK_SWEEP_ONLY(MarkSweep::live_oop_moved_to(q, size, q)); \ - debug_only(prev_q = q); \ + VALIDATE_MARK_SWEEP_ONLY(MarkSweep::live_oop_moved_to(q, size, q)); \ + debug_only(prev_q = q); \ q += size; \ - } \ - ) /* debug_only */ \ - \ - if (_first_dead == t) { \ - q = t; \ - } else { \ - /* $$$ Funky */ \ - q = (HeapWord*) oop(_first_dead)->mark()->decode_pointer(); \ - } \ - } \ + } \ + ) /* debug_only */ \ \ - const intx scan_interval = PrefetchScanIntervalInBytes; \ - const intx copy_interval = PrefetchCopyIntervalInBytes; \ - while (q < t) { \ - if (!oop(q)->is_gc_marked()) { \ - /* mark is pointer to next marked oop */ \ - debug_only(prev_q = q); \ - q = (HeapWord*) oop(q)->mark()->decode_pointer(); \ - assert(q > prev_q, "we should be moving forward through memory"); \ - } else { \ - /* prefetch beyond q */ \ + if (_first_dead == t) { \ + q = t; \ + } else { \ + /* $$$ Funky */ \ + q = (HeapWord*) oop(_first_dead)->mark()->decode_pointer(); \ + } \ + } \ + \ + const intx scan_interval = PrefetchScanIntervalInBytes; \ + const intx copy_interval = PrefetchCopyIntervalInBytes; \ + while (q < t) { \ + if (!oop(q)->is_gc_marked()) { \ + /* mark is pointer to next marked oop */ \ + debug_only(prev_q = q); \ + q = (HeapWord*) oop(q)->mark()->decode_pointer(); \ + assert(q > prev_q, 
"we should be moving forward through memory"); \ + } else { \ + /* prefetch beyond q */ \ Prefetch::read(q, scan_interval); \ \ /* size and destination */ \ size_t size = obj_size(q); \ HeapWord* compaction_top = (HeapWord*)oop(q)->forwardee(); \ \ - /* prefetch beyond compaction_top */ \ + /* prefetch beyond compaction_top */ \ Prefetch::write(compaction_top, copy_interval); \ \ - /* copy object and reinit its mark */ \ + /* copy object and reinit its mark */ \ VALIDATE_MARK_SWEEP_ONLY(MarkSweep::live_oop_moved_to(q, size, \ compaction_top)); \ - assert(q != compaction_top, "everything in this pass should be moving"); \ - Copy::aligned_conjoint_words(q, compaction_top, size); \ - oop(compaction_top)->init_mark(); \ - assert(oop(compaction_top)->klass() != NULL, "should have a class"); \ + assert(q != compaction_top, "everything in this pass should be moving"); \ + Copy::aligned_conjoint_words(q, compaction_top, size); \ + oop(compaction_top)->init_mark(); \ + assert(oop(compaction_top)->klass() != NULL, "should have a class"); \ \ - debug_only(prev_q = q); \ + debug_only(prev_q = q); \ q += size; \ - } \ - } \ + } \ + } \ \ + /* Let's remember if we were empty before we did the compaction. */ \ + bool was_empty = used_region().is_empty(); \ /* Reset space after compaction is complete */ \ - reset_after_compaction(); \ + reset_after_compaction(); \ /* We do this clear, below, since it has overloaded meanings for some */ \ /* space subtypes. For example, OffsetTableContigSpace's that were */ \ /* compacted into will have had their offset table thresholds updated */ \ /* continuously, but those that weren't need to have their thresholds */ \ /* re-initialized. Also mangles unused area for debugging. 
*/ \ - if (is_empty()) { \ - clear(SpaceDecorator::Mangle); \ + if (used_region().is_empty()) { \ + if (!was_empty) clear(SpaceDecorator::Mangle); \ } else { \ if (ZapUnusedHeapArea) mangle_unused_area(); \ } \ @@ -752,20 +773,18 @@ class ContiguousSpace: public CompactibleSpace { inline HeapWord* par_allocate_impl(size_t word_size, HeapWord* end_value); public: - ContiguousSpace(); ~ContiguousSpace(); virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space); + virtual void clear(bool mangle_space); // Accessors HeapWord* top() const { return _top; } void set_top(HeapWord* value) { _top = value; } - void set_saved_mark() { _saved_mark_word = top(); } - void reset_saved_mark() { _saved_mark_word = bottom(); } - - virtual void clear(bool mangle_space); + virtual void set_saved_mark() { _saved_mark_word = top(); } + void reset_saved_mark() { _saved_mark_word = bottom(); } WaterMark bottom_mark() { return WaterMark(this, bottom()); } WaterMark top_mark() { return WaterMark(this, top()); } @@ -874,7 +893,7 @@ class ContiguousSpace: public CompactibleSpace { virtual void object_iterate_from(WaterMark mark, ObjectClosure* blk); // Very inefficient implementation. - virtual HeapWord* block_start(const void* p) const; + virtual HeapWord* block_start_const(const void* p) const; size_t block_size(const HeapWord* p) const; // If a block is in the allocated area, it is an object. bool block_is_obj(const HeapWord* p) const { return p < top(); } @@ -979,7 +998,8 @@ class EdenSpace : public ContiguousSpace { HeapWord* _soft_end; public: - EdenSpace(DefNewGeneration* gen) : _gen(gen) { _soft_end = NULL; } + EdenSpace(DefNewGeneration* gen) : + _gen(gen), _soft_end(NULL) {} // Get/set just the 'soft' limit. 
HeapWord* soft_end() { return _soft_end; } @@ -1033,7 +1053,7 @@ class OffsetTableContigSpace: public ContiguousSpace { void clear(bool mangle_space); - inline HeapWord* block_start(const void* p) const; + inline HeapWord* block_start_const(const void* p) const; // Add offset table update. virtual inline HeapWord* allocate(size_t word_size); diff --git a/hotspot/src/share/vm/memory/space.inline.hpp b/hotspot/src/share/vm/memory/space.inline.hpp index 6ca9bf2b1a0..c63d0c68cc7 100644 --- a/hotspot/src/share/vm/memory/space.inline.hpp +++ b/hotspot/src/share/vm/memory/space.inline.hpp @@ -22,6 +22,10 @@ * */ +inline HeapWord* Space::block_start(const void* p) { + return block_start_const(p); +} + inline HeapWord* OffsetTableContigSpace::allocate(size_t size) { HeapWord* res = ContiguousSpace::allocate(size); if (res != NULL) { @@ -50,7 +54,8 @@ inline HeapWord* OffsetTableContigSpace::par_allocate(size_t size) { return res; } -inline HeapWord* OffsetTableContigSpace::block_start(const void* p) const { +inline HeapWord* +OffsetTableContigSpace::block_start_const(const void* p) const { return _offsets.block_start(p); } diff --git a/hotspot/src/share/vm/memory/specialized_oop_closures.hpp b/hotspot/src/share/vm/memory/specialized_oop_closures.hpp index 9a3e774e74d..cf5062033c8 100644 --- a/hotspot/src/share/vm/memory/specialized_oop_closures.hpp +++ b/hotspot/src/share/vm/memory/specialized_oop_closures.hpp @@ -59,6 +59,12 @@ class CMSInnerParMarkAndPushClosure; // This is split into several because of a Visual C++ 6.0 compiler bug // where very long macros cause the compiler to crash +// Some other heap might define further specialized closures. 
+#ifndef FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES +#define FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES(f) \ + /* None */ +#endif + #define SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_S(f) \ f(ScanClosure,_nv) \ f(FastScanClosure,_nv) \ @@ -77,7 +83,7 @@ class CMSInnerParMarkAndPushClosure; SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_P(f) #ifndef SERIALGC -#define SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_3(f) \ +#define SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_2(f) \ f(MarkRefsIntoAndScanClosure,_nv) \ f(Par_MarkRefsIntoAndScanClosure,_nv) \ f(PushAndMarkClosure,_nv) \ @@ -85,11 +91,13 @@ class CMSInnerParMarkAndPushClosure; f(PushOrMarkClosure,_nv) \ f(Par_PushOrMarkClosure,_nv) \ f(CMSKeepAliveClosure,_nv) \ - f(CMSInnerParMarkAndPushClosure,_nv) + f(CMSInnerParMarkAndPushClosure,_nv) \ + FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES(f) #else // SERIALGC -#define SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_3(f) +#define SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_2(f) #endif // SERIALGC + // We separate these out, because sometime the general one has // a different definition from the specialized ones, and sometimes it // doesn't. @@ -98,8 +106,8 @@ class CMSInnerParMarkAndPushClosure; f(OopClosure,_v) \ SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_1(f) -#define ALL_OOP_OOP_ITERATE_CLOSURES_3(f) \ - SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_3(f) +#define ALL_OOP_OOP_ITERATE_CLOSURES_2(f) \ + SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_2(f) #ifndef SERIALGC // This macro applies an argument macro to all OopClosures for which we @@ -125,6 +133,13 @@ class CMSInnerParMarkAndPushClosure; // The "root_class" is the most general class to define; this may be // "OopClosure" in some applications and "OopsInGenClosure" in others. + +// Some other heap might define further specialized closures. 
+#ifndef FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES +#define FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(f) \ + /* None */ +#endif + #define SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES_YOUNG_S(f) \ f(ScanClosure,_nv) \ f(FastScanClosure,_nv) @@ -132,7 +147,8 @@ class CMSInnerParMarkAndPushClosure; #ifndef SERIALGC #define SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES_YOUNG_P(f) \ f(ParScanWithBarrierClosure,_nv) \ - f(ParScanWithoutBarrierClosure,_nv) + f(ParScanWithoutBarrierClosure,_nv) \ + FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(f) #else // SERIALGC #define SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES_YOUNG_P(f) #endif // SERIALGC @@ -179,13 +195,15 @@ public: #if ENABLE_SPECIALIZATION_STATS private: - static int _numCallsAll; + static bool _init; + static bool _wrapped; + static jint _numCallsAll; - static int _numCallsTotal[NUM_Kinds]; - static int _numCalls_nv[NUM_Kinds]; + static jint _numCallsTotal[NUM_Kinds]; + static jint _numCalls_nv[NUM_Kinds]; - static int _numDoOopCallsTotal[NUM_Kinds]; - static int _numDoOopCalls_nv[NUM_Kinds]; + static jint _numDoOopCallsTotal[NUM_Kinds]; + static jint _numDoOopCalls_nv[NUM_Kinds]; public: #endif static void clear() PRODUCT_RETURN; @@ -203,22 +221,22 @@ public: #if ENABLE_SPECIALIZATION_STATS inline void SpecializationStats::record_call() { - _numCallsAll++;; + Atomic::inc(&_numCallsAll); } inline void SpecializationStats::record_iterate_call_v(Kind k) { - _numCallsTotal[k]++; + Atomic::inc(&_numCallsTotal[k]); } inline void SpecializationStats::record_iterate_call_nv(Kind k) { - _numCallsTotal[k]++; - _numCalls_nv[k]++; + Atomic::inc(&_numCallsTotal[k]); + Atomic::inc(&_numCalls_nv[k]); } inline void SpecializationStats::record_do_oop_call_v(Kind k) { - _numDoOopCallsTotal[k]++; + Atomic::inc(&_numDoOopCallsTotal[k]); } inline void SpecializationStats::record_do_oop_call_nv(Kind k) { - _numDoOopCallsTotal[k]++; - _numDoOopCalls_nv[k]++; + Atomic::inc(&_numDoOopCallsTotal[k]); + Atomic::inc(&_numDoOopCalls_nv[k]); } #else // 
!ENABLE_SPECIALIZATION_STATS diff --git a/hotspot/src/share/vm/memory/universe.cpp b/hotspot/src/share/vm/memory/universe.cpp index c943f5749a7..651e9668041 100644 --- a/hotspot/src/share/vm/memory/universe.cpp +++ b/hotspot/src/share/vm/memory/universe.cpp @@ -739,6 +739,15 @@ jint Universe::initialize_heap() { fatal("UseParallelGC not supported in java kernel vm."); #endif // SERIALGC + } else if (UseG1GC) { +#ifndef SERIALGC + G1CollectorPolicy* g1p = new G1CollectorPolicy_BestRegionsFirst(); + G1CollectedHeap* g1h = new G1CollectedHeap(g1p); + Universe::_collectedHeap = g1h; +#else // SERIALGC + fatal("UseG1GC not supported in java kernel vm."); +#endif // SERIALGC + } else { GenCollectorPolicy *gc_policy; @@ -938,7 +947,10 @@ bool universe_post_init() { // This needs to be done before the first scavenge/gc, since // it's an input to soft ref clearing policy. - Universe::update_heap_info_at_gc(); + { + MutexLocker x(Heap_lock); + Universe::update_heap_info_at_gc(); + } // ("weak") refs processing infrastructure initialization Universe::heap()->post_initialize(); @@ -1194,10 +1206,11 @@ uintptr_t Universe::verify_klass_mask() { // ???: What if a CollectedHeap doesn't have a permanent generation? 
ShouldNotReachHere(); break; - case CollectedHeap::GenCollectedHeap: { - GenCollectedHeap* gch = (GenCollectedHeap*) Universe::heap(); - permanent_reserved = gch->perm_gen()->reserved(); - break; + case CollectedHeap::GenCollectedHeap: + case CollectedHeap::G1CollectedHeap: { + SharedHeap* sh = (SharedHeap*) Universe::heap(); + permanent_reserved = sh->perm_gen()->reserved(); + break; } #ifndef SERIALGC case CollectedHeap::ParallelScavengeHeap: { diff --git a/hotspot/src/share/vm/oops/generateOopMap.cpp b/hotspot/src/share/vm/oops/generateOopMap.cpp index bff8ccbecf4..4a9741c0fc8 100644 --- a/hotspot/src/share/vm/oops/generateOopMap.cpp +++ b/hotspot/src/share/vm/oops/generateOopMap.cpp @@ -370,21 +370,8 @@ void CellTypeState::print(outputStream *os) { void GenerateOopMap ::initialize_bb() { _gc_points = 0; _bb_count = 0; - int size = binsToHold(method()->code_size()); - _bb_hdr_bits = NEW_RESOURCE_ARRAY(uintptr_t,size); - memset(_bb_hdr_bits, 0, size*sizeof(uintptr_t)); -} - -void GenerateOopMap ::set_bbmark_bit(int bci) { - int idx = bci >> LogBitsPerWord; - uintptr_t bit = (uintptr_t)1 << (bci & (BitsPerWord-1)); - _bb_hdr_bits[idx] |= bit; -} - -void GenerateOopMap ::clear_bbmark_bit(int bci) { - int idx = bci >> LogBitsPerWord; - uintptr_t bit = (uintptr_t)1 << (bci & (BitsPerWord-1)); - _bb_hdr_bits[idx] &= (~bit); + _bb_hdr_bits.clear(); + _bb_hdr_bits.resize(method()->code_size()); } void GenerateOopMap::bb_mark_fct(GenerateOopMap *c, int bci, int *data) { @@ -952,6 +939,17 @@ void GenerateOopMap::init_basic_blocks() { _basic_blocks[bbNo-1]._end_bci = prev_bci; + // Check that the correct number of basicblocks was found + if (bbNo !=_bb_count) { + if (bbNo < _bb_count) { + verify_error("jump into the middle of instruction?"); + return; + } else { + verify_error("extra basic blocks - should not happen?"); + return; + } + } + _max_monitors = monitor_count; // Now that we have a bound on the depth of the monitor stack, we can @@ -985,17 +983,6 @@ void 
GenerateOopMap::init_basic_blocks() { } #endif - // Check that the correct number of basicblocks was found - if (bbNo !=_bb_count) { - if (bbNo < _bb_count) { - verify_error("jump into the middle of instruction?"); - return; - } else { - verify_error("extra basic blocks - should not happen?"); - return; - } - } - // Mark all alive blocks mark_reachable_code(); } @@ -1022,21 +1009,22 @@ void GenerateOopMap::update_basic_blocks(int bci, int delta, int new_method_size) { assert(new_method_size >= method()->code_size() + delta, "new method size is too small"); - int newWords = binsToHold(new_method_size); - uintptr_t * new_bb_hdr_bits = NEW_RESOURCE_ARRAY(uintptr_t, newWords); + BitMap::bm_word_t* new_bb_hdr_bits = + NEW_RESOURCE_ARRAY(BitMap::bm_word_t, + BitMap::word_align_up(new_method_size)); + _bb_hdr_bits.set_map(new_bb_hdr_bits); + _bb_hdr_bits.set_size(new_method_size); + _bb_hdr_bits.clear(); - BitMap bb_bits(new_bb_hdr_bits, new_method_size); - bb_bits.clear(); for(int k = 0; k < _bb_count; k++) { if (_basic_blocks[k]._bci > bci) { _basic_blocks[k]._bci += delta; _basic_blocks[k]._end_bci += delta; } - bb_bits.at_put(_basic_blocks[k]._bci, true); + _bb_hdr_bits.at_put(_basic_blocks[k]._bci, true); } - _bb_hdr_bits = new_bb_hdr_bits ; } // diff --git a/hotspot/src/share/vm/oops/generateOopMap.hpp b/hotspot/src/share/vm/oops/generateOopMap.hpp index f29982b3abf..432902ef362 100644 --- a/hotspot/src/share/vm/oops/generateOopMap.hpp +++ b/hotspot/src/share/vm/oops/generateOopMap.hpp @@ -341,16 +341,22 @@ class GenerateOopMap VALUE_OBJ_CLASS_SPEC { BasicBlock * _basic_blocks; // Array of basicblock info int _gc_points; int _bb_count; - uintptr_t * _bb_hdr_bits; + BitMap _bb_hdr_bits; // Basicblocks methods void initialize_bb (); void mark_bbheaders_and_count_gc_points(); - bool is_bb_header (int bci) const { return (_bb_hdr_bits[bci >> LogBitsPerWord] & ((uintptr_t)1 << (bci & (BitsPerWord-1)))) != 0; } + bool is_bb_header (int bci) const { + return 
_bb_hdr_bits.at(bci); + } int gc_points () const { return _gc_points; } int bb_count () const { return _bb_count; } - void set_bbmark_bit (int bci); - void clear_bbmark_bit (int bci); + void set_bbmark_bit (int bci) { + _bb_hdr_bits.at_put(bci, true); + } + void clear_bbmark_bit (int bci) { + _bb_hdr_bits.at_put(bci, false); + } BasicBlock * get_basic_block_at (int bci) const; BasicBlock * get_basic_block_containing (int bci) const; void interp_bb (BasicBlock *bb); diff --git a/hotspot/src/share/vm/oops/instanceKlass.cpp b/hotspot/src/share/vm/oops/instanceKlass.cpp index 8862efd299b..7a2d3408747 100644 --- a/hotspot/src/share/vm/oops/instanceKlass.cpp +++ b/hotspot/src/share/vm/oops/instanceKlass.cpp @@ -1515,10 +1515,9 @@ void instanceKlass::oop_follow_contents(ParCompactionManager* cm, // closure's do_header() method dicates whether the given closure should be // applied to the klass ptr in the object header. -#define InstanceKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix) \ - \ -int instanceKlass::oop_oop_iterate##nv_suffix(oop obj, \ - OopClosureType* closure) {\ +#define InstanceKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix) \ + \ +int instanceKlass::oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) { \ SpecializationStats::record_iterate_call##nv_suffix(SpecializationStats::ik);\ /* header */ \ if (closure->do_header()) { \ @@ -1533,6 +1532,26 @@ int instanceKlass::oop_oop_iterate##nv_suffix(oop obj, \ return size_helper(); \ } +#ifndef SERIALGC +#define InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix) \ + \ +int instanceKlass::oop_oop_iterate_backwards##nv_suffix(oop obj, \ + OopClosureType* closure) { \ + SpecializationStats::record_iterate_call##nv_suffix(SpecializationStats::ik); \ + /* header */ \ + if (closure->do_header()) { \ + obj->oop_iterate_header(closure); \ + } \ + /* instance variables */ \ + InstanceKlass_OOP_MAP_REVERSE_ITERATE( \ + obj, \ + 
SpecializationStats::record_do_oop_call##nv_suffix(SpecializationStats::ik);\ + (closure)->do_oop##nv_suffix(p), \ + assert_is_in_closed_subset) \ + return size_helper(); \ +} +#endif // !SERIALGC + #define InstanceKlass_OOP_OOP_ITERATE_DEFN_m(OopClosureType, nv_suffix) \ \ int instanceKlass::oop_oop_iterate##nv_suffix##_m(oop obj, \ @@ -1550,9 +1569,13 @@ int instanceKlass::oop_oop_iterate##nv_suffix##_m(oop obj, \ } ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceKlass_OOP_OOP_ITERATE_DEFN) -ALL_OOP_OOP_ITERATE_CLOSURES_3(InstanceKlass_OOP_OOP_ITERATE_DEFN) +ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceKlass_OOP_OOP_ITERATE_DEFN) ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceKlass_OOP_OOP_ITERATE_DEFN_m) -ALL_OOP_OOP_ITERATE_CLOSURES_3(InstanceKlass_OOP_OOP_ITERATE_DEFN_m) +ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceKlass_OOP_OOP_ITERATE_DEFN_m) +#ifndef SERIALGC +ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN) +ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN) +#endif // !SERIALGC void instanceKlass::iterate_static_fields(OopClosure* closure) { InstanceKlass_OOP_ITERATE( \ diff --git a/hotspot/src/share/vm/oops/instanceKlass.hpp b/hotspot/src/share/vm/oops/instanceKlass.hpp index 2dd61e56802..e96bd6bb794 100644 --- a/hotspot/src/share/vm/oops/instanceKlass.hpp +++ b/hotspot/src/share/vm/oops/instanceKlass.hpp @@ -655,13 +655,21 @@ class instanceKlass: public Klass { return oop_oop_iterate_v_m(obj, blk, mr); } -#define InstanceKlass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix) \ - int oop_oop_iterate##nv_suffix(oop obj, OopClosureType* blk); \ - int oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* blk, \ +#define InstanceKlass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix) \ + int oop_oop_iterate##nv_suffix(oop obj, OopClosureType* blk); \ + int oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* blk, \ MemRegion mr); ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceKlass_OOP_OOP_ITERATE_DECL) - 
ALL_OOP_OOP_ITERATE_CLOSURES_3(InstanceKlass_OOP_OOP_ITERATE_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceKlass_OOP_OOP_ITERATE_DECL) + +#ifndef SERIALGC +#define InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix) \ + int oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* blk); + + ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DECL) +#endif // !SERIALGC void iterate_static_fields(OopClosure* closure); void iterate_static_fields(OopClosure* closure, MemRegion mr); diff --git a/hotspot/src/share/vm/oops/instanceRefKlass.cpp b/hotspot/src/share/vm/oops/instanceRefKlass.cpp index 686aabf6a15..c6d9f75b0cb 100644 --- a/hotspot/src/share/vm/oops/instanceRefKlass.cpp +++ b/hotspot/src/share/vm/oops/instanceRefKlass.cpp @@ -176,6 +176,11 @@ int instanceRefKlass::oop_adjust_pointers(oop obj) { } #define InstanceRefKlass_SPECIALIZED_OOP_ITERATE(T, nv_suffix, contains) \ + if (closure->apply_to_weak_ref_discovered_field()) { \ + T* disc_addr = (T*)java_lang_ref_Reference::discovered_addr(obj); \ + closure->do_oop##nv_suffix(disc_addr); \ + } \ + \ T* referent_addr = (T*)java_lang_ref_Reference::referent_addr(obj); \ oop referent = oopDesc::load_decode_heap_oop(referent_addr); \ if (referent != NULL && contains(referent_addr)) { \ @@ -219,6 +224,25 @@ oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) { } \ } +#ifndef SERIALGC +#define InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix) \ + \ +int instanceRefKlass:: \ +oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) { \ + /* Get size before changing pointers */ \ + SpecializationStats::record_iterate_call##nv_suffix(SpecializationStats::irk);\ + \ + int size = instanceKlass::oop_oop_iterate_backwards##nv_suffix(obj, closure); \ + \ + if (UseCompressedOops) { \ + InstanceRefKlass_SPECIALIZED_OOP_ITERATE(narrowOop, nv_suffix, contains); \ 
+ } else { \ + InstanceRefKlass_SPECIALIZED_OOP_ITERATE(oop, nv_suffix, contains); \ + } \ +} +#endif // !SERIALGC + + #define InstanceRefKlass_OOP_OOP_ITERATE_DEFN_m(OopClosureType, nv_suffix) \ \ int instanceRefKlass:: \ @@ -236,9 +260,13 @@ oop_oop_iterate##nv_suffix##_m(oop obj, } ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_DEFN) -ALL_OOP_OOP_ITERATE_CLOSURES_3(InstanceRefKlass_OOP_OOP_ITERATE_DEFN) +ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_DEFN) +#ifndef SERIALGC +ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN) +ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN) +#endif // SERIALGC ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_DEFN_m) -ALL_OOP_OOP_ITERATE_CLOSURES_3(InstanceRefKlass_OOP_OOP_ITERATE_DEFN_m) +ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_DEFN_m) #ifndef SERIALGC template @@ -423,7 +451,7 @@ void instanceRefKlass::oop_verify_on(oop obj, outputStream* st) { // Verify next field oop next = java_lang_ref_Reference::next(obj); if (next != NULL) { - guarantee(next->is_oop(), "next field verify fa iled"); + guarantee(next->is_oop(), "next field verify failed"); guarantee(next->is_instanceRef(), "next field verify failed"); if (gch != NULL && !gch->is_in_youngest(obj)) { // We do a specific remembered set check here since the next field is diff --git a/hotspot/src/share/vm/oops/instanceRefKlass.hpp b/hotspot/src/share/vm/oops/instanceRefKlass.hpp index ed8b11998bc..501d8172677 100644 --- a/hotspot/src/share/vm/oops/instanceRefKlass.hpp +++ b/hotspot/src/share/vm/oops/instanceRefKlass.hpp @@ -72,7 +72,15 @@ class instanceRefKlass: public instanceKlass { int oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* blk, MemRegion mr); ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_DECL) - ALL_OOP_OOP_ITERATE_CLOSURES_3(InstanceRefKlass_OOP_OOP_ITERATE_DECL) + 
ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_DECL) + +#ifndef SERIALGC +#define InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix) \ + int oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* blk); + + ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DECL) +#endif // !SERIALGC static void release_and_notify_pending_list_lock(BasicLock *pending_list_basic_lock); static void acquire_pending_list_lock(BasicLock *pending_list_basic_lock); diff --git a/hotspot/src/share/vm/oops/klass.hpp b/hotspot/src/share/vm/oops/klass.hpp index b2a3260a216..46852c02ee1 100644 --- a/hotspot/src/share/vm/oops/klass.hpp +++ b/hotspot/src/share/vm/oops/klass.hpp @@ -134,14 +134,14 @@ class Klass_vtbl { // Every subclass on which vtbl_value is called must include this macro. // Delay the installation of the klassKlass pointer until after the // the vtable for a new klass has been installed (after the call to new()). 
-#define DEFINE_ALLOCATE_PERMANENT(thisKlass) \ +#define DEFINE_ALLOCATE_PERMANENT(thisKlass) \ void* allocate_permanent(KlassHandle& klass_klass, int size, TRAPS) const { \ - void* result = new(klass_klass, size, THREAD) thisKlass(); \ - if (HAS_PENDING_EXCEPTION) return NULL; \ - klassOop new_klass = ((Klass*) result)->as_klassOop(); \ - OrderAccess::storestore(); \ - post_new_init_klass(klass_klass, new_klass, size); \ - return result; \ + void* result = new(klass_klass, size, THREAD) thisKlass(); \ + if (HAS_PENDING_EXCEPTION) return NULL; \ + klassOop new_klass = ((Klass*) result)->as_klassOop(); \ + OrderAccess::storestore(); \ + post_new_init_klass(klass_klass, new_klass, size); \ + return result; \ } bool null_vtbl() { return *(intptr_t*)this == 0; } @@ -694,6 +694,14 @@ class Klass : public Klass_vtbl { return oop_oop_iterate(obj, blk); } +#ifndef SERIALGC + // In case we don't have a specialized backward scanner use forward + // iteration. + virtual int oop_oop_iterate_backwards_v(oop obj, OopClosure* blk) { + return oop_oop_iterate_v(obj, blk); + } +#endif // !SERIALGC + // Iterates "blk" over all the oops in "obj" (of type "this") within "mr". // (I don't see why the _m should be required, but without it the Solaris // C++ gives warning messages about overridings of the "oop_oop_iterate" @@ -722,7 +730,19 @@ class Klass : public Klass_vtbl { } SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_1(Klass_OOP_OOP_ITERATE_DECL) - SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_3(Klass_OOP_OOP_ITERATE_DECL) + SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_2(Klass_OOP_OOP_ITERATE_DECL) + +#ifndef SERIALGC +#define Klass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix) \ + virtual int oop_oop_iterate_backwards##nv_suffix(oop obj, \ + OopClosureType* blk) { \ + /* Default implementation reverts to general version. 
*/ \ + return oop_oop_iterate_backwards_v(obj, blk); \ + } + + SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_1(Klass_OOP_OOP_ITERATE_BACKWARDS_DECL) + SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_2(Klass_OOP_OOP_ITERATE_BACKWARDS_DECL) +#endif // !SERIALGC virtual void array_klasses_do(void f(klassOop k)) {} virtual void with_array_klasses_do(void f(klassOop k)); diff --git a/hotspot/src/share/vm/oops/markOop.hpp b/hotspot/src/share/vm/oops/markOop.hpp index bc5d965c60c..8dd73688faf 100644 --- a/hotspot/src/share/vm/oops/markOop.hpp +++ b/hotspot/src/share/vm/oops/markOop.hpp @@ -222,11 +222,7 @@ class markOopDesc: public oopDesc { static markOop INFLATING() { return (markOop) 0; } // inflate-in-progress // Should this header be preserved during GC? - bool must_be_preserved(oop obj_containing_mark) const { - if (!UseBiasedLocking) - return (!is_unlocked() || !has_no_hash()); - return must_be_preserved_with_bias(obj_containing_mark); - } + inline bool must_be_preserved(oop obj_containing_mark) const; inline bool must_be_preserved_with_bias(oop obj_containing_mark) const; // Should this header (including its age bits) be preserved in the @@ -246,22 +242,14 @@ class markOopDesc: public oopDesc { // observation is that promotion failures are quite rare and // reducing the number of mark words preserved during them isn't a // high priority. - bool must_be_preserved_for_promotion_failure(oop obj_containing_mark) const { - if (!UseBiasedLocking) - return (this != prototype()); - return must_be_preserved_with_bias_for_promotion_failure(obj_containing_mark); - } + inline bool must_be_preserved_for_promotion_failure(oop obj_containing_mark) const; inline bool must_be_preserved_with_bias_for_promotion_failure(oop obj_containing_mark) const; // Should this header be preserved during a scavenge where CMS is // the old generation? 
// (This is basically the same body as must_be_preserved_for_promotion_failure(), // but takes the klassOop as argument instead) - bool must_be_preserved_for_cms_scavenge(klassOop klass_of_obj_containing_mark) const { - if (!UseBiasedLocking) - return (this != prototype()); - return must_be_preserved_with_bias_for_cms_scavenge(klass_of_obj_containing_mark); - } + inline bool must_be_preserved_for_cms_scavenge(klassOop klass_of_obj_containing_mark) const; inline bool must_be_preserved_with_bias_for_cms_scavenge(klassOop klass_of_obj_containing_mark) const; // WARNING: The following routines are used EXCLUSIVELY by diff --git a/hotspot/src/share/vm/oops/markOop.inline.hpp b/hotspot/src/share/vm/oops/markOop.inline.hpp index 46774b201d9..396c11e7549 100644 --- a/hotspot/src/share/vm/oops/markOop.inline.hpp +++ b/hotspot/src/share/vm/oops/markOop.inline.hpp @@ -39,6 +39,12 @@ inline bool markOopDesc::must_be_preserved_with_bias(oop obj_containing_mark) co return (!is_unlocked() || !has_no_hash()); } +inline bool markOopDesc::must_be_preserved(oop obj_containing_mark) const { + if (!UseBiasedLocking) + return (!is_unlocked() || !has_no_hash()); + return must_be_preserved_with_bias(obj_containing_mark); +} + // Should this header (including its age bits) be preserved in the // case of a promotion failure during scavenge? inline bool markOopDesc::must_be_preserved_with_bias_for_promotion_failure(oop obj_containing_mark) const { @@ -59,6 +65,13 @@ inline bool markOopDesc::must_be_preserved_with_bias_for_promotion_failure(oop o return (this != prototype()); } +inline bool markOopDesc::must_be_preserved_for_promotion_failure(oop obj_containing_mark) const { + if (!UseBiasedLocking) + return (this != prototype()); + return must_be_preserved_with_bias_for_promotion_failure(obj_containing_mark); +} + + // Should this header (including its age bits) be preserved in the // case of a scavenge in which CMS is the old generation? 
inline bool markOopDesc::must_be_preserved_with_bias_for_cms_scavenge(klassOop klass_of_obj_containing_mark) const { @@ -70,6 +83,11 @@ inline bool markOopDesc::must_be_preserved_with_bias_for_cms_scavenge(klassOop k } return (this != prototype()); } +inline bool markOopDesc::must_be_preserved_for_cms_scavenge(klassOop klass_of_obj_containing_mark) const { + if (!UseBiasedLocking) + return (this != prototype()); + return must_be_preserved_with_bias_for_cms_scavenge(klass_of_obj_containing_mark); +} inline markOop markOopDesc::prototype_for_object(oop obj) { #ifdef ASSERT diff --git a/hotspot/src/share/vm/oops/objArrayKlass.cpp b/hotspot/src/share/vm/oops/objArrayKlass.cpp index 8ca45f28703..1f2574b9c43 100644 --- a/hotspot/src/share/vm/oops/objArrayKlass.cpp +++ b/hotspot/src/share/vm/oops/objArrayKlass.cpp @@ -86,14 +86,18 @@ template void objArrayKlass::do_copy(arrayOop s, T* src, const size_t word_len = objArrayOopDesc::array_size(length); - // For performance reasons, we assume we are using a card marking write - // barrier. The assert will fail if this is not the case. BarrierSet* bs = Universe::heap()->barrier_set(); + // For performance reasons, we assume that the write barrier we + // are using has optimized modes for arrays of references. At least one + // of the asserts below will fail if this is not the case. assert(bs->has_write_ref_array_opt(), "Barrier set must have ref array opt"); + assert(bs->has_write_ref_array_pre_opt(), "For pre-barrier as well."); + MemRegion dst_mr = MemRegion((HeapWord*)dst, word_len); if (s == d) { // since source and destination are equal we do not need conversion checks.
assert(length > 0, "sanity check"); + bs->write_ref_array_pre(dst_mr); Copy::conjoint_oops_atomic(src, dst, length); } else { // We have to make sure all elements conform to the destination array @@ -101,6 +105,7 @@ template void objArrayKlass::do_copy(arrayOop s, T* src, klassOop stype = objArrayKlass::cast(s->klass())->element_klass(); if (stype == bound || Klass::cast(stype)->is_subtype_of(bound)) { // elements are guaranteed to be subtypes, so no check necessary + bs->write_ref_array_pre(dst_mr); Copy::conjoint_oops_atomic(src, dst, length); } else { // slow case: need individual subtype checks @@ -110,8 +115,13 @@ template void objArrayKlass::do_copy(arrayOop s, T* src, for (T* p = dst; from < end; from++, p++) { // XXX this is going to be slow. T element = *from; - if (oopDesc::is_null(element) || - Klass::cast(oopDesc::decode_heap_oop_not_null(element)->klass())->is_subtype_of(bound)) { + // even slower now + bool element_is_null = oopDesc::is_null(element); + oop new_val = element_is_null ? oop(NULL) + : oopDesc::decode_heap_oop_not_null(element); + if (element_is_null || + Klass::cast((new_val->klass()))->is_subtype_of(bound)) { + bs->write_ref_field_pre(p, new_val); *p = *from; } else { // We must do a barrier to cover the partial copy. 
@@ -401,11 +411,11 @@ int objArrayKlass::oop_oop_iterate_range##nv_suffix(oop obj, } ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayKlass_OOP_OOP_ITERATE_DEFN) -ALL_OOP_OOP_ITERATE_CLOSURES_3(ObjArrayKlass_OOP_OOP_ITERATE_DEFN) +ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayKlass_OOP_OOP_ITERATE_DEFN) ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayKlass_OOP_OOP_ITERATE_DEFN_m) -ALL_OOP_OOP_ITERATE_CLOSURES_3(ObjArrayKlass_OOP_OOP_ITERATE_DEFN_m) +ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayKlass_OOP_OOP_ITERATE_DEFN_m) ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayKlass_OOP_OOP_ITERATE_DEFN_r) -ALL_OOP_OOP_ITERATE_CLOSURES_3(ObjArrayKlass_OOP_OOP_ITERATE_DEFN_r) +ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayKlass_OOP_OOP_ITERATE_DEFN_r) int objArrayKlass::oop_adjust_pointers(oop obj) { assert(obj->is_objArray(), "obj must be obj array"); @@ -465,8 +475,8 @@ jint objArrayKlass::compute_modifier_flags(TRAPS) const { assert(Universe::is_bootstrapping(), "partial objArray only at startup"); return JVM_ACC_ABSTRACT | JVM_ACC_FINAL | JVM_ACC_PUBLIC; } - // Recurse down the element list - jint element_flags = Klass::cast(element_klass())->compute_modifier_flags(CHECK_0); + // Return the flags of the bottom element type. 
+ jint element_flags = Klass::cast(bottom_klass())->compute_modifier_flags(CHECK_0); return (element_flags & (JVM_ACC_PUBLIC | JVM_ACC_PRIVATE | JVM_ACC_PROTECTED)) | (JVM_ACC_ABSTRACT | JVM_ACC_FINAL); diff --git a/hotspot/src/share/vm/oops/objArrayKlass.hpp b/hotspot/src/share/vm/oops/objArrayKlass.hpp index 963b9ac5cf8..fcc62aad5b9 100644 --- a/hotspot/src/share/vm/oops/objArrayKlass.hpp +++ b/hotspot/src/share/vm/oops/objArrayKlass.hpp @@ -111,7 +111,7 @@ class objArrayKlass : public arrayKlass { int start, int end); ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayKlass_OOP_OOP_ITERATE_DECL) - ALL_OOP_OOP_ITERATE_CLOSURES_3(ObjArrayKlass_OOP_OOP_ITERATE_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayKlass_OOP_OOP_ITERATE_DECL) // JVM support jint compute_modifier_flags(TRAPS) const; diff --git a/hotspot/src/share/vm/oops/objArrayOop.cpp b/hotspot/src/share/vm/oops/objArrayOop.cpp index f05d837b431..a03464a6783 100644 --- a/hotspot/src/share/vm/oops/objArrayOop.cpp +++ b/hotspot/src/share/vm/oops/objArrayOop.cpp @@ -33,4 +33,4 @@ int objArrayOopDesc::oop_iterate_range(OopClosureType* blk, int start, int end) } ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayOop_OOP_ITERATE_DEFN) -ALL_OOP_OOP_ITERATE_CLOSURES_3(ObjArrayOop_OOP_ITERATE_DEFN) +ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayOop_OOP_ITERATE_DEFN) diff --git a/hotspot/src/share/vm/oops/objArrayOop.hpp b/hotspot/src/share/vm/oops/objArrayOop.hpp index 9e4b3674fbf..dd2c7c30aaf 100644 --- a/hotspot/src/share/vm/oops/objArrayOop.hpp +++ b/hotspot/src/share/vm/oops/objArrayOop.hpp @@ -29,6 +29,8 @@ class objArrayOopDesc : public arrayOopDesc { friend class objArrayKlass; friend class Runtime1; friend class psPromotionManager; + friend class CSMarkOopClosure; + friend class G1ParScanPartialArrayClosure; template T* obj_at_addr(int index) const { assert(is_within_bounds(index), "index out of bounds"); @@ -88,5 +90,5 @@ class objArrayOopDesc : public arrayOopDesc { int oop_iterate_range(OopClosureType* blk, int start, int end); 
ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayOop_OOP_ITERATE_DECL) - ALL_OOP_OOP_ITERATE_CLOSURES_3(ObjArrayOop_OOP_ITERATE_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayOop_OOP_ITERATE_DECL) }; diff --git a/hotspot/src/share/vm/oops/oop.hpp b/hotspot/src/share/vm/oops/oop.hpp index 4b84a802e14..07c2ba5ac52 100644 --- a/hotspot/src/share/vm/oops/oop.hpp +++ b/hotspot/src/share/vm/oops/oop.hpp @@ -363,12 +363,21 @@ class oopDesc { static void set_bs(BarrierSet* bs) { _bs = bs; } // iterators, returns size of object -#define OOP_ITERATE_DECL(OopClosureType, nv_suffix) \ +#define OOP_ITERATE_DECL(OopClosureType, nv_suffix) \ int oop_iterate(OopClosureType* blk); \ int oop_iterate(OopClosureType* blk, MemRegion mr); // Only in mr. ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_ITERATE_DECL) - ALL_OOP_OOP_ITERATE_CLOSURES_3(OOP_ITERATE_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_ITERATE_DECL) + +#ifndef SERIALGC + +#define OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix) \ + int oop_iterate_backwards(OopClosureType* blk); + + ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_ITERATE_BACKWARDS_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_ITERATE_BACKWARDS_DECL) +#endif void oop_iterate_header(OopClosure* blk); void oop_iterate_header(OopClosure* blk, MemRegion mr); diff --git a/hotspot/src/share/vm/oops/oop.inline.hpp b/hotspot/src/share/vm/oops/oop.inline.hpp index 454a4f609aa..722a0393003 100644 --- a/hotspot/src/share/vm/oops/oop.inline.hpp +++ b/hotspot/src/share/vm/oops/oop.inline.hpp @@ -395,10 +395,11 @@ inline int oopDesc::size_given_klass(Klass* klass) { s = (int)((size_t)round_to(size_in_bytes, MinObjAlignmentInBytes) / HeapWordSize); - // UseParNewGC can change the length field of an "old copy" of an object - // array in the young gen so it indicates the stealable portion of - // an already copied array. This will cause the first disjunct below - // to fail if the sizes are computed across such a concurrent change. 
+ // UseParNewGC, UseParallelGC and UseG1GC can change the length field + // of an "old copy" of an object array in the young gen so it indicates + // the grey portion of an already copied array. This will cause the first + // disjunct below to fail if the two comparands are computed across such + // a concurrent change. // UseParNewGC also runs with promotion labs (which look like int // filler arrays) which are subject to changing their declared size // when finally retiring a PLAB; this also can cause the first disjunct @@ -408,13 +409,11 @@ inline int oopDesc::size_given_klass(Klass* klass) { // is_objArray() && is_forwarded() // covers first scenario above // || is_typeArray() // covers second scenario above // If and when UseParallelGC uses the same obj array oop stealing/chunking - // technique, or when G1 is integrated (and currently uses this array chunking - // technique) we will need to suitably modify the assertion. + // technique, we will need to suitably modify the assertion. assert((s == klass->oop_size(this)) || - (((UseParNewGC || UseParallelGC) && - Universe::heap()->is_gc_active()) && - (is_typeArray() || - (is_objArray() && is_forwarded()))), + (Universe::heap()->is_gc_active() && + ((is_typeArray() && UseParNewGC) || + (is_objArray() && is_forwarded() && (UseParNewGC || UseParallelGC || UseG1GC)))), "wrong array object size"); } else { // Must be zero, so bite the bullet and take the virtual call. 
@@ -441,16 +440,22 @@ inline void update_barrier_set(void* p, oop v) { oopDesc::bs()->write_ref_field(p, v); } +inline void update_barrier_set_pre(void* p, oop v) { + oopDesc::bs()->write_ref_field_pre(p, v); +} + template inline void oop_store(T* p, oop v) { if (always_do_update_barrier) { oop_store((volatile T*)p, v); } else { + update_barrier_set_pre(p, v); oopDesc::encode_store_heap_oop(p, v); update_barrier_set(p, v); } } template inline void oop_store(volatile T* p, oop v) { + update_barrier_set_pre((void*)p, v); // Used by release_obj_field_put, so use release_store_ptr. oopDesc::release_encode_store_heap_oop(p, v); update_barrier_set((void*)p, v); @@ -698,8 +703,19 @@ inline int oopDesc::oop_iterate(OopClosureType* blk, MemRegion mr) { \ } ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_ITERATE_DEFN) -ALL_OOP_OOP_ITERATE_CLOSURES_3(OOP_ITERATE_DEFN) +ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_ITERATE_DEFN) +#ifndef SERIALGC +#define OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix) \ + \ +inline int oopDesc::oop_iterate_backwards(OopClosureType* blk) { \ + SpecializationStats::record_call(); \ + return blueprint()->oop_oop_iterate_backwards##nv_suffix(this, blk); \ +} + +ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_ITERATE_BACKWARDS_DEFN) +ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_ITERATE_BACKWARDS_DEFN) +#endif // !SERIALGC inline bool oopDesc::is_shared() const { return CompactingPermGenGen::is_shared(this); diff --git a/hotspot/src/share/vm/opto/addnode.cpp b/hotspot/src/share/vm/opto/addnode.cpp index e207e65eaab..2ff10cd083d 100644 --- a/hotspot/src/share/vm/opto/addnode.cpp +++ b/hotspot/src/share/vm/opto/addnode.cpp @@ -156,7 +156,8 @@ Node *AddNode::Ideal(PhaseGVN *phase, bool can_reshape) { if( add1_op == this_op && !con_right ) { Node *a12 = add1->in(2); const Type *t12 = phase->type( a12 ); - if( t12->singleton() && t12 != Type::TOP && (add1 != add1->in(1)) ) { + if( t12->singleton() && t12 != Type::TOP && (add1 != add1->in(1)) && + !(add1->in(1)->is_Phi() && 
add1->in(1)->as_Phi()->is_tripcount()) ) { assert(add1->in(1) != this, "dead loop in AddNode::Ideal"); add2 = add1->clone(); add2->set_req(2, in(2)); @@ -173,7 +174,8 @@ Node *AddNode::Ideal(PhaseGVN *phase, bool can_reshape) { if( add2_op == this_op && !con_left ) { Node *a22 = add2->in(2); const Type *t22 = phase->type( a22 ); - if( t22->singleton() && t22 != Type::TOP && (add2 != add2->in(1)) ) { + if( t22->singleton() && t22 != Type::TOP && (add2 != add2->in(1)) && + !(add2->in(1)->is_Phi() && add2->in(1)->as_Phi()->is_tripcount()) ) { assert(add2->in(1) != this, "dead loop in AddNode::Ideal"); Node *addx = add2->clone(); addx->set_req(1, in(1)); @@ -225,34 +227,63 @@ const Type *AddNode::add_of_identity( const Type *t1, const Type *t2 ) const { //============================================================================= //------------------------------Idealize--------------------------------------- Node *AddINode::Ideal(PhaseGVN *phase, bool can_reshape) { - int op1 = in(1)->Opcode(); - int op2 = in(2)->Opcode(); + Node* in1 = in(1); + Node* in2 = in(2); + int op1 = in1->Opcode(); + int op2 = in2->Opcode(); // Fold (con1-x)+con2 into (con1+con2)-x + if ( op1 == Op_AddI && op2 == Op_SubI ) { + // Swap edges to try optimizations below + in1 = in2; + in2 = in(1); + op1 = op2; + op2 = in2->Opcode(); + } if( op1 == Op_SubI ) { - const Type *t_sub1 = phase->type( in(1)->in(1) ); - const Type *t_2 = phase->type( in(2) ); + const Type *t_sub1 = phase->type( in1->in(1) ); + const Type *t_2 = phase->type( in2 ); if( t_sub1->singleton() && t_2->singleton() && t_sub1 != Type::TOP && t_2 != Type::TOP ) return new (phase->C, 3) SubINode(phase->makecon( add_ring( t_sub1, t_2 ) ), - in(1)->in(2) ); + in1->in(2) ); // Convert "(a-b)+(c-d)" into "(a+c)-(b+d)" if( op2 == Op_SubI ) { // Check for dead cycle: d = (a-b)+(c-d) - assert( in(1)->in(2) != this && in(2)->in(2) != this, + assert( in1->in(2) != this && in2->in(2) != this, "dead loop in AddINode::Ideal" ); Node *sub = 
new (phase->C, 3) SubINode(NULL, NULL); - sub->init_req(1, phase->transform(new (phase->C, 3) AddINode(in(1)->in(1), in(2)->in(1) ) )); - sub->init_req(2, phase->transform(new (phase->C, 3) AddINode(in(1)->in(2), in(2)->in(2) ) )); + sub->init_req(1, phase->transform(new (phase->C, 3) AddINode(in1->in(1), in2->in(1) ) )); + sub->init_req(2, phase->transform(new (phase->C, 3) AddINode(in1->in(2), in2->in(2) ) )); return sub; } + // Convert "(a-b)+(b+c)" into "(a+c)" + if( op2 == Op_AddI && in1->in(2) == in2->in(1) ) { + assert(in1->in(1) != this && in2->in(2) != this,"dead loop in AddINode::Ideal"); + return new (phase->C, 3) AddINode(in1->in(1), in2->in(2)); + } + // Convert "(a-b)+(c+b)" into "(a+c)" + if( op2 == Op_AddI && in1->in(2) == in2->in(2) ) { + assert(in1->in(1) != this && in2->in(1) != this,"dead loop in AddINode::Ideal"); + return new (phase->C, 3) AddINode(in1->in(1), in2->in(1)); + } + // Convert "(a-b)+(b-c)" into "(a-c)" + if( op2 == Op_SubI && in1->in(2) == in2->in(1) ) { + assert(in1->in(1) != this && in2->in(2) != this,"dead loop in AddINode::Ideal"); + return new (phase->C, 3) SubINode(in1->in(1), in2->in(2)); + } + // Convert "(a-b)+(c-a)" into "(c-b)" + if( op2 == Op_SubI && in1->in(1) == in2->in(2) ) { + assert(in1->in(2) != this && in2->in(1) != this,"dead loop in AddINode::Ideal"); + return new (phase->C, 3) SubINode(in2->in(1), in1->in(2)); + } } // Convert "x+(0-y)" into "(x-y)" - if( op2 == Op_SubI && phase->type(in(2)->in(1)) == TypeInt::ZERO ) - return new (phase->C, 3) SubINode(in(1), in(2)->in(2) ); + if( op2 == Op_SubI && phase->type(in2->in(1)) == TypeInt::ZERO ) + return new (phase->C, 3) SubINode(in1, in2->in(2) ); // Convert "(0-y)+x" into "(x-y)" - if( op1 == Op_SubI && phase->type(in(1)->in(1)) == TypeInt::ZERO ) - return new (phase->C, 3) SubINode( in(2), in(1)->in(2) ); + if( op1 == Op_SubI && phase->type(in1->in(1)) == TypeInt::ZERO ) + return new (phase->C, 3) SubINode( in2, in1->in(2) ); // Convert (x>>>z)+y into 
(x+(y<<z))>>>z for small constant z and y. // Helps with array allocation math constant folding @@ -266,15 +297,15 @@ Node *AddINode::Ideal(PhaseGVN *phase, bool can_reshape) { // Have not observed cases where type information exists to support // positive y and (x <= -(y << z)) if( op1 == Op_URShiftI && op2 == Op_ConI && - in(1)->in(2)->Opcode() == Op_ConI ) { - jint z = phase->type( in(1)->in(2) )->is_int()->get_con() & 0x1f; // only least significant 5 bits matter - jint y = phase->type( in(2) )->is_int()->get_con(); + in1->in(2)->Opcode() == Op_ConI ) { + jint z = phase->type( in1->in(2) )->is_int()->get_con() & 0x1f; // only least significant 5 bits matter + jint y = phase->type( in2 )->is_int()->get_con(); if( z < 5 && -5 < y && y < 0 ) { - const Type *t_in11 = phase->type(in(1)->in(1)); + const Type *t_in11 = phase->type(in1->in(1)); if( t_in11 != Type::TOP && (t_in11->is_int()->_lo >= -(y << z)) ) { - Node *a = phase->transform( new (phase->C, 3) AddINode( in(1)->in(1), phase->intcon(y<<z) ) ); - return new (phase->C, 3) URShiftINode( a, in(1)->in(2) ); + Node *a = phase->transform( new (phase->C, 3) AddINode( in1->in(1), phase->intcon(y<<z) ) ); + return new (phase->C, 3) URShiftINode( a, in1->in(2) ); } } } @@ -328,39 +359,73 @@ const Type *AddINode::add_ring( const Type *t0, const Type *t1 ) const { //============================================================================= //------------------------------Idealize--------------------------------------- Node *AddLNode::Ideal(PhaseGVN *phase, bool can_reshape) { - int op1 = in(1)->Opcode(); - int op2 = in(2)->Opcode(); + Node* in1 = in(1); + Node* in2 = in(2); + int op1 = in1->Opcode(); + int op2 = in2->Opcode(); + // Fold (con1-x)+con2 into (con1+con2)-x + if ( op1 == Op_AddL && op2 == Op_SubL ) { + // Swap edges to try optimizations below + in1 = in2; + in2 = in(1); + op1 = op2; + op2 = in2->Opcode(); + } // Fold (con1-x)+con2 into (con1+con2)-x if( op1 == Op_SubL ) { - const Type *t_sub1 = phase->type( in(1)->in(1) ); - const Type *t_2 = phase->type( in(2) ); +
const Type *t_sub1 = phase->type( in1->in(1) ); + const Type *t_2 = phase->type( in2 ); if( t_sub1->singleton() && t_2->singleton() && t_sub1 != Type::TOP && t_2 != Type::TOP ) return new (phase->C, 3) SubLNode(phase->makecon( add_ring( t_sub1, t_2 ) ), - in(1)->in(2) ); + in1->in(2) ); // Convert "(a-b)+(c-d)" into "(a+c)-(b+d)" if( op2 == Op_SubL ) { // Check for dead cycle: d = (a-b)+(c-d) - assert( in(1)->in(2) != this && in(2)->in(2) != this, + assert( in1->in(2) != this && in2->in(2) != this, "dead loop in AddLNode::Ideal" ); Node *sub = new (phase->C, 3) SubLNode(NULL, NULL); - sub->init_req(1, phase->transform(new (phase->C, 3) AddLNode(in(1)->in(1), in(2)->in(1) ) )); - sub->init_req(2, phase->transform(new (phase->C, 3) AddLNode(in(1)->in(2), in(2)->in(2) ) )); + sub->init_req(1, phase->transform(new (phase->C, 3) AddLNode(in1->in(1), in2->in(1) ) )); + sub->init_req(2, phase->transform(new (phase->C, 3) AddLNode(in1->in(2), in2->in(2) ) )); return sub; } + // Convert "(a-b)+(b+c)" into "(a+c)" + if( op2 == Op_AddL && in1->in(2) == in2->in(1) ) { + assert(in1->in(1) != this && in2->in(2) != this,"dead loop in AddLNode::Ideal"); + return new (phase->C, 3) AddLNode(in1->in(1), in2->in(2)); + } + // Convert "(a-b)+(c+b)" into "(a+c)" + if( op2 == Op_AddL && in1->in(2) == in2->in(2) ) { + assert(in1->in(1) != this && in2->in(1) != this,"dead loop in AddLNode::Ideal"); + return new (phase->C, 3) AddLNode(in1->in(1), in2->in(1)); + } + // Convert "(a-b)+(b-c)" into "(a-c)" + if( op2 == Op_SubL && in1->in(2) == in2->in(1) ) { + assert(in1->in(1) != this && in2->in(2) != this,"dead loop in AddLNode::Ideal"); + return new (phase->C, 3) SubLNode(in1->in(1), in2->in(2)); + } + // Convert "(a-b)+(c-a)" into "(c-b)" + if( op2 == Op_SubL && in1->in(1) == in1->in(2) ) { + assert(in1->in(2) != this && in2->in(1) != this,"dead loop in AddLNode::Ideal"); + return new (phase->C, 3) SubLNode(in2->in(1), in1->in(2)); + } } // Convert "x+(0-y)" into "(x-y)" - if( op2 == 
Op_SubL && phase->type(in(2)->in(1)) == TypeLong::ZERO ) - return new (phase->C, 3) SubLNode(in(1), in(2)->in(2) ); + if( op2 == Op_SubL && phase->type(in2->in(1)) == TypeLong::ZERO ) + return new (phase->C, 3) SubLNode( in1, in2->in(2) ); + + // Convert "(0-y)+x" into "(x-y)" + if( op1 == Op_SubL && phase->type(in1->in(1)) == TypeInt::ZERO ) + return new (phase->C, 3) SubLNode( in2, in1->in(2) ); // Convert "X+X+X+X+X...+X+Y" into "k*X+Y" or really convert "X+(X+Y)" // into "(X<<1)+Y" and let shift-folding happen. if( op2 == Op_AddL && - in(2)->in(1) == in(1) && + in2->in(1) == in1 && op1 != Op_ConL && 0 ) { - Node *shift = phase->transform(new (phase->C, 3) LShiftLNode(in(1),phase->intcon(1))); - return new (phase->C, 3) AddLNode(shift,in(2)->in(2)); + Node *shift = phase->transform(new (phase->C, 3) LShiftLNode(in1,phase->intcon(1))); + return new (phase->C, 3) AddLNode(shift,in2->in(2)); } return AddNode::Ideal(phase, can_reshape); diff --git a/hotspot/src/share/vm/opto/bytecodeInfo.cpp b/hotspot/src/share/vm/opto/bytecodeInfo.cpp index 847988efe8d..38623fd5f76 100644 --- a/hotspot/src/share/vm/opto/bytecodeInfo.cpp +++ b/hotspot/src/share/vm/opto/bytecodeInfo.cpp @@ -25,19 +25,6 @@ #include "incls/_precompiled.incl" #include "incls/_bytecodeInfo.cpp.incl" -// These variables are declared in parse1.cpp -extern int explicit_null_checks_inserted; -extern int explicit_null_checks_elided; -extern int explicit_null_checks_inserted_old; -extern int explicit_null_checks_elided_old; -extern int nodes_created_old; -extern int nodes_created; -extern int methods_parsed_old; -extern int methods_parsed; -extern int methods_seen; -extern int methods_seen_old; - - //============================================================================= //------------------------------InlineTree------------------------------------- InlineTree::InlineTree( Compile* c, const InlineTree *caller_tree, ciMethod* callee, JVMState* caller_jvms, int caller_bci, float site_invoke_ratio ) @@ 
-517,27 +504,3 @@ InlineTree* InlineTree::find_subtree_from_root(InlineTree* root, JVMState* jvms, } return iltp; } - -// ---------------------------------------------------------------------------- -#ifndef PRODUCT - -static void per_method_stats() { - // Compute difference between this method's cumulative totals and old totals - int explicit_null_checks_cur = explicit_null_checks_inserted - explicit_null_checks_inserted_old; - int elided_null_checks_cur = explicit_null_checks_elided - explicit_null_checks_elided_old; - - // Print differences - if( explicit_null_checks_cur ) - tty->print_cr("XXX Explicit NULL checks inserted: %d", explicit_null_checks_cur); - if( elided_null_checks_cur ) - tty->print_cr("XXX Explicit NULL checks removed at parse time: %d", elided_null_checks_cur); - - // Store the current cumulative totals - nodes_created_old = nodes_created; - methods_parsed_old = methods_parsed; - methods_seen_old = methods_seen; - explicit_null_checks_inserted_old = explicit_null_checks_inserted; - explicit_null_checks_elided_old = explicit_null_checks_elided; -} - -#endif diff --git a/hotspot/src/share/vm/opto/callnode.cpp b/hotspot/src/share/vm/opto/callnode.cpp index 2e54ce71801..8e8fc24da20 100644 --- a/hotspot/src/share/vm/opto/callnode.cpp +++ b/hotspot/src/share/vm/opto/callnode.cpp @@ -1034,6 +1034,39 @@ AllocateNode::AllocateNode(Compile* C, const TypeFunc *atype, //============================================================================= uint AllocateArrayNode::size_of() const { return sizeof(*this); } +// Retrieve the length from the AllocateArrayNode. Narrow the type with a +// CastII, if appropriate. If we are not allowed to create new nodes, and +// a CastII is appropriate, return NULL. 
+Node *AllocateArrayNode::make_ideal_length(const TypeOopPtr* oop_type, PhaseTransform *phase, bool allow_new_nodes) { + Node *length = in(AllocateNode::ALength); + assert(length != NULL, "length is not null"); + + const TypeInt* length_type = phase->find_int_type(length); + const TypeAryPtr* ary_type = oop_type->isa_aryptr(); + + if (ary_type != NULL && length_type != NULL) { + const TypeInt* narrow_length_type = ary_type->narrow_size_type(length_type); + if (narrow_length_type != length_type) { + // Assert one of: + // - the narrow_length is 0 + // - the narrow_length is not wider than length + assert(narrow_length_type == TypeInt::ZERO || + (narrow_length_type->_hi <= length_type->_hi && + narrow_length_type->_lo >= length_type->_lo), + "narrow type must be narrower than length type"); + + // Return NULL if new nodes are not allowed + if (!allow_new_nodes) return NULL; + // Create a cast which is control dependent on the initialization to + // propagate the fact that the array length must be positive. + length = new (phase->C, 2) CastIINode(length, narrow_length_type); + length->set_req(0, initialization()->proj_out(0)); + } + } + + return length; +} + //============================================================================= uint LockNode::size_of() const { return sizeof(*this); } diff --git a/hotspot/src/share/vm/opto/callnode.hpp b/hotspot/src/share/vm/opto/callnode.hpp index 0f9b26e154c..20192ddf0f6 100644 --- a/hotspot/src/share/vm/opto/callnode.hpp +++ b/hotspot/src/share/vm/opto/callnode.hpp @@ -755,6 +755,15 @@ public: virtual int Opcode() const; virtual uint size_of() const; // Size is bigger + // Dig the length operand out of a array allocation site. 
+ Node* Ideal_length() { + return in(AllocateNode::ALength); + } + + // Dig the length operand out of a array allocation site and narrow the + // type with a CastII, if necessary + Node* make_ideal_length(const TypeOopPtr* ary_type, PhaseTransform *phase, bool can_create = true); + // Pattern-match a possible usage of AllocateArrayNode. // Return null if no allocation is recognized. static AllocateArrayNode* Ideal_array_allocation(Node* ptr, PhaseTransform* phase) { @@ -762,12 +771,6 @@ public: return (allo == NULL || !allo->is_AllocateArray()) ? NULL : allo->as_AllocateArray(); } - - // Dig the length operand out of a (possible) array allocation site. - static Node* Ideal_length(Node* ptr, PhaseTransform* phase) { - AllocateArrayNode* allo = Ideal_array_allocation(ptr, phase); - return (allo == NULL) ? NULL : allo->in(AllocateNode::ALength); - } }; //------------------------------AbstractLockNode----------------------------------- diff --git a/hotspot/src/share/vm/opto/cfgnode.cpp b/hotspot/src/share/vm/opto/cfgnode.cpp index 0e7b2845dbc..fdee151f1e9 100644 --- a/hotspot/src/share/vm/opto/cfgnode.cpp +++ b/hotspot/src/share/vm/opto/cfgnode.cpp @@ -1665,7 +1665,11 @@ Node *PhiNode::Ideal(PhaseGVN *phase, bool can_reshape) { // compress paths and change unreachable cycles to TOP // If not, we can update the input infinitely along a MergeMem cycle // Equivalent code is in MemNode::Ideal_common - Node *m = phase->transform(n); + Node *m = phase->transform(n); + if (outcnt() == 0) { // Above transform() may kill us! + progress = phase->C->top(); + break; + } // If tranformed to a MergeMem, get the desired slice // Otherwise the returned node represents memory for every slice Node *new_mem = (m->is_MergeMem()) ? @@ -1765,9 +1769,60 @@ Node *PhiNode::Ideal(PhaseGVN *phase, bool can_reshape) { } } +#ifdef _LP64 + // Push DecodeN down through phi. + // The rest of phi graph will transform by split EncodeP node through phis up.
+ if (UseCompressedOops && can_reshape && progress == NULL) { + bool may_push = true; + bool has_decodeN = false; + Node* in_decodeN = NULL; + for (uint i=1; iis_DecodeN() && ii->bottom_type() == bottom_type()) { + has_decodeN = true; + in_decodeN = ii->in(1); + } else if (!ii->is_Phi()) { + may_push = false; + } + } + + if (has_decodeN && may_push) { + PhaseIterGVN *igvn = phase->is_IterGVN(); + // Note: in_decodeN is used only to define the type of new phi here. + PhiNode *new_phi = PhiNode::make_blank(in(0), in_decodeN); + uint orig_cnt = req(); + for (uint i=1; iis_DecodeN()) { + assert(ii->bottom_type() == bottom_type(), "sanity"); + new_ii = ii->in(1); + } else { + assert(ii->is_Phi(), "sanity"); + if (ii->as_Phi() == this) { + new_ii = new_phi; + } else { + new_ii = new (phase->C, 2) EncodePNode(ii, in_decodeN->bottom_type()); + igvn->register_new_node_with_optimizer(new_ii); + } + } + new_phi->set_req(i, new_ii); + } + igvn->register_new_node_with_optimizer(new_phi, this); + progress = new (phase->C, 2) DecodeNNode(new_phi, bottom_type()); + } + } +#endif + return progress; // Return any progress } +//------------------------------is_tripcount----------------------------------- +bool PhiNode::is_tripcount() const { + return (in(0) != NULL && in(0)->is_CountedLoop() && + in(0)->as_CountedLoop()->phi() == this); +} + //------------------------------out_RegMask------------------------------------ const RegMask &PhiNode::in_RegMask(uint i) const { return i ? 
out_RegMask() : RegMask::Empty; @@ -1783,9 +1838,7 @@ const RegMask &PhiNode::out_RegMask() const { #ifndef PRODUCT void PhiNode::dump_spec(outputStream *st) const { TypeNode::dump_spec(st); - if (in(0) != NULL && - in(0)->is_CountedLoop() && - in(0)->as_CountedLoop()->phi() == this) { + if (is_tripcount()) { st->print(" #tripcount"); } } diff --git a/hotspot/src/share/vm/opto/cfgnode.hpp b/hotspot/src/share/vm/opto/cfgnode.hpp index 8fd493a308e..c902d12510a 100644 --- a/hotspot/src/share/vm/opto/cfgnode.hpp +++ b/hotspot/src/share/vm/opto/cfgnode.hpp @@ -162,6 +162,8 @@ public: return NULL; // not a copy! } + bool is_tripcount() const; + // Determine a unique non-trivial input, if any. // Ignore casts if it helps. Return NULL on failure. Node* unique_input(PhaseTransform *phase); diff --git a/hotspot/src/share/vm/opto/compile.cpp b/hotspot/src/share/vm/opto/compile.cpp index 8f833042bf2..91ae2e6b8f5 100644 --- a/hotspot/src/share/vm/opto/compile.cpp +++ b/hotspot/src/share/vm/opto/compile.cpp @@ -467,6 +467,7 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr } } set_print_assembly(print_opto_assembly); + set_parsed_irreducible_loop(false); #endif if (ProfileTraps) { @@ -550,6 +551,8 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr rethrow_exceptions(kit.transfer_exceptions_into_jvms()); } + print_method("Before RemoveUseless"); + // Remove clutter produced by parsing. 
if (!failing()) { ResourceMark rm; @@ -615,8 +618,6 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr if (failing()) return; NOT_PRODUCT( verify_graph_edges(); ) - print_method("Before Matching"); - #ifndef PRODUCT if (PrintIdeal) { ttyLocker ttyl; // keep the following output all in one block @@ -720,6 +721,7 @@ Compile::Compile( ciEnv* ci_env, TraceTime t1(NULL, &_t_totalCompilation, TimeCompiler, false); TraceTime t2(NULL, &_t_stubCompilation, TimeCompiler, false); set_print_assembly(PrintFrameConverterAssembly); + set_parsed_irreducible_loop(false); #endif CompileWrapper cw(this); Init(/*AliasLevel=*/ 0); @@ -2073,6 +2075,44 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) { } #ifdef _LP64 + case Op_CastPP: + if (n->in(1)->is_DecodeN() && UseImplicitNullCheckForNarrowOop) { + Compile* C = Compile::current(); + Node* in1 = n->in(1); + const Type* t = n->bottom_type(); + Node* new_in1 = in1->clone(); + new_in1->as_DecodeN()->set_type(t); + + if (!Matcher::clone_shift_expressions) { + // + // x86, ARM and friends can handle 2 adds in addressing mode + // and Matcher can fold a DecodeN node into address by using + // a narrow oop directly and do implicit NULL check in address: + // + // [R12 + narrow_oop_reg<<3 + offset] + // NullCheck narrow_oop_reg + // + // On other platforms (Sparc) we have to keep new DecodeN node and + // use it to do implicit NULL check in address: + // + // decode_not_null narrow_oop_reg, base_reg + // [base_reg + offset] + // NullCheck base_reg + // + // Pin the new DecodeN node to non-null path on these platforms (Sparc) + // to keep the information to which NULL check the new DecodeN node + // corresponds to use it as value in implicit_null_check().
+ // + new_in1->set_req(0, n->in(0)); + } + + n->subsume_by(new_in1); + if (in1->outcnt() == 0) { + in1->disconnect_inputs(NULL); + } + } + break; + case Op_CmpP: // Do this transformation here to preserve CmpPNode::sub() and // other TypePtr related Ideal optimizations (for example, ptr nullness). @@ -2092,24 +2132,44 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) { } else if (in2->Opcode() == Op_ConP) { const Type* t = in2->bottom_type(); if (t == TypePtr::NULL_PTR && UseImplicitNullCheckForNarrowOop) { - if (Matcher::clone_shift_expressions) { - // x86, ARM and friends can handle 2 adds in addressing mode. - // Decode a narrow oop and do implicit NULL check in address - // [R12 + narrow_oop_reg<<3 + offset] - new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR); - } else { - // Don't replace CmpP(o ,null) if 'o' is used in AddP - // to generate implicit NULL check on Sparc where - // narrow oops can't be used in address. - uint i = 0; - for (; i < in1->outcnt(); i++) { - if (in1->raw_out(i)->is_AddP()) - break; - } - if (i >= in1->outcnt()) { - new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR); - } - } + new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR); + // + // This transformation together with CastPP transformation above + // will generate code for implicit NULL checks for compressed oops. + // + // The original code after Optimize() + // + // LoadN memory, narrow_oop_reg + // decode narrow_oop_reg, base_reg + // CmpP base_reg, NULL + // CastPP base_reg // NotNull + // Load [base_reg + offset], val_reg + // + // after these transformations will be + // + // LoadN memory, narrow_oop_reg + // CmpN narrow_oop_reg, NULL + // decode_not_null narrow_oop_reg, base_reg + // Load [base_reg + offset], val_reg + // + // and the uncommon path (== NULL) will use narrow_oop_reg directly + // since narrow oops can be used in debug info now (see the code in + // final_graph_reshaping_walk()).
+ // + // At the end the code will be matched to + // on x86: + // + // Load_narrow_oop memory, narrow_oop_reg + // Load [R12 + narrow_oop_reg<<3 + offset], val_reg + // NullCheck narrow_oop_reg + // + // and on sparc: + // + // Load_narrow_oop memory, narrow_oop_reg + // decode_not_null narrow_oop_reg, base_reg + // Load [base_reg + offset], val_reg + // NullCheck base_reg + // } else if (t->isa_oopptr()) { new_in2 = ConNode::make(C, t->make_narrowoop()); } @@ -2126,6 +2186,49 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) { } } break; + + case Op_DecodeN: + assert(!n->in(1)->is_EncodeP(), "should be optimized out"); + break; + + case Op_EncodeP: { + Node* in1 = n->in(1); + if (in1->is_DecodeN()) { + n->subsume_by(in1->in(1)); + } else if (in1->Opcode() == Op_ConP) { + Compile* C = Compile::current(); + const Type* t = in1->bottom_type(); + if (t == TypePtr::NULL_PTR) { + n->subsume_by(ConNode::make(C, TypeNarrowOop::NULL_PTR)); + } else if (t->isa_oopptr()) { + n->subsume_by(ConNode::make(C, t->make_narrowoop())); + } + } + if (in1->outcnt() == 0) { + in1->disconnect_inputs(NULL); + } + break; + } + + case Op_Phi: + if (n->as_Phi()->bottom_type()->isa_narrowoop()) { + // The EncodeP optimization may create Phi with the same edges + // for all paths. It is not handled well by Register Allocator. 
+ Node* unique_in = n->in(1); + assert(unique_in != NULL, ""); + uint cnt = n->req(); + for (uint i = 2; i < cnt; i++) { + Node* m = n->in(i); + assert(m != NULL, ""); + if (unique_in != m) + unique_in = NULL; + } + if (unique_in != NULL) { + n->subsume_by(unique_in); + } + } + break; + #endif case Op_ModI: diff --git a/hotspot/src/share/vm/opto/compile.hpp b/hotspot/src/share/vm/opto/compile.hpp index 455df6bda30..3310bb99003 100644 --- a/hotspot/src/share/vm/opto/compile.hpp +++ b/hotspot/src/share/vm/opto/compile.hpp @@ -160,6 +160,7 @@ class Compile : public Phase { bool _print_assembly; // True if we should dump assembly code for this compilation #ifndef PRODUCT bool _trace_opto_output; + bool _parsed_irreducible_loop; // True if ciTypeFlow detected irreducible loops during parsing #endif // Compilation environment. @@ -319,6 +320,8 @@ class Compile : public Phase { } #ifndef PRODUCT bool trace_opto_output() const { return _trace_opto_output; } + bool parsed_irreducible_loop() const { return _parsed_irreducible_loop; } + void set_parsed_irreducible_loop(bool z) { _parsed_irreducible_loop = z; } #endif void begin_method() { diff --git a/hotspot/src/share/vm/opto/connode.cpp b/hotspot/src/share/vm/opto/connode.cpp index ceebd76a6bb..7e1cafefa57 100644 --- a/hotspot/src/share/vm/opto/connode.cpp +++ b/hotspot/src/share/vm/opto/connode.cpp @@ -433,8 +433,8 @@ Node *ConstraintCastNode::Ideal_DU_postCCP( PhaseCCP *ccp ) { // If not converting int->oop, throw away cast after constant propagation Node *CastPPNode::Ideal_DU_postCCP( PhaseCCP *ccp ) { const Type *t = ccp->type(in(1)); - if (!t->isa_oop_ptr()) { - return NULL; // do not transform raw pointers + if (!t->isa_oop_ptr() || in(1)->is_DecodeN()) { + return NULL; // do not transform raw pointers or narrow oops } return ConstraintCastNode::Ideal_DU_postCCP(ccp); } diff --git a/hotspot/src/share/vm/opto/divnode.cpp b/hotspot/src/share/vm/opto/divnode.cpp index c1c2b5df442..94240873ef5 100644 --- 
a/hotspot/src/share/vm/opto/divnode.cpp +++ b/hotspot/src/share/vm/opto/divnode.cpp @@ -110,10 +110,13 @@ static Node *transform_int_divide( PhaseGVN *phase, Node *dividend, jint divisor } else if( dividend->Opcode() == Op_AndI ) { // An AND mask of sufficient size clears the low bits and // I can avoid rounding. - const TypeInt *andconi = phase->type( dividend->in(2) )->isa_int(); - if( andconi && andconi->is_con(-d) ) { - dividend = dividend->in(1); - needs_rounding = false; + const TypeInt *andconi_t = phase->type( dividend->in(2) )->isa_int(); + if( andconi_t && andconi_t->is_con() ) { + jint andconi = andconi_t->get_con(); + if( andconi < 0 && is_power_of_2(-andconi) && (-andconi) >= d ) { + dividend = dividend->in(1); + needs_rounding = false; + } } } @@ -316,10 +319,13 @@ static Node *transform_long_divide( PhaseGVN *phase, Node *dividend, jlong divis } else if( dividend->Opcode() == Op_AndL ) { // An AND mask of sufficient size clears the low bits and // I can avoid rounding. - const TypeLong *andconl = phase->type( dividend->in(2) )->isa_long(); - if( andconl && andconl->is_con(-d)) { - dividend = dividend->in(1); - needs_rounding = false; + const TypeLong *andconl_t = phase->type( dividend->in(2) )->isa_long(); + if( andconl_t && andconl_t->is_con() ) { + jlong andconl = andconl_t->get_con(); + if( andconl < 0 && is_power_of_2_long(-andconl) && (-andconl) >= d ) { + dividend = dividend->in(1); + needs_rounding = false; + } } } @@ -704,11 +710,18 @@ const Type *DivDNode::Value( PhaseTransform *phase ) const { if( t2 == TypeD::ONE ) return t1; - // If divisor is a constant and not zero, divide them numbers - if( t1->base() == Type::DoubleCon && - t2->base() == Type::DoubleCon && - t2->getd() != 0.0 ) // could be negative zero - return TypeD::make( t1->getd()/t2->getd() ); +#if defined(IA32) + if (!phase->C->method()->is_strict()) + // Can't trust native compilers to properly fold strict double + // division with round-to-zero on this platform. 
+#endif + { + // If divisor is a constant and not zero, divide them numbers + if( t1->base() == Type::DoubleCon && + t2->base() == Type::DoubleCon && + t2->getd() != 0.0 ) // could be negative zero + return TypeD::make( t1->getd()/t2->getd() ); + } // If the dividend is a constant zero // Note: if t1 and t2 are zero then result is NaN (JVMS page 213) diff --git a/hotspot/src/share/vm/opto/doCall.cpp b/hotspot/src/share/vm/opto/doCall.cpp index 99772d4450c..66cb13b9f28 100644 --- a/hotspot/src/share/vm/opto/doCall.cpp +++ b/hotspot/src/share/vm/opto/doCall.cpp @@ -795,7 +795,7 @@ ciMethod* Parse::optimize_inlining(ciMethod* caller, int bci, ciInstanceKlass* k ciInstanceKlass *ikl = receiver_type->klass()->as_instance_klass(); if (ikl->is_loaded() && ikl->is_initialized() && !ikl->is_interface() && - (ikl == actual_receiver || ikl->is_subclass_of(actual_receiver))) { + (ikl == actual_receiver || ikl->is_subtype_of(actual_receiver))) { // ikl is a same or better type than the original actual_receiver, // e.g. static receiver from bytecodes. actual_receiver = ikl; diff --git a/hotspot/src/share/vm/opto/graphKit.cpp b/hotspot/src/share/vm/opto/graphKit.cpp index 2fcb90f4e45..36a6eafde59 100644 --- a/hotspot/src/share/vm/opto/graphKit.cpp +++ b/hotspot/src/share/vm/opto/graphKit.cpp @@ -587,7 +587,7 @@ PreserveJVMState::PreserveJVMState(GraphKit* kit, bool clone_map) { #ifdef ASSERT _bci = kit->bci(); Parse* parser = kit->is_Parse(); - int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->pre_order(); + int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->rpo(); _block = block; #endif } @@ -596,7 +596,7 @@ PreserveJVMState::~PreserveJVMState() { #ifdef ASSERT assert(kit->bci() == _bci, "bci must not shift"); Parse* parser = kit->is_Parse(); - int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->pre_order(); + int block = (parser == NULL || parser->block() == NULL) ? 
-1 : parser->block()->rpo(); assert(block == _block, "block must not shift"); #endif kit->set_map(_map); @@ -1049,10 +1049,19 @@ Node* GraphKit::load_object_klass(Node* obj) { //-------------------------load_array_length----------------------------------- Node* GraphKit::load_array_length(Node* array) { // Special-case a fresh allocation to avoid building nodes: - Node* alen = AllocateArrayNode::Ideal_length(array, &_gvn); - if (alen != NULL) return alen; - Node *r_adr = basic_plus_adr(array, arrayOopDesc::length_offset_in_bytes()); - return _gvn.transform( new (C, 3) LoadRangeNode(0, immutable_memory(), r_adr, TypeInt::POS)); + AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(array, &_gvn); + Node *alen; + if (alloc == NULL) { + Node *r_adr = basic_plus_adr(array, arrayOopDesc::length_offset_in_bytes()); + alen = _gvn.transform( new (C, 3) LoadRangeNode(0, immutable_memory(), r_adr, TypeInt::POS)); + } else { + alen = alloc->Ideal_length(); + Node* ccast = alloc->make_ideal_length(_gvn.type(array)->is_aryptr(), &_gvn); + if (ccast != alen) { + alen = _gvn.transform(ccast); + } + } + return alen; } //------------------------------do_null_check---------------------------------- @@ -1180,6 +1189,12 @@ Node* GraphKit::null_check_common(Node* value, BasicType type, else reason = Deoptimization::Reason_div0_check; + // %%% Since Reason_unhandled is not recorded on a per-bytecode basis, + // ciMethodData::has_trap_at will return a conservative -1 if any + // must-be-null assertion has failed. This could cause performance + // problems for a method after its first do_null_assert failure. + // Consider using 'Reason_class_check' instead? + // To cause an implicit null check, we set the not-null probability // to the maximum (PROB_MAX). For an explicit check the probablity // is set to a smaller value. 
@@ -1367,6 +1382,10 @@ void GraphKit::pre_barrier(Node* ctl, BarrierSet* bs = Universe::heap()->barrier_set(); set_control(ctl); switch (bs->kind()) { + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + g1_write_barrier_pre(obj, adr, adr_idx, val, val_type, bt); + break; case BarrierSet::CardTableModRef: case BarrierSet::CardTableExtension: @@ -1391,6 +1410,10 @@ void GraphKit::post_barrier(Node* ctl, BarrierSet* bs = Universe::heap()->barrier_set(); set_control(ctl); switch (bs->kind()) { + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + g1_write_barrier_post(store, obj, adr, adr_idx, val, bt, use_precise); + break; case BarrierSet::CardTableModRef: case BarrierSet::CardTableExtension: @@ -2833,20 +2856,18 @@ Node* GraphKit::set_output_for_allocation(AllocateNode* alloc, assert(just_allocated_object(control()) == javaoop, "just allocated"); #ifdef ASSERT - { // Verify that the AllocateNode::Ideal_foo recognizers work: - Node* kn = alloc->in(AllocateNode::KlassNode); - Node* ln = alloc->in(AllocateNode::ALength); - assert(AllocateNode::Ideal_klass(rawoop, &_gvn) == kn, - "Ideal_klass works"); - assert(AllocateNode::Ideal_klass(javaoop, &_gvn) == kn, - "Ideal_klass works"); + { // Verify that the AllocateNode::Ideal_allocation recognizers work: + assert(AllocateNode::Ideal_allocation(rawoop, &_gvn) == alloc, + "Ideal_allocation works"); + assert(AllocateNode::Ideal_allocation(javaoop, &_gvn) == alloc, + "Ideal_allocation works"); if (alloc->is_AllocateArray()) { - assert(AllocateArrayNode::Ideal_length(rawoop, &_gvn) == ln, - "Ideal_length works"); - assert(AllocateArrayNode::Ideal_length(javaoop, &_gvn) == ln, - "Ideal_length works"); + assert(AllocateArrayNode::Ideal_array_allocation(rawoop, &_gvn) == alloc->as_AllocateArray(), + "Ideal_allocation works"); + assert(AllocateArrayNode::Ideal_array_allocation(javaoop, &_gvn) == alloc->as_AllocateArray(), + "Ideal_allocation works"); } else { - assert(ln->is_top(), "no length, please"); 
+ assert(alloc->in(AllocateNode::ALength)->is_top(), "no length, please"); } } #endif //ASSERT @@ -3095,25 +3116,20 @@ Node* GraphKit::new_array(Node* klass_node, // array klass (maybe variable) // (This happens via a non-constant argument to inline_native_newArray.) // In any case, the value of klass_node provides the desired array type. const TypeInt* length_type = _gvn.find_int_type(length); - const TypeInt* narrow_length_type = NULL; const TypeOopPtr* ary_type = _gvn.type(klass_node)->is_klassptr()->as_instance_type(); if (ary_type->isa_aryptr() && length_type != NULL) { // Try to get a better type than POS for the size ary_type = ary_type->is_aryptr()->cast_to_size(length_type); - narrow_length_type = ary_type->is_aryptr()->size(); - if (narrow_length_type == length_type) - narrow_length_type = NULL; } Node* javaoop = set_output_for_allocation(alloc, ary_type, raw_mem_only); - // Cast length on remaining path to be positive: - if (narrow_length_type != NULL) { - Node* ccast = new (C, 2) CastIINode(length, narrow_length_type); - ccast->set_req(0, control()); - _gvn.set_type_bottom(ccast); - record_for_igvn(ccast); - if (map()->find_edge(length) >= 0) { + // Cast length on remaining path to be as narrow as possible + if (map()->find_edge(length) >= 0) { + Node* ccast = alloc->make_ideal_length(ary_type, &_gvn); + if (ccast != length) { + _gvn.set_type_bottom(ccast); + record_for_igvn(ccast); replace_in_map(length, ccast); } } @@ -3177,3 +3193,251 @@ InitializeNode* AllocateNode::initialization() { } return NULL; } + +void GraphKit::g1_write_barrier_pre(Node* obj, + Node* adr, + uint alias_idx, + Node* val, + const Type* val_type, + BasicType bt) { + IdealKit ideal(gvn(), control(), merged_memory(), true); +#define __ ideal. 
+ __ declares_done(); + + Node* thread = __ thread(); + + Node* no_ctrl = NULL; + Node* no_base = __ top(); + Node* zero = __ ConI(0); + + float likely = PROB_LIKELY(0.999); + float unlikely = PROB_UNLIKELY(0.999); + + BasicType active_type = in_bytes(PtrQueue::byte_width_of_active()) == 4 ? T_INT : T_BYTE; + assert(in_bytes(PtrQueue::byte_width_of_active()) == 4 || in_bytes(PtrQueue::byte_width_of_active()) == 1, "flag width"); + + // Offsets into the thread + const int marking_offset = in_bytes(JavaThread::satb_mark_queue_offset() + // 648 + PtrQueue::byte_offset_of_active()); + const int index_offset = in_bytes(JavaThread::satb_mark_queue_offset() + // 656 + PtrQueue::byte_offset_of_index()); + const int buffer_offset = in_bytes(JavaThread::satb_mark_queue_offset() + // 652 + PtrQueue::byte_offset_of_buf()); + // Now the actual pointers into the thread + + // set_control( ctl); + + Node* marking_adr = __ AddP(no_base, thread, __ ConX(marking_offset)); + Node* buffer_adr = __ AddP(no_base, thread, __ ConX(buffer_offset)); + Node* index_adr = __ AddP(no_base, thread, __ ConX(index_offset)); + + // Now some of the values + + Node* marking = __ load(no_ctrl, marking_adr, TypeInt::INT, active_type, Compile::AliasIdxRaw); + Node* index = __ load(no_ctrl, index_adr, TypeInt::INT, T_INT, Compile::AliasIdxRaw); + Node* buffer = __ load(no_ctrl, buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw); + + // if (!marking) + __ if_then(marking, BoolTest::ne, zero); { + + const Type* t1 = adr->bottom_type(); + const Type* t2 = val->bottom_type(); + + Node* orig = __ load(no_ctrl, adr, val_type, bt, alias_idx); + // if (orig != NULL) + __ if_then(orig, BoolTest::ne, null()); { + + // load original value + // alias_idx correct?? + + // is the queue for this thread full? 
+ __ if_then(index, BoolTest::ne, zero, likely); { + + // decrement the index + Node* next_index = __ SubI(index, __ ConI(sizeof(intptr_t))); + Node* next_indexX = next_index; +#ifdef _LP64 + // We could refine the type for what it's worth + // const TypeLong* lidxtype = TypeLong::make(CONST64(0), get_size_from_queue); + next_indexX = _gvn.transform( new (C, 2) ConvI2LNode(next_index, TypeLong::make(0, max_jlong, Type::WidenMax)) ); +#endif // _LP64 + + // Now get the buffer location we will log the original value into and store it + + Node *log_addr = __ AddP(no_base, buffer, next_indexX); + // __ store(__ ctrl(), log_addr, orig, T_OBJECT, C->get_alias_index(TypeOopPtr::BOTTOM)); + __ store(__ ctrl(), log_addr, orig, T_OBJECT, Compile::AliasIdxRaw); + + + // update the index + // __ store(__ ctrl(), index_adr, next_index, T_INT, Compile::AliasIdxRaw); + // This is a hack to force this store to occur before the oop store that is coming up + __ store(__ ctrl(), index_adr, next_index, T_INT, C->get_alias_index(TypeOopPtr::BOTTOM)); + + } __ else_(); { + + // logging buffer is full, call the runtime + const TypeFunc *tf = OptoRuntime::g1_wb_pre_Type(); + // __ make_leaf_call(tf, OptoRuntime::g1_wb_pre_Java(), "g1_wb_pre", orig, thread); + __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), "g1_wb_pre", orig, thread); + } __ end_if(); + } __ end_if(); + } __ end_if(); + + __ drain_delay_transform(); + set_control( __ ctrl()); + set_all_memory( __ merged_memory()); + +#undef __ +} + +// +// Update the card table and add card address to the queue +// +void GraphKit::g1_mark_card(IdealKit* ideal, Node* card_adr, Node* store, Node* index, Node* index_adr, Node* buffer, const TypeFunc* tf) { +#define __ ideal-> + Node* zero = __ ConI(0); + Node* no_base = __ top(); + BasicType card_bt = T_BYTE; + // Smash zero into card. 
MUST BE ORDERED WRT TO STORE + __ storeCM(__ ctrl(), card_adr, zero, store, card_bt, Compile::AliasIdxRaw); + + // Now do the queue work + __ if_then(index, BoolTest::ne, zero); { + + Node* next_index = __ SubI(index, __ ConI(sizeof(intptr_t))); + Node* next_indexX = next_index; +#ifdef _LP64 + // We could refine the type for what it's worth + // const TypeLong* lidxtype = TypeLong::make(CONST64(0), get_size_from_queue); + next_indexX = _gvn.transform( new (C, 2) ConvI2LNode(next_index, TypeLong::make(0, max_jlong, Type::WidenMax)) ); +#endif // _LP64 + Node* log_addr = __ AddP(no_base, buffer, next_indexX); + + __ store(__ ctrl(), log_addr, card_adr, T_ADDRESS, Compile::AliasIdxRaw); + __ store(__ ctrl(), index_adr, next_index, T_INT, Compile::AliasIdxRaw); + + } __ else_(); { + __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), "g1_wb_post", card_adr, __ thread()); + } __ end_if(); +#undef __ +} + +void GraphKit::g1_write_barrier_post(Node* store, + Node* obj, + Node* adr, + uint alias_idx, + Node* val, + BasicType bt, + bool use_precise) { + // If we are writing a NULL then we need no post barrier + + if (val != NULL && val->is_Con() && val->bottom_type() == TypePtr::NULL_PTR) { + // Must be NULL + const Type* t = val->bottom_type(); + assert(t == Type::TOP || t == TypePtr::NULL_PTR, "must be NULL"); + // No post barrier if writing NULLx + return; + } + + if (!use_precise) { + // All card marks for a (non-array) instance are in one place: + adr = obj; + } + // (Else it's an array (or unknown), and we want more precise card marks.) + assert(adr != NULL, ""); + + IdealKit ideal(gvn(), control(), merged_memory(), true); +#define __ ideal. 
+ __ declares_done(); + + Node* thread = __ thread(); + + Node* no_ctrl = NULL; + Node* no_base = __ top(); + float likely = PROB_LIKELY(0.999); + float unlikely = PROB_UNLIKELY(0.999); + Node* zero = __ ConI(0); + Node* zeroX = __ ConX(0); + + // Get the alias_index for raw card-mark memory + const TypePtr* card_type = TypeRawPtr::BOTTOM; + + const TypeFunc *tf = OptoRuntime::g1_wb_post_Type(); + + // Get the address of the card table + CardTableModRefBS* ct = + (CardTableModRefBS*)(Universe::heap()->barrier_set()); + Node *card_table = __ makecon(TypeRawPtr::make((address)ct->byte_map_base)); + // Get base of card map + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + + + // Offsets into the thread + const int index_offset = in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index()); + const int buffer_offset = in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf()); + + // Pointers into the thread + + Node* buffer_adr = __ AddP(no_base, thread, __ ConX(buffer_offset)); + Node* index_adr = __ AddP(no_base, thread, __ ConX(index_offset)); + + // Now some values + + Node* index = __ load(no_ctrl, index_adr, TypeInt::INT, T_INT, Compile::AliasIdxRaw); + Node* buffer = __ load(no_ctrl, buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw); + + + // Convert the store obj pointer to an int prior to doing math on it + // Use addr not obj gets accurate card marks + + // Node* cast = __ CastPX(no_ctrl, adr /* obj */); + + // Must use ctrl to prevent "integerized oop" existing across safepoint + Node* cast = __ CastPX(__ ctrl(), ( use_precise ? adr : obj )); + + // Divide pointer by card size + Node* card_offset = __ URShiftX( cast, __ ConI(CardTableModRefBS::card_shift) ); + + // Combine card table base and card offset + Node *card_adr = __ AddP(no_base, card_table, card_offset ); + + // If we know the value being stored does it cross regions? 
+ + if (val != NULL) { + // Does the store cause us to cross regions? + + // Should be able to do an unsigned compare of region_size instead of + // and extra shift. Do we have an unsigned compare?? + // Node* region_size = __ ConI(1 << HeapRegion::LogOfHRGrainBytes); + Node* xor_res = __ URShiftX ( __ XorX( cast, __ CastPX(__ ctrl(), val)), __ ConI(HeapRegion::LogOfHRGrainBytes)); + + // if (xor_res == 0) same region so skip + __ if_then(xor_res, BoolTest::ne, zeroX); { + + // No barrier if we are storing a NULL + __ if_then(val, BoolTest::ne, null(), unlikely); { + + // Ok must mark the card if not already dirty + + // load the original value of the card + Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw); + + __ if_then(card_val, BoolTest::ne, zero); { + g1_mark_card(&ideal, card_adr, store, index, index_adr, buffer, tf); + } __ end_if(); + } __ end_if(); + } __ end_if(); + } else { + g1_mark_card(&ideal, card_adr, store, index, index_adr, buffer, tf); + } + + + __ drain_delay_transform(); + set_control( __ ctrl()); + set_all_memory( __ merged_memory()); +#undef __ + +} diff --git a/hotspot/src/share/vm/opto/graphKit.hpp b/hotspot/src/share/vm/opto/graphKit.hpp index c9ea0262561..0817ed3bfcb 100644 --- a/hotspot/src/share/vm/opto/graphKit.hpp +++ b/hotspot/src/share/vm/opto/graphKit.hpp @@ -24,6 +24,7 @@ class FastLockNode; class FastUnlockNode; +class IdealKit; class Parse; class RootNode; @@ -581,6 +582,27 @@ class GraphKit : public Phase { && Universe::heap()->can_elide_tlab_store_barriers()); } + // G1 pre/post barriers + void g1_write_barrier_pre(Node* obj, + Node* adr, + uint alias_idx, + Node* val, + const Type* val_type, + BasicType bt); + + void g1_write_barrier_post(Node* store, + Node* obj, + Node* adr, + uint alias_idx, + Node* val, + BasicType bt, + bool use_precise); + // Helper function for g1 + private: + void g1_mark_card(IdealKit* ideal, Node* card_adr, Node* store, Node* index, Node* index_adr, + Node* 
buffer, const TypeFunc* tf); + + public: // Helper function to round double arguments before a call void round_double_arguments(ciMethod* dest_method); void round_double_result(ciMethod* dest_method); diff --git a/hotspot/src/share/vm/opto/ifg.cpp b/hotspot/src/share/vm/opto/ifg.cpp index 4b65eabcb8e..1a352de722b 100644 --- a/hotspot/src/share/vm/opto/ifg.cpp +++ b/hotspot/src/share/vm/opto/ifg.cpp @@ -485,8 +485,9 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) { // Liveout things are presumed live for the whole block. We accumulate // 'area' accordingly. If they get killed in the block, we'll subtract // the unused part of the block from the area. - double cost = b->_freq * double(last_inst-last_phi); - assert( cost >= 0, "negative spill cost" ); + int inst_count = last_inst - last_phi; + double cost = (inst_count <= 0) ? 0.0 : b->_freq * double(inst_count); + assert(!(cost < 0.0), "negative spill cost" ); IndexSetIterator elements(&liveout); uint lidx; while ((lidx = elements.next()) != 0) { @@ -590,7 +591,7 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) { } else { // Else it is live // A DEF also ends 'area' partway through the block. lrgs(r)._area -= cost; - assert( lrgs(r)._area >= 0, "negative spill area" ); + assert(!(lrgs(r)._area < 0.0), "negative spill area" ); // Insure high score for immediate-use spill copies so they get a color if( n->is_SpillCopy() @@ -703,8 +704,9 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) { } // End of if normal register-allocated value - cost -= b->_freq; // Area remaining in the block - if( cost < 0.0 ) cost = 0.0; // Cost goes negative in the Phi area + // Area remaining in the block + inst_count--; + cost = (inst_count <= 0) ? 
0.0 : b->_freq * double(inst_count); // Make all inputs live if( !n->is_Phi() ) { // Phi function uses come from prior block @@ -751,7 +753,7 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) { assert( pressure[0] == count_int_pressure (&liveout), "" ); assert( pressure[1] == count_float_pressure(&liveout), "" ); } - assert( lrg._area >= 0, "negative spill area" ); + assert(!(lrg._area < 0.0), "negative spill area" ); } } } // End of reverse pass over all instructions in block diff --git a/hotspot/src/share/vm/opto/lcm.cpp b/hotspot/src/share/vm/opto/lcm.cpp index d80e8f0b055..341ddb08c14 100644 --- a/hotspot/src/share/vm/opto/lcm.cpp +++ b/hotspot/src/share/vm/opto/lcm.cpp @@ -595,7 +595,7 @@ bool Block::schedule_local(PhaseCFG *cfg, Matcher &matcher, int *ready_cnt, Vect // A few node types require changing a required edge to a precedence edge // before allocation. - if( UseConcMarkSweepGC ) { + if( UseConcMarkSweepGC || UseG1GC ) { if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_StoreCM ) { // Note: Required edges with an index greater than oper_input_base // are not supported by the allocator. diff --git a/hotspot/src/share/vm/opto/loopTransform.cpp b/hotspot/src/share/vm/opto/loopTransform.cpp index dbce2580158..f1c15b08344 100644 --- a/hotspot/src/share/vm/opto/loopTransform.cpp +++ b/hotspot/src/share/vm/opto/loopTransform.cpp @@ -679,6 +679,10 @@ void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_ CountedLoopNode *post_head = old_new[main_head->_idx]->as_CountedLoop(); post_head->set_post_loop(main_head); + // Reduce the post-loop trip count. + CountedLoopEndNode* post_end = old_new[main_end ->_idx]->as_CountedLoopEnd(); + post_end->_prob = PROB_FAIR; + // Build the main-loop normal exit. 
IfFalseNode *new_main_exit = new (C, 1) IfFalseNode(main_end); _igvn.register_new_node_with_optimizer( new_main_exit ); @@ -748,6 +752,9 @@ void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_ pre_head->set_pre_loop(main_head); Node *pre_incr = old_new[incr->_idx]; + // Reduce the pre-loop trip count. + pre_end->_prob = PROB_FAIR; + // Find the pre-loop normal exit. Node* pre_exit = pre_end->proj_out(false); assert( pre_exit->Opcode() == Op_IfFalse, "" ); @@ -767,8 +774,8 @@ void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_ register_new_node( min_cmp , new_pre_exit ); register_new_node( min_bol , new_pre_exit ); - // Build the IfNode - IfNode *min_iff = new (C, 2) IfNode( new_pre_exit, min_bol, PROB_FAIR, COUNT_UNKNOWN ); + // Build the IfNode (assume the main-loop is executed always). + IfNode *min_iff = new (C, 2) IfNode( new_pre_exit, min_bol, PROB_ALWAYS, COUNT_UNKNOWN ); _igvn.register_new_node_with_optimizer( min_iff ); set_idom(min_iff, new_pre_exit, dd_main_head); set_loop(min_iff, loop->_parent); @@ -1012,6 +1019,8 @@ void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool ad if (!has_ctrl(old)) set_loop(nnn, loop); } + + loop->record_for_igvn(); } //------------------------------do_maximally_unroll---------------------------- @@ -1581,10 +1590,10 @@ bool IdealLoopTree::policy_do_remove_empty_loop( PhaseIdealLoop *phase ) { //============================================================================= //------------------------------iteration_split_impl--------------------------- -void IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ) { +bool IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ) { // Check and remove empty loops (spam micro-benchmarks) if( policy_do_remove_empty_loop(phase) ) - return; // Here we removed an empty loop + return true; // Here we removed an empty loop bool should_peel = 
policy_peeling(phase); // Should we peel? @@ -1594,7 +1603,8 @@ void IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_ // This removes loop-invariant tests (usually null checks). if( !_head->is_CountedLoop() ) { // Non-counted loop if (PartialPeelLoop && phase->partial_peel(this, old_new)) { - return; + // Partial peel succeeded so terminate this round of loop opts + return false; } if( should_peel ) { // Should we peel? #ifndef PRODUCT @@ -1604,14 +1614,14 @@ void IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_ } else if( should_unswitch ) { phase->do_unswitching(this, old_new); } - return; + return true; } CountedLoopNode *cl = _head->as_CountedLoop(); - if( !cl->loopexit() ) return; // Ignore various kinds of broken loops + if( !cl->loopexit() ) return true; // Ignore various kinds of broken loops // Do nothing special to pre- and post- loops - if( cl->is_pre_loop() || cl->is_post_loop() ) return; + if( cl->is_pre_loop() || cl->is_post_loop() ) return true; // Compute loop trip count from profile data compute_profile_trip_cnt(phase); @@ -1624,11 +1634,11 @@ void IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_ // Here we did some unrolling and peeling. Eventually we will // completely unroll this loop and it will no longer be a loop. 
phase->do_maximally_unroll(this,old_new); - return; + return true; } if (should_unswitch) { phase->do_unswitching(this, old_new); - return; + return true; } } @@ -1689,14 +1699,16 @@ void IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_ if( should_peel ) // Might want to peel but do nothing else phase->do_peeling(this,old_new); } + return true; } //============================================================================= //------------------------------iteration_split-------------------------------- -void IdealLoopTree::iteration_split( PhaseIdealLoop *phase, Node_List &old_new ) { +bool IdealLoopTree::iteration_split( PhaseIdealLoop *phase, Node_List &old_new ) { // Recursively iteration split nested loops - if( _child ) _child->iteration_split( phase, old_new ); + if( _child && !_child->iteration_split( phase, old_new )) + return false; // Clean out prior deadwood DCE_loop_body(); @@ -1718,7 +1730,9 @@ void IdealLoopTree::iteration_split( PhaseIdealLoop *phase, Node_List &old_new ) _allow_optimizations && !tail()->is_top() ) { // Also ignore the occasional dead backedge if (!_has_call) { - iteration_split_impl( phase, old_new ); + if (!iteration_split_impl( phase, old_new )) { + return false; + } } else if (policy_unswitching(phase)) { phase->do_unswitching(this, old_new); } @@ -1727,5 +1741,7 @@ void IdealLoopTree::iteration_split( PhaseIdealLoop *phase, Node_List &old_new ) // Minor offset re-organization to remove loop-fallout uses of // trip counter. 
if( _head->is_CountedLoop() ) phase->reorg_offsets( this ); - if( _next ) _next->iteration_split( phase, old_new ); + if( _next && !_next->iteration_split( phase, old_new )) + return false; + return true; } diff --git a/hotspot/src/share/vm/opto/loopnode.cpp b/hotspot/src/share/vm/opto/loopnode.cpp index 7e853e6a1f8..7e375992f1d 100644 --- a/hotspot/src/share/vm/opto/loopnode.cpp +++ b/hotspot/src/share/vm/opto/loopnode.cpp @@ -1279,7 +1279,7 @@ void IdealLoopTree::counted_loop( PhaseIdealLoop *phase ) { // Visit all children, looking for Phis for (DUIterator i = cl->outs(); cl->has_out(i); i++) { Node *out = cl->out(i); - if (!out->is_Phi()) continue; // Looking for phis + if (!out->is_Phi() || out == phi) continue; // Looking for other phis PhiNode* phi2 = out->as_Phi(); Node *incr2 = phi2->in( LoopNode::LoopBackControl ); // Look for induction variables of the form: X += constant @@ -1388,6 +1388,37 @@ void IdealLoopTree::dump( ) const { #endif +static void log_loop_tree(IdealLoopTree* root, IdealLoopTree* loop, CompileLog* log) { + if (loop == root) { + if (loop->_child != NULL) { + log->begin_head("loop_tree"); + log->end_head(); + if( loop->_child ) log_loop_tree(root, loop->_child, log); + log->tail("loop_tree"); + assert(loop->_next == NULL, "what?"); + } + } else { + Node* head = loop->_head; + log->begin_head("loop"); + log->print(" idx='%d' ", head->_idx); + if (loop->_irreducible) log->print("irreducible='1' "); + if (head->is_Loop()) { + if (head->as_Loop()->is_inner_loop()) log->print("inner_loop='1' "); + if (head->as_Loop()->is_partial_peel_loop()) log->print("partial_peel_loop='1' "); + } + if (head->is_CountedLoop()) { + CountedLoopNode* cl = head->as_CountedLoop(); + if (cl->is_pre_loop()) log->print("pre_loop='%d' ", cl->main_idx()); + if (cl->is_main_loop()) log->print("main_loop='%d' ", cl->_idx); + if (cl->is_post_loop()) log->print("post_loop='%d' ", cl->main_idx()); + } + log->end_head(); + if( loop->_child ) log_loop_tree(root, 
loop->_child, log); + log->tail("loop"); + if( loop->_next ) log_loop_tree(root, loop->_next, log); + } +} + //============================================================================= //------------------------------PhaseIdealLoop--------------------------------- // Create a PhaseLoop. Build the ideal Loop tree. Map each Ideal Node to @@ -1624,10 +1655,13 @@ PhaseIdealLoop::PhaseIdealLoop( PhaseIterGVN &igvn, const PhaseIdealLoop *verify // Cleanup any modified bits _igvn.optimize(); - // Do not repeat loop optimizations if irreducible loops are present - // by claiming no-progress. - if( _has_irreducible_loops ) - C->clear_major_progress(); + // disable assert until issue with split_flow_path is resolved (6742111) + // assert(!_has_irreducible_loops || C->parsed_irreducible_loop() || C->is_osr_compilation(), + // "shouldn't introduce irreducible loops"); + + if (C->log() != NULL) { + log_loop_tree(_ltree_root, _ltree_root, C->log()); + } } #ifndef PRODUCT @@ -2732,11 +2766,7 @@ void PhaseIdealLoop::dump( ) const { } void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list ) const { - - // Indent by loop nesting depth - for( uint x = 0; x < loop->_nest; x++ ) - tty->print(" "); - tty->print_cr("---- Loop N%d-N%d ----", loop->_head->_idx,loop->_tail->_idx); + loop->dump_head(); // Now scan for CFG nodes in the same loop for( uint j=idx; j > 0; j-- ) { diff --git a/hotspot/src/share/vm/opto/loopnode.hpp b/hotspot/src/share/vm/opto/loopnode.hpp index 629b4b2e5d1..53775646696 100644 --- a/hotspot/src/share/vm/opto/loopnode.hpp +++ b/hotspot/src/share/vm/opto/loopnode.hpp @@ -192,6 +192,8 @@ public: int is_main_no_pre_loop() const { return _loop_flags & Main_Has_No_Pre_Loop; } void set_main_no_pre_loop() { _loop_flags |= Main_Has_No_Pre_Loop; } + int main_idx() const { return _main_idx; } + void set_pre_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Pre ; _main_idx = main->_idx; } void set_main_loop ( ) { 
assert(is_normal_loop(),""); _loop_flags |= Main; } @@ -323,12 +325,14 @@ public: // Returns TRUE if loop tree is structurally changed. bool beautify_loops( PhaseIdealLoop *phase ); - // Perform iteration-splitting on inner loops. Split iterations to avoid - // range checks or one-shot null checks. - void iteration_split( PhaseIdealLoop *phase, Node_List &old_new ); + // Perform iteration-splitting on inner loops. Split iterations to + // avoid range checks or one-shot null checks. Returns false if the + // current round of loop opts should stop. + bool iteration_split( PhaseIdealLoop *phase, Node_List &old_new ); - // Driver for various flavors of iteration splitting - void iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ); + // Driver for various flavors of iteration splitting. Returns false + // if the current round of loop opts should stop. + bool iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ); // Given dominators, try to find loops with calls that must always be // executed (call dominates loop tail). These loops do not need non-call diff --git a/hotspot/src/share/vm/opto/loopopts.cpp b/hotspot/src/share/vm/opto/loopopts.cpp index 36ceb61a351..41048cbcbe9 100644 --- a/hotspot/src/share/vm/opto/loopopts.cpp +++ b/hotspot/src/share/vm/opto/loopopts.cpp @@ -1903,9 +1903,6 @@ void PhaseIdealLoop::clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, N // Use in a phi is considered a use in the associated predecessor block use_c = use->in(0)->in(j); } - if (use_c->is_CountedLoop()) { - use_c = use_c->in(LoopNode::EntryControl); - } set_ctrl(n_clone, use_c); assert(!loop->is_member(get_loop(use_c)), "should be outside loop"); get_loop(use_c)->_body.push(n_clone); @@ -2667,6 +2664,10 @@ void PhaseIdealLoop::reorg_offsets( IdealLoopTree *loop ) { // Fix this by adjusting to use the post-increment trip counter. 
Node *phi = cl->phi(); if( !phi ) return; // Dead infinite loop + + // Shape messed up, probably by iteration_split_impl + if (phi->in(LoopNode::LoopBackControl) != cl->incr()) return; + bool progress = true; while (progress) { progress = false; diff --git a/hotspot/src/share/vm/opto/macro.cpp b/hotspot/src/share/vm/opto/macro.cpp index f8aea8e36aa..7de433e3aaa 100644 --- a/hotspot/src/share/vm/opto/macro.cpp +++ b/hotspot/src/share/vm/opto/macro.cpp @@ -944,25 +944,7 @@ void PhaseMacroExpand::expand_allocate_common( mem = mem->as_MergeMem()->memory_at(Compile::AliasIdxRaw); } - Node* eden_top_adr; - Node* eden_end_adr; - set_eden_pointers(eden_top_adr, eden_end_adr); - - uint raw_idx = C->get_alias_index(TypeRawPtr::BOTTOM); assert(ctrl != NULL, "must have control"); - - // Load Eden::end. Loop invariant and hoisted. - // - // Note: We set the control input on "eden_end" and "old_eden_top" when using - // a TLAB to work around a bug where these values were being moved across - // a safepoint. These are not oops, so they cannot be include in the oop - // map, but the can be changed by a GC. The proper way to fix this would - // be to set the raw memory state when generating a SafepointNode. However - // this will require extensive changes to the loop optimization in order to - // prevent a degradation of the optimization. - // See comment in memnode.hpp, around line 227 in class LoadPNode. - Node* eden_end = make_load(ctrl, mem, eden_end_adr, 0, TypeRawPtr::BOTTOM, T_ADDRESS); - // We need a Region and corresponding Phi's to merge the slow-path and fast-path results. 
// they will not be used if "always_slow" is set enum { slow_result_path = 1, fast_result_path = 2 }; @@ -982,12 +964,15 @@ void PhaseMacroExpand::expand_allocate_common( initial_slow_test = BoolNode::make_predicate(initial_slow_test, &_igvn); } - if (DTraceAllocProbes) { + if (DTraceAllocProbes || + !UseTLAB && (!Universe::heap()->supports_inline_contig_alloc() || + (UseConcMarkSweepGC && CMSIncrementalMode))) { // Force slow-path allocation always_slow = true; initial_slow_test = NULL; } + enum { too_big_or_final_path = 1, need_gc_path = 2 }; Node *slow_region = NULL; Node *toobig_false = ctrl; @@ -1016,6 +1001,23 @@ void PhaseMacroExpand::expand_allocate_common( Node *slow_mem = mem; // save the current memory state for slow path // generate the fast allocation code unless we know that the initial test will always go slow if (!always_slow) { + Node* eden_top_adr; + Node* eden_end_adr; + + set_eden_pointers(eden_top_adr, eden_end_adr); + + // Load Eden::end. Loop invariant and hoisted. + // + // Note: We set the control input on "eden_end" and "old_eden_top" when using + // a TLAB to work around a bug where these values were being moved across + // a safepoint. These are not oops, so they cannot be include in the oop + // map, but the can be changed by a GC. The proper way to fix this would + // be to set the raw memory state when generating a SafepointNode. However + // this will require extensive changes to the loop optimization in order to + // prevent a degradation of the optimization. + // See comment in memnode.hpp, around line 227 in class LoadPNode. 
+ Node *eden_end = make_load(ctrl, mem, eden_end_adr, 0, TypeRawPtr::BOTTOM, T_ADDRESS); + // allocate the Region and Phi nodes for the result result_region = new (C, 3) RegionNode(3); result_phi_rawmem = new (C, 3) PhiNode( result_region, Type::MEMORY, TypeRawPtr::BOTTOM ); diff --git a/hotspot/src/share/vm/opto/matcher.cpp b/hotspot/src/share/vm/opto/matcher.cpp index f8ff59632ae..1fc7915b9d6 100644 --- a/hotspot/src/share/vm/opto/matcher.cpp +++ b/hotspot/src/share/vm/opto/matcher.cpp @@ -273,7 +273,7 @@ void Matcher::match( ) { find_shared( C->root() ); find_shared( C->top() ); - C->print_method("Before Matching", 2); + C->print_method("Before Matching"); // Swap out to old-space; emptying new-space Arena *old = C->node_arena()->move_contents(C->old_arena()); @@ -840,7 +840,7 @@ Node *Matcher::xform( Node *n, int max_stack ) { _new2old_map.map(m->_idx, n); #endif if (m->in(0) != NULL) // m might be top - collect_null_checks(m); + collect_null_checks(m, n); } else { // Else just a regular 'ol guy m = n->clone(); // So just clone into new-space #ifdef ASSERT @@ -1478,12 +1478,19 @@ MachNode *Matcher::ReduceInst( State *s, int rule, Node *&mem ) { m = _mem_node; assert(m != NULL && m->is_Mem(), "expecting memory node"); } - if (m->adr_type() != mach->adr_type()) { + const Type* mach_at = mach->adr_type(); + // DecodeN node consumed by an address may have different type + // then its input. Don't compare types for such case. + if (m->adr_type() != mach_at && m->in(MemNode::Address)->is_AddP() && + m->in(MemNode::Address)->in(AddPNode::Address)->is_DecodeN()) { + mach_at = m->adr_type(); + } + if (m->adr_type() != mach_at) { m->dump(); tty->print_cr("mach:"); mach->dump(1); } - assert(m->adr_type() == mach->adr_type(), "matcher should not change adr type"); + assert(m->adr_type() == mach_at, "matcher should not change adr type"); } #endif } @@ -1995,7 +2002,7 @@ void Matcher::dump_old2new_map() { // it. Used by later implicit-null-check handling. 
Actually collects // either an IfTrue or IfFalse for the common NOT-null path, AND the ideal // value being tested. -void Matcher::collect_null_checks( Node *proj ) { +void Matcher::collect_null_checks( Node *proj, Node *orig_proj ) { Node *iff = proj->in(0); if( iff->Opcode() == Op_If ) { // During matching If's have Bool & Cmp side-by-side @@ -2008,20 +2015,47 @@ void Matcher::collect_null_checks( Node *proj ) { if (ct == TypePtr::NULL_PTR || (opc == Op_CmpN && ct == TypeNarrowOop::NULL_PTR)) { + bool push_it = false; if( proj->Opcode() == Op_IfTrue ) { extern int all_null_checks_found; all_null_checks_found++; if( b->_test._test == BoolTest::ne ) { - _null_check_tests.push(proj); - _null_check_tests.push(cmp->in(1)); + push_it = true; } } else { assert( proj->Opcode() == Op_IfFalse, "" ); if( b->_test._test == BoolTest::eq ) { - _null_check_tests.push(proj); - _null_check_tests.push(cmp->in(1)); + push_it = true; } } + if( push_it ) { + _null_check_tests.push(proj); + Node* val = cmp->in(1); +#ifdef _LP64 + if (UseCompressedOops && !Matcher::clone_shift_expressions && + val->bottom_type()->isa_narrowoop()) { + // + // Look for DecodeN node which should be pinned to orig_proj. + // On platforms (Sparc) which can not handle 2 adds + // in addressing mode we have to keep a DecodeN node and + // use it to do implicit NULL check in address. + // + // DecodeN node was pinned to non-null path (orig_proj) during + // CastPP transformation in final_graph_reshaping_impl(). + // + uint cnt = orig_proj->outcnt(); + for (uint i = 0; i < orig_proj->outcnt(); i++) { + Node* d = orig_proj->raw_out(i); + if (d->is_DecodeN() && d->in(1) == val) { + val = d; + val->set_req(0, NULL); // Unpin now. 
+ break; + } + } + } +#endif + _null_check_tests.push(val); + } } } } diff --git a/hotspot/src/share/vm/opto/matcher.hpp b/hotspot/src/share/vm/opto/matcher.hpp index 20de817c8b8..d4ba9ef10e8 100644 --- a/hotspot/src/share/vm/opto/matcher.hpp +++ b/hotspot/src/share/vm/opto/matcher.hpp @@ -166,7 +166,7 @@ public: // List of IfFalse or IfTrue Nodes that indicate a taken null test. // List is valid in the post-matching space. Node_List _null_check_tests; - void collect_null_checks( Node *proj ); + void collect_null_checks( Node *proj, Node *orig_proj ); void validate_null_checks( ); Matcher( Node_List &proj_list ); diff --git a/hotspot/src/share/vm/opto/memnode.cpp b/hotspot/src/share/vm/opto/memnode.cpp index 56f7ef736c9..ea7e062aef9 100644 --- a/hotspot/src/share/vm/opto/memnode.cpp +++ b/hotspot/src/share/vm/opto/memnode.cpp @@ -1887,6 +1887,38 @@ const Type *LoadRangeNode::Value( PhaseTransform *phase ) const { return tap->size(); } +//-------------------------------Ideal--------------------------------------- +// Feed through the length in AllocateArray(...length...)._length. +Node *LoadRangeNode::Ideal(PhaseGVN *phase, bool can_reshape) { + Node* p = MemNode::Ideal_common(phase, can_reshape); + if (p) return (p == NodeSentinel) ? NULL : p; + + // Take apart the address into an oop and and offset. + // Return 'this' if we cannot. + Node* adr = in(MemNode::Address); + intptr_t offset = 0; + Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset); + if (base == NULL) return NULL; + const TypeAryPtr* tary = phase->type(adr)->isa_aryptr(); + if (tary == NULL) return NULL; + + // We can fetch the length directly through an AllocateArrayNode. + // This works even if the length is not constant (clone or newArray). 
+ if (offset == arrayOopDesc::length_offset_in_bytes()) { + AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(base, phase); + if (alloc != NULL) { + Node* allocated_length = alloc->Ideal_length(); + Node* len = alloc->make_ideal_length(tary, phase); + if (allocated_length != len) { + // New CastII improves on this. + return len; + } + } + } + + return NULL; +} + //------------------------------Identity--------------------------------------- // Feed through the length in AllocateArray(...length...)._length. Node* LoadRangeNode::Identity( PhaseTransform *phase ) { @@ -1905,15 +1937,22 @@ Node* LoadRangeNode::Identity( PhaseTransform *phase ) { // We can fetch the length directly through an AllocateArrayNode. // This works even if the length is not constant (clone or newArray). if (offset == arrayOopDesc::length_offset_in_bytes()) { - Node* allocated_length = AllocateArrayNode::Ideal_length(base, phase); - if (allocated_length != NULL) { - return allocated_length; + AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(base, phase); + if (alloc != NULL) { + Node* allocated_length = alloc->Ideal_length(); + // Do not allow make_ideal_length to allocate a CastII node. + Node* len = alloc->make_ideal_length(tary, phase, false); + if (allocated_length == len) { + // Return allocated_length only if it would not be improved by a CastII. 
+ return allocated_length; + } } } return this; } + //============================================================================= //---------------------------StoreNode::make----------------------------------- // Polymorphic factory method: diff --git a/hotspot/src/share/vm/opto/memnode.hpp b/hotspot/src/share/vm/opto/memnode.hpp index dff9dad102e..2b40a676c45 100644 --- a/hotspot/src/share/vm/opto/memnode.hpp +++ b/hotspot/src/share/vm/opto/memnode.hpp @@ -241,6 +241,7 @@ public: virtual int Opcode() const; virtual const Type *Value( PhaseTransform *phase ) const; virtual Node *Identity( PhaseTransform *phase ); + virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); }; //------------------------------LoadLNode-------------------------------------- diff --git a/hotspot/src/share/vm/opto/mulnode.cpp b/hotspot/src/share/vm/opto/mulnode.cpp index 22ea890c95a..5cdcbafbc4b 100644 --- a/hotspot/src/share/vm/opto/mulnode.cpp +++ b/hotspot/src/share/vm/opto/mulnode.cpp @@ -152,6 +152,14 @@ const Type *MulNode::Value( PhaseTransform *phase ) const { if( t1 == Type::BOTTOM || t2 == Type::BOTTOM ) return bottom_type(); +#if defined(IA32) + // Can't trust native compilers to properly fold strict double + // multiplication with round-to-zero on this platform. + if (op == Op_MulD && phase->C->method()->is_strict()) { + return TypeD::DOUBLE; + } +#endif + return mul_ring(t1,t2); // Local flavor of type multiplication } @@ -360,7 +368,7 @@ const Type *MulFNode::mul_ring(const Type *t0, const Type *t1) const { // Compute the product type of two double ranges into this node. const Type *MulDNode::mul_ring(const Type *t0, const Type *t1) const { if( t0 == Type::DOUBLE || t1 == Type::DOUBLE ) return Type::DOUBLE; - // We must be adding 2 double constants. + // We must be multiplying 2 double constants. 
return TypeD::make( t0->getd() * t1->getd() ); } diff --git a/hotspot/src/share/vm/opto/node.hpp b/hotspot/src/share/vm/opto/node.hpp index e027265bf67..f55a403099a 100644 --- a/hotspot/src/share/vm/opto/node.hpp +++ b/hotspot/src/share/vm/opto/node.hpp @@ -1320,7 +1320,8 @@ public: Node *pop() { if( _clock_index >= size() ) _clock_index = 0; Node *b = at(_clock_index); - map( _clock_index++, Node_List::pop()); + map( _clock_index, Node_List::pop()); + if (size() != 0) _clock_index++; // Always start from 0 _in_worklist >>= b->_idx; return b; } diff --git a/hotspot/src/share/vm/opto/parse.hpp b/hotspot/src/share/vm/opto/parse.hpp index bf344cb6f5f..0a68a35237e 100644 --- a/hotspot/src/share/vm/opto/parse.hpp +++ b/hotspot/src/share/vm/opto/parse.hpp @@ -167,9 +167,19 @@ class Parse : public GraphKit { int start() const { return flow()->start(); } int limit() const { return flow()->limit(); } - int pre_order() const { return flow()->pre_order(); } + int rpo() const { return flow()->rpo(); } int start_sp() const { return flow()->stack_size(); } + bool is_loop_head() const { return flow()->is_loop_head(); } + bool is_SEL_head() const { return flow()->is_single_entry_loop_head(); } + bool is_SEL_backedge(Block* pred) const{ return is_SEL_head() && pred->rpo() >= rpo(); } + bool is_invariant_local(uint i) const { + const JVMState* jvms = start_map()->jvms(); + if (!jvms->is_loc(i)) return false; + return flow()->is_invariant_local(i - jvms->locoff()); + } + bool can_elide_SEL_phi(uint i) const { assert(is_SEL_head(),""); return is_invariant_local(i); } + const Type* peek(int off=0) const { return stack_type_at(start_sp() - (off+1)); } const Type* stack_type_at(int i) const; @@ -305,7 +315,7 @@ class Parse : public GraphKit { // entry_bci() -- see osr_bci, etc. ciTypeFlow* flow() const { return _flow; } - // blocks() -- see pre_order_at, start_block, etc. + // blocks() -- see rpo_at, start_block, etc. 
int block_count() const { return _block_count; } GraphKit& exits() { return _exits; } @@ -330,12 +340,12 @@ class Parse : public GraphKit { // Must this parse be aborted? bool failing() { return C->failing(); } - Block* pre_order_at(int po) { - assert(0 <= po && po < _block_count, "oob"); - return &_blocks[po]; + Block* rpo_at(int rpo) { + assert(0 <= rpo && rpo < _block_count, "oob"); + return &_blocks[rpo]; } Block* start_block() { - return pre_order_at(flow()->start_block()->pre_order()); + return rpo_at(flow()->start_block()->rpo()); } // Can return NULL if the flow pass did not complete a block. Block* successor_for_bci(int bci) { @@ -359,9 +369,6 @@ class Parse : public GraphKit { // Parse all the basic blocks. void do_all_blocks(); - // Helper for do_all_blocks; makes one pass in pre-order. - void visit_blocks(); - // Parse the current basic block void do_one_block(); diff --git a/hotspot/src/share/vm/opto/parse1.cpp b/hotspot/src/share/vm/opto/parse1.cpp index d26d075f627..b896faca492 100644 --- a/hotspot/src/share/vm/opto/parse1.cpp +++ b/hotspot/src/share/vm/opto/parse1.cpp @@ -29,17 +29,17 @@ // the most. Some of the non-static variables are needed in bytecodeInfo.cpp // and eventually should be encapsulated in a proper class (gri 8/18/98). 
-int nodes_created = 0; int nodes_created_old = 0; -int methods_parsed = 0; int methods_parsed_old = 0; -int methods_seen = 0; int methods_seen_old = 0; +int nodes_created = 0; +int methods_parsed = 0; +int methods_seen = 0; +int blocks_parsed = 0; +int blocks_seen = 0; -int explicit_null_checks_inserted = 0, explicit_null_checks_inserted_old = 0; -int explicit_null_checks_elided = 0, explicit_null_checks_elided_old = 0; +int explicit_null_checks_inserted = 0; +int explicit_null_checks_elided = 0; int all_null_checks_found = 0, implicit_null_checks = 0; int implicit_null_throws = 0; -int parse_idx = 0; -size_t parse_arena = 0; int reclaim_idx = 0; int reclaim_in = 0; int reclaim_node = 0; @@ -61,6 +61,7 @@ void Parse::print_statistics() { tty->cr(); if (methods_seen != methods_parsed) tty->print_cr("Reasons for parse failures (NOT cumulative):"); + tty->print_cr("Blocks parsed: %d Blocks seen: %d", blocks_parsed, blocks_seen); if( explicit_null_checks_inserted ) tty->print_cr("%d original NULL checks - %d elided (%2d%%); optimizer leaves %d,", explicit_null_checks_inserted, explicit_null_checks_elided, (100*explicit_null_checks_elided)/explicit_null_checks_inserted, all_null_checks_found); @@ -373,6 +374,12 @@ Parse::Parse(JVMState* caller, ciMethod* parse_method, float expected_uses) C->record_method_not_compilable_all_tiers(_flow->failure_reason()); } +#ifndef PRODUCT + if (_flow->has_irreducible_entry()) { + C->set_parsed_irreducible_loop(true); + } +#endif + if (_expected_uses <= 0) { _prof_factor = 1; } else { @@ -556,118 +563,93 @@ Parse::Parse(JVMState* caller, ciMethod* parse_method, float expected_uses) set_map(entry_map); do_exits(); - // Collect a few more statistics. 
- parse_idx += C->unique(); - parse_arena += C->node_arena()->used(); - if (log) log->done("parse nodes='%d' memory='%d'", C->unique(), C->node_arena()->used()); } //---------------------------do_all_blocks------------------------------------- void Parse::do_all_blocks() { - _blocks_merged = 0; - _blocks_parsed = 0; + bool has_irreducible = flow()->has_irreducible_entry(); - int old_blocks_merged = -1; - int old_blocks_parsed = -1; + // Walk over all blocks in Reverse Post-Order. + while (true) { + bool progress = false; + for (int rpo = 0; rpo < block_count(); rpo++) { + Block* block = rpo_at(rpo); - for (int tries = 0; ; tries++) { - visit_blocks(); - if (failing()) return; // Check for bailout + if (block->is_parsed()) continue; - // No need for a work list. The outer loop is hardly ever repeated. - // The following loop traverses the blocks in a reasonable pre-order, - // as produced by the ciTypeFlow pass. - - // This loop can be taken more than once if there are two entries to - // a loop (irreduceable CFG), and the edge which ciTypeFlow chose - // as the first predecessor to the loop goes dead in the parser, - // due to parse-time optimization. (Could happen with obfuscated code.) - - // Look for progress, or the lack of it: - if (_blocks_parsed == block_count()) { - // That's all, folks. - if (TraceOptoParse) { - tty->print_cr("All blocks parsed."); + if (!block->is_merged()) { + // Dead block, no state reaches this block + continue; } - break; + + // Prepare to parse this block. + load_state_from(block); + + if (stopped()) { + // Block is dead. + continue; + } + + blocks_parsed++; + + progress = true; + if (block->is_loop_head() || block->is_handler() || has_irreducible && !block->is_ready()) { + // Not all preds have been parsed. We must build phis everywhere. + // (Note that dead locals do not get phis built, ever.) + ensure_phis_everywhere(); + + // Leave behind an undisturbed copy of the map, for future merges. 
+ set_map(clone_map()); + } + + if (control()->is_Region() && !block->is_loop_head() && !has_irreducible && !block->is_handler()) { + // In the absence of irreducible loops, the Region and Phis + // associated with a merge that doesn't involve a backedge can + // be simplfied now since the RPO parsing order guarantees + // that any path which was supposed to reach here has already + // been parsed or must be dead. + Node* c = control(); + Node* result = _gvn.transform_no_reclaim(control()); + if (c != result && TraceOptoParse) { + tty->print_cr("Block #%d replace %d with %d", block->rpo(), c->_idx, result->_idx); + } + if (result != top()) { + record_for_igvn(result); + } + } + + // Parse the block. + do_one_block(); + + // Check for bailouts. + if (failing()) return; } - // How much work was done this time around? - int new_blocks_merged = _blocks_merged - old_blocks_merged; - int new_blocks_parsed = _blocks_parsed - old_blocks_parsed; - if (new_blocks_merged == 0) { - if (TraceOptoParse) { - tty->print_cr("All live blocks parsed; %d dead blocks.", block_count() - _blocks_parsed); - } - // No new blocks have become parseable. Some blocks are just dead. + // with irreducible loops multiple passes might be necessary to parse everything + if (!has_irreducible || !progress) { break; } - assert(new_blocks_parsed > 0, "must make progress"); - assert(tries < block_count(), "the pre-order cannot be this bad!"); - - old_blocks_merged = _blocks_merged; - old_blocks_parsed = _blocks_parsed; } + blocks_seen += block_count(); + #ifndef PRODUCT // Make sure there are no half-processed blocks remaining. // Every remaining unprocessed block is dead and may be ignored now. 
- for (int po = 0; po < block_count(); po++) { - Block* block = pre_order_at(po); + for (int rpo = 0; rpo < block_count(); rpo++) { + Block* block = rpo_at(rpo); if (!block->is_parsed()) { if (TraceOptoParse) { - tty->print("Skipped dead block %d at bci:%d", po, block->start()); - assert(!block->is_merged(), "no half-processed blocks"); + tty->print_cr("Skipped dead block %d at bci:%d", rpo, block->start()); } + assert(!block->is_merged(), "no half-processed blocks"); } } #endif } -//---------------------------visit_blocks-------------------------------------- -void Parse::visit_blocks() { - // Walk over all blocks, parsing every one that has been reached (merged). - for (int po = 0; po < block_count(); po++) { - Block* block = pre_order_at(po); - - if (block->is_parsed()) { - // Do not parse twice. - continue; - } - - if (!block->is_merged()) { - // No state on this block. It had not yet been reached. - // Delay reaching it until later. - continue; - } - - // Prepare to parse this block. - load_state_from(block); - - if (stopped()) { - // Block is dead. - continue; - } - - if (!block->is_ready() || block->is_handler()) { - // Not all preds have been parsed. We must build phis everywhere. - // (Note that dead locals do not get phis built, ever.) - ensure_phis_everywhere(); - - // Leave behind an undisturbed copy of the map, for future merges. - set_map(clone_map()); - } - - // Ready or not, parse the block. - do_one_block(); - - // Check for bailouts. - if (failing()) return; - } -} - //-------------------------------build_exits---------------------------------- // Build normal and exceptional exit merge points. void Parse::build_exits() { @@ -1134,24 +1116,24 @@ void Parse::init_blocks() { _blocks = NEW_RESOURCE_ARRAY(Block, _block_count); Copy::zero_to_bytes(_blocks, sizeof(Block)*_block_count); - int po; + int rpo; // Initialize the structs. 
- for (po = 0; po < block_count(); po++) { - Block* block = pre_order_at(po); - block->init_node(this, po); + for (rpo = 0; rpo < block_count(); rpo++) { + Block* block = rpo_at(rpo); + block->init_node(this, rpo); } // Collect predecessor and successor information. - for (po = 0; po < block_count(); po++) { - Block* block = pre_order_at(po); + for (rpo = 0; rpo < block_count(); rpo++) { + Block* block = rpo_at(rpo); block->init_graph(this); } } //-------------------------------init_node------------------------------------- -void Parse::Block::init_node(Parse* outer, int po) { - _flow = outer->flow()->pre_order_at(po); +void Parse::Block::init_node(Parse* outer, int rpo) { + _flow = outer->flow()->rpo_at(rpo); _pred_count = 0; _preds_parsed = 0; _count = 0; @@ -1177,7 +1159,7 @@ void Parse::Block::init_graph(Parse* outer) { int p = 0; for (int i = 0; i < ns+ne; i++) { ciTypeFlow::Block* tf2 = (i < ns) ? tfs->at(i) : tfe->at(i-ns); - Block* block2 = outer->pre_order_at(tf2->pre_order()); + Block* block2 = outer->rpo_at(tf2->rpo()); _successors[i] = block2; // Accumulate pred info for the other block, too. @@ -1368,10 +1350,11 @@ void Parse::do_one_block() { int nt = b->all_successors(); tty->print("Parsing block #%d at bci [%d,%d), successors: ", - block()->pre_order(), block()->start(), block()->limit()); + block()->rpo(), block()->start(), block()->limit()); for (int i = 0; i < nt; i++) { - tty->print((( i < ns) ? " %d" : " %d(e)"), b->successor_at(i)->pre_order()); + tty->print((( i < ns) ? " %d" : " %d(e)"), b->successor_at(i)->rpo()); } + if (b->is_loop_head()) tty->print(" lphd"); tty->print_cr(""); } @@ -1501,7 +1484,7 @@ void Parse::handle_missing_successor(int target_bci) { #ifndef PRODUCT Block* b = block(); int trap_bci = b->flow()->has_trap()? 
b->flow()->trap_bci(): -1; - tty->print_cr("### Missing successor at bci:%d for block #%d (trap_bci:%d)", target_bci, b->pre_order(), trap_bci); + tty->print_cr("### Missing successor at bci:%d for block #%d (trap_bci:%d)", target_bci, b->rpo(), trap_bci); #endif ShouldNotReachHere(); } @@ -1509,7 +1492,7 @@ void Parse::handle_missing_successor(int target_bci) { //--------------------------merge_common--------------------------------------- void Parse::merge_common(Parse::Block* target, int pnum) { if (TraceOptoParse) { - tty->print("Merging state at block #%d bci:%d", target->pre_order(), target->start()); + tty->print("Merging state at block #%d bci:%d", target->rpo(), target->start()); } // Zap extra stack slots to top @@ -1534,6 +1517,7 @@ void Parse::merge_common(Parse::Block* target, int pnum) { // which must not be allowed into this block's map.) if (pnum > PhiNode::Input // Known multiple inputs. || target->is_handler() // These have unpredictable inputs. + || target->is_loop_head() // Known multiple inputs || control()->is_Region()) { // We must hide this guy. // Add a Region to start the new basic block. Phis will be added // later lazily. @@ -1575,15 +1559,21 @@ void Parse::merge_common(Parse::Block* target, int pnum) { // Compute where to merge into // Merge incoming control path - r->set_req(pnum, newin->control()); + r->init_req(pnum, newin->control()); if (pnum == 1) { // Last merge for this Region? 
- _gvn.transform_no_reclaim(r); + if (!block()->flow()->is_irreducible_entry()) { + Node* result = _gvn.transform_no_reclaim(r); + if (r != result && TraceOptoParse) { + tty->print_cr("Block #%d replace %d with %d", block()->rpo(), r->_idx, result->_idx); + } + } record_for_igvn(r); } // Update all the non-control inputs to map: assert(TypeFunc::Parms == newin->jvms()->locoff(), "parser map should contain only youngest jvms"); + bool check_elide_phi = target->is_SEL_backedge(save_block); for (uint j = 1; j < newin->req(); j++) { Node* m = map()->in(j); // Current state of target. Node* n = newin->in(j); // Incoming change to target state. @@ -1603,7 +1593,11 @@ void Parse::merge_common(Parse::Block* target, int pnum) { merge_memory_edges(n->as_MergeMem(), pnum, nophi); continue; default: // All normal stuff - if (phi == NULL) phi = ensure_phi(j, nophi); + if (phi == NULL) { + if (!check_elide_phi || !target->can_elide_SEL_phi(j)) { + phi = ensure_phi(j, nophi); + } + } break; } } @@ -1736,9 +1730,13 @@ void Parse::ensure_phis_everywhere() { uint nof_monitors = map()->jvms()->nof_monitors(); assert(TypeFunc::Parms == map()->jvms()->locoff(), "parser map should contain only youngest jvms"); + bool check_elide_phi = block()->is_SEL_head(); for (uint i = TypeFunc::Parms; i < monoff; i++) { - ensure_phi(i); + if (!check_elide_phi || !block()->can_elide_SEL_phi(i)) { + ensure_phi(i); + } } + // Even monitors need Phis, though they are well-structured. // This is true for OSR methods, and also for the rare cases where // a monitor object is the subject of a replace_in_map operation. 
diff --git a/hotspot/src/share/vm/opto/parse2.cpp b/hotspot/src/share/vm/opto/parse2.cpp index cc1d6b3e430..0f40fdd962e 100644 --- a/hotspot/src/share/vm/opto/parse2.cpp +++ b/hotspot/src/share/vm/opto/parse2.cpp @@ -100,16 +100,17 @@ Node* Parse::array_addressing(BasicType type, int vals, const Type* *result2) { // Do the range check if (GenerateRangeChecks && need_range_check) { - // Range is constant in array-oop, so we can use the original state of mem - Node* len = load_array_length(ary); Node* tst; if (sizetype->_hi <= 0) { - // If the greatest array bound is negative, we can conclude that we're + // The greatest array bound is negative, so we can conclude that we're // compiling unreachable code, but the unsigned compare trick used below // only works with non-negative lengths. Instead, hack "tst" to be zero so // the uncommon_trap path will always be taken. tst = _gvn.intcon(0); } else { + // Range is constant in array-oop, so we can use the original state of mem + Node* len = load_array_length(ary); + // Test length vs index (standard trick using unsigned compare) Node* chk = _gvn.transform( new (C, 3) CmpUNode(idx, len) ); BoolTest::mask btest = BoolTest::lt; @@ -137,9 +138,12 @@ Node* Parse::array_addressing(BasicType type, int vals, const Type* *result2) { // Check for always knowing you are throwing a range-check exception if (stopped()) return top(); - Node* ptr = array_element_address( ary, idx, type, sizetype); + Node* ptr = array_element_address(ary, idx, type, sizetype); if (result2 != NULL) *result2 = elemtype; + + assert(ptr != top(), "top should go hand-in-hand with stopped"); + return ptr; } diff --git a/hotspot/src/share/vm/opto/postaloc.cpp b/hotspot/src/share/vm/opto/postaloc.cpp index 59e6b14125d..cd881065f32 100644 --- a/hotspot/src/share/vm/opto/postaloc.cpp +++ b/hotspot/src/share/vm/opto/postaloc.cpp @@ -34,7 +34,7 @@ static bool is_single_register(uint x) { #endif } 
-//------------------------------may_be_copy_of_callee----------------------------- +//---------------------------may_be_copy_of_callee----------------------------- // Check to see if we can possibly be a copy of a callee-save value. bool PhaseChaitin::may_be_copy_of_callee( Node *def ) const { // Short circuit if there are no callee save registers @@ -225,6 +225,20 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v // Scan all registers to see if this value is around already for( uint reg = 0; reg < (uint)_max_reg; reg++ ) { + if (reg == (uint)nk_reg) { + // Found ourselves so check if there is only one user of this + // copy and keep on searching for a better copy if so. + bool ignore_self = true; + x = n->in(k); + DUIterator_Fast imax, i = x->fast_outs(imax); + Node* first = x->fast_out(i); i++; + while (i < imax && ignore_self) { + Node* use = x->fast_out(i); i++; + if (use != first) ignore_self = false; + } + if (ignore_self) continue; + } + Node *vv = value[reg]; if( !single ) { // Doubles check for aligned-adjacent pair if( (reg&1)==0 ) continue; // Wrong half of a pair diff --git a/hotspot/src/share/vm/opto/runtime.cpp b/hotspot/src/share/vm/opto/runtime.cpp index aedfceb51af..7b5effc8182 100644 --- a/hotspot/src/share/vm/opto/runtime.cpp +++ b/hotspot/src/share/vm/opto/runtime.cpp @@ -44,6 +44,8 @@ address OptoRuntime::_multianewarray2_Java = NULL; address OptoRuntime::_multianewarray3_Java = NULL; address OptoRuntime::_multianewarray4_Java = NULL; address OptoRuntime::_multianewarray5_Java = NULL; +address OptoRuntime::_g1_wb_pre_Java = NULL; +address OptoRuntime::_g1_wb_post_Java = NULL; address OptoRuntime::_vtable_must_compile_Java = NULL; address OptoRuntime::_complete_monitor_locking_Java = NULL; address OptoRuntime::_rethrow_Java = NULL; @@ -89,6 +91,8 @@ void OptoRuntime::generate(ciEnv* env) { gen(env, _multianewarray3_Java , multianewarray3_Type , multianewarray3_C , 0 , true , false, false); gen(env, 
_multianewarray4_Java , multianewarray4_Type , multianewarray4_C , 0 , true , false, false); gen(env, _multianewarray5_Java , multianewarray5_Type , multianewarray5_C , 0 , true , false, false); + gen(env, _g1_wb_pre_Java , g1_wb_pre_Type , SharedRuntime::g1_wb_pre , 0 , false, false, false); + gen(env, _g1_wb_post_Java , g1_wb_post_Type , SharedRuntime::g1_wb_post , 0 , false, false, false); gen(env, _complete_monitor_locking_Java , complete_monitor_enter_Type , SharedRuntime::complete_monitor_locking_C , 0 , false, false, false); gen(env, _rethrow_Java , rethrow_Type , rethrow_C , 2 , true , false, true ); @@ -385,6 +389,33 @@ const TypeFunc *OptoRuntime::multianewarray5_Type() { return multianewarray_Type(5); } +const TypeFunc *OptoRuntime::g1_wb_pre_Type() { + const Type **fields = TypeTuple::fields(2); + fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // original field value + fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // thread + const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields); + + // create result type (range) + fields = TypeTuple::fields(0); + const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields); + + return TypeFunc::make(domain, range); +} + +const TypeFunc *OptoRuntime::g1_wb_post_Type() { + + const Type **fields = TypeTuple::fields(2); + fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL; // Card addr + fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // thread + const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields); + + // create result type (range) + fields = TypeTuple::fields(0); + const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields); + + return TypeFunc::make(domain, range); +} + const TypeFunc *OptoRuntime::uncommon_trap_Type() { // create input type (domain) const Type **fields = TypeTuple::fields(1); diff --git a/hotspot/src/share/vm/opto/runtime.hpp b/hotspot/src/share/vm/opto/runtime.hpp index 50f11712046..8a51b2facce 100644 --- a/hotspot/src/share/vm/opto/runtime.hpp +++ 
b/hotspot/src/share/vm/opto/runtime.hpp @@ -108,6 +108,8 @@ class OptoRuntime : public AllStatic { static address _multianewarray3_Java; static address _multianewarray4_Java; static address _multianewarray5_Java; + static address _g1_wb_pre_Java; + static address _g1_wb_post_Java; static address _vtable_must_compile_Java; static address _complete_monitor_locking_Java; static address _rethrow_Java; @@ -140,6 +142,8 @@ class OptoRuntime : public AllStatic { static void multianewarray3_C(klassOopDesc* klass, int len1, int len2, int len3, JavaThread *thread); static void multianewarray4_C(klassOopDesc* klass, int len1, int len2, int len3, int len4, JavaThread *thread); static void multianewarray5_C(klassOopDesc* klass, int len1, int len2, int len3, int len4, int len5, JavaThread *thread); + static void g1_wb_pre_C(oopDesc* orig, JavaThread* thread); + static void g1_wb_post_C(void* card_addr, JavaThread* thread); public: // Slow-path Locking and Unlocking @@ -195,6 +199,8 @@ private: static address multianewarray3_Java() { return _multianewarray3_Java; } static address multianewarray4_Java() { return _multianewarray4_Java; } static address multianewarray5_Java() { return _multianewarray5_Java; } + static address g1_wb_pre_Java() { return _g1_wb_pre_Java; } + static address g1_wb_post_Java() { return _g1_wb_post_Java; } static address vtable_must_compile_stub() { return _vtable_must_compile_Java; } static address complete_monitor_locking_Java() { return _complete_monitor_locking_Java; } @@ -232,6 +238,8 @@ private: static const TypeFunc* multianewarray3_Type(); // multianewarray static const TypeFunc* multianewarray4_Type(); // multianewarray static const TypeFunc* multianewarray5_Type(); // multianewarray + static const TypeFunc* g1_wb_pre_Type(); + static const TypeFunc* g1_wb_post_Type(); static const TypeFunc* complete_monitor_enter_Type(); static const TypeFunc* complete_monitor_exit_Type(); static const TypeFunc* uncommon_trap_Type(); diff --git 
a/hotspot/src/share/vm/opto/subnode.cpp b/hotspot/src/share/vm/opto/subnode.cpp index 774aff9f430..260b5dc8af8 100644 --- a/hotspot/src/share/vm/opto/subnode.cpp +++ b/hotspot/src/share/vm/opto/subnode.cpp @@ -206,6 +206,14 @@ Node *SubINode::Ideal(PhaseGVN *phase, bool can_reshape){ if( op1 == Op_AddI && op2 == Op_AddI && in1->in(2) == in2->in(2) ) return new (phase->C, 3) SubINode( in1->in(1), in2->in(1) ); + // Convert "(A+X) - (X+B)" into "A - B" + if( op1 == Op_AddI && op2 == Op_AddI && in1->in(2) == in2->in(1) ) + return new (phase->C, 3) SubINode( in1->in(1), in2->in(2) ); + + // Convert "(X+A) - (B+X)" into "A - B" + if( op1 == Op_AddI && op2 == Op_AddI && in1->in(1) == in2->in(2) ) + return new (phase->C, 3) SubINode( in1->in(2), in2->in(1) ); + // Convert "A-(B-C)" into (A+C)-B", since add is commutative and generally // nicer to optimize than subtract. if( op2 == Op_SubI && in2->outcnt() == 1) { diff --git a/hotspot/src/share/vm/opto/type.cpp b/hotspot/src/share/vm/opto/type.cpp index 243b44c4263..3e9b66ba32e 100644 --- a/hotspot/src/share/vm/opto/type.cpp +++ b/hotspot/src/share/vm/opto/type.cpp @@ -3157,17 +3157,18 @@ static jint max_array_length(BasicType etype) { // Narrow the given size type to the index range for the given array base type. // Return NULL if the resulting int type becomes empty. 
-const TypeInt* TypeAryPtr::narrow_size_type(const TypeInt* size, BasicType elem) { +const TypeInt* TypeAryPtr::narrow_size_type(const TypeInt* size) const { jint hi = size->_hi; jint lo = size->_lo; jint min_lo = 0; - jint max_hi = max_array_length(elem); + jint max_hi = max_array_length(elem()->basic_type()); //if (index_not_size) --max_hi; // type of a valid array index, FTR bool chg = false; if (lo < min_lo) { lo = min_lo; chg = true; } if (hi > max_hi) { hi = max_hi; chg = true; } + // Negative length arrays will produce weird intermediate dead fath-path code if (lo > hi) - return NULL; + return TypeInt::ZERO; if (!chg) return size; return TypeInt::make(lo, hi, Type::WidenMin); @@ -3176,9 +3177,7 @@ const TypeInt* TypeAryPtr::narrow_size_type(const TypeInt* size, BasicType elem) //-------------------------------cast_to_size---------------------------------- const TypeAryPtr* TypeAryPtr::cast_to_size(const TypeInt* new_size) const { assert(new_size != NULL, ""); - new_size = narrow_size_type(new_size, elem()->basic_type()); - if (new_size == NULL) // Negative length arrays will produce weird - new_size = TypeInt::ZERO; // intermediate dead fast-path goo + new_size = narrow_size_type(new_size); if (new_size == size()) return this; const TypeAry* new_ary = TypeAry::make(elem(), new_size); return make(ptr(), const_oop(), new_ary, klass(), klass_is_exact(), _offset, _instance_id); diff --git a/hotspot/src/share/vm/opto/type.hpp b/hotspot/src/share/vm/opto/type.hpp index 68366edca9b..69bc06a7384 100644 --- a/hotspot/src/share/vm/opto/type.hpp +++ b/hotspot/src/share/vm/opto/type.hpp @@ -840,6 +840,7 @@ public: virtual const TypeOopPtr *cast_to_instance_id(int instance_id) const; virtual const TypeAryPtr* cast_to_size(const TypeInt* size) const; + virtual const TypeInt* narrow_size_type(const TypeInt* size) const; virtual bool empty(void) const; // TRUE if type is vacuous virtual const TypePtr *add_offset( intptr_t offset ) const; @@ -865,7 +866,6 @@ public: } static 
const TypeAryPtr *_array_body_type[T_CONFLICT+1]; // sharpen the type of an int which is used as an array size - static const TypeInt* narrow_size_type(const TypeInt* size, BasicType elem); #ifndef PRODUCT virtual void dump2( Dict &d, uint depth, outputStream *st ) const; // Specialized per-Type dumping #endif diff --git a/hotspot/src/share/vm/prims/jvm.cpp b/hotspot/src/share/vm/prims/jvm.cpp index 75becdfb25e..81302b71225 100644 --- a/hotspot/src/share/vm/prims/jvm.cpp +++ b/hotspot/src/share/vm/prims/jvm.cpp @@ -377,7 +377,11 @@ JVM_END JVM_ENTRY_NO_ENV(jlong, JVM_FreeMemory(void)) JVMWrapper("JVM_FreeMemory"); CollectedHeap* ch = Universe::heap(); - size_t n = ch->capacity() - ch->used(); + size_t n; + { + MutexLocker x(Heap_lock); + n = ch->capacity() - ch->used(); + } return convert_size_t_to_jlong(n); JVM_END @@ -624,6 +628,32 @@ JVM_ENTRY(void, JVM_ResolveClass(JNIEnv* env, jclass cls)) if (PrintJVMWarnings) warning("JVM_ResolveClass not implemented"); JVM_END +// Common implementation for JVM_FindClassFromBootLoader and +// JVM_FindClassFromLoader +static jclass jvm_find_class_from_class_loader(JNIEnv* env, const char* name, + jboolean init, jobject loader, + jboolean throwError, TRAPS) { + // Java libraries should ensure that name is never null... + if (name == NULL || (int)strlen(name) > symbolOopDesc::max_length()) { + // It's impossible to create this class; the name cannot fit + // into the constant pool. 
+ if (throwError) { + THROW_MSG_0(vmSymbols::java_lang_NoClassDefFoundError(), name); + } else { + THROW_MSG_0(vmSymbols::java_lang_ClassNotFoundException(), name); + } + } + symbolHandle h_name = oopFactory::new_symbol_handle(name, CHECK_NULL); + Handle h_loader(THREAD, JNIHandles::resolve(loader)); + jclass result = find_class_from_class_loader(env, h_name, init, h_loader, + Handle(), throwError, THREAD); + + if (TraceClassResolution && result != NULL) { + trace_class_resolution(java_lang_Class::as_klassOop(JNIHandles::resolve_non_null(result))); + } + return result; +} + // Rationale behind JVM_FindClassFromBootLoader // a> JVM_FindClassFromClassLoader was never exported in the export tables. // b> because of (a) java.dll has a direct dependecy on the unexported @@ -645,8 +675,8 @@ JVM_ENTRY(jclass, JVM_FindClassFromBootLoader(JNIEnv* env, jboolean throwError)) JVMWrapper3("JVM_FindClassFromBootLoader %s throw %s", name, throwError ? "error" : "exception"); - return JVM_FindClassFromClassLoader(env, name, JNI_FALSE, - (jobject)NULL, throwError); + return jvm_find_class_from_class_loader(env, name, JNI_FALSE, + (jobject)NULL, throwError, THREAD); JVM_END JVM_ENTRY(jclass, JVM_FindClassFromClassLoader(JNIEnv* env, const char* name, @@ -654,26 +684,8 @@ JVM_ENTRY(jclass, JVM_FindClassFromClassLoader(JNIEnv* env, const char* name, jboolean throwError)) JVMWrapper3("JVM_FindClassFromClassLoader %s throw %s", name, throwError ? "error" : "exception"); - // Java libraries should ensure that name is never null... - if (name == NULL || (int)strlen(name) > symbolOopDesc::max_length()) { - // It's impossible to create this class; the name cannot fit - // into the constant pool. 
- if (throwError) { - THROW_MSG_0(vmSymbols::java_lang_NoClassDefFoundError(), name); - } else { - THROW_MSG_0(vmSymbols::java_lang_ClassNotFoundException(), name); - } - } - symbolHandle h_name = oopFactory::new_symbol_handle(name, CHECK_NULL); - Handle h_loader(THREAD, JNIHandles::resolve(loader)); - jclass result = find_class_from_class_loader(env, h_name, init, h_loader, - Handle(), throwError, thread); - - if (TraceClassResolution && result != NULL) { - trace_class_resolution(java_lang_Class::as_klassOop(JNIHandles::resolve_non_null(result))); - } - - return result; + return jvm_find_class_from_class_loader(env, name, init, loader, + throwError, THREAD); JVM_END diff --git a/hotspot/src/share/vm/prims/jvmtiEnvBase.cpp b/hotspot/src/share/vm/prims/jvmtiEnvBase.cpp index 62591145feb..cae4a6a76d5 100644 --- a/hotspot/src/share/vm/prims/jvmtiEnvBase.cpp +++ b/hotspot/src/share/vm/prims/jvmtiEnvBase.cpp @@ -121,7 +121,7 @@ JvmtiEnvBase::JvmtiEnvBase() : _env_event_enable() { JvmtiEventController::env_initialize((JvmtiEnv*)this); #ifdef JVMTI_TRACE - _jvmti_external.functions = strlen(TraceJVMTI)? &jvmtiTrace_Interface : &jvmti_Interface; + _jvmti_external.functions = TraceJVMTI != NULL ? &jvmtiTrace_Interface : &jvmti_Interface; #else _jvmti_external.functions = &jvmti_Interface; #endif diff --git a/hotspot/src/share/vm/prims/jvmtiExport.cpp b/hotspot/src/share/vm/prims/jvmtiExport.cpp index 7bb33674d87..a3894b3d66e 100644 --- a/hotspot/src/share/vm/prims/jvmtiExport.cpp +++ b/hotspot/src/share/vm/prims/jvmtiExport.cpp @@ -2433,18 +2433,7 @@ JvmtiGCMarker::JvmtiGCMarker(bool full) : _full(full), _invocation_count(0) { // so we record the number of collections so that it can be checked in // the destructor. 
if (!_full) { - if (Universe::heap()->kind() == CollectedHeap::GenCollectedHeap) { - GenCollectedHeap* gch = GenCollectedHeap::heap(); - assert(gch->n_gens() == 2, "configuration not recognized"); - _invocation_count = (unsigned int)gch->get_gen(1)->stat_record()->invocations; - } else { -#ifndef SERIALGC - assert(Universe::heap()->kind() == CollectedHeap::ParallelScavengeHeap, "checking"); - _invocation_count = PSMarkSweep::total_invocations(); -#else // SERIALGC - fatal("SerialGC only supported in this configuration."); -#endif // SERIALGC - } + _invocation_count = Universe::heap()->total_full_collections(); } // Do clean up tasks that need to be done at a safepoint @@ -2466,20 +2455,7 @@ JvmtiGCMarker::~JvmtiGCMarker() { // generation but could have ended up doing a "full" GC - check the // GC count to see. if (!_full) { - if (Universe::heap()->kind() == CollectedHeap::GenCollectedHeap) { - GenCollectedHeap* gch = GenCollectedHeap::heap(); - if (_invocation_count != (unsigned int)gch->get_gen(1)->stat_record()->invocations) { - _full = true; - } - } else { -#ifndef SERIALGC - if (_invocation_count != PSMarkSweep::total_invocations()) { - _full = true; - } -#else // SERIALGC - fatal("SerialGC only supported in this configuration."); -#endif // SERIALGC - } + _full = (_invocation_count != Universe::heap()->total_full_collections()); } // Full collection probably means the perm generation has been GC'ed diff --git a/hotspot/src/share/vm/prims/jvmtiTagMap.cpp b/hotspot/src/share/vm/prims/jvmtiTagMap.cpp index 96db99c4112..152789815af 100644 --- a/hotspot/src/share/vm/prims/jvmtiTagMap.cpp +++ b/hotspot/src/share/vm/prims/jvmtiTagMap.cpp @@ -400,16 +400,28 @@ MemRegion JvmtiTagMap::_young_gen; // get the memory region used for the young generation void JvmtiTagMap::get_young_generation() { - if (Universe::heap()->kind() == CollectedHeap::GenCollectedHeap) { - GenCollectedHeap* gch = GenCollectedHeap::heap(); - _young_gen = gch->get_gen(0)->reserved(); - } else { + 
CollectedHeap* ch = Universe::heap(); + switch (ch->kind()) { + case (CollectedHeap::GenCollectedHeap): { + _young_gen = ((GenCollectedHeap*)ch)->get_gen(0)->reserved(); + break; + } #ifndef SERIALGC - ParallelScavengeHeap* psh = ParallelScavengeHeap::heap(); - _young_gen= psh->young_gen()->reserved(); -#else // SERIALGC - fatal("SerialGC only supported in this configuration."); -#endif // SERIALGC + case (CollectedHeap::ParallelScavengeHeap): { + _young_gen = ((ParallelScavengeHeap*)ch)->young_gen()->reserved(); + break; + } + case (CollectedHeap::G1CollectedHeap): { + // Until a more satisfactory solution is implemented, all + // oops in the tag map will require rehash at each gc. + // This is a correct, if extremely inefficient solution. + // See RFE 6621729 for related commentary. + _young_gen = ch->reserved_region(); + break; + } +#endif // !SERIALGC + default: + ShouldNotReachHere(); } } diff --git a/hotspot/src/share/vm/prims/jvmtiTrace.cpp b/hotspot/src/share/vm/prims/jvmtiTrace.cpp index 2fff2cfc2da..09d30fbda16 100644 --- a/hotspot/src/share/vm/prims/jvmtiTrace.cpp +++ b/hotspot/src/share/vm/prims/jvmtiTrace.cpp @@ -73,7 +73,7 @@ void JvmtiTrace::initialize() { const char *very_end; const char *curr; - if (strlen(TraceJVMTI)) { + if (TraceJVMTI != NULL) { curr = TraceJVMTI; } else { curr = ""; // hack in fixed tracing here diff --git a/hotspot/src/share/vm/prims/unsafe.cpp b/hotspot/src/share/vm/prims/unsafe.cpp index dbdbcead8fc..c17e0214841 100644 --- a/hotspot/src/share/vm/prims/unsafe.cpp +++ b/hotspot/src/share/vm/prims/unsafe.cpp @@ -891,6 +891,7 @@ UNSAFE_ENTRY(jboolean, Unsafe_CompareAndSwapObject(JNIEnv *env, jobject unsafe, oop e = JNIHandles::resolve(e_h); oop p = JNIHandles::resolve(obj); HeapWord* addr = (HeapWord *)index_oop_from_field_offset_long(p, offset); + update_barrier_set_pre((void*)addr, e); oop res = oopDesc::atomic_compare_exchange_oop(x, addr, e); jboolean success = (res == e); if (success) diff --git 
a/hotspot/src/share/vm/runtime/aprofiler.hpp b/hotspot/src/share/vm/runtime/aprofiler.hpp index 3a7b9b01ba2..bb0d1f95c56 100644 --- a/hotspot/src/share/vm/runtime/aprofiler.hpp +++ b/hotspot/src/share/vm/runtime/aprofiler.hpp @@ -33,6 +33,7 @@ class AllocationProfiler: AllStatic { friend class GenCollectedHeap; + friend class G1CollectedHeap; friend class MarkSweep; private: static bool _active; // tells whether profiler is active diff --git a/hotspot/src/share/vm/runtime/arguments.cpp b/hotspot/src/share/vm/runtime/arguments.cpp index 62e764d1a7a..b8bf9f1cd9c 100644 --- a/hotspot/src/share/vm/runtime/arguments.cpp +++ b/hotspot/src/share/vm/runtime/arguments.cpp @@ -947,18 +947,17 @@ static void no_shared_spaces() { // UseParNewGC and not explicitly set ParallelGCThreads we // set it, unless this is a single cpu machine. void Arguments::set_parnew_gc_flags() { - assert(!UseSerialGC && !UseParallelGC, "control point invariant"); + assert(!UseSerialGC && !UseParallelGC && !UseG1GC, + "control point invariant"); + assert(UseParNewGC, "Error"); // Turn off AdaptiveSizePolicy by default for parnew until it is // complete. - if (UseParNewGC && - FLAG_IS_DEFAULT(UseAdaptiveSizePolicy)) { + if (FLAG_IS_DEFAULT(UseAdaptiveSizePolicy)) { FLAG_SET_DEFAULT(UseAdaptiveSizePolicy, false); } - if (FLAG_IS_DEFAULT(UseParNewGC) && ParallelGCThreads > 1) { - FLAG_SET_DEFAULT(UseParNewGC, true); - } else if (UseParNewGC && ParallelGCThreads == 0) { + if (ParallelGCThreads == 0) { FLAG_SET_DEFAULT(ParallelGCThreads, Abstract_VM_Version::parallel_worker_threads()); if (FLAG_IS_DEFAULT(ParallelGCThreads) && ParallelGCThreads == 1) { @@ -994,15 +993,12 @@ void Arguments::set_parnew_gc_flags() { // further optimization and tuning efforts, and would almost // certainly gain from analysis of platform and environment. 
void Arguments::set_cms_and_parnew_gc_flags() { - if (UseSerialGC || UseParallelGC) { - return; - } - + assert(!UseSerialGC && !UseParallelGC, "Error"); assert(UseConcMarkSweepGC, "CMS is expected to be on here"); // If we are using CMS, we prefer to UseParNewGC, // unless explicitly forbidden. - if (!UseParNewGC && FLAG_IS_DEFAULT(UseParNewGC)) { + if (FLAG_IS_DEFAULT(UseParNewGC)) { FLAG_SET_ERGO(bool, UseParNewGC, true); } @@ -1182,6 +1178,7 @@ void Arguments::set_ergonomics_flags() { // machine class and automatic selection policy. if (!UseSerialGC && !UseConcMarkSweepGC && + !UseG1GC && !UseParNewGC && !DumpSharedSpaces && FLAG_IS_DEFAULT(UseParallelGC)) { @@ -1200,9 +1197,13 @@ void Arguments::set_ergonomics_flags() { // Check that UseCompressedOops can be set with the max heap size allocated // by ergonomics. if (MaxHeapSize <= max_heap_for_compressed_oops()) { - if (FLAG_IS_DEFAULT(UseCompressedOops)) { + if (FLAG_IS_DEFAULT(UseCompressedOops) && !UseG1GC) { // Turn off until bug is fixed. + // the following line to return it to default status. // FLAG_SET_ERGO(bool, UseCompressedOops, true); + } else if (UseCompressedOops && UseG1GC) { + warning(" UseCompressedOops does not currently work with UseG1GC; switching off UseCompressedOops. "); + FLAG_SET_DEFAULT(UseCompressedOops, false); } #ifdef _WIN64 if (UseLargePages && UseCompressedOops) { @@ -1213,8 +1214,7 @@ void Arguments::set_ergonomics_flags() { #endif // _WIN64 } else { if (UseCompressedOops && !FLAG_IS_DEFAULT(UseCompressedOops)) { - // If specified, give a warning - warning( "Max heap size too large for Compressed Oops"); + warning("Max heap size too large for Compressed Oops"); FLAG_SET_DEFAULT(UseCompressedOops, false); } } @@ -1224,6 +1224,7 @@ void Arguments::set_ergonomics_flags() { } void Arguments::set_parallel_gc_flags() { + assert(UseParallelGC || UseParallelOldGC, "Error"); // If parallel old was requested, automatically enable parallel scavenge. 
if (UseParallelOldGC && !UseParallelGC && FLAG_IS_DEFAULT(UseParallelGC)) { FLAG_SET_DEFAULT(UseParallelGC, true); @@ -1235,51 +1236,8 @@ void Arguments::set_parallel_gc_flags() { FLAG_SET_ERGO(uintx, ParallelGCThreads, Abstract_VM_Version::parallel_worker_threads()); - if (FLAG_IS_DEFAULT(MaxHeapSize)) { - const uint64_t reasonable_fraction = - os::physical_memory() / DefaultMaxRAMFraction; - const uint64_t maximum_size = (uint64_t) - (FLAG_IS_DEFAULT(DefaultMaxRAM) && UseCompressedOops ? - MIN2(max_heap_for_compressed_oops(), DefaultMaxRAM) : - DefaultMaxRAM); - size_t reasonable_max = - (size_t) os::allocatable_physical_memory(reasonable_fraction); - if (reasonable_max > maximum_size) { - reasonable_max = maximum_size; - } - if (PrintGCDetails && Verbose) { - // Cannot use gclog_or_tty yet. - tty->print_cr(" Max heap size for server class platform " - SIZE_FORMAT, reasonable_max); - } - // If the initial_heap_size has not been set with -Xms, - // then set it as fraction of size of physical memory - // respecting the maximum and minimum sizes of the heap. - if (initial_heap_size() == 0) { - const uint64_t reasonable_initial_fraction = - os::physical_memory() / DefaultInitialRAMFraction; - const size_t reasonable_initial = - (size_t) os::allocatable_physical_memory(reasonable_initial_fraction); - const size_t minimum_size = NewSize + OldSize; - set_initial_heap_size(MAX2(MIN2(reasonable_initial, reasonable_max), - minimum_size)); - // Currently the minimum size and the initial heap sizes are the same. - set_min_heap_size(initial_heap_size()); - if (PrintGCDetails && Verbose) { - // Cannot use gclog_or_tty yet. - tty->print_cr(" Initial heap size for server class platform " - SIZE_FORMAT, initial_heap_size()); - } - } else { - // An minimum size was specified on the command line. Be sure - // that the maximum size is consistent. 
- if (initial_heap_size() > reasonable_max) { - reasonable_max = initial_heap_size(); - } - } - FLAG_SET_ERGO(uintx, MaxHeapSize, (uintx) reasonable_max); - } - + // PS is a server collector, setup the heap sizes accordingly. + set_server_heap_size(); // If InitialSurvivorRatio or MinSurvivorRatio were not specified, but the // SurvivorRatio has been set, reset their default values to SurvivorRatio + // 2. By doing this we make SurvivorRatio also work for Parallel Scavenger. @@ -1307,6 +1265,70 @@ void Arguments::set_parallel_gc_flags() { } } +void Arguments::set_g1_gc_flags() { + assert(UseG1GC, "Error"); + // G1 is a server collector, setup the heap sizes accordingly. + set_server_heap_size(); +#ifdef COMPILER1 + FastTLABRefill = false; +#endif + FLAG_SET_DEFAULT(ParallelGCThreads, + Abstract_VM_Version::parallel_worker_threads()); + if (ParallelGCThreads == 0) { + FLAG_SET_DEFAULT(ParallelGCThreads, + Abstract_VM_Version::parallel_worker_threads +()); + } + no_shared_spaces(); +} + +void Arguments::set_server_heap_size() { + if (FLAG_IS_DEFAULT(MaxHeapSize)) { + const uint64_t reasonable_fraction = + os::physical_memory() / DefaultMaxRAMFraction; + const uint64_t maximum_size = (uint64_t) + (FLAG_IS_DEFAULT(DefaultMaxRAM) && UseCompressedOops ? + MIN2(max_heap_for_compressed_oops(), DefaultMaxRAM) : + DefaultMaxRAM); + size_t reasonable_max = + (size_t) os::allocatable_physical_memory(reasonable_fraction); + if (reasonable_max > maximum_size) { + reasonable_max = maximum_size; + } + if (PrintGCDetails && Verbose) { + // Cannot use gclog_or_tty yet. + tty->print_cr(" Max heap size for server class platform " + SIZE_FORMAT, reasonable_max); + } + // If the initial_heap_size has not been set with -Xms, + // then set it as fraction of size of physical memory + // respecting the maximum and minimum sizes of the heap. 
+ if (initial_heap_size() == 0) { + const uint64_t reasonable_initial_fraction = + os::physical_memory() / DefaultInitialRAMFraction; + const size_t reasonable_initial = + (size_t) os::allocatable_physical_memory(reasonable_initial_fraction); + const size_t minimum_size = NewSize + OldSize; + set_initial_heap_size(MAX2(MIN2(reasonable_initial, reasonable_max), + minimum_size)); + // Currently the minimum size and the initial heap sizes are the same. + set_min_heap_size(initial_heap_size()); + if (PrintGCDetails && Verbose) { + // Cannot use gclog_or_tty yet. + tty->print_cr(" Initial heap size for server class platform " + SIZE_FORMAT, initial_heap_size()); + } + } else { + // A minimum size was specified on the command line. Be sure + // that the maximum size is consistent. + if (initial_heap_size() > reasonable_max) { + reasonable_max = initial_heap_size(); + } + } + FLAG_SET_ERGO(uintx, MaxHeapSize, (uintx) reasonable_max); + } +} + // This must be called after ergonomics because we want bytecode rewriting // if the server compiler is used, or if UseSharedSpaces is disabled. 
void Arguments::set_bytecode_flags() { @@ -1393,12 +1415,13 @@ static void set_serial_gc_flags() { FLAG_SET_DEFAULT(UseConcMarkSweepGC, false); FLAG_SET_DEFAULT(UseParallelGC, false); FLAG_SET_DEFAULT(UseParallelOldGC, false); + FLAG_SET_DEFAULT(UseG1GC, false); } static bool verify_serial_gc_flags() { return (UseSerialGC && - !(UseParNewGC || UseConcMarkSweepGC || UseParallelGC || - UseParallelOldGC)); + !(UseParNewGC || UseConcMarkSweepGC || UseG1GC || + UseParallelGC || UseParallelOldGC)); } // Check consistency of GC selection @@ -1501,8 +1524,8 @@ bool Arguments::check_vm_args_consistency() { status = status && verify_percentage(GCHeapFreeLimit, "GCHeapFreeLimit"); // Check user specified sharing option conflict with Parallel GC - bool cannot_share = (UseConcMarkSweepGC || UseParallelGC || - UseParallelOldGC || UseParNewGC || + bool cannot_share = (UseConcMarkSweepGC || UseG1GC || UseParNewGC || + UseParallelGC || UseParallelOldGC || SOLARIS_ONLY(UseISM) NOT_SOLARIS(UseLargePages)); if (cannot_share) { @@ -1542,11 +1565,6 @@ bool Arguments::check_vm_args_consistency() { "The CMS collector (-XX:+UseConcMarkSweepGC) must be " "selected in order\nto use CMSIncrementalMode.\n"); status = false; - } else if (!UseTLAB) { - jio_fprintf(defaultStream::error_stream(), - "error: CMSIncrementalMode requires thread-local " - "allocation buffers\n(-XX:+UseTLAB).\n"); - status = false; } else { status = status && verify_percentage(CMSIncrementalDutyCycle, "CMSIncrementalDutyCycle"); @@ -1566,13 +1584,6 @@ bool Arguments::check_vm_args_consistency() { } } - if (UseNUMA && !UseTLAB) { - jio_fprintf(defaultStream::error_stream(), - "error: NUMA allocator (-XX:+UseNUMA) requires thread-local " - "allocation\nbuffers (-XX:+UseTLAB).\n"); - status = false; - } - // CMS space iteration, which FLSVerifyAllHeapreferences entails, // insists that we hold the requisite locks so that the iteration is // MT-safe. 
For the verification at start-up and shut-down, we don't @@ -2361,10 +2372,15 @@ jint Arguments::finalize_vm_init_args(SysClassPath* scp_p, bool scp_assembly_req SOLARIS_ONLY(FLAG_SET_DEFAULT(UseMPSS, false)); SOLARIS_ONLY(FLAG_SET_DEFAULT(UseISM, false)); } + #else if (!FLAG_IS_DEFAULT(OptoLoopAlignment) && FLAG_IS_DEFAULT(MaxLoopPad)) { FLAG_SET_DEFAULT(MaxLoopPad, OptoLoopAlignment-1); } + // Temporary disable bulk zeroing reduction with G1. See CR 6627983. + if (UseG1GC) { + FLAG_SET_DEFAULT(ReduceBulkZeroing, false); + } #endif if (!check_vm_args_consistency()) { @@ -2519,12 +2535,29 @@ jint Arguments::parse(const JavaVMInitArgs* args) { } } + // Parse JavaVMInitArgs structure passed in, as well as JAVA_TOOL_OPTIONS and _JAVA_OPTIONS jint result = parse_vm_init_args(args); if (result != JNI_OK) { return result; } + // These are hacks until G1 is fully supported and tested + // but lets you force -XX:+UseG1GC in PRT and get it where it (mostly) works + if (UseG1GC) { + if (UseConcMarkSweepGC || UseParNewGC || UseParallelGC || UseParallelOldGC || UseSerialGC) { +#ifndef PRODUCT + tty->print_cr("-XX:+UseG1GC is incompatible with other collectors, using UseG1GC"); +#endif // PRODUCT + UseConcMarkSweepGC = false; + UseParNewGC = false; + UseParallelGC = false; + UseParallelOldGC = false; + UseSerialGC = false; + } + no_shared_spaces(); + } + #ifndef PRODUCT if (TraceBytecodesAt != 0) { TraceBytecodes = true; @@ -2570,6 +2603,12 @@ jint Arguments::parse(const JavaVMInitArgs* args) { // Set some flags for ParNew set_parnew_gc_flags(); } + // Temporary; make the "if" an "else-if" before + // we integrate G1. XXX + if (UseG1GC) { + // Set some flags for garbage-first, if needed. 
+ set_g1_gc_flags(); + } #ifdef SERIALGC assert(verify_serial_gc_flags(), "SerialGC unset"); diff --git a/hotspot/src/share/vm/runtime/arguments.hpp b/hotspot/src/share/vm/runtime/arguments.hpp index 1a7e3849fe4..5dfd4aa78f9 100644 --- a/hotspot/src/share/vm/runtime/arguments.hpp +++ b/hotspot/src/share/vm/runtime/arguments.hpp @@ -294,10 +294,14 @@ class Arguments : AllStatic { // CMS/ParNew garbage collectors static void set_parnew_gc_flags(); static void set_cms_and_parnew_gc_flags(); - // UseParallelGC + // UseParallel[Old]GC static void set_parallel_gc_flags(); + // Garbage-First (UseG1GC) + static void set_g1_gc_flags(); // GC ergonomics static void set_ergonomics_flags(); + // Setup heap size for a server platform + static void set_server_heap_size(); // Based on automatic selection criteria, should the // low pause collector be used. static bool should_auto_select_low_pause_collector(); diff --git a/hotspot/src/share/vm/runtime/globals.cpp b/hotspot/src/share/vm/runtime/globals.cpp index 5adadcb98b5..d4a4b416d4f 100644 --- a/hotspot/src/share/vm/runtime/globals.cpp +++ b/hotspot/src/share/vm/runtime/globals.cpp @@ -28,7 +28,8 @@ RUNTIME_FLAGS(MATERIALIZE_DEVELOPER_FLAG, MATERIALIZE_PD_DEVELOPER_FLAG, \ MATERIALIZE_PRODUCT_FLAG, MATERIALIZE_PD_PRODUCT_FLAG, \ - MATERIALIZE_DIAGNOSTIC_FLAG, MATERIALIZE_NOTPRODUCT_FLAG, \ + MATERIALIZE_DIAGNOSTIC_FLAG, MATERIALIZE_EXPERIMENTAL_FLAG, \ + MATERIALIZE_NOTPRODUCT_FLAG, \ MATERIALIZE_MANAGEABLE_FLAG, MATERIALIZE_PRODUCT_RW_FLAG, \ MATERIALIZE_LP64_PRODUCT_FLAG) @@ -37,12 +38,16 @@ RUNTIME_OS_FLAGS(MATERIALIZE_DEVELOPER_FLAG, MATERIALIZE_PD_DEVELOPER_FLAG, \ MATERIALIZE_DIAGNOSTIC_FLAG, MATERIALIZE_NOTPRODUCT_FLAG) bool Flag::is_unlocker() const { - return strcmp(name, "UnlockDiagnosticVMOptions") == 0; + return strcmp(name, "UnlockDiagnosticVMOptions") == 0 || + strcmp(name, "UnlockExperimentalVMOptions") == 0; + } bool Flag::is_unlocked() const { if (strcmp(kind, "{diagnostic}") == 0) { return 
UnlockDiagnosticVMOptions; + } else if (strcmp(kind, "{experimental}") == 0) { + return UnlockExperimentalVMOptions; } else { return true; } @@ -125,6 +130,7 @@ void Flag::print_as_flag(outputStream* st) { #define RUNTIME_PRODUCT_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, "{product}", DEFAULT }, #define RUNTIME_PD_PRODUCT_FLAG_STRUCT(type, name, doc) { #type, XSTR(name), &name, "{pd product}", DEFAULT }, #define RUNTIME_DIAGNOSTIC_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, "{diagnostic}", DEFAULT }, +#define RUNTIME_EXPERIMENTAL_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, "{experimental}", DEFAULT }, #define RUNTIME_MANAGEABLE_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, "{manageable}", DEFAULT }, #define RUNTIME_PRODUCT_RW_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, "{product rw}", DEFAULT }, @@ -172,8 +178,11 @@ void Flag::print_as_flag(outputStream* st) { static Flag flagTable[] = { - RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_STRUCT, RUNTIME_PD_DEVELOP_FLAG_STRUCT, RUNTIME_PRODUCT_FLAG_STRUCT, RUNTIME_PD_PRODUCT_FLAG_STRUCT, RUNTIME_DIAGNOSTIC_FLAG_STRUCT, RUNTIME_NOTPRODUCT_FLAG_STRUCT, RUNTIME_MANAGEABLE_FLAG_STRUCT, RUNTIME_PRODUCT_RW_FLAG_STRUCT, RUNTIME_LP64_PRODUCT_FLAG_STRUCT) + RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_STRUCT, RUNTIME_PD_DEVELOP_FLAG_STRUCT, RUNTIME_PRODUCT_FLAG_STRUCT, RUNTIME_PD_PRODUCT_FLAG_STRUCT, RUNTIME_DIAGNOSTIC_FLAG_STRUCT, RUNTIME_EXPERIMENTAL_FLAG_STRUCT, RUNTIME_NOTPRODUCT_FLAG_STRUCT, RUNTIME_MANAGEABLE_FLAG_STRUCT, RUNTIME_PRODUCT_RW_FLAG_STRUCT, RUNTIME_LP64_PRODUCT_FLAG_STRUCT) RUNTIME_OS_FLAGS(RUNTIME_DEVELOP_FLAG_STRUCT, RUNTIME_PD_DEVELOP_FLAG_STRUCT, RUNTIME_PRODUCT_FLAG_STRUCT, RUNTIME_PD_PRODUCT_FLAG_STRUCT, RUNTIME_DIAGNOSTIC_FLAG_STRUCT, RUNTIME_NOTPRODUCT_FLAG_STRUCT) +#ifndef SERIALGC + G1_FLAGS(RUNTIME_DEVELOP_FLAG_STRUCT, RUNTIME_PD_DEVELOP_FLAG_STRUCT, RUNTIME_PRODUCT_FLAG_STRUCT, RUNTIME_PD_PRODUCT_FLAG_STRUCT, 
RUNTIME_DIAGNOSTIC_FLAG_STRUCT, RUNTIME_EXPERIMENTAL_FLAG_STRUCT, RUNTIME_NOTPRODUCT_FLAG_STRUCT, RUNTIME_MANAGEABLE_FLAG_STRUCT, RUNTIME_PRODUCT_RW_FLAG_STRUCT) +#endif // SERIALGC #ifdef COMPILER1 C1_FLAGS(C1_DEVELOP_FLAG_STRUCT, C1_PD_DEVELOP_FLAG_STRUCT, C1_PRODUCT_FLAG_STRUCT, C1_PD_PRODUCT_FLAG_STRUCT, C1_NOTPRODUCT_FLAG_STRUCT) #endif @@ -196,7 +205,8 @@ Flag* Flag::find_flag(char* name, size_t length) { for (Flag* current = &flagTable[0]; current->name; current++) { if (str_equal(current->name, name, length)) { if (!(current->is_unlocked() || current->is_unlocker())) { - // disable use of diagnostic flags until they are unlocked + // disable use of diagnostic or experimental flags until they + // are explicitly unlocked return NULL; } return current; @@ -355,8 +365,11 @@ bool CommandLineFlags::ccstrAtPut(char* name, size_t len, ccstr* value, FlagValu if (result == NULL) return false; if (!result->is_ccstr()) return false; ccstr old_value = result->get_ccstr(); - char* new_value = NEW_C_HEAP_ARRAY(char, strlen(*value)+1); - strcpy(new_value, *value); + char* new_value = NULL; + if (*value != NULL) { + new_value = NEW_C_HEAP_ARRAY(char, strlen(*value)+1); + strcpy(new_value, *value); + } result->set_ccstr(new_value); if (result->origin == DEFAULT && old_value != NULL) { // Prior value is NOT heap allocated, but was a literal constant. diff --git a/hotspot/src/share/vm/runtime/globals.hpp b/hotspot/src/share/vm/runtime/globals.hpp index 4f4b9b54bcd..cba2b991403 100644 --- a/hotspot/src/share/vm/runtime/globals.hpp +++ b/hotspot/src/share/vm/runtime/globals.hpp @@ -255,7 +255,19 @@ class CommandLineFlags { // diagnostic information about VM problems. To use a VM diagnostic // option, you must first specify +UnlockDiagnosticVMOptions. // (This master switch also affects the behavior of -Xprintflags.) - +// +// experimental flags are in support of features that are not +// part of the officially supported product, but are available +// for experimenting with. 
They could, for example, be performance +// features that may not have undergone full or rigorous QA, but which may +// help performance in some cases and released for experimentation +// by the community of users and developers. This flag also allows one to +// be able to build a fully supported product that nonetheless also +// ships with some unsupported, lightly tested, experimental features. +// Like the UnlockDiagnosticVMOptions flag above, there is a corresponding +// UnlockExperimentalVMOptions flag, which allows the control and +// modification of the experimental flags. +// // manageable flags are writeable external product flags. // They are dynamically writeable through the JDK management interface // (com.sun.management.HotSpotDiagnosticMXBean API) and also through JConsole. @@ -285,7 +297,7 @@ class CommandLineFlags { // Note that when there is a need to support develop flags to be writeable, // it can be done in the same way as product_rw. -#define RUNTIME_FLAGS(develop, develop_pd, product, product_pd, diagnostic, notproduct, manageable, product_rw, lp64_product) \ +#define RUNTIME_FLAGS(develop, develop_pd, product, product_pd, diagnostic, experimental, notproduct, manageable, product_rw, lp64_product) \ \ lp64_product(bool, UseCompressedOops, false, \ "Use 32-bit object references in 64-bit VM. " \ @@ -307,7 +319,10 @@ class CommandLineFlags { "Prints flags that appeared on the command line") \ \ diagnostic(bool, UnlockDiagnosticVMOptions, trueInDebug, \ - "Enable processing of flags relating to field diagnostics") \ + "Enable normal processing of flags relating to field diagnostics")\ + \ + experimental(bool, UnlockExperimentalVMOptions, false, \ + "Enable normal processing of flags relating to experimental features")\ \ product(bool, JavaMonitorsInStackTrace, true, \ "Print info. 
about Java monitor locks when the stacks are dumped")\ @@ -315,6 +330,12 @@ class CommandLineFlags { product_pd(bool, UseLargePages, \ "Use large page memory") \ \ + product_pd(bool, UseLargePagesIndividualAllocation, \ + "Allocate large pages individually for better affinity") \ + \ + develop(bool, LargePagesIndividualAllocationInjectError, false, \ + "Fail large pages individual allocation") \ + \ develop(bool, TracePageSizes, false, \ "Trace page size selection and usage.") \ \ @@ -692,7 +713,7 @@ class CommandLineFlags { diagnostic(bool, PrintAssembly, false, \ "Print assembly code (using external disassembler.so)") \ \ - diagnostic(ccstr, PrintAssemblyOptions, false, \ + diagnostic(ccstr, PrintAssemblyOptions, NULL, \ "Options string passed to disassembler.so") \ \ diagnostic(bool, PrintNMethods, false, \ @@ -833,7 +854,7 @@ class CommandLineFlags { "Use LWP-based instead of libthread-based synchronization " \ "(SPARC only)") \ \ - product(ccstr, SyncKnobs, "", \ + product(ccstr, SyncKnobs, NULL, \ "(Unstable) Various monitor synchronization tunables") \ \ product(intx, EmitSync, 0, \ @@ -976,6 +997,12 @@ class CommandLineFlags { product(bool, UseXmmI2F, false, \ "Use SSE2 CVTDQ2PS instruction to convert Integer to Float") \ \ + product(bool, UseXMMForArrayCopy, false, \ + "Use SSE2 MOVQ instruction for Arraycopy") \ + \ + product(bool, UseUnalignedLoadStores, false, \ + "Use SSE2 MOVDQU instruction for Arraycopy") \ + \ product(intx, FieldsAllocationStyle, 1, \ "0 - type based with oops first, 1 - with oops last") \ \ @@ -1017,7 +1044,7 @@ class CommandLineFlags { notproduct(bool, TraceJVMCalls, false, \ "Trace JVM calls") \ \ - product(ccstr, TraceJVMTI, "", \ + product(ccstr, TraceJVMTI, NULL, \ "Trace flags for JVMTI functions and events") \ \ /* This option can change an EMCP method into an obsolete method. 
*/ \ @@ -1124,7 +1151,10 @@ class CommandLineFlags { /* gc */ \ \ product(bool, UseSerialGC, false, \ - "Tells whether the VM should use serial garbage collector") \ + "Use the serial garbage collector") \ + \ + experimental(bool, UseG1GC, false, \ + "Use the Garbage-First garbage collector") \ \ product(bool, UseParallelGC, false, \ "Use the Parallel Scavenge garbage collector") \ @@ -1139,10 +1169,6 @@ class CommandLineFlags { "In the Parallel Old garbage collector use parallel dense" \ " prefix update") \ \ - develop(bool, UseParallelOldGCChunkPointerCalc, true, \ - "In the Parallel Old garbage collector use chucks to calculate" \ - " new object locations") \ - \ product(uintx, HeapMaximumCompactionInterval, 20, \ "How often should we maximally compact the heap (not allowing " \ "any dead space)") \ @@ -1171,21 +1197,17 @@ class CommandLineFlags { product(uintx, ParallelCMSThreads, 0, \ "Max number of threads CMS will use for concurrent work") \ \ - develop(bool, VerifyParallelOldWithMarkSweep, false, \ - "Use the MarkSweep code to verify phases of Parallel Old") \ - \ - develop(uintx, VerifyParallelOldWithMarkSweepInterval, 1, \ - "Interval at which the MarkSweep code is used to verify " \ - "phases of Parallel Old") \ - \ develop(bool, ParallelOldMTUnsafeMarkBitMap, false, \ "Use the Parallel Old MT unsafe in marking the bitmap") \ \ develop(bool, ParallelOldMTUnsafeUpdateLiveData, false, \ "Use the Parallel Old MT unsafe in update of live size") \ \ - develop(bool, TraceChunkTasksQueuing, false, \ - "Trace the queuing of the chunk tasks") \ + develop(bool, TraceRegionTasksQueuing, false, \ + "Trace the queuing of the region tasks") \ + \ + product(uintx, ParallelMarkingThreads, 0, \ + "Number of marking threads concurrent gc will use") \ \ product(uintx, YoungPLABSize, 4096, \ "Size of young gen promotion labs (in HeapWords)") \ @@ -1283,6 +1305,12 @@ class CommandLineFlags { "The amount of young gen chosen by default per GC worker " \ "thread available ") \ 
\ + product(bool, GCOverheadReporting, false, \ + "Enables the GC overhead reporting facility") \ + \ + product(intx, GCOverheadReportingPeriodMS, 100, \ + "Reporting period for conc GC overhead reporting, in ms ") \ + \ product(bool, CMSIncrementalMode, false, \ "Whether CMS GC should operate in \"incremental\" mode") \ \ @@ -1611,6 +1639,9 @@ class CommandLineFlags { product(bool, ZeroTLAB, false, \ "Zero out the newly created TLAB") \ \ + product(bool, FastTLABRefill, true, \ + "Use fast TLAB refill code") \ + \ product(bool, PrintTLAB, false, \ "Print various TLAB related information") \ \ @@ -1800,6 +1831,9 @@ class CommandLineFlags { diagnostic(bool, VerifyDuringGC, false, \ "Verify memory system during GC (between phases)") \ \ + diagnostic(bool, GCParallelVerificationEnabled, true, \ + "Enable parallel memory system verification") \ + \ diagnostic(bool, VerifyRememberedSets, false, \ "Verify GC remembered sets") \ \ @@ -2527,7 +2561,7 @@ class CommandLineFlags { develop(intx, MaxRecursiveInlineLevel, 1, \ "maximum number of nested recursive calls that are inlined") \ \ - develop(intx, InlineSmallCode, 1000, \ + product(intx, InlineSmallCode, 1000, \ "Only inline already compiled methods if their code size is " \ "less than this") \ \ @@ -2811,6 +2845,12 @@ class CommandLineFlags { "how many entries we'll try to leave on the stack during " \ "parallel GC") \ \ + product(intx, DCQBarrierQueueBufferSize, 256, \ + "Number of elements in a dirty card queue buffer") \ + \ + product(intx, DCQBarrierProcessCompletedThreshold, 5, \ + "Number of completed dirty card buffers to trigger processing.") \ + \ /* stack parameters */ \ product_pd(intx, StackYellowPages, \ "Number of yellow zone (recoverable overflows) pages") \ @@ -3232,6 +3272,7 @@ class CommandLineFlags { #define DECLARE_PRODUCT_FLAG(type, name, value, doc) extern "C" type name; #define DECLARE_PD_PRODUCT_FLAG(type, name, doc) extern "C" type name; #define DECLARE_DIAGNOSTIC_FLAG(type, name, value, doc) 
extern "C" type name; +#define DECLARE_EXPERIMENTAL_FLAG(type, name, value, doc) extern "C" type name; #define DECLARE_MANAGEABLE_FLAG(type, name, value, doc) extern "C" type name; #define DECLARE_PRODUCT_RW_FLAG(type, name, value, doc) extern "C" type name; #ifdef PRODUCT @@ -3254,6 +3295,7 @@ class CommandLineFlags { #define MATERIALIZE_PRODUCT_FLAG(type, name, value, doc) type name = value; #define MATERIALIZE_PD_PRODUCT_FLAG(type, name, doc) type name = pd_##name; #define MATERIALIZE_DIAGNOSTIC_FLAG(type, name, value, doc) type name = value; +#define MATERIALIZE_EXPERIMENTAL_FLAG(type, name, value, doc) type name = value; #define MATERIALIZE_MANAGEABLE_FLAG(type, name, value, doc) type name = value; #define MATERIALIZE_PRODUCT_RW_FLAG(type, name, value, doc) type name = value; #ifdef PRODUCT @@ -3271,6 +3313,6 @@ class CommandLineFlags { #define MATERIALIZE_LP64_PRODUCT_FLAG(type, name, value, doc) /* flag is constant */ #endif // _LP64 -RUNTIME_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG, DECLARE_LP64_PRODUCT_FLAG) +RUNTIME_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG, DECLARE_LP64_PRODUCT_FLAG) RUNTIME_OS_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_NOTPRODUCT_FLAG) diff --git a/hotspot/src/share/vm/runtime/globals_extension.hpp b/hotspot/src/share/vm/runtime/globals_extension.hpp index 1d9dc0387ca..b129555ef83 100644 --- a/hotspot/src/share/vm/runtime/globals_extension.hpp +++ b/hotspot/src/share/vm/runtime/globals_extension.hpp @@ -30,6 +30,7 @@ #define RUNTIME_PRODUCT_FLAG_MEMBER(type, name, value, doc) FLAG_MEMBER(name), 
#define RUNTIME_PD_PRODUCT_FLAG_MEMBER(type, name, doc) FLAG_MEMBER(name), #define RUNTIME_DIAGNOSTIC_FLAG_MEMBER(type, name, value, doc) FLAG_MEMBER(name), +#define RUNTIME_EXPERIMENTAL_FLAG_MEMBER(type, name, value, doc) FLAG_MEMBER(name), #define RUNTIME_MANAGEABLE_FLAG_MEMBER(type, name, value, doc) FLAG_MEMBER(name), #define RUNTIME_PRODUCT_RW_FLAG_MEMBER(type, name, value, doc) FLAG_MEMBER(name), #ifdef PRODUCT @@ -74,21 +75,16 @@ #endif typedef enum { - RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER, RUNTIME_PD_DEVELOP_FLAG_MEMBER, RUNTIME_PRODUCT_FLAG_MEMBER, - RUNTIME_PD_PRODUCT_FLAG_MEMBER, RUNTIME_DIAGNOSTIC_FLAG_MEMBER, - RUNTIME_NOTPRODUCT_FLAG_MEMBER, RUNTIME_MANAGEABLE_FLAG_MEMBER, - RUNTIME_PRODUCT_RW_FLAG_MEMBER, - RUNTIME_LP64_PRODUCT_FLAG_MEMBER) - RUNTIME_OS_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER, RUNTIME_PD_DEVELOP_FLAG_MEMBER, RUNTIME_PRODUCT_FLAG_MEMBER, - RUNTIME_PD_PRODUCT_FLAG_MEMBER, RUNTIME_DIAGNOSTIC_FLAG_MEMBER, - RUNTIME_NOTPRODUCT_FLAG_MEMBER) + RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER, RUNTIME_PD_DEVELOP_FLAG_MEMBER, RUNTIME_PRODUCT_FLAG_MEMBER, RUNTIME_PD_PRODUCT_FLAG_MEMBER, RUNTIME_DIAGNOSTIC_FLAG_MEMBER, RUNTIME_EXPERIMENTAL_FLAG_MEMBER, RUNTIME_NOTPRODUCT_FLAG_MEMBER, RUNTIME_MANAGEABLE_FLAG_MEMBER, RUNTIME_PRODUCT_RW_FLAG_MEMBER, RUNTIME_LP64_PRODUCT_FLAG_MEMBER) + RUNTIME_OS_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER, RUNTIME_PD_DEVELOP_FLAG_MEMBER, RUNTIME_PRODUCT_FLAG_MEMBER, RUNTIME_PD_PRODUCT_FLAG_MEMBER, RUNTIME_DIAGNOSTIC_FLAG_MEMBER, RUNTIME_NOTPRODUCT_FLAG_MEMBER) +#ifndef KERNEL + G1_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER, RUNTIME_PD_DEVELOP_FLAG_MEMBER, RUNTIME_PRODUCT_FLAG_MEMBER, RUNTIME_PD_PRODUCT_FLAG_MEMBER, RUNTIME_DIAGNOSTIC_FLAG_MEMBER, RUNTIME_EXPERIMENTAL_FLAG_MEMBER, RUNTIME_NOTPRODUCT_FLAG_MEMBER, RUNTIME_MANAGEABLE_FLAG_MEMBER, RUNTIME_PRODUCT_RW_FLAG_MEMBER) +#endif #ifdef COMPILER1 - C1_FLAGS(C1_DEVELOP_FLAG_MEMBER, C1_PD_DEVELOP_FLAG_MEMBER, C1_PRODUCT_FLAG_MEMBER, C1_PD_PRODUCT_FLAG_MEMBER, - C1_NOTPRODUCT_FLAG_MEMBER) + 
C1_FLAGS(C1_DEVELOP_FLAG_MEMBER, C1_PD_DEVELOP_FLAG_MEMBER, C1_PRODUCT_FLAG_MEMBER, C1_PD_PRODUCT_FLAG_MEMBER, C1_NOTPRODUCT_FLAG_MEMBER) #endif #ifdef COMPILER2 - C2_FLAGS(C2_DEVELOP_FLAG_MEMBER, C2_PD_DEVELOP_FLAG_MEMBER, C2_PRODUCT_FLAG_MEMBER, C2_PD_PRODUCT_FLAG_MEMBER, - C2_DIAGNOSTIC_FLAG_MEMBER, C2_NOTPRODUCT_FLAG_MEMBER) + C2_FLAGS(C2_DEVELOP_FLAG_MEMBER, C2_PD_DEVELOP_FLAG_MEMBER, C2_PRODUCT_FLAG_MEMBER, C2_PD_PRODUCT_FLAG_MEMBER, C2_DIAGNOSTIC_FLAG_MEMBER, C2_NOTPRODUCT_FLAG_MEMBER) #endif NUM_CommandLineFlag } CommandLineFlag; @@ -100,6 +96,7 @@ typedef enum { #define RUNTIME_PRODUCT_FLAG_MEMBER_WITH_TYPE(type, name, value, doc) FLAG_MEMBER_WITH_TYPE(name,type), #define RUNTIME_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE(type, name, doc) FLAG_MEMBER_WITH_TYPE(name,type), #define RUNTIME_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE(type, name, value, doc) FLAG_MEMBER_WITH_TYPE(name,type), +#define RUNTIME_EXPERIMENTAL_FLAG_MEMBER_WITH_TYPE(type, name, value, doc) FLAG_MEMBER_WITH_TYPE(name,type), #define RUNTIME_MANAGEABLE_FLAG_MEMBER_WITH_TYPE(type, name, value, doc) FLAG_MEMBER_WITH_TYPE(name,type), #define RUNTIME_PRODUCT_RW_FLAG_MEMBER_WITH_TYPE(type, name, value, doc) FLAG_MEMBER_WITH_TYPE(name,type), #ifdef PRODUCT @@ -144,24 +141,47 @@ typedef enum { #endif typedef enum { - RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER_WITH_TYPE, RUNTIME_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, - RUNTIME_PRODUCT_FLAG_MEMBER_WITH_TYPE, RUNTIME_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, + RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER_WITH_TYPE, + RUNTIME_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, + RUNTIME_PRODUCT_FLAG_MEMBER_WITH_TYPE, + RUNTIME_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, RUNTIME_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE, + RUNTIME_EXPERIMENTAL_FLAG_MEMBER_WITH_TYPE, RUNTIME_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE, RUNTIME_MANAGEABLE_FLAG_MEMBER_WITH_TYPE, RUNTIME_PRODUCT_RW_FLAG_MEMBER_WITH_TYPE, RUNTIME_LP64_PRODUCT_FLAG_MEMBER_WITH_TYPE) -RUNTIME_OS_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER_WITH_TYPE, 
RUNTIME_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, - RUNTIME_PRODUCT_FLAG_MEMBER_WITH_TYPE, RUNTIME_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, - RUNTIME_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE, - RUNTIME_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE) + RUNTIME_OS_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER_WITH_TYPE, + RUNTIME_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, + RUNTIME_PRODUCT_FLAG_MEMBER_WITH_TYPE, + RUNTIME_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, + RUNTIME_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE, + RUNTIME_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE) +#ifndef KERNEL + G1_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER_WITH_TYPE, + RUNTIME_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, + RUNTIME_PRODUCT_FLAG_MEMBER_WITH_TYPE, + RUNTIME_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, + RUNTIME_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE, + RUNTIME_EXPERIMENTAL_FLAG_MEMBER_WITH_TYPE, + RUNTIME_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE, + RUNTIME_MANAGEABLE_FLAG_MEMBER_WITH_TYPE, + RUNTIME_PRODUCT_RW_FLAG_MEMBER_WITH_TYPE) +#endif // KERNEL #ifdef COMPILER1 - C1_FLAGS(C1_DEVELOP_FLAG_MEMBER_WITH_TYPE, C1_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, C1_PRODUCT_FLAG_MEMBER_WITH_TYPE, - C1_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, C1_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE) + C1_FLAGS(C1_DEVELOP_FLAG_MEMBER_WITH_TYPE, + C1_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, + C1_PRODUCT_FLAG_MEMBER_WITH_TYPE, + C1_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, + C1_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE) #endif #ifdef COMPILER2 - C2_FLAGS(C2_DEVELOP_FLAG_MEMBER_WITH_TYPE, C2_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, C2_PRODUCT_FLAG_MEMBER_WITH_TYPE, - C2_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, C2_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE, C2_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE) + C2_FLAGS(C2_DEVELOP_FLAG_MEMBER_WITH_TYPE, + C2_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, + C2_PRODUCT_FLAG_MEMBER_WITH_TYPE, + C2_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, + C2_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE, + C2_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE) #endif NUM_CommandLineFlagWithType } CommandLineFlagWithType; diff --git a/hotspot/src/share/vm/runtime/mutexLocker.cpp b/hotspot/src/share/vm/runtime/mutexLocker.cpp index 
bc61fd22dc4..c10e08c039a 100644 --- a/hotspot/src/share/vm/runtime/mutexLocker.cpp +++ b/hotspot/src/share/vm/runtime/mutexLocker.cpp @@ -47,7 +47,7 @@ Mutex* JfieldIdCreation_lock = NULL; Monitor* JNICritical_lock = NULL; Mutex* JvmtiThreadState_lock = NULL; Monitor* JvmtiPendingEvent_lock = NULL; -Mutex* Heap_lock = NULL; +Monitor* Heap_lock = NULL; Mutex* ExpandHeap_lock = NULL; Mutex* AdapterHandlerLibrary_lock = NULL; Mutex* SignatureHandlerLibrary_lock = NULL; @@ -67,7 +67,18 @@ Mutex* STS_init_lock = NULL; Monitor* SLT_lock = NULL; Monitor* iCMS_lock = NULL; Monitor* FullGCCount_lock = NULL; +Monitor* CMark_lock = NULL; +Monitor* ZF_mon = NULL; +Monitor* Cleanup_mon = NULL; +Monitor* G1ConcRefine_mon = NULL; +Mutex* SATB_Q_FL_lock = NULL; +Monitor* SATB_Q_CBL_mon = NULL; +Mutex* Shared_SATB_Q_lock = NULL; +Mutex* DirtyCardQ_FL_lock = NULL; +Monitor* DirtyCardQ_CBL_mon = NULL; +Mutex* Shared_DirtyCardQ_lock = NULL; Mutex* ParGCRareEvent_lock = NULL; +Mutex* EvacFailureStack_lock = NULL; Mutex* DerivedPointerTableGC_lock = NULL; Mutex* Compile_lock = NULL; Monitor* MethodCompileQueue_lock = NULL; @@ -102,6 +113,9 @@ Mutex* PerfDataMemAlloc_lock = NULL; Mutex* PerfDataManager_lock = NULL; Mutex* OopMapCacheAlloc_lock = NULL; +Mutex* MMUTracker_lock = NULL; +Mutex* HotCardCache_lock = NULL; + Monitor* GCTaskManager_lock = NULL; Mutex* Management_lock = NULL; @@ -150,6 +164,23 @@ void mutex_init() { def(iCMS_lock , Monitor, special, true ); // CMS incremental mode start/stop notification def(FullGCCount_lock , Monitor, leaf, true ); // in support of ExplicitGCInvokesConcurrent } + if (UseG1GC) { + def(CMark_lock , Monitor, nonleaf, true ); // coordinate concurrent mark thread + def(ZF_mon , Monitor, leaf, true ); + def(Cleanup_mon , Monitor, nonleaf, true ); + def(G1ConcRefine_mon , Monitor, nonleaf, true ); + def(SATB_Q_FL_lock , Mutex , special, true ); + def(SATB_Q_CBL_mon , Monitor, nonleaf, true ); + def(Shared_SATB_Q_lock , Mutex, nonleaf, true ); + + 
def(DirtyCardQ_FL_lock , Mutex , special, true ); + def(DirtyCardQ_CBL_mon , Monitor, nonleaf, true ); + def(Shared_DirtyCardQ_lock , Mutex, nonleaf, true ); + + def(MMUTracker_lock , Mutex , leaf , true ); + def(HotCardCache_lock , Mutex , special , true ); + def(EvacFailureStack_lock , Mutex , nonleaf , true ); + } def(ParGCRareEvent_lock , Mutex , leaf , true ); def(DerivedPointerTableGC_lock , Mutex, leaf, true ); def(CodeCache_lock , Mutex , special, true ); @@ -203,7 +234,7 @@ void mutex_init() { def(SLT_lock , Monitor, nonleaf, false ); // used in CMS GC for locking PLL lock } - def(Heap_lock , Mutex , nonleaf+1, false); + def(Heap_lock , Monitor, nonleaf+1, false); def(JfieldIdCreation_lock , Mutex , nonleaf+1, true ); // jfieldID, Used in VM_Operation def(JNICachedItableIndex_lock , Mutex , nonleaf+1, false); // Used to cache an itable index during JNI invoke diff --git a/hotspot/src/share/vm/runtime/mutexLocker.hpp b/hotspot/src/share/vm/runtime/mutexLocker.hpp index 244fb33e503..e020f5a829c 100644 --- a/hotspot/src/share/vm/runtime/mutexLocker.hpp +++ b/hotspot/src/share/vm/runtime/mutexLocker.hpp @@ -38,7 +38,7 @@ extern Mutex* JfieldIdCreation_lock; // a lock on creating JNI stati extern Monitor* JNICritical_lock; // a lock used while entering and exiting JNI critical regions, allows GC to sometimes get in extern Mutex* JvmtiThreadState_lock; // a lock on modification of JVMTI thread data extern Monitor* JvmtiPendingEvent_lock; // a lock on the JVMTI pending events list -extern Mutex* Heap_lock; // a lock on the heap +extern Monitor* Heap_lock; // a lock on the heap extern Mutex* ExpandHeap_lock; // a lock on expanding the heap extern Mutex* AdapterHandlerLibrary_lock; // a lock on the AdapterHandlerLibrary extern Mutex* SignatureHandlerLibrary_lock; // a lock on the SignatureHandlerLibrary @@ -60,8 +60,30 @@ extern Mutex* STS_init_lock; // coordinate initialization of extern Monitor* SLT_lock; // used in CMS GC for acquiring PLL extern Monitor* 
iCMS_lock; // CMS incremental mode start/stop notification extern Monitor* FullGCCount_lock; // in support of "concurrent" full gc +extern Monitor* CMark_lock; // used for concurrent mark thread coordination +extern Monitor* ZF_mon; // used for G1 conc zero-fill. +extern Monitor* Cleanup_mon; // used for G1 conc cleanup. +extern Monitor* G1ConcRefine_mon; // used for G1 conc-refine + // coordination. + +extern Mutex* SATB_Q_FL_lock; // Protects SATB Q + // buffer free list. +extern Monitor* SATB_Q_CBL_mon; // Protects SATB Q + // completed buffer queue. +extern Mutex* Shared_SATB_Q_lock; // Lock protecting SATB + // queue shared by + // non-Java threads. + +extern Mutex* DirtyCardQ_FL_lock; // Protects dirty card Q + // buffer free list. +extern Monitor* DirtyCardQ_CBL_mon; // Protects dirty card Q + // completed buffer queue. +extern Mutex* Shared_DirtyCardQ_lock; // Lock protecting dirty card + // queue shared by + // non-Java threads. // (see option ExplicitGCInvokesConcurrent) extern Mutex* ParGCRareEvent_lock; // Synchronizes various (rare) parallel GC ops. 
+extern Mutex* EvacFailureStack_lock; // guards the evac failure scan stack extern Mutex* Compile_lock; // a lock held when Compilation is updating code (used to block CodeCache traversal, CHA updates, etc) extern Monitor* MethodCompileQueue_lock; // a lock held when method compilations are enqueued, dequeued #ifdef TIERED @@ -93,6 +115,10 @@ extern Mutex* PerfDataManager_lock; // a long on access to PerfData extern Mutex* ParkerFreeList_lock; extern Mutex* OopMapCacheAlloc_lock; // protects allocation of oop_map caches +extern Mutex* MMUTracker_lock; // protects the MMU + // tracker data structures +extern Mutex* HotCardCache_lock; // protects the hot card cache + extern Mutex* Management_lock; // a lock used to serialize JVM management extern Monitor* LowMemory_lock; // a lock used for low memory detection diff --git a/hotspot/src/share/vm/runtime/os.hpp b/hotspot/src/share/vm/runtime/os.hpp index 8111d3937e4..423cc800529 100644 --- a/hotspot/src/share/vm/runtime/os.hpp +++ b/hotspot/src/share/vm/runtime/os.hpp @@ -105,6 +105,18 @@ class os: AllStatic { static jlong elapsed_counter(); static jlong elapsed_frequency(); + // The "virtual time" of a thread is the amount of time a thread has + // actually run. The first function indicates whether the OS supports + // this functionality for the current thread, and if so: + // * the second enables vtime tracking (if that is required). + // * the third tells whether vtime is enabled. + // * the fourth returns the elapsed virtual time for the current + // thread. + static bool supports_vtime(); + static bool enable_vtime(); + static bool vtime_enabled(); + static double elapsedVTime(); + // Return current local time in a string (YYYY-MM-DD HH:MM:SS). // It is MT safe, but not async-safe, as reading time zone // information may require a lock on some platforms. 
diff --git a/hotspot/src/share/vm/runtime/sharedRuntime.cpp b/hotspot/src/share/vm/runtime/sharedRuntime.cpp index 39ac102ec00..ff4fe2a4f8e 100644 --- a/hotspot/src/share/vm/runtime/sharedRuntime.cpp +++ b/hotspot/src/share/vm/runtime/sharedRuntime.cpp @@ -111,6 +111,25 @@ void SharedRuntime::print_ic_miss_histogram() { } #endif // PRODUCT +#ifndef SERIALGC + +// G1 write-barrier pre: executed before a pointer store. +JRT_LEAF(void, SharedRuntime::g1_wb_pre(oopDesc* orig, JavaThread *thread)) + if (orig == NULL) { + assert(false, "should be optimized out"); + return; + } + // store the original value that was in the field reference + thread->satb_mark_queue().enqueue(orig); +JRT_END + +// G1 write-barrier post: executed after a pointer store. +JRT_LEAF(void, SharedRuntime::g1_wb_post(void* card_addr, JavaThread* thread)) + thread->dirty_card_queue().enqueue(card_addr); +JRT_END + +#endif // !SERIALGC + JRT_LEAF(jlong, SharedRuntime::lmul(jlong y, jlong x)) return x * y; diff --git a/hotspot/src/share/vm/runtime/sharedRuntime.hpp b/hotspot/src/share/vm/runtime/sharedRuntime.hpp index a785955d44c..d040660dcfa 100644 --- a/hotspot/src/share/vm/runtime/sharedRuntime.hpp +++ b/hotspot/src/share/vm/runtime/sharedRuntime.hpp @@ -99,6 +99,12 @@ class SharedRuntime: AllStatic { static address raw_exception_handler_for_return_address(address return_address); static address exception_handler_for_return_address(address return_address); +#ifndef SERIALGC + // G1 write barriers + static void g1_wb_pre(oopDesc* orig, JavaThread *thread); + static void g1_wb_post(void* card_addr, JavaThread* thread); +#endif // !SERIALGC + // exception handling and implicit exceptions static address compute_compiled_exc_handler(nmethod* nm, address ret_pc, Handle& exception, bool force_unwind, bool top_frame_only); diff --git a/hotspot/src/share/vm/runtime/task.cpp b/hotspot/src/share/vm/runtime/task.cpp index 056f6f33558..6848f76e000 100644 --- a/hotspot/src/share/vm/runtime/task.cpp +++ 
b/hotspot/src/share/vm/runtime/task.cpp @@ -67,7 +67,6 @@ void PeriodicTask::real_time_tick(size_t delay_time) { PeriodicTask::PeriodicTask(size_t interval_time) : _counter(0), _interval(interval_time) { - assert(is_init_completed(), "Periodic tasks should not start during VM initialization"); // Sanity check the interval time assert(_interval >= PeriodicTask::min_interval && _interval <= PeriodicTask::max_interval && diff --git a/hotspot/src/share/vm/runtime/thread.cpp b/hotspot/src/share/vm/runtime/thread.cpp index 1de74558377..4d97d62c68f 100644 --- a/hotspot/src/share/vm/runtime/thread.cpp +++ b/hotspot/src/share/vm/runtime/thread.cpp @@ -1138,6 +1138,10 @@ void WatcherThread::print_on(outputStream* st) const { void JavaThread::initialize() { // Initialize fields + + // Set the claimed par_id to -1 (ie not claiming any par_ids) + set_claimed_par_id(-1); + set_saved_exception_pc(NULL); set_threadObj(NULL); _anchor.clear(); @@ -1209,7 +1213,18 @@ void JavaThread::initialize() { pd_initialize(); } -JavaThread::JavaThread(bool is_attaching) : Thread() { +#ifndef SERIALGC +SATBMarkQueueSet JavaThread::_satb_mark_queue_set; +DirtyCardQueueSet JavaThread::_dirty_card_queue_set; +#endif // !SERIALGC + +JavaThread::JavaThread(bool is_attaching) : + Thread() +#ifndef SERIALGC + , _satb_mark_queue(&_satb_mark_queue_set), + _dirty_card_queue(&_dirty_card_queue_set) +#endif // !SERIALGC +{ initialize(); _is_attaching = is_attaching; } @@ -1255,7 +1270,13 @@ void JavaThread::block_if_vm_exited() { // Remove this ifdef when C1 is ported to the compiler interface. 
static void compiler_thread_entry(JavaThread* thread, TRAPS); -JavaThread::JavaThread(ThreadFunction entry_point, size_t stack_sz) : Thread() { +JavaThread::JavaThread(ThreadFunction entry_point, size_t stack_sz) : + Thread() +#ifndef SERIALGC + , _satb_mark_queue(&_satb_mark_queue_set), + _dirty_card_queue(&_dirty_card_queue_set) +#endif // !SERIALGC +{ if (TraceThreadEvents) { tty->print_cr("creating thread %p", this); } @@ -2964,10 +2985,6 @@ jint Threads::create_vm(JavaVMInitArgs* args, bool* canTryAgain) { if (UseStringCache) { // Forcibly initialize java/lang/String and mutate the private // static final "stringCacheEnabled" field before we start creating instances -#ifdef ASSERT - klassOop tmp_k = SystemDictionary::find(vmSymbolHandles::java_lang_String(), Handle(), Handle(), CHECK_0); - assert(tmp_k == NULL, "java/lang/String should not be loaded yet"); -#endif klassOop k_o = SystemDictionary::resolve_or_null(vmSymbolHandles::java_lang_String(), Handle(), Handle(), CHECK_0); KlassHandle k = KlassHandle(THREAD, k_o); guarantee(k.not_null(), "Must find java/lang/String"); @@ -3071,9 +3088,14 @@ jint Threads::create_vm(JavaVMInitArgs* args, bool* canTryAgain) { #ifndef SERIALGC // Support for ConcurrentMarkSweep. This should be cleaned up - // and better encapsulated. XXX YSR - if (UseConcMarkSweepGC) { - ConcurrentMarkSweepThread::makeSurrogateLockerThread(THREAD); + // and better encapsulated. The ugly nested if test would go away + // once things are properly refactored. 
XXX YSR + if (UseConcMarkSweepGC || UseG1GC) { + if (UseConcMarkSweepGC) { + ConcurrentMarkSweepThread::makeSurrogateLockerThread(THREAD); + } else { + ConcurrentMarkThread::makeSurrogateLockerThread(THREAD); + } if (HAS_PENDING_EXCEPTION) { vm_exit_during_initialization(Handle(THREAD, PENDING_EXCEPTION)); } diff --git a/hotspot/src/share/vm/runtime/thread.hpp b/hotspot/src/share/vm/runtime/thread.hpp index 72277e039b3..8d05504bb6a 100644 --- a/hotspot/src/share/vm/runtime/thread.hpp +++ b/hotspot/src/share/vm/runtime/thread.hpp @@ -783,6 +783,18 @@ class JavaThread: public Thread { } _jmp_ring[ jump_ring_buffer_size ]; #endif /* PRODUCT */ +#ifndef SERIALGC + // Support for G1 barriers + + ObjPtrQueue _satb_mark_queue; // Thread-local log for SATB barrier. + // Set of all such queues. + static SATBMarkQueueSet _satb_mark_queue_set; + + DirtyCardQueue _dirty_card_queue; // Thread-local log for dirty cards. + // Set of all such queues. + static DirtyCardQueueSet _dirty_card_queue_set; +#endif // !SERIALGC + friend class VMThread; friend class ThreadWaitTransition; friend class VM_Exit; @@ -1168,6 +1180,11 @@ class JavaThread: public Thread { static ByteSize do_not_unlock_if_synchronized_offset() { return byte_offset_of(JavaThread, _do_not_unlock_if_synchronized); } +#ifndef SERIALGC + static ByteSize satb_mark_queue_offset() { return byte_offset_of(JavaThread, _satb_mark_queue); } + static ByteSize dirty_card_queue_offset() { return byte_offset_of(JavaThread, _dirty_card_queue); } +#endif // !SERIALGC + // Returns the jni environment for this thread JNIEnv* jni_environment() { return &_jni_environment; } @@ -1414,6 +1431,20 @@ public: _stack_size_at_create = value; } +#ifndef SERIALGC + // SATB marking queue support + ObjPtrQueue& satb_mark_queue() { return _satb_mark_queue; } + static SATBMarkQueueSet& satb_mark_queue_set() { + return _satb_mark_queue_set; + } + + // Dirty card queue support + DirtyCardQueue& dirty_card_queue() { return _dirty_card_queue; } + 
static DirtyCardQueueSet& dirty_card_queue_set() { + return _dirty_card_queue_set; + } +#endif // !SERIALGC + // Machine dependent stuff #include "incls/_thread_pd.hpp.incl" @@ -1445,6 +1476,14 @@ public: // clearing/querying jni attach status bool is_attaching() const { return _is_attaching; } void set_attached() { _is_attaching = false; OrderAccess::fence(); } +private: + // This field is used to determine if a thread has claimed + // a par_id: it is -1 if the thread has not claimed a par_id; + // otherwise its value is the par_id that has been claimed. + int _claimed_par_id; +public: + int get_claimed_par_id() { return _claimed_par_id; } + void set_claimed_par_id(int id) { _claimed_par_id = id;} }; // Inline implementation of JavaThread::current diff --git a/hotspot/src/share/vm/runtime/virtualspace.cpp b/hotspot/src/share/vm/runtime/virtualspace.cpp index cbcc3520e37..5cd996194d9 100644 --- a/hotspot/src/share/vm/runtime/virtualspace.cpp +++ b/hotspot/src/share/vm/runtime/virtualspace.cpp @@ -251,24 +251,16 @@ void ReservedSpace::initialize(size_t size, size_t alignment, bool large, // increase size to a multiple of the desired alignment size = align_size_up(size, alignment); size_t extra_size = size + alignment; - char* extra_base = os::reserve_memory(extra_size, NULL, alignment); - if (extra_base == NULL) return; - // Do manual alignement - base = (char*) align_size_up((uintptr_t) extra_base, alignment); - assert(base >= extra_base, "just checking"); - // Release unused areas - size_t unused_bottom_size = base - extra_base; - size_t unused_top_size = extra_size - size - unused_bottom_size; - assert(unused_bottom_size % os::vm_allocation_granularity() == 0, - "size not allocation aligned"); - assert(unused_top_size % os::vm_allocation_granularity() == 0, - "size not allocation aligned"); - if (unused_bottom_size > 0) { - os::release_memory(extra_base, unused_bottom_size); - } - if (unused_top_size > 0) { - os::release_memory(base + size, unused_top_size); - } 
+ do { + char* extra_base = os::reserve_memory(extra_size, NULL, alignment); + if (extra_base == NULL) return; + // Do manual alignement + base = (char*) align_size_up((uintptr_t) extra_base, alignment); + assert(base >= extra_base, "just checking"); + // Re-reserve the region at the aligned base address. + os::release_memory(extra_base, extra_size); + base = os::reserve_memory(size, base); + } while (base == NULL); } } // Done diff --git a/hotspot/src/share/vm/runtime/vmStructs.cpp b/hotspot/src/share/vm/runtime/vmStructs.cpp index e557d853db1..bc8ea34d52e 100644 --- a/hotspot/src/share/vm/runtime/vmStructs.cpp +++ b/hotspot/src/share/vm/runtime/vmStructs.cpp @@ -302,7 +302,7 @@ static inline uint64_t cast_uint64_t(size_t x) nonstatic_field(CardTableModRefBS, _guard_region, MemRegion) \ nonstatic_field(CardTableModRefBS, byte_map_base, jbyte*) \ \ - nonstatic_field(CardTableRS, _ct_bs, CardTableModRefBS) \ + nonstatic_field(CardTableRS, _ct_bs, CardTableModRefBSForCTRS*) \ \ nonstatic_field(CollectedHeap, _reserved, MemRegion) \ nonstatic_field(SharedHeap, _perm_gen, PermGen*) \ @@ -995,6 +995,7 @@ static inline uint64_t cast_uint64_t(size_t x) declare_toplevel_type(BarrierSet) \ declare_type(ModRefBarrierSet, BarrierSet) \ declare_type(CardTableModRefBS, ModRefBarrierSet) \ + declare_type(CardTableModRefBSForCTRS, CardTableModRefBS) \ declare_toplevel_type(GenRemSet) \ declare_type(CardTableRS, GenRemSet) \ declare_toplevel_type(BlockOffsetSharedArray) \ @@ -1022,6 +1023,10 @@ static inline uint64_t cast_uint64_t(size_t x) declare_toplevel_type(BlockOffsetSharedArray*) \ declare_toplevel_type(GenRemSet*) \ declare_toplevel_type(CardTableRS*) \ + declare_toplevel_type(CardTableModRefBS*) \ + declare_toplevel_type(CardTableModRefBS**) \ + declare_toplevel_type(CardTableModRefBSForCTRS*) \ + declare_toplevel_type(CardTableModRefBSForCTRS**) \ declare_toplevel_type(CollectedHeap*) \ declare_toplevel_type(ContiguousSpace*) \ declare_toplevel_type(DefNewGeneration*) \ 
diff --git a/hotspot/src/share/vm/runtime/vm_operations.hpp b/hotspot/src/share/vm/runtime/vm_operations.hpp index 673ba2bbd63..a2ed20b3b02 100644 --- a/hotspot/src/share/vm/runtime/vm_operations.hpp +++ b/hotspot/src/share/vm/runtime/vm_operations.hpp @@ -53,8 +53,13 @@ template(ParallelGCFailedAllocation) \ template(ParallelGCFailedPermanentAllocation) \ template(ParallelGCSystemGC) \ + template(CGC_Operation) \ template(CMS_Initial_Mark) \ template(CMS_Final_Remark) \ + template(G1CollectFull) \ + template(G1CollectForAllocation) \ + template(G1IncCollectionPause) \ + template(G1PopRegionCollectionPause) \ template(EnableBiasedLocking) \ template(RevokeBias) \ template(BulkRevokeBias) \ diff --git a/hotspot/src/share/vm/services/heapDumper.cpp b/hotspot/src/share/vm/services/heapDumper.cpp index 4ad842ad747..bf7aaf1a914 100644 --- a/hotspot/src/share/vm/services/heapDumper.cpp +++ b/hotspot/src/share/vm/services/heapDumper.cpp @@ -343,7 +343,8 @@ typedef enum { // Default stack trace ID (used for dummy HPROF_TRACE record) enum { - STACK_TRACE_ID = 1 + STACK_TRACE_ID = 1, + INITIAL_CLASS_COUNT = 200 }; @@ -408,6 +409,7 @@ class DumpWriter : public StackObj { void write_u8(u8 x); void write_objectID(oop o); void write_classID(Klass* k); + void write_id(u4 x); }; DumpWriter::DumpWriter(const char* path) { @@ -548,6 +550,14 @@ void DumpWriter::write_objectID(oop o) { #endif } +void DumpWriter::write_id(u4 x) { +#ifdef _LP64 + write_u8((u8) x); +#else + write_u4(x); +#endif +} + // We use java mirror as the class ID void DumpWriter::write_classID(Klass* k) { write_objectID(k->java_mirror()); @@ -596,6 +606,8 @@ class DumperSupport : AllStatic { static void dump_object_array(DumpWriter* writer, objArrayOop array); // creates HPROF_GC_PRIM_ARRAY_DUMP record for the given type array static void dump_prim_array(DumpWriter* writer, typeArrayOop array); + // create HPROF_FRAME record for the given method and bci + static void dump_stack_frame(DumpWriter* writer, int 
frame_serial_num, int class_serial_num, methodOop m, int bci); }; // write a header of the given type @@ -1070,6 +1082,29 @@ void DumperSupport::dump_prim_array(DumpWriter* writer, typeArrayOop array) { } } +// create a HPROF_FRAME record of the given methodOop and bci +void DumperSupport::dump_stack_frame(DumpWriter* writer, + int frame_serial_num, + int class_serial_num, + methodOop m, + int bci) { + int line_number; + if (m->is_native()) { + line_number = -3; // native frame + } else { + line_number = m->line_number_from_bci(bci); + } + + write_header(writer, HPROF_FRAME, 4*oopSize + 2*sizeof(u4)); + writer->write_id(frame_serial_num); // frame serial number + writer->write_objectID(m->name()); // method's name + writer->write_objectID(m->signature()); // method's signature + + assert(Klass::cast(m->method_holder())->oop_is_instance(), "not instanceKlass"); + writer->write_objectID(instanceKlass::cast(m->method_holder())->source_file_name()); // source file name + writer->write_u4(class_serial_num); // class serial number + writer->write_u4((u4) line_number); // line number +} // Support class used to generate HPROF_UTF8 records from the entries in the // SymbolTable. 
@@ -1104,12 +1139,15 @@ class JNILocalsDumper : public OopClosure { private: DumpWriter* _writer; u4 _thread_serial_num; + int _frame_num; DumpWriter* writer() const { return _writer; } public: JNILocalsDumper(DumpWriter* writer, u4 thread_serial_num) { _writer = writer; _thread_serial_num = thread_serial_num; + _frame_num = -1; // default - empty stack } + void set_frame_number(int n) { _frame_num = n; } void do_oop(oop* obj_p); void do_oop(narrowOop* obj_p) { ShouldNotReachHere(); } }; @@ -1122,7 +1160,7 @@ void JNILocalsDumper::do_oop(oop* obj_p) { writer()->write_u1(HPROF_GC_ROOT_JNI_LOCAL); writer()->write_objectID(o); writer()->write_u4(_thread_serial_num); - writer()->write_u4((u4)-1); // empty + writer()->write_u4((u4)_frame_num); } } @@ -1269,6 +1307,9 @@ class VM_HeapDumper : public VM_GC_Operation { bool _gc_before_heap_dump; bool _is_segmented_dump; jlong _dump_start; + GrowableArray* _klass_map; + ThreadStackTrace** _stack_traces; + int _num_threads; // accessors DumpWriter* writer() const { return _writer; } @@ -1291,9 +1332,16 @@ class VM_HeapDumper : public VM_GC_Operation { static void do_basic_type_array_class_dump(klassOop k); // HPROF_GC_ROOT_THREAD_OBJ records - void do_thread(JavaThread* thread, u4 thread_serial_num); + int do_thread(JavaThread* thread, u4 thread_serial_num); void do_threads(); + void add_class_serial_number(Klass* k, int serial_num) { + _klass_map->at_put_grow(serial_num, k); + } + + // HPROF_TRACE and HPROF_FRAME records + void dump_stack_traces(); + // writes a HPROF_HEAP_DUMP or HPROF_HEAP_DUMP_SEGMENT record void write_dump_header(); @@ -1313,6 +1361,18 @@ class VM_HeapDumper : public VM_GC_Operation { _gc_before_heap_dump = gc_before_heap_dump; _is_segmented_dump = false; _dump_start = (jlong)-1; + _klass_map = new (ResourceObj::C_HEAP) GrowableArray(INITIAL_CLASS_COUNT, true); + _stack_traces = NULL; + _num_threads = 0; + } + ~VM_HeapDumper() { + if (_stack_traces != NULL) { + for (int i=0; i < _num_threads; i++) { + 
delete _stack_traces[i]; + } + FREE_C_HEAP_ARRAY(ThreadStackTrace*, _stack_traces); + } + delete _klass_map; } VMOp_Type type() const { return VMOp_HeapDumper; } @@ -1436,6 +1496,9 @@ void VM_HeapDumper::do_load_class(klassOop k) { Klass* klass = Klass::cast(k); writer->write_classID(klass); + // add the klassOop and class serial number pair + dumper->add_class_serial_number(klass, class_serial_num); + writer->write_u4(STACK_TRACE_ID); // class name ID @@ -1465,15 +1528,15 @@ void VM_HeapDumper::do_basic_type_array_class_dump(klassOop k) { // Walk the stack of the given thread. // Dumps a HPROF_GC_ROOT_JAVA_FRAME record for each local // Dumps a HPROF_GC_ROOT_JNI_LOCAL record for each JNI local -void VM_HeapDumper::do_thread(JavaThread* java_thread, u4 thread_serial_num) { +// +// It returns the number of Java frames in this thread stack +int VM_HeapDumper::do_thread(JavaThread* java_thread, u4 thread_serial_num) { JNILocalsDumper blk(writer(), thread_serial_num); oop threadObj = java_thread->threadObj(); assert(threadObj != NULL, "sanity check"); - // JNI locals for the top frame - java_thread->active_handles()->oops_do(&blk); - + int stack_depth = 0; if (java_thread->has_last_Java_frame()) { // vframes are resource allocated @@ -1484,13 +1547,14 @@ void VM_HeapDumper::do_thread(JavaThread* java_thread, u4 thread_serial_num) { RegisterMap reg_map(java_thread); frame f = java_thread->last_frame(); vframe* vf = vframe::new_vframe(&f, ®_map, java_thread); + frame* last_entry_frame = NULL; while (vf != NULL) { + blk.set_frame_number(stack_depth); if (vf->is_java_frame()) { // java frame (interpreted, compiled, ...) 
javaVFrame *jvf = javaVFrame::cast(vf); - if (!(jvf->method()->is_native())) { StackValueCollection* locals = jvf->locals(); for (int slot=0; slotsize(); slot++) { @@ -1501,44 +1565,61 @@ void VM_HeapDumper::do_thread(JavaThread* java_thread, u4 thread_serial_num) { writer()->write_u1(HPROF_GC_ROOT_JAVA_FRAME); writer()->write_objectID(o); writer()->write_u4(thread_serial_num); - writer()->write_u4((u4)-1); // empty + writer()->write_u4((u4) stack_depth); } } } + } else { + // native frame + if (stack_depth == 0) { + // JNI locals for the top frame. + java_thread->active_handles()->oops_do(&blk); + } else { + if (last_entry_frame != NULL) { + // JNI locals for the entry frame + assert(last_entry_frame->is_entry_frame(), "checking"); + last_entry_frame->entry_frame_call_wrapper()->handles()->oops_do(&blk); + } + } } - } else { + // increment only for Java frames + stack_depth++; + last_entry_frame = NULL; + } else { // externalVFrame - if it's an entry frame then report any JNI locals - // as roots + // as roots when we find the corresponding native javaVFrame frame* fr = vf->frame_pointer(); assert(fr != NULL, "sanity check"); if (fr->is_entry_frame()) { - fr->entry_frame_call_wrapper()->handles()->oops_do(&blk); + last_entry_frame = fr; } } - vf = vf->sender(); } + } else { + // no last java frame but there may be JNI locals + java_thread->active_handles()->oops_do(&blk); } + return stack_depth; } // write a HPROF_GC_ROOT_THREAD_OBJ record for each java thread. Then walk // the stack so that locals and JNI locals are dumped. 
void VM_HeapDumper::do_threads() { - u4 thread_serial_num = 0; - for (JavaThread* thread = Threads::first(); thread != NULL ; thread = thread->next()) { + for (int i=0; i < _num_threads; i++) { + JavaThread* thread = _stack_traces[i]->thread(); oop threadObj = thread->threadObj(); - if (threadObj != NULL && !thread->is_exiting() && !thread->is_hidden_from_external_view()) { - ++thread_serial_num; - - writer()->write_u1(HPROF_GC_ROOT_THREAD_OBJ); - writer()->write_objectID(threadObj); - writer()->write_u4(thread_serial_num); - writer()->write_u4(STACK_TRACE_ID); - - do_thread(thread, thread_serial_num); - } + u4 thread_serial_num = i+1; + u4 stack_serial_num = thread_serial_num + STACK_TRACE_ID; + writer()->write_u1(HPROF_GC_ROOT_THREAD_OBJ); + writer()->write_objectID(threadObj); + writer()->write_u4(thread_serial_num); // thread number + writer()->write_u4(stack_serial_num); // stack trace serial number + int num_frames = do_thread(thread, thread_serial_num); + assert(num_frames == _stack_traces[i]->get_stack_depth(), + "total number of Java frames not matched"); } } @@ -1547,16 +1628,16 @@ void VM_HeapDumper::do_threads() { // records: // // HPROF_HEADER -// HPROF_TRACE // [HPROF_UTF8]* // [HPROF_LOAD_CLASS]* +// [[HPROF_FRAME]*|HPROF_TRACE]* // [HPROF_GC_CLASS_DUMP]* // HPROF_HEAP_DUMP // -// The HPROF_TRACE record after the header is "dummy trace" record which does -// not include any frames. Other records which require a stack trace ID will -// specify the trace ID of this record (1). It also means we can run HAT without -// needing the -stack false option. +// The HPROF_TRACE records represent the stack traces where the heap dump +// is generated and a "dummy trace" record which does not include +// any frames. The dummy trace record is used to be referenced as the +// unknown object alloc site. // // The HPROF_HEAP_DUMP record has a length following by sub-records. 
To allow // the heap dump be generated in a single pass we remember the position of @@ -1578,17 +1659,8 @@ void VM_HeapDumper::doit() { } // Write the file header - use 1.0.2 for large heaps, otherwise 1.0.1 - size_t used; + size_t used = ch->used(); const char* header; -#ifndef SERIALGC - if (Universe::heap()->kind() == CollectedHeap::GenCollectedHeap) { - used = GenCollectedHeap::heap()->used(); - } else { - used = ParallelScavengeHeap::heap()->used(); - } -#else // SERIALGC - used = GenCollectedHeap::heap()->used(); -#endif // SERIALGC if (used > (size_t)SegmentedHeapDumpThreshold) { set_segmented_dump(); header = "JAVA PROFILE 1.0.2"; @@ -1601,12 +1673,6 @@ void VM_HeapDumper::doit() { writer()->write_u4(oopSize); writer()->write_u8(os::javaTimeMillis()); - // HPROF_TRACE record without any frames - DumperSupport::write_header(writer(), HPROF_TRACE, 3*sizeof(u4)); - writer()->write_u4(STACK_TRACE_ID); - writer()->write_u4(0); // thread number - writer()->write_u4(0); // frame count - // HPROF_UTF8 records SymbolTableDumper sym_dumper(writer()); SymbolTable::oops_do(&sym_dumper); @@ -1615,6 +1681,10 @@ void VM_HeapDumper::doit() { SystemDictionary::classes_do(&do_load_class); Universe::basic_type_classes_do(&do_load_class); + // write HPROF_FRAME and HPROF_TRACE records + // this must be called after _klass_map is built when iterating the classes above. 
+ dump_stack_traces(); + // write HPROF_HEAP_DUMP or HPROF_HEAP_DUMP_SEGMENT write_dump_header(); @@ -1655,6 +1725,47 @@ void VM_HeapDumper::doit() { end_of_dump(); } +void VM_HeapDumper::dump_stack_traces() { + // write a HPROF_TRACE record without any frames to be referenced as object alloc sites + DumperSupport::write_header(writer(), HPROF_TRACE, 3*sizeof(u4)); + writer()->write_u4((u4) STACK_TRACE_ID); + writer()->write_u4(0); // thread number + writer()->write_u4(0); // frame count + + _stack_traces = NEW_C_HEAP_ARRAY(ThreadStackTrace*, Threads::number_of_threads()); + int frame_serial_num = 0; + for (JavaThread* thread = Threads::first(); thread != NULL ; thread = thread->next()) { + oop threadObj = thread->threadObj(); + if (threadObj != NULL && !thread->is_exiting() && !thread->is_hidden_from_external_view()) { + // dump thread stack trace + ThreadStackTrace* stack_trace = new ThreadStackTrace(thread, false); + stack_trace->dump_stack_at_safepoint(-1); + _stack_traces[_num_threads++] = stack_trace; + + // write HPROF_FRAME records for this thread's stack trace + int depth = stack_trace->get_stack_depth(); + int thread_frame_start = frame_serial_num; + for (int j=0; j < depth; j++) { + StackFrameInfo* frame = stack_trace->stack_frame_at(j); + methodOop m = frame->method(); + int class_serial_num = _klass_map->find(Klass::cast(m->method_holder())); + // the class serial number starts from 1 + assert(class_serial_num > 0, "class not found"); + DumperSupport::dump_stack_frame(writer(), ++frame_serial_num, class_serial_num, m, frame->bci()); + } + + // write HPROF_TRACE record for one thread + DumperSupport::write_header(writer(), HPROF_TRACE, 3*sizeof(u4) + depth*oopSize); + int stack_serial_num = _num_threads + STACK_TRACE_ID; + writer()->write_u4(stack_serial_num); // stack trace serial number + writer()->write_u4((u4) _num_threads); // thread serial number + writer()->write_u4(depth); // frame count + for (int j=1; j <= depth; j++) { + 
writer()->write_id(thread_frame_start + j); + } + } + } +} // dump the heap to given path. int HeapDumper::dump(const char* path) { diff --git a/hotspot/src/share/vm/services/management.cpp b/hotspot/src/share/vm/services/management.cpp index 88e5f1e5b88..387deee7325 100644 --- a/hotspot/src/share/vm/services/management.cpp +++ b/hotspot/src/share/vm/services/management.cpp @@ -886,7 +886,7 @@ static jint get_num_flags() { int count = 0; for (int i = 0; i < nFlags; i++) { Flag* flag = &Flag::flags[i]; - // Exclude the diagnostic flags + // Exclude the locked (diagnostic, experimental) flags if (flag->is_unlocked() || flag->is_unlocker()) { count++; } @@ -1487,7 +1487,7 @@ JVM_ENTRY(jobjectArray, jmm_GetVMGlobalNames(JNIEnv *env)) int num_entries = 0; for (int i = 0; i < nFlags; i++) { Flag* flag = &Flag::flags[i]; - // Exclude the diagnostic flags + // Exclude the locked (experimental, diagnostic) flags if (flag->is_unlocked() || flag->is_unlocker()) { Handle s = java_lang_String::create_from_str(flag->name, CHECK_0); flags_ah->obj_at_put(num_entries, s()); @@ -1616,7 +1616,7 @@ JVM_ENTRY(jint, jmm_GetVMGlobals(JNIEnv *env, int num_entries = 0; for (int i = 0; i < nFlags && num_entries < count; i++) { Flag* flag = &Flag::flags[i]; - // Exclude the diagnostic flags + // Exclude the locked (diagnostic, experimental) flags if (flag->is_unlocked() || flag->is_unlocker()) { add_global_entry(env, null_h, &globals[num_entries], flag, THREAD); num_entries++; diff --git a/hotspot/src/share/vm/services/memoryService.cpp b/hotspot/src/share/vm/services/memoryService.cpp index 176f11e89bb..1d243828c77 100644 --- a/hotspot/src/share/vm/services/memoryService.cpp +++ b/hotspot/src/share/vm/services/memoryService.cpp @@ -59,9 +59,13 @@ void MemoryService::set_universe_heap(CollectedHeap* heap) { add_parallel_scavenge_heap_info(ParallelScavengeHeap::heap()); break; } + case CollectedHeap::G1CollectedHeap : { + G1CollectedHeap::g1_unimplemented(); + return; + } #endif // SERIALGC 
default: { - guarantee(false, "Not recognized kind of heap"); + guarantee(false, "Unrecognized kind of heap"); } } diff --git a/hotspot/src/share/vm/services/threadService.hpp b/hotspot/src/share/vm/services/threadService.hpp index 291a8eebb0e..97a83bb5e35 100644 --- a/hotspot/src/share/vm/services/threadService.hpp +++ b/hotspot/src/share/vm/services/threadService.hpp @@ -242,6 +242,7 @@ class ThreadStackTrace : public CHeapObj { ThreadStackTrace(JavaThread* thread, bool with_locked_monitors); ~ThreadStackTrace(); + JavaThread* thread() { return _thread; } StackFrameInfo* stack_frame_at(int i) { return _frames->at(i); } int get_stack_depth() { return _depth; } diff --git a/hotspot/src/share/vm/utilities/bitMap.cpp b/hotspot/src/share/vm/utilities/bitMap.cpp index b1d466b3e29..13f4721ca56 100644 --- a/hotspot/src/share/vm/utilities/bitMap.cpp +++ b/hotspot/src/share/vm/utilities/bitMap.cpp @@ -26,54 +26,59 @@ # include "incls/_bitMap.cpp.incl" -BitMap::BitMap(idx_t* map, idx_t size_in_bits) { +BitMap::BitMap(bm_word_t* map, idx_t size_in_bits) : + _map(map), _size(size_in_bits) +{ + assert(sizeof(bm_word_t) == BytesPerWord, "Implementation assumption."); assert(size_in_bits >= 0, "just checking"); - _map = map; - _size = size_in_bits; } -BitMap::BitMap(idx_t size_in_bits) { - assert(size_in_bits >= 0, "just checking"); - _size = size_in_bits; - _map = NEW_RESOURCE_ARRAY(idx_t, size_in_words()); +BitMap::BitMap(idx_t size_in_bits, bool in_resource_area) : + _map(NULL), _size(0) +{ + assert(sizeof(bm_word_t) == BytesPerWord, "Implementation assumption."); + resize(size_in_bits, in_resource_area); } -void BitMap::resize(idx_t size_in_bits) { +void BitMap::verify_index(idx_t index) const { + assert(index < _size, "BitMap index out of bounds"); +} + +void BitMap::verify_range(idx_t beg_index, idx_t end_index) const { +#ifdef ASSERT + assert(beg_index <= end_index, "BitMap range error"); + // Note that [0,0) and [size,size) are both valid ranges. 
+ if (end_index != _size) verify_index(end_index); +#endif +} + +void BitMap::resize(idx_t size_in_bits, bool in_resource_area) { assert(size_in_bits >= 0, "just checking"); - size_t old_size_in_words = size_in_words(); - uintptr_t* old_map = map(); + idx_t old_size_in_words = size_in_words(); + bm_word_t* old_map = map(); + _size = size_in_bits; - size_t new_size_in_words = size_in_words(); - _map = NEW_RESOURCE_ARRAY(idx_t, new_size_in_words); - Copy::disjoint_words((HeapWord*) old_map, (HeapWord*) _map, MIN2(old_size_in_words, new_size_in_words)); + idx_t new_size_in_words = size_in_words(); + if (in_resource_area) { + _map = NEW_RESOURCE_ARRAY(bm_word_t, new_size_in_words); + } else { + if (old_map != NULL) FREE_C_HEAP_ARRAY(bm_word_t, _map); + _map = NEW_C_HEAP_ARRAY(bm_word_t, new_size_in_words); + } + Copy::disjoint_words((HeapWord*)old_map, (HeapWord*) _map, + MIN2(old_size_in_words, new_size_in_words)); if (new_size_in_words > old_size_in_words) { clear_range_of_words(old_size_in_words, size_in_words()); } } -// Returns a bit mask for a range of bits [beg, end) within a single word. Each -// bit in the mask is 0 if the bit is in the range, 1 if not in the range. The -// returned mask can be used directly to clear the range, or inverted to set the -// range. Note: end must not be 0. -inline BitMap::idx_t -BitMap::inverted_bit_mask_for_range(idx_t beg, idx_t end) const { - assert(end != 0, "does not work when end == 0"); - assert(beg == end || word_index(beg) == word_index(end - 1), - "must be a single-word range"); - idx_t mask = bit_mask(beg) - 1; // low (right) bits - if (bit_in_word(end) != 0) { - mask |= ~(bit_mask(end) - 1); // high (left) bits - } - return mask; -} - void BitMap::set_range_within_word(idx_t beg, idx_t end) { // With a valid range (beg <= end), this test ensures that end != 0, as // required by inverted_bit_mask_for_range. Also avoids an unnecessary write. 
if (beg != end) { - idx_t mask = inverted_bit_mask_for_range(beg, end); + bm_word_t mask = inverted_bit_mask_for_range(beg, end); *word_addr(beg) |= ~mask; } } @@ -82,7 +87,7 @@ void BitMap::clear_range_within_word(idx_t beg, idx_t end) { // With a valid range (beg <= end), this test ensures that end != 0, as // required by inverted_bit_mask_for_range. Also avoids an unnecessary write. if (beg != end) { - idx_t mask = inverted_bit_mask_for_range(beg, end); + bm_word_t mask = inverted_bit_mask_for_range(beg, end); *word_addr(beg) &= mask; } } @@ -105,20 +110,6 @@ void BitMap::par_put_range_within_word(idx_t beg, idx_t end, bool value) { } } -inline void BitMap::set_large_range_of_words(idx_t beg, idx_t end) { - memset(_map + beg, ~(unsigned char)0, (end - beg) * sizeof(uintptr_t)); -} - -inline void BitMap::clear_large_range_of_words(idx_t beg, idx_t end) { - memset(_map + beg, 0, (end - beg) * sizeof(uintptr_t)); -} - -inline BitMap::idx_t BitMap::word_index_round_up(idx_t bit) const { - idx_t bit_rounded_up = bit + (BitsPerWord - 1); - // Check for integer arithmetic overflow. - return bit_rounded_up > bit ? word_index(bit_rounded_up) : size_in_words(); -} - void BitMap::set_range(idx_t beg, idx_t end) { verify_range(beg, end); @@ -187,6 +178,64 @@ void BitMap::clear_large_range(idx_t beg, idx_t end) { clear_range_within_word(bit_index(end_full_word), end); } +void BitMap::mostly_disjoint_range_union(BitMap* from_bitmap, + idx_t from_start_index, + idx_t to_start_index, + size_t word_num) { + // Ensure that the parameters are correct. + // These shouldn't be that expensive to check, hence I left them as + // guarantees. 
+ guarantee(from_bitmap->bit_in_word(from_start_index) == 0, + "it should be aligned on a word boundary"); + guarantee(bit_in_word(to_start_index) == 0, + "it should be aligned on a word boundary"); + guarantee(word_num >= 2, "word_num should be at least 2"); + + intptr_t* from = (intptr_t*) from_bitmap->word_addr(from_start_index); + intptr_t* to = (intptr_t*) word_addr(to_start_index); + + if (*from != 0) { + // if it's 0, then there's no point in doing the CAS + while (true) { + intptr_t old_value = *to; + intptr_t new_value = old_value | *from; + intptr_t res = Atomic::cmpxchg_ptr(new_value, to, old_value); + if (res == old_value) break; + } + } + ++from; + ++to; + + for (size_t i = 0; i < word_num - 2; ++i) { + if (*from != 0) { + // if it's 0, then there's no point in doing the CAS + assert(*to == 0, "nobody else should be writing here"); + intptr_t new_value = *from; + *to = new_value; + } + + ++from; + ++to; + } + + if (*from != 0) { + // if it's 0, then there's no point in doing the CAS + while (true) { + intptr_t old_value = *to; + intptr_t new_value = old_value | *from; + intptr_t res = Atomic::cmpxchg_ptr(new_value, to, old_value); + if (res == old_value) break; + } + } + + // the -1 is because we didn't advance them after the final CAS + assert(from == + (intptr_t*) from_bitmap->word_addr(from_start_index) + word_num - 1, + "invariant"); + assert(to == (intptr_t*) word_addr(to_start_index) + word_num - 1, + "invariant"); +} + void BitMap::at_put(idx_t offset, bool value) { if (value) { set_bit(offset); @@ -282,11 +331,11 @@ void BitMap::par_at_put_large_range(idx_t beg, idx_t end, bool value) { bool BitMap::contains(const BitMap other) const { assert(size() == other.size(), "must have same size"); - uintptr_t* dest_map = map(); - uintptr_t* other_map = other.map(); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); idx_t size = size_in_words(); for (idx_t index = 0; index < size_in_words(); index++) { - uintptr_t word_union = 
dest_map[index] | other_map[index]; + bm_word_t word_union = dest_map[index] | other_map[index]; // If this has more bits set than dest_map[index], then other is not a // subset. if (word_union != dest_map[index]) return false; @@ -296,8 +345,8 @@ bool BitMap::contains(const BitMap other) const { bool BitMap::intersects(const BitMap other) const { assert(size() == other.size(), "must have same size"); - uintptr_t* dest_map = map(); - uintptr_t* other_map = other.map(); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); idx_t size = size_in_words(); for (idx_t index = 0; index < size_in_words(); index++) { if ((dest_map[index] & other_map[index]) != 0) return true; @@ -308,8 +357,8 @@ bool BitMap::intersects(const BitMap other) const { void BitMap::set_union(BitMap other) { assert(size() == other.size(), "must have same size"); - idx_t* dest_map = map(); - idx_t* other_map = other.map(); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); idx_t size = size_in_words(); for (idx_t index = 0; index < size_in_words(); index++) { dest_map[index] = dest_map[index] | other_map[index]; @@ -319,8 +368,8 @@ void BitMap::set_union(BitMap other) { void BitMap::set_difference(BitMap other) { assert(size() == other.size(), "must have same size"); - idx_t* dest_map = map(); - idx_t* other_map = other.map(); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); idx_t size = size_in_words(); for (idx_t index = 0; index < size_in_words(); index++) { dest_map[index] = dest_map[index] & ~(other_map[index]); @@ -330,8 +379,8 @@ void BitMap::set_difference(BitMap other) { void BitMap::set_intersection(BitMap other) { assert(size() == other.size(), "must have same size"); - idx_t* dest_map = map(); - idx_t* other_map = other.map(); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); idx_t size = size_in_words(); for (idx_t index = 0; index < size; index++) { dest_map[index] = dest_map[index] & other_map[index]; @@ -339,11 
+388,26 @@ void BitMap::set_intersection(BitMap other) { } +void BitMap::set_intersection_at_offset(BitMap other, idx_t offset) { + assert(other.size() >= offset, "offset not in range"); + assert(other.size() - offset >= size(), "other not large enough"); + // XXX Ideally, we would remove this restriction. + guarantee((offset % (sizeof(bm_word_t) * BitsPerByte)) == 0, + "Only handle aligned cases so far."); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); + idx_t offset_word_ind = word_index(offset); + idx_t size = size_in_words(); + for (idx_t index = 0; index < size; index++) { + dest_map[index] = dest_map[index] & other_map[offset_word_ind + index]; + } +} + bool BitMap::set_union_with_result(BitMap other) { assert(size() == other.size(), "must have same size"); bool changed = false; - idx_t* dest_map = map(); - idx_t* other_map = other.map(); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); idx_t size = size_in_words(); for (idx_t index = 0; index < size; index++) { idx_t temp = map(index) | other_map[index]; @@ -357,11 +421,11 @@ bool BitMap::set_union_with_result(BitMap other) { bool BitMap::set_difference_with_result(BitMap other) { assert(size() == other.size(), "must have same size"); bool changed = false; - idx_t* dest_map = map(); - idx_t* other_map = other.map(); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); idx_t size = size_in_words(); for (idx_t index = 0; index < size; index++) { - idx_t temp = dest_map[index] & ~(other_map[index]); + bm_word_t temp = dest_map[index] & ~(other_map[index]); changed = changed || (temp != dest_map[index]); dest_map[index] = temp; } @@ -372,12 +436,12 @@ bool BitMap::set_difference_with_result(BitMap other) { bool BitMap::set_intersection_with_result(BitMap other) { assert(size() == other.size(), "must have same size"); bool changed = false; - idx_t* dest_map = map(); - idx_t* other_map = other.map(); + bm_word_t* dest_map = map(); + bm_word_t* other_map = 
other.map(); idx_t size = size_in_words(); for (idx_t index = 0; index < size; index++) { - idx_t orig = dest_map[index]; - idx_t temp = orig & other_map[index]; + bm_word_t orig = dest_map[index]; + bm_word_t temp = orig & other_map[index]; changed = changed || (temp != orig); dest_map[index] = temp; } @@ -387,8 +451,8 @@ bool BitMap::set_intersection_with_result(BitMap other) { void BitMap::set_from(BitMap other) { assert(size() == other.size(), "must have same size"); - idx_t* dest_map = map(); - idx_t* other_map = other.map(); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); idx_t size = size_in_words(); for (idx_t index = 0; index < size; index++) { dest_map[index] = other_map[index]; @@ -398,8 +462,8 @@ void BitMap::set_from(BitMap other) { bool BitMap::is_same(BitMap other) { assert(size() == other.size(), "must have same size"); - idx_t* dest_map = map(); - idx_t* other_map = other.map(); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); idx_t size = size_in_words(); for (idx_t index = 0; index < size; index++) { if (dest_map[index] != other_map[index]) return false; @@ -408,24 +472,24 @@ bool BitMap::is_same(BitMap other) { } bool BitMap::is_full() const { - uintptr_t* word = map(); + bm_word_t* word = map(); idx_t rest = size(); for (; rest >= (idx_t) BitsPerWord; rest -= BitsPerWord) { - if (*word != (uintptr_t) AllBits) return false; + if (*word != (bm_word_t) AllBits) return false; word++; } - return rest == 0 || (*word | ~right_n_bits((int)rest)) == (uintptr_t) AllBits; + return rest == 0 || (*word | ~right_n_bits((int)rest)) == (bm_word_t) AllBits; } bool BitMap::is_empty() const { - uintptr_t* word = map(); + bm_word_t* word = map(); idx_t rest = size(); for (; rest >= (idx_t) BitsPerWord; rest -= BitsPerWord) { - if (*word != (uintptr_t) NoBits) return false; + if (*word != (bm_word_t) NoBits) return false; word++; } - return rest == 0 || (*word & right_n_bits((int)rest)) == (uintptr_t) NoBits; + return rest 
== 0 || (*word & right_n_bits((int)rest)) == (bm_word_t) NoBits; } void BitMap::clear_large() { @@ -436,7 +500,7 @@ void BitMap::clear_large() { // then modifications in and to the left of the _bit_ being // currently sampled will not be seen. Note also that the // interval [leftOffset, rightOffset) is right open. -void BitMap::iterate(BitMapClosure* blk, idx_t leftOffset, idx_t rightOffset) { +bool BitMap::iterate(BitMapClosure* blk, idx_t leftOffset, idx_t rightOffset) { verify_range(leftOffset, rightOffset); idx_t startIndex = word_index(leftOffset); @@ -445,106 +509,71 @@ void BitMap::iterate(BitMapClosure* blk, idx_t leftOffset, idx_t rightOffset) { offset < rightOffset && index < endIndex; offset = (++index) << LogBitsPerWord) { idx_t rest = map(index) >> (offset & (BitsPerWord - 1)); - for (; offset < rightOffset && rest != (uintptr_t)NoBits; offset++) { + for (; offset < rightOffset && rest != (bm_word_t)NoBits; offset++) { if (rest & 1) { - blk->do_bit(offset); + if (!blk->do_bit(offset)) return false; // resample at each closure application // (see, for instance, CMS bug 4525989) rest = map(index) >> (offset & (BitsPerWord -1)); - // XXX debugging: remove - // The following assertion assumes that closure application - // doesn't clear bits (may not be true in general, e.g. G1). 
- assert(rest & 1, - "incorrect shift or closure application can clear bits?"); } rest = rest >> 1; } } + return true; } -BitMap::idx_t BitMap::get_next_one_offset(idx_t l_offset, - idx_t r_offset) const { - assert(l_offset <= size(), "BitMap index out of bounds"); - assert(r_offset <= size(), "BitMap index out of bounds"); - assert(l_offset <= r_offset, "l_offset > r_offset ?"); +BitMap::idx_t* BitMap::_pop_count_table = NULL; - if (l_offset == r_offset) { - return l_offset; - } - idx_t index = word_index(l_offset); - idx_t r_index = word_index(r_offset-1) + 1; - idx_t res_offset = l_offset; - - // check bits including and to the _left_ of offset's position - idx_t pos = bit_in_word(res_offset); - idx_t res = map(index) >> pos; - if (res != (uintptr_t)NoBits) { - // find the position of the 1-bit - for (; !(res & 1); res_offset++) { - res = res >> 1; +void BitMap::init_pop_count_table() { + if (_pop_count_table == NULL) { + BitMap::idx_t *table = NEW_C_HEAP_ARRAY(idx_t, 256); + for (uint i = 0; i < 256; i++) { + table[i] = num_set_bits(i); } - assert(res_offset >= l_offset, "just checking"); - return MIN2(res_offset, r_offset); - } - // skip over all word length 0-bit runs - for (index++; index < r_index; index++) { - res = map(index); - if (res != (uintptr_t)NoBits) { - // found a 1, return the offset - for (res_offset = index << LogBitsPerWord; !(res & 1); - res_offset++) { - res = res >> 1; - } - assert(res & 1, "tautology; see loop condition"); - assert(res_offset >= l_offset, "just checking"); - return MIN2(res_offset, r_offset); + + intptr_t res = Atomic::cmpxchg_ptr((intptr_t) table, + (intptr_t*) &_pop_count_table, + (intptr_t) NULL_WORD); + if (res != NULL_WORD) { + guarantee( _pop_count_table == (void*) res, "invariant" ); + FREE_C_HEAP_ARRAY(bm_word_t, table); } } - return r_offset; } -BitMap::idx_t BitMap::get_next_zero_offset(idx_t l_offset, - idx_t r_offset) const { - assert(l_offset <= size(), "BitMap index out of bounds"); - assert(r_offset <= 
size(), "BitMap index out of bounds"); - assert(l_offset <= r_offset, "l_offset > r_offset ?"); +BitMap::idx_t BitMap::num_set_bits(bm_word_t w) { + idx_t bits = 0; - if (l_offset == r_offset) { - return l_offset; - } - idx_t index = word_index(l_offset); - idx_t r_index = word_index(r_offset-1) + 1; - idx_t res_offset = l_offset; - - // check bits including and to the _left_ of offset's position - idx_t pos = res_offset & (BitsPerWord - 1); - idx_t res = (map(index) >> pos) | left_n_bits((int)pos); - - if (res != (uintptr_t)AllBits) { - // find the position of the 0-bit - for (; res & 1; res_offset++) { - res = res >> 1; + while (w != 0) { + while ((w & 1) == 0) { + w >>= 1; } - assert(res_offset >= l_offset, "just checking"); - return MIN2(res_offset, r_offset); + bits++; + w >>= 1; } - // skip over all word length 1-bit runs - for (index++; index < r_index; index++) { - res = map(index); - if (res != (uintptr_t)AllBits) { - // found a 0, return the offset - for (res_offset = index << LogBitsPerWord; res & 1; - res_offset++) { - res = res >> 1; - } - assert(!(res & 1), "tautology; see loop condition"); - assert(res_offset >= l_offset, "just checking"); - return MIN2(res_offset, r_offset); - } - } - return r_offset; + return bits; } +BitMap::idx_t BitMap::num_set_bits_from_table(unsigned char c) { + assert(_pop_count_table != NULL, "precondition"); + return _pop_count_table[c]; +} + +BitMap::idx_t BitMap::count_one_bits() const { + init_pop_count_table(); // If necessary. 
+ idx_t sum = 0; + typedef unsigned char uchar; + for (idx_t i = 0; i < size_in_words(); i++) { + bm_word_t w = map()[i]; + for (size_t j = 0; j < sizeof(bm_word_t); j++) { + sum += num_set_bits_from_table(uchar(w & 255)); + w >>= 8; + } + } + return sum; +} + + #ifndef PRODUCT void BitMap::print_on(outputStream* st) const { @@ -558,7 +587,7 @@ void BitMap::print_on(outputStream* st) const { #endif -BitMap2D::BitMap2D(uintptr_t* map, idx_t size_in_slots, idx_t bits_per_slot) +BitMap2D::BitMap2D(bm_word_t* map, idx_t size_in_slots, idx_t bits_per_slot) : _bits_per_slot(bits_per_slot) , _map(map, size_in_slots * bits_per_slot) { diff --git a/hotspot/src/share/vm/utilities/bitMap.hpp b/hotspot/src/share/vm/utilities/bitMap.hpp index 961a2f1b34e..899d65a07df 100644 --- a/hotspot/src/share/vm/utilities/bitMap.hpp +++ b/hotspot/src/share/vm/utilities/bitMap.hpp @@ -22,25 +22,19 @@ * */ -// Closure for iterating over BitMaps +// Forward decl; +class BitMapClosure; -class BitMapClosure VALUE_OBJ_CLASS_SPEC { - public: - // Callback when bit in map is set - virtual void do_bit(size_t offset) = 0; -}; - - -// Operations for bitmaps represented as arrays of unsigned 32- or 64-bit -// integers (uintptr_t). -// -// Bit offsets are numbered from 0 to size-1 +// Operations for bitmaps represented as arrays of unsigned integers. +// Bit offsets are numbered from 0 to size-1. class BitMap VALUE_OBJ_CLASS_SPEC { friend class BitMap2D; public: typedef size_t idx_t; // Type used for bit and word indices. + typedef uintptr_t bm_word_t; // Element type of array that represents + // the bitmap. // Hints for range sizes. 
typedef enum { @@ -48,8 +42,8 @@ class BitMap VALUE_OBJ_CLASS_SPEC { } RangeSizeHint; private: - idx_t* _map; // First word in bitmap - idx_t _size; // Size of bitmap (in bits) + bm_word_t* _map; // First word in bitmap + idx_t _size; // Size of bitmap (in bits) // Puts the given value at the given offset, using resize() to size // the bitmap appropriately if needed using factor-of-two expansion. @@ -62,7 +56,7 @@ class BitMap VALUE_OBJ_CLASS_SPEC { // Return a mask that will select the specified bit, when applied to the word // containing the bit. - static idx_t bit_mask(idx_t bit) { return (idx_t)1 << bit_in_word(bit); } + static bm_word_t bit_mask(idx_t bit) { return (bm_word_t)1 << bit_in_word(bit); } // Return the index of the word containing the specified bit. static idx_t word_index(idx_t bit) { return bit >> LogBitsPerWord; } @@ -71,66 +65,68 @@ class BitMap VALUE_OBJ_CLASS_SPEC { static idx_t bit_index(idx_t word) { return word << LogBitsPerWord; } // Return the array of bitmap words, or a specific word from it. - idx_t* map() const { return _map; } - idx_t map(idx_t word) const { return _map[word]; } + bm_word_t* map() const { return _map; } + bm_word_t map(idx_t word) const { return _map[word]; } // Return a pointer to the word containing the specified bit. - idx_t* word_addr(idx_t bit) const { return map() + word_index(bit); } + bm_word_t* word_addr(idx_t bit) const { return map() + word_index(bit); } // Set a word to a specified value or to all ones; clear a word. - void set_word (idx_t word, idx_t val) { _map[word] = val; } + void set_word (idx_t word, bm_word_t val) { _map[word] = val; } void set_word (idx_t word) { set_word(word, ~(uintptr_t)0); } void clear_word(idx_t word) { _map[word] = 0; } // Utilities for ranges of bits. Ranges are half-open [beg, end). // Ranges within a single word. 
- inline idx_t inverted_bit_mask_for_range(idx_t beg, idx_t end) const; - inline void set_range_within_word (idx_t beg, idx_t end); - inline void clear_range_within_word (idx_t beg, idx_t end); - inline void par_put_range_within_word (idx_t beg, idx_t end, bool value); + bm_word_t inverted_bit_mask_for_range(idx_t beg, idx_t end) const; + void set_range_within_word (idx_t beg, idx_t end); + void clear_range_within_word (idx_t beg, idx_t end); + void par_put_range_within_word (idx_t beg, idx_t end, bool value); // Ranges spanning entire words. - inline void set_range_of_words (idx_t beg, idx_t end); - inline void clear_range_of_words (idx_t beg, idx_t end); - inline void set_large_range_of_words (idx_t beg, idx_t end); - inline void clear_large_range_of_words (idx_t beg, idx_t end); + void set_range_of_words (idx_t beg, idx_t end); + void clear_range_of_words (idx_t beg, idx_t end); + void set_large_range_of_words (idx_t beg, idx_t end); + void clear_large_range_of_words (idx_t beg, idx_t end); // The index of the first full word in a range. - inline idx_t word_index_round_up(idx_t bit) const; + idx_t word_index_round_up(idx_t bit) const; // Verification, statistics. - void verify_index(idx_t index) const { - assert(index < _size, "BitMap index out of bounds"); - } + void verify_index(idx_t index) const; + void verify_range(idx_t beg_index, idx_t end_index) const; - void verify_range(idx_t beg_index, idx_t end_index) const { -#ifdef ASSERT - assert(beg_index <= end_index, "BitMap range error"); - // Note that [0,0) and [size,size) are both valid ranges. - if (end_index != _size) verify_index(end_index); -#endif - } + static idx_t* _pop_count_table; + static void init_pop_count_table(); + static idx_t num_set_bits(bm_word_t w); + static idx_t num_set_bits_from_table(unsigned char c); public: // Constructs a bitmap with no map, and size 0. 
BitMap() : _map(NULL), _size(0) {} - // Construction - BitMap(idx_t* map, idx_t size_in_bits); + // Constructs a bitmap with the given map and size. + BitMap(bm_word_t* map, idx_t size_in_bits); - // Allocates necessary data structure in resource area - BitMap(idx_t size_in_bits); + // Constructs an empty bitmap of the given size (that is, this clears the + // new bitmap). Allocates the map array in resource area if + // "in_resource_area" is true, else in the C heap. + BitMap(idx_t size_in_bits, bool in_resource_area = true); - void set_map(idx_t* map) { _map = map; } + // Set the map and size. + void set_map(bm_word_t* map) { _map = map; } void set_size(idx_t size_in_bits) { _size = size_in_bits; } - // Allocates necessary data structure in resource area. + // Allocates necessary data structure, either in the resource area + // or in the C heap, as indicated by "in_resource_area." // Preserves state currently in bit map by copying data. // Zeros any newly-addressable bits. - // Does not perform any frees (i.e., of current _map). - void resize(idx_t size_in_bits); + // If "in_resource_area" is false, frees the current map. + // (Note that this assumes that all calls to "resize" on the same BitMap + // use the same value for "in_resource_area".) + void resize(idx_t size_in_bits, bool in_resource_area = true); // Accessing idx_t size() const { return _size; } @@ -157,11 +153,11 @@ class BitMap VALUE_OBJ_CLASS_SPEC { // Set or clear the specified bit. inline void set_bit(idx_t bit); - inline void clear_bit(idx_t bit); + void clear_bit(idx_t bit); // Atomically set or clear the specified bit. - inline bool par_set_bit(idx_t bit); - inline bool par_clear_bit(idx_t bit); + bool par_set_bit(idx_t bit); + bool par_clear_bit(idx_t bit); // Put the given value at the given offset. The parallel version // will CAS the value into the bitmap and is quite a bit slower. 
@@ -183,23 +179,61 @@ class BitMap VALUE_OBJ_CLASS_SPEC { // Update a range of bits, using a hint about the size. Currently only // inlines the predominant case of a 1-bit range. Works best when hint is a // compile-time constant. - inline void set_range(idx_t beg, idx_t end, RangeSizeHint hint); - inline void clear_range(idx_t beg, idx_t end, RangeSizeHint hint); - inline void par_set_range(idx_t beg, idx_t end, RangeSizeHint hint); - inline void par_clear_range (idx_t beg, idx_t end, RangeSizeHint hint); + void set_range(idx_t beg, idx_t end, RangeSizeHint hint); + void clear_range(idx_t beg, idx_t end, RangeSizeHint hint); + void par_set_range(idx_t beg, idx_t end, RangeSizeHint hint); + void par_clear_range (idx_t beg, idx_t end, RangeSizeHint hint); + + // It performs the union operation between subsets of equal length + // of two bitmaps (the target bitmap of the method and the + // from_bitmap) and stores the result to the target bitmap. The + // from_start_index represents the first bit index of the subrange + // of the from_bitmap. The to_start_index is the equivalent of the + // target bitmap. Both indexes should be word-aligned, i.e. they + // should correspond to the first bit on a bitmap word (it's up to + // the caller to ensure this; the method does check it). The length + // of the subset is specified with word_num and it is in number of + // bitmap words. The caller should ensure that this is at least 2 + // (smaller ranges are not support to save extra checks). Again, + // this is checked in the method. + // + // Atomicity concerns: it is assumed that any contention on the + // target bitmap with other threads will happen on the first and + // last words; the ones in between will be "owned" exclusively by + // the calling thread and, in fact, they will already be 0. So, the + // method performs a CAS on the first word, copies the next + // word_num-2 words, and finally performs a CAS on the last word. 
+ void mostly_disjoint_range_union(BitMap* from_bitmap, + idx_t from_start_index, + idx_t to_start_index, + size_t word_num); + // Clearing - void clear(); void clear_large(); + inline void clear(); - // Iteration support - void iterate(BitMapClosure* blk, idx_t leftIndex, idx_t rightIndex); - inline void iterate(BitMapClosure* blk) { + // Iteration support. Returns "true" if the iteration completed, false + // if the iteration terminated early (because the closure "blk" returned + // false). + bool iterate(BitMapClosure* blk, idx_t leftIndex, idx_t rightIndex); + bool iterate(BitMapClosure* blk) { // call the version that takes an interval - iterate(blk, 0, size()); + return iterate(blk, 0, size()); } - // Looking for 1's and 0's to the "right" + // Looking for 1's and 0's at indices equal to or greater than "l_index", + // stopping if none has been found before "r_index", and returning + // "r_index" (which must be at most "size") in that case. + idx_t get_next_one_offset_inline (idx_t l_index, idx_t r_index) const; + idx_t get_next_zero_offset_inline(idx_t l_index, idx_t r_index) const; + + // Like "get_next_one_offset_inline", except requires that "r_index" is + // aligned to bitsizeof(bm_word_t). + idx_t get_next_one_offset_inline_aligned_right(idx_t l_index, + idx_t r_index) const; + + // Non-inline versionsof the above. idx_t get_next_one_offset (idx_t l_index, idx_t r_index) const; idx_t get_next_zero_offset(idx_t l_index, idx_t r_index) const; @@ -210,12 +244,8 @@ class BitMap VALUE_OBJ_CLASS_SPEC { return get_next_zero_offset(offset, size()); } - - - // Find the next one bit in the range [beg_bit, end_bit), or return end_bit if - // no one bit is found. Equivalent to get_next_one_offset(), but inline for - // use in performance-critical code. - inline idx_t find_next_one_bit(idx_t beg_bit, idx_t end_bit) const; + // Returns the number of bits set in the bitmap. + idx_t count_one_bits() const; // Set operations. 
void set_union(BitMap bits); @@ -232,6 +262,15 @@ class BitMap VALUE_OBJ_CLASS_SPEC { bool set_difference_with_result(BitMap bits); bool set_intersection_with_result(BitMap bits); + // Requires the submap of "bits" starting at offset to be at least as + // large as "this". Modifies "this" to be the intersection of its + // current contents and the submap of "bits" starting at "offset" of the + // same length as "this." + // (For expedience, currently requires the offset to be aligned to the + // bitsize of a uintptr_t. This should go away in the future though it + // will probably remain a good case to optimize.) + void set_intersection_at_offset(BitMap bits, idx_t offset); + void set_from(BitMap bits); bool is_same(BitMap bits); @@ -248,58 +287,13 @@ class BitMap VALUE_OBJ_CLASS_SPEC { #endif }; -inline void BitMap::set_bit(idx_t bit) { - verify_index(bit); - *word_addr(bit) |= bit_mask(bit); -} - -inline void BitMap::clear_bit(idx_t bit) { - verify_index(bit); - *word_addr(bit) &= ~bit_mask(bit); -} - -inline void BitMap::set_range(idx_t beg, idx_t end, RangeSizeHint hint) { - if (hint == small_range && end - beg == 1) { - set_bit(beg); - } else { - if (hint == large_range) { - set_large_range(beg, end); - } else { - set_range(beg, end); - } - } -} - -inline void BitMap::clear_range(idx_t beg, idx_t end, RangeSizeHint hint) { - if (hint == small_range && end - beg == 1) { - clear_bit(beg); - } else { - if (hint == large_range) { - clear_large_range(beg, end); - } else { - clear_range(beg, end); - } - } -} - -inline void BitMap::par_set_range(idx_t beg, idx_t end, RangeSizeHint hint) { - if (hint == small_range && end - beg == 1) { - par_at_put(beg, true); - } else { - if (hint == large_range) { - par_at_put_large_range(beg, end, true); - } else { - par_at_put_range(beg, end, true); - } - } -} - // Convenience class wrapping BitMap which provides multiple bits per slot. 
class BitMap2D VALUE_OBJ_CLASS_SPEC { public: - typedef size_t idx_t; // Type used for bit and word indices. - + typedef BitMap::idx_t idx_t; // Type used for bit and word indices. + typedef BitMap::bm_word_t bm_word_t; // Element type of array that + // represents the bitmap. private: BitMap _map; idx_t _bits_per_slot; @@ -314,7 +308,7 @@ class BitMap2D VALUE_OBJ_CLASS_SPEC { public: // Construction. bits_per_slot must be greater than 0. - BitMap2D(uintptr_t* map, idx_t size_in_slots, idx_t bits_per_slot); + BitMap2D(bm_word_t* map, idx_t size_in_slots, idx_t bits_per_slot); // Allocates necessary data structure in resource area. bits_per_slot must be greater than 0. BitMap2D(idx_t size_in_slots, idx_t bits_per_slot); @@ -359,38 +353,14 @@ class BitMap2D VALUE_OBJ_CLASS_SPEC { _map.at_put_grow(bit_index(slot_index, bit_within_slot_index), value); } - void clear() { - _map.clear(); - } + void clear(); }; +// Closure for iterating over BitMaps - -inline void BitMap::set_range_of_words(idx_t beg, idx_t end) { - uintptr_t* map = _map; - for (idx_t i = beg; i < end; ++i) map[i] = ~(uintptr_t)0; -} - - -inline void BitMap::clear_range_of_words(idx_t beg, idx_t end) { - uintptr_t* map = _map; - for (idx_t i = beg; i < end; ++i) map[i] = 0; -} - - -inline void BitMap::clear() { - clear_range_of_words(0, size_in_words()); -} - - -inline void BitMap::par_clear_range(idx_t beg, idx_t end, RangeSizeHint hint) { - if (hint == small_range && end - beg == 1) { - par_at_put(beg, false); - } else { - if (hint == large_range) { - par_at_put_large_range(beg, end, false); - } else { - par_at_put_range(beg, end, false); - } - } -} +class BitMapClosure VALUE_OBJ_CLASS_SPEC { + public: + // Callback when bit in map is set. Should normally return "true"; + // return of false indicates that the bitmap iteration should terminate. 
+ virtual bool do_bit(BitMap::idx_t offset) = 0; +}; diff --git a/hotspot/src/share/vm/utilities/bitMap.inline.hpp b/hotspot/src/share/vm/utilities/bitMap.inline.hpp index 5e656d99e63..3f59e01a57d 100644 --- a/hotspot/src/share/vm/utilities/bitMap.inline.hpp +++ b/hotspot/src/share/vm/utilities/bitMap.inline.hpp @@ -22,6 +22,17 @@ * */ + +inline void BitMap::set_bit(idx_t bit) { + verify_index(bit); + *word_addr(bit) |= bit_mask(bit); +} + +inline void BitMap::clear_bit(idx_t bit) { + verify_index(bit); + *word_addr(bit) &= ~bit_mask(bit); +} + inline bool BitMap::par_set_bit(idx_t bit) { verify_index(bit); volatile idx_t* const addr = word_addr(bit); @@ -64,42 +75,236 @@ inline bool BitMap::par_clear_bit(idx_t bit) { } while (true); } -inline BitMap::idx_t -BitMap::find_next_one_bit(idx_t beg_bit, idx_t end_bit) const -{ - verify_range(beg_bit, end_bit); - assert(bit_in_word(end_bit) == 0, "end_bit not word-aligned"); - - if (beg_bit == end_bit) { - return beg_bit; +inline void BitMap::set_range(idx_t beg, idx_t end, RangeSizeHint hint) { + if (hint == small_range && end - beg == 1) { + set_bit(beg); + } else { + if (hint == large_range) { + set_large_range(beg, end); + } else { + set_range(beg, end); + } } +} - idx_t index = word_index(beg_bit); - idx_t r_index = word_index(end_bit); - idx_t res_bit = beg_bit; +inline void BitMap::clear_range(idx_t beg, idx_t end, RangeSizeHint hint) { + if (hint == small_range && end - beg == 1) { + clear_bit(beg); + } else { + if (hint == large_range) { + clear_large_range(beg, end); + } else { + clear_range(beg, end); + } + } +} + +inline void BitMap::par_set_range(idx_t beg, idx_t end, RangeSizeHint hint) { + if (hint == small_range && end - beg == 1) { + par_at_put(beg, true); + } else { + if (hint == large_range) { + par_at_put_large_range(beg, end, true); + } else { + par_at_put_range(beg, end, true); + } + } +} + +inline void BitMap::set_range_of_words(idx_t beg, idx_t end) { + bm_word_t* map = _map; + for (idx_t i = beg; 
i < end; ++i) map[i] = ~(uintptr_t)0; +} + + +inline void BitMap::clear_range_of_words(idx_t beg, idx_t end) { + bm_word_t* map = _map; + for (idx_t i = beg; i < end; ++i) map[i] = 0; +} + + +inline void BitMap::clear() { + clear_range_of_words(0, size_in_words()); +} + + +inline void BitMap::par_clear_range(idx_t beg, idx_t end, RangeSizeHint hint) { + if (hint == small_range && end - beg == 1) { + par_at_put(beg, false); + } else { + if (hint == large_range) { + par_at_put_large_range(beg, end, false); + } else { + par_at_put_range(beg, end, false); + } + } +} + +inline BitMap::idx_t +BitMap::get_next_one_offset_inline(idx_t l_offset, idx_t r_offset) const { + assert(l_offset <= size(), "BitMap index out of bounds"); + assert(r_offset <= size(), "BitMap index out of bounds"); + assert(l_offset <= r_offset, "l_offset > r_offset ?"); + + if (l_offset == r_offset) { + return l_offset; + } + idx_t index = word_index(l_offset); + idx_t r_index = word_index(r_offset-1) + 1; + idx_t res_offset = l_offset; // check bits including and to the _left_ of offset's position - idx_t res = map(index) >> bit_in_word(res_bit); - if (res != (uintptr_t) NoBits) { + idx_t pos = bit_in_word(res_offset); + idx_t res = map(index) >> pos; + if (res != (uintptr_t)NoBits) { // find the position of the 1-bit - for (; !(res & 1); res_bit++) { + for (; !(res & 1); res_offset++) { res = res >> 1; } - assert(res_bit >= beg_bit && res_bit < end_bit, "just checking"); - return res_bit; + assert(res_offset >= l_offset && + res_offset < r_offset, "just checking"); + return MIN2(res_offset, r_offset); } // skip over all word length 0-bit runs for (index++; index < r_index; index++) { res = map(index); - if (res != (uintptr_t) NoBits) { + if (res != (uintptr_t)NoBits) { // found a 1, return the offset - for (res_bit = bit_index(index); !(res & 1); res_bit++) { + for (res_offset = bit_index(index); !(res & 1); res_offset++) { res = res >> 1; } assert(res & 1, "tautology; see loop condition"); - 
assert(res_bit >= beg_bit && res_bit < end_bit, "just checking"); - return res_bit; + assert(res_offset >= l_offset, "just checking"); + return MIN2(res_offset, r_offset); } } - return end_bit; + return r_offset; +} + +inline BitMap::idx_t +BitMap::get_next_zero_offset_inline(idx_t l_offset, idx_t r_offset) const { + assert(l_offset <= size(), "BitMap index out of bounds"); + assert(r_offset <= size(), "BitMap index out of bounds"); + assert(l_offset <= r_offset, "l_offset > r_offset ?"); + + if (l_offset == r_offset) { + return l_offset; + } + idx_t index = word_index(l_offset); + idx_t r_index = word_index(r_offset-1) + 1; + idx_t res_offset = l_offset; + + // check bits including and to the _left_ of offset's position + idx_t pos = res_offset & (BitsPerWord - 1); + idx_t res = (map(index) >> pos) | left_n_bits((int)pos); + + if (res != (uintptr_t)AllBits) { + // find the position of the 0-bit + for (; res & 1; res_offset++) { + res = res >> 1; + } + assert(res_offset >= l_offset, "just checking"); + return MIN2(res_offset, r_offset); + } + // skip over all word length 1-bit runs + for (index++; index < r_index; index++) { + res = map(index); + if (res != (uintptr_t)AllBits) { + // found a 0, return the offset + for (res_offset = index << LogBitsPerWord; res & 1; + res_offset++) { + res = res >> 1; + } + assert(!(res & 1), "tautology; see loop condition"); + assert(res_offset >= l_offset, "just checking"); + return MIN2(res_offset, r_offset); + } + } + return r_offset; +} + +inline BitMap::idx_t +BitMap::get_next_one_offset_inline_aligned_right(idx_t l_offset, + idx_t r_offset) const +{ + verify_range(l_offset, r_offset); + assert(bit_in_word(r_offset) == 0, "r_offset not word-aligned"); + + if (l_offset == r_offset) { + return l_offset; + } + idx_t index = word_index(l_offset); + idx_t r_index = word_index(r_offset); + idx_t res_offset = l_offset; + + // check bits including and to the _left_ of offset's position + idx_t res = map(index) >> 
bit_in_word(res_offset); + if (res != (uintptr_t)NoBits) { + // find the position of the 1-bit + for (; !(res & 1); res_offset++) { + res = res >> 1; + } + assert(res_offset >= l_offset && + res_offset < r_offset, "just checking"); + return res_offset; + } + // skip over all word length 0-bit runs + for (index++; index < r_index; index++) { + res = map(index); + if (res != (uintptr_t)NoBits) { + // found a 1, return the offset + for (res_offset = bit_index(index); !(res & 1); res_offset++) { + res = res >> 1; + } + assert(res & 1, "tautology; see loop condition"); + assert(res_offset >= l_offset && res_offset < r_offset, "just checking"); + return res_offset; + } + } + return r_offset; +} + + +// Returns a bit mask for a range of bits [beg, end) within a single word. Each +// bit in the mask is 0 if the bit is in the range, 1 if not in the range. The +// returned mask can be used directly to clear the range, or inverted to set the +// range. Note: end must not be 0. +inline BitMap::bm_word_t +BitMap::inverted_bit_mask_for_range(idx_t beg, idx_t end) const { + assert(end != 0, "does not work when end == 0"); + assert(beg == end || word_index(beg) == word_index(end - 1), + "must be a single-word range"); + bm_word_t mask = bit_mask(beg) - 1; // low (right) bits + if (bit_in_word(end) != 0) { + mask |= ~(bit_mask(end) - 1); // high (left) bits + } + return mask; +} + +inline void BitMap::set_large_range_of_words(idx_t beg, idx_t end) { + memset(_map + beg, ~(unsigned char)0, (end - beg) * sizeof(uintptr_t)); +} + +inline void BitMap::clear_large_range_of_words(idx_t beg, idx_t end) { + memset(_map + beg, 0, (end - beg) * sizeof(uintptr_t)); +} + +inline BitMap::idx_t BitMap::word_index_round_up(idx_t bit) const { + idx_t bit_rounded_up = bit + (BitsPerWord - 1); + // Check for integer arithmetic overflow. + return bit_rounded_up > bit ? 
word_index(bit_rounded_up) : size_in_words(); +} + +inline BitMap::idx_t BitMap::get_next_one_offset(idx_t l_offset, + idx_t r_offset) const { + return get_next_one_offset_inline(l_offset, r_offset); +} + +inline BitMap::idx_t BitMap::get_next_zero_offset(idx_t l_offset, + idx_t r_offset) const { + return get_next_zero_offset_inline(l_offset, r_offset); +} + +inline void BitMap2D::clear() { + _map.clear(); } diff --git a/hotspot/src/share/vm/utilities/debug.cpp b/hotspot/src/share/vm/utilities/debug.cpp index e395102a666..10a1b166514 100644 --- a/hotspot/src/share/vm/utilities/debug.cpp +++ b/hotspot/src/share/vm/utilities/debug.cpp @@ -668,7 +668,7 @@ public: oop target; void do_oop(oop* o) { if (o != NULL && *o == target) { - tty->print_cr("0x%08x", o); + tty->print_cr(INTPTR_FORMAT, o); } } void do_oop(narrowOop* o) { ShouldNotReachHere(); } @@ -687,13 +687,13 @@ public: static void findref(intptr_t x) { - GenCollectedHeap *gch = GenCollectedHeap::heap(); + CollectedHeap *ch = Universe::heap(); LookForRefInGenClosure lookFor; lookFor.target = (oop) x; LookForRefInObjectClosure look_in_object((oop) x); tty->print_cr("Searching heap:"); - gch->object_iterate(&look_in_object); + ch->object_iterate(&look_in_object); tty->print_cr("Searching strong roots:"); Universe::oops_do(&lookFor, false); diff --git a/hotspot/src/share/vm/utilities/globalDefinitions.hpp b/hotspot/src/share/vm/utilities/globalDefinitions.hpp index 258a70f0154..f87bfad69ac 100644 --- a/hotspot/src/share/vm/utilities/globalDefinitions.hpp +++ b/hotspot/src/share/vm/utilities/globalDefinitions.hpp @@ -99,7 +99,7 @@ class HeapWord { friend class VMStructs; private: char* i; -#ifdef ASSERT +#ifndef PRODUCT public: char* value() { return i; } #endif diff --git a/hotspot/src/share/vm/utilities/intHisto.cpp b/hotspot/src/share/vm/utilities/intHisto.cpp new file mode 100644 index 00000000000..ad0e488c781 --- /dev/null +++ b/hotspot/src/share/vm/utilities/intHisto.cpp @@ -0,0 +1,64 @@ +/* + * Copyright 
2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +# include "incls/_precompiled.incl" +# include "incls/_intHisto.cpp.incl" + +IntHistogram::IntHistogram(int est, int max) : _max(max), _tot(0) { + assert(0 <= est && est <= max, "Preconditions"); + _elements = new (ResourceObj::C_HEAP) GrowableArray(est, true); + guarantee(_elements != NULL, "alloc failure"); +} + +void IntHistogram::add_entry(int outcome) { + if (outcome > _max) outcome = _max; + int new_count = _elements->at_grow(outcome) + 1; + _elements->at_put(outcome, new_count); + _tot++; +} + +int IntHistogram::entries_for_outcome(int outcome) { + return _elements->at_grow(outcome); +} + +void IntHistogram::print_on(outputStream* st) const { + double tot_d = (double)_tot; + st->print_cr("Outcome # of occurrences %% of occurrences"); + st->print_cr("-----------------------------------------------"); + for (int i=0; i < _elements->length()-2; i++) { + int cnt = _elements->at(i); + if (cnt != 0) { + st->print_cr("%7d %10d %8.4f", + i, cnt, (double)cnt/tot_d); + } + } + // Does it have any max entries? + if (_elements->length()-1 == _max) { + int cnt = _elements->at(_max); + st->print_cr(">= %4d %10d %8.4f", + _max, cnt, (double)cnt/tot_d); + } + st->print_cr("-----------------------------------------------"); + st->print_cr(" All %10d %8.4f", _tot, 1.0); +} diff --git a/hotspot/src/share/vm/utilities/intHisto.hpp b/hotspot/src/share/vm/utilities/intHisto.hpp new file mode 100644 index 00000000000..c2ca1973de3 --- /dev/null +++ b/hotspot/src/share/vm/utilities/intHisto.hpp @@ -0,0 +1,70 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// This class implements a simple histogram. + +// A histogram summarizes a series of "measurements", each of which is +// assumed (required in this implementation) to have an outcome that is a +// non-negative integer. The histogram efficiently maps measurement outcomes +// to the number of measurements had that outcome. + +// To print the results, invoke print() on your Histogram*. + +// Note: there is already an existing "Histogram" class, in file +// histogram.{hpp,cpp}, but to my mind that's not a histogram, it's a table +// mapping strings to counts. To be a histogram (IMHO) it needs to map +// numbers (in fact, integers) to number of occurrences of that number. + +// ysr: (i am not sure i agree with the above note.) i suspect we want to have a +// histogram template that will map an arbitrary type (with a defined order +// relation) to a count. + + +class IntHistogram : public CHeapObj { + protected: + int _max; + int _tot; + GrowableArray* _elements; + +public: + // Create a new, empty table. "est" is an estimate of the maximum outcome + // that will be added, and "max" is an outcome such that all outcomes at + // least that large will be bundled with it. 
+ IntHistogram(int est, int max); + // Add a measurement with the given outcome to the sequence. + void add_entry(int outcome); + // Return the number of entries recorded so far with the given outcome. + int entries_for_outcome(int outcome); + // Return the total number of entries recorded so far. + int total_entries() { return _tot; } + // Return the number of entries recorded so far with the given outcome as + // a fraction of the total number recorded so far. + double fraction_for_outcome(int outcome) { + return + (double)entries_for_outcome(outcome)/ + (double)total_entries(); + } + // Print the histogram on the given output stream. + void print_on(outputStream* st) const; +}; diff --git a/hotspot/src/share/vm/utilities/numberSeq.cpp b/hotspot/src/share/vm/utilities/numberSeq.cpp new file mode 100644 index 00000000000..7cd06b28c78 --- /dev/null +++ b/hotspot/src/share/vm/utilities/numberSeq.cpp @@ -0,0 +1,243 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +# include "incls/_precompiled.incl" +# include "incls/_numberSeq.cpp.incl" + +AbsSeq::AbsSeq(double alpha) : + _num(0), _sum(0.0), _sum_of_squares(0.0), + _davg(0.0), _dvariance(0.0), _alpha(alpha) { +} + +void AbsSeq::add(double val) { + if (_num == 0) { + // if the sequence is empty, the davg is the same as the value + _davg = val; + // and the variance is 0 + _dvariance = 0.0; + } else { + // otherwise, calculate both + _davg = (1.0 - _alpha) * val + _alpha * _davg; + double diff = val - _davg; + _dvariance = (1.0 - _alpha) * diff * diff + _alpha * _dvariance; + } +} + +double AbsSeq::avg() const { + if (_num == 0) + return 0.0; + else + return _sum / total(); +} + +double AbsSeq::variance() const { + if (_num <= 1) + return 0.0; + + double x_bar = avg(); + double result = _sum_of_squares / total() - x_bar * x_bar; + if (result < 0.0) { + // due to loss-of-precision errors, the variance might be negative + // by a small bit + + // guarantee(-0.1 < result && result < 0.0, + // "if variance is negative, it should be very small"); + result = 0.0; + } + return result; +} + +double AbsSeq::sd() const { + double var = variance(); + guarantee( var >= 0.0, "variance should not be negative" ); + return sqrt(var); +} + +double AbsSeq::davg() const { + return _davg; +} + +double AbsSeq::dvariance() const { + if (_num <= 1) + return 0.0; + + double result = _dvariance; + if (result < 0.0) { + // due to loss-of-precision errors, the variance might be negative + // by a small bit + + guarantee(-0.1 < result && result < 0.0, + "if variance is negative, it should be very small"); + result = 0.0; + } + return result; +} + +double AbsSeq::dsd() const { + double var = dvariance(); + guarantee( var >= 0.0, "variance should not be negative" ); + return sqrt(var); +} + 
+NumberSeq::NumberSeq(double alpha) : + AbsSeq(alpha), _maximum(0.0), _last(0.0) { +} + +bool NumberSeq::check_nums(NumberSeq *total, int n, NumberSeq **parts) { + for (int i = 0; i < n; ++i) { + if (parts[i] != NULL && total->num() != parts[i]->num()) + return false; + } + return true; +} + +NumberSeq::NumberSeq(NumberSeq *total, int n, NumberSeq **parts) { + guarantee(check_nums(total, n, parts), "all seq lengths should match"); + double sum = total->sum(); + for (int i = 0; i < n; ++i) { + if (parts[i] != NULL) + sum -= parts[i]->sum(); + } + + _num = total->num(); + _sum = sum; + + // we do not calculate these... + _sum_of_squares = -1.0; + _maximum = -1.0; + _davg = -1.0; + _dvariance = -1.0; +} + +void NumberSeq::add(double val) { + AbsSeq::add(val); + + _last = val; + if (_num == 0) { + _maximum = val; + } else { + if (val > _maximum) + _maximum = val; + } + _sum += val; + _sum_of_squares += val * val; + ++_num; +} + + +TruncatedSeq::TruncatedSeq(int length, double alpha): + AbsSeq(alpha), _length(length), _next(0) { + _sequence = NEW_C_HEAP_ARRAY(double, _length); + for (int i = 0; i < _length; ++i) + _sequence[i] = 0.0; +} + +void TruncatedSeq::add(double val) { + AbsSeq::add(val); + + // get the oldest value in the sequence... + double old_val = _sequence[_next]; + // ...remove it from the sum and sum of squares + _sum -= old_val; + _sum_of_squares -= old_val * old_val; + + // ...and update them with the new value + _sum += val; + _sum_of_squares += val * val; + + // now replace the old value with the new one + _sequence[_next] = val; + _next = (_next + 1) % _length; + + // only increase it if the buffer is not full + if (_num < _length) + ++_num; + + guarantee( variance() > -1.0, "variance should be >= 0" ); +} + +// can't easily keep track of this incrementally... 
+double TruncatedSeq::maximum() const { + if (_num == 0) + return 0.0; + double ret = _sequence[0]; + for (int i = 1; i < _num; ++i) { + double val = _sequence[i]; + if (val > ret) + ret = val; + } + return ret; +} + +double TruncatedSeq::last() const { + if (_num == 0) + return 0.0; + unsigned last_index = (_next + _length - 1) % _length; + return _sequence[last_index]; +} + +double TruncatedSeq::oldest() const { + if (_num == 0) + return 0.0; + else if (_num < _length) + // index 0 always oldest value until the array is full + return _sequence[0]; + else { + // since the array is full, _next is over the oldest value + return _sequence[_next]; + } +} + +double TruncatedSeq::predict_next() const { + if (_num == 0) + return 0.0; + + double num = (double) _num; + double x_squared_sum = 0.0; + double x_sum = 0.0; + double y_sum = 0.0; + double xy_sum = 0.0; + double x_avg = 0.0; + double y_avg = 0.0; + + int first = (_next + _length - _num) % _length; + for (int i = 0; i < _num; ++i) { + double x = (double) i; + double y = _sequence[(first + i) % _length]; + + x_squared_sum += x * x; + x_sum += x; + y_sum += y; + xy_sum += x * y; + } + x_avg = x_sum / num; + y_avg = y_sum / num; + + double Sxx = x_squared_sum - x_sum * x_sum / num; + double Sxy = xy_sum - x_sum * y_sum / num; + double b1 = Sxy / Sxx; + double b0 = y_avg - b1 * x_avg; + + return b0 + b1 * num; +} diff --git a/hotspot/src/share/vm/utilities/numberSeq.hpp b/hotspot/src/share/vm/utilities/numberSeq.hpp new file mode 100644 index 00000000000..4366c8bf1e0 --- /dev/null +++ b/hotspot/src/share/vm/utilities/numberSeq.hpp @@ -0,0 +1,117 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +/** + ** This file contains a few classes that represent number sequence, + ** x1, x2, x3, ..., xN, and can calculate their avg, max, and sd. + ** + ** Here's a quick description of the classes: + ** + ** AbsSeq: abstract superclass + ** NumberSeq: the sequence is assumed to be very long and the + ** maximum, avg, sd, davg, and dsd are calculated over all its elements + ** TruncatedSeq: this class keeps track of the last L elements + ** of the sequence and calculates avg, max, and sd only over them + **/ + +#define DEFAULT_ALPHA_VALUE 0.7 + +class AbsSeq { +private: + void init(double alpha); + +protected: + int _num; // the number of elements in the sequence + double _sum; // the sum of the elements in the sequence + double _sum_of_squares; // the sum of squares of the elements in the sequence + + double _davg; // decaying average + double _dvariance; // decaying variance + double _alpha; // factor for the decaying average / variance + + // This is what we divide with to get the average. In a standard + // number sequence, this should just be the number of elements in it. 
+ virtual double total() const { return (double) _num; }; + +public: + AbsSeq(double alpha = DEFAULT_ALPHA_VALUE); + + virtual void add(double val); // adds a new element to the sequence + void add(unsigned val) { add((double) val); } + virtual double maximum() const = 0; // maximum element in the sequence + virtual double last() const = 0; // last element added in the sequence + + // the number of elements in the sequence + int num() const { return _num; } + // the sum of the elements in the sequence + double sum() const { return _sum; } + + double avg() const; // the average of the sequence + double variance() const; // the variance of the sequence + double sd() const; // the standard deviation of the sequence + + double davg() const; // decaying average + double dvariance() const; // decaying variance + double dsd() const; // decaying "standard deviation" +}; + +class NumberSeq: public AbsSeq { +private: + bool check_nums(NumberSeq* total, int n, NumberSeq** parts); + +protected: + double _last; + double _maximum; // keep track of maximum value + +public: + NumberSeq(double alpha = DEFAULT_ALPHA_VALUE); + NumberSeq(NumberSeq* total, int n_parts, NumberSeq** parts); + + virtual void add(double val); + virtual double maximum() const { return _maximum; } + virtual double last() const { return _last; } +}; + +class TruncatedSeq: public AbsSeq { +private: + enum PrivateConstants { + DefaultSeqLength = 10 + }; + void init(); +protected: + double *_sequence; // buffers the last L elements in the sequence + int _length; // this is L + int _next; // oldest slot in the array, i.e. 
next to be overwritten + +public: + // accepts a value for L + TruncatedSeq(int length = DefaultSeqLength, + double alpha = DEFAULT_ALPHA_VALUE); + virtual void add(double val); + virtual double maximum() const; + virtual double last() const; // the last value added to the sequence + + double oldest() const; // the oldest valid value in the sequence + double predict_next() const; // prediction based on linear regression +}; diff --git a/hotspot/src/share/vm/utilities/ostream.cpp b/hotspot/src/share/vm/utilities/ostream.cpp index 6839c78ba95..8770bed52ca 100644 --- a/hotspot/src/share/vm/utilities/ostream.cpp +++ b/hotspot/src/share/vm/utilities/ostream.cpp @@ -188,6 +188,17 @@ void outputStream::stamp() { print_raw(buf); } +void outputStream::stamp(bool guard, + const char* prefix, + const char* suffix) { + if (!guard) { + return; + } + print_raw(prefix); + stamp(); + print_raw(suffix); +} + void outputStream::date_stamp(bool guard, const char* prefix, const char* suffix) { diff --git a/hotspot/src/share/vm/utilities/ostream.hpp b/hotspot/src/share/vm/utilities/ostream.hpp index b69fac590c8..912ddad10a6 100644 --- a/hotspot/src/share/vm/utilities/ostream.hpp +++ b/hotspot/src/share/vm/utilities/ostream.hpp @@ -86,6 +86,10 @@ class outputStream : public ResourceObj { // Time stamp TimeStamp& time_stamp() { return _stamp; } void stamp(); + void stamp(bool guard, const char* prefix, const char* suffix); + void stamp(bool guard) { + stamp(guard, "", ": "); + } // Date stamp void date_stamp(bool guard, const char* prefix, const char* suffix); // A simplified call that includes a suffix of ": " diff --git a/hotspot/src/share/vm/utilities/taskqueue.cpp b/hotspot/src/share/vm/utilities/taskqueue.cpp index 691a85031df..d5220089c5a 100644 --- a/hotspot/src/share/vm/utilities/taskqueue.cpp +++ b/hotspot/src/share/vm/utilities/taskqueue.cpp @@ -65,7 +65,8 @@ void ParallelTaskTerminator::sleep(uint millis) { os::sleep(Thread::current(), millis, false); } -bool 
ParallelTaskTerminator::offer_termination() { +bool +ParallelTaskTerminator::offer_termination(TerminatorTerminator* terminator) { Atomic::inc(&_offered_termination); juint yield_count = 0; @@ -91,7 +92,8 @@ bool ParallelTaskTerminator::offer_termination() { sleep(WorkStealingSleepMillis); } - if (peek_in_queue_set()) { + if (peek_in_queue_set() || + (terminator != NULL && terminator->should_exit_termination())) { Atomic::dec(&_offered_termination); return false; } @@ -107,72 +109,72 @@ void ParallelTaskTerminator::reset_for_reuse() { } } -bool ChunkTaskQueueWithOverflow::is_empty() { - return (_chunk_queue.size() == 0) && +bool RegionTaskQueueWithOverflow::is_empty() { + return (_region_queue.size() == 0) && (_overflow_stack->length() == 0); } -bool ChunkTaskQueueWithOverflow::stealable_is_empty() { - return _chunk_queue.size() == 0; +bool RegionTaskQueueWithOverflow::stealable_is_empty() { + return _region_queue.size() == 0; } -bool ChunkTaskQueueWithOverflow::overflow_is_empty() { +bool RegionTaskQueueWithOverflow::overflow_is_empty() { return _overflow_stack->length() == 0; } -void ChunkTaskQueueWithOverflow::initialize() { - _chunk_queue.initialize(); +void RegionTaskQueueWithOverflow::initialize() { + _region_queue.initialize(); assert(_overflow_stack == 0, "Creating memory leak"); _overflow_stack = - new (ResourceObj::C_HEAP) GrowableArray(10, true); + new (ResourceObj::C_HEAP) GrowableArray(10, true); } -void ChunkTaskQueueWithOverflow::save(ChunkTask t) { - if (TraceChunkTasksQueuing && Verbose) { +void RegionTaskQueueWithOverflow::save(RegionTask t) { + if (TraceRegionTasksQueuing && Verbose) { gclog_or_tty->print_cr("CTQ: save " PTR_FORMAT, t); } - if(!_chunk_queue.push(t)) { + if(!_region_queue.push(t)) { _overflow_stack->push(t); } } -// Note that using this method will retrieve all chunks +// Note that using this method will retrieve all regions // that have been saved but that it will always check // the overflow stack. 
It may be more efficient to // check the stealable queue and the overflow stack // separately. -bool ChunkTaskQueueWithOverflow::retrieve(ChunkTask& chunk_task) { - bool result = retrieve_from_overflow(chunk_task); +bool RegionTaskQueueWithOverflow::retrieve(RegionTask& region_task) { + bool result = retrieve_from_overflow(region_task); if (!result) { - result = retrieve_from_stealable_queue(chunk_task); + result = retrieve_from_stealable_queue(region_task); } - if (TraceChunkTasksQueuing && Verbose && result) { + if (TraceRegionTasksQueuing && Verbose && result) { gclog_or_tty->print_cr(" CTQ: retrieve " PTR_FORMAT, result); } return result; } -bool ChunkTaskQueueWithOverflow::retrieve_from_stealable_queue( - ChunkTask& chunk_task) { - bool result = _chunk_queue.pop_local(chunk_task); - if (TraceChunkTasksQueuing && Verbose) { - gclog_or_tty->print_cr("CTQ: retrieve_stealable " PTR_FORMAT, chunk_task); +bool RegionTaskQueueWithOverflow::retrieve_from_stealable_queue( + RegionTask& region_task) { + bool result = _region_queue.pop_local(region_task); + if (TraceRegionTasksQueuing && Verbose) { + gclog_or_tty->print_cr("CTQ: retrieve_stealable " PTR_FORMAT, region_task); } return result; } -bool ChunkTaskQueueWithOverflow::retrieve_from_overflow( - ChunkTask& chunk_task) { +bool +RegionTaskQueueWithOverflow::retrieve_from_overflow(RegionTask& region_task) { bool result; if (!_overflow_stack->is_empty()) { - chunk_task = _overflow_stack->pop(); + region_task = _overflow_stack->pop(); result = true; } else { - chunk_task = (ChunkTask) NULL; + region_task = (RegionTask) NULL; result = false; } - if (TraceChunkTasksQueuing && Verbose) { - gclog_or_tty->print_cr("CTQ: retrieve_stealable " PTR_FORMAT, chunk_task); + if (TraceRegionTasksQueuing && Verbose) { + gclog_or_tty->print_cr("CTQ: retrieve_stealable " PTR_FORMAT, region_task); } return result; } diff --git a/hotspot/src/share/vm/utilities/taskqueue.hpp b/hotspot/src/share/vm/utilities/taskqueue.hpp index 
3221ddaacc8..2323fb61c76 100644 --- a/hotspot/src/share/vm/utilities/taskqueue.hpp +++ b/hotspot/src/share/vm/utilities/taskqueue.hpp @@ -120,6 +120,11 @@ public: return dirty_size(_bottom, get_top()); } + void set_empty() { + _bottom = 0; + _age = Age(); + } + // Maximum number of elements allowed in the queue. This is two less // than the actual queue size, for somewhat complicated reasons. juint max_elems() { return n() - 2; } @@ -155,6 +160,9 @@ public: // Delete any resource associated with the queue. ~GenericTaskQueue(); + // apply the closure to all elements in the task queue + void oops_do(OopClosure* f); + private: // Element array. volatile E* _elems; @@ -171,6 +179,24 @@ void GenericTaskQueue::initialize() { guarantee(_elems != NULL, "Allocation failed."); } +template +void GenericTaskQueue::oops_do(OopClosure* f) { + // tty->print_cr("START OopTaskQueue::oops_do"); + int iters = size(); + juint index = _bottom; + for (int i = 0; i < iters; ++i) { + index = decrement_index(index); + // tty->print_cr(" doing entry %d," INTPTR_T " -> " INTPTR_T, + // index, &_elems[index], _elems[index]); + E* t = (E*)&_elems[index]; // cast away volatility + oop* p = (oop*)t; + assert((*t)->is_oop_or_null(), "Not an oop or null"); + f->do_oop(p); + } + // tty->print_cr("END OopTaskQueue::oops_do"); +} + + template bool GenericTaskQueue::push_slow(E t, juint dirty_n_elems) { if (dirty_n_elems == n() - 1) { @@ -383,6 +409,12 @@ bool GenericTaskQueueSet::peek() { return false; } +// When to terminate from the termination protocol. +class TerminatorTerminator: public CHeapObj { +public: + virtual bool should_exit_termination() = 0; +}; + // A class to aid in the termination of a set of parallel tasks using // TaskQueueSet's for work stealing. @@ -407,7 +439,14 @@ public: // else is. If returns "true", all threads are terminated. If returns // "false", available work has been observed in one of the task queues, // so the global task is not complete. 
- bool offer_termination(); + bool offer_termination() { + return offer_termination(NULL); + } + + // As above, but it also terminates of the should_exit_termination() + // method of the terminator parameter returns true. If terminator is + // NULL, then it is ignored. + bool offer_termination(TerminatorTerminator* terminator); // Reset the terminator, so that it may be reused again. // The caller is responsible for ensuring that this is done @@ -518,32 +557,32 @@ class StarTask { typedef GenericTaskQueue OopStarTaskQueue; typedef GenericTaskQueueSet OopStarTaskQueueSet; -typedef size_t ChunkTask; // index for chunk -typedef GenericTaskQueue ChunkTaskQueue; -typedef GenericTaskQueueSet ChunkTaskQueueSet; +typedef size_t RegionTask; // index for region +typedef GenericTaskQueue RegionTaskQueue; +typedef GenericTaskQueueSet RegionTaskQueueSet; -class ChunkTaskQueueWithOverflow: public CHeapObj { +class RegionTaskQueueWithOverflow: public CHeapObj { protected: - ChunkTaskQueue _chunk_queue; - GrowableArray* _overflow_stack; + RegionTaskQueue _region_queue; + GrowableArray* _overflow_stack; public: - ChunkTaskQueueWithOverflow() : _overflow_stack(NULL) {} + RegionTaskQueueWithOverflow() : _overflow_stack(NULL) {} // Initialize both stealable queue and overflow void initialize(); // Save first to stealable queue and then to overflow - void save(ChunkTask t); + void save(RegionTask t); // Retrieve first from overflow and then from stealable queue - bool retrieve(ChunkTask& chunk_index); + bool retrieve(RegionTask& region_index); // Retrieve from stealable queue - bool retrieve_from_stealable_queue(ChunkTask& chunk_index); + bool retrieve_from_stealable_queue(RegionTask& region_index); // Retrieve from overflow - bool retrieve_from_overflow(ChunkTask& chunk_index); + bool retrieve_from_overflow(RegionTask& region_index); bool is_empty(); bool stealable_is_empty(); bool overflow_is_empty(); - juint stealable_size() { return _chunk_queue.size(); } - ChunkTaskQueue* 
task_queue() { return &_chunk_queue; } + juint stealable_size() { return _region_queue.size(); } + RegionTaskQueue* task_queue() { return &_region_queue; } }; -#define USE_ChunkTaskQueueWithOverflow +#define USE_RegionTaskQueueWithOverflow diff --git a/hotspot/src/share/vm/utilities/workgroup.cpp b/hotspot/src/share/vm/utilities/workgroup.cpp index bdf650bbca7..d0f7b5eba79 100644 --- a/hotspot/src/share/vm/utilities/workgroup.cpp +++ b/hotspot/src/share/vm/utilities/workgroup.cpp @@ -28,13 +28,19 @@ // Definitions of WorkGang methods. AbstractWorkGang::AbstractWorkGang(const char* name, - bool are_GC_threads) : + bool are_GC_task_threads, + bool are_ConcurrentGC_threads) : _name(name), - _are_GC_threads(are_GC_threads) { + _are_GC_task_threads(are_GC_task_threads), + _are_ConcurrentGC_threads(are_ConcurrentGC_threads) { + + assert(!(are_GC_task_threads && are_ConcurrentGC_threads), + "They cannot both be STW GC and Concurrent threads" ); + // Other initialization. _monitor = new Monitor(/* priority */ Mutex::leaf, /* name */ "WorkGroup monitor", - /* allow_vm_block */ are_GC_threads); + /* allow_vm_block */ are_GC_task_threads); assert(monitor() != NULL, "Failed to allocate monitor"); _terminate = false; _task = NULL; @@ -44,16 +50,21 @@ AbstractWorkGang::AbstractWorkGang(const char* name, } WorkGang::WorkGang(const char* name, - int workers, - bool are_GC_threads) : - AbstractWorkGang(name, are_GC_threads) { + int workers, + bool are_GC_task_threads, + bool are_ConcurrentGC_threads) : + AbstractWorkGang(name, are_GC_task_threads, are_ConcurrentGC_threads) +{ // Save arguments. 
_total_workers = workers; + if (TraceWorkGang) { tty->print_cr("Constructing work gang %s with %d threads", name, workers); } _gang_workers = NEW_C_HEAP_ARRAY(GangWorker*, workers); - assert(gang_workers() != NULL, "Failed to allocate gang workers"); + if (gang_workers() == NULL) { + vm_exit_out_of_memory(0, "Cannot create GangWorker array."); + } for (int worker = 0; worker < total_workers(); worker += 1) { GangWorker* new_worker = new GangWorker(this, worker); assert(new_worker != NULL, "Failed to allocate GangWorker"); @@ -285,7 +296,11 @@ void GangWorker::loop() { } bool GangWorker::is_GC_task_thread() const { - return gang()->are_GC_threads(); + return gang()->are_GC_task_threads(); +} + +bool GangWorker::is_ConcurrentGC_thread() const { + return gang()->are_ConcurrentGC_threads(); } void GangWorker::print_on(outputStream* st) const { @@ -312,26 +327,43 @@ const char* AbstractGangTask::name() const { WorkGangBarrierSync::WorkGangBarrierSync() : _monitor(Mutex::safepoint, "work gang barrier sync", true), - _n_workers(0), _n_completed(0) { + _n_workers(0), _n_completed(0), _should_reset(false) { } WorkGangBarrierSync::WorkGangBarrierSync(int n_workers, const char* name) : _monitor(Mutex::safepoint, name, true), - _n_workers(n_workers), _n_completed(0) { + _n_workers(n_workers), _n_completed(0), _should_reset(false) { } void WorkGangBarrierSync::set_n_workers(int n_workers) { _n_workers = n_workers; _n_completed = 0; + _should_reset = false; } void WorkGangBarrierSync::enter() { MutexLockerEx x(monitor(), Mutex::_no_safepoint_check_flag); + if (should_reset()) { + // The should_reset() was set and we are the first worker to enter + // the sync barrier. We will zero the n_completed() count which + // effectively resets the barrier. + zero_completed(); + set_should_reset(false); + } inc_completed(); if (n_completed() == n_workers()) { + // At this point we would like to reset the barrier to be ready in + // case it is used again. 
However, we cannot set n_completed() to + // 0, even after the notify_all(), given that some other workers + // might still be waiting for n_completed() to become == + // n_workers(). So, if we set n_completed() to 0, those workers + // will get stuck (as they will wake up, see that n_completed() != + // n_workers() and go back to sleep). Instead, we raise the + // should_reset() flag and the barrier will be reset the first + // time a worker enters it again. + set_should_reset(true); monitor()->notify_all(); - } - else { + } else { while (n_completed() != n_workers()) { monitor()->wait(/* no_safepoint_check */ true); } @@ -442,3 +474,122 @@ bool SequentialSubTasksDone::all_tasks_completed() { } return false; } + +bool FreeIdSet::_stat_init = false; +FreeIdSet* FreeIdSet::_sets[NSets]; +bool FreeIdSet::_safepoint; + +FreeIdSet::FreeIdSet(int sz, Monitor* mon) : + _sz(sz), _mon(mon), _hd(0), _waiters(0), _index(-1), _claimed(0) +{ + _ids = new int[sz]; + for (int i = 0; i < sz; i++) _ids[i] = i+1; + _ids[sz-1] = end_of_list; // end of list. + if (_stat_init) { + for (int j = 0; j < NSets; j++) _sets[j] = NULL; + _stat_init = true; + } + // Add to sets. (This should happen while the system is still single-threaded.) 
+ for (int j = 0; j < NSets; j++) { + if (_sets[j] == NULL) { + _sets[j] = this; + _index = j; + break; + } + } + guarantee(_index != -1, "Too many FreeIdSets in use!"); +} + +FreeIdSet::~FreeIdSet() { + _sets[_index] = NULL; +} + +void FreeIdSet::set_safepoint(bool b) { + _safepoint = b; + if (b) { + for (int j = 0; j < NSets; j++) { + if (_sets[j] != NULL && _sets[j]->_waiters > 0) { + Monitor* mon = _sets[j]->_mon; + mon->lock_without_safepoint_check(); + mon->notify_all(); + mon->unlock(); + } + } + } +} + +#define FID_STATS 0 + +int FreeIdSet::claim_par_id() { +#if FID_STATS + thread_t tslf = thr_self(); + tty->print("claim_par_id[%d]: sz = %d, claimed = %d\n", tslf, _sz, _claimed); +#endif + MutexLockerEx x(_mon, Mutex::_no_safepoint_check_flag); + while (!_safepoint && _hd == end_of_list) { + _waiters++; +#if FID_STATS + if (_waiters > 5) { + tty->print("claim_par_id waiting[%d]: %d waiters, %d claimed.\n", + tslf, _waiters, _claimed); + } +#endif + _mon->wait(Mutex::_no_safepoint_check_flag); + _waiters--; + } + if (_hd == end_of_list) { +#if FID_STATS + tty->print("claim_par_id[%d]: returning EOL.\n", tslf); +#endif + return -1; + } else { + int res = _hd; + _hd = _ids[res]; + _ids[res] = claimed; // For debugging. 
+ _claimed++; +#if FID_STATS + tty->print("claim_par_id[%d]: returning %d, claimed = %d.\n", + tslf, res, _claimed); +#endif + return res; + } +} + +bool FreeIdSet::claim_perm_id(int i) { + assert(0 <= i && i < _sz, "Out of range."); + MutexLockerEx x(_mon, Mutex::_no_safepoint_check_flag); + int prev = end_of_list; + int cur = _hd; + while (cur != end_of_list) { + if (cur == i) { + if (prev == end_of_list) { + _hd = _ids[cur]; + } else { + _ids[prev] = _ids[cur]; + } + _ids[cur] = claimed; + _claimed++; + return true; + } else { + prev = cur; + cur = _ids[cur]; + } + } + return false; + +} + +void FreeIdSet::release_par_id(int id) { + MutexLockerEx x(_mon, Mutex::_no_safepoint_check_flag); + assert(_ids[id] == claimed, "Precondition."); + _ids[id] = _hd; + _hd = id; + _claimed--; +#if FID_STATS + tty->print("[%d] release_par_id(%d), waiters =%d, claimed = %d.\n", + thr_self(), id, _waiters, _claimed); +#endif + if (_waiters > 0) + // Notify all would be safer, but this is OK, right? + _mon->notify_all(); +} diff --git a/hotspot/src/share/vm/utilities/workgroup.hpp b/hotspot/src/share/vm/utilities/workgroup.hpp index 3797a3f76c2..fb2ec0603fd 100644 --- a/hotspot/src/share/vm/utilities/workgroup.hpp +++ b/hotspot/src/share/vm/utilities/workgroup.hpp @@ -72,7 +72,8 @@ class AbstractWorkGang: public CHeapObj { // Here's the public interface to this class. public: // Constructor and destructor. - AbstractWorkGang(const char* name, bool are_GC_threads); + AbstractWorkGang(const char* name, bool are_GC_task_threads, + bool are_ConcurrentGC_threads); ~AbstractWorkGang(); // Run a task, returns when the task is done (or terminated). virtual void run_task(AbstractGangTask* task) = 0; @@ -83,7 +84,8 @@ public: const char* name() const; protected: // Initialize only instance data. - const bool _are_GC_threads; + const bool _are_GC_task_threads; + const bool _are_ConcurrentGC_threads; // Printing support. 
const char* _name; // The monitor which protects these data, @@ -130,8 +132,11 @@ public: int finished_workers() const { return _finished_workers; } - bool are_GC_threads() const { - return _are_GC_threads; + bool are_GC_task_threads() const { + return _are_GC_task_threads; + } + bool are_ConcurrentGC_threads() const { + return _are_ConcurrentGC_threads; } // Predicates. bool is_idle() const { @@ -190,7 +195,8 @@ public: class WorkGang: public AbstractWorkGang { public: // Constructor - WorkGang(const char* name, int workers, bool are_GC_threads); + WorkGang(const char* name, int workers, + bool are_GC_task_threads, bool are_ConcurrentGC_threads); // Run a task, returns when the task is done (or terminated). virtual void run_task(AbstractGangTask* task); }; @@ -206,6 +212,7 @@ public: virtual void run(); // Predicate for Thread virtual bool is_GC_task_thread() const; + virtual bool is_ConcurrentGC_thread() const; // Printing void print_on(outputStream* st) const; virtual void print() const { print_on(tty); } @@ -228,12 +235,17 @@ protected: Monitor _monitor; int _n_workers; int _n_completed; + bool _should_reset; - Monitor* monitor() { return &_monitor; } - int n_workers() { return _n_workers; } - int n_completed() { return _n_completed; } + Monitor* monitor() { return &_monitor; } + int n_workers() { return _n_workers; } + int n_completed() { return _n_completed; } + bool should_reset() { return _should_reset; } - void inc_completed() { _n_completed++; } + void zero_completed() { _n_completed = 0; } + void inc_completed() { _n_completed++; } + + void set_should_reset(bool v) { _should_reset = v; } public: WorkGangBarrierSync(); @@ -343,3 +355,42 @@ public: // cleanup if necessary. bool all_tasks_completed(); }; + +// Represents a set of free small integer ids. 
+class FreeIdSet { + enum { + end_of_list = -1, + claimed = -2 + }; + + int _sz; + Monitor* _mon; + + int* _ids; + int _hd; + int _waiters; + int _claimed; + + static bool _safepoint; + typedef FreeIdSet* FreeIdSetPtr; + static const int NSets = 10; + static FreeIdSetPtr _sets[NSets]; + static bool _stat_init; + int _index; + +public: + FreeIdSet(int sz, Monitor* mon); + ~FreeIdSet(); + + static void set_safepoint(bool b); + + // Attempt to claim the given id permanently. Returns "true" iff + // successful. + bool claim_perm_id(int i); + + // Returns an unclaimed parallel id (waiting for one to be released if + // necessary). Returns "-1" if a GC wakes up a wait for an id. + int claim_par_id(); + + void release_par_id(int id); +}; diff --git a/hotspot/src/share/vm/utilities/yieldingWorkgroup.cpp b/hotspot/src/share/vm/utilities/yieldingWorkgroup.cpp index d4c0ea92d11..dd144285f44 100644 --- a/hotspot/src/share/vm/utilities/yieldingWorkgroup.cpp +++ b/hotspot/src/share/vm/utilities/yieldingWorkgroup.cpp @@ -31,8 +31,8 @@ class GangWorker; class WorkData; YieldingFlexibleWorkGang::YieldingFlexibleWorkGang( - const char* name, int workers, bool are_GC_threads) : - AbstractWorkGang(name, are_GC_threads) { + const char* name, int workers, bool are_GC_task_threads) : + AbstractWorkGang(name, are_GC_task_threads, false) { // Save arguments. _total_workers = workers; assert(_total_workers > 0, "Must have more than 1 worker"); diff --git a/hotspot/src/share/vm/utilities/yieldingWorkgroup.hpp b/hotspot/src/share/vm/utilities/yieldingWorkgroup.hpp index d7890f17d70..cd96ff08a55 100644 --- a/hotspot/src/share/vm/utilities/yieldingWorkgroup.hpp +++ b/hotspot/src/share/vm/utilities/yieldingWorkgroup.hpp @@ -143,7 +143,8 @@ class YieldingFlexibleWorkGang: public AbstractWorkGang { // Here's the public interface to this class. public: // Constructor and destructor. 
- YieldingFlexibleWorkGang(const char* name, int workers, bool are_GC_threads); + YieldingFlexibleWorkGang(const char* name, int workers, + bool are_GC_task_threads); YieldingFlexibleGangTask* yielding_task() const { assert(task() == NULL || task()->is_YieldingFlexibleGang_task(), diff --git a/hotspot/test/compiler/6646019/Test.java b/hotspot/test/compiler/6646019/Test.java index 99c07617e04..f28d0a05036 100644 --- a/hotspot/test/compiler/6646019/Test.java +++ b/hotspot/test/compiler/6646019/Test.java @@ -19,7 +19,6 @@ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, * CA 95054 USA or visit www.sun.com if you need additional information or * have any questions. - * */ /* diff --git a/hotspot/test/compiler/6689060/Test.java b/hotspot/test/compiler/6689060/Test.java index 4d3f2003baa..ba42667a815 100644 --- a/hotspot/test/compiler/6689060/Test.java +++ b/hotspot/test/compiler/6689060/Test.java @@ -19,7 +19,6 @@ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, * CA 95054 USA or visit www.sun.com if you need additional information or * have any questions. - * */ /* diff --git a/hotspot/test/compiler/6695810/Test.java b/hotspot/test/compiler/6695810/Test.java index f59db440083..a8f2ea0c80e 100644 --- a/hotspot/test/compiler/6695810/Test.java +++ b/hotspot/test/compiler/6695810/Test.java @@ -19,7 +19,6 @@ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, * CA 95054 USA or visit www.sun.com if you need additional information or * have any questions. 
- * */ /* diff --git a/hotspot/test/compiler/6700047/Test6700047.java b/hotspot/test/compiler/6700047/Test6700047.java index 55921d59465..64e6ddb1933 100644 --- a/hotspot/test/compiler/6700047/Test6700047.java +++ b/hotspot/test/compiler/6700047/Test6700047.java @@ -29,6 +29,8 @@ */ public class Test6700047 { + static byte[] dummy = new byte[256]; + public static void main(String[] args) { for (int i = 0; i < 100000; i++) { intToLeftPaddedAsciiBytes(); @@ -53,6 +55,7 @@ public class Test6700047 { if (offset > 0) { for(int j = 0; j < offset; j++) { result++; + dummy[i] = 0; } } return result; diff --git a/hotspot/test/compiler/6711100/Test.java b/hotspot/test/compiler/6711100/Test.java new file mode 100644 index 00000000000..e1f0135b99d --- /dev/null +++ b/hotspot/test/compiler/6711100/Test.java @@ -0,0 +1,53 @@ +/* + * Copyright 2008 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ */ + +/* + * @test + * @bug 6711100 + * @summary 64bit fastdebug server vm crashes with assert(_base == Int,"Not an Int") + * @run main/othervm -Xcomp -XX:CompileOnly=Test. Test + */ + +public class Test { + + static byte b; + + // The server compiler chokes on compiling + // this method when f() is not inlined + public Test() { + b = (new byte[1])[(new byte[f()])[-1]]; + } + + protected static int f() { + return 1; + } + + public static void main(String[] args) { + try { + Test t = new Test(); + } catch (ArrayIndexOutOfBoundsException e) { + } + } +} + + diff --git a/jdk/.hgtags b/jdk/.hgtags index 8a91ce7fabc..d9b8755cd4d 100644 --- a/jdk/.hgtags +++ b/jdk/.hgtags @@ -12,3 +12,4 @@ fa4c0a6cdd25d97d4e6f5d7aa180bcbb0e0d56af jdk7-b33 cf4894b78ceb966326e93bf221db0c2d14d59218 jdk7-b35 134fd1a656ea85acd1f97f6700f75029b9b472a0 jdk7-b36 14f50aee4989b75934d385c56a83da0c23d2f68b jdk7-b37 +cc5f810b5af8a3a83b0df5a29d9e24d7a0ff8086 jdk7-b38 diff --git a/jdk/src/share/classes/com/sun/jmx/mbeanserver/MXBeanIntrospector.java b/jdk/src/share/classes/com/sun/jmx/mbeanserver/MXBeanIntrospector.java index 8ff0a902dff..6479f9751e0 100644 --- a/jdk/src/share/classes/com/sun/jmx/mbeanserver/MXBeanIntrospector.java +++ b/jdk/src/share/classes/com/sun/jmx/mbeanserver/MXBeanIntrospector.java @@ -32,6 +32,7 @@ import java.lang.ref.WeakReference; import java.lang.reflect.GenericArrayType; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; +import java.lang.reflect.ParameterizedType; import java.lang.reflect.Type; import java.util.Map; import java.util.WeakHashMap; @@ -390,7 +391,31 @@ class MXBeanIntrospector extends MBeanIntrospector { if (type instanceof Class) return ((Class) type).getName(); else - return type.toString(); + return genericTypeString(type); + } + + private static String genericTypeString(Type type) { + if (type instanceof Class) { + Class c = (Class) type; + if (c.isArray()) + return genericTypeString(c.getComponentType()) + "[]"; + 
else + return c.getName(); + } else if (type instanceof GenericArrayType) { + GenericArrayType gat = (GenericArrayType) type; + return genericTypeString(gat.getGenericComponentType()) + "[]"; + } else if (type instanceof ParameterizedType) { + ParameterizedType pt = (ParameterizedType) type; + StringBuilder sb = new StringBuilder(); + sb.append(genericTypeString(pt.getRawType())).append("<"); + String sep = ""; + for (Type t : pt.getActualTypeArguments()) { + sb.append(sep).append(genericTypeString(t)); + sep = ", "; + } + return sb.append(">").toString(); + } else + return "???"; } private final PerInterfaceMap diff --git a/jdk/src/share/classes/java/nio/channels/SelectableChannel.java b/jdk/src/share/classes/java/nio/channels/SelectableChannel.java index 10f523d0430..f11439f54a1 100644 --- a/jdk/src/share/classes/java/nio/channels/SelectableChannel.java +++ b/jdk/src/share/classes/java/nio/channels/SelectableChannel.java @@ -191,6 +191,9 @@ public abstract class SelectableChannel * @throws ClosedChannelException * If this channel is closed * + * @throws ClosedSelectorException + * If the selector is closed + * * @throws IllegalBlockingModeException * If this channel is in blocking mode * @@ -246,6 +249,9 @@ public abstract class SelectableChannel * @throws ClosedChannelException * If this channel is closed * + * @throws ClosedSelectorException + * If the selector is closed + * * @throws IllegalBlockingModeException * If this channel is in blocking mode * diff --git a/jdk/src/share/classes/java/nio/channels/spi/AbstractSelectableChannel.java b/jdk/src/share/classes/java/nio/channels/spi/AbstractSelectableChannel.java index 73fcf3d130e..3b1d31c5ebb 100644 --- a/jdk/src/share/classes/java/nio/channels/spi/AbstractSelectableChannel.java +++ b/jdk/src/share/classes/java/nio/channels/spi/AbstractSelectableChannel.java @@ -175,6 +175,16 @@ public abstract class AbstractSelectableChannel * the selector is invoked while holding the appropriate locks. 
The * resulting key is added to this channel's key set before being returned. *

+ * + * @throws ClosedSelectorException {@inheritDoc} + * + * @throws IllegalBlockingModeException {@inheritDoc} + * + * @throws IllegalSelectorException {@inheritDoc} + * + * @throws CancelledKeyException {@inheritDoc} + * + * @throws IllegalArgumentException {@inheritDoc} */ public final SelectionKey register(Selector sel, int ops, Object att) diff --git a/jdk/src/share/classes/javax/management/event/EventClient.java b/jdk/src/share/classes/javax/management/event/EventClient.java index 6f5c84eb2cd..cfce77b0879 100644 --- a/jdk/src/share/classes/javax/management/event/EventClient.java +++ b/jdk/src/share/classes/javax/management/event/EventClient.java @@ -265,12 +265,20 @@ public class EventClient implements EventConsumer, NotificationManager { public ScheduledThreadPoolExecutor createThreadPool(ThreadGroup group) { ThreadFactory daemonThreadFactory = new DaemonThreadFactory( "JMX EventClient lease renewer %d"); - ScheduledThreadPoolExecutor exec = new ScheduledThreadPoolExecutor( - 20, daemonThreadFactory); - exec.setKeepAliveTime(1, TimeUnit.SECONDS); - exec.allowCoreThreadTimeOut(true); - exec.setRemoveOnCancelPolicy(true); - return exec; + ScheduledThreadPoolExecutor executor = + new ScheduledThreadPoolExecutor(20, daemonThreadFactory); + executor.setKeepAliveTime(1, TimeUnit.SECONDS); + executor.allowCoreThreadTimeOut(true); + executor.setRemoveOnCancelPolicy(true); + // By default, a ScheduledThreadPoolExecutor will keep jobs + // in its queue even after they have been cancelled. They + // will only be removed when their scheduled time arrives. + // Since the job references the LeaseRenewer which references + // this EventClient, this can lead to a moderately large number + // of objects remaining referenced until the renewal time + // arrives. Hence the above call, which removes the job from + // the queue as soon as it is cancelled. 
+ return executor; } }; return leaseRenewerThreadPool.getThreadPoolExecutor(create); @@ -381,7 +389,7 @@ public class EventClient implements EventConsumer, NotificationManager { listenerId = eventClientDelegate.addListener(clientId, name, filter); } catch (EventClientNotFoundException ecnfe) { - final IOException ioe = new IOException(); + final IOException ioe = new IOException(ecnfe.getMessage()); ioe.initCause(ecnfe); throw ioe; } @@ -488,7 +496,7 @@ public class EventClient implements EventConsumer, NotificationManager { listenerId = eventClientDelegate.addSubscriber(clientId, name, filter); } catch (EventClientNotFoundException ecnfe) { - final IOException ioe = new IOException(); + final IOException ioe = new IOException(ecnfe.getMessage()); ioe.initCause(ecnfe); throw ioe; } diff --git a/jdk/src/share/classes/javax/management/event/FetchingEventRelay.java b/jdk/src/share/classes/javax/management/event/FetchingEventRelay.java index 2a456ec44c7..e4c8d81d87b 100644 --- a/jdk/src/share/classes/javax/management/event/FetchingEventRelay.java +++ b/jdk/src/share/classes/javax/management/event/FetchingEventRelay.java @@ -91,7 +91,7 @@ public class FetchingEventRelay implements EventRelay { * the fetching. * * @param delegate The {@code EventClientDelegateMBean} to work with. - * @param executor Used to do the fetching. A new thread is created if + * @param fetchExecutor Used to do the fetching. A new thread is created if * {@code null}. * @throws IOException If failed to work with the {@code delegate}. * @throws MBeanException if unable to add a client to the remote @@ -101,12 +101,12 @@ public class FetchingEventRelay implements EventRelay { * @throws IllegalArgumentException If {@code delegate} is {@code null}. 
*/ public FetchingEventRelay(EventClientDelegateMBean delegate, - Executor executor) throws IOException, MBeanException { + Executor fetchExecutor) throws IOException, MBeanException { this(delegate, DEFAULT_BUFFER_SIZE, DEFAULT_WAITING_TIMEOUT, DEFAULT_MAX_NOTIFICATIONS, - executor); + fetchExecutor); } /** @@ -120,7 +120,7 @@ public class FetchingEventRelay implements EventRelay { * @param timeout The waiting time in millseconds when fetching * notifications from an {@code EventClientDelegateMBean}. * @param maxNotifs The maximum notifications to fetch every time. - * @param executor Used to do the fetching. A new thread is created if + * @param fetchExecutor Used to do the fetching. A new thread is created if * {@code null}. * @throws IOException if failed to communicate with the {@code delegate}. * @throws MBeanException if unable to add a client to the remote @@ -133,12 +133,12 @@ public class FetchingEventRelay implements EventRelay { int bufferSize, long timeout, int maxNotifs, - Executor executor) throws IOException, MBeanException { + Executor fetchExecutor) throws IOException, MBeanException { this(delegate, bufferSize, timeout, maxNotifs, - executor, + fetchExecutor, FetchingEventForwarder.class.getName(), new Object[] {bufferSize}, new String[] {int.class.getName()}); @@ -155,7 +155,7 @@ public class FetchingEventRelay implements EventRelay { * @param timeout The waiting time in millseconds when fetching * notifications from an {@code EventClientDelegateMBean}. * @param maxNotifs The maximum notifications to fetch every time. - * @param executor Used to do the fetching. + * @param fetchExecutor Used to do the fetching. * @param forwarderName the class name of a user specific EventForwarder * to create in server to forward notifications to this object. The class * should be a subclass of the class {@link FetchingEventForwarder}. 
@@ -174,7 +174,7 @@ public class FetchingEventRelay implements EventRelay { int bufferSize, long timeout, int maxNotifs, - Executor executor, + Executor fetchExecutor, String forwarderName, Object[] params, String[] sig) throws IOException, MBeanException { @@ -184,11 +184,11 @@ public class FetchingEventRelay implements EventRelay { bufferSize+" "+ timeout+" "+ maxNotifs+" "+ - executor+" "+ + fetchExecutor+" "+ forwarderName+" "); } - if(delegate == null) { + if (delegate == null) { throw new NullPointerException("Null EventClientDelegateMBean!"); } @@ -212,16 +212,16 @@ public class FetchingEventRelay implements EventRelay { this.timeout = timeout; this.maxNotifs = maxNotifs; - if (executor == null) { - ScheduledThreadPoolExecutor stpe = new ScheduledThreadPoolExecutor(1, - daemonThreadFactory); - stpe.setKeepAliveTime(1, TimeUnit.SECONDS); - stpe.allowCoreThreadTimeOut(true); - executor = stpe; - this.defaultExecutor = stpe; + if (fetchExecutor == null) { + ScheduledThreadPoolExecutor executor = + new ScheduledThreadPoolExecutor(1, daemonThreadFactory); + executor.setKeepAliveTime(1, TimeUnit.SECONDS); + executor.allowCoreThreadTimeOut(true); + fetchExecutor = executor; + this.defaultExecutor = executor; } else this.defaultExecutor = null; - this.executor = executor; + this.fetchExecutor = fetchExecutor; startSequenceNumber = 0; fetchingJob = new MyJob(); @@ -258,7 +258,7 @@ public class FetchingEventRelay implements EventRelay { private class MyJob extends RepeatedSingletonJob { public MyJob() { - super(executor); + super(fetchExecutor); } public boolean isSuspended() { @@ -368,7 +368,7 @@ public class FetchingEventRelay implements EventRelay { private String clientId; private boolean stopped = false; - private final Executor executor; + private final Executor fetchExecutor; private final ExecutorService defaultExecutor; private final MyJob fetchingJob; diff --git a/jdk/src/share/classes/javax/management/monitor/Monitor.java 
b/jdk/src/share/classes/javax/management/monitor/Monitor.java index 081c5ede429..0329a33b077 100644 --- a/jdk/src/share/classes/javax/management/monitor/Monitor.java +++ b/jdk/src/share/classes/javax/management/monitor/Monitor.java @@ -181,7 +181,7 @@ public abstract class Monitor /** * Executor Service. */ - private static final ExecutorService executor; + private static final ThreadPoolExecutor executor; static { final String maximumPoolSizeSysProp = "jmx.x.monitor.maximum.pool.size"; final String maximumPoolSizeStr = AccessController.doPrivileged( @@ -218,7 +218,7 @@ public abstract class Monitor TimeUnit.SECONDS, new LinkedBlockingQueue(), new DaemonThreadFactory("Executor")); - ((ThreadPoolExecutor)executor).allowCoreThreadTimeOut(true); + executor.allowCoreThreadTimeOut(true); } /** diff --git a/jdk/src/share/classes/javax/management/remote/rmi/RMIConnector.java b/jdk/src/share/classes/javax/management/remote/rmi/RMIConnector.java index a620235ac13..934dac790db 100644 --- a/jdk/src/share/classes/javax/management/remote/rmi/RMIConnector.java +++ b/jdk/src/share/classes/javax/management/remote/rmi/RMIConnector.java @@ -71,9 +71,8 @@ import java.util.Map; import java.util.Properties; import java.util.Set; import java.util.WeakHashMap; -import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.Executor; -import java.util.concurrent.LinkedBlockingDeque; +import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadFactory; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; @@ -421,12 +420,12 @@ public class RMIConnector implements JMXConnector, Serializable, JMXAddressable public ThreadPoolExecutor createThreadPool(ThreadGroup group) { ThreadFactory daemonThreadFactory = new DaemonThreadFactory( "JMX RMIConnector listener dispatch %d"); - ThreadPoolExecutor exec = new ThreadPoolExecutor( + ThreadPoolExecutor executor = new ThreadPoolExecutor( 1, 10, 1, TimeUnit.SECONDS, - new 
LinkedBlockingDeque(), + new LinkedBlockingQueue(), daemonThreadFactory); - exec.allowCoreThreadTimeOut(true); - return exec; + executor.allowCoreThreadTimeOut(true); + return executor; } }; return listenerDispatchThreadPool.getThreadPoolExecutor(create); @@ -1503,7 +1502,7 @@ public class RMIConnector implements JMXConnector, Serializable, JMXAddressable super(period); } - public void gotIOException (IOException ioe) throws IOException { + public void gotIOException(IOException ioe) throws IOException { if (ioe instanceof NoSuchObjectException) { // need to restart super.gotIOException(ioe); diff --git a/jdk/src/share/classes/sun/management/jmxremote/ConnectorBootstrap.java b/jdk/src/share/classes/sun/management/jmxremote/ConnectorBootstrap.java index 43cae9144e9..1ff66405e5e 100644 --- a/jdk/src/share/classes/sun/management/jmxremote/ConnectorBootstrap.java +++ b/jdk/src/share/classes/sun/management/jmxremote/ConnectorBootstrap.java @@ -80,7 +80,7 @@ import sun.management.AgentConfigurationError; import static sun.management.AgentConfigurationError.*; import sun.management.ConnectorAddressLink; import sun.management.FileSystem; -import sun.management.snmp.util.MibLogger; +import com.sun.jmx.remote.util.ClassLogger; import com.sun.jmx.remote.internal.RMIExporter; import com.sun.jmx.remote.security.JMXPluggableAuthenticator; @@ -99,6 +99,7 @@ public final class ConnectorBootstrap { public static final String PORT = "0"; public static final String CONFIG_FILE_NAME = "management.properties"; public static final String USE_SSL = "true"; + public static final String USE_LOCAL_ONLY = "true"; public static final String USE_REGISTRY_SSL = "false"; public static final String USE_AUTHENTICATION = "true"; public static final String PASSWORD_FILE_NAME = "jmxremote.password"; @@ -115,6 +116,8 @@ public final class ConnectorBootstrap { "com.sun.management.jmxremote.port"; public static final String CONFIG_FILE_NAME = "com.sun.management.config.file"; + public static final 
String USE_LOCAL_ONLY = + "com.sun.management.jmxremote.local.only"; public static final String USE_SSL = "com.sun.management.jmxremote.ssl"; public static final String USE_REGISTRY_SSL = @@ -384,7 +387,7 @@ public final class ConnectorBootstrap { checkAccessFile(accessFileName); } - if (log.isDebugOn()) { + if (log.debugOn()) { log.debug("initialize", Agent.getText("jmxremote.ConnectorBootstrap.initialize") + "\n\t" + PropertyNames.PORT + "=" + port + @@ -477,6 +480,18 @@ public final class ConnectorBootstrap { MBeanServer mbs = ManagementFactory.getPlatformMBeanServer(); try { JMXServiceURL url = new JMXServiceURL("rmi", localhost, 0); + // Do we accept connections from local interfaces only? + Properties props = Agent.getManagementProperties(); + if (props == null) { + props = new Properties(); + } + String useLocalOnlyStr = props.getProperty( + PropertyNames.USE_LOCAL_ONLY, DefaultValues.USE_LOCAL_ONLY); + boolean useLocalOnly = Boolean.valueOf(useLocalOnlyStr).booleanValue(); + if (useLocalOnly) { + env.put(RMIConnectorServer.RMI_SERVER_SOCKET_FACTORY_ATTRIBUTE, + new LocalRMIServerSocketFactory()); + } JMXConnectorServer server = JMXConnectorServerFactory.newJMXConnectorServer(url, env, mbs); server.start(); @@ -764,7 +779,7 @@ public final class ConnectorBootstrap { private ConnectorBootstrap() { } - // XXX Revisit: should probably clone this MibLogger.... - private static final MibLogger log = - new MibLogger(ConnectorBootstrap.class); + private static final ClassLogger log = + new ClassLogger(ConnectorBootstrap.class.getPackage().getName(), + "ConnectorBootstrap"); } diff --git a/jdk/src/share/classes/sun/management/jmxremote/LocalRMIServerSocketFactory.java b/jdk/src/share/classes/sun/management/jmxremote/LocalRMIServerSocketFactory.java new file mode 100644 index 00000000000..edb15539b23 --- /dev/null +++ b/jdk/src/share/classes/sun/management/jmxremote/LocalRMIServerSocketFactory.java @@ -0,0 +1,114 @@ +/* + * Copyright 2007 Sun Microsystems, Inc. 
All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Sun designates this + * particular file as subject to the "Classpath" exception as provided + * by Sun in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + */ + +package sun.management.jmxremote; + +import java.io.IOException; +import java.net.InetAddress; +import java.net.NetworkInterface; +import java.net.ServerSocket; +import java.net.Socket; +import java.net.SocketException; +import java.rmi.server.RMIServerSocketFactory; +import java.util.Enumeration; + +/** + * This RMI server socket factory creates server sockets that + * will only accept connection requests from clients running + * on the host where the RMI remote objects have been exported. + */ +public final class LocalRMIServerSocketFactory implements RMIServerSocketFactory { + /** + * Creates a server socket that only accepts connection requests from + * clients running on the host where the RMI remote objects have been + * exported. 
+ */ + public ServerSocket createServerSocket(int port) throws IOException { + return new ServerSocket(port) { + @Override + public Socket accept() throws IOException { + Socket socket = super.accept(); + InetAddress remoteAddr = socket.getInetAddress(); + final String msg = "The server sockets created using the " + + "LocalRMIServerSocketFactory only accept connections " + + "from clients running on the host where the RMI " + + "remote objects have been exported."; + if (remoteAddr.isAnyLocalAddress()) { + // local address: accept the connection. + return socket; + } + // Retrieve all the network interfaces on this host. + Enumeration nis; + try { + nis = NetworkInterface.getNetworkInterfaces(); + } catch (SocketException e) { + try { + socket.close(); + } catch (IOException ioe) { + // Ignore... + } + throw new IOException(msg, e); + } + // Walk through the network interfaces to see + // if any of them matches the client's address. + // If true, then the client's address is local. + while (nis.hasMoreElements()) { + NetworkInterface ni = nis.nextElement(); + Enumeration addrs = ni.getInetAddresses(); + while (addrs.hasMoreElements()) { + InetAddress localAddr = addrs.nextElement(); + if (localAddr.equals(remoteAddr)) { + return socket; + } + } + } + // The client's address is remote so refuse the connection. + try { + socket.close(); + } catch (IOException ioe) { + // Ignore... + } + throw new IOException(msg); + } + }; + } + + /** + * Two LocalRMIServerSocketFactory objects + * are equal if they are of the same type. + */ + @Override + public boolean equals(Object obj) { + return (obj instanceof LocalRMIServerSocketFactory); + } + + /** + * Returns a hash code value for this LocalRMIServerSocketFactory. 
+ */ + @Override + public int hashCode() { + return getClass().hashCode(); + } +} diff --git a/jdk/src/share/classes/sun/nio/ch/AbstractPollSelectorImpl.java b/jdk/src/share/classes/sun/nio/ch/AbstractPollSelectorImpl.java index 5016a03851b..4b95d869321 100644 --- a/jdk/src/share/classes/sun/nio/ch/AbstractPollSelectorImpl.java +++ b/jdk/src/share/classes/sun/nio/ch/AbstractPollSelectorImpl.java @@ -58,6 +58,9 @@ abstract class AbstractPollSelectorImpl // True if this Selector has been closed private boolean closed = false; + // Lock for close and cleanup + private Object closeLock = new Object(); + AbstractPollSelectorImpl(SelectorProvider sp, int channels, int offset) { super(sp); this.totalChannels = channels; @@ -65,7 +68,11 @@ abstract class AbstractPollSelectorImpl } void putEventOps(SelectionKeyImpl sk, int ops) { - pollWrapper.putEventOps(sk.getIndex(), ops); + synchronized (closeLock) { + if (closed) + throw new ClosedSelectorException(); + pollWrapper.putEventOps(sk.getIndex(), ops); + } } public Selector wakeup() { @@ -76,7 +83,9 @@ abstract class AbstractPollSelectorImpl protected abstract int doSelect(long timeout) throws IOException; protected void implClose() throws IOException { - if (!closed) { + synchronized (closeLock) { + if (closed) + return; closed = true; // Deregister channels for(int i=channelOffset; i # Specifies the local interface on which the SNMP agent will bind. -# This is usefull when running on machines which have several +# This is useful when running on machines which have several # interfaces defined. It makes it possible to listen to a specific # subnet accessible through that interface. # Default for this property is "localhost". @@ -143,6 +143,26 @@ # running MBean server, the connector, or the registry. # +# +# ########## RMI connector settings for local management ########## +# +# com.sun.management.jmxremote.local.only=true|false +# Default for this property is true. 
(Case for true/false ignored) +# If this property is specified as true then the local JMX RMI connector +# server will only accept connection requests from clients running on +# the host where the out-of-the-box JMX management agent is running. +# In order to ensure backwards compatibility this property could be +# set to false. However, deploying the local management agent in this +# way is discouraged because the local JMX RMI connector server will +# accept connection requests from any client either local or remote. +# For remote management the remote JMX RMI connector server should +# be used instead with authentication and SSL/TLS encryption enabled. +# + +# For allowing the local management agent accept local +# and remote connection requests use the following line +# com.sun.management.jmxremote.local.only=false + # # ###################### RMI SSL ############################# # diff --git a/jdk/src/solaris/classes/sun/nio/ch/DevPollSelectorImpl.java b/jdk/src/solaris/classes/sun/nio/ch/DevPollSelectorImpl.java index 532b02c23c9..cdf19cda207 100644 --- a/jdk/src/solaris/classes/sun/nio/ch/DevPollSelectorImpl.java +++ b/jdk/src/solaris/classes/sun/nio/ch/DevPollSelectorImpl.java @@ -46,15 +46,15 @@ class DevPollSelectorImpl // The poll object DevPollArrayWrapper pollWrapper; - // The number of valid channels in this Selector's poll array - private int totalChannels; - // Maps from file descriptors to keys private Map fdToKey; // True if this Selector has been closed private boolean closed = false; + // Lock for close/cleanup + private Object closeLock = new Object(); + // Lock for interrupt triggering and clearing private Object interruptLock = new Object(); private boolean interruptTriggered = false; @@ -72,7 +72,6 @@ class DevPollSelectorImpl pollWrapper = new DevPollArrayWrapper(); pollWrapper.initInterrupt(fd0, fd1); fdToKey = new HashMap(); - totalChannels = 1; } protected int doSelect(long timeout) @@ -131,45 +130,39 @@ class DevPollSelectorImpl } 
protected void implClose() throws IOException { - if (!closed) { - closed = true; + if (closed) + return; + closed = true; - // prevent further wakeup - synchronized (interruptLock) { - interruptTriggered = true; - } - - FileDispatcher.closeIntFD(fd0); - FileDispatcher.closeIntFD(fd1); - if (pollWrapper != null) { - - pollWrapper.release(fd0); - pollWrapper.closeDevPollFD(); - pollWrapper = null; - selectedKeys = null; - - // Deregister channels - Iterator i = keys.iterator(); - while (i.hasNext()) { - SelectionKeyImpl ski = (SelectionKeyImpl)i.next(); - deregister(ski); - SelectableChannel selch = ski.channel(); - if (!selch.isOpen() && !selch.isRegistered()) - ((SelChImpl)selch).kill(); - i.remove(); - } - totalChannels = 0; - - } - fd0 = -1; - fd1 = -1; + // prevent further wakeup + synchronized (interruptLock) { + interruptTriggered = true; } + + FileDispatcher.closeIntFD(fd0); + FileDispatcher.closeIntFD(fd1); + + pollWrapper.release(fd0); + pollWrapper.closeDevPollFD(); + selectedKeys = null; + + // Deregister channels + Iterator i = keys.iterator(); + while (i.hasNext()) { + SelectionKeyImpl ski = (SelectionKeyImpl)i.next(); + deregister(ski); + SelectableChannel selch = ski.channel(); + if (!selch.isOpen() && !selch.isRegistered()) + ((SelChImpl)selch).kill(); + i.remove(); + } + fd0 = -1; + fd1 = -1; } protected void implRegister(SelectionKeyImpl ski) { int fd = IOUtil.fdVal(ski.channel.getFD()); fdToKey.put(Integer.valueOf(fd), ski); - totalChannels++; keys.add(ski); } @@ -179,7 +172,6 @@ class DevPollSelectorImpl int fd = ski.channel.getFDVal(); fdToKey.remove(Integer.valueOf(fd)); pollWrapper.release(fd); - totalChannels--; ski.setIndex(-1); keys.remove(ski); selectedKeys.remove(ski); @@ -190,6 +182,8 @@ class DevPollSelectorImpl } void putEventOps(SelectionKeyImpl sk, int ops) { + if (closed) + throw new ClosedSelectorException(); int fd = IOUtil.fdVal(sk.channel.getFD()); pollWrapper.setInterest(fd, ops); } diff --git 
a/jdk/src/solaris/classes/sun/nio/ch/EPollSelectorImpl.java b/jdk/src/solaris/classes/sun/nio/ch/EPollSelectorImpl.java index b9193435c79..e23cfa75dc0 100644 --- a/jdk/src/solaris/classes/sun/nio/ch/EPollSelectorImpl.java +++ b/jdk/src/solaris/classes/sun/nio/ch/EPollSelectorImpl.java @@ -1,5 +1,5 @@ /* - * Copyright 2005-2008 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 2005-2007 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -31,7 +31,6 @@ import java.nio.channels.spi.*; import java.util.*; import sun.misc.*; - /** * An implementation of Selector for Linux 2.6+ kernels that uses * the epoll event notification facility. @@ -51,7 +50,7 @@ class EPollSelectorImpl private Map fdToKey; // True if this Selector has been closed - private boolean closed = false; + private volatile boolean closed = false; // Lock for interrupt triggering and clearing private Object interruptLock = new Object(); @@ -128,40 +127,41 @@ class EPollSelectorImpl } protected void implClose() throws IOException { - if (!closed) { - closed = true; + if (closed) + return; + closed = true; - // prevent further wakeup - synchronized (interruptLock) { - interruptTriggered = true; - } - - FileDispatcher.closeIntFD(fd0); - FileDispatcher.closeIntFD(fd1); - if (pollWrapper != null) { - - pollWrapper.release(fd0); - pollWrapper.closeEPollFD(); - pollWrapper = null; - selectedKeys = null; - - // Deregister channels - Iterator i = keys.iterator(); - while (i.hasNext()) { - SelectionKeyImpl ski = (SelectionKeyImpl)i.next(); - deregister(ski); - SelectableChannel selch = ski.channel(); - if (!selch.isOpen() && !selch.isRegistered()) - ((SelChImpl)selch).kill(); - i.remove(); - } - } - fd0 = -1; - fd1 = -1; + // prevent further wakeup + synchronized (interruptLock) { + interruptTriggered = true; } + + FileDispatcher.closeIntFD(fd0); + 
FileDispatcher.closeIntFD(fd1); + + pollWrapper.release(fd0); + pollWrapper.closeEPollFD(); + // it is possible + selectedKeys = null; + + // Deregister channels + Iterator i = keys.iterator(); + while (i.hasNext()) { + SelectionKeyImpl ski = (SelectionKeyImpl)i.next(); + deregister(ski); + SelectableChannel selch = ski.channel(); + if (!selch.isOpen() && !selch.isRegistered()) + ((SelChImpl)selch).kill(); + i.remove(); + } + + fd0 = -1; + fd1 = -1; } protected void implRegister(SelectionKeyImpl ski) { + if (closed) + throw new ClosedSelectorException(); int fd = IOUtil.fdVal(ski.channel.getFD()); fdToKey.put(Integer.valueOf(fd), ski); pollWrapper.add(fd); @@ -183,6 +183,8 @@ class EPollSelectorImpl } void putEventOps(SelectionKeyImpl sk, int ops) { + if (closed) + throw new ClosedSelectorException(); int fd = IOUtil.fdVal(sk.channel.getFD()); pollWrapper.setInterest(fd, ops); } diff --git a/jdk/src/windows/classes/sun/nio/ch/WindowsSelectorImpl.java b/jdk/src/windows/classes/sun/nio/ch/WindowsSelectorImpl.java index ac25ac0803a..a73351615fc 100644 --- a/jdk/src/windows/classes/sun/nio/ch/WindowsSelectorImpl.java +++ b/jdk/src/windows/classes/sun/nio/ch/WindowsSelectorImpl.java @@ -1,5 +1,5 @@ /* - * Copyright 2002-2008 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 2002-2007 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -80,6 +80,9 @@ final class WindowsSelectorImpl extends SelectorImpl { // File descriptors corresponding to source and sink private final int wakeupSourceFd, wakeupSinkFd; + // Lock for close cleanup + private Object closeLock = new Object(); + // Maps file descriptors to their indices in pollArray private final static class FdMap extends HashMap { static final long serialVersionUID = 0L; @@ -473,42 +476,48 @@ final class WindowsSelectorImpl extends SelectorImpl { } protected void implClose() throws IOException { - if (channelArray != null) { - if (pollWrapper != null) { - // prevent further wakeup - synchronized (interruptLock) { - interruptTriggered = true; - } - wakeupPipe.sink().close(); - wakeupPipe.source().close(); - for(int i = 1; i < totalChannels; i++) { // Deregister channels - if (i % MAX_SELECTABLE_FDS != 0) { // skip wakeupEvent - deregister(channelArray[i]); - SelectableChannel selch = channelArray[i].channel(); - if (!selch.isOpen() && !selch.isRegistered()) - ((SelChImpl)selch).kill(); + synchronized (closeLock) { + if (channelArray != null) { + if (pollWrapper != null) { + // prevent further wakeup + synchronized (interruptLock) { + interruptTriggered = true; } - } - pollWrapper.free(); - pollWrapper = null; - selectedKeys = null; - channelArray = null; - threads.clear(); - // Call startThreads. All remaining helper threads now exit, - // since threads.size() = 0; - startLock.startThreads(); + wakeupPipe.sink().close(); + wakeupPipe.source().close(); + for(int i = 1; i < totalChannels; i++) { // Deregister channels + if (i % MAX_SELECTABLE_FDS != 0) { // skip wakeupEvent + deregister(channelArray[i]); + SelectableChannel selch = channelArray[i].channel(); + if (!selch.isOpen() && !selch.isRegistered()) + ((SelChImpl)selch).kill(); + } + } + pollWrapper.free(); + pollWrapper = null; + selectedKeys = null; + channelArray = null; + threads.clear(); + // Call startThreads. 
All remaining helper threads now exit, + // since threads.size() = 0; + startLock.startThreads(); + } } } } protected void implRegister(SelectionKeyImpl ski) { - growIfNeeded(); - channelArray[totalChannels] = ski; - ski.setIndex(totalChannels); - fdMap.put(ski); - keys.add(ski); - pollWrapper.addEntry(totalChannels, ski); - totalChannels++; + synchronized (closeLock) { + if (pollWrapper == null) + throw new ClosedSelectorException(); + growIfNeeded(); + channelArray[totalChannels] = ski; + ski.setIndex(totalChannels); + fdMap.put(ski); + keys.add(ski); + pollWrapper.addEntry(totalChannels, ski); + totalChannels++; + } } private void growIfNeeded() { @@ -554,7 +563,11 @@ final class WindowsSelectorImpl extends SelectorImpl { } void putEventOps(SelectionKeyImpl sk, int ops) { - pollWrapper.putEventOps(sk.getIndex(), ops); + synchronized (closeLock) { + if (pollWrapper == null) + throw new ClosedSelectorException(); + pollWrapper.putEventOps(sk.getIndex(), ops); + } } public Selector wakeup() { diff --git a/jdk/test/java/nio/channels/Selector/CloseThenRegister.java b/jdk/test/java/nio/channels/Selector/CloseThenRegister.java new file mode 100644 index 00000000000..0fbb7b9d3a1 --- /dev/null +++ b/jdk/test/java/nio/channels/Selector/CloseThenRegister.java @@ -0,0 +1,48 @@ +/* + * Copyright 2008 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + */ + +/* @test + * @bug 5025260 + * @summary ClosedSelectorException is expected when register after close + */ + +import java.net.*; +import java.nio.channels.*; + +public class CloseThenRegister { + + public static void main (String [] args) throws Exception { + try { + Selector s = Selector.open(); + s.close(); + ServerSocketChannel c = ServerSocketChannel.open(); + c.socket().bind(new InetSocketAddress(40000)); + c.configureBlocking(false); + c.register(s, SelectionKey.OP_ACCEPT); + } catch (ClosedSelectorException cse) { + return; + } + throw new RuntimeException("register after close does not cause CSE!"); + } + +} diff --git a/jdk/test/javax/management/mxbean/TypeNameTest.java b/jdk/test/javax/management/mxbean/TypeNameTest.java new file mode 100644 index 00000000000..f5f48bccacb --- /dev/null +++ b/jdk/test/javax/management/mxbean/TypeNameTest.java @@ -0,0 +1,97 @@ +/* + * Copyright 2008 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + */ + +/* + * @test + * @bug 6757225 + * @summary Test that type names in MXBeans match their spec. + * @author Eamonn McManus + */ + +import java.lang.reflect.Field; +import java.lang.reflect.InvocationHandler; +import java.lang.reflect.Method; +import java.lang.reflect.Proxy; +import java.util.List; +import java.util.Map; +import javax.management.MBeanAttributeInfo; +import javax.management.MBeanInfo; +import javax.management.MBeanServer; +import javax.management.MBeanServerFactory; +import javax.management.ObjectName; +import javax.management.StandardMBean; + +public class TypeNameTest { + public static interface TestMXBean { + public int getInt(); + public String IntName = "int"; + + public Map getMapSI(); + public String MapSIName = "java.util.Map"; + + public Map getMapSInts(); + public String MapSIntsName = "java.util.Map"; + + public List> getListListInts(); + public String ListListIntsName = "java.util.List>"; + } + + private static InvocationHandler nullIH = new InvocationHandler() { + public Object invoke(Object proxy, Method method, Object[] args) + throws Throwable { + return null; + } + }; + + static String failure; + + public static void main(String[] args) throws Exception { + TestMXBean testImpl = (TestMXBean) Proxy.newProxyInstance( + TestMXBean.class.getClassLoader(), new Class[] {TestMXBean.class}, nullIH); + Object mxbean = new StandardMBean(testImpl, TestMXBean.class, true); + MBeanServer mbs = MBeanServerFactory.newMBeanServer(); + ObjectName name = new ObjectName("a:b=c"); + mbs.registerMBean(mxbean, name); + MBeanInfo mbi = 
mbs.getMBeanInfo(name); + MBeanAttributeInfo[] mbais = mbi.getAttributes(); + for (MBeanAttributeInfo mbai : mbais) { + String attrName = mbai.getName(); + String attrTypeName = (String) mbai.getDescriptor().getFieldValue("originalType"); + String fieldName = attrName + "Name"; + Field nameField = TestMXBean.class.getField(fieldName); + String expectedTypeName = (String) nameField.get(null); + if (expectedTypeName.equals(attrTypeName)) { + System.out.println("OK: " + attrName + ": " + attrTypeName); + } else { + failure = "For attribute " + attrName + " expected type name \"" + + expectedTypeName + "\", found type name \"" + attrTypeName + + "\""; + System.out.println("FAIL: " + failure); + } + } + if (failure == null) + System.out.println("TEST PASSED"); + else + throw new Exception("TEST FAILED: " + failure); + } +} diff --git a/jdk/test/sun/security/krb5/auto/Action.java b/jdk/test/sun/security/krb5/auto/Action.java new file mode 100644 index 00000000000..1e7073dc1ff --- /dev/null +++ b/jdk/test/sun/security/krb5/auto/Action.java @@ -0,0 +1,33 @@ +/* + * Copyright 2008 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + */ + +/** + * Action used in Context.doAs + */ +public interface Action { + /** + * This method always reads a byte block and emits another one + */ + byte[] run(Context s, byte[] input) throws Exception; +} + diff --git a/jdk/test/sun/security/krb5/auto/BasicKrb5Test.java b/jdk/test/sun/security/krb5/auto/BasicKrb5Test.java new file mode 100644 index 00000000000..26d52e0787c --- /dev/null +++ b/jdk/test/sun/security/krb5/auto/BasicKrb5Test.java @@ -0,0 +1,114 @@ +/* + * Copyright 2008 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ */ + +/* + * @test + * @bug 6706974 + * @summary Add krb5 test infrastructure + */ + +import org.ietf.jgss.GSSName; +import sun.security.jgss.GSSUtil; +import sun.security.krb5.Config; +import sun.security.krb5.internal.crypto.EType; + +/** + * Basic JGSS/krb5 test with 3 parties: client, server, backend server. Each + * party uses JAAS login to get subjects and executes JGSS calls using + * Subject.doAs. + */ +public class BasicKrb5Test { + + /** + * @param args empty or etype + */ + public static void main(String[] args) + throws Exception { + + String etype = null; + if (args.length > 0) { + etype = args[0]; + } + + // Creates and starts the KDC. This line must be put ahead of etype check + // since the check needs a krb5.conf. + new OneKDC(etype).writeJAASConf(); + + System.out.println("Testing etype " + etype); + if (etype != null && !EType.isSupported(Config.getInstance().getType(etype))) { + System.out.println("Not supported."); + System.exit(0); + } + + new BasicKrb5Test().go(OneKDC.SERVER, OneKDC.BACKEND); + } + + void go(final String server, final String backend) throws Exception { + Context c, s, s2, b; + c = Context.fromJAAS("client"); + s = Context.fromJAAS("server"); + b = Context.fromJAAS("backend"); + + c.startAsClient(server, GSSUtil.GSS_KRB5_MECH_OID); + c.x().requestCredDeleg(true); + s.startAsServer(GSSUtil.GSS_KRB5_MECH_OID); + + c.status(); + s.status(); + + Context.handshake(c, s); + GSSName client = c.x().getSrcName(); + + c.status(); + s.status(); + + Context.transmit("i say high --", c, s); + Context.transmit(" you say low", s, c); + + s2 = s.delegated(); + s.dispose(); + s = null; + + s2.startAsClient(backend, GSSUtil.GSS_KRB5_MECH_OID); + b.startAsServer(GSSUtil.GSS_KRB5_MECH_OID); + + s2.status(); + b.status(); + + Context.handshake(s2, b); + GSSName client2 = b.x().getSrcName(); + + if (!client.equals(client2)) { + throw new Exception("Delegation failed"); + } + + s2.status(); + b.status(); + + Context.transmit("you say hello --", 
s2, b); + Context.transmit(" i say goodbye", b, s2); + + s2.dispose(); + b.dispose(); + } +} diff --git a/jdk/test/sun/security/krb5/auto/CleanState.java b/jdk/test/sun/security/krb5/auto/CleanState.java new file mode 100644 index 00000000000..8f79fd8e70e --- /dev/null +++ b/jdk/test/sun/security/krb5/auto/CleanState.java @@ -0,0 +1,75 @@ +/* + * Copyright 2008 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ */ + +/* + * @test + * @bug 6716534 + * @summary Krb5LoginModule has not cleaned temp info between authentication attempts + */ +import com.sun.security.auth.module.Krb5LoginModule; +import java.util.HashMap; +import java.util.Map; +import javax.security.auth.Subject; +import javax.security.auth.callback.Callback; +import javax.security.auth.callback.CallbackHandler; +import javax.security.auth.callback.NameCallback; +import javax.security.auth.callback.PasswordCallback; + +public class CleanState { + public static void main(String[] args) throws Exception { + CleanState x = new CleanState(); + new OneKDC(null); + x.go(); + } + + void go() throws Exception { + Krb5LoginModule krb5 = new Krb5LoginModule(); + + final String name = OneKDC.USER; + final char[] password = OneKDC.PASS; + char[] badpassword = "hellokitty".toCharArray(); + + Map map = new HashMap(); + map.put("useTicketCache", "false"); + map.put("doNotPrompt", "false"); + map.put("tryFirstPass", "true"); + Map shared = new HashMap(); + shared.put("javax.security.auth.login.name", name); + shared.put("javax.security.auth.login.password", badpassword); + + krb5.initialize(new Subject(), new CallbackHandler() { + @Override + public void handle(Callback[] callbacks) { + for(Callback callback: callbacks) { + if (callback instanceof NameCallback) { + ((NameCallback)callback).setName(name); + } + if (callback instanceof PasswordCallback) { + ((PasswordCallback)callback).setPassword(password); + } + } + } + }, shared, map); + krb5.login(); + } +} diff --git a/jdk/test/sun/security/krb5/auto/Context.java b/jdk/test/sun/security/krb5/auto/Context.java new file mode 100644 index 00000000000..9f52dad1f1a --- /dev/null +++ b/jdk/test/sun/security/krb5/auto/Context.java @@ -0,0 +1,386 @@ +/* + * Copyright 2008 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + */ + +import com.sun.security.auth.module.Krb5LoginModule; +import java.security.PrivilegedActionException; +import java.security.PrivilegedExceptionAction; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import javax.security.auth.Subject; +import javax.security.auth.kerberos.KerberosKey; +import javax.security.auth.kerberos.KerberosTicket; +import javax.security.auth.login.LoginContext; +import org.ietf.jgss.GSSContext; +import org.ietf.jgss.GSSCredential; +import org.ietf.jgss.GSSException; +import org.ietf.jgss.GSSManager; +import org.ietf.jgss.GSSName; +import org.ietf.jgss.MessageProp; +import org.ietf.jgss.Oid; + +/** + * Context of a JGSS subject, encapsulating Subject and GSSContext. + * + * Three "constructors", which acquire the (private) credentials and fill + * it into the Subject: + * + * 1. static fromJAAS(): Creates a Context using a JAAS login config entry + * 2. static fromUserPass(): Creates a Context using a username and a password + * 3. 
delegated(): A new context which uses the delegated credentials from a + * previously established acceptor Context + * + * Two context initiators, which create the GSSContext object inside: + * + * 1. startAsClient() + * 2. startAsServer() + * + * Privileged action: + * doAs(): Performs an action in the name of the Subject + * + * Handshake process: + * static handshake(initiator, acceptor) + * + * A four-phase typical data communication which includes all four GSS + * actions (wrap, unwrap, getMIC and verifyMIC): + * static transmit(message, from, to) + */ +public class Context { + + private Subject s; + private GSSContext x; + private boolean f; // context established? + private String name; + private GSSCredential cred; // see method delegated(). + + private Context() {} + + /** + * Using the delegated credentials from a previous acceptor + * @param c + */ + public Context delegated() throws Exception { + Context out = new Context(); + out.s = s; + out.cred = x.getDelegCred(); + out.name = name + " as " + out.cred.getName().toString(); + return out; + } + + /** + * Logins with a JAAS login config entry name + */ + public static Context fromJAAS(final String name) throws Exception { + Context out = new Context(); + out.name = name; + LoginContext lc = new LoginContext(name); + lc.login(); + out.s = lc.getSubject(); + return out; + } + + /** + * Logins with a username and a password, using Krb5LoginModule directly + * @param storeKey true if key should be saved, used on acceptor side + */ + public static Context fromUserPass(String user, char[] pass, boolean storeKey) throws Exception { + Context out = new Context(); + out.name = user; + out.s = new Subject(); + Krb5LoginModule krb5 = new Krb5LoginModule(); + Map map = new HashMap(); + map.put("tryFirstPass", "true"); + if (storeKey) { + map.put("storeKey", "true"); + } + Map shared = new HashMap(); + shared.put("javax.security.auth.login.name", user); + shared.put("javax.security.auth.login.password", 
pass); + + krb5.initialize(out.s, null, shared, map); + krb5.login(); + krb5.commit(); + return out; + } + + /** + * Starts as a client + * @param target communication peer + * @param mech GSS mech + * @throws java.lang.Exception + */ + public void startAsClient(final String target, final Oid mech) throws Exception { + doAs(new Action() { + @Override + public byte[] run(Context me, byte[] dummy) throws Exception { + GSSManager m = GSSManager.getInstance(); + me.x = m.createContext( + target.indexOf('@') < 0 ? + m.createName(target, null) : + m.createName(target, GSSName.NT_HOSTBASED_SERVICE), + mech, + cred, + GSSContext.DEFAULT_LIFETIME); + return null; + } + }, null); + f = false; + } + + /** + * Starts as a server + * @param mech GSS mech + * @throws java.lang.Exception + */ + public void startAsServer(final Oid mech) throws Exception { + doAs(new Action() { + @Override + public byte[] run(Context me, byte[] dummy) throws Exception { + GSSManager m = GSSManager.getInstance(); + me.x = m.createContext(m.createCredential( + null, + GSSCredential.INDEFINITE_LIFETIME, + mech, + GSSCredential.ACCEPT_ONLY)); + return null; + } + }, null); + f = false; + } + + /** + * Accesses the internal GSSContext object. Currently it's used for -- + * + * 1. calling requestXXX() before handshake + * 2. accessing source name + * + * Note: If the application needs to do any privileged call on this + * object, please use doAs(). Otherwise, it can be done directly. The + * methods listed above are all non-privileged calls. 
+ * + * @return the GSSContext object + */ + public GSSContext x() { + return x; + } + + /** + * Disposes the GSSContext within + * @throws org.ietf.jgss.GSSException + */ + public void dispose() throws GSSException { + x.dispose(); + } + + /** + * Does something using the Subject inside + * @param action the action + * @param in the input byte + * @return the output byte + * @throws java.lang.Exception + */ + public byte[] doAs(final Action action, final byte[] in) throws Exception { + try { + return Subject.doAs(s, new PrivilegedExceptionAction() { + + @Override + public byte[] run() throws Exception { + return action.run(Context.this, in); + } + }); + } catch (PrivilegedActionException pae) { + throw pae.getException(); + } + } + + /** + * Prints status of GSSContext and Subject + * @throws java.lang.Exception + */ + public void status() throws Exception { + System.out.println("STATUS OF " + name.toUpperCase()); + try { + StringBuffer sb = new StringBuffer(); + if (x.getAnonymityState()) { + sb.append("anon, "); + } + if (x.getConfState()) { + sb.append("conf, "); + } + if (x.getCredDelegState()) { + sb.append("deleg, "); + } + if (x.getIntegState()) { + sb.append("integ, "); + } + if (x.getMutualAuthState()) { + sb.append("mutual, "); + } + if (x.getReplayDetState()) { + sb.append("rep det, "); + } + if (x.getSequenceDetState()) { + sb.append("seq det, "); + } + System.out.println("Context status of " + name + ": " + sb.toString()); + System.out.println(x.getSrcName() + " -> " + x.getTargName()); + } catch (Exception e) { + ;// Don't care + } + System.out.println("====================================="); + for (Object o : s.getPrivateCredentials()) { + System.out.println(" " + o.getClass()); + if (o instanceof KerberosTicket) { + KerberosTicket kt = (KerberosTicket) o; + System.out.println(" " + kt.getServer() + " for " + kt.getClient()); + } else if (o instanceof KerberosKey) { + KerberosKey kk = (KerberosKey) o; + System.out.print(" " + kk.getKeyType() + " " 
+ kk.getVersionNumber() + " " + kk.getAlgorithm() + " "); + for (byte b : kk.getEncoded()) { + System.out.printf("%02X", b & 0xff); + } + System.out.println(); + } else if (o instanceof Map) { + Map map = (Map) o; + for (Object k : map.keySet()) { + System.out.println(" " + k + ": " + map.get(k)); + } + } + } + } + + /** + * Transmits a message from one Context to another. The sender wraps the + * message and sends it to the receiver. The receiver unwraps it, creates + * a MIC of the clear text and sends it back to the sender. The sender + * verifies the MIC against the message sent earlier. + * @param message the message + * @param s1 the sender + * @param s2 the receiver + * @throws java.lang.Exception If anything goes wrong + */ + static public void transmit(final String message, final Context s1, + final Context s2) throws Exception { + final byte[] messageBytes = message.getBytes(); + System.out.printf("-------------------- TRANSMIT from %s to %s------------------------\n", + s1.name, s2.name); + + byte[] t = s1.doAs(new Action() { + @Override + public byte[] run(Context me, byte[] dummy) throws Exception { + System.out.println("wrap"); + MessageProp p1 = new MessageProp(0, true); + byte[] out = me.x.wrap(messageBytes, 0, messageBytes.length, p1); + System.out.println(printProp(p1)); + return out; + } + }, null); + + t = s2.doAs(new Action() { + @Override + public byte[] run(Context me, byte[] input) throws Exception { + MessageProp p1 = new MessageProp(0, true); + byte[] bytes = me.x.unwrap(input, 0, input.length, p1); + if (!Arrays.equals(messageBytes, bytes)) + throw new Exception("wrap/unwrap mismatch"); + System.out.println("unwrap"); + System.out.println(printProp(p1)); + p1 = new MessageProp(0, true); + System.out.println("getMIC"); + bytes = me.x.getMIC(bytes, 0, bytes.length, p1); + System.out.println(printProp(p1)); + return bytes; + } + }, t); + // Re-unwrap should make p2.isDuplicateToken() returns true + s1.doAs(new Action() { + @Override + public 
byte[] run(Context me, byte[] input) throws Exception { + MessageProp p1 = new MessageProp(0, true); + System.out.println("verifyMIC"); + me.x.verifyMIC(input, 0, input.length, + messageBytes, 0, messageBytes.length, + p1); + System.out.println(printProp(p1)); + return null; + } + }, t); + } + + /** + * Returns a string description of a MessageProp object + * @param prop the object + * @return the description + */ + static public String printProp(MessageProp prop) { + StringBuffer sb = new StringBuffer(); + sb.append("MessagePop: "); + sb.append("QOP="+ prop.getQOP() + ", "); + sb.append(prop.getPrivacy()?"privacy, ":""); + sb.append(prop.isDuplicateToken()?"dup, ":""); + sb.append(prop.isGapToken()?"gap, ":""); + sb.append(prop.isOldToken()?"old, ":""); + sb.append(prop.isUnseqToken()?"unseq, ":""); + sb.append(prop.getMinorString()+ "(" + prop.getMinorStatus()+")"); + return sb.toString(); + } + + /** + * Handshake (security context establishment process) between two Contexts + * @param c the initiator + * @param s the acceptor + * @throws java.lang.Exception + */ + static public void handshake(final Context c, final Context s) throws Exception { + byte[] t = new byte[0]; + while (!c.f || !s.f) { + t = c.doAs(new Action() { + @Override + public byte[] run(Context me, byte[] input) throws Exception { + if (me.x.isEstablished()) { + me.f = true; + System.out.println(c.name + " side established"); + return null; + } else { + System.out.println(c.name + " call initSecContext"); + return me.x.initSecContext(input, 0, input.length); + } + } + }, t); + + t = s.doAs(new Action() { + @Override + public byte[] run(Context me, byte[] input) throws Exception { + if (me.x.isEstablished()) { + me.f = true; + System.out.println(s.name + " side established"); + return null; + } else { + System.out.println(s.name + " called acceptSecContext"); + return me.x.acceptSecContext(input, 0, input.length); + } + } + }, t); + } + } +} diff --git 
a/jdk/test/sun/security/krb5/auto/CrossRealm.java b/jdk/test/sun/security/krb5/auto/CrossRealm.java new file mode 100644 index 00000000000..9b610279ebc --- /dev/null +++ b/jdk/test/sun/security/krb5/auto/CrossRealm.java @@ -0,0 +1,101 @@ +/* + * Copyright 2008 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ */ + +/* + * @test + * @bug 6706974 + * @summary Add krb5 test infrastructure + */ +import java.io.FileOutputStream; +import java.io.IOException; +import java.security.Security; +import javax.security.auth.callback.Callback; +import javax.security.auth.callback.CallbackHandler; +import javax.security.auth.callback.NameCallback; +import javax.security.auth.callback.PasswordCallback; +import javax.security.auth.callback.UnsupportedCallbackException; +import org.ietf.jgss.GSSContext; +import org.ietf.jgss.GSSManager; +import org.ietf.jgss.GSSName; +import sun.security.jgss.GSSUtil; + +public class CrossRealm implements CallbackHandler { + public static void main(String[] args) throws Exception { + startKDCs(); + xRealmAuth(); + } + + static void startKDCs() throws Exception { + // Create and start the KDC + KDC kdc1 = KDC.create("RABBIT.HOLE"); + kdc1.addPrincipal("dummy", "bogus".toCharArray()); + kdc1.addPrincipalRandKey("krbtgt/RABBIT.HOLE"); + kdc1.addPrincipal("krbtgt/SNAKE.HOLE", "sharedsec".toCharArray()); + + KDC kdc2 = KDC.create("SNAKE.HOLE"); + kdc2.addPrincipalRandKey("krbtgt/SNAKE.HOLE"); + kdc2.addPrincipal("krbtgt/RABBIT.HOLE", "sharedsec".toCharArray()); + kdc2.addPrincipalRandKey("host/www.snake.hole"); + + KDC.saveConfig("krb5-localkdc.conf", kdc1, kdc2, + "forwardable=true", + "[domain_realm]", + ".snake.hole=SNAKE.HOLE"); + System.setProperty("java.security.krb5.conf", "krb5-localkdc.conf"); + } + + static void xRealmAuth() throws Exception { + Security.setProperty("auth.login.defaultCallbackHandler", "CrossRealm"); + System.setProperty("java.security.auth.login.config", "jaas-localkdc.conf"); + System.setProperty("javax.security.auth.useSubjectCredsOnly", "false"); + FileOutputStream fos = new FileOutputStream("jaas-localkdc.conf"); + fos.write(("com.sun.security.jgss.krb5.initiate {\n" + + " com.sun.security.auth.module.Krb5LoginModule\n" + + " required\n" + + " principal=dummy\n" + + " doNotPrompt=false\n" + + " useTicketCache=false\n" + + " 
;\n" + + "};").getBytes()); + fos.close(); + + GSSManager m = GSSManager.getInstance(); + m.createContext( + m.createName("host@www.snake.hole", GSSName.NT_HOSTBASED_SERVICE), + GSSUtil.GSS_KRB5_MECH_OID, + null, + GSSContext.DEFAULT_LIFETIME).initSecContext(new byte[0], 0, 0); + } + + @Override + public void handle(Callback[] callbacks) throws IOException, UnsupportedCallbackException { + for (Callback callback : callbacks) { + if (callback instanceof NameCallback) { + ((NameCallback) callback).setName("dummy"); + } + if (callback instanceof PasswordCallback) { + ((PasswordCallback) callback).setPassword("bogus".toCharArray()); + } + } + } +} diff --git a/jdk/test/sun/security/krb5/auto/KDC.java b/jdk/test/sun/security/krb5/auto/KDC.java new file mode 100644 index 00000000000..a875e090bb5 --- /dev/null +++ b/jdk/test/sun/security/krb5/auto/KDC.java @@ -0,0 +1,969 @@ +/* + * Copyright 2008 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ */ + +import java.lang.reflect.Constructor; +import java.lang.reflect.Field; +import java.lang.reflect.InvocationTargetException; +import java.net.*; +import java.io.*; +import java.lang.reflect.Method; +import java.security.SecureRandom; +import java.util.*; +import java.util.concurrent.*; +import sun.security.krb5.*; +import sun.security.krb5.internal.*; +import sun.security.krb5.internal.crypto.KeyUsage; +import sun.security.krb5.internal.ktab.KeyTab; +import sun.security.util.DerInputStream; +import sun.security.util.DerOutputStream; +import sun.security.util.DerValue; + +/** + * A KDC server. + *

+ * Features: + *

    + *
  1. Supports TCP and UDP + *
  2. Supports AS-REQ and TGS-REQ + *
  3. Principal db and other settings hard coded in application + *
  4. Options, say, request preauth or not + *
+ * Side effects: + *
    + *
  1. The Sun-internal class sun.security.krb5.Config is a + * singleton and initialized according to Kerberos settings (krb5.conf and + * java.security.krb5.* system properties). This means once it's initialized + * it will not automatically notice any changes to these settings (or file + * changes of krb5.conf). The KDC class normally does not touch these + * settings (except for the writeKtab() method). However, to make + * sure nothing ever goes wrong, if you want to make any changes to these + * settings after calling a KDC method, call Config.refresh() to + * make sure your changes are reflected in the Config object. + *
+ * Issues and TODOs: + *
    + *
  1. Generates krb5.conf to be used on another machine, currently the kdc is + * always localhost + *
  2. More options to KDC, say, error output, say, response nonce != + * request nonce + *
+ * Note: This program uses internal krb5 classes (including reflection to + * access private fields and methods). + *

+ * Usages: + *

+ * 1. Init and start the KDC: + *

+ * KDC kdc = KDC.create("REALM.NAME", port, isDaemon);
+ * KDC kdc = KDC.create("REALM.NAME");
+ * 
+ * Here, port is the UDP and TCP port number the KDC server + * listens on. If zero, a random port is chosen; call getPort() later + * to retrieve its value. + *

+ * If isDaemon is true, the KDC worker threads will be daemons. + *

+ * The shortcut KDC.create("REALM.NAME") has port=0 and + * isDaemon=true, and is commonly used in an embedded KDC. + *

+ * 2. Adding users: + *

+ * kdc.addPrincipal(String principal_name, char[] password);
+ * kdc.addPrincipalRandKey(String principal_name);
+ * 
+ * A service principal's name should look like "host/f.q.d.n". The second form + * generates a random key. To expose this key, call writeKtab() to + * save the keys into a keytab file. + *

+ * Note that you need to add the principal name krbtgt/REALM.NAME yourself. + *

+ * Note that you can safely add a principal at any time after the KDC is + * started and before a user requests info on this principal. + *

+ * 3. Other public methods: + *

    + *
  • getPort: Returns the port number the KDC uses + *
  • getRealm: Returns the realm name + *
  • writeKtab: Writes all principals' keys into a keytab file + *
  • saveConfig: Saves a krb5.conf file to access this KDC + *
  • setOption: Sets various options + *
+ * Read the javadoc for details. Lazy developer can use OneKDC + * directly. + */ +public class KDC { + + // Under the hood. + + // The random generator to generate random keys (including session keys) + private static SecureRandom secureRandom = new SecureRandom(); + // Principal db + private Map passwords = new HashMap(); + // Realm name + private String realm; + // The request/response job queue + private BlockingQueue q = new ArrayBlockingQueue(100); + // Service port number + private int port; + // Options + private Map options = new HashMap(); + + /** + * Option names, to be expanded forever. + */ + public static enum Option { + /** + * Whether pre-authentication is required. Default Boolean.TRUE + */ + PREAUTH_REQUIRED, + }; + + /** + * A standalone KDC server. + * @param args + * @throws java.lang.Exception + */ + public static void main(String[] args) throws Exception { + if (args.length > 0) { + if (args[0].equals("-help") || args[0].equals("--help")) { + System.out.println("Usage:"); + System.out.println(" java " + KDC.class + " " + + "Start KDC on port 8888"); + return; + } + } + String localhost = "localhost"; + try { + localhost = InetAddress.getByName(localhost) + .getCanonicalHostName(); + } catch (UnknownHostException uhe) { + ; // Ignore, localhost is still "localhost" + } + KDC kdc = create("RABBIT.HOLE", 8888, false); + kdc.addPrincipal("dummy", "bogus".toCharArray()); + kdc.addPrincipal("foo", "bar".toCharArray()); + kdc.addPrincipalRandKey("krbtgt/" + kdc.realm); + kdc.addPrincipalRandKey("server/" + localhost); + kdc.addPrincipalRandKey("backend/" + localhost); + } + + /** + * Creates and starts a KDC running as a daemon on a random port. + * @param realm the realm name + * @return the running KDC instance + * @throws java.io.IOException for any socket creation error + */ + public static KDC create(String realm) throws IOException { + return create(realm, 0, true); + } + + /** + * Creates and starts a KDC server. 
+ * @param realm the realm name + * @param port the TCP and UDP port to listen to. A random port will be + * chosen if zero. + * @param asDaemon if true, KDC threads will be daemons. Otherwise, not. + * @return the running KDC instance + * @throws java.io.IOException for any socket creation error + */ + public static KDC create(String realm, int port, boolean asDaemon) throws IOException { + return new KDC(realm, port, asDaemon); + } + /** + * Sets an option + * @param key the option name + * @param value the value + */ + public void setOption(Option key, Object value) { + options.put(key, value); + } + /** + * Write all principals' keys into a keytab file. Note that the keys for + * the krbtgt principal for this realm will not be written. + *

+ * Attention: This method references krb5.conf settings. If you need to + * setup krb5.conf later, please call Config.refresh() after + * the new setting. For example: + *

+     * kdc.writeKtab("/etc/kdc/ktab");  // Config is initialized,
+     * System.setProperty("java.security.krb5.conf", "/home/mykrb5.conf");
+     * Config.refresh();
+     * 
+ * + * Inside this method there are 2 places krb5.conf is used: + *
    + *
  1. (Fatal) Generating keys: EncryptionKey.acquireSecretKeys + *
  2. (Has workaround) Creating PrincipalName + *
+ * @param tab The keytab filename to write to. + * @throws java.io.IOException for any file output error + * @throws sun.security.krb5.KrbException for any realm and/or principal + * name error. + */ + public void writeKtab(String tab) throws IOException, KrbException { + KeyTab ktab = KeyTab.create(tab); + for (String name : passwords.keySet()) { + if (name.equals("krbtgt/" + realm)) { + continue; + } + ktab.addEntry(new PrincipalName(name + "@" + realm, + name.indexOf('/') < 0 ? + PrincipalName.KRB_NT_UNKNOWN : + PrincipalName.KRB_NT_SRV_HST), passwords.get(name)); + } + ktab.save(); + } + + /** + * Adds a new principal to this realm with a given password. + * @param user the principal's name. For a service principal, use the + * form of host/f.q.d.n + * @param pass the password for the principal + */ + public void addPrincipal(String user, char[] pass) { + passwords.put(user, pass); + } + + /** + * Adds a new principal to this realm with a random password + * @param user the principal's name. For a service principal, use the + * form of host/f.q.d.n + */ + public void addPrincipalRandKey(String user) { + passwords.put(user, randomPassword()); + } + + /** + * Returns the name of this realm + * @return the name of this realm + */ + public String getRealm() { + return realm; + } + + /** + * Writes a krb5.conf for one or more KDC that includes KDC locations for + * each realm and the default realm name. You can also add extra strings + * into the file. The method should be called like: + *
+     *   KDC.saveConfig("krb5.conf", kdc1, kdc2, ..., line1, line2, ...);
+     * 
+ * Here you can provide one or more kdc# and zero or more line# arguments. + * The line# will be put after [libdefaults] and before [realms]. Therefore + * you can append new lines into [libdefaults] and/or create your new + * stanzas as well. Note that a newline character will be appended to + * each line# argument. + *

+ * For example: + *

+     * KDC.saveConfig("krb5.conf", this);
+     * 
+ * generates: + *
+     * [libdefaults]
+     * default_realm = REALM.NAME
+     *
+     * [realms]
+     *   REALM.NAME = {
+     *     kdc = localhost:port_number
+     *   }
+     * 
+ * + * Another example: + *
+     * KDC.saveConfig("krb5.conf", kdc1, kdc2, "forwardable = true", "",
+     *         "[domain_realm]",
+     *         ".kdc1.com = KDC1.NAME");
+     * 
+ * generates: + *
+     * [libdefaults]
+     * default_realm = KDC1.NAME
+     * forwardable = true
+     *
+     * [domain_realm]
+     * .kdc1.com = KDC1.NAME
+     *
+     * [realms]
+     *   KDC1.NAME = {
+     *     kdc = localhost:port1
+     *   }
+     *   KDC2.NAME = {
+     *     kdc = localhost:port2
+     *   }
+     * 
+ * @param file the name of the file to write into + * @param kdc the first (and default) KDC + * @param more more KDCs or extra lines (in their appearing order) to + * insert into the krb5.conf file. This method reads each argument's type + * to determine what it's for. This argument can be empty. + * @throws java.io.IOException for any file output error + */ + public static void saveConfig(String file, KDC kdc, Object... more) + throws IOException { + File f = new File(file); + StringBuffer sb = new StringBuffer(); + sb.append("[libdefaults]\ndefault_realm = "); + sb.append(kdc.realm); + sb.append("\n"); + for (Object o: more) { + if (o instanceof String) { + sb.append(o); + sb.append("\n"); + } + } + sb.append("\n[realms]\n"); + sb.append(realmLineForKDC(kdc)); + for (Object o: more) { + if (o instanceof KDC) { + sb.append(realmLineForKDC((KDC)o)); + } + } + FileOutputStream fos = new FileOutputStream(f); + fos.write(sb.toString().getBytes()); + fos.close(); + } + + /** + * Returns the service port of the KDC server. + * @return the KDC service port + */ + public int getPort() { + return port; + } + + // Private helper methods + + /** + * Private constructor, cannot be called outside. + * @param realm + */ + private KDC(String realm) { + this.realm = realm; + } + + /** + * A constructor that starts the KDC service also. + */ + protected KDC(String realm, int port, boolean asDaemon) + throws IOException { + this(realm); + startServer(port, asDaemon); + } + /** + * Generates a 32-char random password + * @return the password + */ + private static char[] randomPassword() { + char[] pass = new char[32]; + for (int i=0; i<32; i++) + pass[i] = (char)secureRandom.nextInt(); + return pass; + } + + /** + * Generates a random key for the given encryption type. 
+ * @param eType the encryption type + * @return the generated key + * @throws sun.security.krb5.KrbException for unknown/unsupported etype + */ + private static EncryptionKey generateRandomKey(int eType) + throws KrbException { + // Is 32 enough for AES256? I should have generated the keys directly + // but different cryptos have different rules on what keys are valid. + char[] pass = randomPassword(); + String algo; + switch (eType) { + case EncryptedData.ETYPE_DES_CBC_MD5: algo = "DES"; break; + case EncryptedData.ETYPE_DES3_CBC_HMAC_SHA1_KD: algo = "DESede"; break; + case EncryptedData.ETYPE_AES128_CTS_HMAC_SHA1_96: algo = "AES128"; break; + case EncryptedData.ETYPE_ARCFOUR_HMAC: algo = "ArcFourHMAC"; break; + case EncryptedData.ETYPE_AES256_CTS_HMAC_SHA1_96: algo = "AES256"; break; + default: algo = "DES"; break; + } + return new EncryptionKey(pass, "NOTHING", algo); // Silly + } + + /** + * Returns the password for a given principal + * @param p principal + * @return the password + * @throws sun.security.krb5.KrbException when the principal is not inside + * the database. + */ + private char[] getPassword(PrincipalName p) throws KrbException { + char[] pass = passwords.get(p.getNameString()); + if (pass == null) { + throw new KrbException(Krb5.KDC_ERR_C_PRINCIPAL_UNKNOWN); + } + return pass; + } + + /** + * Returns the salt string for the principal. For normal users, the + * concatenation for the realm name and the sections of the principal; + * for krgtgt/A@B and krbtgt/B@A, always return AB (so that inter-realm + * principals have the same key). 
+ * @param p principal + * @return the salt + */ + private String getSalt(PrincipalName p) { + String[] ns = p.getNameStrings(); + if (ns[0].equals("krbtgt") && ns.length > 1) { + // Shared cross-realm keys must be the same + if (ns[1].compareTo(realm) < 0) { + return ns[1] + realm; + } else { + return realm + ns[1]; + } + } else { + String s = getRealm(); + for (String n: p.getNameStrings()) { + s += n; + } + return s; + } + } + + /** + * Returns the key for a given principal of the given encryption type + * @param p the principal + * @param etype the encryption type + * @return the key + * @throws sun.security.krb5.KrbException for unknown/unsupported etype + */ + private EncryptionKey keyForUser(PrincipalName p, int etype) throws KrbException { + try { + // Do not call EncryptionKey.acquireSecretKeys(), otherwise + // the krb5.conf config file would be loaded. + Method stringToKey = EncryptionKey.class.getDeclaredMethod("stringToKey", char[].class, String.class, byte[].class, Integer.TYPE); + stringToKey.setAccessible(true); + return new EncryptionKey((byte[]) stringToKey.invoke(null, getPassword(p), getSalt(p), null, etype), etype, null); + } catch (InvocationTargetException ex) { + KrbException ke = (KrbException)ex.getCause(); + throw ke; + } catch (Exception e) { + throw new RuntimeException(e); // should not happen + } + } + + /** + * Processes an incoming request and generates a response. 
+ * @param in the request + * @return the response + * @throws java.lang.Exception for various errors + */ + private byte[] processMessage(byte[] in) throws Exception { + if ((in[0] & 0x1f) == Krb5.KRB_AS_REQ) + return processAsReq(in); + else + return processTgsReq(in); + } + + /** + * Processes a TGS_REQ and generates a TGS_REP (or KRB_ERROR) + * @param in the request + * @return the response + * @throws java.lang.Exception for various errors + */ + private byte[] processTgsReq(byte[] in) throws Exception { + TGSReq tgsReq = new TGSReq(in); + try { + System.out.println(realm + "> " + tgsReq.reqBody.cname + + " sends TGS-REQ for " + + tgsReq.reqBody.sname); + KDCReqBody body = tgsReq.reqBody; + int etype = 0; + + // Reflection: PAData[] pas = tgsReq.pAData; + Field f = KDCReq.class.getDeclaredField("pAData"); + f.setAccessible(true); + PAData[] pas = (PAData[])f.get(tgsReq); + + Ticket tkt = null; + EncTicketPart etp = null; + if (pas == null || pas.length == 0) { + throw new KrbException(Krb5.KDC_ERR_PADATA_TYPE_NOSUPP); + } else { + for (PAData pa: pas) { + if (pa.getType() == Krb5.PA_TGS_REQ) { + APReq apReq = new APReq(pa.getValue()); + EncryptedData ed = apReq.authenticator; + tkt = apReq.ticket; + etype = tkt.encPart.getEType(); + EncryptionKey kkey = null; + if (!tkt.realm.toString().equals(realm)) { + if (tkt.sname.getNameString().equals("krbtgt/" + realm)) { + kkey = keyForUser(new PrincipalName("krbtgt/" + tkt.realm.toString(), realm), etype); + } + } else { + kkey = keyForUser(tkt.sname, etype); + } + byte[] bb = tkt.encPart.decrypt(kkey, KeyUsage.KU_TICKET); + DerInputStream derIn = new DerInputStream(bb); + DerValue der = derIn.getDerValue(); + etp = new EncTicketPart(der.toByteArray()); + } + } + if (tkt == null) { + throw new KrbException(Krb5.KDC_ERR_PADATA_TYPE_NOSUPP); + } + } + EncryptionKey skey = keyForUser(body.sname, etype); + if (skey == null) { + throw new KrbException(Krb5.KDC_ERR_SUMTYPE_NOSUPP); // TODO + } + + // Session key for 
original ticket, TGT + EncryptionKey ckey = etp.key; + + // Session key for session with the service + EncryptionKey key = generateRandomKey(etype); + + // Check time, TODO + KerberosTime till = body.till; + if (till == null) { + throw new KrbException(Krb5.KDC_ERR_NEVER_VALID); // TODO + } else if (till.isZero()) { + till = new KerberosTime(new Date().getTime() + 1000 * 3600 * 11); + } + + boolean[] bFlags = new boolean[Krb5.TKT_OPTS_MAX+1]; + if (body.kdcOptions.get(KDCOptions.FORWARDABLE)) { + bFlags[Krb5.TKT_OPTS_FORWARDABLE] = true; + } + if (body.kdcOptions.get(KDCOptions.FORWARDED) || + etp.flags.get(Krb5.TKT_OPTS_FORWARDED)) { + bFlags[Krb5.TKT_OPTS_FORWARDED] = true; + } + if (body.kdcOptions.get(KDCOptions.RENEWABLE)) { + bFlags[Krb5.TKT_OPTS_RENEWABLE] = true; + //renew = new KerberosTime(new Date().getTime() + 1000 * 3600 * 24 * 7); + } + if (body.kdcOptions.get(KDCOptions.PROXIABLE)) { + bFlags[Krb5.TKT_OPTS_PROXIABLE] = true; + } + if (body.kdcOptions.get(KDCOptions.POSTDATED)) { + bFlags[Krb5.TKT_OPTS_POSTDATED] = true; + } + if (body.kdcOptions.get(KDCOptions.ALLOW_POSTDATE)) { + bFlags[Krb5.TKT_OPTS_MAY_POSTDATE] = true; + } + bFlags[Krb5.TKT_OPTS_INITIAL] = true; + + TicketFlags tFlags = new TicketFlags(bFlags); + EncTicketPart enc = new EncTicketPart( + tFlags, + key, + etp.crealm, + etp.cname, + new TransitedEncoding(1, new byte[0]), // TODO + new KerberosTime(new Date()), + body.from, + till, body.rtime, + body.addresses, + null); + Ticket t = new Ticket( + body.crealm, + body.sname, + new EncryptedData(skey, enc.asn1Encode(), KeyUsage.KU_TICKET) + ); + EncTGSRepPart enc_part = new EncTGSRepPart( + key, + new LastReq(new LastReqEntry[]{ + new LastReqEntry(0, new KerberosTime(new Date().getTime() - 10000)) + }), + body.getNonce(), // TODO: detect replay + new KerberosTime(new Date().getTime() + 1000 * 3600 * 24), + // Next 5 and last MUST be same with ticket + tFlags, + new KerberosTime(new Date()), + body.from, + till, body.rtime, + 
body.crealm, + body.sname, + body.addresses + ); + EncryptedData edata = new EncryptedData(ckey, enc_part.asn1Encode(), KeyUsage.KU_ENC_TGS_REP_PART_SESSKEY); + TGSRep tgsRep = new TGSRep(null, + etp.crealm, + etp.cname, + t, + edata); + System.out.println(" Return " + tgsRep.cname + + " ticket for " + tgsRep.ticket.sname); + + DerOutputStream out = new DerOutputStream(); + out.write(DerValue.createTag(DerValue.TAG_APPLICATION, + true, (byte)Krb5.KRB_TGS_REP), tgsRep.asn1Encode()); + return out.toByteArray(); + } catch (KrbException ke) { + ke.printStackTrace(System.out); + KRBError kerr = ke.getError(); + KDCReqBody body = tgsReq.reqBody; + System.out.println(" Error " + ke.returnCode() + + " " +ke.returnCodeMessage()); + if (kerr == null) { + kerr = new KRBError(null, null, null, + new KerberosTime(new Date()), + 0, + ke.returnCode(), + body.crealm, body.cname, + new Realm(getRealm()), body.sname, + KrbException.errorMessage(ke.returnCode()), + null); + } + return kerr.asn1Encode(); + } + } + + /** + * Processes a AS_REQ and generates a AS_REP (or KRB_ERROR) + * @param in the request + * @return the response + * @throws java.lang.Exception for various errors + */ + private byte[] processAsReq(byte[] in) throws Exception { + ASReq asReq = new ASReq(in); + int[] eTypes = null; + try { + System.out.println(realm + "> " + asReq.reqBody.cname + + " sends AS-REQ for " + + asReq.reqBody.sname); + + KDCReqBody body = asReq.reqBody; + + // Reflection: int[] eType = body.eType; + Field f = KDCReqBody.class.getDeclaredField("eType"); + f.setAccessible(true); + eTypes = (int[])f.get(body); + int eType = eTypes[0]; + + EncryptionKey ckey = keyForUser(body.cname, eType); + EncryptionKey skey = keyForUser(body.sname, eType); + if (ckey == null) { + throw new KrbException(Krb5.KDC_ERR_ETYPE_NOSUPP); + } + if (skey == null) { + throw new KrbException(Krb5.KDC_ERR_SUMTYPE_NOSUPP); // TODO + } + + // Session key + EncryptionKey key = generateRandomKey(eType); + // Check time, TODO 
+ KerberosTime till = body.till; + if (till == null) { + throw new KrbException(Krb5.KDC_ERR_NEVER_VALID); // TODO + } else if (till.isZero()) { + till = new KerberosTime(new Date().getTime() + 1000 * 3600 * 11); + } + //body.from + boolean[] bFlags = new boolean[Krb5.TKT_OPTS_MAX+1]; + if (body.kdcOptions.get(KDCOptions.FORWARDABLE)) { + bFlags[Krb5.TKT_OPTS_FORWARDABLE] = true; + } + if (body.kdcOptions.get(KDCOptions.RENEWABLE)) { + bFlags[Krb5.TKT_OPTS_RENEWABLE] = true; + //renew = new KerberosTime(new Date().getTime() + 1000 * 3600 * 24 * 7); + } + if (body.kdcOptions.get(KDCOptions.PROXIABLE)) { + bFlags[Krb5.TKT_OPTS_PROXIABLE] = true; + } + if (body.kdcOptions.get(KDCOptions.POSTDATED)) { + bFlags[Krb5.TKT_OPTS_POSTDATED] = true; + } + if (body.kdcOptions.get(KDCOptions.ALLOW_POSTDATE)) { + bFlags[Krb5.TKT_OPTS_MAY_POSTDATE] = true; + } + bFlags[Krb5.TKT_OPTS_INITIAL] = true; + + f = KDCReq.class.getDeclaredField("pAData"); + f.setAccessible(true); + PAData[] pas = (PAData[])f.get(asReq); + if (pas == null || pas.length == 0) { + Object preauth = options.get(Option.PREAUTH_REQUIRED); + if (preauth == null || preauth.equals(Boolean.TRUE)) { + throw new KrbException(Krb5.KDC_ERR_PREAUTH_REQUIRED); + } + } else { + try { + Constructor ctor = EncryptedData.class.getDeclaredConstructor(DerValue.class); + ctor.setAccessible(true); + EncryptedData data = ctor.newInstance(new DerValue(pas[0].getValue())); + data.decrypt(ckey, KeyUsage.KU_PA_ENC_TS); + } catch (Exception e) { + throw new KrbException(Krb5.KDC_ERR_PREAUTH_FAILED); + } + bFlags[Krb5.TKT_OPTS_PRE_AUTHENT] = true; + } + + TicketFlags tFlags = new TicketFlags(bFlags); + EncTicketPart enc = new EncTicketPart( + tFlags, + key, + body.crealm, + body.cname, + new TransitedEncoding(1, new byte[0]), + new KerberosTime(new Date()), + body.from, + till, body.rtime, + body.addresses, + null); + Ticket t = new Ticket( + body.crealm, + body.sname, + new EncryptedData(skey, enc.asn1Encode(), KeyUsage.KU_TICKET) + 
); + EncASRepPart enc_part = new EncASRepPart( + key, + new LastReq(new LastReqEntry[]{ + new LastReqEntry(0, new KerberosTime(new Date().getTime() - 10000)) + }), + body.getNonce(), // TODO: detect replay? + new KerberosTime(new Date().getTime() + 1000 * 3600 * 24), + // Next 5 and last MUST be same with ticket + tFlags, + new KerberosTime(new Date()), + body.from, + till, body.rtime, + body.crealm, + body.sname, + body.addresses + ); + EncryptedData edata = new EncryptedData(ckey, enc_part.asn1Encode(), KeyUsage.KU_ENC_AS_REP_PART); + ASRep asRep = new ASRep(null, + body.crealm, + body.cname, + t, + edata); + + System.out.println(" Return " + asRep.cname + + " ticket for " + asRep.ticket.sname); + + DerOutputStream out = new DerOutputStream(); + out.write(DerValue.createTag(DerValue.TAG_APPLICATION, + true, (byte)Krb5.KRB_AS_REP), asRep.asn1Encode()); + return out.toByteArray(); + } catch (KrbException ke) { + ke.printStackTrace(System.out); + KRBError kerr = ke.getError(); + KDCReqBody body = asReq.reqBody; + System.out.println(" Error " + ke.returnCode() + + " " +ke.returnCodeMessage()); + byte[] eData = null; + if (kerr == null) { + if (ke.returnCode() == Krb5.KDC_ERR_PREAUTH_REQUIRED || + ke.returnCode() == Krb5.KDC_ERR_PREAUTH_FAILED) { + PAData pa; + + ETypeInfo2 ei2 = new ETypeInfo2(eTypes[0], null, null); + DerOutputStream eid = new DerOutputStream(); + eid.write(DerValue.tag_Sequence, ei2.asn1Encode()); + + pa = new PAData(Krb5.PA_ETYPE_INFO2, eid.toByteArray()); + + DerOutputStream bytes = new DerOutputStream(); + bytes.write(new PAData(Krb5.PA_ENC_TIMESTAMP, new byte[0]).asn1Encode()); + bytes.write(pa.asn1Encode()); + + boolean allOld = true; + for (int i: eTypes) { + if (i == EncryptedData.ETYPE_AES128_CTS_HMAC_SHA1_96 || + i == EncryptedData.ETYPE_AES256_CTS_HMAC_SHA1_96) { + allOld = false; + break; + } + } + if (allOld) { + ETypeInfo ei = new ETypeInfo(eTypes[0], null); + eid = new DerOutputStream(); + eid.write(DerValue.tag_Sequence, 
ei.asn1Encode()); + pa = new PAData(Krb5.PA_ETYPE_INFO, eid.toByteArray()); + bytes.write(pa.asn1Encode()); + } + DerOutputStream temp = new DerOutputStream(); + temp.write(DerValue.tag_Sequence, bytes); + eData = temp.toByteArray(); + } + kerr = new KRBError(null, null, null, + new KerberosTime(new Date()), + 0, + ke.returnCode(), + body.crealm, body.cname, + new Realm(getRealm()), body.sname, + KrbException.errorMessage(ke.returnCode()), + eData); + } + return kerr.asn1Encode(); + } + } + + /** + * Generates a line for a KDC to put inside [realms] of krb5.conf + * @param kdc the KDC + * @return REALM.NAME = { kdc = localhost:port } + */ + private static String realmLineForKDC(KDC kdc) { + return String.format(" %s = {\n kdc = localhost:%d\n }\n", kdc.realm, kdc.port); + } + + /** + * Start the KDC service. This server listens on both UDP and TCP using + * the same port number. It uses three threads to deal with requests. + * They can be set to daemon threads if requested. + * @param port the port number to listen to. If zero, a random available + * port no less than 8000 will be chosen and used. 
+ * @param asDaemon true if the KDC threads should be daemons + * @throws java.io.IOException for any communication error + */ + protected void startServer(int port, boolean asDaemon) throws IOException { + DatagramSocket u1 = null; + ServerSocket t1 = null; + if (port > 0) { + u1 = new DatagramSocket(port, InetAddress.getByName("127.0.0.1")); + t1 = new ServerSocket(port); + } else { + while (true) { + // Try to find a port number that's both TCP and UDP free + try { + port = 8000 + new java.util.Random().nextInt(10000); + u1 = null; + u1 = new DatagramSocket(port, InetAddress.getByName("127.0.0.1")); + t1 = new ServerSocket(port); + break; + } catch (Exception e) { + if (u1 != null) u1.close(); + } + } + } + final DatagramSocket udp = u1; + final ServerSocket tcp = t1; + System.out.println("Start KDC on " + port); + + this.port = port; + + // The UDP consumer + Thread thread = new Thread() { + public void run() { + while (true) { + try { + byte[] inbuf = new byte[8192]; + DatagramPacket p = new DatagramPacket(inbuf, inbuf.length); + udp.receive(p); + System.out.println("-----------------------------------------------"); + System.out.println(">>>>> UDP packet received"); + q.put(new Job(processMessage(Arrays.copyOf(inbuf, p.getLength())), udp, p)); + } catch (Exception e) { + e.printStackTrace(); + } + } + } + }; + thread.setDaemon(asDaemon); + thread.start(); + + // The TCP consumer + thread = new Thread() { + public void run() { + while (true) { + try { + Socket socket = tcp.accept(); + System.out.println("-----------------------------------------------"); + System.out.println(">>>>> TCP connection established"); + DataInputStream in = new DataInputStream(socket.getInputStream()); + DataOutputStream out = new DataOutputStream(socket.getOutputStream()); + byte[] token = new byte[in.readInt()]; + in.readFully(token); + q.put(new Job(processMessage(token), socket, out)); + } catch (Exception e) { + e.printStackTrace(); + } + } + } + }; + thread.setDaemon(asDaemon); 
+ thread.start(); + + // The dispatcher + thread = new Thread() { + public void run() { + while (true) { + try { + q.take().send(); + } catch (Exception e) { + } + } + } + }; + thread.setDaemon(true); + thread.start(); + } + + /** + * Helper class to encapsulate a job in a KDC. + */ + private static class Job { + byte[] token; // The received request at creation time and + // the response at send time + Socket s; // The TCP socket from where the request comes + DataOutputStream out; // The OutputStream of the TCP socket + DatagramSocket s2; // The UDP socket from where the request comes + DatagramPacket dp; // The incoming UDP datagram packet + boolean useTCP; // Whether TCP or UDP is used + + // Creates a job object for TCP + Job(byte[] token, Socket s, DataOutputStream out) { + useTCP = true; + this.token = token; + this.s = s; + this.out = out; + } + + // Creates a job object for UDP + Job(byte[] token, DatagramSocket s2, DatagramPacket dp) { + useTCP = false; + this.token = token; + this.s2 = s2; + this.dp = dp; + } + + // Sends the output back to the client + void send() { + try { + if (useTCP) { + System.out.println(">>>>> TCP request honored"); + out.writeInt(token.length); + out.write(token); + s.close(); + } else { + System.out.println(">>>>> UDP request honored"); + s2.send(new DatagramPacket(token, token.length, dp.getAddress(), dp.getPort())); + } + } catch (Exception e) { + e.printStackTrace(); + } + } + } +} diff --git a/jdk/test/sun/security/krb5/auto/KerberosHashEqualsTest.java b/jdk/test/sun/security/krb5/auto/KerberosHashEqualsTest.java new file mode 100644 index 00000000000..9e5b5839309 --- /dev/null +++ b/jdk/test/sun/security/krb5/auto/KerberosHashEqualsTest.java @@ -0,0 +1,173 @@ +/* + * Copyright 2005-2008 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + */ + +/* + * @test + * @bug 4641821 + * @summary hashCode() and equals() for KerberosKey and KerberosTicket + */ + +import java.net.InetAddress; +import java.util.Date; +import javax.security.auth.kerberos.KerberosKey; +import javax.security.auth.kerberos.KerberosPrincipal; +import javax.security.auth.kerberos.KerberosTicket; + +public class KerberosHashEqualsTest { + public static void main(String[] args) throws Exception { + new OneKDC(null); + new KerberosHashEqualsTest().check(); + } + + void checkSame(Object o1, Object o2) { + if(!o1.equals(o2)) { + throw new RuntimeException("equals() fails"); + } + if(o1.hashCode() != o2.hashCode()) { + throw new RuntimeException("hashCode() not same"); + } + } + + void checkNotSame(Object o1, Object o2) { + if(o1.equals(o2)) { + throw new RuntimeException("equals() succeeds"); + } + } + + void check() throws Exception { + + // The key part: + // new KerberosKey(principal, bytes, keyType, version) + + KerberosKey k1, k2; + KerberosPrincipal CLIENT = new KerberosPrincipal("client"); + KerberosPrincipal SERVER 
= new KerberosPrincipal("server"); + byte[] PASS = "pass".getBytes(); + + k1 = new KerberosKey(CLIENT, PASS, 1, 1); + k2 = new KerberosKey(CLIENT, PASS, 1, 1); + checkSame(k1, k1); // me is me + checkSame(k1, k2); // same + + // A destroyed key doesn't equal to any key + k2.destroy(); + checkNotSame(k1, k2); + checkNotSame(k2, k1); + k1.destroy(); + checkNotSame(k1, k2); // even if they are both destroyed + checkNotSame(k2, k1); + checkSame(k2, k2); + + // a little difference means not equal + k1 = new KerberosKey(CLIENT, PASS, 1, 1); + k2 = new KerberosKey(SERVER, PASS, 1, 1); + checkNotSame(k1, k2); // Different principal name + + k2 = new KerberosKey(CLIENT, "ssap".getBytes(), 1, 1); + checkNotSame(k1, k2); // Different password + + k2 = new KerberosKey(CLIENT, PASS, 2, 1); + checkNotSame(k1, k2); // Different keytype + + k2 = new KerberosKey(CLIENT, PASS, 1, 2); + checkNotSame(k1, k2); // Different version + + k2 = new KerberosKey(null, PASS, 1, 2); + checkNotSame(k1, k2); // null is not non-null + + k1 = new KerberosKey(null, PASS, 1, 2); + checkSame(k1, k2); // null is null + + checkNotSame(k1, "Another Object"); + + // The ticket part: + // new KerberosTicket(asn1 bytes, client, server, session key, type, flags, + // auth, start, end, renewUntil times, address) + + KerberosTicket t1, t2; + + byte[] ASN1 = "asn1".getBytes(); + boolean[] FORWARDABLE = new boolean[] {true, true}; + boolean[] ALLTRUE = new boolean[] {true, true, true, true, true, true, true, true, true, true}; + Date D0 = new Date(0); + + t1 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, FORWARDABLE, D0, D0, D0, D0, null); + t2 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, FORWARDABLE, D0, D0, D0, D0, null); + checkSame(t1, t1); + checkSame(t1, t2); + + // destroyed tickets doesn't equal to each other + t1.destroy(); + checkNotSame(t1, t2); + checkNotSame(t2, t1); + + t2.destroy(); + checkNotSame(t1, t2); // even if they are both destroyed + checkNotSame(t2, t1); + + checkSame(t2, 
t2); // unless they are the same object + + // a little difference means not equal + t1 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, FORWARDABLE, D0, D0, D0, D0, null); + t2 = new KerberosTicket("asn11".getBytes(), CLIENT, SERVER, PASS, 1, FORWARDABLE, D0, D0, D0, D0, null); + checkNotSame(t1, t2); // Different ASN1 encoding + + t2 = new KerberosTicket(ASN1, new KerberosPrincipal("client1"), SERVER, PASS, 1, FORWARDABLE, D0, D0, D0, D0, null); + checkNotSame(t1, t2); // Different client + + t2 = new KerberosTicket(ASN1, CLIENT, new KerberosPrincipal("server1"), PASS, 1, FORWARDABLE, D0, D0, D0, D0, null); + checkNotSame(t1, t2); // Different server + + t2 = new KerberosTicket(ASN1, CLIENT, SERVER, "pass1".getBytes(), 1, FORWARDABLE, D0, D0, D0, D0, null); + checkNotSame(t1, t2); // Different session key + + t2 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 2, FORWARDABLE, D0, D0, D0, D0, null); + checkNotSame(t1, t2); // Different key type + + t2 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, new boolean[] {true, false}, D0, D0, D0, D0, null); + checkNotSame(t1, t2); // Different flags, not FORWARDABLE + + t2 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, FORWARDABLE, new Date(1), D0, D0, D0, null); + checkNotSame(t1, t2); // Different authtime + + t2 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, FORWARDABLE, D0, new Date(1), D0, D0, null); + checkNotSame(t1, t2); // Different starttime + + t2 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, FORWARDABLE, D0, D0, new Date(1), D0, null); + checkNotSame(t1, t2); // Different endtime + + t2 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, FORWARDABLE, D0, D0, D0, D0, new InetAddress[2]); + checkNotSame(t1, t2); // Different client addresses + + t2 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, FORWARDABLE, D0, D0, D0, new Date(1), null); + t1 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, FORWARDABLE, D0, D0, D0, new Date(2), null); + checkSame(t1, t2); // renewtill is ignored 
when RENEWABLE ticket flag is not set. + + t2 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, ALLTRUE, D0, D0, D0, new Date(1), null); + t1 = new KerberosTicket(ASN1, CLIENT, SERVER, PASS, 1, ALLTRUE, D0, D0, D0, new Date(2), null); + checkNotSame(t1, t2); // renewtill is used when RENEWABLE is set. + + checkNotSame(t1, "Another Object"); + System.out.println("Good!"); + } +} diff --git a/jdk/test/sun/security/krb5/auto/OneKDC.java b/jdk/test/sun/security/krb5/auto/OneKDC.java new file mode 100644 index 00000000000..9505c6a3c8f --- /dev/null +++ b/jdk/test/sun/security/krb5/auto/OneKDC.java @@ -0,0 +1,155 @@ +/* + * Copyright 2008 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ */ + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.security.Security; +import javax.security.auth.callback.Callback; +import javax.security.auth.callback.CallbackHandler; +import javax.security.auth.callback.NameCallback; +import javax.security.auth.callback.PasswordCallback; +import sun.security.krb5.Config; + +/** + * This class starts a simple KDC with one realm, several typical principal + * names, generates delete-on-exit krb5.conf and keytab files, and setup + * system properties for them. There's also a helper method to generate a + * JAAS login config file that can be used for JAAS or JGSS apps. + *

+ * Just call this line to start everything: + *

+ * new OneKDC(null).writeJAASConf();
+ * 
+ */ +public class OneKDC extends KDC { + + // The krb5 codes would try to canonicalize hostnames before creating + // a service principal name, so let's find out the canonicalized form + // of localhost first. The following codes mimic the process inside + // PrincipalName.java. + static String localhost = "localhost"; + static { + try { + localhost = InetAddress.getByName(localhost) + .getCanonicalHostName(); + } catch (UnknownHostException uhe) { + ; // Ignore, localhost is still "localhost" + } + } + public static final String USER = "dummy"; + public static final char[] PASS = "bogus".toCharArray(); + public static String SERVER = "server/" + localhost; + public static String BACKEND = "backend/" + localhost; + public static final String KRB5_CONF = "localkdc-krb5.conf"; + public static final String KTAB = "localkdc.ktab"; + public static final String JAAS_CONF = "localkdc-jaas.conf"; + public static final String REALM = "RABBIT.HOLE"; + + /** + * Creates the KDC and starts it. + * @param etype Encryption type, null if not specified + * @throws java.lang.Exception if there's anything wrong + */ + public OneKDC(String etype) throws Exception { + super(REALM, 0, true); + addPrincipal(USER, PASS); + addPrincipalRandKey("krbtgt/" + REALM); + addPrincipalRandKey(SERVER); + addPrincipalRandKey(BACKEND); + KDC.saveConfig(KRB5_CONF, this, + "forwardable = true", + "default_keytab_name = " + KTAB, + etype == null ? "" : "default_tkt_enctypes=" + etype + "\ndefault_tgs_enctypes=" + etype); + System.setProperty("java.security.krb5.conf", KRB5_CONF); + // Whatever krb5.conf had been loaded before, we reload ours now. + Config.refresh(); + + writeKtab(KTAB); + new File(KRB5_CONF).deleteOnExit(); + new File(KTAB).deleteOnExit(); + } + + /** + * Writes a JAAS login config file, which contains as many as useful + * entries, including JGSS style initiator/acceptor and normal JAAS + * entries with names using existing OneKDC principals. 
+ * @throws java.lang.Exception if anything goes wrong + */ + public void writeJAASConf() throws IOException { + System.setProperty("java.security.auth.login.config", JAAS_CONF); + File f = new File(JAAS_CONF); + FileOutputStream fos = new FileOutputStream(f); + fos.write(( + "com.sun.security.jgss.krb5.initiate {\n" + + " com.sun.security.auth.module.Krb5LoginModule required;\n};\n" + + "com.sun.security.jgss.krb5.accept {\n" + + " com.sun.security.auth.module.Krb5LoginModule required\n" + + " principal=\"" + SERVER + "\"\n" + + " useKeyTab=true\n" + + " isInitiator=false\n" + + " storeKey=true;\n};\n" + + "client {\n" + + " com.sun.security.auth.module.Krb5LoginModule required;\n};\n" + + "server {\n" + + " com.sun.security.auth.module.Krb5LoginModule required\n" + + " principal=\"" + SERVER + "\"\n" + + " useKeyTab=true\n" + + " storeKey=true;\n};\n" + + "backend {\n" + + " com.sun.security.auth.module.Krb5LoginModule required\n" + + " principal=\"" + BACKEND + "\"\n" + + " useKeyTab=true\n" + + " storeKey=true\n" + + " isInitiator=false;\n};\n" + ).getBytes()); + fos.close(); + f.deleteOnExit(); + Security.setProperty("auth.login.defaultCallbackHandler", "OneKDC$CallbackForClient"); + } + + /** + * The default callback handler for JAAS login. Note that this handler is + * hard coded to provide only info for USER1. If you need to provide info + * for another principal, please use Context.fromUserPass() instead. 
+ */ + public static class CallbackForClient implements CallbackHandler { + public void handle(Callback[] callbacks) { + String user = OneKDC.USER; + char[] pass = OneKDC.PASS; + for (Callback callback : callbacks) { + if (callback instanceof NameCallback) { + System.out.println("Callback for name: " + user); + ((NameCallback) callback).setName(user); + } + if (callback instanceof PasswordCallback) { + System.out.println("Callback for pass: " + + new String(pass)); + ((PasswordCallback) callback).setPassword(pass); + } + } + } + } +} diff --git a/jdk/test/sun/security/krb5/auto/basic.sh b/jdk/test/sun/security/krb5/auto/basic.sh new file mode 100644 index 00000000000..388e4a1ea55 --- /dev/null +++ b/jdk/test/sun/security/krb5/auto/basic.sh @@ -0,0 +1,65 @@ +# +# Copyright 2008 Sun Microsystems, Inc. All Rights Reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. +# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, +# CA 95054 USA or visit www.sun.com if you need additional information or +# have any questions. +# + +# @test +# @bug 6706974 +# @summary Add krb5 test infrastructure +# @run shell/timeout=300 basic.sh +# + +if [ "${TESTSRC}" = "" ] ; then + TESTSRC="." 
+fi +if [ "${TESTJAVA}" = "" ] ; then + echo "TESTJAVA not set. Test cannot execute." + echo "FAILED!!!" + exit 1 +fi + +# set platform-dependent variables +OS=`uname -s` +case "$OS" in + Windows_* ) + FS="\\" + ;; + * ) + FS="/" + ;; +esac + +${TESTJAVA}${FS}bin${FS}javac -d . \ + ${TESTSRC}${FS}BasicKrb5Test.java \ + ${TESTSRC}${FS}KDC.java \ + ${TESTSRC}${FS}OneKDC.java \ + ${TESTSRC}${FS}Action.java \ + ${TESTSRC}${FS}Context.java \ + || exit 10 +${TESTJAVA}${FS}bin${FS}java -Dtest.src=$TESTSRC BasicKrb5Test || exit 100 +${TESTJAVA}${FS}bin${FS}java -Dtest.src=$TESTSRC BasicKrb5Test des-cbc-crc || exit 1 +${TESTJAVA}${FS}bin${FS}java -Dtest.src=$TESTSRC BasicKrb5Test des-cbc-md5 || exit 3 +${TESTJAVA}${FS}bin${FS}java -Dtest.src=$TESTSRC BasicKrb5Test des3-cbc-sha1 || exit 16 +${TESTJAVA}${FS}bin${FS}java -Dtest.src=$TESTSRC BasicKrb5Test aes128-cts || exit 17 +${TESTJAVA}${FS}bin${FS}java -Dtest.src=$TESTSRC BasicKrb5Test aes256-cts || exit 18 +${TESTJAVA}${FS}bin${FS}java -Dtest.src=$TESTSRC BasicKrb5Test rc4-hmac || exit 23 + +exit 0