From 3b5d6a04dd69cc66f792152855786c5c13edf492 Mon Sep 17 00:00:00 2001 From: Jon Masamitsu Date: Sun, 8 Feb 2009 13:18:01 -0800 Subject: [PATCH] 6690928: Use spinning in combination with yields for workstealing termination Substitute a spin loop for most calls to yield() to reduce the stress on the system. Reviewed-by: tonyp --- .../gc_implementation/g1/g1CollectedHeap.cpp | 11 ++- .../parallelScavenge/psMarkSweep.cpp | 4 ++ .../parallelScavenge/psParallelCompact.cpp | 4 ++ .../parallelScavenge/psScavenge.cpp | 4 ++ .../src/share/vm/memory/genCollectedHeap.cpp | 4 ++ hotspot/src/share/vm/oops/cpCacheKlass.cpp | 2 +- hotspot/src/share/vm/runtime/globals.hpp | 7 ++ hotspot/src/share/vm/utilities/taskqueue.cpp | 70 ++++++++++++++++++- hotspot/src/share/vm/utilities/taskqueue.hpp | 14 ++++ 9 files changed, 116 insertions(+), 4 deletions(-) diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp index 506174b9d71..1c6766b9947 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp @@ -949,6 +949,10 @@ void G1CollectedHeap::do_collection(bool full, bool clear_all_soft_refs, GCOverheadReporter::recordSTWEnd(end); g1_policy()->record_full_collection_end(); +#ifdef TRACESPINNING + ParallelTaskTerminator::print_termination_counts(); +#endif + gc_epilogue(true); // Abandon concurrent refinement. This must happen last: in the @@ -2647,8 +2651,13 @@ G1CollectedHeap::do_collection_pause_at_safepoint(HeapRegion* popular_region) { } } - if (mark_in_progress()) + if (mark_in_progress()) { concurrent_mark()->update_g1_committed(); + } + +#ifdef TRACESPINNING + ParallelTaskTerminator::print_termination_counts(); +#endif gc_epilogue(false); } diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp index 829403f5128..608eedb8fb1 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp @@ -362,6 +362,10 @@ void PSMarkSweep::invoke_no_policy(bool clear_all_softrefs) { if (PrintHeapAtGC) { Universe::print_heap_after_gc(); } + +#ifdef TRACESPINNING + ParallelTaskTerminator::print_termination_counts(); +#endif } bool PSMarkSweep::absorb_live_data_from_eden(PSAdaptiveSizePolicy* size_policy, diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp index 5d693905c98..686c65b2c53 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp @@ -2203,6 +2203,10 @@ void PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) { collection_exit.ticks()); gc_task_manager()->print_task_time_stamps(); } + +#ifdef TRACESPINNING + ParallelTaskTerminator::print_termination_counts(); +#endif } bool PSParallelCompact::absorb_live_data_from_eden(PSAdaptiveSizePolicy* size_policy, diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp index 6357362b0f7..f0bc6a85e8b 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp @@ -615,6 +615,10 @@ bool PSScavenge::invoke_no_policy() { gc_task_manager()->print_task_time_stamps(); } +#ifdef TRACESPINNING + ParallelTaskTerminator::print_termination_counts(); +#endif + return !promotion_failure_occurred; } diff --git a/hotspot/src/share/vm/memory/genCollectedHeap.cpp b/hotspot/src/share/vm/memory/genCollectedHeap.cpp index 41dfac1b5c8..5bb817f0280 100644 --- a/hotspot/src/share/vm/memory/genCollectedHeap.cpp +++ b/hotspot/src/share/vm/memory/genCollectedHeap.cpp @@ -610,6 +610,10 @@ void GenCollectedHeap::do_collection(bool full, Universe::print_heap_after_gc(); } +#ifdef TRACESPINNING + ParallelTaskTerminator::print_termination_counts(); +#endif + if (ExitAfterGCNum > 0 && total_collections() == ExitAfterGCNum) { tty->print_cr("Stopping after GC #%d", ExitAfterGCNum); vm_exit(-1); diff --git a/hotspot/src/share/vm/oops/cpCacheKlass.cpp b/hotspot/src/share/vm/oops/cpCacheKlass.cpp index c71ecfd9fc1..95fd11ded28 100644 --- a/hotspot/src/share/vm/oops/cpCacheKlass.cpp +++ b/hotspot/src/share/vm/oops/cpCacheKlass.cpp @@ -161,7 +161,7 @@ int constantPoolCacheKlass::oop_adjust_pointers(oop obj) { } bool constantPoolCacheKlass::oop_is_conc_safe(oop obj) const { - assert(obj->is_constantPoolCache(), "must be constMethod oop"); + assert(obj->is_constantPoolCache(), "should be constant pool"); return constantPoolCacheOop(obj)->is_conc_safe(); } diff --git a/hotspot/src/share/vm/runtime/globals.hpp b/hotspot/src/share/vm/runtime/globals.hpp index c30280d11ba..75bd0256aec 100644 --- a/hotspot/src/share/vm/runtime/globals.hpp +++ b/hotspot/src/share/vm/runtime/globals.hpp @@ -1655,6 +1655,13 @@ class CommandLineFlags { develop(uintx, WorkStealingYieldsBeforeSleep, 1000, \ "Number of yields before a sleep is done during workstealing") \ \ + develop(uintx, WorkStealingHardSpins, 4096, \ + "Number of iterations in a spin loop between checks on " \ + "time out of hard spin") \ + \ + develop(uintx, WorkStealingSpinToYieldRatio, 10, \ + "Ratio of hard spins to calls to yield") \ + \ product(uintx, PreserveMarkStackSize, 1024, \ "Size for stack used in promotion failure handling") \ \ diff --git a/hotspot/src/share/vm/utilities/taskqueue.cpp b/hotspot/src/share/vm/utilities/taskqueue.cpp index 992795a756c..2b3145813fd 100644 --- a/hotspot/src/share/vm/utilities/taskqueue.cpp +++ b/hotspot/src/share/vm/utilities/taskqueue.cpp @@ -25,6 +25,12 @@ # include "incls/_precompiled.incl" # include "incls/_taskqueue.cpp.incl" +#ifdef TRACESPINNING +uint ParallelTaskTerminator::_total_yields = 0; +uint ParallelTaskTerminator::_total_spins = 0; +uint ParallelTaskTerminator::_total_peeks = 0; +#endif + bool TaskQueueSuper::peek() { return _bottom != _age.top(); } @@ -70,14 +76,61 @@ ParallelTaskTerminator::offer_termination(TerminatorTerminator* terminator) { Atomic::inc(&_offered_termination); uint yield_count = 0; + // Number of hard spin loops done since last yield + uint hard_spin_count = 0; + // Number of iterations in the hard spin loop. + uint hard_spin_limit = WorkStealingHardSpins; + + // If WorkStealingSpinToYieldRatio is 0, no hard spinning is done. + // If it is greater than 0, then start with a small number + // of spins and increase number with each turn at spinning until + // the count of hard spins exceeds WorkStealingSpinToYieldRatio. + // Then do a yield() call and start spinning afresh. + if (WorkStealingSpinToYieldRatio > 0) { + hard_spin_limit = WorkStealingHardSpins >> WorkStealingSpinToYieldRatio; + hard_spin_limit = MAX2(hard_spin_limit, 1U); + } + // Remember the initial spin limit. + uint hard_spin_start = hard_spin_limit; + + // Loop waiting for all threads to offer termination or + // more work. while (true) { + // Are all threads offering termination? if (_offered_termination == _n_threads) { - //inner_termination_loop(); return true; } else { + // Look for more work. + // Periodically sleep() instead of yield() to give threads + // waiting on the cores the chance to grab this code if (yield_count <= WorkStealingYieldsBeforeSleep) { + // Do a yield or hardspin. For purposes of deciding whether + // to sleep, count this as a yield. yield_count++; - yield(); + + // Periodically call yield() instead spinning + // After WorkStealingSpinToYieldRatio spins, do a yield() call + // and reset the counts and starting limit. + if (hard_spin_count > WorkStealingSpinToYieldRatio) { + yield(); + hard_spin_count = 0; + hard_spin_limit = hard_spin_start; +#ifdef TRACESPINNING + _total_yields++; +#endif + } else { + // Hard spin this time + // Increase the hard spinning period but only up to a limit. + hard_spin_limit = MIN2(2*hard_spin_limit, + (uint) WorkStealingHardSpins); + for (uint j = 0; j < hard_spin_limit; j++) { + SpinPause(); + } + hard_spin_count++; +#ifdef TRACESPINNING + _total_spins++; +#endif + } } else { if (PrintGCDetails && Verbose) { gclog_or_tty->print_cr("ParallelTaskTerminator::offer_termination() " @@ -92,6 +145,9 @@ ParallelTaskTerminator::offer_termination(TerminatorTerminator* terminator) { sleep(WorkStealingSleepMillis); } +#ifdef TRACESPINNING + _total_peeks++; +#endif if (peek_in_queue_set() || (terminator != NULL && terminator->should_exit_termination())) { Atomic::dec(&_offered_termination); @@ -101,6 +157,16 @@ ParallelTaskTerminator::offer_termination(TerminatorTerminator* terminator) { } } +#ifdef TRACESPINNING +void ParallelTaskTerminator::print_termination_counts() { + gclog_or_tty->print_cr("ParallelTaskTerminator Total yields: %lld " + "Total spins: %lld Total peeks: %lld", + total_yields(), + total_spins(), + total_peeks()); +} +#endif + void ParallelTaskTerminator::reset_for_reuse() { if (_offered_termination != 0) { assert(_offered_termination == _n_threads, diff --git a/hotspot/src/share/vm/utilities/taskqueue.hpp b/hotspot/src/share/vm/utilities/taskqueue.hpp index 759affb7b34..3a80a814238 100644 --- a/hotspot/src/share/vm/utilities/taskqueue.hpp +++ b/hotspot/src/share/vm/utilities/taskqueue.hpp @@ -426,12 +426,20 @@ public: // A class to aid in the termination of a set of parallel tasks using // TaskQueueSet's for work stealing. +#undef TRACESPINNING + class ParallelTaskTerminator: public StackObj { private: int _n_threads; TaskQueueSetSuper* _queue_set; int _offered_termination; +#ifdef TRACESPINNING + static uint _total_yields; + static uint _total_spins; + static uint _total_peeks; +#endif + bool peek_in_queue_set(); protected: virtual void yield(); @@ -462,6 +470,12 @@ public: // the terminator is finished. void reset_for_reuse(); +#ifdef TRACESPINNING + static uint total_yields() { return _total_yields; } + static uint total_spins() { return _total_spins; } + static uint total_peeks() { return _total_peeks; } + static void print_termination_counts(); +#endif }; #define SIMPLE_STACK 0