Merge

2010-10-15 02:59:48 -07:00 · 2010-10-15 02:59:48 -07:00 · 4bf36a476f
commit 4bf36a476f
parent e1a504b343 ec8fa4caa2
65 changed files with 1597 additions and 714 deletions
--- a/hotspot/make/solaris/makefiles/sparcWorks.make
+++ b/hotspot/make/solaris/makefiles/sparcWorks.make
@ -51,9 +51,9 @@ ifeq ($(JRE_RELEASE_VER),1.6.0)
  VALIDATED_COMPILER_REVS   := 5.8
  VALIDATED_C_COMPILER_REVS := 5.8
 else
-  # Validated compilers for JDK7 are SS12 (5.9) or SS12 update 1 (5.10)
-  VALIDATED_COMPILER_REVS   := 5.9 5.10
-  VALIDATED_C_COMPILER_REVS := 5.9 5.10
+  # Validated compiler for JDK7 is SS12 update 1 + patches (5.10)
+  VALIDATED_COMPILER_REVS   := 5.10
+  VALIDATED_C_COMPILER_REVS := 5.10
 endif

 # Warning messages about not using the above validated versions
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/cmsCollectorPolicy.cpp
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/cmsCollectorPolicy.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -39,7 +39,7 @@ void ConcurrentMarkSweepPolicy::initialize_generations() {
  if (_generations == NULL)
    vm_exit_during_initialization("Unable to allocate gen spec");

-  if (UseParNewGC && ParallelGCThreads > 0) {
+  if (ParNewGeneration::in_use()) {
    if (UseAdaptiveSizePolicy) {
      _generations[0] = new GenerationSpec(Generation::ASParNew,
                                           _initial_gen0_size, _max_gen0_size);
@ -79,7 +79,7 @@ void ConcurrentMarkSweepPolicy::initialize_size_policy(size_t init_eden_size,

 void ConcurrentMarkSweepPolicy::initialize_gc_policy_counters() {
  // initialize the policy counters - 2 collectors, 3 generations
-  if (UseParNewGC && ParallelGCThreads > 0) {
+  if (ParNewGeneration::in_use()) {
    _gc_policy_counters = new GCPolicyCounters("ParNew:CMS", 2, 3);
  }
  else {
@ -102,7 +102,7 @@ void ASConcurrentMarkSweepPolicy::initialize_gc_policy_counters() {

  assert(size_policy() != NULL, "A size policy is required");
  // initialize the policy counters - 2 collectors, 3 generations
-  if (UseParNewGC && ParallelGCThreads > 0) {
+  if (ParNewGeneration::in_use()) {
    _gc_policy_counters = new CMSGCAdaptivePolicyCounters("ParNew:CMS", 2, 3,
      size_policy());
  }
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/cmsPermGen.cpp
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/cmsPermGen.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -50,6 +50,18 @@ HeapWord* CMSPermGen::mem_allocate(size_t size) {
  }
 }

+HeapWord* CMSPermGen::request_expand_and_allocate(Generation* gen,  // Expand-and-allocate in "gen", then bump the expansion limit if it was reached.
+                                                  size_t size,
+                                                  GCCause::Cause prev_cause /* ignored */) {
+  HeapWord* obj = gen->expand_and_allocate(size, false);  // result is returned as-is; it is not checked here
+  if (gen->capacity() >= _capacity_expansion_limit) {  // capacity has reached the current expansion limit
+    set_capacity_expansion_limit(gen->capacity() + MaxPermHeapExpansion);  // new limit = current capacity + MaxPermHeapExpansion
+    assert(((ConcurrentMarkSweepGeneration*)gen)->should_concurrent_collect(),
+           "Should kick off a collection if one not in progress");  // NOTE(review): unchecked downcast of gen -- confirm callers only pass the CMS generation
+  }
+  return obj;
+}
+
 void CMSPermGen::compute_new_size() {
  _gen->compute_new_size();
 }
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/cmsPermGen.hpp
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/cmsPermGen.hpp
@ -33,6 +33,10 @@ class CMSPermGen:  public PermGen {
  // The "generation" view.
  ConcurrentMarkSweepGeneration* _gen;

+  // Override default implementation from PermGen
+  virtual HeapWord* request_expand_and_allocate(Generation* gen, size_t size,
+                                                GCCause::Cause prev_cause);
+
 public:
  CMSPermGen(ReservedSpace rs, size_t initial_byte_size,
             CardTableRS* ct, FreeBlockDictionary::DictionaryChoice);
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp
@ -124,7 +124,8 @@ CompactibleFreeListSpace::CompactibleFreeListSpace(BlockOffsetSharedArray* bs,
  checkFreeListConsistency();

  // Initialize locks for parallel case.
-  if (ParallelGCThreads > 0) {
+
+  if (CollectedHeap::use_parallel_gc_threads()) {
    for (size_t i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
      _indexedFreeListParLocks[i] = new Mutex(Mutex::leaf - 1, // == ExpandHeap_lock - 1
                                              "a freelist par lock",
@ -1071,7 +1072,8 @@ bool CompactibleFreeListSpace::block_is_obj(const HeapWord* p) const {
  // at address below "p" in finding the object that contains "p"
  // and those objects (if garbage) may have been modified to hold
  // live range information.
-  // assert(ParallelGCThreads > 0 || _bt.block_start(p) == p, "Should be a block boundary");
+  // assert(CollectedHeap::use_parallel_gc_threads() || _bt.block_start(p) == p,
+  //        "Should be a block boundary");
  if (FreeChunk::indicatesFreeChunk(p)) return false;
  klassOop k = oop(p)->klass_or_null();
  if (k != NULL) {
@ -2932,7 +2934,9 @@ initialize_sequential_subtasks_for_rescan(int n_threads) {
         "n_tasks calculation incorrect");
  SequentialSubTasksDone* pst = conc_par_seq_tasks();
  assert(!pst->valid(), "Clobbering existing data?");
-  pst->set_par_threads(n_threads);
+  // Sets the condition for completion of the subtask (how many threads
+  // need to finish in order to be done).
+  pst->set_n_threads(n_threads);
  pst->set_n_tasks((int)n_tasks);
 }

@ -2972,6 +2976,8 @@ initialize_sequential_subtasks_for_marking(int n_threads,
         "n_tasks calculation incorrect");
  SequentialSubTasksDone* pst = conc_par_seq_tasks();
  assert(!pst->valid(), "Clobbering existing data?");
-  pst->set_par_threads(n_threads);
+  // Sets the condition for completion of the subtask (how many threads
+  // need to finish in order to be done).
+  pst->set_n_threads(n_threads);
  pst->set_n_tasks((int)n_tasks);
 }
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
@ -195,7 +195,7 @@ ConcurrentMarkSweepGeneration::ConcurrentMarkSweepGeneration(
           "Offset of FreeChunk::_prev within FreeChunk must match"
           "  that of OopDesc::_klass within OopDesc");
  )
-  if (ParallelGCThreads > 0) {
+  if (CollectedHeap::use_parallel_gc_threads()) {
    typedef CMSParGCThreadState* CMSParGCThreadStatePtr;
    _par_gc_thread_states =
      NEW_C_HEAP_ARRAY(CMSParGCThreadStatePtr, ParallelGCThreads);
@ -540,8 +540,6 @@ CMSCollector::CMSCollector(ConcurrentMarkSweepGeneration* cmsGen,
  _is_alive_closure(_span, &_markBitMap),
  _restart_addr(NULL),
  _overflow_list(NULL),
-  _preserved_oop_stack(NULL),
-  _preserved_mark_stack(NULL),
  _stats(cmsGen),
  _eden_chunk_array(NULL),     // may be set in ctor body
  _eden_chunk_capacity(0),     // -- ditto --
@ -616,7 +614,7 @@ CMSCollector::CMSCollector(ConcurrentMarkSweepGeneration* cmsGen,
  }

  // Support for multi-threaded concurrent phases
-  if (ParallelGCThreads > 0 && CMSConcurrentMTEnabled) {
+  if (CollectedHeap::use_parallel_gc_threads() && CMSConcurrentMTEnabled) {
    if (FLAG_IS_DEFAULT(ConcGCThreads)) {
      // just for now
      FLAG_SET_DEFAULT(ConcGCThreads, (ParallelGCThreads + 3)/4);
@ -628,6 +626,8 @@ CMSCollector::CMSCollector(ConcurrentMarkSweepGeneration* cmsGen,
        warning("GC/CMS: _conc_workers allocation failure: "
              "forcing -CMSConcurrentMTEnabled");
        CMSConcurrentMTEnabled = false;
+      } else {
+        _conc_workers->initialize_workers();
      }
    } else {
      CMSConcurrentMTEnabled = false;
@ -936,7 +936,7 @@ void ConcurrentMarkSweepGeneration::reset_after_compaction() {
  // along with all the other pointers into the heap but
  // compaction is expected to be a rare event with
  // a heap using cms so don't do it without seeing the need.
-  if (ParallelGCThreads > 0) {
+  if (CollectedHeap::use_parallel_gc_threads()) {
    for (uint i = 0; i < ParallelGCThreads; i++) {
      _par_gc_thread_states[i]->promo.reset();
    }
@ -2630,7 +2630,8 @@ void CMSCollector::gc_prologue(bool full) {
  // Should call gc_prologue_work() for all cms gens we are responsible for
  bool registerClosure =    _collectorState >= Marking
                         && _collectorState < Sweeping;
-  ModUnionClosure* muc = ParallelGCThreads > 0 ? &_modUnionClosurePar
+  ModUnionClosure* muc = CollectedHeap::use_parallel_gc_threads() ?
+                                               &_modUnionClosurePar
                                               : &_modUnionClosure;
  _cmsGen->gc_prologue_work(full, registerClosure, muc);
  _permGen->gc_prologue_work(full, registerClosure, muc);
@ -2731,7 +2732,7 @@ void ConcurrentMarkSweepGeneration::gc_epilogue(bool full) {
  collector()->gc_epilogue(full);

  // Also reset promotion tracking in par gc thread states.
-  if (ParallelGCThreads > 0) {
+  if (CollectedHeap::use_parallel_gc_threads()) {
    for (uint i = 0; i < ParallelGCThreads; i++) {
      _par_gc_thread_states[i]->promo.stopTrackingPromotions(i);
    }
@ -3263,6 +3264,7 @@ HeapWord*
 ConcurrentMarkSweepGeneration::expand_and_allocate(size_t word_size,
                                                   bool   tlab,
                                                   bool   parallel) {
+  CMSSynchronousYieldRequest yr;
  assert(!tlab, "Can't deal with TLAB allocation");
  MutexLockerEx x(freelistLock(), Mutex::_no_safepoint_check_flag);
  expand(word_size*HeapWordSize, MinHeapDeltaBytes,
@ -3709,35 +3711,42 @@ class CMSConcMarkingTask;
 class CMSConcMarkingTerminator: public ParallelTaskTerminator {
  CMSCollector*       _collector;
  CMSConcMarkingTask* _task;
-  bool _yield;
- protected:
-  virtual void yield();
 public:
+  virtual void yield();
+
  // "n_threads" is the number of threads to be terminated.
  // "queue_set" is a set of work queues of other threads.
  // "collector" is the CMS collector associated with this task terminator.
  // Whether to yield is no longer a constructor argument; yield() asks the task (see set_task()) via should_yield().
-  CMSConcMarkingTerminator(int n_threads, TaskQueueSetSuper* queue_set,
-                           CMSCollector* collector, bool yield) :
+  CMSConcMarkingTerminator(int n_threads, TaskQueueSetSuper* queue_set, CMSCollector* collector) :
    ParallelTaskTerminator(n_threads, queue_set),
-    _collector(collector),
-    _yield(yield) { }
+    _collector(collector) { }

  void set_task(CMSConcMarkingTask* task) {
    _task = task;
  }
 };

+class CMSConcMarkingTerminatorTerminator: public TerminatorTerminator {  // Predicate object passed to offer_termination() by CMSConcMarkingTask::do_work_steal().
+  CMSConcMarkingTask* _task;  // NOTE(review): not initialized by any constructor here; set_task() must be called before use
+ public:
+  bool should_exit_termination();  // returns _task->yielding()
+  void set_task(CMSConcMarkingTask* task) {  // called from the CMSConcMarkingTask constructor with "this"
+    _task = task;
+  }
+};
+
 // MT Concurrent Marking Task
 class CMSConcMarkingTask: public YieldingFlexibleGangTask {
  CMSCollector* _collector;
-  YieldingFlexibleWorkGang* _workers;        // the whole gang
  int           _n_workers;                  // requested/desired # workers
  bool          _asynch;
  bool          _result;
  CompactibleFreeListSpace*  _cms_space;
  CompactibleFreeListSpace* _perm_space;
-  HeapWord*     _global_finger;
+  char          _pad_front[64];   // padding to ...
+  HeapWord*     _global_finger;   // ... avoid sharing cache line
+  char          _pad_back[64];
  HeapWord*     _restart_addr;

  //  Exposed here for yielding support
@ -3745,28 +3754,30 @@ class CMSConcMarkingTask: public YieldingFlexibleGangTask {

  // The per thread work queues, available here for stealing
  OopTaskQueueSet*  _task_queues;
+
+  // Termination (and yielding) support
  CMSConcMarkingTerminator _term;
+  CMSConcMarkingTerminatorTerminator _term_term;

 public:
  CMSConcMarkingTask(CMSCollector* collector,
                 CompactibleFreeListSpace* cms_space,
                 CompactibleFreeListSpace* perm_space,
-                 bool asynch, int n_workers,
+                 bool asynch,
                 YieldingFlexibleWorkGang* workers,
                 OopTaskQueueSet* task_queues):
    YieldingFlexibleGangTask("Concurrent marking done multi-threaded"),
    _collector(collector),
    _cms_space(cms_space),
    _perm_space(perm_space),
-    _asynch(asynch), _n_workers(n_workers), _result(true),
-    _workers(workers), _task_queues(task_queues),
-    _term(n_workers, task_queues, _collector, asynch),
+    _asynch(asynch), _n_workers(0), _result(true),
+    _task_queues(task_queues),
+    _term(_n_workers, task_queues, _collector),
    _bit_map_lock(collector->bitMapLock())
  {
-    assert(n_workers <= workers->total_workers(),
-           "Else termination won't work correctly today"); // XXX FIX ME!
-    _requested_size = n_workers;
+    _requested_size = _n_workers;
    _term.set_task(this);
+    _term_term.set_task(this);
    assert(_cms_space->bottom() < _perm_space->bottom(),
           "Finger incorrectly initialized below");
    _restart_addr = _global_finger = _cms_space->bottom();
@ -3781,7 +3792,16 @@ class CMSConcMarkingTask: public YieldingFlexibleGangTask {

  CMSConcMarkingTerminator* terminator() { return &_term; }

+  virtual void set_for_termination(int active_workers) {  // The constructor builds _term with _n_workers == 0, so the real thread count is presumably supplied here -- confirm against the work gang caller.
+    terminator()->reset_for_reuse(active_workers);  // forwards active_workers to the task's CMSConcMarkingTerminator
+  }
+
  void work(int i);
+  bool should_yield() {  // True only when all three conditions hold; shared by the terminator, the marking closure and do_work_steal().
+    return    ConcurrentMarkSweepThread::should_yield()  // the CMS thread reports that it should yield
+           && !_collector->foregroundGCIsActive()  // and the collector reports no active foreground GC
+           && _asynch;  // and this task was constructed with asynch == true
+  }

  virtual void coordinator_yield();  // stuff done by coordinator
  bool result() { return _result; }
@ -3803,10 +3823,17 @@ class CMSConcMarkingTask: public YieldingFlexibleGangTask {
  void bump_global_finger(HeapWord* f);
 };

+bool CMSConcMarkingTerminatorTerminator::should_exit_termination() {  // Tells the termination protocol to bail out while the task is yielding.
+  assert(_task != NULL, "Error");  // set_task() must have been called first
+  return _task->yielding();
+  // Note that we do not need the disjunct || _task->should_yield() above
+  // because we want terminating threads to yield only if the task
+  // is already in the midst of yielding, which happens only after at least one
+  // thread has yielded.
+}
+
 void CMSConcMarkingTerminator::yield() {
-  if (ConcurrentMarkSweepThread::should_yield() &&
-      !_collector->foregroundGCIsActive() &&
-      _yield) {
+  if (_task->should_yield()) {
    _task->yield();
  } else {
    ParallelTaskTerminator::yield();
@ -4031,6 +4058,7 @@ void CMSConcMarkingTask::do_scan_and_mark(int i, CompactibleFreeListSpace* sp) {

 class Par_ConcMarkingClosure: public Par_KlassRememberingOopClosure {
 private:
+  CMSConcMarkingTask* _task;
  MemRegion     _span;
  CMSBitMap*    _bit_map;
  CMSMarkStack* _overflow_stack;
@ -4038,11 +4066,12 @@ class Par_ConcMarkingClosure: public Par_KlassRememberingOopClosure {
 protected:
  DO_OOP_WORK_DEFN
 public:
-  Par_ConcMarkingClosure(CMSCollector* collector, OopTaskQueue* work_queue,
+  Par_ConcMarkingClosure(CMSCollector* collector, CMSConcMarkingTask* task, OopTaskQueue* work_queue,
                         CMSBitMap* bit_map, CMSMarkStack* overflow_stack,
                         CMSMarkStack* revisit_stack):
    Par_KlassRememberingOopClosure(collector, NULL, revisit_stack),
-    _span(_collector->_span),
+    _task(task),
+    _span(collector->_span),
    _work_queue(work_queue),
    _bit_map(bit_map),
    _overflow_stack(overflow_stack)
@ -4051,6 +4080,11 @@ class Par_ConcMarkingClosure: public Par_KlassRememberingOopClosure {
  virtual void do_oop(narrowOop* p);
  void trim_queue(size_t max);
  void handle_stack_overflow(HeapWord* lost);
+  void do_yield_check() {  // Called from do_oop(oop) and trim_queue() so a marking worker can yield in the middle of its work.
+    if (_task->should_yield()) {  // ask the owning CMSConcMarkingTask whether a yield is wanted
+      _task->yield();
+    }
+  }
 };

 // Grey object scanning during work stealing phase --
@ -4094,6 +4128,7 @@ void Par_ConcMarkingClosure::do_oop(oop obj) {
        handle_stack_overflow(addr);
      }
    } // Else, some other thread got there first
+    do_yield_check();
  }
 }

@ -4109,6 +4144,7 @@ void Par_ConcMarkingClosure::trim_queue(size_t max) {
      assert(_span.contains((HeapWord*)new_oop), "Not in span");
      assert(new_oop->is_parsable(), "Should be parsable");
      new_oop->oop_iterate(this);  // do_oop() above
+      do_yield_check();
    }
  }
 }
@ -4136,7 +4172,7 @@ void CMSConcMarkingTask::do_work_steal(int i) {
  CMSMarkStack* ovflw = &(_collector->_markStack);
  CMSMarkStack* revisit = &(_collector->_revisitStack);
  int* seed = _collector->hash_seed(i);
-  Par_ConcMarkingClosure cl(_collector, work_q, bm, ovflw, revisit);
+  Par_ConcMarkingClosure cl(_collector, this, work_q, bm, ovflw, revisit);
  while (true) {
    cl.trim_queue(0);
    assert(work_q->size() == 0, "Should have been emptied above");
@ -4149,9 +4185,11 @@ void CMSConcMarkingTask::do_work_steal(int i) {
      assert(obj_to_scan->is_oop(), "Should be an oop");
      assert(bm->isMarked((HeapWord*)obj_to_scan), "Grey object");
      obj_to_scan->oop_iterate(&cl);
-    } else if (terminator()->offer_termination()) {
+    } else if (terminator()->offer_termination(&_term_term)) {
      assert(work_q->size() == 0, "Impossible!");
      break;
+    } else if (yielding() || should_yield()) {
+      yield();
    }
  }
 }
@ -4220,9 +4258,12 @@ bool CMSCollector::do_marking_mt(bool asynch) {
  CompactibleFreeListSpace* cms_space  = _cmsGen->cmsSpace();
  CompactibleFreeListSpace* perm_space = _permGen->cmsSpace();

-  CMSConcMarkingTask tsk(this, cms_space, perm_space,
-                         asynch, num_workers /* number requested XXX */,
-                         conc_workers(), task_queues());
+  CMSConcMarkingTask tsk(this,
+                         cms_space,
+                         perm_space,
+                         asynch,
+                         conc_workers(),
+                         task_queues());

  // Since the actual number of workers we get may be different
  // from the number we requested above, do we need to do anything different
@ -4326,6 +4367,10 @@ void CMSCollector::preclean() {
  verify_overflow_empty();
  _abort_preclean = false;
  if (CMSPrecleaningEnabled) {
+    // Precleaning is currently not MT but the reference processor
+    // may be set for MT.  Disable it temporarily here.
+    ReferenceProcessor* rp = ref_processor();
+    ReferenceProcessorMTProcMutator z(rp, false);
    _eden_chunk_index = 0;
    size_t used = get_eden_used();
    size_t capacity = get_eden_capacity();
@ -4918,7 +4963,7 @@ void CMSCollector::checkpointRootsFinalWork(bool asynch,
      // dirtied since the first checkpoint in this GC cycle and prior to
      // the most recent young generation GC, minus those cleaned up by the
      // concurrent precleaning.
-      if (CMSParallelRemarkEnabled && ParallelGCThreads > 0) {
+      if (CMSParallelRemarkEnabled && CollectedHeap::use_parallel_gc_threads()) {
        TraceTime t("Rescan (parallel) ", PrintGCDetails, false, gclog_or_tty);
        do_remark_parallel();
      } else {
@ -5012,7 +5057,6 @@ void CMSCollector::checkpointRootsFinalWork(bool asynch,
 // Parallel remark task
 class CMSParRemarkTask: public AbstractGangTask {
  CMSCollector* _collector;
-  WorkGang*     _workers;
  int           _n_workers;
  CompactibleFreeListSpace* _cms_space;
  CompactibleFreeListSpace* _perm_space;
@ -5025,21 +5069,21 @@ class CMSParRemarkTask: public AbstractGangTask {
  CMSParRemarkTask(CMSCollector* collector,
                   CompactibleFreeListSpace* cms_space,
                   CompactibleFreeListSpace* perm_space,
-                   int n_workers, WorkGang* workers,
+                   int n_workers, FlexibleWorkGang* workers,
                   OopTaskQueueSet* task_queues):
    AbstractGangTask("Rescan roots and grey objects in parallel"),
    _collector(collector),
    _cms_space(cms_space), _perm_space(perm_space),
    _n_workers(n_workers),
-    _workers(workers),
    _task_queues(task_queues),
-    _term(workers->total_workers(), task_queues) { }
+    _term(n_workers, task_queues) { }

  OopTaskQueueSet* task_queues() { return _task_queues; }

  OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); }

  ParallelTaskTerminator* terminator() { return &_term; }
+  int n_workers() { return _n_workers; }

  void work(int i);

@ -5057,6 +5101,11 @@ class CMSParRemarkTask: public AbstractGangTask {
  void do_work_steal(int i, Par_MarkRefsIntoAndScanClosure* cl, int* seed);
 };

+// work_queue(i) is passed to the closure
+// Par_MarkRefsIntoAndScanClosure.  The "i" parameter
+// also is passed to do_dirty_card_rescan_tasks() and to
+// do_work_steal() to select the i-th task_queue.
+
 void CMSParRemarkTask::work(int i) {
  elapsedTimer _timer;
  ResourceMark rm;
@ -5128,6 +5177,7 @@ void CMSParRemarkTask::work(int i) {

  // Do the rescan tasks for each of the two spaces
  // (cms_space and perm_space) in turn.
+  // "i" is passed to select the "i-th" task_queue
  do_dirty_card_rescan_tasks(_cms_space, i, &par_mrias_cl);
  do_dirty_card_rescan_tasks(_perm_space, i, &par_mrias_cl);
  _timer.stop();
@ -5150,6 +5200,7 @@ void CMSParRemarkTask::work(int i) {
  }
 }

+// Note that parameter "i" is not used.
 void
 CMSParRemarkTask::do_young_space_rescan(int i,
  Par_MarkRefsIntoAndScanClosure* cl, ContiguousSpace* space,
@ -5309,8 +5360,13 @@ CMSParRemarkTask::do_work_steal(int i, Par_MarkRefsIntoAndScanClosure* cl,
    size_t num_from_overflow_list = MIN2((size_t)(work_q->max_elems() - work_q->size())/4,
                                         (size_t)ParGCDesiredObjsFromOverflowList);
    // Now check if there's any work in the overflow list
+    // Passing ParallelGCThreads as the third parameter, no_of_gc_threads,
+    // only affects the number of attempts made to get work from the
+    // overflow list and does not affect the number of workers.  Just
+    // pass ParallelGCThreads so this behavior is unchanged.
    if (_collector->par_take_from_overflow_list(num_from_overflow_list,
-                                                work_q)) {
+                                                work_q,
+                                                ParallelGCThreads)) {
      // found something in global overflow list;
      // not yet ready to go stealing work from others.
      // We'd like to assert(work_q->size() != 0, ...)
@ -5367,11 +5423,12 @@ void CMSCollector::reset_survivor_plab_arrays() {
 // Merge the per-thread plab arrays into the global survivor chunk
 // array which will provide the partitioning of the survivor space
 // for CMS rescan.
-void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv) {
+void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv,
+                                              int no_of_gc_threads) {
  assert(_survivor_plab_array  != NULL, "Error");
  assert(_survivor_chunk_array != NULL, "Error");
  assert(_collectorState == FinalMarking, "Error");
-  for (uint j = 0; j < ParallelGCThreads; j++) {
+  for (int j = 0; j < no_of_gc_threads; j++) {
    _cursor[j] = 0;
  }
  HeapWord* top = surv->top();
@ -5379,7 +5436,7 @@ void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv) {
  for (i = 0; i < _survivor_chunk_capacity; i++) {  // all sca entries
    HeapWord* min_val = top;          // Higher than any PLAB address
    uint      min_tid = 0;            // position of min_val this round
-    for (uint j = 0; j < ParallelGCThreads; j++) {
+    for (int j = 0; j < no_of_gc_threads; j++) {
      ChunkArray* cur_sca = &_survivor_plab_array[j];
      if (_cursor[j] == cur_sca->end()) {
        continue;
@ -5413,7 +5470,7 @@ void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv) {
  // Verify that we used up all the recorded entries
  #ifdef ASSERT
    size_t total = 0;
-    for (uint j = 0; j < ParallelGCThreads; j++) {
+    for (int j = 0; j < no_of_gc_threads; j++) {
      assert(_cursor[j] == _survivor_plab_array[j].end(), "Ctl pt invariant");
      total += _cursor[j];
    }
@ -5448,13 +5505,15 @@ initialize_sequential_subtasks_for_young_gen_rescan(int n_threads) {
    // Each valid entry in [0, _eden_chunk_index) represents a task.
    size_t n_tasks = _eden_chunk_index + 1;
    assert(n_tasks == 1 || _eden_chunk_array != NULL, "Error");
-    pst->set_par_threads(n_threads);
+    // Sets the condition for completion of the subtask (how many threads
+    // need to finish in order to be done).
+    pst->set_n_threads(n_threads);
    pst->set_n_tasks((int)n_tasks);
  }

  // Merge the survivor plab arrays into _survivor_chunk_array
  if (_survivor_plab_array != NULL) {
-    merge_survivor_plab_arrays(dng->from());
+    merge_survivor_plab_arrays(dng->from(), n_threads);
  } else {
    assert(_survivor_chunk_index == 0, "Error");
  }
@ -5463,7 +5522,9 @@ initialize_sequential_subtasks_for_young_gen_rescan(int n_threads) {
  {
    SequentialSubTasksDone* pst = dng->to()->par_seq_tasks();
    assert(!pst->valid(), "Clobbering existing data?");
-    pst->set_par_threads(n_threads);
+    // Sets the condition for completion of the subtask (how many threads
+    // need to finish in order to be done).
+    pst->set_n_threads(n_threads);
    pst->set_n_tasks(1);
    assert(pst->valid(), "Error");
  }
@ -5474,7 +5535,9 @@ initialize_sequential_subtasks_for_young_gen_rescan(int n_threads) {
    assert(!pst->valid(), "Clobbering existing data?");
    size_t n_tasks = _survivor_chunk_index + 1;
    assert(n_tasks == 1 || _survivor_chunk_array != NULL, "Error");
-    pst->set_par_threads(n_threads);
+    // Sets the condition for completion of the subtask (how many threads
+    // need to finish in order to be done).
+    pst->set_n_threads(n_threads);
    pst->set_n_tasks((int)n_tasks);
    assert(pst->valid(), "Error");
  }
@ -5483,7 +5546,7 @@ initialize_sequential_subtasks_for_young_gen_rescan(int n_threads) {
 // Parallel version of remark
 void CMSCollector::do_remark_parallel() {
  GenCollectedHeap* gch = GenCollectedHeap::heap();
-  WorkGang* workers = gch->workers();
+  FlexibleWorkGang* workers = gch->workers();
  assert(workers != NULL, "Need parallel worker threads.");
  int n_workers = workers->total_workers();
  CompactibleFreeListSpace* cms_space  = _cmsGen->cmsSpace();
@ -5636,13 +5699,11 @@ void CMSCollector::do_remark_non_parallel() {
 ////////////////////////////////////////////////////////
 // Parallel Reference Processing Task Proxy Class
 ////////////////////////////////////////////////////////
-class CMSRefProcTaskProxy: public AbstractGangTask {
+class CMSRefProcTaskProxy: public AbstractGangTaskWOopQueues {
  typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
  CMSCollector*          _collector;
  CMSBitMap*             _mark_bit_map;
  const MemRegion        _span;
-  OopTaskQueueSet*       _task_queues;
-  ParallelTaskTerminator _term;
  ProcessTask&           _task;

 public:
@ -5650,24 +5711,21 @@ public:
                      CMSCollector*    collector,
                      const MemRegion& span,
                      CMSBitMap*       mark_bit_map,
-                      int              total_workers,
+                      AbstractWorkGang* workers,
                      OopTaskQueueSet* task_queues):
-    AbstractGangTask("Process referents by policy in parallel"),
+    AbstractGangTaskWOopQueues("Process referents by policy in parallel",
+      task_queues),
    _task(task),
-    _collector(collector), _span(span), _mark_bit_map(mark_bit_map),
-    _task_queues(task_queues),
-    _term(total_workers, task_queues)
+    _collector(collector), _span(span), _mark_bit_map(mark_bit_map)
    {
      assert(_collector->_span.equals(_span) && !_span.is_empty(),
             "Inconsistency in _span");
    }

-  OopTaskQueueSet* task_queues() { return _task_queues; }
+  OopTaskQueueSet* task_queues() { return queues(); }

  OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); }

-  ParallelTaskTerminator* terminator() { return &_term; }
-
  void do_work_steal(int i,
                     CMSParDrainMarkingStackClosure* drain,
                     CMSParKeepAliveClosure* keep_alive,
@ -5739,8 +5797,13 @@ void CMSRefProcTaskProxy::do_work_steal(int i,
    size_t num_from_overflow_list = MIN2((size_t)(work_q->max_elems() - work_q->size())/4,
                                         (size_t)ParGCDesiredObjsFromOverflowList);
    // Now check if there's any work in the overflow list
+    // Passing ParallelGCThreads as the third parameter, no_of_gc_threads,
+    // only affects the number of attempts made to get work from the
+    // overflow list and does not affect the number of workers.  Just
+    // pass ParallelGCThreads so this behavior is unchanged.
    if (_collector->par_take_from_overflow_list(num_from_overflow_list,
-                                                work_q)) {
+                                                work_q,
+                                                ParallelGCThreads)) {
      // Found something in global overflow list;
      // not yet ready to go stealing work from others.
      // We'd like to assert(work_q->size() != 0, ...)
@ -5773,13 +5836,12 @@ void CMSRefProcTaskProxy::do_work_steal(int i,
 void CMSRefProcTaskExecutor::execute(ProcessTask& task)
 {
  GenCollectedHeap* gch = GenCollectedHeap::heap();
-  WorkGang* workers = gch->workers();
+  FlexibleWorkGang* workers = gch->workers();
  assert(workers != NULL, "Need parallel worker threads.");
-  int n_workers = workers->total_workers();
  CMSRefProcTaskProxy rp_task(task, &_collector,
                              _collector.ref_processor()->span(),
                              _collector.markBitMap(),
-                              n_workers, _collector.task_queues());
+                              workers, _collector.task_queues());
  workers->run_task(&rp_task);
 }

@ -5787,7 +5849,7 @@ void CMSRefProcTaskExecutor::execute(EnqueueTask& task)
 {

  GenCollectedHeap* gch = GenCollectedHeap::heap();
-  WorkGang* workers = gch->workers();
+  FlexibleWorkGang* workers = gch->workers();
  assert(workers != NULL, "Need parallel worker threads.");
  CMSRefEnqueueTaskProxy enq_task(task);
  workers->run_task(&enq_task);
@ -5814,6 +5876,14 @@ void CMSCollector::refProcessingWork(bool asynch, bool clear_all_soft_refs) {
  {
    TraceTime t("weak refs processing", PrintGCDetails, false, gclog_or_tty);
    if (rp->processing_is_mt()) {
+      // Set the degree of MT here.  If the discovery is done MT, there
+      // may have been a different number of threads doing the discovery
+      // and a different number of discovered lists may have Ref objects.
+      // That is OK as long as the Reference lists are balanced (see
+      // balance_all_queues() and balance_queues()).
+
+
+      rp->set_mt_degree(ParallelGCThreads);
      CMSRefProcTaskExecutor task_executor(*this);
      rp->process_discovered_references(&_is_alive_closure,
                                        &cmsKeepAliveClosure,
@ -5874,6 +5944,7 @@ void CMSCollector::refProcessingWork(bool asynch, bool clear_all_soft_refs) {

  rp->set_enqueuing_is_done(true);
  if (rp->processing_is_mt()) {
+    rp->balance_all_queues();
    CMSRefProcTaskExecutor task_executor(*this);
    rp->enqueue_discovered_references(&task_executor);
  } else {
@ -8708,7 +8779,8 @@ bool CMSCollector::take_from_overflow_list(size_t num, CMSMarkStack* stack) {
 // similar changes might be needed.
 // CR 6797058 has been filed to consolidate the common code.
 bool CMSCollector::par_take_from_overflow_list(size_t num,
-                                               OopTaskQueue* work_q) {
+                                               OopTaskQueue* work_q,
+                                               int no_of_gc_threads) {
  assert(work_q->size() == 0, "First empty local work queue");
  assert(num < work_q->max_elems(), "Can't bite more than we can chew");
  if (_overflow_list == NULL) {
@ -8717,7 +8789,9 @@ bool CMSCollector::par_take_from_overflow_list(size_t num,
  // Grab the entire list; we'll put back a suffix
  oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
  Thread* tid = Thread::current();
-  size_t CMSOverflowSpinCount = (size_t)ParallelGCThreads;
+  // Before "no_of_gc_threads" was introduced CMSOverflowSpinCount was
+  // set to ParallelGCThreads.
+  size_t CMSOverflowSpinCount = (size_t) no_of_gc_threads; // was ParallelGCThreads;
  size_t sleep_time_millis = MAX2((size_t)1, num/100);
  // If the list is busy, we spin for a short while,
  // sleeping between attempts to get the list.
@ -8867,23 +8941,10 @@ void CMSCollector::par_push_on_overflow_list(oop p) {
 // failures where possible, thus, incrementally hardening the VM
 // in such low resource situations.
 void CMSCollector::preserve_mark_work(oop p, markOop m) {
-  if (_preserved_oop_stack == NULL) {
-    assert(_preserved_mark_stack == NULL,
-           "bijection with preserved_oop_stack");
-    // Allocate the stacks
-    _preserved_oop_stack  = new (ResourceObj::C_HEAP)
-      GrowableArray<oop>(PreserveMarkStackSize, true);
-    _preserved_mark_stack = new (ResourceObj::C_HEAP)
-      GrowableArray<markOop>(PreserveMarkStackSize, true);
-    if (_preserved_oop_stack == NULL || _preserved_mark_stack == NULL) {
-      vm_exit_out_of_memory(2* PreserveMarkStackSize * sizeof(oop) /* punt */,
-                            "Preserved Mark/Oop Stack for CMS (C-heap)");
-    }
-  }
-  _preserved_oop_stack->push(p);
-  _preserved_mark_stack->push(m);
+  _preserved_oop_stack.push(p);
+  _preserved_mark_stack.push(m);
  assert(m == p->mark(), "Mark word changed");
-  assert(_preserved_oop_stack->length() == _preserved_mark_stack->length(),
+  assert(_preserved_oop_stack.size() == _preserved_mark_stack.size(),
         "bijection");
 }

@ -8925,42 +8986,30 @@ void CMSCollector::par_preserve_mark_if_necessary(oop p) {
 // effect on performance so great that this will
 // likely just be in the noise anyway.
 void CMSCollector::restore_preserved_marks_if_any() {
-  if (_preserved_oop_stack == NULL) {
-    assert(_preserved_mark_stack == NULL,
-           "bijection with preserved_oop_stack");
-    return;
-  }
-
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  assert(Thread::current()->is_ConcurrentGC_thread() ||
         Thread::current()->is_VM_thread(),
         "should be single-threaded");
+  assert(_preserved_oop_stack.size() == _preserved_mark_stack.size(),
+         "bijection");

-  int length = _preserved_oop_stack->length();
-  assert(_preserved_mark_stack->length() == length, "bijection");
-  for (int i = 0; i < length; i++) {
-    oop p = _preserved_oop_stack->at(i);
+  while (!_preserved_oop_stack.is_empty()) {
+    oop p = _preserved_oop_stack.pop();
    assert(p->is_oop(), "Should be an oop");
    assert(_span.contains(p), "oop should be in _span");
    assert(p->mark() == markOopDesc::prototype(),
           "Set when taken from overflow list");
-    markOop m = _preserved_mark_stack->at(i);
+    markOop m = _preserved_mark_stack.pop();
    p->set_mark(m);
  }
-  _preserved_mark_stack->clear();
-  _preserved_oop_stack->clear();
-  assert(_preserved_mark_stack->is_empty() &&
-         _preserved_oop_stack->is_empty(),
+  assert(_preserved_mark_stack.is_empty() && _preserved_oop_stack.is_empty(),
         "stacks were cleared above");
 }

 #ifndef PRODUCT
 bool CMSCollector::no_preserved_marks() const {
-  return (   (   _preserved_mark_stack == NULL
-              && _preserved_oop_stack == NULL)
-          || (   _preserved_mark_stack->is_empty()
-              && _preserved_oop_stack->is_empty()));
+  return _preserved_mark_stack.is_empty() && _preserved_oop_stack.is_empty();
 }
 #endif

@ -9256,4 +9305,3 @@ TraceCMSMemoryManagerStats::TraceCMSMemoryManagerStats(): TraceMemoryManagerStat
             true /* recordGCEndTime */,
             true /* countCollection */ );
 }
-
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp
@ -537,8 +537,8 @@ class CMSCollector: public CHeapObj {
  // The following array-pair keeps track of mark words
  // displaced for accomodating overflow list above.
  // This code will likely be revisited under RFE#4922830.
-  GrowableArray<oop>*     _preserved_oop_stack;
-  GrowableArray<markOop>* _preserved_mark_stack;
+  Stack<oop>     _preserved_oop_stack;
+  Stack<markOop> _preserved_mark_stack;

  int*             _hash_seed;

@ -729,7 +729,9 @@ class CMSCollector: public CHeapObj {

  // Support for marking stack overflow handling
  bool take_from_overflow_list(size_t num, CMSMarkStack* to_stack);
-  bool par_take_from_overflow_list(size_t num, OopTaskQueue* to_work_q);
+  bool par_take_from_overflow_list(size_t num,
+                                   OopTaskQueue* to_work_q,
+                                   int no_of_gc_threads);
  void push_on_overflow_list(oop p);
  void par_push_on_overflow_list(oop p);
  // the following is, obviously, not, in general, "MT-stable"
@ -768,7 +770,7 @@ class CMSCollector: public CHeapObj {
  void abortable_preclean(); // Preclean while looking for possible abort
  void initialize_sequential_subtasks_for_young_gen_rescan(int i);
  // Helper function for above; merge-sorts the per-thread plab samples
-  void merge_survivor_plab_arrays(ContiguousSpace* surv);
+  void merge_survivor_plab_arrays(ContiguousSpace* surv, int no_of_gc_threads);
  // Resets (i.e. clears) the per-thread plab sample vectors
  void reset_survivor_plab_arrays();

--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepThread.hpp
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepThread.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2006, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -123,24 +123,44 @@ class ConcurrentMarkSweepThread: public ConcurrentGCThread {
  // or given timeout, whichever is earlier.
  void    wait_on_cms_lock(long t); // milliseconds

-  // The CMS thread will yield during the work portion of it's cycle
+  // The CMS thread will yield during the work portion of its cycle
  // only when requested to.  Both synchronous and asychronous requests
-  // are provided.  A synchronous request is used for young gen
-  // collections and direct allocations.  The requesting thread increments
-  // pending_yields at the beginning of an operation, and decrements it when
-  // the operation is completed.  The CMS thread yields when pending_yields
-  // is positive.  An asynchronous request is used by iCMS in the stop_icms()
-  // operation. A single yield satisfies the outstanding asynch yield requests.
-  // The requesting thread increments both pending_yields and pending_decrements.
-  // After yielding, the CMS thread decrements both by the amount in
-  // pending_decrements.
+  // are provided:
+  // (1) A synchronous request is used for young gen collections and
+  //     for direct allocations.  The requesting thread increments
+  //     _pending_yields at the beginning of an operation, and decrements
+  //     _pending_yields when that operation is completed.
+  //     In turn, the CMS thread yields when _pending_yields is positive,
+  //     and continues to yield until the value reverts to 0.
+  // (2) An asynchronous request, on the other hand, is used by iCMS
+  //     for the stop_icms() operation. A single yield satisfies all of
+  //     the outstanding asynch yield requests, of which there may
+  //     occasionally be several in close succession. To accomplish
+  //     this, an asynch-requesting thread atomically increments both
+  //     _pending_yields and _pending_decrements. An asynch-requesting
+  //     thread does not wait and "acknowledge" completion of an operation
+  //     and deregister the request, like the synchronous version described
+  //     above does. In turn, after yielding, the CMS thread decrements both
+  //     _pending_yields and _pending_decrements by the value seen in
+  //     _pending_decrements before the decrement.
+  //  NOTE: The above scheme is isomorphic to having two request counters,
+  //  one for async requests and one for sync requests, and for the CMS thread
+  //  to check the sum of the two counters to decide whether it should yield
+  //  and to clear only the async counter when it yields. However, it turns out
+  //  to be more efficient for CMS code to just check a single counter
+  //  _pending_yields that holds the sum (of both sync and async requests), and
+  //  a second counter _pending_decrements that holds only the async requests,
+  //  since in a typical CMS run there are many more
+  //  potential (i.e. static) yield points than there are actual
+  //  (i.e. dynamic) yields because of requests, which are few and far between.
+  //
  // Note that, while "_pending_yields >= _pending_decrements" is an invariant,
  // we cannot easily test that invariant, since the counters are manipulated via
  // atomic instructions without explicit locking and we cannot read
  // the two counters atomically together: one suggestion is to
  // use (for example) 16-bit counters so as to be able to read the
  // two counters atomically even on 32-bit platforms. Notice that
-  // the second assert in acknowledge_yield_request() does indeed
+  // the second assert in acknowledge_yield_request() below does indeed
  // check a form of the above invariant, albeit indirectly.

  static void increment_pending_yields()   {
@ -152,6 +172,7 @@ class ConcurrentMarkSweepThread: public ConcurrentGCThread {
    assert(_pending_yields >= 0, "can't be negative");
  }
  static void asynchronous_yield_request() {
+    assert(CMSIncrementalMode, "Currently only used w/iCMS");
    increment_pending_yields();
    Atomic::inc(&_pending_decrements);
    assert(_pending_decrements >= 0, "can't be negative");
@ -159,6 +180,7 @@ class ConcurrentMarkSweepThread: public ConcurrentGCThread {
  static void acknowledge_yield_request() {
    jint decrement = _pending_decrements;
    if (decrement > 0) {
+      assert(CMSIncrementalMode, "Currently only used w/iCMS");
      // Order important to preserve: _pending_yields >= _pending_decrements
      Atomic::add(-decrement, &_pending_decrements);
      Atomic::add(-decrement, &_pending_yields);
@ -195,7 +217,7 @@ inline void ConcurrentMarkSweepThread::trace_state(const char* desc) {
  }
 }

-// For scoped increment/decrement of yield requests
+// For scoped increment/decrement of (synchronous) yield requests
 class CMSSynchronousYieldRequest: public StackObj {
 public:
  CMSSynchronousYieldRequest() {
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -278,15 +278,16 @@ CMRegionStack::~CMRegionStack() {
  if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
 }

-void CMRegionStack::push(MemRegion mr) {
+void CMRegionStack::push_lock_free(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  while (true) {
-    if (isFull()) {
+    jint index = _index;
+
+    if (index >= _capacity) {
      _overflow = true;
      return;
    }
    // Otherwise...
-    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
@ -297,19 +298,17 @@ void CMRegionStack::push(MemRegion mr) {
  }
 }

-// Currently we do not call this at all. Normally we would call it
-// during the concurrent marking / remark phases but we now call
-// the lock-based version instead. But we might want to resurrect this
-// code in the future. So, we'll leave it here commented out.
-#if 0
-MemRegion CMRegionStack::pop() {
+// Lock-free pop of the region stack. Called during the concurrent
+// marking / remark phases. Should only be called in tandem with
+// other lock-free pops.
+MemRegion CMRegionStack::pop_lock_free() {
  while (true) {
-    // Otherwise...
    jint index = _index;

    if (index == 0) {
      return MemRegion();
    }
+    // Otherwise...
    jint next_index = index-1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
@ -326,7 +325,11 @@ MemRegion CMRegionStack::pop() {
    // Otherwise, we need to try again.
  }
 }
-#endif // 0
+
+#if 0
+// The routines that manipulate the region stack with a lock are
+// not currently used. They should be retained, however, as a
+// diagnostic aid.

 void CMRegionStack::push_with_lock(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
@ -361,6 +364,7 @@ MemRegion CMRegionStack::pop_with_lock() {
    }
  }
 }
+#endif

 bool CMRegionStack::invalidate_entries_into_cset() {
  bool result = false;
@ -583,10 +587,13 @@ ConcurrentMark::ConcurrentMark(ReservedSpace rs,
 #endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
-    _parallel_workers = new WorkGang("G1 Parallel Marking Threads",
-                                     (int) parallel_marking_threads(), false, true);
-    if (_parallel_workers == NULL)
+    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
+         (int) _parallel_marking_threads, false, true);
+    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
+    } else {
+      _parallel_workers->initialize_workers();
+    }
  }

  // so that the call below can read a sensible value
@ -645,8 +652,9 @@ void ConcurrentMark::reset() {
  // We do reset all of them, since different phases will use
  // different number of active threads. So, it's easiest to have all
  // of them ready.
-  for (int i = 0; i < (int) _max_task_num; ++i)
+  for (int i = 0; i < (int) _max_task_num; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
+  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
@ -985,7 +993,7 @@ void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
                             "below the finger, pushing it",
                             mr.start(), mr.end());

-    if (!region_stack_push(mr)) {
+    if (!region_stack_push_lock_free(mr)) {
      if (verbose_low())
        gclog_or_tty->print_cr("[global] region stack has overflown.");
    }
@ -1451,7 +1459,7 @@ public:
                                  _bm, _g1h->concurrent_mark(),
                                  _region_bm, _card_bm);
    calccl.no_yield();
-    if (ParallelGCThreads > 0) {
+    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&calccl, i,
                                            HeapRegion::FinalCountClaimValue);
    } else {
@ -1531,7 +1539,7 @@ public:
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
                                           &_par_cleanup_thread_state[i]->list,
                                           i);
-    if (ParallelGCThreads > 0) {
+    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
                                            HeapRegion::NoteEndClaimValue);
    } else {
@ -1575,7 +1583,7 @@ public:
  {}

  void work(int i) {
-    if (ParallelGCThreads > 0) {
+    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1rs->scrub_par(_region_bm, _card_bm, i,
                       HeapRegion::ScrubRemSetClaimValue);
    } else {
@ -1647,7 +1655,7 @@ void ConcurrentMark::cleanup() {
  // Do counting once more with the world stopped for good measure.
  G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
                                        &_region_bm, &_card_bm);
-  if (ParallelGCThreads > 0) {
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
    assert(g1h->check_heap_region_claim_values(
                                               HeapRegion::InitialClaimValue),
           "sanity check");
@ -1695,7 +1703,7 @@ void ConcurrentMark::cleanup() {
  // Note end of marking in all heap regions.
  double note_end_start = os::elapsedTime();
  G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
-  if (ParallelGCThreads > 0) {
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
    int n_workers = g1h->workers()->total_workers();
    g1h->set_par_threads(n_workers);
    g1h->workers()->run_task(&g1_par_note_end_task);
@ -1720,7 +1728,7 @@ void ConcurrentMark::cleanup() {
  if (G1ScrubRemSets) {
    double rs_scrub_start = os::elapsedTime();
    G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
-    if (ParallelGCThreads > 0) {
+    if (G1CollectedHeap::use_parallel_gc_threads()) {
      int n_workers = g1h->workers()->total_workers();
      g1h->set_par_threads(n_workers);
      g1h->workers()->run_task(&g1_par_scrub_rs_task);
@ -1934,7 +1942,7 @@ void ConcurrentMark::checkpointRootsFinalWork() {

  g1h->ensure_parsability(false);

-  if (ParallelGCThreads > 0) {
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    // this is remark, so we'll use up all available threads
    int active_workers = ParallelGCThreads;
@ -2330,6 +2338,39 @@ ConcurrentMark::claim_region(int task_num) {
  return NULL;
 }

+bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
+  bool result = false;
+  for (int i = 0; i < (int)_max_task_num; ++i) {
+    CMTask* the_task = _tasks[i];
+    MemRegion mr = the_task->aborted_region();
+    if (mr.start() != NULL) {
+      assert(mr.end() != NULL, "invariant");
+      assert(mr.word_size() > 0, "invariant");
+      HeapRegion* hr = _g1h->heap_region_containing(mr.start());
+      assert(hr != NULL, "invariant");
+      if (hr->in_collection_set()) {
+        // The region points into the collection set
+        the_task->set_aborted_region(MemRegion());
+        result = true;
+      }
+    }
+  }
+  return result;
+}
+
+bool ConcurrentMark::has_aborted_regions() {
+  for (int i = 0; i < (int)_max_task_num; ++i) {
+    CMTask* the_task = _tasks[i];
+    MemRegion mr = the_task->aborted_region();
+    if (mr.start() != NULL) {
+      assert(mr.end() != NULL, "invariant");
+      assert(mr.word_size() > 0, "invariant");
+      return true;
+    }
+  }
+  return false;
+}
+
 void ConcurrentMark::oops_do(OopClosure* cl) {
  if (_markStack.size() > 0 && verbose_low())
    gclog_or_tty->print_cr("[global] scanning the global marking stack, "
@ -2348,13 +2389,22 @@ void ConcurrentMark::oops_do(OopClosure* cl) {
    queue->oops_do(cl);
  }

-  // finally, invalidate any entries that in the region stack that
+  // Invalidate any entries in the region stack that
  // point into the collection set
  if (_regionStack.invalidate_entries_into_cset()) {
    // otherwise, any gray objects copied during the evacuation pause
    // might not be visited.
    assert(_should_gray_objects, "invariant");
  }
+
+  // Invalidate any aborted regions, recorded in the individual CM
+  // tasks, that point into the collection set.
+  if (invalidate_aborted_regions_in_cset()) {
+    // otherwise, any gray objects copied during the evacuation pause
+    // might not be visited.
+    assert(_should_gray_objects, "invariant");
+  }
+
 }

 void ConcurrentMark::clear_marking_state() {
@ -2635,7 +2685,7 @@ void ConcurrentMark::newCSet() {
  // irrespective whether all collection set regions are below the
  // finger, if the region stack is not empty. This is expected to be
  // a rare case, so I don't think it's necessary to be smarted about it.
-  if (!region_stack_empty())
+  if (!region_stack_empty() || has_aborted_regions())
    _should_gray_objects = true;
 }

@ -2654,8 +2704,10 @@ void ConcurrentMark::abort() {
  _nextMarkBitMap->clearAll();
  // Empty mark stack
  clear_marking_state();
-  for (int i = 0; i < (int)_max_task_num; ++i)
+  for (int i = 0; i < (int)_max_task_num; ++i) {
    _tasks[i]->clear_region_fields();
+    _tasks[i]->clear_aborted_region();
+  }
  _has_aborted = true;

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
@ -2933,6 +2985,7 @@ void CMTask::reset(CMBitMap* nextMarkBitMap) {

  _nextMarkBitMap                = nextMarkBitMap;
  clear_region_fields();
+  clear_aborted_region();

  _calls                         = 0;
  _elapsed_time_ms               = 0.0;
@ -3369,14 +3422,14 @@ void CMTask::drain_satb_buffers() {

  CMObjectClosure oc(this);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
-  if (ParallelGCThreads > 0)
+  if (G1CollectedHeap::use_parallel_gc_threads())
    satb_mq_set.set_par_closure(_task_id, &oc);
  else
    satb_mq_set.set_closure(&oc);

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
-  if (ParallelGCThreads > 0) {
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
    while (!has_aborted() &&
           satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
      if (_cm->verbose_medium())
@ -3396,7 +3449,7 @@ void CMTask::drain_satb_buffers() {

  if (!concurrent() && !has_aborted()) {
    // We should only do this during remark.
-    if (ParallelGCThreads > 0)
+    if (G1CollectedHeap::use_parallel_gc_threads())
      satb_mq_set.par_iterate_closure_all_threads(_task_id);
    else
      satb_mq_set.iterate_closure_all_threads();
@ -3408,7 +3461,7 @@ void CMTask::drain_satb_buffers() {
         concurrent() ||
         satb_mq_set.completed_buffers_num() == 0, "invariant");

-  if (ParallelGCThreads > 0)
+  if (G1CollectedHeap::use_parallel_gc_threads())
    satb_mq_set.set_par_closure(_task_id, NULL);
  else
    satb_mq_set.set_closure(NULL);
@ -3425,20 +3478,32 @@ void CMTask::drain_region_stack(BitMapClosure* bc) {
  assert(_region_finger == NULL,
         "it should be NULL when we're not scanning a region");

-  if (!_cm->region_stack_empty()) {
+  if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
                             _task_id, _cm->region_stack_size());

-    MemRegion mr = _cm->region_stack_pop_with_lock();
+    MemRegion mr;
+
+    if (!_aborted_region.is_empty()) {
+      mr = _aborted_region;
+      _aborted_region = MemRegion();
+
+      if (_cm->verbose_low())
+        gclog_or_tty->print_cr("[%d] scanning aborted region [ " PTR_FORMAT ", " PTR_FORMAT " )",
+                             _task_id, mr.start(), mr.end());
+    } else {
+      mr = _cm->region_stack_pop_lock_free();
      // it returns MemRegion() if the pop fails
      statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
+    }

    while (mr.start() != NULL) {
      if (_cm->verbose_medium())
        gclog_or_tty->print_cr("[%d] we are scanning region "
                               "["PTR_FORMAT", "PTR_FORMAT")",
                               _task_id, mr.start(), mr.end());
+
      assert(mr.end() <= _cm->finger(),
             "otherwise the region shouldn't be on the stack");
      assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
@ -3451,7 +3516,7 @@ void CMTask::drain_region_stack(BitMapClosure* bc) {
        if (has_aborted())
          mr = MemRegion();
        else {
-          mr = _cm->region_stack_pop_with_lock();
+          mr = _cm->region_stack_pop_lock_free();
          // it returns MemRegion() if the pop fails
          statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
        }
@ -3465,6 +3530,10 @@ void CMTask::drain_region_stack(BitMapClosure* bc) {
        // have definitely set _region_finger to something non-null.
        assert(_region_finger != NULL, "invariant");

+        // Make sure that any previously aborted region has been
+        // cleared.
+        assert(_aborted_region.is_empty(), "aborted region not cleared");
+
        // The iteration was actually aborted. So now _region_finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
@ -3477,14 +3546,14 @@ void CMTask::drain_region_stack(BitMapClosure* bc) {

        if (!newRegion.is_empty()) {
          if (_cm->verbose_low()) {
-            gclog_or_tty->print_cr("[%d] pushing unscanned region"
-                                   "[" PTR_FORMAT "," PTR_FORMAT ") on region stack",
+            gclog_or_tty->print_cr("[%d] recording unscanned region"
+                                   "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
                                   _task_id,
                                   newRegion.start(), newRegion.end());
          }
-          // Now push the part of the region we didn't scan on the
-          // region stack to make sure a task scans it later.
-          _cm->region_stack_push_with_lock(newRegion);
+          // Now record the part of the region we didn't scan to
+          // make sure this task scans it later.
+          _aborted_region = newRegion;
        }
        // break from while
        mr = MemRegion();
@ -3654,6 +3723,8 @@ void CMTask::do_marking_step(double time_target_ms) {

  assert(concurrent() || _cm->region_stack_empty(),
         "the region stack should have been cleared before remark");
+  assert(concurrent() || !_cm->has_aborted_regions(),
+         "aborted regions should have been cleared before remark");
  assert(_region_finger == NULL,
         "this should be non-null only when a region is being scanned");

@ -3943,6 +4014,7 @@ void CMTask::do_marking_step(double time_target_ms) {
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
+      guarantee(_aborted_region.is_empty(), "only way to reach here");
      guarantee(_cm->region_stack_empty(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
@ -4042,7 +4114,8 @@ CMTask::CMTask(int task_id,
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
-    _oop_closure(NULL) {
+    _oop_closure(NULL),
+    _aborted_region(MemRegion()) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp
@ -250,21 +250,23 @@ public:

  // This is lock-free; assumes that it will only be called in parallel
  // with other "push" operations (no pops).
-  void push(MemRegion mr);
-
-#if 0
-  // This is currently not used. See the comment in the .cpp file.
+  void push_lock_free(MemRegion mr);

  // Lock-free; assumes that it will only be called in parallel
  // with other "pop" operations (no pushes).
-  MemRegion pop();
-#endif // 0
+  MemRegion pop_lock_free();
+
+#if 0
+  // The routines that manipulate the region stack with a lock are
+  // not currently used. They should be retained, however, as a
+  // diagnostic aid.

  // These two are the implementations that use a lock. They can be
  // called concurrently with each other but they should not be called
  // concurrently with the lock-free versions (push() / pop()).
  void push_with_lock(MemRegion mr);
  MemRegion pop_with_lock();
+#endif

  bool isEmpty()    { return _index == 0; }
  bool isFull()     { return _index == _capacity; }
@ -398,6 +400,7 @@ protected:
  volatile bool           _concurrent;
  // set at the end of a Full GC so that marking aborts
  volatile bool           _has_aborted;
+
  // used when remark aborts due to an overflow to indicate that
  // another concurrent marking phase should start
  volatile bool           _restart_for_overflow;
@ -548,23 +551,30 @@ public:
  bool mark_stack_overflow()            { return _markStack.overflow(); }
  bool mark_stack_empty()               { return _markStack.isEmpty(); }

-  // Manipulation of the region stack
-  bool region_stack_push(MemRegion mr) {
+  // (Lock-free) Manipulation of the region stack
+  bool region_stack_push_lock_free(MemRegion mr) {
    // Currently we only call the lock-free version during evacuation
    // pauses.
    assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");

-    _regionStack.push(mr);
+    _regionStack.push_lock_free(mr);
    if (_regionStack.overflow()) {
      set_has_overflown();
      return false;
    }
    return true;
  }
+
+  // Lock-free version of region-stack pop. Should only be
+  // called in tandem with other lock-free pops.
+  MemRegion region_stack_pop_lock_free() {
+    return _regionStack.pop_lock_free();
+  }
+
 #if 0
-  // Currently this is not used. See the comment in the .cpp file.
-  MemRegion region_stack_pop() { return _regionStack.pop(); }
-#endif // 0
+  // The routines that manipulate the region stack with a lock are
+  // not currently used. They should be retained, however, as a
+  // diagnostic aid.

  bool region_stack_push_with_lock(MemRegion mr) {
    // Currently we only call the lock-based version during either
@ -579,6 +589,7 @@ public:
    }
    return true;
  }
+
  MemRegion region_stack_pop_with_lock() {
    // Currently we only call the lock-based version during either
    // concurrent marking or remark.
@ -587,11 +598,21 @@ public:

    return _regionStack.pop_with_lock();
  }
+#endif

  int region_stack_size()               { return _regionStack.size(); }
  bool region_stack_overflow()          { return _regionStack.overflow(); }
  bool region_stack_empty()             { return _regionStack.isEmpty(); }

+  // Iterate over any regions that were aborted while draining the
+  // region stack (any such regions are saved in the corresponding
+  // CMTask) and invalidate (i.e. assign to the empty MemRegion())
+  // any regions that point into the collection set.
+  bool invalidate_aborted_regions_in_cset();
+
+  // Returns true if there are any aborted memory regions.
+  bool has_aborted_regions();
+
  bool concurrent_marking_in_progress() {
    return _concurrent_marking_in_progress;
  }
@ -856,6 +877,15 @@ private:
  // stack.
  HeapWord*                   _region_finger;

+  // If we abort while scanning a region we record the remaining
+  // unscanned portion and check this field when marking restarts.
+  // This avoids having to push on the region stack while other
+  // marking threads may still be popping regions.
+  // If we were to push the unscanned portion directly to the
+  // region stack then we would need to use locking versions
+  // of the push and pop operations.
+  MemRegion                   _aborted_region;
+
  // the number of words this task has scanned
  size_t                      _words_scanned;
  // When _words_scanned reaches this limit, the regular clock is
@ -1012,6 +1042,15 @@ public:
  void clear_has_aborted()      { _has_aborted = false; }
  bool claimed() { return _claimed; }

+  // Support routines for the partially scanned region that may be
+  // recorded as a result of aborting while draining the CMRegionStack
+  MemRegion aborted_region()    { return _aborted_region; }
+  void set_aborted_region(MemRegion mr)
+                                { _aborted_region = mr; }
+
+  // Clears any recorded partially scanned region
+  void clear_aborted_region()   { set_aborted_region(MemRegion()); }
+
  void set_oop_closure(OopClosure* oop_closure) {
    _oop_closure = oop_closure;
  }
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp
@ -303,9 +303,10 @@ void ConcurrentMarkThread::print_on(outputStream* st) const {
 }

 void ConcurrentMarkThread::sleepBeforeNextCycle() {
-  clear_in_progress();
  // We join here because we don't want to do the "shouldConcurrentMark()"
  // below while the world is otherwise stopped.
+  assert(!in_progress(), "should have been cleared");
+
  MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
  while (!started()) {
    CGC_lock->wait(Mutex::_no_safepoint_check_flag);
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp
@ -69,12 +69,12 @@ class ConcurrentMarkThread: public ConcurrentGCThread {

  ConcurrentMark* cm()     { return _cm; }

-  void set_started()       { _started = true;  }
-  void clear_started()     { _started = false; }
+  void set_started()       { assert(!_in_progress, "cycle in progress"); _started = true;  }
+  void clear_started()     { assert(_in_progress, "must be starting a cycle"); _started = false; }
  bool started()           { return _started;  }

-  void set_in_progress()   { _in_progress = true;  }
-  void clear_in_progress() { _in_progress = false; }
+  void set_in_progress()   { assert(_started, "must be starting a cycle"); _in_progress = true;  }
+  void clear_in_progress() { assert(!_started, "must not be starting a new cycle"); _in_progress = false; }
  bool in_progress()       { return _in_progress;  }

  // This flag returns true from the moment a marking cycle is
--- a/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp
@ -37,11 +37,10 @@ public:
 class DirtyCardQueue: public PtrQueue {
 public:
  DirtyCardQueue(PtrQueueSet* qset_, bool perm = false) :
-    PtrQueue(qset_, perm)
-  {
-    // Dirty card queues are always active.
-    _active = true;
-  }
+    // Dirty card queues are always active, so we create them with their
+    // active field set to true.
+    PtrQueue(qset_, perm, true /* active */) { }
+
  // Apply the closure to all elements, and reset the index to make the
  // buffer empty.  If a closure application returns "false", return
  // "false" immediately, halting the iteration.  If "consume" is true,
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
@ -961,7 +961,8 @@ void G1CollectedHeap::do_collection(bool explicit_gc,
    }

    // Rebuild remembered sets of all regions.
-    if (ParallelGCThreads > 0) {
+
+    if (G1CollectedHeap::use_parallel_gc_threads()) {
      ParRebuildRSTask rebuild_rs_task(this);
      assert(check_heap_region_claim_values(
             HeapRegion::InitialClaimValue), "sanity check");
@ -1784,6 +1785,14 @@ void G1CollectedHeap::increment_full_collections_completed(bool outer) {

  _full_collections_completed += 1;

+  // We need to clear the "in_progress" flag in the CM thread before
+  // we wake up any waiters (especially when ExplicitGCInvokesConcurrent
+  // is set) so that if a waiter requests another System.gc() it doesn't
+  // incorrectly see that a marking cycle is still in progress.
+  if (outer) {
+    _cmThread->clear_in_progress();
+  }
+
  // This notify_all() will ensure that a thread that called
  // System.gc() (with ExplicitGCInvokesConcurrent set or not)
  // and is waiting for a full GC to finish will be woken up. It is
@ -1960,7 +1969,7 @@ G1CollectedHeap::heap_region_par_iterate_chunked(HeapRegionClosure* cl,
                                                 int worker,
                                                 jint claim_value) {
  const size_t regions = n_regions();
-  const size_t worker_num = (ParallelGCThreads > 0 ? ParallelGCThreads : 1);
+  const size_t worker_num = (G1CollectedHeap::use_parallel_gc_threads() ? ParallelGCThreads : 1);
  // try to spread out the starting points of the workers
  const size_t start_index = regions / worker_num * (size_t) worker;

@ -2527,7 +2536,7 @@ void G1CollectedHeap::print_on_extended(outputStream* st) const {
 }

 void G1CollectedHeap::print_gc_threads_on(outputStream* st) const {
-  if (ParallelGCThreads > 0) {
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
    workers()->print_worker_threads_on(st);
  }

@ -2543,7 +2552,7 @@ void G1CollectedHeap::print_gc_threads_on(outputStream* st) const {
 }

 void G1CollectedHeap::gc_threads_do(ThreadClosure* tc) const {
-  if (ParallelGCThreads > 0) {
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
    workers()->threads_do(tc);
  }
  tc->do_thread(_cmThread);
@ -3083,7 +3092,7 @@ void G1CollectedHeap::set_gc_alloc_region(int purpose, HeapRegion* r) {
  if (r != NULL) {
    r_used = r->used();

-    if (ParallelGCThreads > 0) {
+    if (G1CollectedHeap::use_parallel_gc_threads()) {
      // need to take the lock to guard against two threads calling
      // get_gc_alloc_region concurrently (very unlikely but...)
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
@ -4182,6 +4191,8 @@ public:

 // *** Common G1 Evacuation Stuff

+// This method is run in a GC worker.
+
 void
 G1CollectedHeap::
 g1_process_strong_roots(bool collecting_perm_gen,
@ -4259,7 +4270,7 @@ public:
 };

 void G1CollectedHeap::save_marks() {
-  if (ParallelGCThreads == 0) {
+  if (!CollectedHeap::use_parallel_gc_threads()) {
    SaveMarksClosure sm;
    heap_region_iterate(&sm);
  }
@ -4284,7 +4295,7 @@ void G1CollectedHeap::evacuate_collection_set() {

  assert(dirty_card_queue_set().completed_buffers_num() == 0, "Should be empty");
  double start_par = os::elapsedTime();
-  if (ParallelGCThreads > 0) {
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
    // The individual threads will set their evac-failure closures.
    StrongRootsScope srs(this);
    if (ParallelGCVerbose) G1ParScanThreadState::print_termination_stats_hdr();
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp
@ -656,6 +656,9 @@ protected:
  bool _unclean_regions_coming;

 public:
+
+  SubTasksDone* process_strong_tasks() { return _process_strong_tasks; }
+
  void set_refine_cte_cl_concurrency(bool concurrent);

  RefToScanQueue *task_queue(int i) const;
@ -684,7 +687,7 @@ public:

  void set_par_threads(int t) {
    SharedHeap::set_par_threads(t);
-    _process_strong_tasks->set_par_threads(t);
+    _process_strong_tasks->set_n_threads(t);
  }

  virtual CollectedHeap::Name kind() const {
@ -1688,8 +1691,8 @@ public:
    ref = new_ref;
  }

-  int refs_to_scan()            { return refs()->size(); }
-  int overflowed_refs_to_scan() { return refs()->overflow_stack()->length(); }
+  int refs_to_scan()            { return (int)refs()->size(); }
+  int overflowed_refs_to_scan() { return (int)refs()->overflow_stack()->size(); }

  template <class T> void update_rs(HeapRegion* from, T* p, int tid) {
    if (G1DeferredRSUpdate) {
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
@ -72,7 +72,10 @@ static double non_young_other_cost_per_region_ms_defaults[] = {
 // </NEW PREDICTION>

 G1CollectorPolicy::G1CollectorPolicy() :
-  _parallel_gc_threads((ParallelGCThreads > 0) ? ParallelGCThreads : 1),
+  _parallel_gc_threads(G1CollectedHeap::use_parallel_gc_threads()
+    ? ParallelGCThreads : 1),
+
+
  _n_pauses(0),
  _recent_CH_strong_roots_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
  _recent_G1_strong_roots_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
@ -1073,7 +1076,7 @@ void G1CollectorPolicy::print_stats (int level,
 }

 double G1CollectorPolicy::avg_value (double* data) {
-  if (ParallelGCThreads > 0) {
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
    double ret = 0.0;
    for (uint i = 0; i < ParallelGCThreads; ++i)
      ret += data[i];
@ -1084,7 +1087,7 @@ double G1CollectorPolicy::avg_value (double* data) {
 }

 double G1CollectorPolicy::max_value (double* data) {
-  if (ParallelGCThreads > 0) {
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
    double ret = data[0];
    for (uint i = 1; i < ParallelGCThreads; ++i)
      if (data[i] > ret)
@ -1096,7 +1099,7 @@ double G1CollectorPolicy::max_value (double* data) {
 }

 double G1CollectorPolicy::sum_of_values (double* data) {
-  if (ParallelGCThreads > 0) {
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
    double sum = 0.0;
    for (uint i = 0; i < ParallelGCThreads; i++)
      sum += data[i];
@ -1110,7 +1113,7 @@ double G1CollectorPolicy::max_sum (double* data1,
                                   double* data2) {
  double ret = data1[0] + data2[0];

-  if (ParallelGCThreads > 0) {
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
    for (uint i = 1; i < ParallelGCThreads; ++i) {
      double data = data1[i] + data2[i];
      if (data > ret)
@ -1126,7 +1129,7 @@ double G1CollectorPolicy::max_sum (double* data1,
 void G1CollectorPolicy::record_collection_pause_end() {
  double end_time_sec = os::elapsedTime();
  double elapsed_ms = _last_pause_time_ms;
-  bool parallel = ParallelGCThreads > 0;
+  bool parallel = G1CollectedHeap::use_parallel_gc_threads();
  double evac_ms = (end_time_sec - _cur_G1_strong_roots_end_sec) * 1000.0;
  size_t rs_size =
    _cur_collection_pause_used_regions_at_start - collection_set_size();
@ -1941,7 +1944,7 @@ G1CollectorPolicy::recent_avg_survival_fraction_work(TruncatedSeq* surviving,
      // Further, we're now always doing parallel collection.  But I'm still
      // leaving this here as a placeholder for a more precise assertion later.
      // (DLD, 10/05.)
-      assert((true || ParallelGCThreads > 0) ||
+      assert((true || G1CollectedHeap::use_parallel_gc_threads()) ||
             _g1->evacuation_failed() ||
             recent_survival_rate <= 1.0, "Or bad frac");
      return recent_survival_rate;
@ -1961,7 +1964,7 @@ G1CollectorPolicy::last_survival_fraction_work(TruncatedSeq* surviving,
    // Further, we're now always doing parallel collection.  But I'm still
    // leaving this here as a placeholder for a more precise assertion later.
    // (DLD, 10/05.)
-    assert((true || ParallelGCThreads > 0) ||
+    assert((true || G1CollectedHeap::use_parallel_gc_threads()) ||
           last_survival_rate <= 1.0, "Or bad frac");
    return last_survival_rate;
  } else {
@ -2121,7 +2124,7 @@ void G1CollectorPolicy::check_other_times(int level,
 }

 void G1CollectorPolicy::print_summary(PauseSummary* summary) const {
-  bool parallel = ParallelGCThreads > 0;
+  bool parallel = G1CollectedHeap::use_parallel_gc_threads();
  MainBodySummary*    body_summary = summary->main_body_summary();
  if (summary->get_total_seq()->num() > 0) {
    print_summary_sd(0, "Evacuation Pauses", summary->get_total_seq());
@ -2559,7 +2562,7 @@ record_concurrent_mark_cleanup_end(size_t freed_bytes,
    gclog_or_tty->print_cr("  clear marked regions + work1: %8.3f ms.",
                  (clear_marked_end - start)*1000.0);
  }
-  if (ParallelGCThreads > 0) {
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
    const size_t OverpartitionFactor = 4;
    const size_t MinWorkUnit = 8;
    const size_t WorkUnit =
--- a/hotspot/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp
@ -101,22 +101,6 @@ void G1MarkSweep::allocate_stacks() {
  GenMarkSweep::_preserved_count_max = 0;
  GenMarkSweep::_preserved_marks = NULL;
  GenMarkSweep::_preserved_count = 0;
-  GenMarkSweep::_preserved_mark_stack = NULL;
-  GenMarkSweep::_preserved_oop_stack = NULL;
-
-  GenMarkSweep::_marking_stack =
-    new (ResourceObj::C_HEAP) GrowableArray<oop>(4000, true);
-  GenMarkSweep::_objarray_stack =
-    new (ResourceObj::C_HEAP) GrowableArray<ObjArrayTask>(50, true);
-
-  int size = SystemDictionary::number_of_classes() * 2;
-  GenMarkSweep::_revisit_klass_stack =
-    new (ResourceObj::C_HEAP) GrowableArray<Klass*>(size, true);
-  // (#klass/k)^2 for k ~ 10 appears a better fit, but this will have to do
-  // for now until we have a chance to work out a more optimal setting.
-  GenMarkSweep::_revisit_mdo_stack =
-    new (ResourceObj::C_HEAP) GrowableArray<DataLayout*>(size*2, true);
-
 }

 void G1MarkSweep::mark_sweep_phase1(bool& marked_for_unloading,
@ -145,7 +129,7 @@ void G1MarkSweep::mark_sweep_phase1(bool& marked_for_unloading,

  // Follow system dictionary roots and unload classes
  bool purged_class = SystemDictionary::do_unloading(&GenMarkSweep::is_alive);
-  assert(GenMarkSweep::_marking_stack->is_empty(),
+  assert(GenMarkSweep::_marking_stack.is_empty(),
         "stack should be empty by now");

  // Follow code cache roots (has to be done after system dictionary,
@ -157,19 +141,19 @@ void G1MarkSweep::mark_sweep_phase1(bool& marked_for_unloading,

  // Update subklass/sibling/implementor links of live klasses
  GenMarkSweep::follow_weak_klass_links();
-  assert(GenMarkSweep::_marking_stack->is_empty(),
+  assert(GenMarkSweep::_marking_stack.is_empty(),
         "stack should be empty by now");

  // Visit memoized MDO's and clear any unmarked weak refs
  GenMarkSweep::follow_mdo_weak_refs();
-  assert(GenMarkSweep::_marking_stack->is_empty(), "just drained");
+  assert(GenMarkSweep::_marking_stack.is_empty(), "just drained");


  // Visit symbol and interned string tables and delete unmarked oops
  SymbolTable::unlink(&GenMarkSweep::is_alive);
  StringTable::unlink(&GenMarkSweep::is_alive);

-  assert(GenMarkSweep::_marking_stack->is_empty(),
+  assert(GenMarkSweep::_marking_stack.is_empty(),
         "stack should be empty by now");
 }

--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp
@ -523,7 +523,7 @@ prepare_for_oops_into_collection_set_do() {
  assert(!_traversal_in_progress, "Invariant between iterations.");
  set_traversal(true);
  if (ParallelGCThreads > 0) {
-    _seq_task->set_par_threads((int)n_workers());
+    _seq_task->set_n_threads((int)n_workers());
  }
  guarantee( _cards_scanned == NULL, "invariant" );
  _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers());
--- a/hotspot/src/share/vm/gc_implementation/g1/ptrQueue.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/ptrQueue.hpp
@ -89,6 +89,10 @@ public:
    return _buf == NULL ? 0 : _sz - _index;
  }

+  bool is_empty() {
+    return _buf == NULL || _sz == _index;
+  }
+
  // Set the "active" property of the queue to "b".  An enqueue to an
  // inactive thread is a no-op.  Setting a queue to inactive resets its
  // log to the empty state.
--- a/hotspot/src/share/vm/gc_implementation/g1/satbQueue.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/satbQueue.hpp
@ -29,7 +29,12 @@ class JavaThread;
 class ObjPtrQueue: public PtrQueue {
 public:
  ObjPtrQueue(PtrQueueSet* qset_, bool perm = false) :
-    PtrQueue(qset_, perm, qset_->is_active()) { }
+    // SATB queues are only active during marking cycles. We create
+    // them with their active field set to false. If a thread is
+    // created during a cycle and its SATB queue needs to be activated
+    // before the thread starts running, we'll need to set its active
+    // field to true. This is done in JavaThread::initialize_queues().
+    PtrQueue(qset_, perm, false /* active */) { }
  // Apply the closure to all elements, and reset the index to make the
  // buffer empty.
  void apply_closure(ObjectClosure* cl);
--- a/hotspot/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep
+++ b/hotspot/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep
@ -171,6 +171,7 @@ concurrentMarkSweepGeneration.hpp       generation.hpp
 concurrentMarkSweepGeneration.hpp       generationCounters.hpp
 concurrentMarkSweepGeneration.hpp       memoryService.hpp
 concurrentMarkSweepGeneration.hpp       mutexLocker.hpp
+concurrentMarkSweepGeneration.hpp       stack.inline.hpp
 concurrentMarkSweepGeneration.hpp       taskqueue.hpp
 concurrentMarkSweepGeneration.hpp       virtualspace.hpp
 concurrentMarkSweepGeneration.hpp       yieldingWorkgroup.hpp
--- a/hotspot/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge
+++ b/hotspot/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge
@ -187,9 +187,11 @@ psCompactionManager.cpp                 parMarkBitMap.hpp
 psCompactionManager.cpp                 psParallelCompact.hpp
 psCompactionManager.cpp                 psCompactionManager.hpp
 psCompactionManager.cpp                 psOldGen.hpp
+psCompactionManager.cpp                 stack.inline.hpp
 psCompactionManager.cpp                 systemDictionary.hpp

 psCompactionManager.hpp                 allocation.hpp
+psCompactionManager.hpp                 stack.hpp
 psCompactionManager.hpp                 taskqueue.hpp

 psCompactionManager.inline.hpp		psCompactionManager.hpp
@ -233,12 +235,14 @@ psMarkSweep.cpp                         referencePolicy.hpp
 psMarkSweep.cpp                         referenceProcessor.hpp
 psMarkSweep.cpp                         safepoint.hpp
 psMarkSweep.cpp                         spaceDecorator.hpp
+psMarkSweep.cpp                         stack.inline.hpp
 psMarkSweep.cpp                         symbolTable.hpp
 psMarkSweep.cpp                         systemDictionary.hpp
 psMarkSweep.cpp                         vmThread.hpp

 psMarkSweep.hpp                         markSweep.inline.hpp
 psMarkSweep.hpp                         collectorCounters.hpp
+psMarkSweep.hpp                         stack.hpp

 psMarkSweepDecorator.cpp                liveRange.hpp
 psMarkSweepDecorator.cpp                markSweep.inline.hpp
@ -280,6 +284,7 @@ psParallelCompact.cpp			psYoungGen.hpp
 psParallelCompact.cpp			referencePolicy.hpp
 psParallelCompact.cpp			referenceProcessor.hpp
 psParallelCompact.cpp			safepoint.hpp
+psParallelCompact.cpp			stack.inline.hpp
 psParallelCompact.cpp			symbolTable.hpp
 psParallelCompact.cpp			systemDictionary.hpp
 psParallelCompact.cpp			vmThread.hpp
@ -367,6 +372,7 @@ psScavenge.cpp                          referencePolicy.hpp
 psScavenge.cpp                          referenceProcessor.hpp
 psScavenge.cpp                          resourceArea.hpp
 psScavenge.cpp                          spaceDecorator.hpp
+psScavenge.cpp                          stack.inline.hpp
 psScavenge.cpp                          threadCritical.hpp
 psScavenge.cpp                          vmThread.hpp
 psScavenge.cpp                          vm_operations.hpp
@ -376,6 +382,7 @@ psScavenge.hpp                          cardTableExtension.hpp
 psScavenge.hpp                          collectorCounters.hpp
 psScavenge.hpp                          oop.hpp
 psScavenge.hpp                          psVirtualspace.hpp
+psScavenge.hpp                          stack.hpp

 psScavenge.inline.hpp                   cardTableExtension.hpp
 psScavenge.inline.hpp                   parallelScavengeHeap.hpp
--- a/hotspot/src/share/vm/gc_implementation/includeDB_gc_serial
+++ b/hotspot/src/share/vm/gc_implementation/includeDB_gc_serial
@ -93,11 +93,13 @@ markSweep.cpp                           oop.inline.hpp
 markSweep.hpp                           growableArray.hpp
 markSweep.hpp                           markOop.hpp
 markSweep.hpp                           oop.hpp
+markSweep.hpp                           stack.hpp
 markSweep.hpp                           timer.hpp
 markSweep.hpp                           universe.hpp

 markSweep.inline.hpp                    collectedHeap.hpp
 markSweep.inline.hpp                    markSweep.hpp
+markSweep.inline.hpp                    stack.inline.hpp

 mutableSpace.hpp                        immutableSpace.hpp
 mutableSpace.hpp                        memRegion.hpp
--- a/hotspot/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp
+++ b/hotspot/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2010 Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -44,7 +44,7 @@ void CardTableModRefBS::par_non_clean_card_iterate_work(Space* sp, MemRegion mr,

    int n_strides = n_threads * StridesPerThread;
    SequentialSubTasksDone* pst = sp->par_seq_tasks();
-    pst->set_par_threads(n_threads);
+    pst->set_n_threads(n_threads);
    pst->set_n_tasks(n_strides);

    int stride = 0;
--- a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp
+++ b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp
@ -34,12 +34,12 @@ ParScanThreadState::ParScanThreadState(Space* to_space_,
                                       Generation* old_gen_,
                                       int thread_num_,
                                       ObjToScanQueueSet* work_queue_set_,
-                                       GrowableArray<oop>**  overflow_stack_set_,
+                                       Stack<oop>* overflow_stacks_,
                                       size_t desired_plab_sz_,
                                       ParallelTaskTerminator& term_) :
  _to_space(to_space_), _old_gen(old_gen_), _young_gen(gen_), _thread_num(thread_num_),
  _work_queue(work_queue_set_->queue(thread_num_)), _to_space_full(false),
-  _overflow_stack(overflow_stack_set_[thread_num_]),
+  _overflow_stack(overflow_stacks_ ? overflow_stacks_ + thread_num_ : NULL),
  _ageTable(false), // false ==> not the global age table, no perf data.
  _to_space_alloc_buffer(desired_plab_sz_),
  _to_space_closure(gen_, this), _old_gen_closure(gen_, this),
@ -159,10 +159,11 @@ bool ParScanThreadState::take_from_overflow_stack() {
  assert(ParGCUseLocalOverflow, "Else should not call");
  assert(young_gen()->overflow_list() == NULL, "Error");
  ObjToScanQueue* queue = work_queue();
-  GrowableArray<oop>* of_stack = overflow_stack();
-  uint num_overflow_elems = of_stack->length();
-  uint num_take_elems     = MIN2(MIN2((queue->max_elems() - queue->size())/4,
-                                      (juint)ParGCDesiredObjsFromOverflowList),
+  Stack<oop>* const of_stack = overflow_stack();
+  const size_t num_overflow_elems = of_stack->size();
+  const size_t space_available = queue->max_elems() - queue->size();
+  const size_t num_take_elems = MIN3(space_available / 4,
+                                     ParGCDesiredObjsFromOverflowList,
                                     num_overflow_elems);
  // Transfer the most recent num_take_elems from the overflow
  // stack to our work queue.
@ -271,7 +272,7 @@ public:
                        ParNewGeneration&       gen,
                        Generation&             old_gen,
                        ObjToScanQueueSet&      queue_set,
-                        GrowableArray<oop>**    overflow_stacks_,
+                        Stack<oop>*             overflow_stacks_,
                        size_t                  desired_plab_sz,
                        ParallelTaskTerminator& term);

@ -302,17 +303,19 @@ private:
 ParScanThreadStateSet::ParScanThreadStateSet(
  int num_threads, Space& to_space, ParNewGeneration& gen,
  Generation& old_gen, ObjToScanQueueSet& queue_set,
-  GrowableArray<oop>** overflow_stack_set_,
+  Stack<oop>* overflow_stacks,
  size_t desired_plab_sz, ParallelTaskTerminator& term)
  : ResourceArray(sizeof(ParScanThreadState), num_threads),
    _gen(gen), _next_gen(old_gen), _term(term)
 {
  assert(num_threads > 0, "sanity check!");
+  assert(ParGCUseLocalOverflow == (overflow_stacks != NULL),
+         "overflow_stack allocation mismatch");
  // Initialize states.
  for (int i = 0; i < num_threads; ++i) {
    new ((ParScanThreadState*)_data + i)
        ParScanThreadState(&to_space, &gen, &old_gen, i, &queue_set,
-                           overflow_stack_set_, desired_plab_sz, term);
+                           overflow_stacks, desired_plab_sz, term);
  }
 }

@ -596,14 +599,11 @@ ParNewGeneration(ReservedSpace rs, size_t initial_byte_size, int level)
  for (uint i2 = 0; i2 < ParallelGCThreads; i2++)
    _task_queues->queue(i2)->initialize();

-  _overflow_stacks = NEW_C_HEAP_ARRAY(GrowableArray<oop>*, ParallelGCThreads);
-  guarantee(_overflow_stacks != NULL, "Overflow stack set allocation failure");
-  for (uint i = 0; i < ParallelGCThreads; i++) {
+  _overflow_stacks = NULL;
  if (ParGCUseLocalOverflow) {
-      _overflow_stacks[i] = new (ResourceObj::C_HEAP) GrowableArray<oop>(512, true);
-      guarantee(_overflow_stacks[i] != NULL, "Overflow Stack allocation failure.");
-    } else {
-      _overflow_stacks[i] = NULL;
+    _overflow_stacks = NEW_C_HEAP_ARRAY(Stack<oop>, ParallelGCThreads);
+    for (size_t i = 0; i < ParallelGCThreads; ++i) {
+      new (_overflow_stacks + i) Stack<oop>();
    }
  }

@ -937,12 +937,9 @@ void ParNewGeneration::collect(bool   full,
  } else {
    assert(HandlePromotionFailure,
      "Should only be here if promotion failure handling is on");
-    if (_promo_failure_scan_stack != NULL) {
-      // Can be non-null because of reference processing.
-      // Free stack with its elements.
-      delete _promo_failure_scan_stack;
-      _promo_failure_scan_stack = NULL;
-    }
+    assert(_promo_failure_scan_stack.is_empty(), "post condition");
+    _promo_failure_scan_stack.clear(true); // Clear cached segments.
+
    remove_forwarding_pointers();
    if (PrintGCDetails) {
      gclog_or_tty->print(" (promotion failed)");
@ -1397,8 +1394,8 @@ bool ParNewGeneration::take_from_overflow_list_work(ParScanThreadState* par_scan
  size_t objsFromOverflow = MIN2((size_t)(work_q->max_elems() - work_q->size())/4,
                                 (size_t)ParGCDesiredObjsFromOverflowList);

-  assert(par_scan_state->overflow_stack() == NULL, "Error");
  assert(!UseCompressedOops, "Error");
+  assert(par_scan_state->overflow_stack() == NULL, "Error");
  if (_overflow_list == NULL) return false;

  // Otherwise, there was something there; try claiming the list.
@ -1533,3 +1530,7 @@ void ParNewGeneration::ref_processor_init()
 const char* ParNewGeneration::name() const {
  return "par new generation";
 }
+
+bool ParNewGeneration::in_use() {
+  return UseParNewGC && ParallelGCThreads > 0;
+}
--- a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp
+++ b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp
@ -52,7 +52,7 @@ class ParScanThreadState {
  friend class ParScanThreadStateSet;
 private:
  ObjToScanQueue *_work_queue;
-  GrowableArray<oop>* _overflow_stack;
+  Stack<oop>* const _overflow_stack;

  ParGCAllocBuffer _to_space_alloc_buffer;

@ -120,7 +120,7 @@ class ParScanThreadState {
  ParScanThreadState(Space* to_space_, ParNewGeneration* gen_,
                     Generation* old_gen_, int thread_num_,
                     ObjToScanQueueSet* work_queue_set_,
-                     GrowableArray<oop>** overflow_stack_set_,
+                     Stack<oop>* overflow_stacks_,
                     size_t desired_plab_sz_,
                     ParallelTaskTerminator& term_);

@ -144,7 +144,7 @@ class ParScanThreadState {
  void trim_queues(int max_size);

  // Private overflow stack usage
-  GrowableArray<oop>* overflow_stack() { return _overflow_stack; }
+  Stack<oop>* overflow_stack() { return _overflow_stack; }
  bool take_from_overflow_stack();
  void push_on_overflow_stack(oop p);

@ -301,7 +301,7 @@ class ParNewGeneration: public DefNewGeneration {
  ObjToScanQueueSet* _task_queues;

  // Per-worker-thread local overflow stacks
-  GrowableArray<oop>** _overflow_stacks;
+  Stack<oop>* _overflow_stacks;

  // Desired size of survivor space plab's
  PLABStats _plab_stats;
@ -350,6 +350,8 @@ class ParNewGeneration: public DefNewGeneration {
    delete _task_queues;
  }

+  static bool in_use();
+
  virtual void ref_processor_init();
  virtual Generation::Name kind()        { return Generation::ParNew; }
  virtual const char* name() const;
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp
@ -59,8 +59,6 @@ void MarkFromRootsTask::do_it(GCTaskManager* manager, uint which) {
    PrintGCDetails && TraceParallelOldGCTasks, true, gclog_or_tty));
  ParCompactionManager* cm =
    ParCompactionManager::gc_thread_compaction_manager(which);
-  assert(cm->stacks_have_been_allocated(),
-         "Stack space has not been allocated");
  PSParallelCompact::MarkAndPushClosure mark_and_push_closure(cm);

  switch (_root_type) {
@ -119,7 +117,6 @@ void MarkFromRootsTask::do_it(GCTaskManager* manager, uint which) {

  // Do the real work
  cm->follow_marking_stacks();
-  // cm->deallocate_stacks();
 }


@ -135,8 +132,6 @@ void RefProcTaskProxy::do_it(GCTaskManager* manager, uint which)
    PrintGCDetails && TraceParallelOldGCTasks, true, gclog_or_tty));
  ParCompactionManager* cm =
    ParCompactionManager::gc_thread_compaction_manager(which);
-  assert(cm->stacks_have_been_allocated(),
-         "Stack space has not been allocated");
  PSParallelCompact::MarkAndPushClosure mark_and_push_closure(cm);
  PSParallelCompact::FollowStackClosure follow_stack_closure(cm);
  _rp_task.work(_work_id, *PSParallelCompact::is_alive_closure(),
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/pcTasks.hpp
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/pcTasks.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2008, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -242,7 +242,11 @@ class UpdateDensePrefixTask : public GCTask {
 //

 class DrainStacksCompactionTask : public GCTask {
+ uint _stack_index;
+ uint stack_index() { return _stack_index; }
 public:
+  DrainStacksCompactionTask(uint stack_index) : GCTask(),
+                                                _stack_index(stack_index) {};
  char* name() { return (char *)"drain-region-task"; }
  virtual void do_it(GCTaskManager* manager, uint which);
 };
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp
@ -46,23 +46,6 @@ ParCompactionManager::ParCompactionManager() :
  marking_stack()->initialize();
  _objarray_stack.initialize();
  region_stack()->initialize();
-
-  // Note that _revisit_klass_stack is allocated out of the
-  // C heap (as opposed to out of ResourceArena).
-  int size =
-    (SystemDictionary::number_of_classes() * 2) * 2 / ParallelGCThreads;
-  _revisit_klass_stack = new (ResourceObj::C_HEAP) GrowableArray<Klass*>(size, true);
-  // From some experiments (#klass/k)^2 for k = 10 seems a better fit, but this will
-  // have to do for now until we are able to investigate a more optimal setting.
-  _revisit_mdo_stack = new (ResourceObj::C_HEAP) GrowableArray<DataLayout*>(size*2, true);
-}
-
-ParCompactionManager::~ParCompactionManager() {
-  delete _revisit_klass_stack;
-  delete _revisit_mdo_stack;
-  // _manager_array and _stack_array are statics
-  // shared with all instances of ParCompactionManager
-  // should not be deallocated.
 }

 void ParCompactionManager::initialize(ParMarkBitMap* mbm) {
@ -134,9 +117,9 @@ ParCompactionManager::gc_thread_compaction_manager(int index) {
 }

 void ParCompactionManager::reset() {
-  for(uint i=0; i<ParallelGCThreads+1; i++) {
-    manager_array(i)->revisit_klass_stack()->clear();
-    manager_array(i)->revisit_mdo_stack()->clear();
+  for(uint i = 0; i < ParallelGCThreads + 1; i++) {
+    assert(manager_array(i)->revisit_klass_stack()->is_empty(), "sanity");
+    assert(manager_array(i)->revisit_mdo_stack()->is_empty(), "sanity");
  }
 }

@ -178,10 +161,3 @@ void ParCompactionManager::drain_region_stacks() {
    }
  } while (!region_stack()->is_empty());
 }
-
-#ifdef ASSERT
-bool ParCompactionManager::stacks_have_been_allocated() {
-  return (revisit_klass_stack()->data_addr() != NULL &&
-          revisit_mdo_stack()->data_addr() != NULL);
-}
-#endif
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp
@ -80,10 +80,9 @@ private:
  // type of TaskQueue.
  RegionTaskQueue               _region_stack;

-#if 1  // does this happen enough to need a per thread stack?
-  GrowableArray<Klass*>*        _revisit_klass_stack;
-  GrowableArray<DataLayout*>*   _revisit_mdo_stack;
-#endif
+  Stack<Klass*>                 _revisit_klass_stack;
+  Stack<DataLayout*>            _revisit_mdo_stack;
+
  static ParMarkBitMap* _mark_bitmap;

  Action _action;
@ -113,10 +112,7 @@ private:
  inline static ParCompactionManager* manager_array(int index);

  ParCompactionManager();
-  ~ParCompactionManager();

-  void allocate_stacks();
-  void deallocate_stacks();
  ParMarkBitMap* mark_bitmap() { return _mark_bitmap; }

  // Take actions in preparation for a compaction.
@ -129,11 +125,8 @@ private:
  bool should_verify_only();
  bool should_reset_only();

-#if 1
-  // Probably stays as a growable array
-  GrowableArray<Klass*>* revisit_klass_stack() { return _revisit_klass_stack; }
-  GrowableArray<DataLayout*>* revisit_mdo_stack() { return _revisit_mdo_stack; }
-#endif
+  Stack<Klass*>* revisit_klass_stack() { return &_revisit_klass_stack; }
+  Stack<DataLayout*>* revisit_mdo_stack() { return &_revisit_mdo_stack; }

  // Save for later processing.  Must not fail.
  inline void push(oop obj) { _marking_stack.push(obj); }
@ -162,10 +155,6 @@ private:
  // Process tasks remaining on any stack
  void drain_region_stacks();

-  // Debugging support
-#ifdef ASSERT
-  bool stacks_have_been_allocated();
-#endif
 };

 inline ParCompactionManager* ParCompactionManager::manager_array(int index) {
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp
@ -466,33 +466,16 @@ void PSMarkSweep::allocate_stacks() {
  _preserved_count_max  = pointer_delta(to_space->end(), to_space->top(), sizeof(jbyte));
  // Now divide by the size of a PreservedMark
  _preserved_count_max /= sizeof(PreservedMark);
-
-  _preserved_mark_stack = NULL;
-  _preserved_oop_stack = NULL;
-
-  _marking_stack = new (ResourceObj::C_HEAP) GrowableArray<oop>(4000, true);
-  _objarray_stack = new (ResourceObj::C_HEAP) GrowableArray<ObjArrayTask>(50, true);
-
-  int size = SystemDictionary::number_of_classes() * 2;
-  _revisit_klass_stack = new (ResourceObj::C_HEAP) GrowableArray<Klass*>(size, true);
-  // (#klass/k)^2, for k ~ 10 appears a better setting, but this will have to do for
-  // now until we investigate a more optimal setting.
-  _revisit_mdo_stack   = new (ResourceObj::C_HEAP) GrowableArray<DataLayout*>(size*2, true);
 }


 void PSMarkSweep::deallocate_stacks() {
-  if (_preserved_oop_stack) {
-    delete _preserved_mark_stack;
-    _preserved_mark_stack = NULL;
-    delete _preserved_oop_stack;
-    _preserved_oop_stack = NULL;
-  }
-
-  delete _marking_stack;
-  delete _objarray_stack;
-  delete _revisit_klass_stack;
-  delete _revisit_mdo_stack;
+  _preserved_mark_stack.clear(true);
+  _preserved_oop_stack.clear(true);
+  _marking_stack.clear();
+  _objarray_stack.clear(true);
+  _revisit_klass_stack.clear(true);
+  _revisit_mdo_stack.clear(true);
 }

 void PSMarkSweep::mark_sweep_phase1(bool clear_all_softrefs) {
@ -542,17 +525,17 @@ void PSMarkSweep::mark_sweep_phase1(bool clear_all_softrefs) {

  // Update subklass/sibling/implementor links of live klasses
  follow_weak_klass_links();
-  assert(_marking_stack->is_empty(), "just drained");
+  assert(_marking_stack.is_empty(), "just drained");

  // Visit memoized mdo's and clear unmarked weak refs
  follow_mdo_weak_refs();
-  assert(_marking_stack->is_empty(), "just drained");
+  assert(_marking_stack.is_empty(), "just drained");

  // Visit symbol and interned string tables and delete unmarked oops
  SymbolTable::unlink(is_alive_closure());
  StringTable::unlink(is_alive_closure());

-  assert(_marking_stack->is_empty(), "stack should be empty by now");
+  assert(_marking_stack.is_empty(), "stack should be empty by now");
 }


--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp
@ -2170,6 +2170,16 @@ void PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) {
    heap->update_counters();
  }

+#ifdef ASSERT
+  for (size_t i = 0; i < ParallelGCThreads + 1; ++i) {
+    ParCompactionManager* const cm =
+      ParCompactionManager::manager_array(int(i));
+    assert(cm->marking_stack()->is_empty(),       "should be empty");
+    assert(cm->region_stack()->is_empty(),        "should be empty");
+    assert(cm->revisit_klass_stack()->is_empty(), "should be empty");
+  }
+#endif // ASSERT
+
  if (VerifyAfterGC && heap->total_collections() >= VerifyGCStartAt) {
    HandleMark hm;  // Discard invalid handles created during verification
    gclog_or_tty->print(" VerifyAfterGC:");
@ -2449,7 +2459,7 @@ void PSParallelCompact::enqueue_region_draining_tasks(GCTaskQueue* q,

  const unsigned int task_count = MAX2(parallel_gc_threads, 1U);
  for (unsigned int j = 0; j < task_count; j++) {
-    q->enqueue(new DrainStacksCompactionTask());
+    q->enqueue(new DrainStacksCompactionTask(j));
  }

  // Find all regions that are available (can be filled immediately) and
@ -2711,21 +2721,22 @@ PSParallelCompact::follow_weak_klass_links() {
  // All klasses on the revisit stack are marked at this point.
  // Update and follow all subklass, sibling and implementor links.
  if (PrintRevisitStats) {
-    gclog_or_tty->print_cr("#classes in system dictionary = %d", SystemDictionary::number_of_classes());
+    gclog_or_tty->print_cr("#classes in system dictionary = %d",
+                           SystemDictionary::number_of_classes());
  }
  for (uint i = 0; i < ParallelGCThreads + 1; i++) {
    ParCompactionManager* cm = ParCompactionManager::manager_array(i);
    KeepAliveClosure keep_alive_closure(cm);
-    int length = cm->revisit_klass_stack()->length();
+    Stack<Klass*>* const rks = cm->revisit_klass_stack();
    if (PrintRevisitStats) {
-      gclog_or_tty->print_cr("Revisit klass stack[%d] length = %d", i, length);
+      gclog_or_tty->print_cr("Revisit klass stack[%u] length = " SIZE_FORMAT,
+                             i, rks->size());
    }
-    for (int j = 0; j < length; j++) {
-      cm->revisit_klass_stack()->at(j)->follow_weak_klass_links(
-        is_alive_closure(),
-        &keep_alive_closure);
+    while (!rks->is_empty()) {
+      Klass* const k = rks->pop();
+      k->follow_weak_klass_links(is_alive_closure(), &keep_alive_closure);
    }
-    // revisit_klass_stack is cleared in reset()
+
    cm->follow_marking_stacks();
  }
 }
@ -2744,19 +2755,20 @@ void PSParallelCompact::follow_mdo_weak_refs() {
  // we can visit and clear any weak references from MDO's which
  // we memoized during the strong marking phase.
  if (PrintRevisitStats) {
-    gclog_or_tty->print_cr("#classes in system dictionary = %d", SystemDictionary::number_of_classes());
+    gclog_or_tty->print_cr("#classes in system dictionary = %d",
+                           SystemDictionary::number_of_classes());
  }
  for (uint i = 0; i < ParallelGCThreads + 1; i++) {
    ParCompactionManager* cm = ParCompactionManager::manager_array(i);
-    GrowableArray<DataLayout*>* rms = cm->revisit_mdo_stack();
-    int length = rms->length();
+    Stack<DataLayout*>* rms = cm->revisit_mdo_stack();
    if (PrintRevisitStats) {
-      gclog_or_tty->print_cr("Revisit MDO stack[%d] length = %d", i, length);
+      gclog_or_tty->print_cr("Revisit MDO stack[%u] size = " SIZE_FORMAT,
+                             i, rms->size());
    }
-    for (int j = 0; j < length; j++) {
-      rms->at(j)->follow_weak_refs(is_alive_closure());
+    while (!rms->is_empty()) {
+      rms->pop()->follow_weak_refs(is_alive_closure());
    }
-    // revisit_mdo_stack is cleared in reset()
+
    cm->follow_marking_stacks();
  }
 }
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp
@ -185,7 +185,6 @@ void PSPromotionManager::reset() {


 void PSPromotionManager::drain_stacks_depth(bool totally_drain) {
-  assert(claimed_stack_depth()->overflow_stack() != NULL, "invariant");
  totally_drain = totally_drain || _totally_drain;

 #ifdef ASSERT
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp
@ -34,9 +34,10 @@ bool                       PSScavenge::_survivor_overflow = false;
 int                        PSScavenge::_tenuring_threshold = 0;
 HeapWord*                  PSScavenge::_young_generation_boundary = NULL;
 elapsedTimer               PSScavenge::_accumulated_time;
-GrowableArray<markOop>*    PSScavenge::_preserved_mark_stack = NULL;
-GrowableArray<oop>*        PSScavenge::_preserved_oop_stack = NULL;
+Stack<markOop>             PSScavenge::_preserved_mark_stack;
+Stack<oop>                 PSScavenge::_preserved_oop_stack;
 CollectorCounters*         PSScavenge::_counters = NULL;
+bool                       PSScavenge::_promotion_failed = false;

 // Define before use
 class PSIsAliveClosure: public BoolObjectClosure {
@ -223,6 +224,9 @@ bool PSScavenge::invoke_no_policy() {
  assert(SafepointSynchronize::is_at_safepoint(), "should be at safepoint");
  assert(Thread::current() == (Thread*)VMThread::vm_thread(), "should be in vm thread");

+  assert(_preserved_mark_stack.is_empty(), "should be empty");
+  assert(_preserved_oop_stack.is_empty(), "should be empty");
+
  TimeStamp scavenge_entry;
  TimeStamp scavenge_midpoint;
  TimeStamp scavenge_exit;
@ -636,24 +640,20 @@ void PSScavenge::clean_up_failed_promotion() {
    young_gen->object_iterate(&unforward_closure);

    if (PrintGC && Verbose) {
-      gclog_or_tty->print_cr("Restoring %d marks",
-                              _preserved_oop_stack->length());
+      gclog_or_tty->print_cr("Restoring %d marks", _preserved_oop_stack.size());
    }

    // Restore any saved marks.
-    for (int i=0; i < _preserved_oop_stack->length(); i++) {
-      oop obj       = _preserved_oop_stack->at(i);
-      markOop mark  = _preserved_mark_stack->at(i);
+    while (!_preserved_oop_stack.is_empty()) {
+      oop obj      = _preserved_oop_stack.pop();
+      markOop mark = _preserved_mark_stack.pop();
      obj->set_mark(mark);
    }

-    // Deallocate the preserved mark and oop stacks.
-    // The stacks were allocated as CHeap objects, so
-    // we must call delete to prevent mem leaks.
-    delete _preserved_mark_stack;
-    _preserved_mark_stack = NULL;
-    delete _preserved_oop_stack;
-    _preserved_oop_stack = NULL;
+    // Clear the preserved mark and oop stack caches.
+    _preserved_mark_stack.clear(true);
+    _preserved_oop_stack.clear(true);
+    _promotion_failed = false;
  }

  // Reset the PromotionFailureALot counters.
@ -661,27 +661,16 @@ void PSScavenge::clean_up_failed_promotion() {
 }

 // This method is called whenever an attempt to promote an object
-// fails. Some markOops will need preserving, some will not. Note
+// fails. Some markOops will need preservation, some will not. Note
 // that the entire eden is traversed after a failed promotion, with
 // all forwarded headers replaced by the default markOop. This means
 // it is not necessary to preserve most markOops.
 void PSScavenge::oop_promotion_failed(oop obj, markOop obj_mark) {
-  if (_preserved_mark_stack == NULL) {
-    ThreadCritical tc; // Lock and retest
-    if (_preserved_mark_stack == NULL) {
-      assert(_preserved_oop_stack == NULL, "Sanity");
-      _preserved_mark_stack = new (ResourceObj::C_HEAP) GrowableArray<markOop>(40, true);
-      _preserved_oop_stack = new (ResourceObj::C_HEAP) GrowableArray<oop>(40, true);
-    }
-  }
-
-  // Because we must hold the ThreadCritical lock before using
-  // the stacks, we should be safe from observing partial allocations,
-  // which are also guarded by the ThreadCritical lock.
+  _promotion_failed = true;
  if (obj_mark->must_be_preserved_for_promotion_failure(obj)) {
    ThreadCritical tc;
-    _preserved_oop_stack->push(obj);
-    _preserved_mark_stack->push(obj_mark);
+    _preserved_oop_stack.push(obj);
+    _preserved_mark_stack.push(obj_mark);
  }
 }

--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.hpp
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.hpp
@ -61,9 +61,10 @@ class PSScavenge: AllStatic {
  static HeapWord*           _young_generation_boundary; // The lowest address possible for the young_gen.
                                                         // This is used to decide if an oop should be scavenged,
                                                         // cards should be marked, etc.
-  static GrowableArray<markOop>* _preserved_mark_stack; // List of marks to be restored after failed promotion
-  static GrowableArray<oop>*     _preserved_oop_stack;  // List of oops that need their mark restored.
+  static Stack<markOop>          _preserved_mark_stack; // List of marks to be restored after failed promotion
+  static Stack<oop>              _preserved_oop_stack;  // List of oops that need their mark restored.
  static CollectorCounters*      _counters;         // collector performance counters
+  static bool                    _promotion_failed;

  static void clean_up_failed_promotion();

@ -79,8 +80,7 @@ class PSScavenge: AllStatic {
  // Accessors
  static int              tenuring_threshold()  { return _tenuring_threshold; }
  static elapsedTimer*    accumulated_time()    { return &_accumulated_time; }
-  static bool             promotion_failed()
-    { return _preserved_mark_stack != NULL; }
+  static bool             promotion_failed()    { return _promotion_failed; }
  static int              consecutive_skipped_scavenges()
    { return _consecutive_skipped_scavenges; }

--- a/hotspot/src/share/vm/gc_implementation/shared/concurrentGCThread.cpp
+++ b/hotspot/src/share/vm/gc_implementation/shared/concurrentGCThread.cpp
@ -185,7 +185,7 @@ SurrogateLockerThread* SurrogateLockerThread::make(TRAPS) {
  instanceKlassHandle klass (THREAD, k);
  instanceHandle thread_oop = klass->allocate_instance_handle(CHECK_NULL);

-  const char thread_name[] = "Surrogate Locker Thread (CMS)";
+  const char thread_name[] = "Surrogate Locker Thread (Concurrent GC)";
  Handle string = java_lang_String::create_from_str(thread_name, CHECK_NULL);

  // Initialize thread_oop to put it into the system threadGroup
--- a/hotspot/src/share/vm/gc_implementation/shared/markSweep.cpp
+++ b/hotspot/src/share/vm/gc_implementation/shared/markSweep.cpp
@ -25,13 +25,13 @@
 #include "incls/_precompiled.incl"
 #include "incls/_markSweep.cpp.incl"

-GrowableArray<oop>*          MarkSweep::_marking_stack = NULL;
-GrowableArray<ObjArrayTask>* MarkSweep::_objarray_stack = NULL;
-GrowableArray<Klass*>*       MarkSweep::_revisit_klass_stack = NULL;
-GrowableArray<DataLayout*>*  MarkSweep::_revisit_mdo_stack = NULL;
+Stack<oop>              MarkSweep::_marking_stack;
+Stack<DataLayout*>      MarkSweep::_revisit_mdo_stack;
+Stack<Klass*>           MarkSweep::_revisit_klass_stack;
+Stack<ObjArrayTask>     MarkSweep::_objarray_stack;

-GrowableArray<oop>*     MarkSweep::_preserved_oop_stack = NULL;
-GrowableArray<markOop>* MarkSweep::_preserved_mark_stack= NULL;
+Stack<oop>              MarkSweep::_preserved_oop_stack;
+Stack<markOop>          MarkSweep::_preserved_mark_stack;
 size_t                  MarkSweep::_preserved_count = 0;
 size_t                  MarkSweep::_preserved_count_max = 0;
 PreservedMark*          MarkSweep::_preserved_marks = NULL;
@ -58,37 +58,42 @@ GrowableArray<size_t>   * MarkSweep::_last_gc_live_oops_size = NULL;
 #endif

 void MarkSweep::revisit_weak_klass_link(Klass* k) {
-  _revisit_klass_stack->push(k);
+  _revisit_klass_stack.push(k);
 }

 void MarkSweep::follow_weak_klass_links() {
  // All klasses on the revisit stack are marked at this point.
  // Update and follow all subklass, sibling and implementor links.
  if (PrintRevisitStats) {
-    gclog_or_tty->print_cr("#classes in system dictionary = %d", SystemDictionary::number_of_classes());
-    gclog_or_tty->print_cr("Revisit klass stack length = %d", _revisit_klass_stack->length());
+    gclog_or_tty->print_cr("#classes in system dictionary = %d",
+                           SystemDictionary::number_of_classes());
+    gclog_or_tty->print_cr("Revisit klass stack size = " SIZE_FORMAT,
+                           _revisit_klass_stack.size());
  }
-  for (int i = 0; i < _revisit_klass_stack->length(); i++) {
-    _revisit_klass_stack->at(i)->follow_weak_klass_links(&is_alive,&keep_alive);
+  while (!_revisit_klass_stack.is_empty()) {
+    Klass* const k = _revisit_klass_stack.pop();
+    k->follow_weak_klass_links(&is_alive, &keep_alive);
  }
  follow_stack();
 }

 void MarkSweep::revisit_mdo(DataLayout* p) {
-  _revisit_mdo_stack->push(p);
+  _revisit_mdo_stack.push(p);
 }

 void MarkSweep::follow_mdo_weak_refs() {
  // All strongly reachable oops have been marked at this point;
  // we can visit and clear any weak references from MDO's which
  // we memoized during the strong marking phase.
-  assert(_marking_stack->is_empty(), "Marking stack should be empty");
+  assert(_marking_stack.is_empty(), "Marking stack should be empty");
  if (PrintRevisitStats) {
-    gclog_or_tty->print_cr("#classes in system dictionary = %d", SystemDictionary::number_of_classes());
-    gclog_or_tty->print_cr("Revisit MDO stack length = %d", _revisit_mdo_stack->length());
+    gclog_or_tty->print_cr("#classes in system dictionary = %d",
+                           SystemDictionary::number_of_classes());
+    gclog_or_tty->print_cr("Revisit MDO stack size = " SIZE_FORMAT,
+                           _revisit_mdo_stack.size());
  }
-  for (int i = 0; i < _revisit_mdo_stack->length(); i++) {
-    _revisit_mdo_stack->at(i)->follow_weak_refs(&is_alive);
+  while (!_revisit_mdo_stack.is_empty()) {
+    _revisit_mdo_stack.pop()->follow_weak_refs(&is_alive);
  }
  follow_stack();
 }
@ -106,41 +111,37 @@ void MarkSweep::MarkAndPushClosure::do_oop(narrowOop* p) { mark_and_push(p); }

 void MarkSweep::follow_stack() {
  do {
-    while (!_marking_stack->is_empty()) {
-      oop obj = _marking_stack->pop();
+    while (!_marking_stack.is_empty()) {
+      oop obj = _marking_stack.pop();
      assert (obj->is_gc_marked(), "p must be marked");
      obj->follow_contents();
    }
    // Process ObjArrays one at a time to avoid marking stack bloat.
-    if (!_objarray_stack->is_empty()) {
-      ObjArrayTask task = _objarray_stack->pop();
+    if (!_objarray_stack.is_empty()) {
+      ObjArrayTask task = _objarray_stack.pop();
      objArrayKlass* const k = (objArrayKlass*)task.obj()->blueprint();
      k->oop_follow_contents(task.obj(), task.index());
    }
-  } while (!_marking_stack->is_empty() || !_objarray_stack->is_empty());
+  } while (!_marking_stack.is_empty() || !_objarray_stack.is_empty());
 }

 MarkSweep::FollowStackClosure MarkSweep::follow_stack_closure;

 void MarkSweep::FollowStackClosure::do_void() { follow_stack(); }

-// We preserve the mark which should be replaced at the end and the location that it
-// will go.  Note that the object that this markOop belongs to isn't currently at that
-// address but it will be after phase4
+// We preserve the mark which should be replaced at the end and the location
+// that it will go.  Note that the object that this markOop belongs to isn't
+// currently at that address but it will be after phase4
 void MarkSweep::preserve_mark(oop obj, markOop mark) {
-  // we try to store preserved marks in the to space of the new generation since this
-  // is storage which should be available.  Most of the time this should be sufficient
-  // space for the marks we need to preserve but if it isn't we fall back in using
-  // GrowableArrays to keep track of the overflow.
+  // We try to store preserved marks in the to space of the new generation since
+  // this is storage which should be available.  Most of the time this should be
+  // sufficient space for the marks we need to preserve but if it isn't we fall
+  // back to using Stacks to keep track of the overflow.
  if (_preserved_count < _preserved_count_max) {
    _preserved_marks[_preserved_count++].init(obj, mark);
  } else {
-    if (_preserved_mark_stack == NULL) {
-      _preserved_mark_stack = new (ResourceObj::C_HEAP) GrowableArray<markOop>(40, true);
-      _preserved_oop_stack = new (ResourceObj::C_HEAP) GrowableArray<oop>(40, true);
-    }
-    _preserved_mark_stack->push(mark);
-    _preserved_oop_stack->push(obj);
+    _preserved_mark_stack.push(mark);
+    _preserved_oop_stack.push(obj);
  }
 }

@ -151,8 +152,7 @@ void MarkSweep::AdjustPointerClosure::do_oop(oop* p)       { adjust_pointer(p, _
 void MarkSweep::AdjustPointerClosure::do_oop(narrowOop* p) { adjust_pointer(p, _is_root); }

 void MarkSweep::adjust_marks() {
-  assert(_preserved_oop_stack == NULL ||
-         _preserved_oop_stack->length() == _preserved_mark_stack->length(),
+  assert( _preserved_oop_stack.size() == _preserved_mark_stack.size(),
         "inconsistent preserved oop stacks");

  // adjust the oops we saved earlier
@ -161,21 +161,19 @@ void MarkSweep::adjust_marks() {
  }

  // deal with the overflow stack
-  if (_preserved_oop_stack) {
-    for (int i = 0; i < _preserved_oop_stack->length(); i++) {
-      oop* p = _preserved_oop_stack->adr_at(i);
+  StackIterator<oop> iter(_preserved_oop_stack);
+  while (!iter.is_empty()) {
+    oop* p = iter.next_addr();
    adjust_pointer(p);
  }
-  }
 }

 void MarkSweep::restore_marks() {
-  assert(_preserved_oop_stack == NULL ||
-         _preserved_oop_stack->length() == _preserved_mark_stack->length(),
+  assert(_preserved_oop_stack.size() == _preserved_mark_stack.size(),
         "inconsistent preserved oop stacks");
  if (PrintGC && Verbose) {
-    gclog_or_tty->print_cr("Restoring %d marks", _preserved_count +
-                  (_preserved_oop_stack ? _preserved_oop_stack->length() : 0));
+    gclog_or_tty->print_cr("Restoring %d marks",
+                           _preserved_count + _preserved_oop_stack.size());
  }

  // restore the marks we saved earlier
@ -184,13 +182,11 @@ void MarkSweep::restore_marks() {
  }

  // deal with the overflow
-  if (_preserved_oop_stack) {
-    for (int i = 0; i < _preserved_oop_stack->length(); i++) {
-      oop obj       = _preserved_oop_stack->at(i);
-      markOop mark  = _preserved_mark_stack->at(i);
+  while (!_preserved_oop_stack.is_empty()) {
+    oop obj       = _preserved_oop_stack.pop();
+    markOop mark  = _preserved_mark_stack.pop();
    obj->set_mark(mark);
  }
-  }
 }

 #ifdef VALIDATE_MARK_SWEEP
--- a/hotspot/src/share/vm/gc_implementation/shared/markSweep.hpp
+++ b/hotspot/src/share/vm/gc_implementation/shared/markSweep.hpp
@ -104,23 +104,22 @@ class MarkSweep : AllStatic {
  friend class KeepAliveClosure;
  friend class VM_MarkSweep;
  friend void marksweep_init();
-  friend class DataLayout;

  //
  // Vars
  //
 protected:
  // Traversal stacks used during phase1
-  static GrowableArray<oop>*             _marking_stack;
-  static GrowableArray<ObjArrayTask>*    _objarray_stack;
+  static Stack<oop>                      _marking_stack;
+  static Stack<ObjArrayTask>             _objarray_stack;
  // Stack for live klasses to revisit at end of marking phase
-  static GrowableArray<Klass*>*          _revisit_klass_stack;
+  static Stack<Klass*>                   _revisit_klass_stack;
  // Set (stack) of MDO's to revisit at end of marking phase
-  static GrowableArray<DataLayout*>*    _revisit_mdo_stack;
+  static Stack<DataLayout*>              _revisit_mdo_stack;

  // Space for storing/restoring mark word
-  static GrowableArray<markOop>*         _preserved_mark_stack;
-  static GrowableArray<oop>*             _preserved_oop_stack;
+  static Stack<markOop>                  _preserved_mark_stack;
+  static Stack<oop>                      _preserved_oop_stack;
  static size_t                          _preserved_count;
  static size_t                          _preserved_count_max;
  static PreservedMark*                  _preserved_marks;
--- a/hotspot/src/share/vm/gc_implementation/shared/markSweep.inline.hpp
+++ b/hotspot/src/share/vm/gc_implementation/shared/markSweep.inline.hpp
@ -72,7 +72,7 @@ template <class T> inline void MarkSweep::mark_and_push(T* p) {
    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
    if (!obj->mark()->is_marked()) {
      mark_object(obj);
-      _marking_stack->push(obj);
+      _marking_stack.push(obj);
    }
  }
 }
@ -80,7 +80,7 @@ template <class T> inline void MarkSweep::mark_and_push(T* p) {
 void MarkSweep::push_objarray(oop obj, size_t index) {
  ObjArrayTask task(obj, index);
  assert(task.is_valid(), "bad ObjArrayTask");
-  _objarray_stack->push(task);
+  _objarray_stack.push(task);
 }

 template <class T> inline void MarkSweep::adjust_pointer(T* p, bool isroot) {
--- a/hotspot/src/share/vm/gc_interface/collectedHeap.cpp
+++ b/hotspot/src/share/vm/gc_interface/collectedHeap.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -34,7 +34,9 @@ size_t CollectedHeap::_filler_array_max_size = 0;

 // Memory state functions.

-CollectedHeap::CollectedHeap()
+
+CollectedHeap::CollectedHeap() : _n_par_threads(0)
+
 {
  const size_t max_len = size_t(arrayOopDesc::max_array_length(T_INT));
  const size_t elements_per_word = HeapWordSize / sizeof(jint);
--- a/hotspot/src/share/vm/gc_interface/collectedHeap.hpp
+++ b/hotspot/src/share/vm/gc_interface/collectedHeap.hpp
@ -59,6 +59,8 @@ class CollectedHeap : public CHeapObj {
  MemRegion _reserved;
  BarrierSet* _barrier_set;
  bool _is_gc_active;
+  int _n_par_threads;
+
  unsigned int _total_collections;          // ... started
  unsigned int _total_full_collections;     // ... started
  NOT_PRODUCT(volatile size_t _promotion_failure_alot_count;)
@ -293,6 +295,12 @@ class CollectedHeap : public CHeapObj {
  }
  GCCause::Cause gc_cause() { return _gc_cause; }

+  // Number of threads currently working on GC tasks.
+  int n_par_threads() { return _n_par_threads; }
+
+  // May be overridden to set additional parallelism.
+  virtual void set_par_threads(int t) { _n_par_threads = t; };
+
  // Preload classes into the shared portion of the heap, and then dump
  // that data to a file so that it can be loaded directly by another
  // VM (then terminate).
@ -606,6 +614,14 @@ class CollectedHeap : public CHeapObj {
    return (CIFireOOMAt > 1 && _fire_out_of_memory_count >= CIFireOOMAt);
  }
 #endif
+
+ public:
+  // This is a convenience method that is used in cases where
+  // the actual number of GC worker threads is not pertinent but
+  // only whether there are more than 0.  Use of this method helps
+  // reduce the occurrence of ParallelGCThreads to uses where the
+  // actual number may be germane.
+  static bool use_parallel_gc_threads() { return ParallelGCThreads > 0; }
 };

 // Class to set and reset the GC cause for a CollectedHeap.
--- a/hotspot/src/share/vm/includeDB_core
+++ b/hotspot/src/share/vm/includeDB_core
@ -1437,12 +1437,14 @@ defNewGeneration.cpp                    oop.inline.hpp
 defNewGeneration.cpp                    referencePolicy.hpp
 defNewGeneration.cpp                    space.inline.hpp
 defNewGeneration.cpp                    spaceDecorator.hpp
+defNewGeneration.cpp                    stack.inline.hpp
 defNewGeneration.cpp                    thread_<os_family>.inline.hpp

 defNewGeneration.hpp                    ageTable.hpp
 defNewGeneration.hpp                    cSpaceCounters.hpp
 defNewGeneration.hpp                    generation.inline.hpp
 defNewGeneration.hpp                    generationCounters.hpp
+defNewGeneration.hpp                    stack.hpp

 defNewGeneration.inline.hpp             cardTableRS.hpp
 defNewGeneration.inline.hpp             defNewGeneration.hpp
@ -3461,6 +3463,7 @@ permGen.hpp                             gcCause.hpp
 permGen.hpp                             generation.hpp
 permGen.hpp                             handles.hpp
 permGen.hpp                             iterator.hpp
+permGen.hpp                             mutexLocker.hpp
 permGen.hpp                             virtualspace.hpp

 placeholders.cpp                        fieldType.hpp
@ -3871,6 +3874,10 @@ specialized_oop_closures.cpp            specialized_oop_closures.hpp

 specialized_oop_closures.hpp            atomic.hpp

+stack.hpp                               allocation.inline.hpp
+
+stack.inline.hpp                        stack.hpp
+
 stackMapFrame.cpp                       globalDefinitions.hpp
 stackMapFrame.cpp                       handles.inline.hpp
 stackMapFrame.cpp                       oop.inline.hpp
@ -4115,6 +4122,7 @@ task.hpp                                top.hpp
 taskqueue.cpp                           debug.hpp
 taskqueue.cpp				oop.inline.hpp
 taskqueue.cpp                           os.hpp
+taskqueue.cpp                           stack.inline.hpp
 taskqueue.cpp                           taskqueue.hpp
 taskqueue.cpp                           thread_<os_family>.inline.hpp

@ -4122,6 +4130,7 @@ taskqueue.hpp                           allocation.hpp
 taskqueue.hpp                           allocation.inline.hpp
 taskqueue.hpp                           mutex.hpp
 taskqueue.hpp                           orderAccess_<os_arch>.inline.hpp
+taskqueue.hpp				stack.hpp

 templateInterpreter.cpp                 interpreter.hpp
 templateInterpreter.cpp                 interpreterGenerator.hpp
@ -4741,6 +4750,7 @@ workgroup.cpp                           allocation.inline.hpp
 workgroup.cpp                           os.hpp
 workgroup.cpp                           workgroup.hpp

+workgroup.hpp                           taskqueue.hpp
 workgroup.hpp                           thread_<os_family>.inline.hpp

 xmlstream.cpp                           allocation.hpp
--- a/hotspot/src/share/vm/memory/allocation.hpp
+++ b/hotspot/src/share/vm/memory/allocation.hpp
@ -289,16 +289,17 @@ private:

 // One of the following macros must be used when allocating
 // an array or object from an arena
-#define NEW_ARENA_ARRAY(arena, type, size)\
-  (type*) arena->Amalloc((size) * sizeof(type))
+#define NEW_ARENA_ARRAY(arena, type, size) \
+  (type*) (arena)->Amalloc((size) * sizeof(type))

-#define REALLOC_ARENA_ARRAY(arena, type, old, old_size, new_size)\
-  (type*) arena->Arealloc((char*)(old), (old_size) * sizeof(type), (new_size) * sizeof(type) )
+#define REALLOC_ARENA_ARRAY(arena, type, old, old_size, new_size)    \
+  (type*) (arena)->Arealloc((char*)(old), (old_size) * sizeof(type), \
+                            (new_size) * sizeof(type) )

-#define FREE_ARENA_ARRAY(arena, type, old, size)\
-  arena->Afree((char*)(old), (size) * sizeof(type))
+#define FREE_ARENA_ARRAY(arena, type, old, size) \
+  (arena)->Afree((char*)(old), (size) * sizeof(type))

-#define NEW_ARENA_OBJ(arena, type)\
+#define NEW_ARENA_OBJ(arena, type) \
  NEW_ARENA_ARRAY(arena, type, 1)


--- a/hotspot/src/share/vm/memory/defNewGeneration.cpp
+++ b/hotspot/src/share/vm/memory/defNewGeneration.cpp
@ -87,9 +87,7 @@ void DefNewGeneration::FastEvacuateFollowersClosure::do_void() {
    _gch->oop_since_save_marks_iterate(_level, _scan_cur_or_nonheap,
                                       _scan_older);
  } while (!_gch->no_allocs_since_save_marks(_level));
-  guarantee(_gen->promo_failure_scan_stack() == NULL
-            || _gen->promo_failure_scan_stack()->length() == 0,
-            "Failed to finish scan");
+  guarantee(_gen->promo_failure_scan_is_complete(), "Failed to finish scan");
 }

 ScanClosure::ScanClosure(DefNewGeneration* g, bool gc_barrier) :
@ -130,9 +128,6 @@ DefNewGeneration::DefNewGeneration(ReservedSpace rs,
                                   int level,
                                   const char* policy)
  : Generation(rs, initial_size, level),
-    _objs_with_preserved_marks(NULL),
-    _preserved_marks_of_objs(NULL),
-    _promo_failure_scan_stack(NULL),
    _promo_failure_drain_in_progress(false),
    _should_allocate_from_space(false)
 {
@ -604,12 +599,8 @@ void DefNewGeneration::collect(bool   full,
  } else {
    assert(HandlePromotionFailure,
      "Should not be here unless promotion failure handling is on");
-    assert(_promo_failure_scan_stack != NULL &&
-      _promo_failure_scan_stack->length() == 0, "post condition");
-
-    // deallocate stack and it's elements
-    delete _promo_failure_scan_stack;
-    _promo_failure_scan_stack = NULL;
+    assert(_promo_failure_scan_stack.is_empty(), "post condition");
+    _promo_failure_scan_stack.clear(true); // Clear cached segments.

    remove_forwarding_pointers();
    if (PrintGCDetails) {
@ -620,7 +611,7 @@ void DefNewGeneration::collect(bool   full,
    // case there can be live objects in to-space
    // as a result of a partial evacuation of eden
    // and from-space.
-    swap_spaces();   // For the sake of uniformity wrt ParNewGeneration::collect().
+    swap_spaces();   // For uniformity wrt ParNewGeneration.
    from()->set_next_compaction_space(to());
    gch->set_incremental_collection_will_fail();

@ -653,34 +644,23 @@ void DefNewGeneration::remove_forwarding_pointers() {
  RemoveForwardPointerClosure rspc;
  eden()->object_iterate(&rspc);
  from()->object_iterate(&rspc);
+
  // Now restore saved marks, if any.
-  if (_objs_with_preserved_marks != NULL) {
-    assert(_preserved_marks_of_objs != NULL, "Both or none.");
-    assert(_objs_with_preserved_marks->length() ==
-           _preserved_marks_of_objs->length(), "Both or none.");
-    for (int i = 0; i < _objs_with_preserved_marks->length(); i++) {
-      oop obj   = _objs_with_preserved_marks->at(i);
-      markOop m = _preserved_marks_of_objs->at(i);
+  assert(_objs_with_preserved_marks.size() == _preserved_marks_of_objs.size(),
+         "should be the same");
+  while (!_objs_with_preserved_marks.is_empty()) {
+    oop obj   = _objs_with_preserved_marks.pop();
+    markOop m = _preserved_marks_of_objs.pop();
    obj->set_mark(m);
  }
-    delete _objs_with_preserved_marks;
-    delete _preserved_marks_of_objs;
-    _objs_with_preserved_marks = NULL;
-    _preserved_marks_of_objs = NULL;
-  }
+  _objs_with_preserved_marks.clear(true);
+  _preserved_marks_of_objs.clear(true);
 }

 void DefNewGeneration::preserve_mark_if_necessary(oop obj, markOop m) {
  if (m->must_be_preserved_for_promotion_failure(obj)) {
-    if (_objs_with_preserved_marks == NULL) {
-      assert(_preserved_marks_of_objs == NULL, "Both or none.");
-      _objs_with_preserved_marks = new (ResourceObj::C_HEAP)
-        GrowableArray<oop>(PreserveMarkStackSize, true);
-      _preserved_marks_of_objs = new (ResourceObj::C_HEAP)
-        GrowableArray<markOop>(PreserveMarkStackSize, true);
-    }
-    _objs_with_preserved_marks->push(obj);
-    _preserved_marks_of_objs->push(m);
+    _objs_with_preserved_marks.push(obj);
+    _preserved_marks_of_objs.push(m);
  }
 }

@ -695,7 +675,7 @@ void DefNewGeneration::handle_promotion_failure(oop old) {
  old->forward_to(old);
  _promotion_failed = true;

-  push_on_promo_failure_scan_stack(old);
+  _promo_failure_scan_stack.push(old);

  if (!_promo_failure_drain_in_progress) {
    // prevent recursion in copy_to_survivor_space()
@ -748,20 +728,9 @@ oop DefNewGeneration::copy_to_survivor_space(oop old) {
  return obj;
 }

-void DefNewGeneration::push_on_promo_failure_scan_stack(oop obj) {
-  if (_promo_failure_scan_stack == NULL) {
-    _promo_failure_scan_stack = new (ResourceObj::C_HEAP)
-                                    GrowableArray<oop>(40, true);
-  }
-
-  _promo_failure_scan_stack->push(obj);
-}
-
 void DefNewGeneration::drain_promo_failure_scan_stack() {
-  assert(_promo_failure_scan_stack != NULL, "precondition");
-
-  while (_promo_failure_scan_stack->length() > 0) {
-     oop obj = _promo_failure_scan_stack->pop();
+  while (!_promo_failure_scan_stack.is_empty()) {
+     oop obj = _promo_failure_scan_stack.pop();
     obj->oop_iterate(_promo_failure_scan_stack_closure);
  }
 }
--- a/hotspot/src/share/vm/memory/defNewGeneration.hpp
+++ b/hotspot/src/share/vm/memory/defNewGeneration.hpp
@ -77,10 +77,10 @@ protected:
  // word being overwritten with a self-forwarding-pointer.
  void   preserve_mark_if_necessary(oop obj, markOop m);

-  // When one is non-null, so is the other.  Together, they each pair is
-  // an object with a preserved mark, and its mark value.
-  GrowableArray<oop>*     _objs_with_preserved_marks;
-  GrowableArray<markOop>* _preserved_marks_of_objs;
+  // Together, these keep <object with a preserved mark, mark value> pairs.
+  // They should always contain the same number of elements.
+  Stack<oop>     _objs_with_preserved_marks;
+  Stack<markOop> _preserved_marks_of_objs;

  // Returns true if the collection can be safely attempted.
  // If this method returns false, a collection is not
@ -94,11 +94,7 @@ protected:
    _promo_failure_scan_stack_closure = scan_stack_closure;
  }

-  GrowableArray<oop>* _promo_failure_scan_stack;
-  GrowableArray<oop>* promo_failure_scan_stack() const {
-    return _promo_failure_scan_stack;
-  }
-  void push_on_promo_failure_scan_stack(oop);
+  Stack<oop> _promo_failure_scan_stack;
  void drain_promo_failure_scan_stack(void);
  bool _promo_failure_drain_in_progress;

@ -184,8 +180,6 @@ protected:
    void do_void();
  };

-  class FastEvacuateFollowersClosure;
-  friend class FastEvacuateFollowersClosure;
  class FastEvacuateFollowersClosure: public VoidClosure {
    GenCollectedHeap* _gch;
    int _level;
@ -336,6 +330,10 @@ protected:

  void verify(bool allow_dirty);

+  bool promo_failure_scan_is_complete() const {
+    return _promo_failure_scan_stack.is_empty();
+  }
+
 protected:
  // If clear_space is true, clear the survivor spaces.  Eden is
  // cleared if the minimum size of eden is 0.  If mangle_space
--- a/hotspot/src/share/vm/memory/genCollectedHeap.cpp
+++ b/hotspot/src/share/vm/memory/genCollectedHeap.cpp
@ -676,7 +676,7 @@ HeapWord* GenCollectedHeap::satisfy_failed_allocation(size_t size, bool is_tlab)

 void GenCollectedHeap::set_par_threads(int t) {
  SharedHeap::set_par_threads(t);
-  _gen_process_strong_tasks->set_par_threads(t);
+  _gen_process_strong_tasks->set_n_threads(t);
 }

 class AssertIsPermClosure: public OopClosure {
--- a/hotspot/src/share/vm/memory/genCollectedHeap.hpp
+++ b/hotspot/src/share/vm/memory/genCollectedHeap.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -74,6 +74,7 @@ public:
  // Data structure for claiming the (potentially) parallel tasks in
  // (gen-specific) strong roots processing.
  SubTasksDone* _gen_process_strong_tasks;
+  SubTasksDone* gen_process_strong_tasks() { return _gen_process_strong_tasks; }

  // In block contents verification, the number of header words to skip
  NOT_PRODUCT(static size_t _skip_header_HeapWords;)
--- a/hotspot/src/share/vm/memory/genMarkSweep.cpp
+++ b/hotspot/src/share/vm/memory/genMarkSweep.cpp
@ -161,17 +161,6 @@ void GenMarkSweep::allocate_stacks() {

  _preserved_marks = (PreservedMark*)scratch;
  _preserved_count = 0;
-  _preserved_mark_stack = NULL;
-  _preserved_oop_stack = NULL;
-
-  _marking_stack       = new (ResourceObj::C_HEAP) GrowableArray<oop>(4000, true);
-  _objarray_stack      = new (ResourceObj::C_HEAP) GrowableArray<ObjArrayTask>(50, true);
-
-  int size = SystemDictionary::number_of_classes() * 2;
-  _revisit_klass_stack = new (ResourceObj::C_HEAP) GrowableArray<Klass*>(size, true);
-  // (#klass/k)^2 for k ~ 10 appears to be a better fit, but this will have to do for
-  // now until we have had a chance to investigate a more optimal setting.
-  _revisit_mdo_stack   = new (ResourceObj::C_HEAP) GrowableArray<DataLayout*>(2*size, true);

 #ifdef VALIDATE_MARK_SWEEP
  if (ValidateMarkSweep) {
@ -206,17 +195,12 @@ void GenMarkSweep::deallocate_stacks() {
    gch->release_scratch();
  }

-  if (_preserved_oop_stack) {
-    delete _preserved_mark_stack;
-    _preserved_mark_stack = NULL;
-    delete _preserved_oop_stack;
-    _preserved_oop_stack = NULL;
-  }
-
-  delete _marking_stack;
-  delete _objarray_stack;
-  delete _revisit_klass_stack;
-  delete _revisit_mdo_stack;
+  _preserved_mark_stack.clear(true);
+  _preserved_oop_stack.clear(true);
+  _marking_stack.clear();
+  _objarray_stack.clear(true);
+  _revisit_klass_stack.clear(true);
+  _revisit_mdo_stack.clear(true);

 #ifdef VALIDATE_MARK_SWEEP
  if (ValidateMarkSweep) {
@ -274,17 +258,17 @@ void GenMarkSweep::mark_sweep_phase1(int level,

  // Update subklass/sibling/implementor links of live klasses
  follow_weak_klass_links();
-  assert(_marking_stack->is_empty(), "just drained");
+  assert(_marking_stack.is_empty(), "just drained");

  // Visit memoized MDO's and clear any unmarked weak refs
  follow_mdo_weak_refs();
-  assert(_marking_stack->is_empty(), "just drained");
+  assert(_marking_stack.is_empty(), "just drained");

  // Visit symbol and interned string tables and delete unmarked oops
  SymbolTable::unlink(&is_alive);
  StringTable::unlink(&is_alive);

-  assert(_marking_stack->is_empty(), "stack should be empty by now");
+  assert(_marking_stack.is_empty(), "stack should be empty by now");
 }


--- a/hotspot/src/share/vm/memory/permGen.cpp
+++ b/hotspot/src/share/vm/memory/permGen.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -25,6 +25,17 @@
 #include "incls/_precompiled.incl"
 #include "incls/_permGen.cpp.incl"

+HeapWord* PermGen::request_expand_and_allocate(Generation* gen, size_t size,
+                                               GCCause::Cause prev_cause) {
+  // Default policy: expand while capacity is under the expansion limit or
+  // when a prior GC cause is recorded; the G1 case is a temporary hack.
+  const bool under_limit = gen->capacity() < _capacity_expansion_limit;
+  const bool expand_ok = under_limit || prev_cause != GCCause::_no_gc || UseG1GC;
+  // Denied requests yield NULL: no further expansion until a GC is done.
+  if (!expand_ok) return NULL;
+  return gen->expand_and_allocate(size, false);
+}
+
 HeapWord* PermGen::mem_allocate_in_gen(size_t size, Generation* gen) {
  GCCause::Cause next_cause = GCCause::_permanent_generation_full;
  GCCause::Cause prev_cause = GCCause::_no_gc;
@ -37,10 +48,14 @@ HeapWord* PermGen::mem_allocate_in_gen(size_t size, Generation* gen) {
      if ((obj = gen->allocate(size, false)) != NULL) {
        return obj;
      }
-      if (gen->capacity() < _capacity_expansion_limit ||
-          prev_cause != GCCause::_no_gc) {
-        obj = gen->expand_and_allocate(size, false);
-      }
+      // Attempt to expand and allocate the requested space:
+      // specific subtypes may use specific policy to either expand
+      // or not. The default policy (see above) is to expand until
+      // _capacity_expansion_limit, and no further unless a GC is done.
+      // Concurrent collectors may decide to kick off a concurrent
+      // collection under appropriate conditions.
+      obj = request_expand_and_allocate(gen, size, prev_cause);
+
      if (obj != NULL || prev_cause == GCCause::_last_ditch_collection) {
        return obj;
      }
@ -119,5 +134,5 @@ void CompactingPermGen::compute_new_size() {
  if (_gen->capacity() > desired_capacity) {
    _gen->shrink(_gen->capacity() - desired_capacity);
  }
-  _capacity_expansion_limit = _gen->capacity() + MaxPermHeapExpansion;
+  set_capacity_expansion_limit(_gen->capacity() + MaxPermHeapExpansion);
 }
--- a/hotspot/src/share/vm/memory/permGen.hpp
+++ b/hotspot/src/share/vm/memory/permGen.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -30,15 +30,26 @@ class Generation;
 class GenRemSet;
 class CSpaceCounters;

-// PermGen models the part of the heap
+// PermGen models the part of the heap used to allocate class meta-data.

 class PermGen : public CHeapObj {
  friend class VMStructs;
 protected:
  size_t _capacity_expansion_limit;  // maximum expansion allowed without a
                                     // full gc occurring
+  void set_capacity_expansion_limit(size_t limit) {
+    assert_locked_or_safepoint(Heap_lock);
+    _capacity_expansion_limit = limit;
+  }

  HeapWord* mem_allocate_in_gen(size_t size, Generation* gen);
+  // Along with mem_allocate_in_gen() above, implements policy for
+  // "scheduling" allocation/expansion/collection of the perm gen.
+  // The virtual method request_...() below can be overridden by
+  // subtypes that want to implement a different expansion/collection
+  // policy from the default provided.
+  virtual HeapWord* request_expand_and_allocate(Generation* gen, size_t size,
+                                                GCCause::Cause prev_cause);

 public:
  enum Name {
--- a/hotspot/src/share/vm/memory/referenceProcessor.cpp
+++ b/hotspot/src/share/vm/memory/referenceProcessor.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -137,16 +137,17 @@ ReferenceProcessor::ReferenceProcessor(MemRegion span,
  _discovery_is_atomic = atomic_discovery;
  _discovery_is_mt     = mt_discovery;
  _num_q               = mt_degree;
-  _discoveredSoftRefs  = NEW_C_HEAP_ARRAY(DiscoveredList, _num_q * subclasses_of_ref);
+  _max_num_q           = mt_degree;
+  _discoveredSoftRefs  = NEW_C_HEAP_ARRAY(DiscoveredList, _max_num_q * subclasses_of_ref);
  if (_discoveredSoftRefs == NULL) {
    vm_exit_during_initialization("Could not allocated RefProc Array");
  }
-  _discoveredWeakRefs    = &_discoveredSoftRefs[_num_q];
-  _discoveredFinalRefs   = &_discoveredWeakRefs[_num_q];
-  _discoveredPhantomRefs = &_discoveredFinalRefs[_num_q];
+  _discoveredWeakRefs    = &_discoveredSoftRefs[_max_num_q];
+  _discoveredFinalRefs   = &_discoveredWeakRefs[_max_num_q];
+  _discoveredPhantomRefs = &_discoveredFinalRefs[_max_num_q];
  assert(sentinel_ref() != NULL, "_sentinelRef is NULL");
  // Initialized all entries to _sentinelRef
-  for (int i = 0; i < _num_q * subclasses_of_ref; i++) {
+  for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) {
        _discoveredSoftRefs[i].set_head(sentinel_ref());
    _discoveredSoftRefs[i].set_length(0);
  }
@ -159,7 +160,7 @@ ReferenceProcessor::ReferenceProcessor(MemRegion span,
 #ifndef PRODUCT
 void ReferenceProcessor::verify_no_references_recorded() {
  guarantee(!_discovering_refs, "Discovering refs?");
-  for (int i = 0; i < _num_q * subclasses_of_ref; i++) {
+  for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) {
    guarantee(_discoveredSoftRefs[i].empty(),
              "Found non-empty discovered list");
  }
@ -167,7 +168,11 @@ void ReferenceProcessor::verify_no_references_recorded() {
 #endif

 void ReferenceProcessor::weak_oops_do(OopClosure* f) {
-  for (int i = 0; i < _num_q * subclasses_of_ref; i++) {
+  // Should this instead be
+  // for (int i = 0; i < subclasses_of_ref; i++) {
+  //   for (int j = 0; j < _num_q; j++) {
+  //     int index = i * _max_num_q + j;
+  for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) {
    if (UseCompressedOops) {
      f->do_oop((narrowOop*)_discoveredSoftRefs[i].adr_head());
    } else {
@ -395,7 +400,15 @@ public:
    assert(work_id < (unsigned int)_ref_processor.num_q(), "Index out-of-bounds");
    // Simplest first cut: static partitioning.
    int index = work_id;
-    for (int j = 0; j < subclasses_of_ref; j++, index += _n_queues) {
+    // The increment on "index" must correspond to the maximum number of queues
+    // (n_queues) with which that ReferenceProcessor was created.  That
+    // is because of the "clever" way the discovered references lists were
+    // allocated and are indexed into.  That number is ParallelGCThreads
+    // currently.  Assert that.
+    assert(_n_queues == (int) ParallelGCThreads, "Different number not expected");
+    for (int j = 0;
+         j < subclasses_of_ref;
+         j++, index += _n_queues) {
      _ref_processor.enqueue_discovered_reflist(
        _refs_lists[index], _pending_list_addr);
      _refs_lists[index].set_head(_sentinel_ref);
@ -410,11 +423,11 @@ void ReferenceProcessor::enqueue_discovered_reflists(HeapWord* pending_list_addr
  if (_processing_is_mt && task_executor != NULL) {
    // Parallel code
    RefProcEnqueueTask tsk(*this, _discoveredSoftRefs,
-                           pending_list_addr, sentinel_ref(), _num_q);
+                           pending_list_addr, sentinel_ref(), _max_num_q);
    task_executor->execute(tsk);
  } else {
    // Serial code: call the parent class's implementation
-    for (int i = 0; i < _num_q * subclasses_of_ref; i++) {
+    for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) {
      enqueue_discovered_reflist(_discoveredSoftRefs[i], pending_list_addr);
      _discoveredSoftRefs[i].set_head(sentinel_ref());
      _discoveredSoftRefs[i].set_length(0);
@ -614,8 +627,9 @@ ReferenceProcessor::process_phase1(DiscoveredList&    refs_list,
  complete_gc->do_void();
  NOT_PRODUCT(
    if (PrintGCDetails && TraceReferenceGC) {
-      gclog_or_tty->print(" Dropped %d dead Refs out of %d "
-        "discovered Refs by policy ", iter.removed(), iter.processed());
+      gclog_or_tty->print_cr(" Dropped %d dead Refs out of %d "
+        "discovered Refs by policy  list " INTPTR_FORMAT,
+        iter.removed(), iter.processed(), (address)refs_list.head());
    }
  )
 }
@ -651,8 +665,9 @@ ReferenceProcessor::pp2_work(DiscoveredList&    refs_list,
  }
  NOT_PRODUCT(
    if (PrintGCDetails && TraceReferenceGC) {
-      gclog_or_tty->print(" Dropped %d active Refs out of %d "
-        "Refs in discovered list ", iter.removed(), iter.processed());
+      gclog_or_tty->print_cr(" Dropped %d active Refs out of %d "
+        "Refs in discovered list " INTPTR_FORMAT,
+        iter.removed(), iter.processed(), (address)refs_list.head());
    }
  )
 }
@ -689,8 +704,9 @@ ReferenceProcessor::pp2_work_concurrent_discovery(DiscoveredList&    refs_list,
  complete_gc->do_void();
  NOT_PRODUCT(
    if (PrintGCDetails && TraceReferenceGC) {
-      gclog_or_tty->print(" Dropped %d active Refs out of %d "
-        "Refs in discovered list ", iter.removed(), iter.processed());
+      gclog_or_tty->print_cr(" Dropped %d active Refs out of %d "
+        "Refs in discovered list " INTPTR_FORMAT,
+        iter.removed(), iter.processed(), (address)refs_list.head());
    }
  )
 }
@ -704,6 +720,7 @@ ReferenceProcessor::process_phase3(DiscoveredList&    refs_list,
                                   BoolObjectClosure* is_alive,
                                   OopClosure*        keep_alive,
                                   VoidClosure*       complete_gc) {
+  ResourceMark rm;
  DiscoveredListIterator iter(refs_list, keep_alive, is_alive);
  while (iter.has_next()) {
    iter.update_discovered();
@ -743,8 +760,8 @@ ReferenceProcessor::abandon_partial_discovered_list(DiscoveredList& refs_list) {

 void ReferenceProcessor::abandon_partial_discovery() {
  // loop over the lists
-  for (int i = 0; i < _num_q * subclasses_of_ref; i++) {
-    if (TraceReferenceGC && PrintGCDetails && ((i % _num_q) == 0)) {
+  for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) {
+    if (TraceReferenceGC && PrintGCDetails && ((i % _max_num_q) == 0)) {
      gclog_or_tty->print_cr(
        "\nAbandoning %s discovered list",
        list_name(i));
@ -766,7 +783,9 @@ public:
                    OopClosure& keep_alive,
                    VoidClosure& complete_gc)
  {
-    _ref_processor.process_phase1(_refs_lists[i], _policy,
+    Thread* thr = Thread::current();
+    int refs_list_index = ((WorkerThread*)thr)->id();
+    _ref_processor.process_phase1(_refs_lists[refs_list_index], _policy,
                                  &is_alive, &keep_alive, &complete_gc);
  }
 private:
@ -802,6 +821,11 @@ public:
                    OopClosure& keep_alive,
                    VoidClosure& complete_gc)
  {
+    // Don't use "refs_list_index" calculated in this way because
+    // balance_queues() has moved the Ref's into the first n queues.
+    // Thread* thr = Thread::current();
+    // int refs_list_index = ((WorkerThread*)thr)->id();
+    // _ref_processor.process_phase3(_refs_lists[refs_list_index], _clear_referent,
    _ref_processor.process_phase3(_refs_lists[i], _clear_referent,
                                  &is_alive, &keep_alive, &complete_gc);
  }
@ -810,23 +834,47 @@ private:
 };

 // Balances reference queues.
+// Move entries from all queues[0, 1, ..., _max_num_q-1] to
+// queues[0, 1, ..., _num_q-1] because only the first _num_q
+// corresponding to the active workers will be processed.
 void ReferenceProcessor::balance_queues(DiscoveredList ref_lists[])
 {
  // calculate total length
  size_t total_refs = 0;
-  for (int i = 0; i < _num_q; ++i) {
+  if (TraceReferenceGC && PrintGCDetails) {
+    gclog_or_tty->print_cr("\nBalance ref_lists ");
+  }
+  // Sum over every allocated queue (_max_num_q), not only the active ones.
+  for (int i = 0; i < _max_num_q; ++i) {
    total_refs += ref_lists[i].length();
+    if (TraceReferenceGC && PrintGCDetails) {
+      gclog_or_tty->print(SIZE_FORMAT " ", ref_lists[i].length());  // length() is a size_t
+    }
+  }
+  if (TraceReferenceGC && PrintGCDetails) {
+    gclog_or_tty->print_cr(" = " SIZE_FORMAT, total_refs);  // size_t: SIZE_FORMAT, not %d
  }
  size_t avg_refs = total_refs / _num_q + 1;
  int to_idx = 0;
-  for (int from_idx = 0; from_idx < _num_q; from_idx++) {
-    while (ref_lists[from_idx].length() > avg_refs) {
+  for (int from_idx = 0; from_idx < _max_num_q; from_idx++) {
+    bool move_all = false;
+    if (from_idx >= _num_q) {
+      move_all = ref_lists[from_idx].length() > 0;
+    }
+    while ((ref_lists[from_idx].length() > avg_refs) ||
+           move_all) {
      assert(to_idx < _num_q, "Sanity Check!");
      if (ref_lists[to_idx].length() < avg_refs) {
        // move superfluous refs
-        size_t refs_to_move =
-          MIN2(ref_lists[from_idx].length() - avg_refs,
+        size_t refs_to_move;
+        // Move all the Ref's if the from queue will not be processed.
+        if (move_all) {
+          refs_to_move = MIN2(ref_lists[from_idx].length(),
                              avg_refs - ref_lists[to_idx].length());
+        } else {
+          refs_to_move = MIN2(ref_lists[from_idx].length() - avg_refs,
+                              avg_refs - ref_lists[to_idx].length());
+        }
        oop move_head = ref_lists[from_idx].head();
        oop move_tail = move_head;
        oop new_head  = move_head;
@ -840,11 +888,35 @@ void ReferenceProcessor::balance_queues(DiscoveredList ref_lists[])
        ref_lists[to_idx].inc_length(refs_to_move);
        ref_lists[from_idx].set_head(new_head);
        ref_lists[from_idx].dec_length(refs_to_move);
+        if (ref_lists[from_idx].length() == 0) {
+          break;
+        }
      } else {
-        ++to_idx;
+        to_idx = (to_idx + 1) % _num_q;
      }
    }
  }
+#ifdef ASSERT
+  size_t balanced_total_refs = 0;  // recount after moving; must equal total_refs
+  for (int i = 0; i < _max_num_q; ++i) {
+    balanced_total_refs += ref_lists[i].length();
+    if (TraceReferenceGC && PrintGCDetails) {
+      gclog_or_tty->print(SIZE_FORMAT " ", ref_lists[i].length());  // length() is a size_t
+    }
+  }
+  if (TraceReferenceGC && PrintGCDetails) {
+    gclog_or_tty->print_cr(" = " SIZE_FORMAT, balanced_total_refs);  // size_t: not %d
+    gclog_or_tty->flush();
+  }
+  assert(total_refs == balanced_total_refs, "Balancing was incomplete");
+#endif
+}
+
+void ReferenceProcessor::balance_all_queues() {
+  balance_queues(_discoveredSoftRefs);
+  balance_queues(_discoveredWeakRefs);
+  balance_queues(_discoveredFinalRefs);
+  balance_queues(_discoveredPhantomRefs);
 }

 void
@ -857,8 +929,17 @@ ReferenceProcessor::process_discovered_reflist(
  VoidClosure*                 complete_gc,
  AbstractRefProcTaskExecutor* task_executor)
 {
-  bool mt = task_executor != NULL && _processing_is_mt;
-  if (mt && ParallelRefProcBalancingEnabled) {
+  bool mt_processing = task_executor != NULL && _processing_is_mt;
+  // If discovery used MT and a dynamic number of GC threads, then
+  // the queues must be balanced for correctness if fewer than the
+  // maximum number of queues were used.  The number of queues used
+  // during discovery may be different from the number to be used
+  // for processing, so don't depend on _num_q < _max_num_q as part
+  // of the test.
+  bool must_balance = _discovery_is_mt;
+
+  if ((mt_processing && ParallelRefProcBalancingEnabled) ||
+      must_balance) {
    balance_queues(refs_lists);
  }
  if (PrintReferenceGC && PrintGCDetails) {
@ -875,7 +956,7 @@ ReferenceProcessor::process_discovered_reflist(
  //   policy reasons. Keep alive the transitive closure of all
  //   such referents.
  if (policy != NULL) {
-    if (mt) {
+    if (mt_processing) {
      RefProcPhase1Task phase1(*this, refs_lists, policy, true /*marks_oops_alive*/);
      task_executor->execute(phase1);
    } else {
@ -891,7 +972,7 @@ ReferenceProcessor::process_discovered_reflist(

  // Phase 2:
  // . Traverse the list and remove any refs whose referents are alive.
-  if (mt) {
+  if (mt_processing) {
    RefProcPhase2Task phase2(*this, refs_lists, !discovery_is_atomic() /*marks_oops_alive*/);
    task_executor->execute(phase2);
  } else {
@ -902,7 +983,7 @@ ReferenceProcessor::process_discovered_reflist(

  // Phase 3:
  // . Traverse the list and process referents as appropriate.
-  if (mt) {
+  if (mt_processing) {
    RefProcPhase3Task phase3(*this, refs_lists, clear_referent, true /*marks_oops_alive*/);
    task_executor->execute(phase3);
  } else {
@ -915,7 +996,11 @@ ReferenceProcessor::process_discovered_reflist(

 void ReferenceProcessor::clean_up_discovered_references() {
  // loop over the lists
-  for (int i = 0; i < _num_q * subclasses_of_ref; i++) {
+  // Should this instead be
+  // for (int i = 0; i < subclasses_of_ref; i++) {
+  //   for (int j = 0; j < _num_q; j++) {
+  //     int index = i * _max_num_q + j;
+  for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) {
-    if (TraceReferenceGC && PrintGCDetails && ((i % _num_q) == 0)) {
+    if (TraceReferenceGC && PrintGCDetails && ((i % _max_num_q) == 0)) {
      gclog_or_tty->print_cr(
        "\nScrubbing %s discovered list of Null referents",
@ -976,7 +1061,7 @@ inline DiscoveredList* ReferenceProcessor::get_discovered_list(ReferenceType rt)
      id = next_id();
    }
  }
-  assert(0 <= id && id < _num_q, "Id is out-of-bounds (call Freud?)");
+  assert(0 <= id && id < _max_num_q, "Id is out-of-bounds (call Freud?)");

  // Get the discovered queue to which we will add
  DiscoveredList* list = NULL;
@ -1001,6 +1086,10 @@ inline DiscoveredList* ReferenceProcessor::get_discovered_list(ReferenceType rt)
    default:
      ShouldNotReachHere();
  }
+  if (TraceReferenceGC && PrintGCDetails) {
+    gclog_or_tty->print_cr("Thread %d gets list " INTPTR_FORMAT,
+      id, list);
+  }
  return list;
 }

@ -1243,7 +1332,7 @@ void ReferenceProcessor::preclean_discovered_references(
  {
    TraceTime tt("Preclean SoftReferences", PrintGCDetails && PrintReferenceGC,
              false, gclog_or_tty);
-    for (int i = 0; i < _num_q; i++) {
+    for (int i = 0; i < _max_num_q; i++) {
      if (yield->should_return()) {
        return;
      }
@ -1340,15 +1429,16 @@ ReferenceProcessor::preclean_discovered_reflist(DiscoveredList&    refs_list,

  NOT_PRODUCT(
    if (PrintGCDetails && PrintReferenceGC) {
-      gclog_or_tty->print(" Dropped %d Refs out of %d "
-        "Refs in discovered list ", iter.removed(), iter.processed());
+      gclog_or_tty->print_cr(" Dropped %d Refs out of %d "
+        "Refs in discovered list " INTPTR_FORMAT,
+        iter.removed(), iter.processed(), (address)refs_list.head());
    }
  )
 }

 const char* ReferenceProcessor::list_name(int i) {
-   assert(i >= 0 && i <= _num_q * subclasses_of_ref, "Out of bounds index");
-   int j = i / _num_q;
+   assert(i >= 0 && i <= _max_num_q * subclasses_of_ref, "Out of bounds index");
+   int j = i / _max_num_q;
   switch (j) {
     case 0: return "SoftRef";
     case 1: return "WeakRef";
@ -1372,7 +1462,7 @@ void ReferenceProcessor::verify() {
 #ifndef PRODUCT
 void ReferenceProcessor::clear_discovered_references() {
  guarantee(!_discovering_refs, "Discovering refs?");
-  for (int i = 0; i < _num_q * subclasses_of_ref; i++) {
+  for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) {
    oop obj = _discoveredSoftRefs[i].head();
    while (obj != sentinel_ref()) {
      oop next = java_lang_ref_Reference::discovered(obj);
--- a/hotspot/src/share/vm/memory/referenceProcessor.hpp
+++ b/hotspot/src/share/vm/memory/referenceProcessor.hpp
@ -85,8 +85,10 @@ class ReferenceProcessor : public CHeapObj {

  // The discovered ref lists themselves

-  // The MT'ness degree of the queues below
+  // The active MT'ness degree of the queues below
  int             _num_q;
+  // The maximum MT'ness degree of the queues below
+  int             _max_num_q;
  // Arrays of lists of oops, one per thread
  DiscoveredList* _discoveredSoftRefs;
  DiscoveredList* _discoveredWeakRefs;
@ -95,6 +97,7 @@ class ReferenceProcessor : public CHeapObj {

 public:
  int num_q()                            { return _num_q; }
+  void set_mt_degree(int v)              { _num_q = v; }
  DiscoveredList* discovered_soft_refs() { return _discoveredSoftRefs; }
  static oop  sentinel_ref()             { return _sentinelRef; }
  static oop* adr_sentinel_ref()         { return &_sentinelRef; }
@ -244,6 +247,7 @@ class ReferenceProcessor : public CHeapObj {
    _bs(NULL),
    _is_alive_non_header(NULL),
    _num_q(0),
+    _max_num_q(0),
    _processing_is_mt(false),
    _next_id(0)
  {}
@ -312,6 +316,9 @@ class ReferenceProcessor : public CHeapObj {
  void weak_oops_do(OopClosure* f);       // weak roots
  static void oops_do(OopClosure* f);     // strong root(s)

+  // Balance each of the discovered lists.
+  void balance_all_queues();
+
  // Discover a Reference object, using appropriate discovery criteria
  bool discover_reference(oop obj, ReferenceType rt);

--- a/hotspot/src/share/vm/memory/sharedHeap.cpp
+++ b/hotspot/src/share/vm/memory/sharedHeap.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -50,7 +50,8 @@ SharedHeap::SharedHeap(CollectorPolicy* policy_) :
  _perm_gen(NULL), _rem_set(NULL),
  _strong_roots_parity(0),
  _process_strong_tasks(new SubTasksDone(SH_PS_NumElements)),
-  _workers(NULL), _n_par_threads(0)
+  _n_par_threads(0),
+  _workers(NULL)
 {
  if (_process_strong_tasks == NULL || !_process_strong_tasks->valid()) {
    vm_exit_during_initialization("Failed necessary allocation.");
@ -60,11 +61,13 @@ SharedHeap::SharedHeap(CollectorPolicy* policy_) :
      (UseConcMarkSweepGC && CMSParallelRemarkEnabled) ||
       UseG1GC) &&
      ParallelGCThreads > 0) {
-    _workers = new WorkGang("Parallel GC Threads", ParallelGCThreads,
+    _workers = new FlexibleWorkGang("Parallel GC Threads", ParallelGCThreads,
                            /* are_GC_task_threads */true,
                            /* are_ConcurrentGC_threads */false);
    if (_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
+    } else {
+      _workers->initialize_workers();
    }
  }
 }
@ -77,8 +80,9 @@ bool SharedHeap::heap_lock_held_for_gc() {
 }

 void SharedHeap::set_par_threads(int t) {
+  assert(t == 0 || !UseSerialGC, "Cannot have parallel threads");
  _n_par_threads = t;
-  _process_strong_tasks->set_par_threads(t);
+  _process_strong_tasks->set_n_threads(t);
 }

 class AssertIsPermClosure: public OopClosure {
--- a/hotspot/src/share/vm/memory/sharedHeap.hpp
+++ b/hotspot/src/share/vm/memory/sharedHeap.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -38,6 +38,7 @@ class OopsInGenClosure;
 class ObjectClosure;
 class SubTasksDone;
 class WorkGang;
+class FlexibleWorkGang;
 class CollectorPolicy;
 class KlassHandle;

@ -74,7 +75,7 @@ protected:
  int _strong_roots_parity;

  // If we're doing parallel GC, use this gang of threads.
-  WorkGang* _workers;
+  FlexibleWorkGang* _workers;

  // Number of parallel threads currently working on GC tasks.
  // O indicates use sequential code; 1 means use parallel code even with
@ -189,7 +190,7 @@ public:
    SO_CodeCache           = 0x10
  };

-  WorkGang* workers() const { return _workers; }
+  FlexibleWorkGang* workers() const { return _workers; }

  // Sets the number of parallel threads that will be doing tasks
  // (such as process strong roots) subsequently.
--- a/hotspot/src/share/vm/runtime/globals.hpp
+++ b/hotspot/src/share/vm/runtime/globals.hpp
@ -273,6 +273,10 @@ class CommandLineFlags {
 //    UnlockExperimentalVMOptions flag, which allows the control and
 //    modification of the experimental flags.
 //
+// Nota bene: neither diagnostic nor experimental options should be used casually,
+//    and they are not supported on production loads, except under explicit
+//    direction from support engineers.
+//
 // manageable flags are writeable external product flags.
 //    They are dynamically writeable through the JDK management interface
 //    (com.sun.management.HotSpotDiagnosticMXBean API) and also through JConsole.
@ -634,6 +638,9 @@ class CommandLineFlags {
  develop(bool, ZapJNIHandleArea, trueInDebug,                              \
          "Zap freed JNI handle space with 0xFEFEFEFE")                     \
                                                                            \
+  notproduct(bool, ZapStackSegments, trueInDebug,                           \
+             "Zap allocated/freed Stack segments with 0xFADFADED")          \
+                                                                            \
  develop(bool, ZapUnusedHeapArea, trueInDebug,                             \
          "Zap unused heap space with 0xBAADBABE")                          \
                                                                            \
@ -1799,17 +1806,17 @@ class CommandLineFlags {
  develop(uintx, PromotionFailureALotInterval, 5,                           \
          "Total collections between promotion failures alot")              \
                                                                            \
-  develop(intx, WorkStealingSleepMillis, 1,                                 \
+  experimental(intx, WorkStealingSleepMillis, 1,                            \
          "Sleep time when sleep is used for yields")                       \
                                                                            \
-  develop(uintx, WorkStealingYieldsBeforeSleep, 1000,                       \
+  experimental(uintx, WorkStealingYieldsBeforeSleep, 1000,                  \
          "Number of yields before a sleep is done during workstealing")    \
                                                                            \
-  develop(uintx, WorkStealingHardSpins, 4096,                               \
+  experimental(uintx, WorkStealingHardSpins, 4096,                          \
          "Number of iterations in a spin loop between checks on "          \
          "time out of hard spin")                                          \
                                                                            \
-  develop(uintx, WorkStealingSpinToYieldRatio, 10,                          \
+  experimental(uintx, WorkStealingSpinToYieldRatio, 10,                     \
          "Ratio of hard spins to calls to yield")                          \
                                                                            \
  product(uintx, PreserveMarkStackSize, 1024,                               \
--- a/hotspot/src/share/vm/runtime/thread.cpp
+++ b/hotspot/src/share/vm/runtime/thread.cpp
@ -1645,7 +1645,29 @@ void JavaThread::flush_barrier_queues() {
  satb_mark_queue().flush();
  dirty_card_queue().flush();
 }
-#endif
+
+void JavaThread::initialize_queues() {
+  assert(!SafepointSynchronize::is_at_safepoint(),
+         "we should not be at a safepoint");
+
+  ObjPtrQueue& satb_queue = satb_mark_queue();
+  SATBMarkQueueSet& satb_queue_set = satb_mark_queue_set();
+  // The SATB queue should have been constructed with its active
+  // field set to false.
+  assert(!satb_queue.is_active(), "SATB queue should not be active");
+  assert(satb_queue.is_empty(), "SATB queue should be empty");
+  // If we are creating the thread during a marking cycle, we should
+  // set the active field of the SATB queue to true.
+  if (satb_queue_set.is_active()) {
+    satb_queue.set_active(true);
+  }
+
+  DirtyCardQueue& dirty_queue = dirty_card_queue();
+  // The dirty card queue should have been constructed with its
+  // active field set to true.
+  assert(dirty_queue.is_active(), "dirty card queue should be active");
+}
+#endif // !SERIALGC

 void JavaThread::cleanup_failed_attach_current_thread() {
  if (get_thread_profiler() != NULL) {
@ -3627,6 +3649,10 @@ jboolean Threads::is_supported_jni_version(jint version) {
 void Threads::add(JavaThread* p, bool force_daemon) {
  // The threads lock must be owned at this point
  assert_locked_or_safepoint(Threads_lock);
+
+  // See the comment for this method in thread.hpp for its purpose and
+  // why it is called here.
+  p->initialize_queues();
  p->set_next(_thread_list);
  _thread_list = p;
  _number_of_threads++;
--- a/hotspot/src/share/vm/runtime/thread.hpp
+++ b/hotspot/src/share/vm/runtime/thread.hpp
@ -1490,6 +1490,29 @@ public:
  }
 #endif // !SERIALGC

+  // This method initializes the SATB and dirty card queues before a
+  // JavaThread is added to the Java thread list. Right now, we don't
+  // have to do anything to the dirty card queue (it should have been
+  // activated when the thread was created), but we have to activate
+  // the SATB queue if the thread is created while a marking cycle is
+  // in progress. The activation / de-activation of the SATB queues at
+  // the beginning / end of a marking cycle is done during safepoints
+  // so we have to make sure this method is called outside one to be
+  // able to safely read the active field of the SATB queue set. Right
+  // now, it is called just before the thread is added to the Java
+  // thread list in the Threads::add() method. That method is holding
+  // the Threads_lock which ensures we are outside a safepoint. We
+  // cannot do the obvious and set the active field of the SATB queue
+  // when the thread is created given that, in some cases, safepoints
+  // might happen between the JavaThread constructor being called and the
+  // thread being added to the Java thread list (an example of this is
+  // when the structure for the DestroyJavaVM thread is created).
+#ifndef SERIALGC
+  void initialize_queues();
+#else // !SERIALGC
+  void initialize_queues() { }
+#endif // !SERIALGC
+
  // Machine dependent stuff
  #include "incls/_thread_pd.hpp.incl"

--- a/hotspot/src/share/vm/utilities/stack.hpp
+++ b/hotspot/src/share/vm/utilities/stack.hpp
@ -0,0 +1,204 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Class Stack (below) grows and shrinks by linking together "segments" which
+// are allocated on demand.  Segments are arrays of the element type (E) plus an
+// extra pointer-sized field to store the segment link.  Recently emptied
+// segments are kept in a cache and reused.
+//
+// Notes/caveats:
+//
+// The size of an element must either evenly divide the size of a pointer or be
+// a multiple of the size of a pointer.
+//
+// Destructors are not called for elements popped off the stack, so element
+// types which rely on destructors for things like reference counting will not
+// work properly.
+//
+// Class Stack allocates segments from the C heap.  However, two protected
+// virtual methods are used to alloc/free memory which subclasses can override:
+//
+//      virtual void* alloc(size_t bytes);
+//      virtual void  free(void* addr, size_t bytes);
+//
+// The alloc() method must return storage aligned for any use.  The
+// implementation in class Stack assumes that alloc() will terminate the process
+// if the allocation fails.
+
+template <class E> class StackIterator;
+
+// StackBase holds common data/methods that don't depend on the element type,
+// factored out to reduce template code duplication.
+class StackBase
+{
+public:
+  size_t segment_size()   const { return _seg_size; } // Elements per segment.
+  size_t max_size()       const { return _max_size; } // Max elements allowed.
+  size_t max_cache_size() const { return _max_cache_size; } // Max segments
+                                                            // allowed in cache.
+
+  size_t cache_size() const { return _cache_size; }   // Segments in the cache.
+
+protected:
+  // The ctor arguments correspond to the like-named functions above.
+  // segment_size:    number of items per segment
+  // max_cache_size:  maximum number of *segments* to cache
+  // max_size:        maximum number of items allowed, rounded to a multiple of
+  //                  the segment size (0 == unlimited)
+  inline StackBase(size_t segment_size, size_t max_cache_size, size_t max_size);
+
+  // Round max_size to a multiple of the segment size.  Treat 0 as unlimited.
+  static inline size_t adjust_max_size(size_t max_size, size_t seg_size);
+
+protected:
+  const size_t _seg_size;       // Number of items per segment.
+  const size_t _max_size;       // Maximum number of items allowed in the stack.
+  const size_t _max_cache_size; // Maximum number of segments to cache.
+  size_t       _cur_seg_size;   // Number of items in the current segment.
+  size_t       _full_seg_size;  // Number of items in already-filled segments.
+  size_t       _cache_size;     // Number of segments in the cache.
+};
+
+#ifdef __GNUC__
+#define inline
+#endif // __GNUC__
+
+template <class E>
+class Stack:  public StackBase
+{
+public:
+  friend class StackIterator<E>;
+
+  // segment_size:    number of items per segment
+  // max_cache_size:  maximum number of *segments* to cache
+  // max_size:        maximum number of items allowed, rounded to a multiple of
+  //                  the segment size (0 == unlimited)
+  inline Stack(size_t segment_size = default_segment_size(),
+               size_t max_cache_size = 4, size_t max_size = 0);
+  inline ~Stack() { clear(true); }
+
+  inline bool is_empty() const { return _cur_seg == NULL; }
+  inline bool is_full()  const { return _full_seg_size >= max_size(); }
+
+  // Performance sensitive code should use is_empty() instead of size() == 0 and
+  // is_full() instead of size() == max_size().  Using a conditional here allows
+  // just one var to be updated when pushing/popping elements instead of two;
+  // _full_seg_size is updated only when pushing/popping segments.
+  inline size_t size() const {
+    return is_empty() ? 0 : _full_seg_size + _cur_seg_size;
+  }
+
+  inline void push(E elem);
+  inline E    pop();
+
+  // Clear everything from the stack, releasing the associated memory.  If
+  // clear_cache is true, also release any cached segments.
+  void clear(bool clear_cache = false);
+
+  static inline size_t default_segment_size();
+
+protected:
+  // Each segment includes space for _seg_size elements followed by a link
+  // (pointer) to the previous segment; the space is allocated as a single block
+  // of size segment_bytes().  _seg_size is rounded up if necessary so the link
+  // is properly aligned.  The C struct for the layout would be:
+  //
+  // struct segment {
+  //   E     elements[_seg_size];
+  //   E*    link;
+  // };
+
+  // Round up seg_size to keep the link field aligned.
+  static inline size_t adjust_segment_size(size_t seg_size);
+
+  // Methods for allocation size and getting/setting the link.
+  inline size_t link_offset() const;              // Byte offset of link field.
+  inline size_t segment_bytes() const;            // Segment size in bytes.
+  inline E**    link_addr(E* seg) const;          // Address of the link field.
+  inline E*     get_link(E* seg) const;           // Extract the link from seg.
+  inline E*     set_link(E* new_seg, E* old_seg); // new_seg.link = old_seg.
+
+  virtual E*    alloc(size_t bytes);
+  virtual void  free(E* addr, size_t bytes);
+
+  void push_segment();
+  void pop_segment();
+
+  void free_segments(E* seg);          // Free all segments in the list.
+  inline void reset(bool reset_cache); // Reset all data fields.
+
+  DEBUG_ONLY(void verify(bool at_empty_transition) const;)
+  DEBUG_ONLY(void zap_segment(E* seg, bool zap_link_field) const;)
+
+private:
+  E* _cur_seg;    // Current segment.
+  E* _cache;      // Segment cache to avoid ping-ponging.
+};
+
+template <class E> class ResourceStack:  public Stack<E>, public ResourceObj
+{
+public:
+  // If this class becomes widely used, it may make sense to save the Thread
+  // and use it when allocating segments.
+  ResourceStack(size_t segment_size = Stack<E>::default_segment_size()):
+    Stack<E>(segment_size, max_uintx)
+    { }
+
+  // Set the segment pointers to NULL so the parent dtor does not free them;
+  // that must be done by the ResourceMark code.
+  ~ResourceStack() { Stack<E>::reset(true); }
+
+protected:
+  virtual E*   alloc(size_t bytes);
+  virtual void free(E* addr, size_t bytes);
+
+private:
+  void clear(bool clear_cache = false);
+};
+
+template <class E>
+class StackIterator: public StackObj
+{
+public:
+  StackIterator(Stack<E>& stack): _stack(stack) { sync(); }
+
+  Stack<E>& stack() const { return _stack; }
+
+  bool is_empty() const { return _cur_seg == NULL; }
+
+  E  next() { return *next_addr(); }
+  E* next_addr();
+
+  void sync(); // Sync the iterator's state to the stack's current state.
+
+private:
+  Stack<E>& _stack;
+  size_t    _cur_seg_size;
+  E*        _cur_seg;
+  size_t    _full_seg_size;
+};
+
+#ifdef __GNUC__
+#undef inline
+#endif // __GNUC__
--- a/hotspot/src/share/vm/utilities/stack.inline.hpp
+++ b/hotspot/src/share/vm/utilities/stack.inline.hpp
@ -0,0 +1,273 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+StackBase::StackBase(size_t segment_size, size_t max_cache_size,
+                     size_t max_size):
+  _seg_size(segment_size),                            // Initializers are listed in
+  _max_size(adjust_max_size(max_size, segment_size)), // member declaration order.
+  _max_cache_size(max_cache_size)
+{
+  assert(_max_size % _seg_size == 0, "not a multiple");
+}
+
+size_t StackBase::adjust_max_size(size_t max_size, size_t seg_size)
+{
+  assert(seg_size > 0, "cannot be 0");
+  assert(max_size >= seg_size || max_size == 0, "max_size too small");
+  const size_t limit = max_uintx - (seg_size - 1);
+  if (max_size == 0 || max_size > limit) {
+    max_size = limit;
+  }
+  return (max_size + seg_size - 1) / seg_size * seg_size;
+}
+
+template <class E>
+Stack<E>::Stack(size_t segment_size, size_t max_cache_size, size_t max_size):
+  StackBase(adjust_segment_size(segment_size), max_cache_size, max_size)
+{
+  reset(true);
+}
+
+template <class E>
+void Stack<E>::push(E item)
+{
+  assert(!is_full(), "pushing onto a full stack");
+  if (_cur_seg_size == _seg_size) {
+    push_segment();
+  }
+  _cur_seg[_cur_seg_size] = item;
+  ++_cur_seg_size;
+}
+
+template <class E>
+E Stack<E>::pop()
+{
+  assert(!is_empty(), "popping from an empty stack");
+  if (_cur_seg_size == 1) {
+    E tmp = _cur_seg[--_cur_seg_size];
+    pop_segment();
+    return tmp;
+  }
+  return _cur_seg[--_cur_seg_size];
+}
+
+template <class E>
+void Stack<E>::clear(bool clear_cache)
+{
+  free_segments(_cur_seg);
+  if (clear_cache) free_segments(_cache);
+  reset(clear_cache);
+}
+
+template <class E>
+size_t Stack<E>::default_segment_size()
+{
+  // Number of elements that fit in 4K bytes minus the size of two pointers
+  // (link field and malloc header).
+  return (4096 - 2 * sizeof(E*)) / sizeof(E);
+}
+
+template <class E>
+size_t Stack<E>::adjust_segment_size(size_t seg_size)
+{
+  const size_t elem_sz = sizeof(E);
+  const size_t ptr_sz = sizeof(E*);
+  assert(elem_sz % ptr_sz == 0 || ptr_sz % elem_sz == 0, "bad element size");
+  if (elem_sz < ptr_sz) {
+    return align_size_up(seg_size * elem_sz, ptr_sz) / elem_sz;
+  }
+  return seg_size;
+}
+
+template <class E>
+size_t Stack<E>::link_offset() const
+{
+  return align_size_up(_seg_size * sizeof(E), sizeof(E*));
+}
+
+template <class E>
+size_t Stack<E>::segment_bytes() const
+{
+  return link_offset() + sizeof(E*);
+}
+
+template <class E>
+E** Stack<E>::link_addr(E* seg) const
+{
+  return (E**) ((char*)seg + link_offset());
+}
+
+template <class E>
+E* Stack<E>::get_link(E* seg) const
+{
+  return *link_addr(seg);
+}
+
+template <class E>
+E* Stack<E>::set_link(E* new_seg, E* old_seg)
+{
+  *link_addr(new_seg) = old_seg;
+  return new_seg;
+}
+
+template <class E>
+E* Stack<E>::alloc(size_t bytes)
+{
+  return (E*) NEW_C_HEAP_ARRAY(char, bytes);
+}
+
+template <class E>
+void Stack<E>::free(E* addr, size_t bytes)
+{
+  FREE_C_HEAP_ARRAY(char, (char*) addr);
+}
+
+template <class E>
+void Stack<E>::push_segment()
+{
+  assert(_cur_seg_size == _seg_size, "current segment is not full");
+  E* next;
+  if (_cache_size > 0) {
+    // Use a cached segment.
+    next = _cache;
+    _cache = get_link(_cache);
+    --_cache_size;
+  } else {
+    next = alloc(segment_bytes());
+    DEBUG_ONLY(zap_segment(next, true);)
+  }
+  const bool at_empty_transition = is_empty();
+  _cur_seg = set_link(next, _cur_seg);
+  _cur_seg_size = 0;
+  _full_seg_size += at_empty_transition ? 0 : _seg_size;
+  DEBUG_ONLY(verify(at_empty_transition);)
+}
+
+template <class E>
+void Stack<E>::pop_segment()
+{
+  assert(_cur_seg_size == 0, "current segment is not empty");
+  E* const prev = get_link(_cur_seg);
+  if (_cache_size < _max_cache_size) {
+    // Add the current segment to the cache.
+    DEBUG_ONLY(zap_segment(_cur_seg, false);)
+    _cache = set_link(_cur_seg, _cache);
+    ++_cache_size;
+  } else {
+    DEBUG_ONLY(zap_segment(_cur_seg, true);)
+    free(_cur_seg, segment_bytes());
+  }
+  const bool at_empty_transition = prev == NULL;
+  _cur_seg = prev;
+  _cur_seg_size = _seg_size;
+  _full_seg_size -= at_empty_transition ? 0 : _seg_size;
+  DEBUG_ONLY(verify(at_empty_transition);)
+}
+
+template <class E>
+void Stack<E>::free_segments(E* seg)
+{
+  const size_t bytes = segment_bytes();
+  while (seg != NULL) {
+    E* const prev = get_link(seg);
+    free(seg, bytes);
+    seg = prev;
+  }
+}
+
+template <class E>
+void Stack<E>::reset(bool reset_cache)
+{
+  _cur_seg_size = _seg_size; // So push() will alloc a new segment.
+  _full_seg_size = 0;
+  _cur_seg = NULL;
+  if (reset_cache) {
+    _cache_size = 0;
+    _cache = NULL;
+  }
+}
+
+#ifdef ASSERT
+template <class E>
+void Stack<E>::verify(bool at_empty_transition) const
+{
+  assert(size() <= max_size(), "stack exceeded bounds");
+  assert(cache_size() <= max_cache_size(), "cache exceeded bounds");
+  assert(_cur_seg_size <= segment_size(), "segment index exceeded bounds");
+
+  assert(_full_seg_size % _seg_size == 0, "not a multiple");
+  assert(at_empty_transition || is_empty() == (size() == 0), "mismatch");
+  assert((_cache == NULL) == (cache_size() == 0), "mismatch");
+
+  if (is_empty()) {
+    assert(_cur_seg_size == segment_size(), "sanity");
+  }
+}
+
+template <class E>
+void Stack<E>::zap_segment(E* seg, bool zap_link_field) const
+{
+  if (!ZapStackSegments) return;
+  const size_t zap_bytes = segment_bytes() - (zap_link_field ? 0 : sizeof(E*));
+  uint32_t* cur = (uint32_t*)seg;
+  const uint32_t* end = cur + zap_bytes / sizeof(uint32_t);
+  while (cur < end) {
+    *cur++ = 0xfadfaded;
+  }
+}
+#endif
+
+template <class E>
+E* ResourceStack<E>::alloc(size_t bytes)
+{
+  return (E*) resource_allocate_bytes(bytes);
+}
+
+template <class E>
+void ResourceStack<E>::free(E* addr, size_t bytes)
+{
+  resource_free_bytes((char*) addr, bytes);
+}
+
+template <class E>
+void StackIterator<E>::sync()
+{
+  _full_seg_size = _stack._full_seg_size;
+  _cur_seg_size = _stack._cur_seg_size;
+  _cur_seg = _stack._cur_seg;
+}
+
+template <class E>
+E* StackIterator<E>::next_addr()
+{
+  assert(!is_empty(), "no items left");
+  if (_cur_seg_size == 1) {
+    E* addr = _cur_seg;
+    _cur_seg = _stack.get_link(_cur_seg);
+    _cur_seg_size = _stack.segment_size();
+    _full_seg_size -= _stack.segment_size();
+    return addr;
+  }
+  return _cur_seg + --_cur_seg_size;
+}
--- a/hotspot/src/share/vm/utilities/taskqueue.cpp
+++ b/hotspot/src/share/vm/utilities/taskqueue.cpp
@ -144,6 +144,7 @@ void ParallelTaskTerminator::sleep(uint millis) {

 bool
 ParallelTaskTerminator::offer_termination(TerminatorTerminator* terminator) {
+  assert(_n_threads > 0, "Initialization is incorrect");
  assert(_offered_termination < _n_threads, "Invariant");
  Atomic::inc(&_offered_termination);

@ -255,3 +256,9 @@ bool ObjArrayTask::is_valid() const {
    _index < objArrayOop(_obj)->length();
 }
 #endif // ASSERT
+
+void ParallelTaskTerminator::reset_for_reuse(int n_threads) {
+  reset_for_reuse();
+  _n_threads = n_threads;
+}
+
--- a/hotspot/src/share/vm/utilities/taskqueue.hpp
+++ b/hotspot/src/share/vm/utilities/taskqueue.hpp
@ -305,6 +305,12 @@ bool GenericTaskQueue<E, N>::push_slow(E t, uint dirty_n_elems) {
  return false;
 }

+// pop_local_slow() is done by the owning thread and is trying to
+// get the last task in the queue.  It will compete with pop_global()
+// that will be used by other threads.  The tag age is incremented
+// whenever the queue goes empty which it will do here if this thread
+// gets the last task or in pop_global() if the queue wraps (top == 0
+// and pop_global() succeeds, see pop_global()).
 template<class E, unsigned int N>
 bool GenericTaskQueue<E, N>::pop_local_slow(uint localBot, Age oldAge) {
  // This queue was observed to contain exactly one element; either this
@ -366,75 +372,47 @@ GenericTaskQueue<E, N>::~GenericTaskQueue() {
 // OverflowTaskQueue is a TaskQueue that also includes an overflow stack for
 // elements that do not fit in the TaskQueue.
 //
-// Three methods from super classes are overridden:
+// This class hides two methods from super classes:
 //
-// initialize() - initialize the super classes and create the overflow stack
 // push() - push onto the task queue or, if that fails, onto the overflow stack
 // is_empty() - return true if both the TaskQueue and overflow stack are empty
 //
-// Note that size() is not overridden--it returns the number of elements in the
+// Note that size() is not hidden--it returns the number of elements in the
 // TaskQueue, and does not include the size of the overflow stack.  This
 // simplifies replacement of GenericTaskQueues with OverflowTaskQueues.
 template<class E, unsigned int N = TASKQUEUE_SIZE>
 class OverflowTaskQueue: public GenericTaskQueue<E, N>
 {
 public:
-  typedef GrowableArray<E>       overflow_t;
+  typedef Stack<E>               overflow_t;
  typedef GenericTaskQueue<E, N> taskqueue_t;

  TASKQUEUE_STATS_ONLY(using taskqueue_t::stats;)

-  OverflowTaskQueue();
-  ~OverflowTaskQueue();
-  void initialize();
-
-  inline overflow_t* overflow_stack() const { return _overflow_stack; }
-
  // Push task t onto the queue or onto the overflow stack.  Return true.
  inline bool push(E t);

  // Attempt to pop from the overflow stack; return true if anything was popped.
  inline bool pop_overflow(E& t);

+  inline overflow_t* overflow_stack() { return &_overflow_stack; }
+
  inline bool taskqueue_empty() const { return taskqueue_t::is_empty(); }
-  inline bool overflow_empty()  const { return overflow_stack()->is_empty(); }
+  inline bool overflow_empty()  const { return _overflow_stack.is_empty(); }
  inline bool is_empty()        const {
    return taskqueue_empty() && overflow_empty();
  }

 private:
-  overflow_t* _overflow_stack;
+  overflow_t _overflow_stack;
 };

-template <class E, unsigned int N>
-OverflowTaskQueue<E, N>::OverflowTaskQueue()
-{
-  _overflow_stack = NULL;
-}
-
-template <class E, unsigned int N>
-OverflowTaskQueue<E, N>::~OverflowTaskQueue()
-{
-  if (_overflow_stack != NULL) {
-    delete _overflow_stack;
-    _overflow_stack = NULL;
-  }
-}
-
-template <class E, unsigned int N>
-void OverflowTaskQueue<E, N>::initialize()
-{
-  taskqueue_t::initialize();
-  assert(_overflow_stack == NULL, "memory leak");
-  _overflow_stack = new (ResourceObj::C_HEAP) GrowableArray<E>(10, true);
-}
-
 template <class E, unsigned int N>
 bool OverflowTaskQueue<E, N>::push(E t)
 {
  if (!taskqueue_t::push(t)) {
    overflow_stack()->push(t);
-    TASKQUEUE_STATS_ONLY(stats.record_overflow(overflow_stack()->length()));
+    TASKQUEUE_STATS_ONLY(stats.record_overflow(overflow_stack()->size()));
  }
  return true;
 }
@ -637,6 +615,9 @@ public:
  // in an MT-safe manner, once the previous round of use of
  // the terminator is finished.
  void reset_for_reuse();
+  // Same as above but the number of parallel threads is set to the
+  // given number.
+  void reset_for_reuse(int n_threads);

 #ifdef TRACESPINNING
  static uint total_yields() { return _total_yields; }
@ -782,3 +763,4 @@ typedef GenericTaskQueueSet<OopStarTaskQueue> OopStarTaskQueueSet;

 typedef OverflowTaskQueue<size_t>             RegionTaskQueue;
 typedef GenericTaskQueueSet<RegionTaskQueue>  RegionTaskQueueSet;
+
--- a/hotspot/src/share/vm/utilities/workgroup.cpp
+++ b/hotspot/src/share/vm/utilities/workgroup.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2007, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -53,28 +53,52 @@ WorkGang::WorkGang(const char* name,
                   int         workers,
                   bool        are_GC_task_threads,
                   bool        are_ConcurrentGC_threads) :
-  AbstractWorkGang(name, are_GC_task_threads, are_ConcurrentGC_threads)
-{
+  AbstractWorkGang(name, are_GC_task_threads, are_ConcurrentGC_threads) {
  // Save arguments.
  _total_workers = workers;
+}
+
+GangWorker* WorkGang::allocate_worker(int which) {
+  GangWorker* new_worker = new GangWorker(this, which);
+  return new_worker;
+}
+
+// The current implementation will exit if the allocation
+// of any worker fails.  Still, return a boolean so that
+// a future implementation can possibly do a partial
+// initialization of the workers and report such to the
+// caller.
+bool WorkGang::initialize_workers() {

  if (TraceWorkGang) {
-    tty->print_cr("Constructing work gang %s with %d threads", name, workers);
+    tty->print_cr("Constructing work gang %s with %d threads",
+                  name(),
+                  total_workers());
  }
-  _gang_workers = NEW_C_HEAP_ARRAY(GangWorker*, workers);
+  _gang_workers = NEW_C_HEAP_ARRAY(GangWorker*, total_workers());
  if (gang_workers() == NULL) {
    vm_exit_out_of_memory(0, "Cannot create GangWorker array.");
+    return false;
+  }
+  os::ThreadType worker_type;
+  if (are_ConcurrentGC_threads()) {
+    worker_type = os::cgc_thread;
+  } else {
+    worker_type = os::pgc_thread;
  }
  for (int worker = 0; worker < total_workers(); worker += 1) {
-    GangWorker* new_worker = new GangWorker(this, worker);
+    GangWorker* new_worker = allocate_worker(worker);
    assert(new_worker != NULL, "Failed to allocate GangWorker");
    _gang_workers[worker] = new_worker;
-    if (new_worker == NULL || !os::create_thread(new_worker, os::pgc_thread))
+    if (new_worker == NULL || !os::create_thread(new_worker, worker_type)) {
      vm_exit_out_of_memory(0, "Cannot create worker GC thread. Out of system resources.");
+      return false;
+    }
    if (!DisableStartThread) {
      os::start_thread(new_worker);
    }
  }
+  return true;
 }

 AbstractWorkGang::~AbstractWorkGang() {
@ -383,7 +407,7 @@ bool SubTasksDone::valid() {
  return _tasks != NULL;
 }

-void SubTasksDone::set_par_threads(int t) {
+void SubTasksDone::set_n_threads(int t) {
 #ifdef ASSERT
  assert(_claimed == 0 || _threads_completed == _n_threads,
         "should not be called while tasks are being processed!");
--- a/hotspot/src/share/vm/utilities/workgroup.hpp
+++ b/hotspot/src/share/vm/utilities/workgroup.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -29,6 +29,7 @@ class GangWorker;
 class YieldingFlexibleGangWorker;
 class YieldingFlexibleGangTask;
 class WorkData;
+class AbstractWorkGang;

 // An abstract task to be worked on by a gang.
 // You subclass this to supply your own work() method
@ -38,6 +39,13 @@ public:
  // The argument tells you which member of the gang you are.
  virtual void work(int i) = 0;

+  // This method configures the task for proper termination.
+  // Some tasks do not have any requirements on termination
+  // and may inherit this method that does nothing.  Some
+  // tasks do some coordination on termination and override
+  // this method to implement that coordination.
+  virtual void set_for_termination(int active_workers) {};
+
  // Debugging accessor for the name.
  const char* name() const PRODUCT_RETURN_(return NULL;);
  int counter() { return _counter; }
@ -64,6 +72,18 @@ protected:
  virtual ~AbstractGangTask() { }
 };

+class AbstractGangTaskWOopQueues : public AbstractGangTask {  // gang task that carries an oop task-queue set and a terminator built over it
+  OopTaskQueueSet*       _queues;  // supplied by the creator; only the pointer is stored
+  ParallelTaskTerminator _terminator;  // re-armed through set_for_termination()
+ public:
+  AbstractGangTaskWOopQueues(const char* name, OopTaskQueueSet* queues) :
+    AbstractGangTask(name), _queues(queues), _terminator(0, _queues) {}  // uses _queues, which is declared (hence initialized) before _terminator; the 0 is presumably a thread count - confirm
+  ParallelTaskTerminator* terminator() { return &_terminator; }
+  virtual void set_for_termination(int active_workers) {  // replaces the do-nothing version in AbstractGangTask
+    terminator()->reset_for_reuse(active_workers);
+  }
+  OopTaskQueueSet* queues() { return _queues; }
+};

 // Class AbstractWorkGang:
 // An abstract class representing a gang of workers.
@ -114,6 +134,9 @@ public:
  int total_workers() const {
    return _total_workers;
  }
+  virtual int active_workers() const {
+    return _total_workers;
+  }
  bool terminate() const {
    return _terminate;
  }
@ -199,6 +222,13 @@ public:
           bool are_GC_task_threads, bool are_ConcurrentGC_threads);
  // Run a task, returns when the task is done (or terminated).
  virtual void run_task(AbstractGangTask* task);
+  void run_task(AbstractGangTask* task, uint no_of_parallel_workers);
+  // Allocate a worker and return a pointer to it.
+  virtual GangWorker* allocate_worker(int which);
+  // Initialize workers in the gang.  Return true if initialization
+  // succeeded. The type of the worker can be overridden in a derived
+  // class with the appropriate implementation of allocate_worker().
+  bool initialize_workers();
 };

 // Class GangWorker:
@ -226,6 +256,34 @@ public:
  AbstractWorkGang* gang() const { return _gang; }
 };

+class FlexibleWorkGang: public WorkGang {  // a WorkGang whose active_workers() value is stored separately and can be changed
+ protected:
+  int _active_workers;  // value reported by active_workers(); written by the constructor and set_active_workers()
+ public:
+  // Constructor.
+  FlexibleWorkGang(const char* name, int workers,
+                   bool are_GC_task_threads,
+                   bool  are_ConcurrentGC_threads) :
+    WorkGang(name, workers, are_GC_task_threads, are_ConcurrentGC_threads) {
+    _active_workers = ParallelGCThreads;  // NOTE(review): seeded from the global flag, not from "workers" - confirm this is intended
+  };
+  // Accessors for fields
+  virtual int active_workers() const { return _active_workers; }  // replaces the base version that returns _total_workers
+  void set_active_workers(int v) { _active_workers = v; }  // stores v as given; no check against total_workers() here
+};
+
+// Work gangs in garbage collectors: 2009-06-10
+//
+// SharedHeap - work gang for stop-the-world parallel collection.
+//   Used by
+//     ParNewGeneration
+//     CMSParRemarkTask
+//     CMSRefProcTaskExecutor
+//     G1CollectedHeap
+//     G1ParFinalCountTask
+// ConcurrentMark
+// CMSCollector
+
 // A class that acts as a synchronisation barrier. Workers enter
 // the barrier and must wait until all other workers have entered
 // before any of them may leave.
@ -271,7 +329,7 @@ class SubTasksDone: public CHeapObj {
  int _n_threads;
  jint _threads_completed;
 #ifdef ASSERT
-  jint _claimed;
+  volatile jint _claimed;
 #endif

  // Set all tasks to unclaimed.
@ -286,9 +344,10 @@ public:
  // True iff the object is in a valid state.
  bool valid();

-  // Set the number of parallel threads doing the tasks to "t".  Can only
+  // Get the number of parallel threads doing the tasks, or set it to "t".  The setter can only
  // be called before tasks start or after they are complete.
-  void set_par_threads(int t);
+  int n_threads() { return _n_threads; }
+  void set_n_threads(int t);

  // Returns "false" if the task "t" is unclaimed, and ensures that task is
  // claimed.  The task "t" is required to be within the range of "this".
@ -315,13 +374,17 @@ class SequentialSubTasksDone : public StackObj {
 protected:
  jint _n_tasks;     // Total number of tasks available.
  jint _n_claimed;   // Number of tasks claimed.
+  // _n_threads is used to determine when a sub task is done.
+  // See comments on SubTasksDone::_n_threads
  jint _n_threads;   // Total number of parallel threads.
  jint _n_completed; // Number of completed threads.

  void clear();

 public:
-  SequentialSubTasksDone() { clear(); }
+  SequentialSubTasksDone() {
+    clear();
+  }
  ~SequentialSubTasksDone() {}

  // True iff the object is in a valid state.
@ -330,11 +393,12 @@ public:
  // number of tasks
  jint n_tasks() const { return _n_tasks; }

-  // Set the number of parallel threads doing the tasks to t.
+  // Get the number of parallel threads doing the tasks, or set it to t.
  // Should be called before the task starts but it is safe
  // to call this once a task is running provided that all
  // threads agree on the number of threads.
-  void set_par_threads(int t) { _n_threads = t; }
+  int n_threads() { return _n_threads; }
+  void set_n_threads(int t) { _n_threads = t; }

  // Set the number of tasks to be claimed to t. As above,
  // should be called before the tasks start but it is safe
--- a/hotspot/src/share/vm/utilities/yieldingWorkgroup.cpp
+++ b/hotspot/src/share/vm/utilities/yieldingWorkgroup.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2010 Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -32,29 +32,13 @@ class WorkData;

 YieldingFlexibleWorkGang::YieldingFlexibleWorkGang(
  const char* name, int workers, bool are_GC_task_threads) :
-  AbstractWorkGang(name, are_GC_task_threads, false) {
-  // Save arguments.
-  _total_workers = workers;
-  assert(_total_workers > 0, "Must have more than 1 worker");
+  FlexibleWorkGang(name, workers, are_GC_task_threads, false),  // the literal false is the are_ConcurrentGC_threads argument
+    _yielded_workers(0) {}  // empty body: no worker array or threads are created in this constructor

-  _yielded_workers = 0;
-
-  if (TraceWorkGang) {
-    tty->print_cr("Constructing work gang %s with %d threads", name, workers);
-  }
-  _gang_workers = NEW_C_HEAP_ARRAY(GangWorker*, workers);
-  assert(gang_workers() != NULL, "Failed to allocate gang workers");
-  for (int worker = 0; worker < total_workers(); worker += 1) {
-    YieldingFlexibleGangWorker* new_worker =
-      new YieldingFlexibleGangWorker(this, worker);
-    assert(new_worker != NULL, "Failed to allocate YieldingFlexibleGangWorker");
-    _gang_workers[worker] = new_worker;
-    if (new_worker == NULL || !os::create_thread(new_worker, os::pgc_thread))
-      vm_exit_out_of_memory(0, "Cannot create worker GC thread. Out of system resources.");
-    if (!DisableStartThread) {
-      os::start_thread(new_worker);
-    }
-  }
+GangWorker* YieldingFlexibleWorkGang::allocate_worker(int which) {  // this gang's worker factory: builds a YieldingFlexibleGangWorker for slot "which"
+  YieldingFlexibleGangWorker* new_member =
+      new YieldingFlexibleGangWorker(this, which);
+  return (YieldingFlexibleGangWorker*) new_member;  // the cast names the variable's own type, so it changes nothing
 }

 // Run a task; returns when the task is done, or the workers yield,
@ -142,6 +126,7 @@ void YieldingFlexibleWorkGang::start_task(YieldingFlexibleGangTask* new_task) {
    _active_workers = total_workers();
  }
  new_task->set_actual_size(_active_workers);
+  new_task->set_for_termination(_active_workers);

  assert(_started_workers == 0, "Tabula rasa non");
  assert(_finished_workers == 0, "Tabula rasa non");
@ -161,22 +146,22 @@ void YieldingFlexibleWorkGang::wait_for_gang() {
  for (Status status = yielding_task()->status();
       status != COMPLETED && status != YIELDED && status != ABORTED;
       status = yielding_task()->status()) {
-    assert(started_workers() <= active_workers(), "invariant");
-    assert(finished_workers() <= active_workers(), "invariant");
-    assert(yielded_workers() <= active_workers(), "invariant");
+    assert(started_workers() <= total_workers(), "invariant");
+    assert(finished_workers() <= total_workers(), "invariant");
+    assert(yielded_workers() <= total_workers(), "invariant");
    monitor()->wait(Mutex::_no_safepoint_check_flag);
  }
  switch (yielding_task()->status()) {
    case COMPLETED:
    case ABORTED: {
-      assert(finished_workers() == active_workers(), "Inconsistent status");
+      assert(finished_workers() == total_workers(), "Inconsistent status");
      assert(yielded_workers() == 0, "Invariant");
      reset();   // for next task; gang<->task binding released
      break;
    }
    case YIELDED: {
      assert(yielded_workers() > 0, "Invariant");
-      assert(yielded_workers() + finished_workers() == active_workers(),
+      assert(yielded_workers() + finished_workers() == total_workers(),
             "Inconsistent counts");
      break;
    }
@ -208,7 +193,6 @@ void YieldingFlexibleWorkGang::continue_task(
 void YieldingFlexibleWorkGang::reset() {
  _started_workers  = 0;
  _finished_workers = 0;
-  _active_workers   = 0;
  yielding_task()->set_gang(NULL);
  _task = NULL;    // unbind gang from task
 }
@ -216,7 +200,7 @@ void YieldingFlexibleWorkGang::reset() {
 void YieldingFlexibleWorkGang::yield() {
  assert(task() != NULL, "Inconsistency; should have task binding");
  MutexLockerEx ml(monitor(), Mutex::_no_safepoint_check_flag);
-  assert(yielded_workers() < active_workers(), "Consistency check");
+  assert(yielded_workers() < total_workers(), "Consistency check");
  if (yielding_task()->status() == ABORTING) {
    // Do not yield; we need to abort as soon as possible
    // XXX NOTE: This can cause a performance pathology in the
@ -227,7 +211,7 @@ void YieldingFlexibleWorkGang::yield() {
    // us to return at each potential yield point.
    return;
  }
-  if (++_yielded_workers + finished_workers() == active_workers()) {
+  if (++_yielded_workers + finished_workers() == total_workers()) {
    yielding_task()->set_status(YIELDED);
    monitor()->notify_all();
  } else {
--- a/hotspot/src/share/vm/utilities/yieldingWorkgroup.hpp
+++ b/hotspot/src/share/vm/utilities/yieldingWorkgroup.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -54,6 +54,25 @@ protected: // Override from parent class
  virtual void loop();
 };

+class FlexibleGangTask: public AbstractGangTask {  // gang task that records a requested gang size and the size actually obtained
+  int _actual_size;                      // size of gang obtained
+protected:
+  int _requested_size;                   // size of gang requested
+public:
+ FlexibleGangTask(const char* name): AbstractGangTask(name),
+    _requested_size(0) {}  // _actual_size is not initialized here; it has a defined value only after set_actual_size()
+
+  // The abstract work method.
+  // The argument tells you which member of the gang you are.
+  virtual void work(int i) = 0;
+
+  int requested_size() const { return _requested_size; }
+  int actual_size()    const { return _actual_size; }
+
+  void set_requested_size(int sz) { _requested_size = sz; }
+  void set_actual_size(int sz)    { _actual_size    = sz; }
+};
+
 // An abstract task to be worked on by a flexible work gang,
 // and where the workers will periodically yield, usually
 // in response to some condition that is signalled by means
@ -70,19 +89,15 @@ protected: // Override from parent class
 // maximum) in response to task requests at certain points.
 // The last part (the flexible part) has not yet been fully
 // fleshed out and is a work in progress.
-class YieldingFlexibleGangTask: public AbstractGangTask {
+class YieldingFlexibleGangTask: public FlexibleGangTask {
  Status _status;
  YieldingFlexibleWorkGang* _gang;
-  int _actual_size;                      // size of gang obtained

 protected:
-  int _requested_size;                   // size of gang requested
-
  // Constructor and desctructor: only construct subclasses.
-  YieldingFlexibleGangTask(const char* name): AbstractGangTask(name),
+  YieldingFlexibleGangTask(const char* name): FlexibleGangTask(name),
    _status(INACTIVE),
-    _gang(NULL),
-    _requested_size(0) { }
+    _gang(NULL) { }

  virtual ~YieldingFlexibleGangTask() { }

@ -122,24 +137,18 @@ public:
  virtual void abort();

  Status status()  const { return _status; }
+  bool yielding()  const { return _status == YIELDING; }
  bool yielded()   const { return _status == YIELDED; }
  bool completed() const { return _status == COMPLETED; }
  bool aborted()   const { return _status == ABORTED; }
  bool active()    const { return _status == ACTIVE; }
-
-  int requested_size() const { return _requested_size; }
-  int actual_size()    const { return _actual_size; }
-
-  void set_requested_size(int sz) { _requested_size = sz; }
-  void set_actual_size(int sz)    { _actual_size    = sz; }
 };
-
 // Class YieldingWorkGang: A subclass of WorkGang.
 // In particular, a YieldingWorkGang is made up of
 // YieldingGangWorkers, and provides infrastructure
 // supporting yielding to the "GangOverseer",
 // being the thread that orchestrates the WorkGang via run_task().
-class YieldingFlexibleWorkGang: public AbstractWorkGang {
+class YieldingFlexibleWorkGang: public FlexibleWorkGang {
  // Here's the public interface to this class.
 public:
  // Constructor and destructor.
@ -151,6 +160,9 @@ public:
           "Incorrect cast");
    return (YieldingFlexibleGangTask*)task();
  }
+  // Allocate a worker and return a pointer to it.
+  GangWorker* allocate_worker(int which);
+
  // Run a task; returns when the task is done, or the workers yield,
  // or the task is aborted, or the work gang is terminated via stop().
  // A task that has been yielded can be continued via this same interface
@ -180,10 +192,6 @@ public:
  void abort();

 private:
-  // The currently active workers in this gang.
-  // This is a number that is dynamically adjusted by
-  // the run_task() method at each subsequent invocation,
-  // using data in the YieldingFlexibleGangTask.
  int _active_workers;
  int _yielded_workers;
  void wait_for_gang();
@ -194,6 +202,7 @@ public:
    return _active_workers;
  }

+  // Accessors for fields
  int yielded_workers() const {
    return _yielded_workers;
  }