From 5fb46fd5a88e446fe62c4119c02a2dc6477318a4 Mon Sep 17 00:00:00 2001 From: Kevin Walls Date: Fri, 30 Jul 2010 22:43:50 +0100 Subject: [PATCH 1/9] 6581734: CMS Old Gen's collection usage is zero after GC which is incorrect Management code enabled for use by a concurrent collector. Reviewed-by: mchung, ysr --- .../concurrentMarkSweepGeneration.cpp | 61 +++++++ .../concurrentMarkSweepGeneration.hpp | 9 ++ .../includeDB_gc_concurrentMarkSweep | 2 + hotspot/src/share/vm/services/management.cpp | 9 +- .../src/share/vm/services/memoryManager.cpp | 153 ++++++++++++------ .../src/share/vm/services/memoryManager.hpp | 18 ++- .../src/share/vm/services/memoryService.cpp | 76 +++++++-- .../src/share/vm/services/memoryService.hpp | 37 ++++- hotspot/test/gc/6581734/Test6581734.java | 149 +++++++++++++++++ 9 files changed, 435 insertions(+), 79 deletions(-) create mode 100644 hotspot/test/gc/6581734/Test6581734.java diff --git a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp index 0aa08ec3018..a0bace5bf0d 100644 --- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp @@ -1970,6 +1970,9 @@ void CMSCollector::do_compaction_work(bool clear_all_soft_refs) { _intra_sweep_estimate.padded_average()); } + { + TraceCMSMemoryManagerStats(); + } GenMarkSweep::invoke_at_safepoint(_cmsGen->level(), ref_processor(), clear_all_soft_refs); #ifdef ASSERT @@ -3420,6 +3423,7 @@ CMSPhaseAccounting::~CMSPhaseAccounting() { void CMSCollector::checkpointRootsInitial(bool asynch) { assert(_collectorState == InitialMarking, "Wrong collector state"); check_correct_thread_executing(); + TraceCMSMemoryManagerStats tms(_collectorState); ReferenceProcessor* rp = ref_processor(); SpecializationStats::clear(); assert(_restart_addr == NULL, "Control point invariant"); @@ -4753,6 +4757,7 @@ void CMSCollector::checkpointRootsFinal(bool asynch, // world is stopped at this checkpoint assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped"); + TraceCMSMemoryManagerStats tms(_collectorState); verify_work_stacks_empty(); verify_overflow_empty(); @@ -5854,6 +5859,8 @@ void CMSCollector::sweep(bool asynch) { verify_work_stacks_empty(); verify_overflow_empty(); increment_sweep_count(); + TraceCMSMemoryManagerStats tms(_collectorState); + _inter_sweep_timer.stop(); _inter_sweep_estimate.sample(_inter_sweep_timer.seconds()); size_policy()->avg_cms_free_at_sweep()->sample(_cmsGen->free()); @@ -9126,3 +9133,57 @@ size_t MarkDeadObjectsClosure::do_blk(HeapWord* addr) { } return res; } + +TraceCMSMemoryManagerStats::TraceCMSMemoryManagerStats(CMSCollector::CollectorState phase): TraceMemoryManagerStats() { + + switch (phase) { + case CMSCollector::InitialMarking: + initialize(true /* fullGC */ , + true /* recordGCBeginTime */, + true /* recordPreGCUsage */, + false /* recordPeakUsage */, + false /* recordPostGCusage */, + true /* recordAccumulatedGCTime */, + false /* recordGCEndTime */, + false /* countCollection */ ); + break; + + case CMSCollector::FinalMarking: + initialize(true /* fullGC */ , + false /* recordGCBeginTime */, + false /* recordPreGCUsage */, + false /* recordPeakUsage */, + false /* recordPostGCusage */, + true /* recordAccumulatedGCTime */, + false /* recordGCEndTime */, + false /* countCollection */ ); + break; + + case 
CMSCollector::Sweeping: + initialize(true /* fullGC */ , + false /* recordGCBeginTime */, + false /* recordPreGCUsage */, + true /* recordPeakUsage */, + true /* recordPostGCusage */, + false /* recordAccumulatedGCTime */, + true /* recordGCEndTime */, + true /* countCollection */ ); + break; + + default: + ShouldNotReachHere(); + } +} + +// when bailing out of cms in concurrent mode failure +TraceCMSMemoryManagerStats::TraceCMSMemoryManagerStats(): TraceMemoryManagerStats() { + initialize(true /* fullGC */ , + true /* recordGCBeginTime */, + true /* recordPreGCUsage */, + true /* recordPeakUsage */, + true /* recordPostGCusage */, + true /* recordAccumulatedGCTime */, + true /* recordGCEndTime */, + true /* countCollection */ ); +} + diff --git a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp index 451538472fd..16ed981bbe8 100644 --- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp @@ -507,6 +507,7 @@ class CMSCollector: public CHeapObj { friend class VM_CMS_Operation; friend class VM_CMS_Initial_Mark; friend class VM_CMS_Final_Remark; + friend class TraceCMSMemoryManagerStats; private: jlong _time_of_last_gc; @@ -1858,3 +1859,11 @@ public: _dead_bit_map(dead_bit_map) {} size_t do_blk(HeapWord* addr); }; + +class TraceCMSMemoryManagerStats : public TraceMemoryManagerStats { + + public: + TraceCMSMemoryManagerStats(CMSCollector::CollectorState phase); + TraceCMSMemoryManagerStats(); +}; + diff --git a/hotspot/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep b/hotspot/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep index 7cda699d3ce..99c5b3ae271 100644 --- a/hotspot/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep +++ b/hotspot/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep @@ -149,6 +149,7 @@ concurrentMarkSweepGeneration.cpp isGCActiveMark.hpp concurrentMarkSweepGeneration.cpp iterator.hpp concurrentMarkSweepGeneration.cpp java.hpp concurrentMarkSweepGeneration.cpp jvmtiExport.hpp +concurrentMarkSweepGeneration.cpp memoryService.hpp concurrentMarkSweepGeneration.cpp oop.inline.hpp concurrentMarkSweepGeneration.cpp parNewGeneration.hpp concurrentMarkSweepGeneration.cpp referencePolicy.hpp @@ -165,6 +166,7 @@ concurrentMarkSweepGeneration.hpp gSpaceCounters.hpp concurrentMarkSweepGeneration.hpp gcStats.hpp concurrentMarkSweepGeneration.hpp generation.hpp concurrentMarkSweepGeneration.hpp generationCounters.hpp +concurrentMarkSweepGeneration.hpp memoryService.hpp concurrentMarkSweepGeneration.hpp mutexLocker.hpp concurrentMarkSweepGeneration.hpp taskqueue.hpp concurrentMarkSweepGeneration.hpp virtualspace.hpp diff --git a/hotspot/src/share/vm/services/management.cpp b/hotspot/src/share/vm/services/management.cpp index dd6c3bc9b0e..c91a1acfc96 100644 --- a/hotspot/src/share/vm/services/management.cpp +++ b/hotspot/src/share/vm/services/management.cpp @@ -1900,16 +1900,15 @@ JVM_ENTRY(void, jmm_GetLastGCStat(JNIEnv *env, jobject obj, jmmGCStat *gc_stat)) // Get the GCMemoryManager GCMemoryManager* mgr = get_gc_memory_manager_from_jobject(obj, CHECK); - if (mgr->last_gc_stat() == NULL) { - gc_stat->gc_index = 0; - return; - } // Make a copy of the last GC statistics // GC may occur while constructing the last GC information int num_pools = 
MemoryService::num_memory_pools(); GCStatInfo* stat = new GCStatInfo(num_pools); - stat->copy_stat(mgr->last_gc_stat()); + if (mgr->get_last_gc_stat(stat) == 0) { + gc_stat->gc_index = 0; + return; + } gc_stat->gc_index = stat->gc_index(); gc_stat->start_time = Management::ticks_to_ms(stat->start_time()); diff --git a/hotspot/src/share/vm/services/memoryManager.cpp b/hotspot/src/share/vm/services/memoryManager.cpp index 0266b331dcd..4921ae1f569 100644 --- a/hotspot/src/share/vm/services/memoryManager.cpp +++ b/hotspot/src/share/vm/services/memoryManager.cpp @@ -166,17 +166,6 @@ GCStatInfo::~GCStatInfo() { FREE_C_HEAP_ARRAY(MemoryUsage*, _after_gc_usage_array); } -void GCStatInfo::copy_stat(GCStatInfo* stat) { - set_index(stat->gc_index()); - set_start_time(stat->start_time()); - set_end_time(stat->end_time()); - assert(_usage_array_size == stat->usage_array_size(), "Must have same array size"); - for (int i = 0; i < _usage_array_size; i++) { - set_before_gc_usage(i, stat->before_gc_usage_for_pool(i)); - set_after_gc_usage(i, stat->after_gc_usage_for_pool(i)); - } -} - void GCStatInfo::set_gc_usage(int pool_index, MemoryUsage usage, bool before_gc) { MemoryUsage* gc_usage_array; if (before_gc) { @@ -187,67 +176,129 @@ void GCStatInfo::set_gc_usage(int pool_index, MemoryUsage usage, bool before_gc) gc_usage_array[pool_index] = usage; } +void GCStatInfo::clear() { + _index = 0; + _start_time = 0L; + _end_time = 0L; + size_t len = _usage_array_size * sizeof(MemoryUsage); + memset(_before_gc_usage_array, 0, len); + memset(_after_gc_usage_array, 0, len); +} + + GCMemoryManager::GCMemoryManager() : MemoryManager() { _num_collections = 0; _last_gc_stat = NULL; + _last_gc_lock = new Mutex(Mutex::leaf, "_last_gc_lock", true); + _current_gc_stat = NULL; _num_gc_threads = 1; } GCMemoryManager::~GCMemoryManager() { delete _last_gc_stat; + delete _last_gc_lock; + delete _current_gc_stat; } void GCMemoryManager::initialize_gc_stat_info() { assert(MemoryService::num_memory_pools() > 0, "should have one or more memory pools"); _last_gc_stat = new GCStatInfo(MemoryService::num_memory_pools()); + _current_gc_stat = new GCStatInfo(MemoryService::num_memory_pools()); + // tracking concurrent collections we need two objects: one to update, and one to + // hold the publicly available "last (completed) gc" information. 
} -void GCMemoryManager::gc_begin() { - assert(_last_gc_stat != NULL, "Just checking"); - _accumulated_timer.start(); - _num_collections++; - _last_gc_stat->set_index(_num_collections); - _last_gc_stat->set_start_time(Management::timestamp()); +void GCMemoryManager::gc_begin(bool recordGCBeginTime, bool recordPreGCUsage, + bool recordAccumulatedGCTime) { + assert(_last_gc_stat != NULL && _current_gc_stat != NULL, "Just checking"); + if (recordAccumulatedGCTime) { + _accumulated_timer.start(); + } + // _num_collections now increases in gc_end, to count completed collections + if (recordGCBeginTime) { + _current_gc_stat->set_index(_num_collections+1); + _current_gc_stat->set_start_time(Management::timestamp()); + } - // Keep memory usage of all memory pools - for (int i = 0; i < MemoryService::num_memory_pools(); i++) { - MemoryPool* pool = MemoryService::get_memory_pool(i); - MemoryUsage usage = pool->get_memory_usage(); - _last_gc_stat->set_before_gc_usage(i, usage); - HS_DTRACE_PROBE8(hotspot, mem__pool__gc__begin, - name(), strlen(name()), - pool->name(), strlen(pool->name()), - usage.init_size(), usage.used(), - usage.committed(), usage.max_size()); + if (recordPreGCUsage) { + // Keep memory usage of all memory pools + for (int i = 0; i < MemoryService::num_memory_pools(); i++) { + MemoryPool* pool = MemoryService::get_memory_pool(i); + MemoryUsage usage = pool->get_memory_usage(); + _current_gc_stat->set_before_gc_usage(i, usage); + HS_DTRACE_PROBE8(hotspot, mem__pool__gc__begin, + name(), strlen(name()), + pool->name(), strlen(pool->name()), + usage.init_size(), usage.used(), + usage.committed(), usage.max_size()); + } } } -void GCMemoryManager::gc_end() { - _accumulated_timer.stop(); - _last_gc_stat->set_end_time(Management::timestamp()); - - int i; - // keep the last gc statistics for all memory pools - for (i = 0; i < MemoryService::num_memory_pools(); i++) { - MemoryPool* pool = MemoryService::get_memory_pool(i); - MemoryUsage usage = pool->get_memory_usage(); - - HS_DTRACE_PROBE8(hotspot, mem__pool__gc__end, - name(), strlen(name()), - pool->name(), strlen(pool->name()), - usage.init_size(), usage.used(), - usage.committed(), usage.max_size()); - - _last_gc_stat->set_after_gc_usage(i, usage); +// A collector MUST, even if it does not complete for some reason, +// make a TraceMemoryManagerStats object where countCollection is true, +// to ensure the current gc stat is placed in _last_gc_stat. 
+void GCMemoryManager::gc_end(bool recordPostGCUsage, + bool recordAccumulatedGCTime, + bool recordGCEndTime, bool countCollection) { + if (recordAccumulatedGCTime) { + _accumulated_timer.stop(); + } + if (recordGCEndTime) { + _current_gc_stat->set_end_time(Management::timestamp()); } - // Set last collection usage of the memory pools managed by this collector - for (i = 0; i < num_memory_pools(); i++) { - MemoryPool* pool = get_memory_pool(i); - MemoryUsage usage = pool->get_memory_usage(); + if (recordPostGCUsage) { + int i; + // keep the last gc statistics for all memory pools + for (i = 0; i < MemoryService::num_memory_pools(); i++) { + MemoryPool* pool = MemoryService::get_memory_pool(i); + MemoryUsage usage = pool->get_memory_usage(); - // Compare with GC usage threshold - pool->set_last_collection_usage(usage); - LowMemoryDetector::detect_after_gc_memory(pool); + HS_DTRACE_PROBE8(hotspot, mem__pool__gc__end, + name(), strlen(name()), + pool->name(), strlen(pool->name()), + usage.init_size(), usage.used(), + usage.committed(), usage.max_size()); + + _current_gc_stat->set_after_gc_usage(i, usage); + } + + // Set last collection usage of the memory pools managed by this collector + for (i = 0; i < num_memory_pools(); i++) { + MemoryPool* pool = get_memory_pool(i); + MemoryUsage usage = pool->get_memory_usage(); + + // Compare with GC usage threshold + pool->set_last_collection_usage(usage); + LowMemoryDetector::detect_after_gc_memory(pool); + } + } + if (countCollection) { + _num_collections++; + // alternately update two objects making one public when complete + { + MutexLockerEx ml(_last_gc_lock, Mutex::_no_safepoint_check_flag); + GCStatInfo *tmp = _last_gc_stat; + _last_gc_stat = _current_gc_stat; + _current_gc_stat = tmp; + // reset the current stat for diagnosability purposes + _current_gc_stat->clear(); + } } } + +size_t GCMemoryManager::get_last_gc_stat(GCStatInfo* dest) { + MutexLockerEx ml(_last_gc_lock, Mutex::_no_safepoint_check_flag); + if (_last_gc_stat->gc_index() != 0) { + dest->set_index(_last_gc_stat->gc_index()); + dest->set_start_time(_last_gc_stat->start_time()); + dest->set_end_time(_last_gc_stat->end_time()); + assert(dest->usage_array_size() == _last_gc_stat->usage_array_size(), + "Must have same array size"); + size_t len = dest->usage_array_size() * sizeof(MemoryUsage); + memcpy(dest->before_gc_usage_array(), _last_gc_stat->before_gc_usage_array(), len); + memcpy(dest->after_gc_usage_array(), _last_gc_stat->after_gc_usage_array(), len); + } + return _last_gc_stat->gc_index(); +} diff --git a/hotspot/src/share/vm/services/memoryManager.hpp b/hotspot/src/share/vm/services/memoryManager.hpp index 70b7eef717b..7399141a83a 100644 --- a/hotspot/src/share/vm/services/memoryManager.hpp +++ b/hotspot/src/share/vm/services/memoryManager.hpp @@ -131,6 +131,9 @@ public: return _after_gc_usage_array[pool_index]; } + MemoryUsage* before_gc_usage_array() { return _before_gc_usage_array; } + MemoryUsage* after_gc_usage_array() { return _after_gc_usage_array; } + void set_index(size_t index) { _index = index; } void set_start_time(jlong time) { _start_time = time; } void set_end_time(jlong time) { _end_time = time; } @@ -143,7 +146,7 @@ public: set_gc_usage(pool_index, usage, false /* after gc */); } - void copy_stat(GCStatInfo* stat); + void clear(); }; class GCMemoryManager : public MemoryManager { @@ -153,6 +156,8 @@ private: elapsedTimer _accumulated_timer; elapsedTimer _gc_timer; // for measuring every GC duration GCStatInfo* _last_gc_stat; + Mutex* _last_gc_lock; + 
GCStatInfo* _current_gc_stat; int _num_gc_threads; public: GCMemoryManager(); @@ -166,11 +171,16 @@ public: int num_gc_threads() { return _num_gc_threads; } void set_num_gc_threads(int count) { _num_gc_threads = count; } - void gc_begin(); - void gc_end(); + void gc_begin(bool recordGCBeginTime, bool recordPreGCUsage, + bool recordAccumulatedGCTime); + void gc_end(bool recordPostGCUsage, bool recordAccumulatedGCTime, + bool recordGCEndTime, bool countCollection); void reset_gc_stat() { _num_collections = 0; _accumulated_timer.reset(); } - GCStatInfo* last_gc_stat() { return _last_gc_stat; } + + // Copy out _last_gc_stat to the given destination, returning + // the collection count. Zero signifies no gc has taken place. + size_t get_last_gc_stat(GCStatInfo* dest); virtual MemoryManager::Name kind() = 0; }; diff --git a/hotspot/src/share/vm/services/memoryService.cpp b/hotspot/src/share/vm/services/memoryService.cpp index 3c55fffadae..c50a4fefe68 100644 --- a/hotspot/src/share/vm/services/memoryService.cpp +++ b/hotspot/src/share/vm/services/memoryService.cpp @@ -509,7 +509,10 @@ void MemoryService::track_memory_pool_usage(MemoryPool* pool) { } } -void MemoryService::gc_begin(bool fullGC) { +void MemoryService::gc_begin(bool fullGC, bool recordGCBeginTime, + bool recordAccumulatedGCTime, + bool recordPreGCUsage, bool recordPeakUsage) { + GCMemoryManager* mgr; if (fullGC) { mgr = _major_gc_manager; @@ -517,16 +520,21 @@ void MemoryService::gc_begin(bool fullGC) { mgr = _minor_gc_manager; } assert(mgr->is_gc_memory_manager(), "Sanity check"); - mgr->gc_begin(); + mgr->gc_begin(recordGCBeginTime, recordPreGCUsage, recordAccumulatedGCTime); // Track the peak memory usage when GC begins - for (int i = 0; i < _pools_list->length(); i++) { - MemoryPool* pool = _pools_list->at(i); - pool->record_peak_memory_usage(); + if (recordPeakUsage) { + for (int i = 0; i < _pools_list->length(); i++) { + MemoryPool* pool = _pools_list->at(i); + pool->record_peak_memory_usage(); + } } } -void MemoryService::gc_end(bool fullGC) { +void MemoryService::gc_end(bool fullGC, bool recordPostGCUsage, + bool recordAccumulatedGCTime, + bool recordGCEndTime, bool countCollection) { + GCMemoryManager* mgr; if (fullGC) { mgr = (GCMemoryManager*) _major_gc_manager; @@ -536,7 +544,8 @@ void MemoryService::gc_end(bool fullGC) { assert(mgr->is_gc_memory_manager(), "Sanity check"); // register the GC end statistics and memory usage - mgr->gc_end(); + mgr->gc_end(recordPostGCUsage, recordAccumulatedGCTime, recordGCEndTime, + countCollection); } void MemoryService::oops_do(OopClosure* f) { @@ -585,12 +594,12 @@ Handle MemoryService::create_MemoryUsage_obj(MemoryUsage usage, TRAPS) { return obj; } // -// GC manager type depends on the type of Generation. Depending the space -// availablity and vm option the gc uses major gc manager or minor gc +// GC manager type depends on the type of Generation. Depending on the space +// availablity and vm options the gc uses major gc manager or minor gc // manager or both. The type of gc manager depends on the generation kind. -// For DefNew, ParNew and ASParNew generation doing scavange gc uses minor -// gc manager (so _fullGC is set to false ) and for other generation kind -// DOing mark-sweep-compact uses major gc manager (so _fullGC is set +// For DefNew, ParNew and ASParNew generation doing scavenge gc uses minor +// gc manager (so _fullGC is set to false ) and for other generation kinds +// doing mark-sweep-compact uses major gc manager (so _fullGC is set // to true). 
TraceMemoryManagerStats::TraceMemoryManagerStats(Generation::Name kind) { switch (kind) { @@ -611,13 +620,48 @@ TraceMemoryManagerStats::TraceMemoryManagerStats(Generation::Name kind) { default: assert(false, "Unrecognized gc generation kind."); } - MemoryService::gc_begin(_fullGC); + // this has to be called in a stop the world pause and represent + // an entire gc pause, start to finish: + initialize(_fullGC, true, true, true, true, true, true, true); } -TraceMemoryManagerStats::TraceMemoryManagerStats(bool fullGC) { +TraceMemoryManagerStats::TraceMemoryManagerStats(bool fullGC, + bool recordGCBeginTime, + bool recordPreGCUsage, + bool recordPeakUsage, + bool recordPostGCUsage, + bool recordAccumulatedGCTime, + bool recordGCEndTime, + bool countCollection) { + initialize(fullGC, recordGCBeginTime, recordPreGCUsage, recordPeakUsage, + recordPostGCUsage, recordAccumulatedGCTime, recordGCEndTime, + countCollection); +} + +// for a subclass to create then initialize an instance before invoking +// the MemoryService +void TraceMemoryManagerStats::initialize(bool fullGC, + bool recordGCBeginTime, + bool recordPreGCUsage, + bool recordPeakUsage, + bool recordPostGCUsage, + bool recordAccumulatedGCTime, + bool recordGCEndTime, + bool countCollection) { _fullGC = fullGC; - MemoryService::gc_begin(_fullGC); + _recordGCBeginTime = recordGCBeginTime; + _recordPreGCUsage = recordPreGCUsage; + _recordPeakUsage = recordPeakUsage; + _recordPostGCUsage = recordPostGCUsage; + _recordAccumulatedGCTime = recordAccumulatedGCTime; + _recordGCEndTime = recordGCEndTime; + _countCollection = countCollection; + + MemoryService::gc_begin(_fullGC, _recordGCBeginTime, _recordAccumulatedGCTime, + _recordPreGCUsage, _recordPeakUsage); } TraceMemoryManagerStats::~TraceMemoryManagerStats() { - MemoryService::gc_end(_fullGC); + MemoryService::gc_end(_fullGC, _recordPostGCUsage, _recordAccumulatedGCTime, + _recordGCEndTime, _countCollection); } + diff --git a/hotspot/src/share/vm/services/memoryService.hpp b/hotspot/src/share/vm/services/memoryService.hpp index a21f3d1dc7f..cf26f1987bb 100644 --- a/hotspot/src/share/vm/services/memoryService.hpp +++ b/hotspot/src/share/vm/services/memoryService.hpp @@ -149,8 +149,13 @@ public: } static void track_memory_pool_usage(MemoryPool* pool); - static void gc_begin(bool fullGC); - static void gc_end(bool fullGC); + static void gc_begin(bool fullGC, bool recordGCBeginTime, + bool recordAccumulatedGCTime, + bool recordPreGCUsage, bool recordPeakUsage); + static void gc_end(bool fullGC, bool recordPostGCUsage, + bool recordAccumulatedGCTime, + bool recordGCEndTime, bool countCollection); + static void oops_do(OopClosure* f); @@ -164,8 +169,34 @@ public: class TraceMemoryManagerStats : public StackObj { private: bool _fullGC; + bool _recordGCBeginTime; + bool _recordPreGCUsage; + bool _recordPeakUsage; + bool _recordPostGCUsage; + bool _recordAccumulatedGCTime; + bool _recordGCEndTime; + bool _countCollection; + public: - TraceMemoryManagerStats(bool fullGC); + TraceMemoryManagerStats() {} + TraceMemoryManagerStats(bool fullGC, + bool recordGCBeginTime = true, + bool recordPreGCUsage = true, + bool recordPeakUsage = true, + bool recordPostGCUsage = true, + bool recordAccumulatedGCTime = true, + bool recordGCEndTime = true, + bool countCollection = true); + + void initialize(bool fullGC, + bool recordGCBeginTime, + bool recordPreGCUsage, + bool recordPeakUsage, + bool recordPostGCUsage, + bool recordAccumulatedGCTime, + bool recordGCEndTime, + bool countCollection); + 
TraceMemoryManagerStats(Generation::Name kind); ~TraceMemoryManagerStats(); }; diff --git a/hotspot/test/gc/6581734/Test6581734.java b/hotspot/test/gc/6581734/Test6581734.java new file mode 100644 index 00000000000..1967f00880c --- /dev/null +++ b/hotspot/test/gc/6581734/Test6581734.java @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test Test6581734.java + * @bug 6581734 + * @summary CMS Old Gen's collection usage is zero after GC which is incorrect + * @run main/othervm -Xmx512m -verbose:gc -XX:+UseConcMarkSweepGC Test6581734 + * + */ +import java.util.*; +import java.lang.management.*; + +// 6581734 states that memory pool usage via the mbean is wrong +// for CMS (zero, even after a collection). +// +// 6580448 states that the collection count similarly is wrong +// (stays at zero for CMS collections) +// -- closed as dup of 6581734 as the same fix resolves both. + + +public class Test6581734 { + + private String poolName = "CMS"; + private String collectorName = "ConcurrentMarkSweep"; + + public static void main(String [] args) { + + Test6581734 t = null; + if (args.length==2) { + t = new Test6581734(args[0], args[1]); + } else { + System.out.println("Defaulting to monitor CMS pool and collector."); + t = new Test6581734(); + } + t.run(); + } + + public Test6581734(String pool, String collector) { + poolName = pool; + collectorName = collector; + } + + public Test6581734() { + } + + public void run() { + // Use some memory, enough that we expect collections should + // have happened. + // Must run with options to ensure no stop the world full GC, + // but e.g. at least one CMS cycle. + allocationWork(300*1024*1024); + System.out.println("Done allocationWork"); + + // Verify some non-zero results are stored. + List pools = ManagementFactory.getMemoryPoolMXBeans(); + int poolsFound = 0; + int poolsWithStats = 0; + for (int i=0; i collectors = ManagementFactory.getGarbageCollectorMXBeans(); + int collectorsFound = 0; + int collectorsWithTime= 0; + for (int i=0; i 0) { + collectorsWithTime++; + } + } + } + // verify: + if (poolsWithStats < poolsFound) { + throw new RuntimeException("pools found with zero stats"); + } + + if (collectorsWithTime Date: Mon, 2 Aug 2010 12:51:43 -0700 Subject: [PATCH 2/9] 6814437: G1: remove the _new_refs array The per-worker _new_refs array is used to hold references that point into the collection set. It is populated during RSet updating and subsequently processed. 
In the event of an evacuation failure it processed again to recreate the RSets of regions in the collection set. Remove the per-worker _new_refs array by processing the references directly. Use a DirtyCardQueue to hold the cards containing the references so that the RSets of regions in the collection set can be recreated when handling an evacuation failure. Reviewed-by: iveresov, jmasa, tonyp --- .../g1/concurrentG1Refine.cpp | 18 +- .../g1/concurrentG1Refine.hpp | 4 +- .../gc_implementation/g1/dirtyCardQueue.cpp | 32 +- .../gc_implementation/g1/dirtyCardQueue.hpp | 21 +- .../gc_implementation/g1/g1CollectedHeap.cpp | 49 ++- .../gc_implementation/g1/g1CollectedHeap.hpp | 17 +- .../g1/g1CollectorPolicy.cpp | 2 - .../g1/g1CollectorPolicy.hpp | 11 - .../g1/g1OopClosures.inline.hpp | 8 +- .../vm/gc_implementation/g1/g1RemSet.cpp | 336 +++++++++++++----- .../vm/gc_implementation/g1/g1RemSet.hpp | 53 ++- .../gc_implementation/g1/g1RemSet.inline.hpp | 31 +- .../vm/gc_implementation/g1/heapRegion.cpp | 2 + .../vm/gc_implementation/includeDB_gc_g1 | 3 +- 14 files changed, 407 insertions(+), 180 deletions(-) diff --git a/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp index 78d3d5cb2cd..e144aa7cc86 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp @@ -339,7 +339,9 @@ jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr, bool* defer) { return res; } -void ConcurrentG1Refine::clean_up_cache(int worker_i, G1RemSet* g1rs) { +void ConcurrentG1Refine::clean_up_cache(int worker_i, + G1RemSet* g1rs, + DirtyCardQueue* into_cset_dcq) { assert(!use_cache(), "cache should be disabled"); int start_idx; @@ -353,7 +355,19 @@ void ConcurrentG1Refine::clean_up_cache(int worker_i, G1RemSet* g1rs) { for (int i = start_idx; i < end_idx; i++) { jbyte* entry = _hot_cache[i]; if (entry != NULL) { - g1rs->concurrentRefineOneCard(entry, worker_i); + if (g1rs->concurrentRefineOneCard(entry, worker_i, true)) { + // 'entry' contains references that point into the current + // collection set. We need to record 'entry' in the DCQS + // that's used for that purpose. + // + // The only time we care about recording cards that contain + // references that point into the collection set is during + // RSet updating while within an evacuation pause. + // In this case worker_i should be the id of a GC worker thread + assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause"); + assert(worker_i < (int) DirtyCardQueueSet::num_par_ids(), "incorrect worker id"); + into_cset_dcq->enqueue(entry); + } } } } diff --git a/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp index 85a50f7c7d5..f5c3653f888 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -184,7 +184,7 @@ class ConcurrentG1Refine: public CHeapObj { jbyte* cache_insert(jbyte* card_ptr, bool* defer); // Process the cached entries. 
- void clean_up_cache(int worker_i, G1RemSet* g1rs); + void clean_up_cache(int worker_i, G1RemSet* g1rs, DirtyCardQueue* into_cset_dcq); // Set up for parallel processing of the cards in the hot cache void clear_hot_cache_claimed_index() { diff --git a/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp b/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp index 760f3630129..c8100c4770e 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -178,13 +178,14 @@ DirtyCardQueueSet::get_completed_buffer(int stop_at) { } bool DirtyCardQueueSet:: -apply_closure_to_completed_buffer_helper(int worker_i, +apply_closure_to_completed_buffer_helper(CardTableEntryClosure* cl, + int worker_i, BufferNode* nd) { if (nd != NULL) { void **buf = BufferNode::make_buffer_from_node(nd); size_t index = nd->index(); bool b = - DirtyCardQueue::apply_closure_to_buffer(_closure, buf, + DirtyCardQueue::apply_closure_to_buffer(cl, buf, index, _sz, true, worker_i); if (b) { @@ -199,17 +200,24 @@ apply_closure_to_completed_buffer_helper(int worker_i, } } -bool DirtyCardQueueSet::apply_closure_to_completed_buffer(int worker_i, +bool DirtyCardQueueSet::apply_closure_to_completed_buffer(CardTableEntryClosure* cl, + int worker_i, int stop_at, - bool during_pause) -{ + bool during_pause) { assert(!during_pause || stop_at == 0, "Should not leave any completed buffers during a pause"); BufferNode* nd = get_completed_buffer(stop_at); - bool res = apply_closure_to_completed_buffer_helper(worker_i, nd); + bool res = apply_closure_to_completed_buffer_helper(cl, worker_i, nd); if (res) Atomic::inc(&_processed_buffers_rs_thread); return res; } +bool DirtyCardQueueSet::apply_closure_to_completed_buffer(int worker_i, + int stop_at, + bool during_pause) { + return apply_closure_to_completed_buffer(_closure, worker_i, + stop_at, during_pause); +} + void DirtyCardQueueSet::apply_closure_to_all_completed_buffers() { BufferNode* nd = _completed_buffers_head; while (nd != NULL) { @@ -222,8 +230,8 @@ void DirtyCardQueueSet::apply_closure_to_all_completed_buffers() { } } -void DirtyCardQueueSet::abandon_logs() { - assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint."); +// Deallocates any completed log buffers +void DirtyCardQueueSet::clear() { BufferNode* buffers_to_delete = NULL; { MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); @@ -242,6 +250,12 @@ void DirtyCardQueueSet::abandon_logs() { buffers_to_delete = nd->next(); deallocate_buffer(BufferNode::make_buffer_from_node(nd)); } + +} + +void DirtyCardQueueSet::abandon_logs() { + assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint."); + clear(); // Since abandon is done only at safepoints, we can safely manipulate // these queues. 
for (JavaThread* t = Threads::first(); t; t = t->next()) { diff --git a/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp b/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp index e1b68981648..524c0c25681 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -123,7 +123,21 @@ public: int stop_at = 0, bool during_pause = false); - bool apply_closure_to_completed_buffer_helper(int worker_i, + // If there exists some completed buffer, pop it, then apply the + // specified closure to all its elements, nulling out those elements + // processed. If all elements are processed, returns "true". If no + // completed buffers exist, returns false. If a completed buffer exists, + // but is only partially completed before a "yield" happens, the + // partially completed buffer (with its processed elements set to NULL) + // is returned to the completed buffer set, and this call returns false. + bool apply_closure_to_completed_buffer(CardTableEntryClosure* cl, + int worker_i = 0, + int stop_at = 0, + bool during_pause = false); + + // Helper routine for the above. + bool apply_closure_to_completed_buffer_helper(CardTableEntryClosure* cl, + int worker_i, BufferNode* nd); BufferNode* get_completed_buffer(int stop_at); @@ -136,6 +150,9 @@ public: return &_shared_dirty_card_queue; } + // Deallocate any completed log buffers + void clear(); + // If a full collection is happening, reset partial logs, and ignore // completed ones: the full collection will make them all irrelevant. void abandon_logs(); diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp index 658ac777f49..6d798a54889 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp @@ -56,7 +56,12 @@ public: _sts(sts), _g1rs(g1rs), _cg1r(cg1r), _concurrent(true) {} bool do_card_ptr(jbyte* card_ptr, int worker_i) { - _g1rs->concurrentRefineOneCard(card_ptr, worker_i); + bool oops_into_cset = _g1rs->concurrentRefineOneCard(card_ptr, worker_i, false); + // This path is executed by the concurrent refine or mutator threads, + // concurrently, and so we do not care if card_ptr contains references + // that point into the collection set. + assert(!oops_into_cset, "should be"); + if (_concurrent && _sts->should_yield()) { // Caller will actually yield. return false; @@ -1322,6 +1327,7 @@ G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) : SharedHeap(policy_), _g1_policy(policy_), _dirty_card_queue_set(false), + _into_cset_dirty_card_queue_set(false), _ref_processor(NULL), _process_strong_tasks(new SubTasksDone(G1H_PS_NumElements)), _bot_shared(NULL), @@ -1572,6 +1578,16 @@ jint G1CollectedHeap::initialize() { Shared_DirtyCardQ_lock, &JavaThread::dirty_card_queue_set()); } + + // Initialize the card queue set used to hold cards containing + // references into the collection set. 
+ _into_cset_dirty_card_queue_set.initialize(DirtyCardQ_CBL_mon, + DirtyCardQ_FL_lock, + -1, // never trigger processing + -1, // no limit on length + Shared_DirtyCardQ_lock, + &JavaThread::dirty_card_queue_set()); + // In case we're keeping closure specialization stats, initialize those // counts and that mechanism. SpecializationStats::clear(); @@ -1603,14 +1619,16 @@ size_t G1CollectedHeap::capacity() const { return _g1_committed.byte_size(); } -void G1CollectedHeap::iterate_dirty_card_closure(bool concurrent, +void G1CollectedHeap::iterate_dirty_card_closure(CardTableEntryClosure* cl, + DirtyCardQueue* into_cset_dcq, + bool concurrent, int worker_i) { // Clean cards in the hot card cache - concurrent_g1_refine()->clean_up_cache(worker_i, g1_rem_set()); + concurrent_g1_refine()->clean_up_cache(worker_i, g1_rem_set(), into_cset_dcq); DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); int n_completed_buffers = 0; - while (dcqs.apply_closure_to_completed_buffer(worker_i, 0, true)) { + while (dcqs.apply_closure_to_completed_buffer(cl, worker_i, 0, true)) { n_completed_buffers++; } g1_policy()->record_update_rs_processed_buffers(worker_i, @@ -3346,25 +3364,6 @@ public: } }; -class UpdateRSetImmediate : public OopsInHeapRegionClosure { -private: - G1CollectedHeap* _g1; - G1RemSet* _g1_rem_set; -public: - UpdateRSetImmediate(G1CollectedHeap* g1) : - _g1(g1), _g1_rem_set(g1->g1_rem_set()) {} - - virtual void do_oop(narrowOop* p) { do_oop_work(p); } - virtual void do_oop( oop* p) { do_oop_work(p); } - template void do_oop_work(T* p) { - assert(_from->is_in_reserved(p), "paranoia"); - T heap_oop = oopDesc::load_heap_oop(p); - if (!oopDesc::is_null(heap_oop) && !_from->is_survivor()) { - _g1_rem_set->par_write_ref(_from, p, 0); - } - } -}; - class UpdateRSetDeferred : public OopsInHeapRegionClosure { private: G1CollectedHeap* _g1; @@ -3389,8 +3388,6 @@ public: } }; - - class RemoveSelfPointerClosure: public ObjectClosure { private: G1CollectedHeap* _g1; @@ -3453,7 +3450,7 @@ public: }; void G1CollectedHeap::remove_self_forwarding_pointers() { - UpdateRSetImmediate immediate_update(_g1h); + UpdateRSetImmediate immediate_update(_g1h->g1_rem_set()); DirtyCardQueue dcq(&_g1h->dirty_card_queue_set()); UpdateRSetDeferred deferred_update(_g1h, &dcq); OopsInHeapRegionClosure *cl; diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp index 74606c18bdf..6daaf614f44 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp @@ -505,6 +505,12 @@ protected: // A function to check the consistency of dirty card logs. void check_ct_logs_at_safepoint(); + // A DirtyCardQueueSet that is used to hold cards that contain + // references into the current collection set. This is used to + // update the remembered sets of the regions in the collection + // set in the event of an evacuation failure. + DirtyCardQueueSet _into_cset_dirty_card_queue_set; + // After a collection pause, make the regions in the CS into free // regions. void free_collection_set(HeapRegion* cs_head); @@ -661,6 +667,13 @@ public: // A set of cards where updates happened during the GC DirtyCardQueueSet& dirty_card_queue_set() { return _dirty_card_queue_set; } + // A DirtyCardQueueSet that is used to hold cards that contain + // references into the current collection set. 
This is used to + // update the remembered sets of the regions in the collection + // set in the event of an evacuation failure. + DirtyCardQueueSet& into_cset_dirty_card_queue_set() + { return _into_cset_dirty_card_queue_set; } + // Create a G1CollectedHeap with the specified policy. // Must call the initialize method afterwards. // May not return if something goes wrong. @@ -715,7 +728,9 @@ public: OrderAccess::fence(); } - void iterate_dirty_card_closure(bool concurrent, int worker_i); + void iterate_dirty_card_closure(CardTableEntryClosure* cl, + DirtyCardQueue* into_cset_dcq, + bool concurrent, int worker_i); // The shared block offset table array. G1BlockOffsetSharedArray* bot_shared() const { return _bot_shared; } diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp index 0cb73eb0c6b..2feaae7df3a 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp @@ -238,7 +238,6 @@ G1CollectorPolicy::G1CollectorPolicy() : _par_last_update_rs_processed_buffers = new double[_parallel_gc_threads]; _par_last_scan_rs_times_ms = new double[_parallel_gc_threads]; - _par_last_scan_new_refs_times_ms = new double[_parallel_gc_threads]; _par_last_obj_copy_times_ms = new double[_parallel_gc_threads]; @@ -842,7 +841,6 @@ void G1CollectorPolicy::record_collection_pause_start(double start_time_sec, _par_last_update_rs_times_ms[i] = -1234.0; _par_last_update_rs_processed_buffers[i] = -1234.0; _par_last_scan_rs_times_ms[i] = -1234.0; - _par_last_scan_new_refs_times_ms[i] = -1234.0; _par_last_obj_copy_times_ms[i] = -1234.0; _par_last_termination_times_ms[i] = -1234.0; _par_last_termination_attempts[i] = -1234.0; diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp index 61dbee6c09e..84a8491142c 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp @@ -63,8 +63,6 @@ class MainBodySummary: public CHeapObj { define_num_seq(mark_stack_scan) define_num_seq(update_rs) define_num_seq(scan_rs) - define_num_seq(scan_new_refs) // Only for temp use; added to - // in parallel case. 
define_num_seq(obj_copy) define_num_seq(termination) // parallel only define_num_seq(parallel_other) // parallel only @@ -177,7 +175,6 @@ protected: double* _par_last_update_rs_times_ms; double* _par_last_update_rs_processed_buffers; double* _par_last_scan_rs_times_ms; - double* _par_last_scan_new_refs_times_ms; double* _par_last_obj_copy_times_ms; double* _par_last_termination_times_ms; double* _par_last_termination_attempts; @@ -933,14 +930,6 @@ public: _par_last_scan_rs_times_ms[thread] = ms; } - void record_scan_new_refs_time(int thread, double ms) { - _par_last_scan_new_refs_times_ms[thread] = ms; - } - - double get_scan_new_refs_time(int thread) { - return _par_last_scan_new_refs_times_ms[thread]; - } - void reset_obj_copy_time(int thread) { _par_last_obj_copy_times_ms[thread] = 0.0; } diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp index 1037b83bd48..28ec22b04f3 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2007, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -37,7 +37,8 @@ template inline void FilterIntoCSClosure::do_oop_nv(T* p) { _g1->obj_in_cs(oopDesc::decode_heap_oop_not_null(heap_oop))) { _oc->do_oop(p); #if FILTERINTOCSCLOSURE_DOHISTOGRAMCOUNT - _dcto_cl->incr_count(); + if (_dcto_cl != NULL) + _dcto_cl->incr_count(); #endif } } @@ -113,7 +114,10 @@ template inline void G1ParPushHeapRSClosure::do_oop_nv(T* p) { if (_g1->in_cset_fast_test(obj)) { Prefetch::write(obj->mark_addr(), 0); Prefetch::read(obj->mark_addr(), (HeapWordSize*2)); + + // Place on the references queue _par_scan_state->push_on_queue(p); } } } + diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp index a3a77ed186b..9c081fce5af 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp @@ -122,23 +122,24 @@ public: HRInto_G1RemSet::HRInto_G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs) : G1RemSet(g1), _ct_bs(ct_bs), _g1p(_g1->g1_policy()), _cg1r(g1->concurrent_g1_refine()), - _par_traversal_in_progress(false), _new_refs(NULL), + _par_traversal_in_progress(false), + _cset_rs_update_cl(NULL), _cards_scanned(NULL), _total_cards_scanned(0) { _seq_task = new SubTasksDone(NumSeqTasks); guarantee(n_workers() > 0, "There should be some workers"); - _new_refs = NEW_C_HEAP_ARRAY(GrowableArray*, n_workers()); + _cset_rs_update_cl = NEW_C_HEAP_ARRAY(OopsInHeapRegionClosure*, n_workers()); for (uint i = 0; i < n_workers(); i++) { - _new_refs[i] = new (ResourceObj::C_HEAP) GrowableArray(8192,true); + _cset_rs_update_cl[i] = NULL; } } HRInto_G1RemSet::~HRInto_G1RemSet() { delete _seq_task; for (uint i = 0; i < n_workers(); i++) { - delete _new_refs[i]; + assert(_cset_rs_update_cl[i] == NULL, "it should be"); } - FREE_C_HEAP_ARRAY(GrowableArray*, _new_refs); + FREE_C_HEAP_ARRAY(OopsInHeapRegionClosure*, _cset_rs_update_cl); } void CountNonCleanMemRegionClosure::do_MemRegion(MemRegion mr) { @@ -306,12 +307,45 @@ void HRInto_G1RemSet::scanRS(OopsInHeapRegionClosure* oc, int worker_i) { 
_g1p->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0); } -void HRInto_G1RemSet::updateRS(int worker_i) { - ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine(); +// Closure used for updating RSets and recording references that +// point into the collection set. Only called during an +// evacuation pause. +class RefineRecordRefsIntoCSCardTableEntryClosure: public CardTableEntryClosure { + G1RemSet* _g1rs; + DirtyCardQueue* _into_cset_dcq; +public: + RefineRecordRefsIntoCSCardTableEntryClosure(G1CollectedHeap* g1h, + DirtyCardQueue* into_cset_dcq) : + _g1rs(g1h->g1_rem_set()), _into_cset_dcq(into_cset_dcq) + {} + bool do_card_ptr(jbyte* card_ptr, int worker_i) { + // The only time we care about recording cards that + // contain references that point into the collection set + // is during RSet updating within an evacuation pause. + // In this case worker_i should be the id of a GC worker thread. + assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause"); + assert(worker_i < (int) DirtyCardQueueSet::num_par_ids(), "should be a GC worker"); + + if (_g1rs->concurrentRefineOneCard(card_ptr, worker_i, true)) { + // 'card_ptr' contains references that point into the collection + // set. We need to record the card in the DCQS + // (G1CollectedHeap::into_cset_dirty_card_queue_set()) + // that's used for that purpose. + // + // Enqueue the card + _into_cset_dcq->enqueue(card_ptr); + } + return true; + } +}; + +void HRInto_G1RemSet::updateRS(DirtyCardQueue* into_cset_dcq, int worker_i) { double start = os::elapsedTime(); - // Apply the appropriate closure to all remaining log entries. - _g1->iterate_dirty_card_closure(false, worker_i); + // Apply the given closure to all remaining log entries. + RefineRecordRefsIntoCSCardTableEntryClosure into_cset_update_rs_cl(_g1, into_cset_dcq); + _g1->iterate_dirty_card_closure(&into_cset_update_rs_cl, into_cset_dcq, false, worker_i); + // Now there should be no dirty cards. if (G1RSLogCheckCardTable) { CountNonCleanMemRegionClosure cl(_g1); @@ -405,33 +439,6 @@ public: } }; -template void -HRInto_G1RemSet::scanNewRefsRS_work(OopsInHeapRegionClosure* oc, - int worker_i) { - double scan_new_refs_start_sec = os::elapsedTime(); - G1CollectedHeap* g1h = G1CollectedHeap::heap(); - CardTableModRefBS* ct_bs = (CardTableModRefBS*) (g1h->barrier_set()); - for (int i = 0; i < _new_refs[worker_i]->length(); i++) { - T* p = (T*) _new_refs[worker_i]->at(i); - oop obj = oopDesc::load_decode_heap_oop(p); - // *p was in the collection set when p was pushed on "_new_refs", but - // another thread may have processed this location from an RS, so it - // might not point into the CS any longer. If so, it's obviously been - // processed, and we don't need to do anything further. - if (g1h->obj_in_cs(obj)) { - HeapRegion* r = g1h->heap_region_containing(p); - - DEBUG_ONLY(HeapRegion* to = g1h->heap_region_containing(obj)); - oc->set_region(r); - // If "p" has already been processed concurrently, this is - // idempotent. 
- oc->do_oop(p); - } - } - double scan_new_refs_time_ms = (os::elapsedTime() - scan_new_refs_start_sec) * 1000.0; - _g1p->record_scan_new_refs_time(worker_i, scan_new_refs_time_ms); -} - void HRInto_G1RemSet::cleanupHRRS() { HeapRegionRemSet::cleanup(); } @@ -457,6 +464,26 @@ HRInto_G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc, count_cl.print_histo(); } + // We cache the value of 'oc' closure into the appropriate slot in the + // _cset_rs_update_cl for this worker + assert(worker_i < (int)n_workers(), "sanity"); + _cset_rs_update_cl[worker_i] = oc; + + // A DirtyCardQueue that is used to hold cards containing references + // that point into the collection set. This DCQ is associated with a + // special DirtyCardQueueSet (see g1CollectedHeap.hpp). Under normal + // circumstances (i.e. the pause successfully completes), these cards + // are just discarded (there's no need to update the RSets of regions + // that were in the collection set - after the pause these regions + // are wholly 'free' of live objects. In the event of an evacuation + // failure the cards/buffers in this queue set are: + // * passed to the DirtyCardQueueSet that is used to manage deferred + // RSet updates, or + // * scanned for references that point into the collection set + // and the RSet of the corresponding region in the collection set + // is updated immediately. + DirtyCardQueue into_cset_dcq(&_g1->into_cset_dirty_card_queue_set()); + if (ParallelGCThreads > 0) { // The two flags below were introduced temporarily to serialize // the updating and scanning of remembered sets. There are some @@ -465,12 +492,10 @@ HRInto_G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc, // conditions, we'll revert back to parallel remembered set // updating and scanning. See CRs 6677707 and 6677708. if (G1UseParallelRSetUpdating || (worker_i == 0)) { - updateRS(worker_i); - scanNewRefsRS(oc, worker_i); + updateRS(&into_cset_dcq, worker_i); } else { _g1p->record_update_rs_processed_buffers(worker_i, 0.0); _g1p->record_update_rs_time(worker_i, 0.0); - _g1p->record_scan_new_refs_time(worker_i, 0.0); } if (G1UseParallelRSetScanning || (worker_i == 0)) { scanRS(oc, worker_i); @@ -479,10 +504,12 @@ HRInto_G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc, } } else { assert(worker_i == 0, "invariant"); - updateRS(0); - scanNewRefsRS(oc, 0); + updateRS(&into_cset_dcq, 0); scanRS(oc, 0); } + + // We now clear the cached values of _cset_rs_update_cl for this worker + _cset_rs_update_cl[worker_i] = NULL; } void HRInto_G1RemSet:: @@ -519,49 +546,65 @@ class cleanUpIteratorsClosure : public HeapRegionClosure { } }; -class UpdateRSetOopsIntoCSImmediate : public OopClosure { - G1CollectedHeap* _g1; -public: - UpdateRSetOopsIntoCSImmediate(G1CollectedHeap* g1) : _g1(g1) { } - virtual void do_oop(narrowOop* p) { do_oop_work(p); } - virtual void do_oop( oop* p) { do_oop_work(p); } - template void do_oop_work(T* p) { - HeapRegion* to = _g1->heap_region_containing(oopDesc::load_decode_heap_oop(p)); - if (to->in_collection_set()) { - to->rem_set()->add_reference(p, 0); - } - } -}; - -class UpdateRSetOopsIntoCSDeferred : public OopClosure { +// This closure, applied to a DirtyCardQueueSet, is used to immediately +// update the RSets for the regions in the CSet. For each card it iterates +// through the oops which coincide with that card. 
It scans the reference +// fields in each oop; when it finds an oop that points into the collection +// set, the RSet for the region containing the referenced object is updated. +// Note: _par_traversal_in_progress in the G1RemSet must be FALSE; otherwise +// the UpdateRSetImmediate closure will cause cards to be enqueued on to +// the DCQS that we're iterating over, causing an infinite loop. +class UpdateRSetCardTableEntryIntoCSetClosure: public CardTableEntryClosure { G1CollectedHeap* _g1; CardTableModRefBS* _ct_bs; - DirtyCardQueue* _dcq; public: - UpdateRSetOopsIntoCSDeferred(G1CollectedHeap* g1, DirtyCardQueue* dcq) : - _g1(g1), _ct_bs((CardTableModRefBS*)_g1->barrier_set()), _dcq(dcq) { } - virtual void do_oop(narrowOop* p) { do_oop_work(p); } - virtual void do_oop( oop* p) { do_oop_work(p); } - template void do_oop_work(T* p) { - oop obj = oopDesc::load_decode_heap_oop(p); - if (_g1->obj_in_cs(obj)) { - size_t card_index = _ct_bs->index_for(p); - if (_ct_bs->mark_card_deferred(card_index)) { - _dcq->enqueue((jbyte*)_ct_bs->byte_for_index(card_index)); - } - } + UpdateRSetCardTableEntryIntoCSetClosure(G1CollectedHeap* g1, + CardTableModRefBS* bs): + _g1(g1), _ct_bs(bs) + { } + + bool do_card_ptr(jbyte* card_ptr, int worker_i) { + // Construct the region representing the card. + HeapWord* start = _ct_bs->addr_for(card_ptr); + // And find the region containing it. + HeapRegion* r = _g1->heap_region_containing(start); + assert(r != NULL, "unexpected null"); + + // Scan oops in the card looking for references into the collection set + HeapWord* end = _ct_bs->addr_for(card_ptr + 1); + MemRegion scanRegion(start, end); + + UpdateRSetImmediate update_rs_cl(_g1->g1_rem_set()); + FilterIntoCSClosure update_rs_cset_oop_cl(NULL, _g1, &update_rs_cl); + FilterOutOfRegionClosure filter_then_update_rs_cset_oop_cl(r, &update_rs_cset_oop_cl); + + // We can pass false as the "filter_young" parameter here as: + // * we should be in a STW pause, + // * the DCQS to which this closure is applied is used to hold + // references that point into the collection set from the prior + // RSet updating, + // * the post-write barrier shouldn't be logging updates to young + // regions (but there is a situation where this can happen - see + // the comment in HRInto_G1RemSet::concurrentRefineOneCard below - + // that should not be applicable here), and + // * during actual RSet updating, the filtering of cards in young + // regions in HeapRegion::oops_on_card_seq_iterate_careful is + // employed. + // As a result, when this closure is applied to "refs into cset" + // DCQS, we shouldn't see any cards in young regions. 
+ update_rs_cl.set_region(r); + HeapWord* stop_point = + r->oops_on_card_seq_iterate_careful(scanRegion, + &filter_then_update_rs_cset_oop_cl, + false /* filter_young */); + + // Since this is performed in the event of an evacuation failure, we + // we shouldn't see a non-null stop point + assert(stop_point == NULL, "saw an unallocated region"); + return true; } }; -template void HRInto_G1RemSet::new_refs_iterate_work(OopClosure* cl) { - for (size_t i = 0; i < n_workers(); i++) { - for (int j = 0; j < _new_refs[i]->length(); j++) { - T* p = (T*) _new_refs[i]->at(j); - cl->do_oop(p); - } - } -} - void HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do() { guarantee( _cards_scanned != NULL, "invariant" ); _total_cards_scanned = 0; @@ -584,21 +627,38 @@ void HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do() { set_par_traversal(false); } + DirtyCardQueueSet& into_cset_dcqs = _g1->into_cset_dirty_card_queue_set(); + int into_cset_n_buffers = into_cset_dcqs.completed_buffers_num(); + if (_g1->evacuation_failed()) { - // Restore remembered sets for the regions pointing into - // the collection set. + // Restore remembered sets for the regions pointing into the collection set. + if (G1DeferredRSUpdate) { - DirtyCardQueue dcq(&_g1->dirty_card_queue_set()); - UpdateRSetOopsIntoCSDeferred deferred_update(_g1, &dcq); - new_refs_iterate(&deferred_update); + // If deferred RS updates are enabled then we just need to transfer + // the completed buffers from (a) the DirtyCardQueueSet used to hold + // cards that contain references that point into the collection set + // to (b) the DCQS used to hold the deferred RS updates + _g1->dirty_card_queue_set().merge_bufferlists(&into_cset_dcqs); } else { - UpdateRSetOopsIntoCSImmediate immediate_update(_g1); - new_refs_iterate(&immediate_update); + + CardTableModRefBS* bs = (CardTableModRefBS*)_g1->barrier_set(); + UpdateRSetCardTableEntryIntoCSetClosure update_rs_cset_immediate(_g1, bs); + + int n_completed_buffers = 0; + while (into_cset_dcqs.apply_closure_to_completed_buffer(&update_rs_cset_immediate, + 0, 0, true)) { + n_completed_buffers++; + } + assert(n_completed_buffers == into_cset_n_buffers, "missed some buffers"); } } - for (uint i = 0; i < n_workers(); i++) { - _new_refs[i]->clear(); - } + + // Free any completed buffers in the DirtyCardQueueSet used to hold cards + // which contain references that point into the collection. 
+ _g1->into_cset_dirty_card_queue_set().clear(); + assert(_g1->into_cset_dirty_card_queue_set().completed_buffers_num() == 0, + "all buffers should be freed"); + _g1->into_cset_dirty_card_queue_set().clear_n_completed_buffers(); assert(!_par_traversal_in_progress, "Invariant between iterations."); } @@ -652,7 +712,43 @@ void HRInto_G1RemSet::scrub_par(BitMap* region_bm, BitMap* card_bm, static IntHistogram out_of_histo(50, 50); -void HRInto_G1RemSet::concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i) { +class TriggerClosure : public OopClosure { + bool _trigger; +public: + TriggerClosure() : _trigger(false) { } + bool value() const { return _trigger; } + template void do_oop_nv(T* p) { _trigger = true; } + virtual void do_oop(oop* p) { do_oop_nv(p); } + virtual void do_oop(narrowOop* p) { do_oop_nv(p); } +}; + +class InvokeIfNotTriggeredClosure: public OopClosure { + TriggerClosure* _t; + OopClosure* _oc; +public: + InvokeIfNotTriggeredClosure(TriggerClosure* t, OopClosure* oc): + _t(t), _oc(oc) { } + template void do_oop_nv(T* p) { + if (!_t->value()) _oc->do_oop(p); + } + virtual void do_oop(oop* p) { do_oop_nv(p); } + virtual void do_oop(narrowOop* p) { do_oop_nv(p); } +}; + +class Mux2Closure : public OopClosure { + OopClosure* _c1; + OopClosure* _c2; +public: + Mux2Closure(OopClosure *c1, OopClosure *c2) : _c1(c1), _c2(c2) { } + template void do_oop_nv(T* p) { + _c1->do_oop(p); _c2->do_oop(p); + } + virtual void do_oop(oop* p) { do_oop_nv(p); } + virtual void do_oop(narrowOop* p) { do_oop_nv(p); } +}; + +bool HRInto_G1RemSet::concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i, + bool check_for_refs_into_cset) { // Construct the region representing the card. HeapWord* start = _ct_bs->addr_for(card_ptr); // And find the region containing it. @@ -669,7 +765,16 @@ void HRInto_G1RemSet::concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i UpdateRSOopClosure update_rs_oop_cl(this, worker_i); update_rs_oop_cl.set_from(r); - FilterOutOfRegionClosure filter_then_update_rs_oop_cl(r, &update_rs_oop_cl); + + TriggerClosure trigger_cl; + FilterIntoCSClosure into_cs_cl(NULL, _g1, &trigger_cl); + InvokeIfNotTriggeredClosure invoke_cl(&trigger_cl, &into_cs_cl); + Mux2Closure mux(&invoke_cl, &update_rs_oop_cl); + + FilterOutOfRegionClosure filter_then_update_rs_oop_cl(r, + (check_for_refs_into_cset ? + (OopClosure*)&mux : + (OopClosure*)&update_rs_oop_cl)); // Undirty the card. *card_ptr = CardTableModRefBS::clean_card_val(); @@ -717,11 +822,18 @@ void HRInto_G1RemSet::concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i out_of_histo.add_entry(filter_then_update_rs_oop_cl.out_of_region()); _conc_refine_cards++; } + + return trigger_cl.value(); } -void HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i) { +bool HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i, + bool check_for_refs_into_cset) { // If the card is no longer dirty, nothing to do. - if (*card_ptr != CardTableModRefBS::dirty_card_val()) return; + if (*card_ptr != CardTableModRefBS::dirty_card_val()) { + // No need to return that this card contains refs that point + // into the collection set. + return false; + } // Construct the region representing the card. 
HeapWord* start = _ct_bs->addr_for(card_ptr); @@ -729,7 +841,9 @@ void HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i) { HeapRegion* r = _g1->heap_region_containing(start); if (r == NULL) { guarantee(_g1->is_in_permanent(start), "Or else where?"); - return; // Not in the G1 heap (might be in perm, for example.) + // Again no need to return that this card contains refs that + // point into the collection set. + return false; // Not in the G1 heap (might be in perm, for example.) } // Why do we have to check here whether a card is on a young region, // given that we dirty young regions and, as a result, the @@ -743,7 +857,7 @@ void HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i) { // and it doesn't happen often, but it can happen. So, the extra // check below filters out those cards. if (r->is_young()) { - return; + return false; } // While we are processing RSet buffers during the collection, we // actually don't want to scan any cards on the collection set, @@ -756,7 +870,7 @@ void HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i) { // however, that if evacuation fails, we have to scan any objects // that were not moved and create any missing entries. if (r->in_collection_set()) { - return; + return false; } // Should we defer processing the card? @@ -797,8 +911,14 @@ void HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i) { // cache. // Immediately process res; no need to process card_ptr. + jbyte* res = card_ptr; bool defer = false; + + // This gets set to true if the card being refined has references + // that point into the collection set. + bool oops_into_cset = false; + if (_cg1r->use_cache()) { jbyte* res = _cg1r->cache_insert(card_ptr, &defer); if (res != NULL && (res != card_ptr || defer)) { @@ -815,14 +935,31 @@ void HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i) { // Process card pointer we get back from the hot card cache. This // will check whether the region containing the card is young // _after_ checking that the region has been allocated from. - concurrentRefineOneCard_impl(res, worker_i); + oops_into_cset = concurrentRefineOneCard_impl(res, worker_i, + false /* check_for_refs_into_cset */); + // The above call to concurrentRefineOneCard_impl is only + // performed if the hot card cache is enabled. This cache is + // disabled during an evacuation pause - which is the only + // time when we need know if the card contains references + // that point into the collection set. Also when the hot card + // cache is enabled, this code is executed by the concurrent + // refine threads - rather than the GC worker threads - and + // concurrentRefineOneCard_impl will return false. + assert(!oops_into_cset, "should not see true here"); } } } if (!defer) { - concurrentRefineOneCard_impl(card_ptr, worker_i); + oops_into_cset = + concurrentRefineOneCard_impl(card_ptr, worker_i, check_for_refs_into_cset); + // We should only be detecting that the card contains references + // that point into the collection set if the current thread is + // a GC worker thread. 
+ assert(!oops_into_cset || SafepointSynchronize::is_at_safepoint(), + "invalid result at non safepoint"); } + return oops_into_cset; } class HRRSStatsIter: public HeapRegionClosure { @@ -920,6 +1057,7 @@ void HRInto_G1RemSet::print_summary_info() { } } + void HRInto_G1RemSet::prepare_for_verify() { if (G1HRRSFlushLogBuffersOnVerify && (VerifyBeforeGC || VerifyAfterGC) @@ -932,7 +1070,9 @@ void HRInto_G1RemSet::prepare_for_verify() { } bool cg1r_use_cache = _cg1r->use_cache(); _cg1r->set_use_cache(false); - updateRS(0); + DirtyCardQueue into_cset_dcq(&_g1->into_cset_dirty_card_queue_set()); + updateRS(&into_cset_dcq, 0); + _g1->into_cset_dirty_card_queue_set().clear(); _cg1r->set_use_cache(cg1r_use_cache); assert(JavaThread::dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed"); diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp index bc30a9699c4..112f9aa646c 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -83,7 +83,13 @@ public: // Refine the card corresponding to "card_ptr". If "sts" is non-NULL, // join and leave around parts that must be atomic wrt GC. (NULL means // being done at a safepoint.) - virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {} + // With some implementations of this routine, when check_for_refs_into_cset + // is true, a true result may be returned if the given card contains oops + // that have references into the current collection set. + virtual bool concurrentRefineOneCard(jbyte* card_ptr, int worker_i, + bool check_for_refs_into_cset) { + return false; + } // Print any relevant summary info. virtual void print_summary_info() {} @@ -143,23 +149,21 @@ protected: size_t _total_cards_scanned; // _par_traversal_in_progress is "true" iff a parallel traversal is in - // progress. If so, then cards added to remembered sets should also have - // their references into the collection summarized in "_new_refs". + // progress. bool _par_traversal_in_progress; void set_par_traversal(bool b) { _par_traversal_in_progress = b; } - GrowableArray** _new_refs; - template void new_refs_iterate_work(OopClosure* cl); - void new_refs_iterate(OopClosure* cl) { - if (UseCompressedOops) { - new_refs_iterate_work(cl); - } else { - new_refs_iterate_work(cl); - } - } + + // Used for caching the closure that is responsible for scanning + // references into the collection set. + OopsInHeapRegionClosure** _cset_rs_update_cl; // The routine that performs the actual work of refining a dirty // card. - void concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i); + // If check_for_refs_into_refs is true then a true result is returned + // if the card contains oops that have references into the current + // collection set. 
+ bool concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i, + bool check_for_refs_into_cset); protected: template void write_ref_nv(HeapRegion* from, T* p); @@ -188,7 +192,7 @@ public: scanNewRefsRS_work(oc, worker_i); } } - void updateRS(int worker_i); + void updateRS(DirtyCardQueue* into_cset_dcq, int worker_i); HeapRegion* calculateStartRegion(int i); HRInto_G1RemSet* as_HRInto_G1RemSet() { return this; } @@ -219,7 +223,11 @@ public: void scrub_par(BitMap* region_bm, BitMap* card_bm, int worker_num, int claim_val); - virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i); + // If check_for_refs_into_cset is true then a true result is returned + // if the card contains oops that have references into the current + // collection set. + virtual bool concurrentRefineOneCard(jbyte* card_ptr, int worker_i, + bool check_for_refs_into_cset); virtual void print_summary_info(); virtual void prepare_for_verify(); @@ -265,3 +273,16 @@ public: // bool idempotent() { return true; } bool apply_to_weak_ref_discovered_field() { return true; } }; + +class UpdateRSetImmediate: public OopsInHeapRegionClosure { +private: + G1RemSet* _g1_rem_set; + + template void do_oop_work(T* p); +public: + UpdateRSetImmediate(G1RemSet* rs) : + _g1_rem_set(rs) {} + + virtual void do_oop(narrowOop* p) { do_oop_work(p); } + virtual void do_oop( oop* p) { do_oop_work(p); } +}; diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp index 2b916f866d5..d3fa77ae92a 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -56,19 +56,25 @@ template inline void HRInto_G1RemSet::par_write_ref_nv(HeapRegion* fro assert(Universe::heap()->is_in_reserved(obj), "must be in heap"); } #endif // ASSERT - assert(from == NULL || from->is_in_reserved(p), - "p is not in from"); + + assert(from == NULL || from->is_in_reserved(p), "p is not in from"); + HeapRegion* to = _g1->heap_region_containing(obj); // The test below could be optimized by applying a bit op to to and from. if (to != NULL && from != NULL && from != to) { - // There is a tricky infinite loop if we keep pushing - // self forwarding pointers onto our _new_refs list. // The _par_traversal_in_progress flag is true during the collection pause, - // false during the evacuation failure handing. + // false during the evacuation failure handing. This should avoid a + // potential loop if we were to add the card containing 'p' to the DCQS + // that's used to regenerate the remembered sets for the collection set, + // in the event of an evacuation failure, here. The UpdateRSImmediate + // closure will eventally call this routine. 
if (_par_traversal_in_progress && to->in_collection_set() && !self_forwarded(obj)) { - _new_refs[tid]->push((void*)p); - // Deferred updates to the Cset are either discarded (in the normal case), + + assert(_cset_rs_update_cl[tid] != NULL, "should have been set already"); + _cset_rs_update_cl[tid]->do_oop(p); + + // Deferred updates to the CSet are either discarded (in the normal case), // or processed (if an evacuation failure occurs) at the end // of the collection. // See HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do(). @@ -89,3 +95,12 @@ template inline void UpdateRSOopClosure::do_oop_work(T* p) { assert(_from != NULL, "from region must be non-NULL"); _rs->par_write_ref(_from, p, _worker_i); } + +template inline void UpdateRSetImmediate::do_oop_work(T* p) { + assert(_from->is_in_reserved(p), "paranoia"); + T heap_oop = oopDesc::load_heap_oop(p); + if (!oopDesc::is_null(heap_oop) && !_from->is_survivor()) { + _g1_rem_set->par_write_ref(_from, p, 0); + } +} + diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp index 4e5446e1f5e..08d8902ce88 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp @@ -683,6 +683,8 @@ oops_on_card_seq_iterate_careful(MemRegion mr, return NULL; } + assert(!is_young(), "check value of filter_young"); + // We used to use "block_start_careful" here. But we're actually happy // to update the BOT while we do this... HeapWord* cur = block_start(mr.start()); diff --git a/hotspot/src/share/vm/gc_implementation/includeDB_gc_g1 b/hotspot/src/share/vm/gc_implementation/includeDB_gc_g1 index 13e3464ed8f..d1476972275 100644 --- a/hotspot/src/share/vm/gc_implementation/includeDB_gc_g1 +++ b/hotspot/src/share/vm/gc_implementation/includeDB_gc_g1 @@ -1,5 +1,5 @@ // -// Copyright (c) 2004, 2009, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. // // This code is free software; you can redistribute it and/or modify it @@ -241,6 +241,7 @@ g1MMUTracker.cpp mutexLocker.hpp g1MMUTracker.hpp debug.hpp g1MMUTracker.hpp allocation.hpp + g1RemSet.cpp bufferingOopClosure.hpp g1RemSet.cpp concurrentG1Refine.hpp g1RemSet.cpp concurrentG1RefineThread.hpp From 38ab95c64b9f0bb29c22e811d7216f74ac6f5d10 Mon Sep 17 00:00:00 2001 From: Antonios Printezis Date: Thu, 22 Jul 2010 10:27:41 -0400 Subject: [PATCH 3/9] 6962589: remove breadth first scanning code from parallel gc Remove the breadth-first copying order from ParallelScavenge and use depth-first by default. 
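[Editor's note: a minimal standalone C++ sketch, added for readers unfamiliar with the two copying orders this change is about. It is illustrative only and not HotSpot code -- the real PSPromotionManager pushes reference slots (StarTask) onto claimed_stack_depth and copies into promotion LABs -- but it shows the traversal-order difference between the depth-first path that remains and the breadth-first path being deleted below. All names here (Obj, scavenge_depth_first, scavenge_breadth_first) are made up for the example.]

#include <cstdio>
#include <queue>
#include <stack>
#include <vector>

struct Obj {
  int id;
  std::vector<Obj*> fields;   // outgoing references
  bool copied = false;
};

// Depth-first: LIFO work stack; the most recently discovered object is
// copied and scanned next (the order kept by this patch).
static void scavenge_depth_first(Obj* root) {
  std::stack<Obj*> work;
  work.push(root);
  while (!work.empty()) {
    Obj* o = work.top();
    work.pop();
    if (o->copied) continue;
    o->copied = true;                         // stands in for "copy to survivor space"
    std::printf("depth-first copies %d\n", o->id);
    for (Obj* f : o->fields) work.push(f);
  }
}

// Breadth-first: FIFO queue of copied objects whose contents are scanned
// later (the order removed by this patch, along with its prefetch queue).
static void scavenge_breadth_first(Obj* root) {
  std::queue<Obj*> work;
  work.push(root);
  while (!work.empty()) {
    Obj* o = work.front();
    work.pop();
    if (o->copied) continue;
    o->copied = true;
    std::printf("breadth-first copies %d\n", o->id);
    for (Obj* f : o->fields) work.push(f);
  }
}

int main() {
  Obj d{3, {}}, c{2, {}}, b{1, {&d}}, a{0, {&b, &c}};
  scavenge_depth_first(&a);                   // visits 0, 2, 1, 3
  Obj* all[] = {&a, &b, &c, &d};
  for (Obj* o : all) o->copied = false;
  scavenge_breadth_first(&a);                 // visits 0, 1, 2, 3
  return 0;
}

The patch keeps only the first variant (push_contents / claimed_stack_depth) and deletes the second together with copy_contents, the breadth task queues, and the UseDepthFirstScavengeOrder flag, as the hunks below show.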
Reviewed-by: jcoomes, ysr, johnc --- .../includeDB_gc_parallelScavenge | 1 - .../parallelScavenge/cardTableExtension.cpp | 70 +++------- .../parallelScavenge/prefetchQueue.hpp | 68 --------- .../parallelScavenge/psPromotionManager.cpp | 130 ++++-------------- .../parallelScavenge/psPromotionManager.hpp | 37 +---- .../psPromotionManager.inline.hpp | 55 -------- .../parallelScavenge/psScavenge.cpp | 12 +- .../parallelScavenge/psScavenge.inline.hpp | 2 +- .../parallelScavenge/psTasks.cpp | 32 ++--- hotspot/src/share/vm/oops/arrayKlassKlass.cpp | 4 - .../share/vm/oops/compiledICHolderKlass.cpp | 4 - .../src/share/vm/oops/constMethodKlass.cpp | 4 - .../src/share/vm/oops/constantPoolKlass.cpp | 15 -- hotspot/src/share/vm/oops/cpCacheKlass.cpp | 23 ---- hotspot/src/share/vm/oops/instanceKlass.cpp | 22 --- hotspot/src/share/vm/oops/instanceKlass.hpp | 1 - .../src/share/vm/oops/instanceKlassKlass.cpp | 36 +---- .../src/share/vm/oops/instanceRefKlass.cpp | 33 ----- hotspot/src/share/vm/oops/klassKlass.cpp | 3 - hotspot/src/share/vm/oops/klassPS.hpp | 2 - hotspot/src/share/vm/oops/methodDataKlass.cpp | 7 - hotspot/src/share/vm/oops/methodKlass.cpp | 4 - hotspot/src/share/vm/oops/objArrayKlass.cpp | 11 -- .../src/share/vm/oops/objArrayKlassKlass.cpp | 4 - hotspot/src/share/vm/oops/oop.hpp | 1 - hotspot/src/share/vm/oops/oop.psgc.inline.hpp | 9 -- hotspot/src/share/vm/oops/symbolKlass.cpp | 4 - hotspot/src/share/vm/oops/typeArrayKlass.cpp | 4 - hotspot/src/share/vm/runtime/arguments.cpp | 2 + hotspot/src/share/vm/runtime/globals.hpp | 4 - 30 files changed, 63 insertions(+), 541 deletions(-) delete mode 100644 hotspot/src/share/vm/gc_implementation/parallelScavenge/prefetchQueue.hpp diff --git a/hotspot/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge b/hotspot/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge index e06ed445a0d..83eae5eebe7 100644 --- a/hotspot/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge +++ b/hotspot/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge @@ -330,7 +330,6 @@ psPromotionManager.cpp psPromotionManager.inline.hpp psPromotionManager.cpp psScavenge.inline.hpp psPromotionManager.hpp allocation.hpp -psPromotionManager.hpp prefetchQueue.hpp psPromotionManager.hpp psPromotionLAB.hpp psPromotionManager.hpp taskqueue.hpp diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.cpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.cpp index c87190d0ccb..e9da8d5fa66 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.cpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.cpp @@ -123,7 +123,6 @@ void CardTableExtension::scavenge_contents(ObjectStartArray* start_array, assert(start_array != NULL && sp != NULL && pm != NULL, "Sanity"); assert(start_array->covered_region().contains(sp->used_region()), "ObjectStartArray does not cover space"); - bool depth_first = pm->depth_first(); if (sp->not_empty()) { oop* sp_top = (oop*)space_top; @@ -201,21 +200,12 @@ void CardTableExtension::scavenge_contents(ObjectStartArray* start_array, *first_nonclean_card++ = clean_card; } // scan oops in objects - // hoisted the if (depth_first) check out of the loop - if (depth_first){ - do { - oop(bottom_obj)->push_contents(pm); - bottom_obj += oop(bottom_obj)->size(); - assert(bottom_obj <= sp_top, "just checking"); - } while (bottom_obj < top); - pm->drain_stacks_cond_depth(); - } else { - do { - oop(bottom_obj)->copy_contents(pm); - 
bottom_obj += oop(bottom_obj)->size(); - assert(bottom_obj <= sp_top, "just checking"); - } while (bottom_obj < top); - } + do { + oop(bottom_obj)->push_contents(pm); + bottom_obj += oop(bottom_obj)->size(); + assert(bottom_obj <= sp_top, "just checking"); + } while (bottom_obj < top); + pm->drain_stacks_cond_depth(); // remember top oop* scanned prev_top = top; } @@ -230,7 +220,6 @@ void CardTableExtension::scavenge_contents_parallel(ObjectStartArray* start_arra uint stripe_number) { int ssize = 128; // Naked constant! Work unit = 64k. int dirty_card_count = 0; - bool depth_first = pm->depth_first(); oop* sp_top = (oop*)space_top; jbyte* start_card = byte_for(sp->bottom()); @@ -363,43 +352,22 @@ void CardTableExtension::scavenge_contents_parallel(ObjectStartArray* start_arra const int interval = PrefetchScanIntervalInBytes; // scan all objects in the range if (interval != 0) { - // hoisted the if (depth_first) check out of the loop - if (depth_first) { - while (p < to) { - Prefetch::write(p, interval); - oop m = oop(p); - assert(m->is_oop_or_null(), "check for header"); - m->push_contents(pm); - p += m->size(); - } - pm->drain_stacks_cond_depth(); - } else { - while (p < to) { - Prefetch::write(p, interval); - oop m = oop(p); - assert(m->is_oop_or_null(), "check for header"); - m->copy_contents(pm); - p += m->size(); - } + while (p < to) { + Prefetch::write(p, interval); + oop m = oop(p); + assert(m->is_oop_or_null(), "check for header"); + m->push_contents(pm); + p += m->size(); } + pm->drain_stacks_cond_depth(); } else { - // hoisted the if (depth_first) check out of the loop - if (depth_first) { - while (p < to) { - oop m = oop(p); - assert(m->is_oop_or_null(), "check for header"); - m->push_contents(pm); - p += m->size(); - } - pm->drain_stacks_cond_depth(); - } else { - while (p < to) { - oop m = oop(p); - assert(m->is_oop_or_null(), "check for header"); - m->copy_contents(pm); - p += m->size(); - } + while (p < to) { + oop m = oop(p); + assert(m->is_oop_or_null(), "check for header"); + m->push_contents(pm); + p += m->size(); } + pm->drain_stacks_cond_depth(); } last_scanned = p; } diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/prefetchQueue.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/prefetchQueue.hpp deleted file mode 100644 index cd57b5ff8b3..00000000000 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/prefetchQueue.hpp +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2002, 2008, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. 
- * - */ - -// -// PrefetchQueue is a FIFO queue of variable length (currently 8). -// -// We need to examine the performance penalty of variable lengths. -// We may also want to split this into cpu dependent bits. -// - -const int PREFETCH_QUEUE_SIZE = 8; - -class PrefetchQueue : public CHeapObj { - private: - void* _prefetch_queue[PREFETCH_QUEUE_SIZE]; - uint _prefetch_index; - - public: - int length() { return PREFETCH_QUEUE_SIZE; } - - inline void clear() { - for(int i=0; i inline void* push_and_pop(T* p) { - oop o = oopDesc::load_decode_heap_oop_not_null(p); - Prefetch::write(o->mark_addr(), 0); - // This prefetch is intended to make sure the size field of array - // oops is in cache. It assumes the the object layout is - // mark -> klass -> size, and that mark and klass are heapword - // sized. If this should change, this prefetch will need updating! - Prefetch::write(o->mark_addr() + (HeapWordSize*2), 0); - _prefetch_queue[_prefetch_index++] = p; - _prefetch_index &= (PREFETCH_QUEUE_SIZE-1); - return _prefetch_queue[_prefetch_index]; - } - - // Stores a NULL pointer in the pop'd location. - inline void* pop() { - _prefetch_queue[_prefetch_index++] = NULL; - _prefetch_index &= (PREFETCH_QUEUE_SIZE-1); - return _prefetch_queue[_prefetch_index]; - } -}; diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp index 2da32555dee..1e73d11d1e3 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp @@ -27,7 +27,6 @@ PSPromotionManager** PSPromotionManager::_manager_array = NULL; OopStarTaskQueueSet* PSPromotionManager::_stack_array_depth = NULL; -OopTaskQueueSet* PSPromotionManager::_stack_array_breadth = NULL; PSOldGen* PSPromotionManager::_old_gen = NULL; MutableSpace* PSPromotionManager::_young_space = NULL; @@ -42,23 +41,14 @@ void PSPromotionManager::initialize() { _manager_array = NEW_C_HEAP_ARRAY(PSPromotionManager*, ParallelGCThreads+1 ); guarantee(_manager_array != NULL, "Could not initialize promotion manager"); - if (UseDepthFirstScavengeOrder) { - _stack_array_depth = new OopStarTaskQueueSet(ParallelGCThreads); - guarantee(_stack_array_depth != NULL, "Count not initialize promotion manager"); - } else { - _stack_array_breadth = new OopTaskQueueSet(ParallelGCThreads); - guarantee(_stack_array_breadth != NULL, "Count not initialize promotion manager"); - } + _stack_array_depth = new OopStarTaskQueueSet(ParallelGCThreads); + guarantee(_stack_array_depth != NULL, "Cound not initialize promotion manager"); // Create and register the PSPromotionManager(s) for the worker threads. 
for(uint i=0; iregister_queue(i, _manager_array[i]->claimed_stack_depth()); - } else { - stack_array_breadth()->register_queue(i, _manager_array[i]->claimed_stack_breadth()); - } + stack_array_depth()->register_queue(i, _manager_array[i]->claimed_stack_depth()); } // The VMThread gets its own PSPromotionManager, which is not available @@ -93,11 +83,7 @@ void PSPromotionManager::post_scavenge() { TASKQUEUE_STATS_ONLY(if (PrintGCDetails && ParallelGCVerbose) print_stats()); for (uint i = 0; i < ParallelGCThreads + 1; i++) { PSPromotionManager* manager = manager_array(i); - if (UseDepthFirstScavengeOrder) { - assert(manager->claimed_stack_depth()->is_empty(), "should be empty"); - } else { - assert(manager->claimed_stack_breadth()->is_empty(), "should be empty"); - } + assert(manager->claimed_stack_depth()->is_empty(), "should be empty"); manager->flush_labs(); } } @@ -105,10 +91,8 @@ void PSPromotionManager::post_scavenge() { #if TASKQUEUE_STATS void PSPromotionManager::print_taskqueue_stats(uint i) const { - const TaskQueueStats& stats = depth_first() ? - _claimed_stack_depth.stats : _claimed_stack_breadth.stats; tty->print("%3u ", i); - stats.print(); + _claimed_stack_depth.stats.print(); tty->cr(); } @@ -128,8 +112,7 @@ static const char* const pm_stats_hdr[] = { void PSPromotionManager::print_stats() { - const bool df = UseDepthFirstScavengeOrder; - tty->print_cr("== GC Task Stats (%s-First), GC %3d", df ? "Depth" : "Breadth", + tty->print_cr("== GC Tasks Stats, GC %3d", Universe::heap()->total_collections()); tty->print("thr "); TaskQueueStats::print_header(1); tty->cr(); @@ -147,9 +130,7 @@ PSPromotionManager::print_stats() { void PSPromotionManager::reset_stats() { - TaskQueueStats& stats = depth_first() ? - claimed_stack_depth()->stats : claimed_stack_breadth()->stats; - stats.reset(); + claimed_stack_depth()->stats.reset(); _masked_pushes = _masked_steals = 0; _arrays_chunked = _array_chunks_processed = 0; } @@ -158,19 +139,13 @@ PSPromotionManager::reset_stats() { PSPromotionManager::PSPromotionManager() { ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap(); assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity"); - _depth_first = UseDepthFirstScavengeOrder; // We set the old lab's start array. 
_old_lab.set_start_array(old_gen()->start_array()); uint queue_size; - if (depth_first()) { - claimed_stack_depth()->initialize(); - queue_size = claimed_stack_depth()->max_elems(); - } else { - claimed_stack_breadth()->initialize(); - queue_size = claimed_stack_breadth()->max_elems(); - } + claimed_stack_depth()->initialize(); + queue_size = claimed_stack_depth()->max_elems(); _totally_drain = (ParallelGCThreads == 1) || (GCDrainStackTargetSize == 0); if (_totally_drain) { @@ -205,14 +180,11 @@ void PSPromotionManager::reset() { _old_lab.initialize(MemRegion(lab_base, (size_t)0)); _old_gen_is_full = false; - _prefetch_queue.clear(); - TASKQUEUE_STATS_ONLY(reset_stats()); } void PSPromotionManager::drain_stacks_depth(bool totally_drain) { - assert(depth_first(), "invariant"); assert(claimed_stack_depth()->overflow_stack() != NULL, "invariant"); totally_drain = totally_drain || _totally_drain; @@ -250,50 +222,6 @@ void PSPromotionManager::drain_stacks_depth(bool totally_drain) { assert(tq->overflow_empty(), "Sanity"); } -void PSPromotionManager::drain_stacks_breadth(bool totally_drain) { - assert(!depth_first(), "invariant"); - assert(claimed_stack_breadth()->overflow_stack() != NULL, "invariant"); - totally_drain = totally_drain || _totally_drain; - -#ifdef ASSERT - ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap(); - assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity"); - MutableSpace* to_space = heap->young_gen()->to_space(); - MutableSpace* old_space = heap->old_gen()->object_space(); - MutableSpace* perm_space = heap->perm_gen()->object_space(); -#endif /* ASSERT */ - - OverflowTaskQueue* const tq = claimed_stack_breadth(); - do { - oop obj; - - // Drain overflow stack first, so other threads can steal from - // claimed stack while we work. - while (tq->pop_overflow(obj)) { - obj->copy_contents(this); - } - - if (totally_drain) { - while (tq->pop_local(obj)) { - obj->copy_contents(this); - } - } else { - while (tq->size() > _target_stack_size && tq->pop_local(obj)) { - obj->copy_contents(this); - } - } - - // If we could not find any other work, flush the prefetch queue - if (tq->is_empty()) { - flush_prefetch_queue(); - } - } while (totally_drain && !tq->taskqueue_empty() || !tq->overflow_empty()); - - assert(!totally_drain || tq->taskqueue_empty(), "Sanity"); - assert(totally_drain || tq->size() <= _target_stack_size, "Sanity"); - assert(tq->overflow_empty(), "Sanity"); -} - void PSPromotionManager::flush_labs() { assert(stacks_empty(), "Attempt to flush lab with live stack"); @@ -319,7 +247,7 @@ void PSPromotionManager::flush_labs() { // performance. // -oop PSPromotionManager::copy_to_survivor_space(oop o, bool depth_first) { +oop PSPromotionManager::copy_to_survivor_space(oop o) { assert(PSScavenge::should_scavenge(&o), "Sanity"); oop new_obj = NULL; @@ -423,24 +351,20 @@ oop PSPromotionManager::copy_to_survivor_space(oop o, bool depth_first) { assert(young_space()->contains(new_obj), "Attempt to push non-promoted obj"); } - if (depth_first) { - // Do the size comparison first with new_obj_size, which we - // already have. Hopefully, only a few objects are larger than - // _min_array_size_for_chunking, and most of them will be arrays. - // So, the is->objArray() test would be very infrequent. 
- if (new_obj_size > _min_array_size_for_chunking && - new_obj->is_objArray() && - PSChunkLargeArrays) { - // we'll chunk it - oop* const masked_o = mask_chunked_array_oop(o); - push_depth(masked_o); - TASKQUEUE_STATS_ONLY(++_arrays_chunked; ++_masked_pushes); - } else { - // we'll just push its contents - new_obj->push_contents(this); - } + // Do the size comparison first with new_obj_size, which we + // already have. Hopefully, only a few objects are larger than + // _min_array_size_for_chunking, and most of them will be arrays. + // So, the is->objArray() test would be very infrequent. + if (new_obj_size > _min_array_size_for_chunking && + new_obj->is_objArray() && + PSChunkLargeArrays) { + // we'll chunk it + oop* const masked_o = mask_chunked_array_oop(o); + push_depth(masked_o); + TASKQUEUE_STATS_ONLY(++_arrays_chunked; ++_masked_pushes); } else { - push_breadth(new_obj); + // we'll just push its contents + new_obj->push_contents(this); } } else { // We lost, someone else "owns" this object @@ -537,13 +461,7 @@ oop PSPromotionManager::oop_promotion_failed(oop obj, markOop obj_mark) { // We won any races, we "own" this object. assert(obj == obj->forwardee(), "Sanity"); - if (depth_first()) { - obj->push_contents(this); - } else { - // Don't bother incrementing the age, just push - // onto the claimed_stack.. - push_breadth(obj); - } + obj->push_contents(this); // Save the mark if needed PSScavenge::oop_promotion_failed(obj, obj_mark); diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.hpp index ec89b9557bb..aa14478d480 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.hpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.hpp @@ -48,7 +48,6 @@ class PSPromotionManager : public CHeapObj { private: static PSPromotionManager** _manager_array; static OopStarTaskQueueSet* _stack_array_depth; - static OopTaskQueueSet* _stack_array_breadth; static PSOldGen* _old_gen; static MutableSpace* _young_space; @@ -69,12 +68,10 @@ class PSPromotionManager : public CHeapObj { PSOldPromotionLAB _old_lab; bool _young_gen_is_full; bool _old_gen_is_full; - PrefetchQueue _prefetch_queue; OopStarTaskQueue _claimed_stack_depth; OverflowTaskQueue _claimed_stack_breadth; - bool _depth_first; bool _totally_drain; uint _target_stack_size; @@ -87,7 +84,6 @@ class PSPromotionManager : public CHeapObj { inline static PSPromotionManager* manager_array(int index); template inline void claim_or_forward_internal_depth(T* p); - template inline void claim_or_forward_internal_breadth(T* p); // On the task queues we push reference locations as well as // partially-scanned arrays (in the latter case, we push an oop to @@ -136,19 +132,11 @@ class PSPromotionManager : public CHeapObj { void process_array_chunk(oop old); template void push_depth(T* p) { - assert(depth_first(), "pre-condition"); claimed_stack_depth()->push(p); } - void push_breadth(oop o) { - assert(!depth_first(), "pre-condition"); - claimed_stack_breadth()->push(o); - } - protected: static OopStarTaskQueueSet* stack_array_depth() { return _stack_array_depth; } - static OopTaskQueueSet* stack_array_breadth() { return _stack_array_breadth; } - public: // Static static void initialize(); @@ -163,19 +151,12 @@ class PSPromotionManager : public CHeapObj { return stack_array_depth()->steal(queue_num, seed, t); } - static bool steal_breadth(int queue_num, int* seed, oop& t) { - return 
stack_array_breadth()->steal(queue_num, seed, t); - } - PSPromotionManager(); // Accessors OopStarTaskQueue* claimed_stack_depth() { return &_claimed_stack_depth; } - OverflowTaskQueue* claimed_stack_breadth() { - return &_claimed_stack_breadth; - } bool young_gen_is_full() { return _young_gen_is_full; } @@ -183,18 +164,14 @@ class PSPromotionManager : public CHeapObj { void set_old_gen_is_full(bool state) { _old_gen_is_full = state; } // Promotion methods - oop copy_to_survivor_space(oop o, bool depth_first); + oop copy_to_survivor_space(oop o); oop oop_promotion_failed(oop obj, markOop obj_mark); void reset(); void flush_labs(); void drain_stacks(bool totally_drain) { - if (depth_first()) { - drain_stacks_depth(totally_drain); - } else { - drain_stacks_breadth(totally_drain); - } + drain_stacks_depth(totally_drain); } public: void drain_stacks_cond_depth() { @@ -203,22 +180,14 @@ class PSPromotionManager : public CHeapObj { } } void drain_stacks_depth(bool totally_drain); - void drain_stacks_breadth(bool totally_drain); - bool depth_first() const { - return _depth_first; - } bool stacks_empty() { - return depth_first() ? - claimed_stack_depth()->is_empty() : - claimed_stack_breadth()->is_empty(); + return claimed_stack_depth()->is_empty(); } inline void process_popped_location_depth(StarTask p); - inline void flush_prefetch_queue(); template inline void claim_or_forward_depth(T* p); - template inline void claim_or_forward_breadth(T* p); TASKQUEUE_STATS_ONLY(inline void record_steal(StarTask& p);) }; diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp index ea81c817b30..decc5e99a46 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp @@ -45,33 +45,8 @@ inline void PSPromotionManager::claim_or_forward_internal_depth(T* p) { } } -template -inline void PSPromotionManager::claim_or_forward_internal_breadth(T* p) { - if (p != NULL) { // XXX: error if p != NULL here - oop o = oopDesc::load_decode_heap_oop_not_null(p); - if (o->is_forwarded()) { - o = o->forwardee(); - } else { - o = copy_to_survivor_space(o, false); - } - // Card mark - if (PSScavenge::is_obj_in_young((HeapWord*) o)) { - PSScavenge::card_table()->inline_write_ref_field_gc(p, o); - } - oopDesc::encode_store_heap_oop_not_null(p, o); - } -} - -inline void PSPromotionManager::flush_prefetch_queue() { - assert(!depth_first(), "invariant"); - for (int i = 0; i < _prefetch_queue.length(); i++) { - claim_or_forward_internal_breadth((oop*)_prefetch_queue.pop()); - } -} - template inline void PSPromotionManager::claim_or_forward_depth(T* p) { - assert(depth_first(), "invariant"); assert(PSScavenge::should_scavenge(p, true), "revisiting object?"); assert(Universe::heap()->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity"); @@ -80,36 +55,6 @@ inline void PSPromotionManager::claim_or_forward_depth(T* p) { claim_or_forward_internal_depth(p); } -template -inline void PSPromotionManager::claim_or_forward_breadth(T* p) { - assert(!depth_first(), "invariant"); - assert(PSScavenge::should_scavenge(p, true), "revisiting object?"); - assert(Universe::heap()->kind() == CollectedHeap::ParallelScavengeHeap, - "Sanity"); - assert(Universe::heap()->is_in(p), "pointer outside heap"); - - if (UsePrefetchQueue) { - 
claim_or_forward_internal_breadth((T*)_prefetch_queue.push_and_pop(p)); - } else { - // This option is used for testing. The use of the prefetch - // queue can delay the processing of the objects and thus - // change the order of object scans. For example, remembered - // set updates are typically the clearing of the remembered - // set (the cards) followed by updates of the remembered set - // for young-to-old pointers. In a situation where there - // is an error in the sequence of clearing and updating - // (e.g. clear card A, update card A, erroneously clear - // card A again) the error can be obscured by a delay - // in the update due to the use of the prefetch queue - // (e.g., clear card A, erroneously clear card A again, - // update card A that was pushed into the prefetch queue - // and thus delayed until after the erronous clear). The - // length of the delay is random depending on the objects - // in the queue and the delay can be zero. - claim_or_forward_internal_breadth(p); - } -} - inline void PSPromotionManager::process_popped_location_depth(StarTask p) { if (is_oop_masked(p)) { assert(PSChunkLargeArrays, "invariant"); diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp index 9cc02479d91..0ea076812b0 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp @@ -157,10 +157,8 @@ void PSRefProcTaskExecutor::execute(ProcessTask& task) q->enqueue(new PSRefProcTaskProxy(task, i)); } ParallelTaskTerminator terminator( - ParallelScavengeHeap::gc_task_manager()->workers(), - UseDepthFirstScavengeOrder ? - (TaskQueueSetSuper*) PSPromotionManager::stack_array_depth() - : (TaskQueueSetSuper*) PSPromotionManager::stack_array_breadth()); + ParallelScavengeHeap::gc_task_manager()->workers(), + (TaskQueueSetSuper*) PSPromotionManager::stack_array_depth()); if (task.marks_oops_alive() && ParallelGCThreads > 1) { for (uint j=0; jenqueue(new StealTask(&terminator)); @@ -375,10 +373,8 @@ bool PSScavenge::invoke_no_policy() { q->enqueue(new ScavengeRootsTask(ScavengeRootsTask::code_cache)); ParallelTaskTerminator terminator( - gc_task_manager()->workers(), - promotion_manager->depth_first() ? - (TaskQueueSetSuper*) promotion_manager->stack_array_depth() - : (TaskQueueSetSuper*) promotion_manager->stack_array_breadth()); + gc_task_manager()->workers(), + (TaskQueueSetSuper*) promotion_manager->stack_array_depth()); if (ParallelGCThreads>1) { for (uint j=0; jenqueue(new StealTask(&terminator)); diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.inline.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.inline.hpp index 7dcf93a9540..3de87882fa5 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.inline.hpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.inline.hpp @@ -65,7 +65,7 @@ inline void PSScavenge::copy_and_push_safe_barrier(PSPromotionManager* pm, oop o = oopDesc::load_decode_heap_oop_not_null(p); oop new_obj = o->is_forwarded() ? 
o->forwardee() - : pm->copy_to_survivor_space(o, pm->depth_first()); + : pm->copy_to_survivor_space(o); oopDesc::encode_store_heap_oop_not_null(p, new_obj); // We cannot mark without test, as some code passes us pointers diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp index 6f72724bfda..11774d941f1 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp @@ -144,29 +144,15 @@ void StealTask::do_it(GCTaskManager* manager, uint which) { "stacks should be empty at this point"); int random_seed = 17; - if (pm->depth_first()) { - while(true) { - StarTask p; - if (PSPromotionManager::steal_depth(which, &random_seed, p)) { - TASKQUEUE_STATS_ONLY(pm->record_steal(p)); - pm->process_popped_location_depth(p); - pm->drain_stacks_depth(true); - } else { - if (terminator()->offer_termination()) { - break; - } - } - } - } else { - while(true) { - oop obj; - if (PSPromotionManager::steal_breadth(which, &random_seed, obj)) { - obj->copy_contents(pm); - pm->drain_stacks_breadth(true); - } else { - if (terminator()->offer_termination()) { - break; - } + while(true) { + StarTask p; + if (PSPromotionManager::steal_depth(which, &random_seed, p)) { + TASKQUEUE_STATS_ONLY(pm->record_steal(p)); + pm->process_popped_location_depth(p); + pm->drain_stacks_depth(true); + } else { + if (terminator()->offer_termination()) { + break; } } } diff --git a/hotspot/src/share/vm/oops/arrayKlassKlass.cpp b/hotspot/src/share/vm/oops/arrayKlassKlass.cpp index e2bb1432c58..36fefd98852 100644 --- a/hotspot/src/share/vm/oops/arrayKlassKlass.cpp +++ b/hotspot/src/share/vm/oops/arrayKlassKlass.cpp @@ -108,10 +108,6 @@ int arrayKlassKlass::oop_oop_iterate_m(oop obj, OopClosure* blk, MemRegion mr) { } #ifndef SERIALGC -void arrayKlassKlass::oop_copy_contents(PSPromotionManager* pm, oop obj) { - assert(obj->blueprint()->oop_is_arrayKlass(),"must be an array klass"); -} - void arrayKlassKlass::oop_push_contents(PSPromotionManager* pm, oop obj) { assert(obj->blueprint()->oop_is_arrayKlass(),"must be an array klass"); } diff --git a/hotspot/src/share/vm/oops/compiledICHolderKlass.cpp b/hotspot/src/share/vm/oops/compiledICHolderKlass.cpp index fcda8a56adb..904fa1b2b93 100644 --- a/hotspot/src/share/vm/oops/compiledICHolderKlass.cpp +++ b/hotspot/src/share/vm/oops/compiledICHolderKlass.cpp @@ -120,10 +120,6 @@ int compiledICHolderKlass::oop_adjust_pointers(oop obj) { } #ifndef SERIALGC -void compiledICHolderKlass::oop_copy_contents(PSPromotionManager* pm, oop obj) { - assert(obj->is_compiledICHolder(), "must be compiledICHolder"); -} - void compiledICHolderKlass::oop_push_contents(PSPromotionManager* pm, oop obj) { assert(obj->is_compiledICHolder(), "must be compiledICHolder"); } diff --git a/hotspot/src/share/vm/oops/constMethodKlass.cpp b/hotspot/src/share/vm/oops/constMethodKlass.cpp index 593e4c9838a..ec5162adf72 100644 --- a/hotspot/src/share/vm/oops/constMethodKlass.cpp +++ b/hotspot/src/share/vm/oops/constMethodKlass.cpp @@ -157,10 +157,6 @@ int constMethodKlass::oop_adjust_pointers(oop obj) { } #ifndef SERIALGC -void constMethodKlass::oop_copy_contents(PSPromotionManager* pm, oop obj) { - assert(obj->is_constMethod(), "should be constMethod"); -} - void constMethodKlass::oop_push_contents(PSPromotionManager* pm, oop obj) { assert(obj->is_constMethod(), "should be constMethod"); } diff --git 
a/hotspot/src/share/vm/oops/constantPoolKlass.cpp b/hotspot/src/share/vm/oops/constantPoolKlass.cpp index dd4cb287c4a..8291ed6f38b 100644 --- a/hotspot/src/share/vm/oops/constantPoolKlass.cpp +++ b/hotspot/src/share/vm/oops/constantPoolKlass.cpp @@ -268,21 +268,6 @@ constantPoolKlass::oop_update_pointers(ParCompactionManager* cm, oop obj, return cp->object_size(); } -void constantPoolKlass::oop_copy_contents(PSPromotionManager* pm, oop obj) { - assert(obj->is_constantPool(), "should be constant pool"); - constantPoolOop cp = (constantPoolOop) obj; - if (AnonymousClasses && cp->has_pseudo_string() && cp->tags() != NULL) { - oop* base = (oop*)cp->base(); - for (int i = 0; i < cp->length(); ++i, ++base) { - if (cp->tag_at(i).is_string()) { - if (PSScavenge::should_scavenge(base)) { - pm->claim_or_forward_breadth(base); - } - } - } - } -} - void constantPoolKlass::oop_push_contents(PSPromotionManager* pm, oop obj) { assert(obj->is_constantPool(), "should be constant pool"); constantPoolOop cp = (constantPoolOop) obj; diff --git a/hotspot/src/share/vm/oops/cpCacheKlass.cpp b/hotspot/src/share/vm/oops/cpCacheKlass.cpp index afff3352a6c..6a3a11e6a5b 100644 --- a/hotspot/src/share/vm/oops/cpCacheKlass.cpp +++ b/hotspot/src/share/vm/oops/cpCacheKlass.cpp @@ -166,29 +166,6 @@ bool constantPoolCacheKlass::oop_is_conc_safe(oop obj) const { } #ifndef SERIALGC -void constantPoolCacheKlass::oop_copy_contents(PSPromotionManager* pm, - oop obj) { - assert(obj->is_constantPoolCache(), "should be constant pool"); - if (EnableInvokeDynamic) { - constantPoolCacheOop cache = (constantPoolCacheOop)obj; - // during a scavenge, it is safe to inspect my pool, since it is perm - constantPoolOop pool = cache->constant_pool(); - assert(pool->is_constantPool(), "should be constant pool"); - if (pool->has_invokedynamic()) { - for (int i = 0; i < cache->length(); i++) { - ConstantPoolCacheEntry* e = cache->entry_at(i); - oop* p = (oop*)&e->_f1; - if (e->is_secondary_entry()) { - if (PSScavenge::should_scavenge(p)) - pm->claim_or_forward_breadth(p); - assert(!(e->is_vfinal() && PSScavenge::should_scavenge((oop*)&e->_f2)), - "no live oops here"); - } - } - } - } -} - void constantPoolCacheKlass::oop_push_contents(PSPromotionManager* pm, oop obj) { assert(obj->is_constantPoolCache(), "should be constant pool"); diff --git a/hotspot/src/share/vm/oops/instanceKlass.cpp b/hotspot/src/share/vm/oops/instanceKlass.cpp index 87bbddc5d4f..ff8077f1bcd 100644 --- a/hotspot/src/share/vm/oops/instanceKlass.cpp +++ b/hotspot/src/share/vm/oops/instanceKlass.cpp @@ -1809,18 +1809,7 @@ int instanceKlass::oop_adjust_pointers(oop obj) { } #ifndef SERIALGC -void instanceKlass::oop_copy_contents(PSPromotionManager* pm, oop obj) { - assert(!pm->depth_first(), "invariant"); - InstanceKlass_OOP_MAP_REVERSE_ITERATE( \ - obj, \ - if (PSScavenge::should_scavenge(p)) { \ - pm->claim_or_forward_breadth(p); \ - }, \ - assert_nothing ) -} - void instanceKlass::oop_push_contents(PSPromotionManager* pm, oop obj) { - assert(pm->depth_first(), "invariant"); InstanceKlass_OOP_MAP_REVERSE_ITERATE( \ obj, \ if (PSScavenge::should_scavenge(p)) { \ @@ -1846,18 +1835,7 @@ int instanceKlass::oop_update_pointers(ParCompactionManager* cm, oop obj, return size_helper(); } -void instanceKlass::copy_static_fields(PSPromotionManager* pm) { - assert(!pm->depth_first(), "invariant"); - InstanceKlass_OOP_ITERATE( \ - start_of_static_fields(), static_oop_field_size(), \ - if (PSScavenge::should_scavenge(p)) { \ - pm->claim_or_forward_breadth(p); \ - }, \ - assert_nothing ) 
-} - void instanceKlass::push_static_fields(PSPromotionManager* pm) { - assert(pm->depth_first(), "invariant"); InstanceKlass_OOP_ITERATE( \ start_of_static_fields(), static_oop_field_size(), \ if (PSScavenge::should_scavenge(p)) { \ diff --git a/hotspot/src/share/vm/oops/instanceKlass.hpp b/hotspot/src/share/vm/oops/instanceKlass.hpp index c25b84a36eb..d895ce10895 100644 --- a/hotspot/src/share/vm/oops/instanceKlass.hpp +++ b/hotspot/src/share/vm/oops/instanceKlass.hpp @@ -711,7 +711,6 @@ class instanceKlass: public Klass { #ifndef SERIALGC // Parallel Scavenge - void copy_static_fields(PSPromotionManager* pm); void push_static_fields(PSPromotionManager* pm); // Parallel Old diff --git a/hotspot/src/share/vm/oops/instanceKlassKlass.cpp b/hotspot/src/share/vm/oops/instanceKlassKlass.cpp index 4184ce029bf..74268925ee5 100644 --- a/hotspot/src/share/vm/oops/instanceKlassKlass.cpp +++ b/hotspot/src/share/vm/oops/instanceKlassKlass.cpp @@ -292,41 +292,7 @@ int instanceKlassKlass::oop_adjust_pointers(oop obj) { } #ifndef SERIALGC -void instanceKlassKlass::oop_copy_contents(PSPromotionManager* pm, oop obj) { - assert(!pm->depth_first(), "invariant"); - instanceKlass* ik = instanceKlass::cast(klassOop(obj)); - ik->copy_static_fields(pm); - - oop* loader_addr = ik->adr_class_loader(); - if (PSScavenge::should_scavenge(loader_addr)) { - pm->claim_or_forward_breadth(loader_addr); - } - - oop* pd_addr = ik->adr_protection_domain(); - if (PSScavenge::should_scavenge(pd_addr)) { - pm->claim_or_forward_breadth(pd_addr); - } - - oop* hk_addr = ik->adr_host_klass(); - if (PSScavenge::should_scavenge(hk_addr)) { - pm->claim_or_forward_breadth(hk_addr); - } - - oop* sg_addr = ik->adr_signers(); - if (PSScavenge::should_scavenge(sg_addr)) { - pm->claim_or_forward_breadth(sg_addr); - } - - oop* bsm_addr = ik->adr_bootstrap_method(); - if (PSScavenge::should_scavenge(bsm_addr)) { - pm->claim_or_forward_breadth(bsm_addr); - } - - klassKlass::oop_copy_contents(pm, obj); -} - void instanceKlassKlass::oop_push_contents(PSPromotionManager* pm, oop obj) { - assert(pm->depth_first(), "invariant"); instanceKlass* ik = instanceKlass::cast(klassOop(obj)); ik->push_static_fields(pm); @@ -355,7 +321,7 @@ void instanceKlassKlass::oop_push_contents(PSPromotionManager* pm, oop obj) { pm->claim_or_forward_depth(bsm_addr); } - klassKlass::oop_copy_contents(pm, obj); + klassKlass::oop_push_contents(pm, obj); } int instanceKlassKlass::oop_update_pointers(ParCompactionManager* cm, oop obj) { diff --git a/hotspot/src/share/vm/oops/instanceRefKlass.cpp b/hotspot/src/share/vm/oops/instanceRefKlass.cpp index f8670f99266..022827f309f 100644 --- a/hotspot/src/share/vm/oops/instanceRefKlass.cpp +++ b/hotspot/src/share/vm/oops/instanceRefKlass.cpp @@ -272,42 +272,9 @@ ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_DEFN_m) ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_DEFN_m) #ifndef SERIALGC -template -void specialized_oop_copy_contents(instanceRefKlass *ref, - PSPromotionManager* pm, oop obj) { - assert(!pm->depth_first(), "invariant"); - T* referent_addr = (T*)java_lang_ref_Reference::referent_addr(obj); - if (PSScavenge::should_scavenge(referent_addr)) { - ReferenceProcessor* rp = PSScavenge::reference_processor(); - if (rp->discover_reference(obj, ref->reference_type())) { - // reference already enqueued, referent and next will be traversed later - ref->instanceKlass::oop_copy_contents(pm, obj); - return; - } else { - // treat referent as normal oop - pm->claim_or_forward_breadth(referent_addr); - } 
- } - // treat next as normal oop - T* next_addr = (T*)java_lang_ref_Reference::next_addr(obj); - if (PSScavenge::should_scavenge(next_addr)) { - pm->claim_or_forward_breadth(next_addr); - } - ref->instanceKlass::oop_copy_contents(pm, obj); -} - -void instanceRefKlass::oop_copy_contents(PSPromotionManager* pm, oop obj) { - if (UseCompressedOops) { - specialized_oop_copy_contents(this, pm, obj); - } else { - specialized_oop_copy_contents(this, pm, obj); - } -} - template void specialized_oop_push_contents(instanceRefKlass *ref, PSPromotionManager* pm, oop obj) { - assert(pm->depth_first(), "invariant"); T* referent_addr = (T*)java_lang_ref_Reference::referent_addr(obj); if (PSScavenge::should_scavenge(referent_addr)) { ReferenceProcessor* rp = PSScavenge::reference_processor(); diff --git a/hotspot/src/share/vm/oops/klassKlass.cpp b/hotspot/src/share/vm/oops/klassKlass.cpp index d4647e1081f..48377c370c8 100644 --- a/hotspot/src/share/vm/oops/klassKlass.cpp +++ b/hotspot/src/share/vm/oops/klassKlass.cpp @@ -161,9 +161,6 @@ int klassKlass::oop_adjust_pointers(oop obj) { } #ifndef SERIALGC -void klassKlass::oop_copy_contents(PSPromotionManager* pm, oop obj) { -} - void klassKlass::oop_push_contents(PSPromotionManager* pm, oop obj) { } diff --git a/hotspot/src/share/vm/oops/klassPS.hpp b/hotspot/src/share/vm/oops/klassPS.hpp index c53abf770c2..6c02905db9b 100644 --- a/hotspot/src/share/vm/oops/klassPS.hpp +++ b/hotspot/src/share/vm/oops/klassPS.hpp @@ -28,7 +28,6 @@ #ifndef SERIALGC #define PARALLEL_GC_DECLS \ - virtual void oop_copy_contents(PSPromotionManager* pm, oop obj); \ virtual void oop_push_contents(PSPromotionManager* pm, oop obj); \ /* Parallel Old GC support \ \ @@ -43,7 +42,6 @@ // Pure virtual version for klass.hpp #define PARALLEL_GC_DECLS_PV \ - virtual void oop_copy_contents(PSPromotionManager* pm, oop obj) = 0; \ virtual void oop_push_contents(PSPromotionManager* pm, oop obj) = 0; \ virtual void oop_follow_contents(ParCompactionManager* cm, oop obj) = 0; \ virtual int oop_update_pointers(ParCompactionManager* cm, oop obj) = 0; \ diff --git a/hotspot/src/share/vm/oops/methodDataKlass.cpp b/hotspot/src/share/vm/oops/methodDataKlass.cpp index 03b5bd8878b..e11f6afd6a0 100644 --- a/hotspot/src/share/vm/oops/methodDataKlass.cpp +++ b/hotspot/src/share/vm/oops/methodDataKlass.cpp @@ -154,13 +154,6 @@ int methodDataKlass::oop_adjust_pointers(oop obj) { #ifndef SERIALGC -void methodDataKlass::oop_copy_contents(PSPromotionManager* pm, oop obj) { - assert (obj->is_methodData(), "object must be method data"); - methodDataOop m = methodDataOop(obj); - // This should never point into the young gen. 
- assert(!PSScavenge::should_scavenge(m->adr_method()), "Sanity"); -} - void methodDataKlass::oop_push_contents(PSPromotionManager* pm, oop obj) { assert (obj->is_methodData(), "object must be method data"); methodDataOop m = methodDataOop(obj); diff --git a/hotspot/src/share/vm/oops/methodKlass.cpp b/hotspot/src/share/vm/oops/methodKlass.cpp index f0ba4ad8413..40677d19290 100644 --- a/hotspot/src/share/vm/oops/methodKlass.cpp +++ b/hotspot/src/share/vm/oops/methodKlass.cpp @@ -184,10 +184,6 @@ int methodKlass::oop_adjust_pointers(oop obj) { } #ifndef SERIALGC -void methodKlass::oop_copy_contents(PSPromotionManager* pm, oop obj) { - assert(obj->is_method(), "should be method"); -} - void methodKlass::oop_push_contents(PSPromotionManager* pm, oop obj) { assert(obj->is_method(), "should be method"); } diff --git a/hotspot/src/share/vm/oops/objArrayKlass.cpp b/hotspot/src/share/vm/oops/objArrayKlass.cpp index 96935b61448..a932a117d67 100644 --- a/hotspot/src/share/vm/oops/objArrayKlass.cpp +++ b/hotspot/src/share/vm/oops/objArrayKlass.cpp @@ -426,18 +426,7 @@ int objArrayKlass::oop_adjust_pointers(oop obj) { } #ifndef SERIALGC -void objArrayKlass::oop_copy_contents(PSPromotionManager* pm, oop obj) { - assert(!pm->depth_first(), "invariant"); - assert(obj->is_objArray(), "obj must be obj array"); - ObjArrayKlass_OOP_ITERATE( \ - objArrayOop(obj), p, \ - if (PSScavenge::should_scavenge(p)) { \ - pm->claim_or_forward_breadth(p); \ - }) -} - void objArrayKlass::oop_push_contents(PSPromotionManager* pm, oop obj) { - assert(pm->depth_first(), "invariant"); assert(obj->is_objArray(), "obj must be obj array"); ObjArrayKlass_OOP_ITERATE( \ objArrayOop(obj), p, \ diff --git a/hotspot/src/share/vm/oops/objArrayKlassKlass.cpp b/hotspot/src/share/vm/oops/objArrayKlassKlass.cpp index f486dc7db40..7050d57b14b 100644 --- a/hotspot/src/share/vm/oops/objArrayKlassKlass.cpp +++ b/hotspot/src/share/vm/oops/objArrayKlassKlass.cpp @@ -229,10 +229,6 @@ objArrayKlassKlass::oop_oop_iterate_m(oop obj, OopClosure* blk, MemRegion mr) { } #ifndef SERIALGC -void objArrayKlassKlass::oop_copy_contents(PSPromotionManager* pm, oop obj) { - assert(obj->blueprint()->oop_is_objArrayKlass(),"must be an obj array klass"); -} - void objArrayKlassKlass::oop_push_contents(PSPromotionManager* pm, oop obj) { assert(obj->blueprint()->oop_is_objArrayKlass(),"must be an obj array klass"); } diff --git a/hotspot/src/share/vm/oops/oop.hpp b/hotspot/src/share/vm/oops/oop.hpp index 952802c7801..04d31180401 100644 --- a/hotspot/src/share/vm/oops/oop.hpp +++ b/hotspot/src/share/vm/oops/oop.hpp @@ -306,7 +306,6 @@ class oopDesc { #ifndef SERIALGC // Parallel Scavenge - void copy_contents(PSPromotionManager* pm); void push_contents(PSPromotionManager* pm); // Parallel Old diff --git a/hotspot/src/share/vm/oops/oop.psgc.inline.hpp b/hotspot/src/share/vm/oops/oop.psgc.inline.hpp index 07ad5c5a653..40d40a3725b 100644 --- a/hotspot/src/share/vm/oops/oop.psgc.inline.hpp +++ b/hotspot/src/share/vm/oops/oop.psgc.inline.hpp @@ -24,15 +24,6 @@ // ParallelScavengeHeap methods -inline void oopDesc::copy_contents(PSPromotionManager* pm) { - Klass* klass = blueprint(); - if (!klass->oop_is_typeArray()) { - // It might contain oops beyond the header, so take the virtual call. - klass->oop_copy_contents(pm, this); - } - // Else skip it. The typeArrayKlass in the header never needs scavenging. 
-} - inline void oopDesc::push_contents(PSPromotionManager* pm) { Klass* klass = blueprint(); if (!klass->oop_is_typeArray()) { diff --git a/hotspot/src/share/vm/oops/symbolKlass.cpp b/hotspot/src/share/vm/oops/symbolKlass.cpp index 4fe4dc86090..e3806db1202 100644 --- a/hotspot/src/share/vm/oops/symbolKlass.cpp +++ b/hotspot/src/share/vm/oops/symbolKlass.cpp @@ -184,10 +184,6 @@ int symbolKlass::oop_adjust_pointers(oop obj) { #ifndef SERIALGC -void symbolKlass::oop_copy_contents(PSPromotionManager* pm, oop obj) { - assert(obj->is_symbol(), "should be symbol"); -} - void symbolKlass::oop_push_contents(PSPromotionManager* pm, oop obj) { assert(obj->is_symbol(), "should be symbol"); } diff --git a/hotspot/src/share/vm/oops/typeArrayKlass.cpp b/hotspot/src/share/vm/oops/typeArrayKlass.cpp index edb216e68ae..088960ce0b6 100644 --- a/hotspot/src/share/vm/oops/typeArrayKlass.cpp +++ b/hotspot/src/share/vm/oops/typeArrayKlass.cpp @@ -228,10 +228,6 @@ int typeArrayKlass::oop_oop_iterate_m(oop obj, OopClosure* blk, MemRegion mr) { } #ifndef SERIALGC -void typeArrayKlass::oop_copy_contents(PSPromotionManager* pm, oop obj) { - assert(obj->is_typeArray(),"must be a type array"); -} - void typeArrayKlass::oop_push_contents(PSPromotionManager* pm, oop obj) { assert(obj->is_typeArray(),"must be a type array"); } diff --git a/hotspot/src/share/vm/runtime/arguments.cpp b/hotspot/src/share/vm/runtime/arguments.cpp index 5ba65fba422..b2182419673 100644 --- a/hotspot/src/share/vm/runtime/arguments.cpp +++ b/hotspot/src/share/vm/runtime/arguments.cpp @@ -184,6 +184,8 @@ static ObsoleteFlag obsolete_jvm_flags[] = { { "DefaultMaxRAM", JDK_Version::jdk_update(6,18), JDK_Version::jdk(7) }, { "DefaultInitialRAMFraction", JDK_Version::jdk_update(6,18), JDK_Version::jdk(7) }, + { "UseDepthFirstScavengeOrder", + JDK_Version::jdk_update(6,22), JDK_Version::jdk(7) }, { NULL, JDK_Version(0), JDK_Version(0) } }; diff --git a/hotspot/src/share/vm/runtime/globals.hpp b/hotspot/src/share/vm/runtime/globals.hpp index daf5b6b6ba5..bf7264a578a 100644 --- a/hotspot/src/share/vm/runtime/globals.hpp +++ b/hotspot/src/share/vm/runtime/globals.hpp @@ -3088,10 +3088,6 @@ class CommandLineFlags { \ product(intx, SafepointSpinBeforeYield, 2000, "(Unstable)") \ \ - product(bool, UseDepthFirstScavengeOrder, true, \ - "true: the scavenge order will be depth-first, " \ - "false: the scavenge order will be breadth-first") \ - \ product(bool, PSChunkLargeArrays, true, \ "true: process large arrays in chunks") \ \ From b4148f33790bb490fc8fd5c02a581cd018067374 Mon Sep 17 00:00:00 2001 From: Antonios Printezis Date: Wed, 4 Aug 2010 13:03:23 -0400 Subject: [PATCH 4/9] 6963209: G1: remove the concept of abandoned pauses As part of 6944166 we disabled the concept of abandoned pauses (i.e., if the collection set is empty, we would still try to do a pause even if it is to update the RSets and scan the roots). This changeset removes the code and structures associated with abandoned pauses. 
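[Editor's note: a minimal sketch of the control-flow change described above, using simplified stand-ins for the G1 methods involved; it is not the actual G1CollectedHeap code, and the region counts are invented. Before this cleanup, choose_collection_set() returned an "abandoned" flag and a separate branch undid the state of a pause that never ran; after it, the evacuation path runs unconditionally, even over an empty collection set.]

#include <cstdio>

// Hypothetical stand-in for G1CollectorPolicy::choose_collection_set() after
// this patch: it always builds a (possibly empty) collection set and no
// longer reports an abandoned pause.
static int choose_collection_set(double target_pause_time_ms) {
  // Pretend no old region fits a very small pause target: the set is empty.
  return target_pause_time_ms < 10.0 ? 0 : 4;
}

static void do_collection_pause_at_safepoint(double target_pause_time_ms) {
  int regions = choose_collection_set(target_pause_time_ms);
  // Old shape (removed in the hunks below):
  //   bool abandoned = choose_collection_set(target_pause_time_ms);
  //   if (!abandoned) { evacuate(); } else { /* reset incremental cset, DPT, ... */ }
  // New shape: one unconditional path; remembered sets are updated and roots
  // scanned even when the collection set turned out to be empty.
  std::printf("pause runs, evacuating %d region(s)\n", regions);
}

int main() {
  do_collection_pause_at_safepoint(5.0);   // empty collection set, pause still performed
  do_collection_pause_at_safepoint(50.0);  // non-empty collection set
  return 0;
}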
Reviewed-by: iveresov, johnc --- .../gc_implementation/g1/g1CollectedHeap.cpp | 100 +++------ .../g1/g1CollectorPolicy.cpp | 208 ++++++------------ .../g1/g1CollectorPolicy.hpp | 13 +- 3 files changed, 107 insertions(+), 214 deletions(-) diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp index 6d798a54889..76baff21b62 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp @@ -2843,93 +2843,57 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) { g1_policy()->print_collection_set(g1_policy()->inc_cset_head(), gclog_or_tty); #endif // YOUNG_LIST_VERBOSE - // Now choose the CS. We may abandon a pause if we find no - // region that will fit in the MMU pause. - bool abandoned = g1_policy()->choose_collection_set(target_pause_time_ms); + g1_policy()->choose_collection_set(target_pause_time_ms); // Nothing to do if we were unable to choose a collection set. - if (!abandoned) { #if G1_REM_SET_LOGGING - gclog_or_tty->print_cr("\nAfter pause, heap:"); - print(); + gclog_or_tty->print_cr("\nAfter pause, heap:"); + print(); #endif - PrepareForRSScanningClosure prepare_for_rs_scan; - collection_set_iterate(&prepare_for_rs_scan); + PrepareForRSScanningClosure prepare_for_rs_scan; + collection_set_iterate(&prepare_for_rs_scan); - setup_surviving_young_words(); + setup_surviving_young_words(); - // Set up the gc allocation regions. - get_gc_alloc_regions(); + // Set up the gc allocation regions. + get_gc_alloc_regions(); - // Actually do the work... - evacuate_collection_set(); + // Actually do the work... + evacuate_collection_set(); - free_collection_set(g1_policy()->collection_set()); - g1_policy()->clear_collection_set(); + free_collection_set(g1_policy()->collection_set()); + g1_policy()->clear_collection_set(); - cleanup_surviving_young_words(); + cleanup_surviving_young_words(); - // Start a new incremental collection set for the next pause. - g1_policy()->start_incremental_cset_building(); + // Start a new incremental collection set for the next pause. + g1_policy()->start_incremental_cset_building(); - // Clear the _cset_fast_test bitmap in anticipation of adding - // regions to the incremental collection set for the next - // evacuation pause. - clear_cset_fast_test(); + // Clear the _cset_fast_test bitmap in anticipation of adding + // regions to the incremental collection set for the next + // evacuation pause. + clear_cset_fast_test(); - if (g1_policy()->in_young_gc_mode()) { - _young_list->reset_sampled_info(); + if (g1_policy()->in_young_gc_mode()) { + _young_list->reset_sampled_info(); - // Don't check the whole heap at this point as the - // GC alloc regions from this pause have been tagged - // as survivors and moved on to the survivor list. - // Survivor regions will fail the !is_young() check. - assert(check_young_list_empty(false /* check_heap */), - "young list should be empty"); + // Don't check the whole heap at this point as the + // GC alloc regions from this pause have been tagged + // as survivors and moved on to the survivor list. + // Survivor regions will fail the !is_young() check. 
+ assert(check_young_list_empty(false /* check_heap */), + "young list should be empty"); #if YOUNG_LIST_VERBOSE - gclog_or_tty->print_cr("Before recording survivors.\nYoung List:"); - _young_list->print(); + gclog_or_tty->print_cr("Before recording survivors.\nYoung List:"); + _young_list->print(); #endif // YOUNG_LIST_VERBOSE - g1_policy()->record_survivor_regions(_young_list->survivor_length(), + g1_policy()->record_survivor_regions(_young_list->survivor_length(), _young_list->first_survivor_region(), _young_list->last_survivor_region()); - _young_list->reset_auxilary_lists(); - } - } else { - // We have abandoned the current collection. This can only happen - // if we're not doing young or partially young collections, and - // we didn't find an old region that we're able to collect within - // the allowed time. - - assert(g1_policy()->collection_set() == NULL, "should be"); - assert(_young_list->length() == 0, "because it should be"); - - // This should be a no-op. - abandon_collection_set(g1_policy()->inc_cset_head()); - - g1_policy()->clear_incremental_cset(); - g1_policy()->stop_incremental_cset_building(); - - // Start a new incremental collection set for the next pause. - g1_policy()->start_incremental_cset_building(); - - // Clear the _cset_fast_test bitmap in anticipation of adding - // regions to the incremental collection set for the next - // evacuation pause. - clear_cset_fast_test(); - - // This looks confusing, because the DPT should really be empty - // at this point -- since we have not done any collection work, - // there should not be any derived pointers in the table to update; - // however, there is some additional state in the DPT which is - // reset at the end of the (null) "gc" here via the following call. - // A better approach might be to split off that state resetting work - // into a separate method that asserts that the DPT is empty and call - // that here. That is deferred for now. 
- COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); + _young_list->reset_auxilary_lists(); } if (evacuation_failed()) { @@ -2963,7 +2927,7 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) { double end_time_sec = os::elapsedTime(); double pause_time_ms = (end_time_sec - start_time_sec) * MILLIUNITS; g1_policy()->record_pause_time_ms(pause_time_ms); - g1_policy()->record_collection_pause_end(abandoned); + g1_policy()->record_collection_pause_end(); assert(regions_accounted_for(), "Region leakage."); diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp index 2feaae7df3a..ef21287146c 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp @@ -88,7 +88,6 @@ G1CollectorPolicy::G1CollectorPolicy() : _all_mod_union_times_ms(new NumberSeq()), _summary(new Summary()), - _abandoned_summary(new AbandonedSummary()), #ifndef PRODUCT _cur_clear_ct_time_ms(0.0), @@ -1124,7 +1123,7 @@ double G1CollectorPolicy::max_sum (double* data1, // Anything below that is considered to be zero #define MIN_TIMER_GRANULARITY 0.0000001 -void G1CollectorPolicy::record_collection_pause_end(bool abandoned) { +void G1CollectorPolicy::record_collection_pause_end() { double end_time_sec = os::elapsedTime(); double elapsed_ms = _last_pause_time_ms; bool parallel = ParallelGCThreads > 0; @@ -1134,7 +1133,7 @@ void G1CollectorPolicy::record_collection_pause_end(bool abandoned) { size_t cur_used_bytes = _g1->used(); assert(cur_used_bytes == _g1->recalculate_used(), "It should!"); bool last_pause_included_initial_mark = false; - bool update_stats = !abandoned && !_g1->evacuation_failed(); + bool update_stats = !_g1->evacuation_failed(); #ifndef PRODUCT if (G1YoungSurvRateVerbose) { @@ -1273,12 +1272,7 @@ void G1CollectorPolicy::record_collection_pause_end(bool abandoned) { gclog_or_tty->print_cr(" Recording collection pause(%d)", _n_pauses); } - PauseSummary* summary; - if (abandoned) { - summary = _abandoned_summary; - } else { - summary = _summary; - } + PauseSummary* summary = _summary; double ext_root_scan_time = avg_value(_par_last_ext_root_scan_times_ms); double mark_stack_scan_time = avg_value(_par_last_mark_stack_scan_times_ms); @@ -1346,61 +1340,58 @@ void G1CollectorPolicy::record_collection_pause_end(bool abandoned) { double other_time_ms = elapsed_ms; - if (!abandoned) { - if (_satb_drain_time_set) - other_time_ms -= _cur_satb_drain_time_ms; + if (_satb_drain_time_set) { + other_time_ms -= _cur_satb_drain_time_ms; + } - if (parallel) - other_time_ms -= _cur_collection_par_time_ms + _cur_clear_ct_time_ms; - else - other_time_ms -= - update_rs_time + - ext_root_scan_time + mark_stack_scan_time + - scan_rs_time + obj_copy_time; + if (parallel) { + other_time_ms -= _cur_collection_par_time_ms + _cur_clear_ct_time_ms; + } else { + other_time_ms -= + update_rs_time + + ext_root_scan_time + mark_stack_scan_time + + scan_rs_time + obj_copy_time; } if (PrintGCDetails) { - gclog_or_tty->print_cr("%s%s, %1.8lf secs]", - abandoned ? " (abandoned)" : "", + gclog_or_tty->print_cr("%s, %1.8lf secs]", (last_pause_included_initial_mark) ? 
" (initial-mark)" : "", elapsed_ms / 1000.0); - if (!abandoned) { - if (_satb_drain_time_set) { - print_stats(1, "SATB Drain Time", _cur_satb_drain_time_ms); - } - if (_last_satb_drain_processed_buffers >= 0) { - print_stats(2, "Processed Buffers", _last_satb_drain_processed_buffers); - } - if (parallel) { - print_stats(1, "Parallel Time", _cur_collection_par_time_ms); - print_par_stats(2, "GC Worker Start Time", - _par_last_gc_worker_start_times_ms, false); - print_par_stats(2, "Update RS", _par_last_update_rs_times_ms); - print_par_sizes(3, "Processed Buffers", - _par_last_update_rs_processed_buffers, true); - print_par_stats(2, "Ext Root Scanning", - _par_last_ext_root_scan_times_ms); - print_par_stats(2, "Mark Stack Scanning", - _par_last_mark_stack_scan_times_ms); - print_par_stats(2, "Scan RS", _par_last_scan_rs_times_ms); - print_par_stats(2, "Object Copy", _par_last_obj_copy_times_ms); - print_par_stats(2, "Termination", _par_last_termination_times_ms); - print_par_sizes(3, "Termination Attempts", - _par_last_termination_attempts, true); - print_par_stats(2, "GC Worker End Time", - _par_last_gc_worker_end_times_ms, false); - print_stats(2, "Other", parallel_other_time); - print_stats(1, "Clear CT", _cur_clear_ct_time_ms); - } else { - print_stats(1, "Update RS", update_rs_time); - print_stats(2, "Processed Buffers", - (int)update_rs_processed_buffers); - print_stats(1, "Ext Root Scanning", ext_root_scan_time); - print_stats(1, "Mark Stack Scanning", mark_stack_scan_time); - print_stats(1, "Scan RS", scan_rs_time); - print_stats(1, "Object Copying", obj_copy_time); - } + if (_satb_drain_time_set) { + print_stats(1, "SATB Drain Time", _cur_satb_drain_time_ms); + } + if (_last_satb_drain_processed_buffers >= 0) { + print_stats(2, "Processed Buffers", _last_satb_drain_processed_buffers); + } + if (parallel) { + print_stats(1, "Parallel Time", _cur_collection_par_time_ms); + print_par_stats(2, "GC Worker Start Time", + _par_last_gc_worker_start_times_ms, false); + print_par_stats(2, "Update RS", _par_last_update_rs_times_ms); + print_par_sizes(3, "Processed Buffers", + _par_last_update_rs_processed_buffers, true); + print_par_stats(2, "Ext Root Scanning", + _par_last_ext_root_scan_times_ms); + print_par_stats(2, "Mark Stack Scanning", + _par_last_mark_stack_scan_times_ms); + print_par_stats(2, "Scan RS", _par_last_scan_rs_times_ms); + print_par_stats(2, "Object Copy", _par_last_obj_copy_times_ms); + print_par_stats(2, "Termination", _par_last_termination_times_ms); + print_par_sizes(3, "Termination Attempts", + _par_last_termination_attempts, true); + print_par_stats(2, "GC Worker End Time", + _par_last_gc_worker_end_times_ms, false); + print_stats(2, "Other", parallel_other_time); + print_stats(1, "Clear CT", _cur_clear_ct_time_ms); + } else { + print_stats(1, "Update RS", update_rs_time); + print_stats(2, "Processed Buffers", + (int)update_rs_processed_buffers); + print_stats(1, "Ext Root Scanning", ext_root_scan_time); + print_stats(1, "Mark Stack Scanning", mark_stack_scan_time); + print_stats(1, "Scan RS", scan_rs_time); + print_stats(1, "Object Copying", obj_copy_time); } #ifndef PRODUCT print_stats(1, "Cur Clear CC", _cur_clear_cc_time_ms); @@ -2176,33 +2167,27 @@ void G1CollectorPolicy::print_summary(PauseSummary* summary) const { print_summary(1, "Other", summary->get_other_seq()); { NumberSeq calc_other_times_ms; - if (body_summary != NULL) { - // not abandoned - if (parallel) { - // parallel - NumberSeq* other_parts[] = { - body_summary->get_satb_drain_seq(), - 
body_summary->get_parallel_seq(), - body_summary->get_clear_ct_seq() - }; - calc_other_times_ms = NumberSeq(summary->get_total_seq(), - 3, other_parts); - } else { - // serial - NumberSeq* other_parts[] = { - body_summary->get_satb_drain_seq(), - body_summary->get_update_rs_seq(), - body_summary->get_ext_root_scan_seq(), - body_summary->get_mark_stack_scan_seq(), - body_summary->get_scan_rs_seq(), - body_summary->get_obj_copy_seq() - }; - calc_other_times_ms = NumberSeq(summary->get_total_seq(), - 7, other_parts); - } + if (parallel) { + // parallel + NumberSeq* other_parts[] = { + body_summary->get_satb_drain_seq(), + body_summary->get_parallel_seq(), + body_summary->get_clear_ct_seq() + }; + calc_other_times_ms = NumberSeq(summary->get_total_seq(), + 3, other_parts); } else { - // abandoned - calc_other_times_ms = NumberSeq(); + // serial + NumberSeq* other_parts[] = { + body_summary->get_satb_drain_seq(), + body_summary->get_update_rs_seq(), + body_summary->get_ext_root_scan_seq(), + body_summary->get_mark_stack_scan_seq(), + body_summary->get_scan_rs_seq(), + body_summary->get_obj_copy_seq() + }; + calc_other_times_ms = NumberSeq(summary->get_total_seq(), + 7, other_parts); } check_other_times(1, summary->get_other_seq(), &calc_other_times_ms); } @@ -2213,20 +2198,6 @@ void G1CollectorPolicy::print_summary(PauseSummary* summary) const { gclog_or_tty->print_cr(""); } -void -G1CollectorPolicy::print_abandoned_summary(PauseSummary* summary) const { - bool printed = false; - if (summary->get_total_seq()->num() > 0) { - printed = true; - print_summary(summary); - } - if (!printed) { - print_indent(0); - gclog_or_tty->print_cr("none"); - gclog_or_tty->print_cr(""); - } -} - void G1CollectorPolicy::print_tracing_info() const { if (TraceGen0Time) { gclog_or_tty->print_cr("ALL PAUSES"); @@ -2240,9 +2211,6 @@ void G1CollectorPolicy::print_tracing_info() const { gclog_or_tty->print_cr("EVACUATION PAUSES"); print_summary(_summary); - gclog_or_tty->print_cr("ABANDONED PAUSES"); - print_abandoned_summary(_abandoned_summary); - gclog_or_tty->print_cr("MISC"); print_summary_sd(0, "Stop World", _all_stop_world_times_ms); print_summary_sd(0, "Yields", _all_yield_times_ms); @@ -2868,19 +2836,12 @@ void G1CollectorPolicy::print_collection_set(HeapRegion* list_head, outputStream } #endif // !PRODUCT -bool +void G1CollectorPolicy_BestRegionsFirst::choose_collection_set( double target_pause_time_ms) { // Set this here - in case we're not doing young collections. double non_young_start_time_sec = os::elapsedTime(); - // The result that this routine will return. This will be set to - // false if: - // * we're doing a young or partially young collection and we - // have added the youg regions to collection set, or - // * we add old regions to the collection set. - bool abandon_collection = true; - start_recording_regions(); guarantee(target_pause_time_ms > 0.0, @@ -2984,10 +2945,6 @@ G1CollectorPolicy_BestRegionsFirst::choose_collection_set( } assert(_inc_cset_size == _g1->young_list()->length(), "Invariant"); - if (_inc_cset_size > 0) { - assert(_collection_set != NULL, "Invariant"); - abandon_collection = false; - } double young_end_time_sec = os::elapsedTime(); _recorded_young_cset_choice_time_ms = @@ -3009,10 +2966,6 @@ G1CollectorPolicy_BestRegionsFirst::choose_collection_set( NumberSeq seq; double avg_prediction = 100000000000000000.0; // something very large - // Save the current size of the collection set to detect - // if we actually added any old regions. 
- size_t n_young_regions = _collection_set_size; - do { hr = _collectionSetChooser->getNextMarkedRegion(time_remaining_ms, avg_prediction); @@ -3039,12 +2992,6 @@ G1CollectorPolicy_BestRegionsFirst::choose_collection_set( if (!adaptive_young_list_length() && _collection_set_size < _young_list_fixed_length) _should_revert_to_full_young_gcs = true; - - if (_collection_set_size > n_young_regions) { - // We actually added old regions to the collection set - // so we are not abandoning this collection. - abandon_collection = false; - } } choose_collection_set_end: @@ -3057,19 +3004,6 @@ choose_collection_set_end: double non_young_end_time_sec = os::elapsedTime(); _recorded_non_young_cset_choice_time_ms = (non_young_end_time_sec - non_young_start_time_sec) * 1000.0; - - // Here we are supposed to return whether the pause should be - // abandoned or not (i.e., whether the collection set is empty or - // not). However, this introduces a subtle issue when a pause is - // initiated explicitly with System.gc() and - // +ExplicitGCInvokesConcurrent (see Comment #2 in CR 6944166), it's - // supposed to start a marking cycle, and it's abandoned. So, by - // returning false here we are telling the caller never to consider - // a pause to be abandoned. We'll actually remove all the code - // associated with abandoned pauses as part of CR 6963209, but we are - // just disabling them this way for the moment to avoid increasing - // further the amount of changes for CR 6944166. - return false; } void G1CollectorPolicy_BestRegionsFirst::record_full_collection_end() { @@ -3084,7 +3018,7 @@ expand_if_possible(size_t numRegions) { } void G1CollectorPolicy_BestRegionsFirst:: -record_collection_pause_end(bool abandoned) { - G1CollectorPolicy::record_collection_pause_end(abandoned); +record_collection_pause_end() { + G1CollectorPolicy::record_collection_pause_end(); assert(assertMarkedBytesDataOK(), "Marked regions not OK at pause end."); } diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp index 84a8491142c..33ee6ebc446 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp @@ -76,9 +76,6 @@ public: virtual MainBodySummary* main_body_summary() { return this; } }; -class AbandonedSummary: public PauseSummary { -}; - class G1CollectorPolicy: public CollectorPolicy { protected: // The number of pauses during the execution. @@ -148,7 +145,6 @@ protected: TruncatedSeq* _concurrent_mark_cleanup_times_ms; Summary* _summary; - AbandonedSummary* _abandoned_summary; NumberSeq* _all_pause_times_ms; NumberSeq* _all_full_gc_times_ms; @@ -573,7 +569,6 @@ protected: NumberSeq* calc_other_times_ms) const; void print_summary (PauseSummary* stats) const; - void print_abandoned_summary(PauseSummary* summary) const; void print_summary (int level, const char* str, NumberSeq* seq) const; void print_summary_sd (int level, const char* str, NumberSeq* seq) const; @@ -886,7 +881,7 @@ public: virtual void record_collection_pause_end_CH_strong_roots(); virtual void record_collection_pause_end_G1_strong_roots(); - virtual void record_collection_pause_end(bool abandoned); + virtual void record_collection_pause_end(); // Record the fact that a full collection occurred. virtual void record_full_collection_start(); @@ -999,7 +994,7 @@ public: // Choose a new collection set. Marks the chosen regions as being // "in_collection_set", and links them together. 
The head and number of
   // the collection set are available via access methods.
-  virtual bool choose_collection_set(double target_pause_time_ms) = 0;
+  virtual void choose_collection_set(double target_pause_time_ms) = 0;

   // The head of the list (via "next_in_collection_set()") representing the
   // current collection set.
@@ -1256,7 +1251,7 @@ class G1CollectorPolicy_BestRegionsFirst: public G1CollectorPolicy {
   // If the estimated is less then desirable, resize if possible.
   void expand_if_possible(size_t numRegions);

-  virtual bool choose_collection_set(double target_pause_time_ms);
+  virtual void choose_collection_set(double target_pause_time_ms);
   virtual void record_collection_pause_start(double start_time_sec,
                                              size_t start_used);
   virtual void record_concurrent_mark_cleanup_end(size_t freed_bytes,
@@ -1267,7 +1262,7 @@ public:
   G1CollectorPolicy_BestRegionsFirst() {
     _collectionSetChooser = new CollectionSetChooser();
   }
-  void record_collection_pause_end(bool abandoned);
+  void record_collection_pause_end();
   bool should_do_collection_pause(size_t word_size);
   // This is not needed any more, after the CSet choosing code was
   // changed to use the pause prediction work. But let's leave the

From f8eed77f36cf4ba7879b9d261283d4065f283b15 Mon Sep 17 00:00:00 2001
From: John Cuthbertson
Date: Fri, 6 Aug 2010 10:17:21 -0700
Subject: [PATCH 5/9] 6930581: G1: assert(ParallelGCThreads > 1 || n_yielded() == _hrrs->occupied(),"Should have yielded all the .

During RSet updating, when ParallelGCThreads is zero, references that point
into the collection set are added directly to the referenced region's RSet.
This can cause the sparse table in the RSet to expand. RSet scanning and the
"occupied" routine will then operate on different instances of the sparse
table causing the assert to trip. This may also cause some cards added post
expansion to be missed during RSet scanning. When ParallelGCThreads is
non-zero such references are recorded on the "references to be scanned" queue
and the card containing the reference is recorded in a dirty card queue for
use in the event of an evacuation failure. Employ the parallel code in the
serial case to avoid expanding the RSets of regions in the collection set.

Reviewed-by: iveresov, ysr, tonyp
---
 .../vm/gc_implementation/g1/g1RemSet.cpp      | 50 ++++++++-----------
 .../vm/gc_implementation/g1/g1RemSet.hpp      |  8 +--
 .../gc_implementation/g1/g1RemSet.inline.hpp  |  6 +--
 .../vm/gc_implementation/g1/sparsePRT.cpp     |  2 +-
 4 files changed, 30 insertions(+), 36 deletions(-)

diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp
index 9c081fce5af..19ec341f980 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp
@@ -122,7 +122,7 @@ public:
 HRInto_G1RemSet::HRInto_G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs)
   : G1RemSet(g1), _ct_bs(ct_bs), _g1p(_g1->g1_policy()),
     _cg1r(g1->concurrent_g1_refine()),
-    _par_traversal_in_progress(false),
+    _traversal_in_progress(false),
     _cset_rs_update_cl(NULL),
     _cards_scanned(NULL), _total_cards_scanned(0)
 {
@@ -484,28 +484,24 @@ HRInto_G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc,
   // is updated immediately.
   DirtyCardQueue into_cset_dcq(&_g1->into_cset_dirty_card_queue_set());

-  if (ParallelGCThreads > 0) {
-    // The two flags below were introduced temporarily to serialize
-    // the updating and scanning of remembered sets.
There are some - // race conditions when these two operations are done in parallel - // and they are causing failures. When we resolve said race - // conditions, we'll revert back to parallel remembered set - // updating and scanning. See CRs 6677707 and 6677708. - if (G1UseParallelRSetUpdating || (worker_i == 0)) { - updateRS(&into_cset_dcq, worker_i); - } else { - _g1p->record_update_rs_processed_buffers(worker_i, 0.0); - _g1p->record_update_rs_time(worker_i, 0.0); - } - if (G1UseParallelRSetScanning || (worker_i == 0)) { - scanRS(oc, worker_i); - } else { - _g1p->record_scan_rs_time(worker_i, 0.0); - } + assert((ParallelGCThreads > 0) || worker_i == 0, "invariant"); + + // The two flags below were introduced temporarily to serialize + // the updating and scanning of remembered sets. There are some + // race conditions when these two operations are done in parallel + // and they are causing failures. When we resolve said race + // conditions, we'll revert back to parallel remembered set + // updating and scanning. See CRs 6677707 and 6677708. + if (G1UseParallelRSetUpdating || (worker_i == 0)) { + updateRS(&into_cset_dcq, worker_i); } else { - assert(worker_i == 0, "invariant"); - updateRS(&into_cset_dcq, 0); - scanRS(oc, 0); + _g1p->record_update_rs_processed_buffers(worker_i, 0.0); + _g1p->record_update_rs_time(worker_i, 0.0); + } + if (G1UseParallelRSetScanning || (worker_i == 0)) { + scanRS(oc, worker_i); + } else { + _g1p->record_scan_rs_time(worker_i, 0.0); } // We now clear the cached values of _cset_rs_update_cl for this worker @@ -524,9 +520,9 @@ prepare_for_oops_into_collection_set_do() { DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); dcqs.concatenate_logs(); - assert(!_par_traversal_in_progress, "Invariant between iterations."); + assert(!_traversal_in_progress, "Invariant between iterations."); + set_traversal(true); if (ParallelGCThreads > 0) { - set_par_traversal(true); _seq_task->set_par_threads((int)n_workers()); } guarantee( _cards_scanned == NULL, "invariant" ); @@ -623,9 +619,7 @@ void HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do() { // Set all cards back to clean. _g1->cleanUpCardTable(); - if (ParallelGCThreads > 0) { - set_par_traversal(false); - } + set_traversal(false); DirtyCardQueueSet& into_cset_dcqs = _g1->into_cset_dirty_card_queue_set(); int into_cset_n_buffers = into_cset_dcqs.completed_buffers_num(); @@ -660,7 +654,7 @@ void HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do() { "all buffers should be freed"); _g1->into_cset_dirty_card_queue_set().clear_n_completed_buffers(); - assert(!_par_traversal_in_progress, "Invariant between iterations."); + assert(!_traversal_in_progress, "Invariant between iterations."); } class UpdateRSObjectClosure: public ObjectClosure { diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp index 112f9aa646c..fcb5ecd76cf 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp @@ -148,10 +148,10 @@ protected: size_t* _cards_scanned; size_t _total_cards_scanned; - // _par_traversal_in_progress is "true" iff a parallel traversal is in - // progress. - bool _par_traversal_in_progress; - void set_par_traversal(bool b) { _par_traversal_in_progress = b; } + // _traversal_in_progress is "true" iff a traversal is in progress. 
+ + bool _traversal_in_progress; + void set_traversal(bool b) { _traversal_in_progress = b; } // Used for caching the closure that is responsible for scanning // references into the collection set. diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp index d3fa77ae92a..ce64065527b 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp @@ -62,13 +62,13 @@ template inline void HRInto_G1RemSet::par_write_ref_nv(HeapRegion* fro HeapRegion* to = _g1->heap_region_containing(obj); // The test below could be optimized by applying a bit op to to and from. if (to != NULL && from != NULL && from != to) { - // The _par_traversal_in_progress flag is true during the collection pause, - // false during the evacuation failure handing. This should avoid a + // The _traversal_in_progress flag is true during the collection pause, + // false during the evacuation failure handling. This should avoid a // potential loop if we were to add the card containing 'p' to the DCQS // that's used to regenerate the remembered sets for the collection set, // in the event of an evacuation failure, here. The UpdateRSImmediate // closure will eventally call this routine. - if (_par_traversal_in_progress && + if (_traversal_in_progress && to->in_collection_set() && !self_forwarded(obj)) { assert(_cset_rs_update_cl[tid] != NULL, "should have been set already"); diff --git a/hotspot/src/share/vm/gc_implementation/g1/sparsePRT.cpp b/hotspot/src/share/vm/gc_implementation/g1/sparsePRT.cpp index 434836345c8..5bc3ab29c70 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/sparsePRT.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/sparsePRT.cpp @@ -424,7 +424,7 @@ void SparsePRT::cleanup_all() { SparsePRT::SparsePRT(HeapRegion* hr) : - _expanded(false), _next_expanded(NULL) + _hr(hr), _expanded(false), _next_expanded(NULL) { _cur = new RSHashTable(InitialCapacity); _next = _cur; From 7fe1c05c7c42c4394cd1f402d86a4185f552e11b Mon Sep 17 00:00:00 2001 From: John Coomes Date: Mon, 9 Aug 2010 05:41:05 -0700 Subject: [PATCH 6/9] 6966222: G1: simplify TaskQueue overflow handling Reviewed-by: tonyp, ysr --- .../gc_implementation/g1/g1CollectedHeap.cpp | 101 +++++++++++------- .../gc_implementation/g1/g1CollectedHeap.hpp | 76 +++++-------- .../g1/g1CollectedHeap.inline.hpp | 3 +- hotspot/src/share/vm/utilities/taskqueue.cpp | 31 ++++++ hotspot/src/share/vm/utilities/taskqueue.hpp | 6 ++ 5 files changed, 127 insertions(+), 90 deletions(-) diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp index 76baff21b62..d1dd28ca2f3 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp @@ -2710,6 +2710,35 @@ struct PrepareForRSScanningClosure : public HeapRegionClosure { } }; +#if TASKQUEUE_STATS +void G1CollectedHeap::print_taskqueue_stats_hdr(outputStream* const st) { + st->print_raw_cr("GC Task Stats"); + st->print_raw("thr "); TaskQueueStats::print_header(1, st); st->cr(); + st->print_raw("--- "); TaskQueueStats::print_header(2, st); st->cr(); +} + +void G1CollectedHeap::print_taskqueue_stats(outputStream* const st) const { + print_taskqueue_stats_hdr(st); + + TaskQueueStats totals; + const int n = MAX2(workers()->total_workers(), 1); + for (int i = 0; i < n; ++i) { + st->print("%3d ", i); 
task_queue(i)->stats.print(st); st->cr(); + totals += task_queue(i)->stats; + } + st->print_raw("tot "); totals.print(st); st->cr(); + + DEBUG_ONLY(totals.verify()); +} + +void G1CollectedHeap::reset_taskqueue_stats() { + const int n = MAX2(workers()->total_workers(), 1); + for (int i = 0; i < n; ++i) { + task_queue(i)->stats.reset(); + } +} +#endif // TASKQUEUE_STATS + void G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) { if (GC_locker::check_active_before_gc()) { @@ -2970,6 +2999,9 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) { } } + TASKQUEUE_STATS_ONLY(if (ParallelGCVerbose) print_taskqueue_stats()); + TASKQUEUE_STATS_ONLY(reset_taskqueue_stats()); + if (PrintHeapAtGC) { Universe::print_heap_after_gc(); } @@ -3715,10 +3747,6 @@ G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, int queue_num) _surviving_alloc_buffer(g1h->desired_plab_sz(GCAllocForSurvived)), _tenured_alloc_buffer(g1h->desired_plab_sz(GCAllocForTenured)), _age_table(false), -#if G1_DETAILED_STATS - _pushes(0), _pops(0), _steals(0), - _steal_attempts(0), _overflow_pushes(0), -#endif _strong_roots_time(0), _term_time(0), _alloc_buffer_waste(0), _undo_waste(0) { @@ -3738,14 +3766,41 @@ G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, int queue_num) _surviving_young_words = _surviving_young_words_base + PADDING_ELEM_NUM; memset(_surviving_young_words, 0, real_length * sizeof(size_t)); - _overflowed_refs = new OverflowQueue(10); - _alloc_buffers[GCAllocForSurvived] = &_surviving_alloc_buffer; _alloc_buffers[GCAllocForTenured] = &_tenured_alloc_buffer; _start = os::elapsedTime(); } +void +G1ParScanThreadState::print_termination_stats_hdr(outputStream* const st) +{ + st->print_raw_cr("GC Termination Stats"); + st->print_raw_cr(" elapsed --strong roots-- -------termination-------" + " ------waste (KiB)------"); + st->print_raw_cr("thr ms ms % ms % attempts" + " total alloc undo"); + st->print_raw_cr("--- --------- --------- ------ --------- ------ --------" + " ------- ------- -------"); +} + +void +G1ParScanThreadState::print_termination_stats(int i, + outputStream* const st) const +{ + const double elapsed_ms = elapsed_time() * 1000.0; + const double s_roots_ms = strong_roots_time() * 1000.0; + const double term_ms = term_time() * 1000.0; + st->print_cr("%3d %9.2f %9.2f %6.2f " + "%9.2f %6.2f " SIZE_FORMAT_W(8) " " + SIZE_FORMAT_W(7) " " SIZE_FORMAT_W(7) " " SIZE_FORMAT_W(7), + i, elapsed_ms, s_roots_ms, s_roots_ms * 100 / elapsed_ms, + term_ms, term_ms * 100 / elapsed_ms, term_attempts(), + (alloc_buffer_waste() + undo_waste()) * HeapWordSize / K, + alloc_buffer_waste() * HeapWordSize / K, + undo_waste() * HeapWordSize / K); +} + G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) : _g1(g1), _g1_rem(_g1->g1_rem_set()), _cm(_g1->concurrent_mark()), _par_scan_state(par_scan_state) { } @@ -3952,12 +4007,9 @@ public: G1ParScanThreadState* pss = par_scan_state(); while (true) { pss->trim_queue(); - IF_G1_DETAILED_STATS(pss->note_steal_attempt()); StarTask stolen_task; if (queues()->steal(pss->queue_num(), pss->hash_seed(), stolen_task)) { - IF_G1_DETAILED_STATS(pss->note_steal()); - // slightly paranoid tests; I'm trying to catch potential // problems before we go into push_on_queue to know where the // problem is coming from @@ -4076,35 +4128,9 @@ public: // Clean up any par-expanded rem sets. 
HeapRegionRemSet::par_cleanup(); - MutexLocker x(stats_lock()); if (ParallelGCVerbose) { - gclog_or_tty->print("Thread %d complete:\n", i); -#if G1_DETAILED_STATS - gclog_or_tty->print(" Pushes: %7d Pops: %7d Overflows: %7d Steals %7d (in %d attempts)\n", - pss.pushes(), - pss.pops(), - pss.overflow_pushes(), - pss.steals(), - pss.steal_attempts()); -#endif - double elapsed = pss.elapsed(); - double strong_roots = pss.strong_roots_time(); - double term = pss.term_time(); - gclog_or_tty->print(" Elapsed: %7.2f ms.\n" - " Strong roots: %7.2f ms (%6.2f%%)\n" - " Termination: %7.2f ms (%6.2f%%) " - "(in "SIZE_FORMAT" entries)\n", - elapsed * 1000.0, - strong_roots * 1000.0, (strong_roots*100.0/elapsed), - term * 1000.0, (term*100.0/elapsed), - pss.term_attempts()); - size_t total_waste = pss.alloc_buffer_waste() + pss.undo_waste(); - gclog_or_tty->print(" Waste: %8dK\n" - " Alloc Buffer: %8dK\n" - " Undo: %8dK\n", - (total_waste * HeapWordSize) / K, - (pss.alloc_buffer_waste() * HeapWordSize) / K, - (pss.undo_waste() * HeapWordSize) / K); + MutexLocker x(stats_lock()); + pss.print_termination_stats(i); } assert(pss.refs_to_scan() == 0, "Task queue should be empty"); @@ -4221,6 +4247,7 @@ void G1CollectedHeap::evacuate_collection_set() { if (ParallelGCThreads > 0) { // The individual threads will set their evac-failure closures. StrongRootsScope srs(this); + if (ParallelGCVerbose) G1ParScanThreadState::print_termination_stats_hdr(); workers()->run_task(&g1_par_task); } else { StrongRootsScope srs(this); diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp index 6daaf614f44..d831e8d7d7e 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp @@ -46,17 +46,7 @@ class ConcurrentMarkThread; class ConcurrentG1Refine; class ConcurrentZFThread; -// If want to accumulate detailed statistics on work queues -// turn this on. -#define G1_DETAILED_STATS 0 - -#if G1_DETAILED_STATS -# define IF_G1_DETAILED_STATS(code) code -#else -# define IF_G1_DETAILED_STATS(code) -#endif - -typedef GenericTaskQueue RefToScanQueue; +typedef OverflowTaskQueue RefToScanQueue; typedef GenericTaskQueueSet RefToScanQueueSet; typedef int RegionIdx_t; // needs to hold [ 0..max_regions() ) @@ -471,6 +461,12 @@ protected: virtual void shrink(size_t expand_bytes); void shrink_helper(size_t expand_bytes); + #if TASKQUEUE_STATS + static void print_taskqueue_stats_hdr(outputStream* const st = gclog_or_tty); + void print_taskqueue_stats(outputStream* const st = gclog_or_tty) const; + void reset_taskqueue_stats(); + #endif // TASKQUEUE_STATS + // Do an incremental collection: identify a collection set, and evacuate // its live objects elsewhere. 
virtual void do_collection_pause(); @@ -662,7 +658,7 @@ protected: public: void set_refine_cte_cl_concurrency(bool concurrent); - RefToScanQueue *task_queue(int i); + RefToScanQueue *task_queue(int i) const; // A set of cards where updates happened during the GC DirtyCardQueueSet& dirty_card_queue_set() { return _dirty_card_queue_set; } @@ -1579,9 +1575,6 @@ protected: CardTableModRefBS* _ct_bs; G1RemSet* _g1_rem; - typedef GrowableArray OverflowQueue; - OverflowQueue* _overflowed_refs; - G1ParGCAllocBuffer _surviving_alloc_buffer; G1ParGCAllocBuffer _tenured_alloc_buffer; G1ParGCAllocBuffer* _alloc_buffers[GCAllocPurposeCount]; @@ -1598,10 +1591,6 @@ protected: int _queue_num; size_t _term_attempts; -#if G1_DETAILED_STATS - int _pushes, _pops, _steals, _steal_attempts; - int _overflow_pushes; -#endif double _start; double _start_strong_roots; @@ -1615,7 +1604,7 @@ protected: // this points into the array, as we use the first few entries for padding size_t* _surviving_young_words; -#define PADDING_ELEM_NUM (64 / sizeof(size_t)) +#define PADDING_ELEM_NUM (DEFAULT_CACHE_LINE_SIZE / sizeof(size_t)) void add_to_alloc_buffer_waste(size_t waste) { _alloc_buffer_waste += waste; } @@ -1650,15 +1639,14 @@ public: } RefToScanQueue* refs() { return _refs; } - OverflowQueue* overflowed_refs() { return _overflowed_refs; } ageTable* age_table() { return &_age_table; } G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose) { return _alloc_buffers[purpose]; } - size_t alloc_buffer_waste() { return _alloc_buffer_waste; } - size_t undo_waste() { return _undo_waste; } + size_t alloc_buffer_waste() const { return _alloc_buffer_waste; } + size_t undo_waste() const { return _undo_waste; } template void push_on_queue(T* ref) { assert(ref != NULL, "invariant"); @@ -1671,12 +1659,7 @@ public: assert(_g1h->obj_in_cs(p), "Should be in CS"); } #endif - if (!refs()->push(ref)) { - overflowed_refs()->push(ref); - IF_G1_DETAILED_STATS(note_overflow_push()); - } else { - IF_G1_DETAILED_STATS(note_push()); - } + refs()->push(ref); } void pop_from_queue(StarTask& ref) { @@ -1687,7 +1670,6 @@ public: _g1h->is_in_g1_reserved(ref.is_narrow() ? 
oopDesc::load_decode_heap_oop((narrowOop*)ref) : oopDesc::load_decode_heap_oop((oop*)ref)), "invariant"); - IF_G1_DETAILED_STATS(note_pop()); } else { StarTask null_task; ref = null_task; @@ -1695,7 +1677,8 @@ public: } void pop_from_overflow_queue(StarTask& ref) { - StarTask new_ref = overflowed_refs()->pop(); + StarTask new_ref; + refs()->pop_overflow(new_ref); assert((oop*)new_ref != NULL, "pop() from a local non-empty stack"); assert(UseCompressedOops || !new_ref.is_narrow(), "Error"); assert(has_partial_array_mask((oop*)new_ref) || @@ -1705,8 +1688,8 @@ public: ref = new_ref; } - int refs_to_scan() { return refs()->size(); } - int overflowed_refs_to_scan() { return overflowed_refs()->length(); } + int refs_to_scan() { return refs()->size(); } + int overflowed_refs_to_scan() { return refs()->overflow_stack()->length(); } template void update_rs(HeapRegion* from, T* p, int tid) { if (G1DeferredRSUpdate) { @@ -1775,30 +1758,16 @@ public: int* hash_seed() { return &_hash_seed; } int queue_num() { return _queue_num; } - size_t term_attempts() { return _term_attempts; } + size_t term_attempts() const { return _term_attempts; } void note_term_attempt() { _term_attempts++; } -#if G1_DETAILED_STATS - int pushes() { return _pushes; } - int pops() { return _pops; } - int steals() { return _steals; } - int steal_attempts() { return _steal_attempts; } - int overflow_pushes() { return _overflow_pushes; } - - void note_push() { _pushes++; } - void note_pop() { _pops++; } - void note_steal() { _steals++; } - void note_steal_attempt() { _steal_attempts++; } - void note_overflow_push() { _overflow_pushes++; } -#endif - void start_strong_roots() { _start_strong_roots = os::elapsedTime(); } void end_strong_roots() { _strong_roots_time += (os::elapsedTime() - _start_strong_roots); } - double strong_roots_time() { return _strong_roots_time; } + double strong_roots_time() const { return _strong_roots_time; } void start_term_time() { note_term_attempt(); @@ -1807,12 +1776,17 @@ public: void end_term_time() { _term_time += (os::elapsedTime() - _start_term); } - double term_time() { return _term_time; } + double term_time() const { return _term_time; } - double elapsed() { + double elapsed_time() const { return os::elapsedTime() - _start; } + static void + print_termination_stats_hdr(outputStream* const st = gclog_or_tty); + void + print_termination_stats(int i, outputStream* const st = gclog_or_tty) const; + size_t* surviving_young_words() { // We add on to hide entry 0 which accumulates surviving words for // age -1 regions (i.e. 
non-young ones) diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp index a2459d9d0aa..207ee2ffa4c 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp @@ -81,11 +81,10 @@ inline HeapWord* G1CollectedHeap::attempt_allocation(size_t word_size, return attempt_allocation_slow(word_size, permit_collection_pause); } -inline RefToScanQueue* G1CollectedHeap::task_queue(int i) { +inline RefToScanQueue* G1CollectedHeap::task_queue(int i) const { return _task_queues->queue(i); } - inline bool G1CollectedHeap::isMarkedPrev(oop obj) const { return _cm->prevMarkBitMap()->isMarked((HeapWord *)obj); } diff --git a/hotspot/src/share/vm/utilities/taskqueue.cpp b/hotspot/src/share/vm/utilities/taskqueue.cpp index 7a001803d96..2492b90d1d8 100644 --- a/hotspot/src/share/vm/utilities/taskqueue.cpp +++ b/hotspot/src/share/vm/utilities/taskqueue.cpp @@ -36,6 +36,14 @@ const char * const TaskQueueStats::_names[last_stat_id] = { "qpush", "qpop", "qpop-s", "qattempt", "qsteal", "opush", "omax" }; +TaskQueueStats & TaskQueueStats::operator +=(const TaskQueueStats & addend) +{ + for (unsigned int i = 0; i < last_stat_id; ++i) { + _stats[i] += addend._stats[i]; + } + return *this; +} + void TaskQueueStats::print_header(unsigned int line, outputStream* const stream, unsigned int width) { @@ -71,6 +79,29 @@ void TaskQueueStats::print(outputStream* stream, unsigned int width) const } #undef FMT } + +#ifdef ASSERT +// Invariants which should hold after a TaskQueue has been emptied and is +// quiescent; they do not hold at arbitrary times. +void TaskQueueStats::verify() const +{ + assert(get(push) == get(pop) + get(steal), + err_msg("push=" SIZE_FORMAT " pop=" SIZE_FORMAT " steal=" SIZE_FORMAT, + get(push), get(pop), get(steal))); + assert(get(pop_slow) <= get(pop), + err_msg("pop_slow=" SIZE_FORMAT " pop=" SIZE_FORMAT, + get(pop_slow), get(pop))); + assert(get(steal) <= get(steal_attempt), + err_msg("steal=" SIZE_FORMAT " steal_attempt=" SIZE_FORMAT, + get(steal), get(steal_attempt))); + assert(get(overflow) == 0 || get(push) != 0, + err_msg("overflow=" SIZE_FORMAT " push=" SIZE_FORMAT, + get(overflow), get(push))); + assert(get(overflow_max_len) == 0 || get(overflow) != 0, + err_msg("overflow_max_len=" SIZE_FORMAT " overflow=" SIZE_FORMAT, + get(overflow_max_len), get(overflow))); +} +#endif // ASSERT #endif // TASKQUEUE_STATS int TaskQueueSetSuper::randomParkAndMiller(int *seed0) { diff --git a/hotspot/src/share/vm/utilities/taskqueue.hpp b/hotspot/src/share/vm/utilities/taskqueue.hpp index 52e89fc8027..1bd1ae72309 100644 --- a/hotspot/src/share/vm/utilities/taskqueue.hpp +++ b/hotspot/src/share/vm/utilities/taskqueue.hpp @@ -59,15 +59,21 @@ public: inline void record_steal(bool success); inline void record_overflow(size_t new_length); + TaskQueueStats & operator +=(const TaskQueueStats & addend); + inline size_t get(StatId id) const { return _stats[id]; } inline const size_t* get() const { return _stats; } inline void reset(); + // Print the specified line of the header (does not include a line separator). static void print_header(unsigned int line, outputStream* const stream = tty, unsigned int width = 10); + // Print the statistics (does not include a line separator). 
void print(outputStream* const stream = tty, unsigned int width = 10) const; + DEBUG_ONLY(void verify() const;) + private: size_t _stats[last_stat_id]; static const char * const _names[last_stat_id]; From b0e98512a72da4ac35f8be9dbf9db32ca878ce3f Mon Sep 17 00:00:00 2001 From: John Coomes Date: Mon, 9 Aug 2010 18:03:50 -0700 Subject: [PATCH 7/9] 6970376: ParNew: shared TaskQueue statistics Reviewed-by: ysr --- .../parNew/parNewGeneration.cpp | 171 +++++++++++------- .../parNew/parNewGeneration.hpp | 49 +++-- 2 files changed, 126 insertions(+), 94 deletions(-) diff --git a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp index dc655aed136..45683b17572 100644 --- a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp +++ b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp @@ -51,9 +51,14 @@ ParScanThreadState::ParScanThreadState(Space* to_space_, _is_alive_closure(gen_), _scan_weak_ref_closure(gen_, this), _keep_alive_closure(&_scan_weak_ref_closure), _promotion_failure_size(0), - _pushes(0), _pops(0), _steals(0), _steal_attempts(0), _term_attempts(0), _strong_roots_time(0.0), _term_time(0.0) { + #if TASKQUEUE_STATS + _term_attempts = 0; + _overflow_refills = 0; + _overflow_refill_objs = 0; + #endif // TASKQUEUE_STATS + _survivor_chunk_array = (ChunkArray*) old_gen()->get_data_recorder(thread_num()); _hash_seed = 17; // Might want to take time-based random value. @@ -100,7 +105,6 @@ void ParScanThreadState::scan_partial_array_and_push_remainder(oop old) { // Push remainder. bool ok = work_queue()->push(old); assert(ok, "just popped, push must be okay"); - note_push(); } else { // Restore length so that it can be used if there // is a promotion failure and forwarding pointers @@ -126,7 +130,6 @@ void ParScanThreadState::trim_queues(int max_size) { while (queue->size() > (juint)max_size) { oop obj_to_scan; if (queue->pop_local(obj_to_scan)) { - note_pop(); if ((HeapWord *)obj_to_scan < young_old_boundary()) { if (obj_to_scan->is_objArray() && obj_to_scan->is_forwarded() && @@ -271,20 +274,28 @@ public: GrowableArray** overflow_stacks_, size_t desired_plab_sz, ParallelTaskTerminator& term); + + ~ParScanThreadStateSet() { TASKQUEUE_STATS_ONLY(reset_stats()); } + inline ParScanThreadState& thread_state(int i); - int pushes() { return _pushes; } - int pops() { return _pops; } - int steals() { return _steals; } + void reset(bool promotion_failed); void flush(); + + #if TASKQUEUE_STATS + static void + print_termination_stats_hdr(outputStream* const st = gclog_or_tty); + void print_termination_stats(outputStream* const st = gclog_or_tty); + static void + print_taskqueue_stats_hdr(outputStream* const st = gclog_or_tty); + void print_taskqueue_stats(outputStream* const st = gclog_or_tty); + void reset_stats(); + #endif // TASKQUEUE_STATS + private: ParallelTaskTerminator& _term; ParNewGeneration& _gen; Generation& _next_gen; - // staticstics - int _pushes; - int _pops; - int _steals; }; @@ -294,8 +305,7 @@ ParScanThreadStateSet::ParScanThreadStateSet( GrowableArray** overflow_stack_set_, size_t desired_plab_sz, ParallelTaskTerminator& term) : ResourceArray(sizeof(ParScanThreadState), num_threads), - _gen(gen), _next_gen(old_gen), _term(term), - _pushes(0), _pops(0), _steals(0) + _gen(gen), _next_gen(old_gen), _term(term) { assert(num_threads > 0, "sanity check!"); // Initialize states. 
@@ -323,6 +333,82 @@ void ParScanThreadStateSet::reset(bool promotion_failed) } } +#if TASKQUEUE_STATS +void +ParScanThreadState::reset_stats() +{ + taskqueue_stats().reset(); + _term_attempts = 0; + _overflow_refills = 0; + _overflow_refill_objs = 0; +} + +void ParScanThreadStateSet::reset_stats() +{ + for (int i = 0; i < length(); ++i) { + thread_state(i).reset_stats(); + } +} + +void +ParScanThreadStateSet::print_termination_stats_hdr(outputStream* const st) +{ + st->print_raw_cr("GC Termination Stats"); + st->print_raw_cr(" elapsed --strong roots-- " + "-------termination-------"); + st->print_raw_cr("thr ms ms % " + " ms % attempts"); + st->print_raw_cr("--- --------- --------- ------ " + "--------- ------ --------"); +} + +void ParScanThreadStateSet::print_termination_stats(outputStream* const st) +{ + print_termination_stats_hdr(st); + + for (int i = 0; i < length(); ++i) { + const ParScanThreadState & pss = thread_state(i); + const double elapsed_ms = pss.elapsed_time() * 1000.0; + const double s_roots_ms = pss.strong_roots_time() * 1000.0; + const double term_ms = pss.term_time() * 1000.0; + st->print_cr("%3d %9.2f %9.2f %6.2f " + "%9.2f %6.2f " SIZE_FORMAT_W(8), + i, elapsed_ms, s_roots_ms, s_roots_ms * 100 / elapsed_ms, + term_ms, term_ms * 100 / elapsed_ms, pss.term_attempts()); + } +} + +// Print stats related to work queue activity. +void ParScanThreadStateSet::print_taskqueue_stats_hdr(outputStream* const st) +{ + st->print_raw_cr("GC Task Stats"); + st->print_raw("thr "); TaskQueueStats::print_header(1, st); st->cr(); + st->print_raw("--- "); TaskQueueStats::print_header(2, st); st->cr(); +} + +void ParScanThreadStateSet::print_taskqueue_stats(outputStream* const st) +{ + print_taskqueue_stats_hdr(st); + + TaskQueueStats totals; + for (int i = 0; i < length(); ++i) { + const ParScanThreadState & pss = thread_state(i); + const TaskQueueStats & stats = pss.taskqueue_stats(); + st->print("%3d ", i); stats.print(st); st->cr(); + totals += stats; + + if (pss.overflow_refills() > 0) { + st->print_cr(" " SIZE_FORMAT_W(10) " overflow refills " + SIZE_FORMAT_W(10) " overflow objects", + pss.overflow_refills(), pss.overflow_refill_objs()); + } + } + st->print("tot "); totals.print(st); st->cr(); + + DEBUG_ONLY(totals.verify()); +} +#endif // TASKQUEUE_STATS + void ParScanThreadStateSet::flush() { // Work in this loop should be kept as lightweight as @@ -346,42 +432,8 @@ void ParScanThreadStateSet::flush() // Inform old gen that we're done. _next_gen.par_promote_alloc_done(i); _next_gen.par_oop_since_save_marks_iterate_done(i); - - // Flush stats related to work queue activity (push/pop/steal) - // This could conceivably become a bottleneck; if so, we'll put the - // stat's gathering under the flag. 
- if (PAR_STATS_ENABLED) { - _pushes += par_scan_state.pushes(); - _pops += par_scan_state.pops(); - _steals += par_scan_state.steals(); - if (ParallelGCVerbose) { - gclog_or_tty->print("Thread %d complete:\n" - " Pushes: %7d Pops: %7d Steals %7d (in %d attempts)\n", - i, par_scan_state.pushes(), par_scan_state.pops(), - par_scan_state.steals(), par_scan_state.steal_attempts()); - if (par_scan_state.overflow_pushes() > 0 || - par_scan_state.overflow_refills() > 0) { - gclog_or_tty->print(" Overflow pushes: %7d " - "Overflow refills: %7d for %d objs.\n", - par_scan_state.overflow_pushes(), - par_scan_state.overflow_refills(), - par_scan_state.overflow_refill_objs()); - } - - double elapsed = par_scan_state.elapsed(); - double strong_roots = par_scan_state.strong_roots_time(); - double term = par_scan_state.term_time(); - gclog_or_tty->print( - " Elapsed: %7.2f ms.\n" - " Strong roots: %7.2f ms (%6.2f%%)\n" - " Termination: %7.2f ms (%6.2f%%) (in %d entries)\n", - elapsed * 1000.0, - strong_roots * 1000.0, (strong_roots*100.0/elapsed), - term * 1000.0, (term*100.0/elapsed), - par_scan_state.term_attempts()); - } - } } + if (UseConcMarkSweepGC && ParallelGCThreads > 0) { // We need to call this even when ResizeOldPLAB is disabled // so as to avoid breaking some asserts. While we may be able @@ -456,15 +508,12 @@ void ParEvacuateFollowersClosure::do_void() { // We have no local work, attempt to steal from other threads. // attempt to steal work from promoted. - par_scan_state()->note_steal_attempt(); if (task_queues()->steal(par_scan_state()->thread_num(), par_scan_state()->hash_seed(), obj_to_scan)) { - par_scan_state()->note_steal(); bool res = work_q->push(obj_to_scan); assert(res, "Empty queue should have room for a push."); - par_scan_state()->note_push(); // if successful, goto Start. continue; @@ -842,17 +891,6 @@ void ParNewGeneration::collect(bool full, } thread_state_set.reset(promotion_failed()); - if (PAR_STATS_ENABLED && ParallelGCVerbose) { - gclog_or_tty->print("Thread totals:\n" - " Pushes: %7d Pops: %7d Steals %7d (sum = %7d).\n", - thread_state_set.pushes(), thread_state_set.pops(), - thread_state_set.steals(), - thread_state_set.pops()+thread_state_set.steals()); - } - assert(thread_state_set.pushes() == thread_state_set.pops() - + thread_state_set.steals(), - "Or else the queues are leaky."); - // Process (weak) reference objects found during scavenge. ReferenceProcessor* rp = ref_processor(); IsAliveClosure is_alive(this); @@ -932,6 +970,9 @@ void ParNewGeneration::collect(bool full, gch->print_heap_change(gch_prev_used); } + TASKQUEUE_STATS_ONLY(thread_state_set.print_termination_stats()); + TASKQUEUE_STATS_ONLY(thread_state_set.print_taskqueue_stats()); + if (UseAdaptiveSizePolicy) { size_policy->minor_collection_end(gch->gc_cause()); size_policy->avg_survived()->sample(from()->used()); @@ -1104,9 +1145,8 @@ oop ParNewGeneration::copy_to_survivor_space_avoiding_promotion_undo( gclog_or_tty->print("queue overflow!\n"); } push_on_overflow_list(old, par_scan_state); - par_scan_state->note_overflow_push(); + TASKQUEUE_STATS_ONLY(par_scan_state->taskqueue_stats().record_overflow(0)); } - par_scan_state->note_push(); return new_obj; } @@ -1227,9 +1267,8 @@ oop ParNewGeneration::copy_to_survivor_space_with_undo( if (simulate_overflow || !par_scan_state->work_queue()->push(obj_to_push)) { // Add stats for overflow pushes. 
push_on_overflow_list(old, par_scan_state); - par_scan_state->note_overflow_push(); + TASKQUEUE_STATS_ONLY(par_scan_state->taskqueue_stats().record_overflow(0)); } - par_scan_state->note_push(); return new_obj; } @@ -1466,7 +1505,7 @@ bool ParNewGeneration::take_from_overflow_list_work(ParScanThreadState* par_scan cur = next; n++; } - par_scan_state->note_overflow_refill(n); + TASKQUEUE_STATS_ONLY(par_scan_state->note_overflow_refill(n)); #ifndef PRODUCT assert(_num_par_pushes >= n, "Too many pops?"); Atomic::add_ptr(-(intptr_t)n, &_num_par_pushes); diff --git a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp index 8196e621372..a3090ebf452 100644 --- a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp +++ b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp @@ -36,9 +36,6 @@ class ParEvacuateFollowersClosure; typedef Padded ObjToScanQueue; typedef GenericTaskQueueSet ObjToScanQueueSet; -// Enable this to get push/pop/steal stats. -const int PAR_STATS_ENABLED = 0; - class ParKeepAliveClosure: public DefNewGeneration::KeepAliveClosure { private: ParScanWeakRefClosure* _par_cl; @@ -94,8 +91,11 @@ class ParScanThreadState { bool _to_space_full; - int _pushes, _pops, _steals, _steal_attempts, _term_attempts; - int _overflow_pushes, _overflow_refills, _overflow_refill_objs; +#if TASKQUEUE_STATS + size_t _term_attempts; + size_t _overflow_refills; + size_t _overflow_refill_objs; +#endif // TASKQUEUE_STATS // Stats for promotion failure size_t _promotion_failure_size; @@ -181,45 +181,38 @@ class ParScanThreadState { } void print_and_clear_promotion_failure_size(); - int pushes() { return _pushes; } - int pops() { return _pops; } - int steals() { return _steals; } - int steal_attempts() { return _steal_attempts; } - int term_attempts() { return _term_attempts; } - int overflow_pushes() { return _overflow_pushes; } - int overflow_refills() { return _overflow_refills; } - int overflow_refill_objs() { return _overflow_refill_objs; } +#if TASKQUEUE_STATS + TaskQueueStats & taskqueue_stats() const { return _work_queue->stats; } - void note_push() { if (PAR_STATS_ENABLED) _pushes++; } - void note_pop() { if (PAR_STATS_ENABLED) _pops++; } - void note_steal() { if (PAR_STATS_ENABLED) _steals++; } - void note_steal_attempt() { if (PAR_STATS_ENABLED) _steal_attempts++; } - void note_term_attempt() { if (PAR_STATS_ENABLED) _term_attempts++; } - void note_overflow_push() { if (PAR_STATS_ENABLED) _overflow_pushes++; } - void note_overflow_refill(int objs) { - if (PAR_STATS_ENABLED) { - _overflow_refills++; - _overflow_refill_objs += objs; - } + size_t term_attempts() const { return _term_attempts; } + size_t overflow_refills() const { return _overflow_refills; } + size_t overflow_refill_objs() const { return _overflow_refill_objs; } + + void note_term_attempt() { ++_term_attempts; } + void note_overflow_refill(size_t objs) { + ++_overflow_refills; _overflow_refill_objs += objs; } + void reset_stats(); +#endif // TASKQUEUE_STATS + void start_strong_roots() { _start_strong_roots = os::elapsedTime(); } void end_strong_roots() { _strong_roots_time += (os::elapsedTime() - _start_strong_roots); } - double strong_roots_time() { return _strong_roots_time; } + double strong_roots_time() const { return _strong_roots_time; } void start_term_time() { - note_term_attempt(); + TASKQUEUE_STATS_ONLY(note_term_attempt()); _start_term = os::elapsedTime(); } void end_term_time() { _term_time += 
From 7ee29eeb6a358088bb2821d754c6684db66544f2 Mon Sep 17 00:00:00 2001
From: "Y. Srinivas Ramakrishna"
Date: Tue, 10 Aug 2010 14:53:35 -0700
Subject: [PATCH 8/9] 6973570: OrderAccess::storestore() scales poorly on multi-socket x64 and sparc: cache-line ping-ponging

Volatile store to static variable removed in favour of a volatile store to
stack to avoid excessive cache coherency traffic; verified that the volatile
store is not elided by any of our current compilers.

Reviewed-by: dholmes, dice, jcoomes, kvn
---
 .../vm/orderAccess_linux_sparc.inline.hpp         |  6 +++---
 .../linux_x86/vm/orderAccess_linux_x86.inline.hpp | 12 +++++++-----
 .../vm/orderAccess_solaris_sparc.inline.hpp       | 10 ++++++----
 .../vm/orderAccess_solaris_x86.inline.hpp         | 12 +++++++-----
 .../vm/orderAccess_windows_x86.inline.hpp         |  4 ++--
 hotspot/src/share/vm/runtime/orderAccess.cpp      |  4 +---
 hotspot/src/share/vm/runtime/orderAccess.hpp      | 12 +++++++-----
 7 files changed, 33 insertions(+), 27 deletions(-)

diff --git a/hotspot/src/os_cpu/linux_sparc/vm/orderAccess_linux_sparc.inline.hpp b/hotspot/src/os_cpu/linux_sparc/vm/orderAccess_linux_sparc.inline.hpp
index 4ff88cfdbbe..2770baaabef 100644
--- a/hotspot/src/os_cpu/linux_sparc/vm/orderAccess_linux_sparc.inline.hpp
+++ b/hotspot/src/os_cpu/linux_sparc/vm/orderAccess_linux_sparc.inline.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2008, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -36,8 +36,8 @@ inline void OrderAccess::acquire() {
 }

 inline void OrderAccess::release() {
-  jint* dummy = (jint*)&dummy;
-  __asm__ volatile("stw %%g0, [%0]" : : "r" (dummy) : "memory");
+  jint* local_dummy = (jint*)&local_dummy;
+  __asm__ volatile("stw %%g0, [%0]" : : "r" (local_dummy) : "memory");
 }

 inline void OrderAccess::fence() {
diff --git a/hotspot/src/os_cpu/linux_x86/vm/orderAccess_linux_x86.inline.hpp b/hotspot/src/os_cpu/linux_x86/vm/orderAccess_linux_x86.inline.hpp
index a4f6c17d7ae..d487ece69a3 100644
--- a/hotspot/src/os_cpu/linux_x86/vm/orderAccess_linux_x86.inline.hpp
+++ b/hotspot/src/os_cpu/linux_x86/vm/orderAccess_linux_x86.inline.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -30,16 +30,18 @@ inline void OrderAccess::loadstore() { acquire(); }
 inline void OrderAccess::storeload() { fence(); }

 inline void OrderAccess::acquire() {
-  volatile intptr_t dummy;
+  volatile intptr_t local_dummy;
 #ifdef AMD64
-  __asm__ volatile ("movq 0(%%rsp), %0" : "=r" (dummy) : : "memory");
+  __asm__ volatile ("movq 0(%%rsp), %0" : "=r" (local_dummy) : : "memory");
 #else
-  __asm__ volatile ("movl 0(%%esp),%0" : "=r" (dummy) : : "memory");
+  __asm__ volatile ("movl 0(%%esp),%0" : "=r" (local_dummy) : : "memory");
 #endif // AMD64
 }

 inline void OrderAccess::release() {
-  dummy = 0;
+  // Avoid hitting the same cache-line from
+  // different threads.
+  volatile jint local_dummy = 0;
 }

 inline void OrderAccess::fence() {
diff --git a/hotspot/src/os_cpu/solaris_sparc/vm/orderAccess_solaris_sparc.inline.hpp b/hotspot/src/os_cpu/solaris_sparc/vm/orderAccess_solaris_sparc.inline.hpp
index 2646977ce8f..6075e010687 100644
--- a/hotspot/src/os_cpu/solaris_sparc/vm/orderAccess_solaris_sparc.inline.hpp
+++ b/hotspot/src/os_cpu/solaris_sparc/vm/orderAccess_solaris_sparc.inline.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -42,8 +42,8 @@ inline void OrderAccess::acquire() {
 }

 inline void OrderAccess::release() {
-  jint* dummy = (jint*)&dummy;
-  __asm__ volatile("stw %%g0, [%0]" : : "r" (dummy) : "memory");
+  jint* local_dummy = (jint*)&local_dummy;
+  __asm__ volatile("stw %%g0, [%0]" : : "r" (local_dummy) : "memory");
 }

 inline void OrderAccess::fence() {
@@ -57,7 +57,9 @@ inline void OrderAccess::acquire() {
 }

 inline void OrderAccess::release() {
-  dummy = 0;
+  // Avoid hitting the same cache-line from
+  // different threads.
+  volatile jint local_dummy = 0;
 }

 inline void OrderAccess::fence() {
diff --git a/hotspot/src/os_cpu/solaris_x86/vm/orderAccess_solaris_x86.inline.hpp b/hotspot/src/os_cpu/solaris_x86/vm/orderAccess_solaris_x86.inline.hpp
index 7165cde66d0..84c1ce9238d 100644
--- a/hotspot/src/os_cpu/solaris_x86/vm/orderAccess_solaris_x86.inline.hpp
+++ b/hotspot/src/os_cpu/solaris_x86/vm/orderAccess_solaris_x86.inline.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -40,7 +40,9 @@ inline void OrderAccess::acquire() {
 }

 inline void OrderAccess::release() {
-  dummy = 0;
+  // Avoid hitting the same cache-line from
+  // different threads.
+  volatile jint local_dummy = 0;
 }

 inline void OrderAccess::fence() {
@@ -53,11 +55,11 @@ inline void OrderAccess::fence() {

 extern "C" {
   inline void _OrderAccess_acquire() {
-    volatile intptr_t dummy;
+    volatile intptr_t local_dummy;
 #ifdef AMD64
-    __asm__ volatile ("movq 0(%%rsp), %0" : "=r" (dummy) : : "memory");
+    __asm__ volatile ("movq 0(%%rsp), %0" : "=r" (local_dummy) : : "memory");
 #else
-    __asm__ volatile ("movl 0(%%esp),%0" : "=r" (dummy) : : "memory");
+    __asm__ volatile ("movl 0(%%esp),%0" : "=r" (local_dummy) : : "memory");
 #endif // AMD64
   }
   inline void _OrderAccess_fence() {
diff --git a/hotspot/src/os_cpu/windows_x86/vm/orderAccess_windows_x86.inline.hpp b/hotspot/src/os_cpu/windows_x86/vm/orderAccess_windows_x86.inline.hpp
index 4a5ac91b18d..1ccddc4d397 100644
--- a/hotspot/src/os_cpu/windows_x86/vm/orderAccess_windows_x86.inline.hpp
+++ b/hotspot/src/os_cpu/windows_x86/vm/orderAccess_windows_x86.inline.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -41,7 +41,7 @@ inline void OrderAccess::acquire() {

 inline void OrderAccess::release() {
   // A volatile store has release semantics.
-  dummy = 0;
+  volatile jint local_dummy = 0;
 }

 inline void OrderAccess::fence() {
diff --git a/hotspot/src/share/vm/runtime/orderAccess.cpp b/hotspot/src/share/vm/runtime/orderAccess.cpp
index b61c481607e..12124f964ba 100644
--- a/hotspot/src/share/vm/runtime/orderAccess.cpp
+++ b/hotspot/src/share/vm/runtime/orderAccess.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,8 +25,6 @@
 # include "incls/_precompiled.incl"
 # include "incls/_orderAccess.cpp.incl"

-volatile intptr_t OrderAccess::dummy = 0;
-
 void OrderAccess::StubRoutines_fence() {
   // Use a stub if it exists. It may not exist during bootstrap so do
   // nothing in that case but assert if no fence code exists after threads have been created
diff --git a/hotspot/src/share/vm/runtime/orderAccess.hpp b/hotspot/src/share/vm/runtime/orderAccess.hpp
index 42c9227f5a2..28b049a43fc 100644
--- a/hotspot/src/share/vm/runtime/orderAccess.hpp
+++ b/hotspot/src/share/vm/runtime/orderAccess.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -166,6 +166,12 @@
 // and release must include a sequence point, usually via a volatile memory
 // access. Other ways to guarantee a sequence point are, e.g., use of
 // indirect calls and linux's __asm__ volatile.
+// Note: as of 6973570, we have replaced the originally static "dummy" field
+// (see above) by a volatile store to the stack. All of the versions of the
+// compilers that we currently use (SunStudio, gcc and VC++) respect the
+// semantics of volatile here. If you build HotSpot using other
+// compilers, you may need to verify that no compiler reordering occurs
+// across the sequence point represented by the volatile access.
 //
 //
 //                os::is_MP Considered Redundant
@@ -297,10 +303,6 @@ class OrderAccess : AllStatic {
   static void     release_store_ptr_fence(volatile intptr_t* p, intptr_t v);
   static void     release_store_ptr_fence(volatile void*     p, void*    v);

-  // In order to force a memory access, implementations may
-  // need a volatile externally visible dummy variable.
-  static volatile intptr_t dummy;
-
  private:
   // This is a helper that invokes the StubRoutines::fence_entry()
   // routine if it exists, It should only be used by platforms that
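The OrderAccess change above keeps release() as a compiler-level sequence point but moves the dummy volatile store from a single static shared by every thread to a slot on the calling thread's stack, so heavy use of release()/storestore() no longer bounces one cache line between sockets. Reduced to a standalone sketch (illustrative only; the real code is the per-platform inline assembly and volatile stores in the patch):

    #include <stdint.h>

    // Before 6973570: every thread stores to the same static, so the cache line
    // that holds it ping-pongs between CPUs whenever release() is hot.
    static volatile intptr_t shared_dummy = 0;

    inline void release_via_static() {
      shared_dummy = 0;                    // volatile store: a sequence point, but globally contended
    }

    // After 6973570: each thread stores to its own stack slot; still a volatile
    // store (so still a sequence point), with no cross-thread cache-line traffic.
    inline void release_via_stack() {
      volatile intptr_t local_dummy = 0;   // volatile keeps the store from being optimized away
      (void)local_dummy;                   // silence unused-variable warnings
    }

Both forms give the compiler a store it may not elide; only the first makes every thread contend for the cache line holding the shared variable.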
From bb7d4b2b35cefcf9d7129c5f6cf6f6ba156b7b79 Mon Sep 17 00:00:00 2001
From: John Coomes
Date: Wed, 11 Aug 2010 13:12:28 -0700
Subject: [PATCH 9/9] 6976378: ParNew: stats are printed unconditionally in debug builds

Reviewed-by: tonyp
---
 .../share/vm/gc_implementation/parNew/parNewGeneration.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp
index 45683b17572..e31e2854664 100644
--- a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp
+++ b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp
@@ -970,8 +970,10 @@ void ParNewGeneration::collect(bool full,
     gch->print_heap_change(gch_prev_used);
   }

-  TASKQUEUE_STATS_ONLY(thread_state_set.print_termination_stats());
-  TASKQUEUE_STATS_ONLY(thread_state_set.print_taskqueue_stats());
+  if (PrintGCDetails && ParallelGCVerbose) {
+    TASKQUEUE_STATS_ONLY(thread_state_set.print_termination_stats());
+    TASKQUEUE_STATS_ONLY(thread_state_set.print_taskqueue_stats());
+  }

   if (UseAdaptiveSizePolicy) {
     size_policy->minor_collection_end(gch->gc_cause());
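With this final patch, a debug or fastdebug build (where TASKQUEUE_STATS is compiled in) prints the per-thread task-queue summary only when both flags are on, for example along the lines of the following command line (YourApp is a placeholder):

    java -XX:+UseParNewGC -XX:+PrintGCDetails -XX:+ParallelGCVerbose YourApp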