Merge

2009-10-23 18:44:33 -07:00 · 2009-10-23 18:44:33 -07:00 · bfa076da18
commit bfa076da18
parent 6dfb497d00 da1b89b746
28 changed files with 529 additions and 327 deletions
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp
@ -377,3 +377,11 @@ void ConcurrentG1Refine::clear_and_record_card_counts() {
  _g1h->g1_policy()->record_cc_clear_time(elapsed * 1000.0);
 #endif
 }
+
+void ConcurrentG1Refine::print_worker_threads_on(outputStream* st) const {
+  for (int i = 0; i < _n_threads; ++i) {
+    _threads[i]->print_on(st);
+    st->cr();
+  }
+}
+
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp
@ -179,4 +179,6 @@ class ConcurrentG1Refine: public CHeapObj {
  void clear_and_record_card_counts();

  static size_t thread_num();
+
+  void print_worker_threads_on(outputStream* st) const;
 };
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp
@ -204,8 +204,12 @@ void ConcurrentG1RefineThread::stop() {
  if (G1TraceConcurrentRefinement) gclog_or_tty->print_cr("G1-Refine-stop");
 }

-void ConcurrentG1RefineThread::print() {
-  gclog_or_tty->print("\"Concurrent G1 Refinement Thread\" ");
-  Thread::print();
-  gclog_or_tty->cr();
+void ConcurrentG1RefineThread::print() const {
+  print_on(tty);
+}
+
+void ConcurrentG1RefineThread::print_on(outputStream* st) const {
+  st->print("\"G1 Concurrent Refinement Thread#%d\" ", _worker_id);
+  Thread::print_on(st);
+  st->cr();
 }
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp
@ -77,7 +77,8 @@ class ConcurrentG1RefineThread: public ConcurrentGCThread {
                           int worker_id_offset, int worker_id);

  // Printing
-  void print();
+  void print() const;
+  void print_on(outputStream* st) const;

  // Total virtual time so far.
  double vtime_accum() { return _vtime_accum; }
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
@ -237,7 +237,7 @@ void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
-    guarantee(ind < _capacity, "By overflow test above.");
+    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
 }
@ -310,12 +310,12 @@ MemRegion CMRegionStack::pop() {
    if (res == index) {
      MemRegion mr = _base[next_index];
      if (mr.start() != NULL) {
-        tmp_guarantee_CM( mr.end() != NULL, "invariant" );
-        tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
+        assert(mr.end() != NULL, "invariant");
+        assert(mr.word_size() > 0, "invariant");
        return mr;
      } else {
        // that entry was invalidated... let's skip it
-        tmp_guarantee_CM( mr.end() == NULL, "invariant" );
+        assert(mr.end() == NULL, "invariant");
      }
    }
    // Otherwise, we need to try again.
@ -328,10 +328,10 @@ bool CMRegionStack::invalidate_entries_into_cset() {
  for (int i = 0; i < _oops_do_bound; ++i) {
    MemRegion mr = _base[i];
    if (mr.start() != NULL) {
-      tmp_guarantee_CM( mr.end() != NULL, "invariant");
-      tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
+      assert(mr.end() != NULL, "invariant");
+      assert(mr.word_size() > 0, "invariant");
      HeapRegion* hr = g1h->heap_region_containing(mr.start());
-      tmp_guarantee_CM( hr != NULL, "invariant" );
+      assert(hr != NULL, "invariant");
      if (hr->in_collection_set()) {
        // The region points into the collection set
        _base[i] = MemRegion();
@ -339,7 +339,7 @@ bool CMRegionStack::invalidate_entries_into_cset() {
      }
    } else {
      // that entry was invalidated... let's skip it
-      tmp_guarantee_CM( mr.end() == NULL, "invariant" );
+      assert(mr.end() == NULL, "invariant");
    }
  }
  return result;
@ -542,8 +542,8 @@ ConcurrentMark::ConcurrentMark(ReservedSpace rs,
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 #endif

-    guarantee( parallel_marking_threads() > 0, "peace of mind" );
-    _parallel_workers = new WorkGang("Parallel Marking Threads",
+    guarantee(parallel_marking_threads() > 0, "peace of mind");
+    _parallel_workers = new WorkGang("G1 Parallel Marking Threads",
                                     (int) parallel_marking_threads(), false, true);
    if (_parallel_workers == NULL)
      vm_exit_during_initialization("Failed necessary allocation.");
@ -569,8 +569,7 @@ void ConcurrentMark::update_g1_committed(bool force) {
    return;

  MemRegion committed = _g1h->g1_committed();
-  tmp_guarantee_CM( committed.start() == _heap_start,
-                    "start shouldn't change" );
+  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
@ -592,9 +591,10 @@ void ConcurrentMark::reset() {
  _heap_start = committed.start();
  _heap_end   = committed.end();

-  guarantee( _heap_start != NULL &&
-             _heap_end != NULL   &&
-             _heap_start < _heap_end, "heap bounds should look ok" );
+  // Separated the asserts so that we know which one fires.
+  assert(_heap_start != NULL, "heap bounds should look ok");
+  assert(_heap_end != NULL, "heap bounds should look ok");
+  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // reset all the marking data structures and any necessary flags
  clear_marking_state();
@ -614,7 +614,7 @@ void ConcurrentMark::reset() {
 }

 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
-  guarantee( active_tasks <= _max_task_num, "we should not have more" );
+  assert(active_tasks <= _max_task_num, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
@ -634,8 +634,8 @@ void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
-    guarantee( !concurrent_marking_in_progress(), "invariant" );
-    guarantee( _finger == _heap_end, "only way to get here" );
+    assert(!concurrent_marking_in_progress(), "invariant");
+    assert(_finger == _heap_end, "only way to get here");
    update_g1_committed(true);
  }
 }
@ -933,8 +933,8 @@ void ConcurrentMark::grayRoot(oop p) {
  // initial-mark that the committed space is expanded during the
  // pause without CM observing this change. So the assertions below
  // is a bit conservative; but better than nothing.
-  tmp_guarantee_CM( _g1h->g1_committed().contains(addr),
-                    "address should be within the heap bounds" );
+  assert(_g1h->g1_committed().contains(addr),
+         "address should be within the heap bounds");

  if (!_nextMarkBitMap->isMarked(addr))
    _nextMarkBitMap->parMark(addr);
@ -960,12 +960,15 @@ void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
  if (mr.start() < finger) {
    // The finger is always heap region aligned and it is not possible
    // for mr to span heap regions.
-    tmp_guarantee_CM( mr.end() <= finger, "invariant" );
+    assert(mr.end() <= finger, "invariant");

-    tmp_guarantee_CM( mr.start() <= mr.end() &&
-                      _heap_start <= mr.start() &&
-                      mr.end() <= _heap_end,
-                  "region boundaries should fall within the committed space" );
+    // Separated the asserts so that we know which one fires.
+    assert(mr.start() <= mr.end(),
+           "region boundaries should fall within the committed space");
+    assert(_heap_start <= mr.start(),
+           "region boundaries should fall within the committed space");
+    assert(mr.end() <= _heap_end,
+           "region boundaries should fall within the committed space");
    if (verbose_low())
      gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
                             "below the finger, pushing it",
@ -1014,14 +1017,14 @@ private:

 public:
  void work(int worker_i) {
-    guarantee( Thread::current()->is_ConcurrentGC_thread(),
-               "this should only be done by a conc GC thread" );
+    assert(Thread::current()->is_ConcurrentGC_thread(),
+           "this should only be done by a conc GC thread");

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

-    guarantee( (size_t)worker_i < _cm->active_tasks(), "invariant" );
+    assert((size_t) worker_i < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_i);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
@ -1059,7 +1062,7 @@ public:
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
-    guarantee( !the_task->has_aborted() || _cm->has_aborted(), "invariant" );
+    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

@ -1182,8 +1185,7 @@ class CalcLiveObjectsClosure: public HeapRegionClosure {
  void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
    for (intptr_t i = start_card_num; i <= last_card_num; i++) {
 #if CARD_BM_TEST_MODE
-      guarantee(_card_bm->at(i - _bottom_card_num),
-                "Should already be set.");
+      guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
 #else
      _card_bm->par_at_put(i - _bottom_card_num, 1);
 #endif
@ -1328,7 +1330,7 @@ public:
      // In any case, we set the last card num.
      last_card_num = obj_last_card_num;

-      marked_bytes += obj_sz * HeapWordSize;
+      marked_bytes += (size_t)obj_sz * HeapWordSize;
      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
      _changed = true;
@ -1442,7 +1444,7 @@ public:
    }
    assert(calccl.complete(), "Shouldn't have yielded!");

-    guarantee( (size_t)i < _n_workers, "invariant" );
+    assert((size_t) i < _n_workers, "invariant");
    _live_bytes[i] = calccl.tot_live();
    _used_bytes[i] = calccl.tot_used();
  }
@ -1774,14 +1776,14 @@ void ConcurrentMark::completeCleanup() {
      hd->rem_set()->clear();
      HeapRegion* next_hd = hd->next_from_unclean_list();
      (void)list->pop();
-      guarantee(list->hd() == next_hd, "how not?");
+      assert(list->hd() == next_hd, "how not?");
      _g1h->put_region_on_unclean_list(hd);
      if (!hd->isHumongous()) {
        // Add this to the _free_regions count by 1.
        _g1h->finish_free_region_work(0, 0, 1, NULL);
      }
      hd = list->hd();
-      guarantee(hd == next_hd, "how not?");
+      assert(hd == next_hd, "how not?");
    }
  }
 }
@ -1931,9 +1933,6 @@ void ConcurrentMark::checkpointRootsFinalWork() {
    g1h->set_par_threads(n_workers);
    g1h->workers()->run_task(&remarkTask);
    g1h->set_par_threads(0);
-
-    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
-    guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
  } else {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    // this is remark, so we'll use up all available threads
@ -1945,10 +1944,9 @@ void ConcurrentMark::checkpointRootsFinalWork() {
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    remarkTask.work(0);
-
-    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
-    guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
  }
+  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
+  guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");

  print_stats();

@ -1989,7 +1987,7 @@ public:
      str = "outside G1 reserved";
    else {
      HeapRegion* hr  = _g1h->heap_region_containing(obj);
-      guarantee( hr != NULL, "invariant" );
+      guarantee(hr != NULL, "invariant");
      if (hr->obj_allocated_since_prev_marking(obj)) {
        str = "over TAMS";
        if (_bitmap->isMarked((HeapWord*) obj))
@ -2125,7 +2123,7 @@ void ConcurrentMark::deal_with_reference(oop obj) {
  HeapWord* objAddr = (HeapWord*) obj;
  assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  if (_g1h->is_in_g1_reserved(objAddr)) {
-    tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
+    assert(obj != NULL, "is_in_g1_reserved should ensure this");
    HeapRegion* hr = _g1h->heap_region_containing(obj);
    if (_g1h->is_obj_ill(obj, hr)) {
      if (verbose_high())
@ -2167,7 +2165,7 @@ void ConcurrentMark::drainAllSATBBuffers() {
  satb_mq_set.iterate_closure_all_threads();

  satb_mq_set.set_closure(NULL);
-  guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
+  assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
 }

 void ConcurrentMark::markPrev(oop p) {
@ -2200,7 +2198,7 @@ ConcurrentMark::claim_region(int task_num) {
  // _heap_end will not change underneath our feet; it only changes at
  // yield points.
  while (finger < _heap_end) {
-    tmp_guarantee_CM( _g1h->is_in_g1_reserved(finger), "invariant" );
+    assert(_g1h->is_in_g1_reserved(finger), "invariant");

    // is the gap between reading the finger and doing the CAS too long?

@ -2222,7 +2220,7 @@ ConcurrentMark::claim_region(int task_num) {

      // notice that _finger == end cannot be guaranteed here since,
      // someone else might have moved the finger even further
-      guarantee( _finger >= end, "the finger should have moved forward" );
+      assert(_finger >= end, "the finger should have moved forward");

      if (verbose_low())
        gclog_or_tty->print_cr("[%d] we were successful with region = "
@ -2234,8 +2232,8 @@ ConcurrentMark::claim_region(int task_num) {
                                 "returning it ", task_num, curr_region);
        return curr_region;
      } else {
-        tmp_guarantee_CM( limit == bottom,
-                          "the region limit should be at bottom" );
+        assert(limit == bottom,
+               "the region limit should be at bottom");
        if (verbose_low())
          gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
                                 "returning NULL", task_num, curr_region);
@ -2244,7 +2242,7 @@ ConcurrentMark::claim_region(int task_num) {
        return NULL;
      }
    } else {
-      guarantee( _finger > finger, "the finger should have moved forward" );
+      assert(_finger > finger, "the finger should have moved forward");
      if (verbose_low())
        gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
                               "global finger = "PTR_FORMAT", "
@ -2282,7 +2280,7 @@ void ConcurrentMark::oops_do(OopClosure* cl) {
  if (_regionStack.invalidate_entries_into_cset()) {
    // otherwise, any gray objects copied during the evacuation pause
    // might not be visited.
-    guarantee( _should_gray_objects, "invariant" );
+    assert(_should_gray_objects, "invariant");
  }
 }

@ -2637,6 +2635,10 @@ void ConcurrentMark::print_summary_info() {
                cmThread()->vtime_count_accum());
 }

+void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
+  _parallel_workers->print_worker_threads_on(st);
+}
+
 // Closures
 // XXX: there seems to be a lot of code  duplication here;
 // should refactor and consolidate the shared code.
@ -2711,12 +2713,12 @@ public:

  bool do_bit(size_t offset) {
    HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
-    tmp_guarantee_CM( _nextMarkBitMap->isMarked(addr), "invariant" );
-    tmp_guarantee_CM( addr < _cm->finger(), "invariant" );
+    assert(_nextMarkBitMap->isMarked(addr), "invariant");
+    assert( addr < _cm->finger(), "invariant");

    if (_scanning_heap_region) {
      statsOnly( _task->increase_objs_found_on_bitmap() );
-      tmp_guarantee_CM( addr >= _task->finger(), "invariant" );
+      assert(addr >= _task->finger(), "invariant");
      // We move that task's local finger along.
      _task->move_finger_to(addr);
    } else {
@ -2761,8 +2763,9 @@ public:
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
-    tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant" );
-    tmp_guarantee_CM( !_g1h->heap_region_containing((HeapWord*) p)->is_on_free_list(), "invariant" );
+    assert(_g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
+    assert(!_g1h->heap_region_containing((HeapWord*) p)->is_on_free_list(),
+           "invariant");

    oop obj = oopDesc::load_decode_heap_oop(p);
    if (_cm->verbose_high())
@ -2779,8 +2782,11 @@ public:
 };

 void CMTask::setup_for_region(HeapRegion* hr) {
-  tmp_guarantee_CM( hr != NULL && !hr->continuesHumongous(),
-      "claim_region() should have filtered out continues humongous regions" );
+  // Separated the asserts so that we know which one fires.
+  assert(hr != NULL,
+        "claim_region() should have filtered out continues humongous regions");
+  assert(!hr->continuesHumongous(),
+        "claim_region() should have filtered out continues humongous regions");

  if (_cm->verbose_low())
    gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
@ -2808,9 +2814,9 @@ void CMTask::update_region_limit() {
    // as the region is not supposed to be empty in the first place)
    _finger = bottom;
  } else if (limit >= _region_limit) {
-    tmp_guarantee_CM( limit >= _finger, "peace of mind" );
+    assert(limit >= _finger, "peace of mind");
  } else {
-    tmp_guarantee_CM( limit < _region_limit, "only way to get here" );
+    assert(limit < _region_limit, "only way to get here");
    // This can happen under some pretty unusual circumstances.  An
    // evacuation pause empties the region underneath our feet (NTAMS
    // at bottom). We then do some allocation in the region (NTAMS
@ -2828,7 +2834,7 @@ void CMTask::update_region_limit() {
 }

 void CMTask::giveup_current_region() {
-  tmp_guarantee_CM( _curr_region != NULL, "invariant" );
+  assert(_curr_region != NULL, "invariant");
  if (_cm->verbose_low())
    gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
                           _task_id, _curr_region);
@ -2846,7 +2852,7 @@ void CMTask::clear_region_fields() {
 }

 void CMTask::reset(CMBitMap* nextMarkBitMap) {
-  guarantee( nextMarkBitMap != NULL, "invariant" );
+  guarantee(nextMarkBitMap != NULL, "invariant");

  if (_cm->verbose_low())
    gclog_or_tty->print_cr("[%d] resetting", _task_id);
@ -2912,7 +2918,7 @@ void CMTask::deal_with_reference(oop obj) {
  HeapWord* objAddr = (HeapWord*) obj;
  assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  if (_g1h->is_in_g1_reserved(objAddr)) {
-    tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
+    assert(obj != NULL, "is_in_g1_reserved should ensure this");
    HeapRegion* hr =  _g1h->heap_region_containing(obj);
    if (_g1h->is_obj_ill(obj, hr)) {
      if (_cm->verbose_high())
@ -2973,10 +2979,11 @@ void CMTask::deal_with_reference(oop obj) {

 void CMTask::push(oop obj) {
  HeapWord* objAddr = (HeapWord*) obj;
-  tmp_guarantee_CM( _g1h->is_in_g1_reserved(objAddr), "invariant" );
-  tmp_guarantee_CM( !_g1h->heap_region_containing(objAddr)->is_on_free_list(), "invariant" );
-  tmp_guarantee_CM( !_g1h->is_obj_ill(obj), "invariant" );
-  tmp_guarantee_CM( _nextMarkBitMap->isMarked(objAddr), "invariant" );
+  assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
+  assert(!_g1h->heap_region_containing(objAddr)->is_on_free_list(),
+         "invariant");
+  assert(!_g1h->is_obj_ill(obj), "invariant");
+  assert(_nextMarkBitMap->isMarked(objAddr), "invariant");

  if (_cm->verbose_high())
    gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
@ -2995,7 +3002,7 @@ void CMTask::push(oop obj) {
    // stack, we should have definitely removed some entries from the
    // local queue. So, there must be space on it.
    bool success = _task_queue->push(obj);
-    tmp_guarantee_CM( success, "invariant" );
+    assert(success, "invariant");
  }

  statsOnly( int tmp_size = _task_queue->size();
@ -3005,9 +3012,9 @@ void CMTask::push(oop obj) {
 }

 void CMTask::reached_limit() {
-  tmp_guarantee_CM( _words_scanned >= _words_scanned_limit ||
-                    _refs_reached >= _refs_reached_limit ,
-                 "shouldn't have been called otherwise" );
+  assert(_words_scanned >= _words_scanned_limit ||
+         _refs_reached >= _refs_reached_limit ,
+         "shouldn't have been called otherwise");
  regular_clock_call();
 }

@ -3165,8 +3172,8 @@ void CMTask::get_entries_from_global_stack() {
  oop buffer[global_stack_transfer_size];
  int n;
  _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
-  tmp_guarantee_CM( n <= global_stack_transfer_size,
-                    "we should not pop more than the given limit" );
+  assert(n <= global_stack_transfer_size,
+         "we should not pop more than the given limit");
  if (n > 0) {
    // yes, we did actually pop at least one entry

@ -3178,7 +3185,7 @@ void CMTask::get_entries_from_global_stack() {
      bool success = _task_queue->push(buffer[i]);
      // We only call this when the local queue is empty or under a
      // given target limit. So, we do not expect this push to fail.
-      tmp_guarantee_CM( success, "invariant" );
+      assert(success, "invariant");
    }

    statsOnly( int tmp_size = _task_queue->size();
@ -3218,10 +3225,9 @@ void CMTask::drain_local_queue(bool partially) {
        gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
                               (void*) obj);

-      tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) obj),
-                        "invariant" );
-      tmp_guarantee_CM( !_g1h->heap_region_containing(obj)->is_on_free_list(),
-                        "invariant" );
+      assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
+      assert(!_g1h->heap_region_containing(obj)->is_on_free_list(),
+             "invariant");

      scan_object(obj);

@ -3243,7 +3249,7 @@ void CMTask::drain_global_stack(bool partially) {

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
-  tmp_guarantee_CM( partially || _task_queue->size() == 0, "invariant" );
+  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending whether we're going to
  // drain it partially (so that other tasks can steal if they run out
@ -3324,9 +3330,9 @@ void CMTask::drain_satb_buffers() {

  _draining_satb_buffers = false;

-  tmp_guarantee_CM( has_aborted() ||
-                    concurrent() ||
-                    satb_mq_set.completed_buffers_num() == 0, "invariant" );
+  assert(has_aborted() ||
+         concurrent() ||
+         satb_mq_set.completed_buffers_num() == 0, "invariant");

  if (ParallelGCThreads > 0)
    satb_mq_set.set_par_closure(_task_id, NULL);
@ -3342,8 +3348,8 @@ void CMTask::drain_region_stack(BitMapClosure* bc) {
  if (has_aborted())
    return;

-  tmp_guarantee_CM( _region_finger == NULL,
-                    "it should be NULL when we're not scanning a region" );
+  assert(_region_finger == NULL,
+         "it should be NULL when we're not scanning a region");

  if (!_cm->region_stack_empty()) {
    if (_cm->verbose_low())
@ -3359,12 +3365,12 @@ void CMTask::drain_region_stack(BitMapClosure* bc) {
        gclog_or_tty->print_cr("[%d] we are scanning region "
                               "["PTR_FORMAT", "PTR_FORMAT")",
                               _task_id, mr.start(), mr.end());
-      tmp_guarantee_CM( mr.end() <= _cm->finger(),
-                        "otherwise the region shouldn't be on the stack" );
+      assert(mr.end() <= _cm->finger(),
+             "otherwise the region shouldn't be on the stack");
      assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
      if (_nextMarkBitMap->iterate(bc, mr)) {
-        tmp_guarantee_CM( !has_aborted(),
-               "cannot abort the task without aborting the bitmap iteration" );
+        assert(!has_aborted(),
+               "cannot abort the task without aborting the bitmap iteration");

        // We finished iterating over the region without aborting.
        regular_clock_call();
@ -3376,14 +3382,14 @@ void CMTask::drain_region_stack(BitMapClosure* bc) {
          statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
        }
      } else {
-        guarantee( has_aborted(), "currently the only way to do so" );
+        assert(has_aborted(), "currently the only way to do so");

        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _region_finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _region_finger to something non-null.
-        guarantee( _region_finger != NULL, "invariant" );
+        assert(_region_finger != NULL, "invariant");

        // The iteration was actually aborted. So now _region_finger
        // points to the address of the object we last scanned. If we
@ -3412,13 +3418,6 @@ void CMTask::drain_region_stack(BitMapClosure* bc) {
      _region_finger = NULL;
    }

-    // We only push regions on the region stack during evacuation
-    // pauses. So if we come out the above iteration because we region
-    // stack is empty, it will remain empty until the next yield
-    // point. So, the guarantee below is safe.
-    guarantee( has_aborted() || _cm->region_stack_empty(),
-               "only way to exit the loop" );
-
    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
                             _task_id, _cm->region_stack_size());
@ -3576,21 +3575,21 @@ void CMTask::print_stats() {
 *****************************************************************************/

 void CMTask::do_marking_step(double time_target_ms) {
-  guarantee( time_target_ms >= 1.0, "minimum granularity is 1ms" );
-  guarantee( concurrent() == _cm->concurrent(), "they should be the same" );
+  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
+  assert(concurrent() == _cm->concurrent(), "they should be the same");

-  guarantee( concurrent() || _cm->region_stack_empty(),
-             "the region stack should have been cleared before remark" );
-  guarantee( _region_finger == NULL,
-             "this should be non-null only when a region is being scanned" );
+  assert(concurrent() || _cm->region_stack_empty(),
+         "the region stack should have been cleared before remark");
+  assert(_region_finger == NULL,
+         "this should be non-null only when a region is being scanned");

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
-  guarantee( _task_queues != NULL, "invariant" );
-  guarantee( _task_queue != NULL,  "invariant" );
-  guarantee( _task_queues->queue(_task_id) == _task_queue, "invariant" );
+  assert(_task_queues != NULL, "invariant");
+  assert(_task_queue != NULL, "invariant");
+  assert(_task_queues->queue(_task_id) == _task_queue, "invariant");

-  guarantee( !_claimed,
-             "only one thread should claim this task at any one time" );
+  assert(!_claimed,
+         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard again all possible scenarios, as it is
  // possible for two threads to set the _claimed flag at the same
@ -3661,9 +3660,8 @@ void CMTask::do_marking_step(double time_target_ms) {
  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
-      tmp_guarantee_CM( _finger != NULL,
-                        "if region is not NULL, then the finger "
-                        "should not be NULL either" );
+      assert(_finger != NULL, "if region is not NULL, then the finger "
+             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
@ -3695,13 +3693,13 @@ void CMTask::do_marking_step(double time_target_ms) {
        giveup_current_region();
        regular_clock_call();
      } else {
-        guarantee( has_aborted(), "currently the only way to do so" );
+        assert(has_aborted(), "currently the only way to do so");
        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _finger to something non-null.
-        guarantee( _finger != NULL, "invariant" );
+        assert(_finger != NULL, "invariant");

        // Region iteration was actually aborted. So now _finger
        // points to the address of the object we last scanned. If we
@ -3728,9 +3726,10 @@ void CMTask::do_marking_step(double time_target_ms) {
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
-      tmp_guarantee_CM( _curr_region  == NULL &&
-                        _finger       == NULL &&
-                        _region_limit == NULL, "invariant" );
+      // Separated the asserts so that we know which one fires.
+      assert(_curr_region  == NULL, "invariant");
+      assert(_finger       == NULL, "invariant");
+      assert(_region_limit == NULL, "invariant");
      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
      HeapRegion* claimed_region = _cm->claim_region(_task_id);
@ -3744,7 +3743,7 @@ void CMTask::do_marking_step(double time_target_ms) {
                                 _task_id, claimed_region);

        setup_for_region(claimed_region);
-        tmp_guarantee_CM( _curr_region == claimed_region, "invariant" );
+        assert(_curr_region == claimed_region, "invariant");
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
@ -3755,8 +3754,8 @@ void CMTask::do_marking_step(double time_target_ms) {
    }

    if (!has_aborted() && _curr_region == NULL) {
-      tmp_guarantee_CM( _cm->out_of_regions(),
-                        "at this point we should be out of regions" );
+      assert(_cm->out_of_regions(),
+             "at this point we should be out of regions");
    }
  } while ( _curr_region != NULL && !has_aborted());

@ -3765,8 +3764,8 @@ void CMTask::do_marking_step(double time_target_ms) {
    // tasks might be pushing objects to it concurrently. We also cannot
    // check if the region stack is empty because if a thread is aborting
    // it can push a partially done region back.
-    tmp_guarantee_CM( _cm->out_of_regions(),
-                      "at this point we should be out of regions" );
+    assert(_cm->out_of_regions(),
+           "at this point we should be out of regions");

    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
@ -3790,8 +3789,8 @@ void CMTask::do_marking_step(double time_target_ms) {
    // tasks might be pushing objects to it concurrently. We also cannot
    // check if the region stack is empty because if a thread is aborting
    // it can push a partially done region back.
-    guarantee( _cm->out_of_regions() &&
-               _task_queue->size() == 0, "only way to reach here" );
+    assert(_cm->out_of_regions() && _task_queue->size() == 0,
+           "only way to reach here");

    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
@ -3807,8 +3806,8 @@ void CMTask::do_marking_step(double time_target_ms) {

        statsOnly( ++_steals );

-        tmp_guarantee_CM( _nextMarkBitMap->isMarked((HeapWord*) obj),
-                          "any stolen object should be marked" );
+        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
+               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
@ -3828,8 +3827,9 @@ void CMTask::do_marking_step(double time_target_ms) {
    // tasks might be concurrently pushing objects on it. We also cannot
    // check if the region stack is empty because if a thread is aborting
    // it can push a partially done region back.
-    guarantee( _cm->out_of_regions() &&
-               _task_queue->size() == 0, "only way to reach here" );
+    // Separated the asserts so that we know which one fires.
+    assert(_cm->out_of_regions(), "only way to reach here");
+    assert(_task_queue->size() == 0, "only way to reach here");

    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
@ -3849,7 +3849,7 @@ void CMTask::do_marking_step(double time_target_ms) {
      if (_task_id == 0) {
        // let's allow task 0 to do this
        if (concurrent()) {
-          guarantee( _cm->concurrent_marking_in_progress(), "invariant" );
+          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // we need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
@ -3858,15 +3858,16 @@ void CMTask::do_marking_step(double time_target_ms) {
      }

      // We can now guarantee that the global stack is empty, since
-      // all other tasks have finished.
-      guarantee( _cm->out_of_regions() &&
-                 _cm->region_stack_empty() &&
-                 _cm->mark_stack_empty() &&
-                 _task_queue->size() == 0 &&
-                 !_cm->has_overflown() &&
-                 !_cm->mark_stack_overflow() &&
-                 !_cm->region_stack_overflow(),
-                 "only way to reach here" );
+      // all other tasks have finished. We separated the guarantees so
+      // that, if a condition is false, we can immediately find out
+      // which one.
+      guarantee(_cm->out_of_regions(), "only way to reach here");
+      guarantee(_cm->region_stack_empty(), "only way to reach here");
+      guarantee(_cm->mark_stack_empty(), "only way to reach here");
+      guarantee(_task_queue->size() == 0, "only way to reach here");
+      guarantee(!_cm->has_overflown(), "only way to reach here");
+      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
+      guarantee(!_cm->region_stack_overflow(), "only way to reach here");

      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
@ -3961,8 +3962,8 @@ CMTask::CMTask(int task_id,
    _task_queue(task_queue),
    _task_queues(task_queues),
    _oop_closure(NULL) {
-  guarantee( task_queue != NULL, "invariant" );
-  guarantee( task_queues != NULL, "invariant" );
+  guarantee(task_queue != NULL, "invariant");
+  guarantee(task_queues != NULL, "invariant");

  statsOnly( _clock_due_to_scanning = 0;
             _clock_due_to_marking  = 0 );
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp
@ -295,12 +295,6 @@ do {                          \
 } while (0)
 #endif // _MARKING_STATS_

-// Some extra guarantees that I like to also enable in optimised mode
-// when debugging. If you want to enable them, comment out the assert
-// macro and uncomment out the guaratee macro
-// #define tmp_guarantee_CM(expr, str) guarantee(expr, str)
-#define tmp_guarantee_CM(expr, str) assert(expr, str)
-
 typedef enum {
  no_verbose  = 0,   // verbose turned off
  stats_verbose,     // only prints stats at the end of marking
@ -485,15 +479,15 @@ protected:

  // Returns the task with the given id
  CMTask* task(int id) {
-    guarantee( 0 <= id && id < (int) _active_tasks, "task id not within "
-               "active bounds" );
+    assert(0 <= id && id < (int) _active_tasks,
+           "task id not within active bounds");
    return _tasks[id];
  }

  // Returns the task queue with the given id
  CMTaskQueue* task_queue(int id) {
-    guarantee( 0 <= id && id < (int) _active_tasks, "task queue id not within "
-               "active bounds" );
+    assert(0 <= id && id < (int) _active_tasks,
+           "task queue id not within active bounds");
    return (CMTaskQueue*) _task_queues->queue(id);
  }

@ -723,6 +717,8 @@ public:

  void print_summary_info();

+  void print_worker_threads_on(outputStream* st) const;
+
  // The following indicate whether a given verbose level has been
  // set. Notice that anything above stats is conditional to
  // _MARKING_VERBOSE_ having been set to 1
@ -959,8 +955,7 @@ public:

  // It scans an object and visits its children.
  void scan_object(oop obj) {
-    tmp_guarantee_CM( _nextMarkBitMap->isMarked((HeapWord*) obj),
-                      "invariant" );
+    assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");

    if (_cm->verbose_high())
      gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
@ -999,14 +994,13 @@ public:

  // moves the local finger to a new location
  inline void move_finger_to(HeapWord* new_finger) {
-    tmp_guarantee_CM( new_finger >= _finger && new_finger < _region_limit,
-                   "invariant" );
+    assert(new_finger >= _finger && new_finger < _region_limit, "invariant");
    _finger = new_finger;
  }

  // moves the region finger to a new location
  inline void move_region_finger_to(HeapWord* new_finger) {
-    tmp_guarantee_CM( new_finger < _cm->finger(), "invariant" );
+    assert(new_finger < _cm->finger(), "invariant");
    _region_finger = new_finger;
  }

--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp
@ -286,10 +286,14 @@ void ConcurrentMarkThread::stop() {
  }
 }

-void ConcurrentMarkThread::print() {
-  gclog_or_tty->print("\"Concurrent Mark GC Thread\" ");
-  Thread::print();
-  gclog_or_tty->cr();
+void ConcurrentMarkThread::print() const {
+  print_on(tty);
+}
+
+void ConcurrentMarkThread::print_on(outputStream* st) const {
+  st->print("\"G1 Main Concurrent Mark GC Thread\" ");
+  Thread::print_on(st);
+  st->cr();
 }

 void ConcurrentMarkThread::sleepBeforeNextCycle() {
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp
@ -57,7 +57,8 @@ class ConcurrentMarkThread: public ConcurrentGCThread {
  static SurrogateLockerThread* slt() { return _slt; }

  // Printing
-  void print();
+  void print_on(outputStream* st) const;
+  void print() const;

  // Total virtual time so far.
  double vtime_accum();
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentZFThread.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentZFThread.cpp
@ -157,10 +157,14 @@ void ConcurrentZFThread::stop() {
  }
 }

-void ConcurrentZFThread::print() {
-  gclog_or_tty->print("\"Concurrent ZF Thread\" ");
-  Thread::print();
-  gclog_or_tty->cr();
+void ConcurrentZFThread::print() const {
+  print_on(tty);
+}
+
+void ConcurrentZFThread::print_on(outputStream* st) const {
+  st->print("\"G1 Concurrent Zero-Fill Thread\" ");
+  Thread::print_on(st);
+  st->cr();
 }


--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentZFThread.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentZFThread.hpp
@ -61,7 +61,8 @@ class ConcurrentZFThread: public ConcurrentGCThread {
  virtual void run();

  // Printing
-  void print();
+  void print_on(outputStream* st) const;
+  void print() const;

  // Waits until "r" has been zero-filled.  Requires caller to hold the
  // ZF_mon.
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
@ -2210,40 +2210,58 @@ private:
  bool _allow_dirty;
  bool _par;
  bool _use_prev_marking;
+  bool _failures;
 public:
  // use_prev_marking == true  -> use "prev" marking information,
  // use_prev_marking == false -> use "next" marking information
  VerifyRegionClosure(bool allow_dirty, bool par, bool use_prev_marking)
    : _allow_dirty(allow_dirty),
      _par(par),
-      _use_prev_marking(use_prev_marking) {}
+      _use_prev_marking(use_prev_marking),
+      _failures(false) {}
+
+  bool failures() {
+    return _failures;
+  }

  bool doHeapRegion(HeapRegion* r) {
    guarantee(_par || r->claim_value() == HeapRegion::InitialClaimValue,
              "Should be unclaimed at verify points.");
    if (!r->continuesHumongous()) {
-      VerifyObjsInRegionClosure not_dead_yet_cl(r, _use_prev_marking);
-      r->verify(_allow_dirty, _use_prev_marking);
-      r->object_iterate(&not_dead_yet_cl);
-      guarantee(r->max_live_bytes() >= not_dead_yet_cl.live_bytes(),
-                "More live objects than counted in last complete marking.");
+      bool failures = false;
+      r->verify(_allow_dirty, _use_prev_marking, &failures);
+      if (failures) {
+        _failures = true;
+      } else {
+        VerifyObjsInRegionClosure not_dead_yet_cl(r, _use_prev_marking);
+        r->object_iterate(&not_dead_yet_cl);
+        if (r->max_live_bytes() < not_dead_yet_cl.live_bytes()) {
+          gclog_or_tty->print_cr("["PTR_FORMAT","PTR_FORMAT"] "
+                                 "max_live_bytes "SIZE_FORMAT" "
+                                 "< calculated "SIZE_FORMAT,
+                                 r->bottom(), r->end(),
+                                 r->max_live_bytes(),
+                                 not_dead_yet_cl.live_bytes());
+          _failures = true;
+        }
+      }
    }
-    return false;
+    return false; // stop the region iteration if we hit a failure
  }
 };

 class VerifyRootsClosure: public OopsInGenClosure {
 private:
  G1CollectedHeap* _g1h;
-  bool             _failures;
  bool             _use_prev_marking;
+  bool             _failures;
 public:
  // use_prev_marking == true  -> use "prev" marking information,
  // use_prev_marking == false -> use "next" marking information
  VerifyRootsClosure(bool use_prev_marking) :
    _g1h(G1CollectedHeap::heap()),
-    _failures(false),
-    _use_prev_marking(use_prev_marking) { }
+    _use_prev_marking(use_prev_marking),
+    _failures(false) { }

  bool failures() { return _failures; }

@ -2253,7 +2271,7 @@ public:
      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
      if (_g1h->is_obj_dead_cond(obj, _use_prev_marking)) {
        gclog_or_tty->print_cr("Root location "PTR_FORMAT" "
-                               "points to dead obj "PTR_FORMAT, p, (void*) obj);
+                              "points to dead obj "PTR_FORMAT, p, (void*) obj);
        obj->print_on(gclog_or_tty);
        _failures = true;
      }
@ -2271,6 +2289,7 @@ private:
  G1CollectedHeap* _g1h;
  bool _allow_dirty;
  bool _use_prev_marking;
+  bool _failures;

 public:
  // use_prev_marking == true  -> use "prev" marking information,
@ -2280,13 +2299,21 @@ public:
    AbstractGangTask("Parallel verify task"),
    _g1h(g1h),
    _allow_dirty(allow_dirty),
-    _use_prev_marking(use_prev_marking) { }
+    _use_prev_marking(use_prev_marking),
+    _failures(false) { }
+
+  bool failures() {
+    return _failures;
+  }

  void work(int worker_i) {
    HandleMark hm;
    VerifyRegionClosure blk(_allow_dirty, true, _use_prev_marking);
    _g1h->heap_region_par_iterate_chunked(&blk, worker_i,
                                          HeapRegion::ParVerifyClaimValue);
+    if (blk.failures()) {
+      _failures = true;
+    }
  }
 };

@ -2307,6 +2334,7 @@ void G1CollectedHeap::verify(bool allow_dirty,
                         &rootsCl,
                         &blobsCl,
                         &rootsCl);
+    bool failures = rootsCl.failures();
    rem_set()->invalidate(perm_gen()->used_region(), false);
    if (!silent) { gclog_or_tty->print("heapRegions "); }
    if (GCParallelVerificationEnabled && ParallelGCThreads > 1) {
@ -2318,6 +2346,9 @@ void G1CollectedHeap::verify(bool allow_dirty,
      set_par_threads(n_workers);
      workers()->run_task(&task);
      set_par_threads(0);
+      if (task.failures()) {
+        failures = true;
+      }

      assert(check_heap_region_claim_values(HeapRegion::ParVerifyClaimValue),
             "sanity check");
@ -2329,10 +2360,23 @@ void G1CollectedHeap::verify(bool allow_dirty,
    } else {
      VerifyRegionClosure blk(allow_dirty, false, use_prev_marking);
      _hrs->iterate(&blk);
+      if (blk.failures()) {
+        failures = true;
+      }
    }
    if (!silent) gclog_or_tty->print("remset ");
    rem_set()->verify();
-    guarantee(!rootsCl.failures(), "should not have had failures");
+
+    if (failures) {
+      gclog_or_tty->print_cr("Heap:");
+      print_on(gclog_or_tty, true /* extended */);
+      gclog_or_tty->print_cr("");
+      if (VerifyDuringGC && G1VerifyConcMarkPrintReachable) {
+        concurrent_mark()->print_prev_bitmap_reachable();
+      }
+      gclog_or_tty->flush();
+    }
+    guarantee(!failures, "there should not have been any failures");
  } else {
    if (!silent) gclog_or_tty->print("(SKIPPING roots, heapRegions, remset) ");
  }
@ -2374,6 +2418,7 @@ void G1CollectedHeap::print_on(outputStream* st, bool extended) const {
  st->cr();
  perm()->as_gen()->print_on(st);
  if (extended) {
+    st->cr();
    print_on_extended(st);
  }
 }
@ -2383,27 +2428,18 @@ void G1CollectedHeap::print_on_extended(outputStream* st) const {
  _hrs->iterate(&blk);
 }

-class PrintOnThreadsClosure : public ThreadClosure {
-  outputStream* _st;
-public:
-  PrintOnThreadsClosure(outputStream* st) : _st(st) { }
-  virtual void do_thread(Thread *t) {
-    t->print_on(_st);
-  }
-};
-
 void G1CollectedHeap::print_gc_threads_on(outputStream* st) const {
  if (ParallelGCThreads > 0) {
-    workers()->print_worker_threads();
+    workers()->print_worker_threads_on(st);
  }
-  st->print("\"G1 concurrent mark GC Thread\" ");
-  _cmThread->print();
+
+  _cmThread->print_on(st);
  st->cr();
-  st->print("\"G1 concurrent refinement GC Threads\" ");
-  PrintOnThreadsClosure p(st);
-  _cg1r->threads_do(&p);
-  st->cr();
-  st->print("\"G1 zero-fill GC Thread\" ");
+
+  _cm->print_worker_threads_on(st);
+
+  _cg1r->print_worker_threads_on(st);
+
  _czft->print_on(st);
  st->cr();
 }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp
@ -992,11 +992,39 @@ public:

  // Can a compiler initialize a new object without store barriers?
  // This permission only extends from the creation of a new object
-  // via a TLAB up to the first subsequent safepoint.
+  // via a TLAB up to the first subsequent safepoint. If such permission
+  // is granted for this heap type, the compiler promises to call
+  // defer_store_barrier() below on any slow path allocation of
+  // a new object for which such initializing store barriers will
+  // have been elided. G1, like CMS, allows this, but should be
+  // ready to provide a compensating write barrier as necessary
+  // if that storage came out of a non-young region. The efficiency
+  // of this implementation depends crucially on being able to
+  // answer very efficiently in constant time whether a piece of
+  // storage in the heap comes from a young region or not.
+  // See ReduceInitialCardMarks.
  virtual bool can_elide_tlab_store_barriers() const {
-    // Since G1's TLAB's may, on occasion, come from non-young regions
-    // as well. (Is there a flag controlling that? XXX)
-    return false;
+    return true;
+  }
+
+  bool is_in_young(oop obj) {
+    HeapRegion* hr = heap_region_containing(obj);
+    return hr != NULL && hr->is_young();
+  }
+
+  // We don't need barriers for initializing stores to objects
+  // in the young gen: for the SATB pre-barrier, there is no
+  // pre-value that needs to be remembered; for the remembered-set
+  // update logging post-barrier, we don't maintain remembered set
+  // information for young gen objects. Note that non-generational
+  // G1 does not have any "young" objects, should not elide
+  // the rs logging barrier and so should always answer false below.
+  // However, non-generational G1 (-XX:-G1Gen) appears to have
+  // bit-rotted so was not tested below.
+  virtual bool can_elide_initializing_store_barrier(oop new_obj) {
+    assert(G1Gen || !is_in_young(new_obj),
+           "Non-generational G1 should never return true below");
+    return is_in_young(new_obj);
  }

  // Can a compiler elide a store barrier when it writes
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp
@ -722,12 +722,13 @@ void HeapRegion::print_on(outputStream* st) const {
    st->print(" F");
  else
    st->print("  ");
-  st->print(" %d", _gc_time_stamp);
+  st->print(" %5d", _gc_time_stamp);
  G1OffsetTableContigSpace::print_on(st);
 }

 void HeapRegion::verify(bool allow_dirty) const {
-  verify(allow_dirty, /* use_prev_marking */ true);
+  bool dummy = false;
+  verify(allow_dirty, /* use_prev_marking */ true, /* failures */ &dummy);
 }

 #define OBJ_SAMPLE_INTERVAL 0
@ -736,8 +737,11 @@ void HeapRegion::verify(bool allow_dirty) const {
 // This really ought to be commoned up into OffsetTableContigSpace somehow.
 // We would need a mechanism to make that code skip dead objects.

-void HeapRegion::verify(bool allow_dirty, bool use_prev_marking) const {
+void HeapRegion::verify(bool allow_dirty,
+                        bool use_prev_marking,
+                        bool* failures) const {
  G1CollectedHeap* g1 = G1CollectedHeap::heap();
+  *failures = false;
  HeapWord* p = bottom();
  HeapWord* prev_p = NULL;
  int objs = 0;
@ -746,8 +750,14 @@ void HeapRegion::verify(bool allow_dirty, bool use_prev_marking) const {
  while (p < top()) {
    size_t size = oop(p)->size();
    if (blocks == BLOCK_SAMPLE_INTERVAL) {
-      guarantee(p == block_start_const(p + (size/2)),
-                "check offset computation");
+      HeapWord* res = block_start_const(p + (size/2));
+      if (p != res) {
+        gclog_or_tty->print_cr("offset computation 1 for "PTR_FORMAT" and "
+                               SIZE_FORMAT" returned "PTR_FORMAT,
+                               p, size, res);
+        *failures = true;
+        return;
+      }
      blocks = 0;
    } else {
      blocks++;
@ -755,11 +765,34 @@ void HeapRegion::verify(bool allow_dirty, bool use_prev_marking) const {
    if (objs == OBJ_SAMPLE_INTERVAL) {
      oop obj = oop(p);
      if (!g1->is_obj_dead_cond(obj, this, use_prev_marking)) {
-        obj->verify();
-        vl_cl.set_containing_obj(obj);
-        obj->oop_iterate(&vl_cl);
-        if (G1MaxVerifyFailures >= 0
-            && vl_cl.n_failures() >= G1MaxVerifyFailures) break;
+        if (obj->is_oop()) {
+          klassOop klass = obj->klass();
+          if (!klass->is_perm()) {
+            gclog_or_tty->print_cr("klass "PTR_FORMAT" of object "PTR_FORMAT" "
+                                   "not in perm", klass, obj);
+            *failures = true;
+            return;
+          } else if (!klass->is_klass()) {
+            gclog_or_tty->print_cr("klass "PTR_FORMAT" of object "PTR_FORMAT" "
+                                   "not a klass", klass, obj);
+            *failures = true;
+            return;
+          } else {
+            vl_cl.set_containing_obj(obj);
+            obj->oop_iterate(&vl_cl);
+            if (vl_cl.failures()) {
+              *failures = true;
+            }
+            if (G1MaxVerifyFailures >= 0 &&
+                vl_cl.n_failures() >= G1MaxVerifyFailures) {
+              return;
+            }
+          }
+        } else {
+          gclog_or_tty->print_cr(PTR_FORMAT" no an oop", obj);
+          *failures = true;
+          return;
+        }
      }
      objs = 0;
    } else {
@ -771,21 +804,22 @@ void HeapRegion::verify(bool allow_dirty, bool use_prev_marking) const {
  HeapWord* rend = end();
  HeapWord* rtop = top();
  if (rtop < rend) {
-    guarantee(block_start_const(rtop + (rend - rtop) / 2) == rtop,
-              "check offset computation");
+    HeapWord* res = block_start_const(rtop + (rend - rtop) / 2);
+    if (res != rtop) {
+        gclog_or_tty->print_cr("offset computation 2 for "PTR_FORMAT" and "
+                               PTR_FORMAT" returned "PTR_FORMAT,
+                               rtop, rend, res);
+        *failures = true;
+        return;
+    }
  }
-  if (vl_cl.failures()) {
-    gclog_or_tty->print_cr("Heap:");
-    G1CollectedHeap::heap()->print_on(gclog_or_tty, true /* extended */);
-    gclog_or_tty->print_cr("");
+
+  if (p != top()) {
+    gclog_or_tty->print_cr("end of last object "PTR_FORMAT" "
+                           "does not match top "PTR_FORMAT, p, top());
+    *failures = true;
+    return;
  }
-  if (VerifyDuringGC &&
-      G1VerifyConcMarkPrintReachable &&
-      vl_cl.failures()) {
-    g1->concurrent_mark()->print_prev_bitmap_reachable();
-  }
-  guarantee(!vl_cl.failures(), "region verification failed");
-  guarantee(p == top(), "end of last object must match end of space");
 }

 // G1OffsetTableContigSpace code; copied from space.cpp.  Hope this can go
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp
@ -569,13 +569,8 @@ class HeapRegion: public G1OffsetTableContigSpace {
  // ever evacuated into this region.  If we evacuate, allocate, and
  // then evacuate we are in deep doodoo.
  void note_end_of_copying() {
-    assert(top() >= _next_top_at_mark_start,
-           "Increase only");
-    // Survivor regions will be scanned on the start of concurrent
-    // marking.
-    if (!is_survivor()) {
-      _next_top_at_mark_start = top();
-    }
+    assert(top() >= _next_top_at_mark_start, "Increase only");
+    _next_top_at_mark_start = top();
  }

  // Returns "false" iff no object in the region was allocated when the
@ -798,7 +793,7 @@ class HeapRegion: public G1OffsetTableContigSpace {
  // use_prev_marking == true. Currently, there is only one case where
  // this is called with use_prev_marking == false, which is to verify
  // the "next" marking information at the end of remark.
-  void verify(bool allow_dirty, bool use_prev_marking) const;
+  void verify(bool allow_dirty, bool use_prev_marking, bool *failures) const;

  // Override; it uses the "prev" marking information
  virtual void verify(bool allow_dirty) const;
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp
@ -314,41 +314,6 @@ bool ParallelScavengeHeap::is_in_reserved(const void* p) const {
  return false;
 }

-// Static method
-bool ParallelScavengeHeap::is_in_young(oop* p) {
-  ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
-  assert(heap->kind() == CollectedHeap::ParallelScavengeHeap,
-                                            "Must be ParallelScavengeHeap");
-
-  PSYoungGen* young_gen = heap->young_gen();
-
-  if (young_gen->is_in_reserved(p)) {
-    return true;
-  }
-
-  return false;
-}
-
-// Static method
-bool ParallelScavengeHeap::is_in_old_or_perm(oop* p) {
-  ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
-  assert(heap->kind() == CollectedHeap::ParallelScavengeHeap,
-                                            "Must be ParallelScavengeHeap");
-
-  PSOldGen* old_gen = heap->old_gen();
-  PSPermGen* perm_gen = heap->perm_gen();
-
-  if (old_gen->is_in_reserved(p)) {
-    return true;
-  }
-
-  if (perm_gen->is_in_reserved(p)) {
-    return true;
-  }
-
-  return false;
-}
-
 // There are two levels of allocation policy here.
 //
 // When an allocation request fails, the requesting thread must invoke a VM
@ -764,6 +729,13 @@ void ParallelScavengeHeap::resize_all_tlabs() {
  CollectedHeap::resize_all_tlabs();
 }

+bool ParallelScavengeHeap::can_elide_initializing_store_barrier(oop new_obj) {
+  // We don't need barriers for stores to objects in the
+  // young gen and, a fortiori, for initializing stores to
+  // objects therein.
+  return is_in_young(new_obj);
+}
+
 // This method is used by System.gc() and JVMTI.
 void ParallelScavengeHeap::collect(GCCause::Cause cause) {
  assert(!Heap_lock->owned_by_self(),
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp
@ -129,8 +129,8 @@ class ParallelScavengeHeap : public CollectedHeap {
    return perm_gen()->is_in(p);
  }

-  static bool is_in_young(oop *p);        // reserved part
-  static bool is_in_old_or_perm(oop *p);  // reserved part
+  inline bool is_in_young(oop p);        // reserved part
+  inline bool is_in_old_or_perm(oop p);  // reserved part

  // Memory allocation.   "gc_time_limit_was_exceeded" will
  // be set to true if the adaptive size policy determine that
@ -191,6 +191,10 @@ class ParallelScavengeHeap : public CollectedHeap {
    return true;
  }

+  // Return true if we don't we need a store barrier for
+  // initializing stores to an object at this address.
+  virtual bool can_elide_initializing_store_barrier(oop new_obj);
+
  // Can a compiler elide a store barrier when it writes
  // a permanent oop into the heap?  Applies when the compiler
  // is storing x to the heap, where x->is_perm() is true.
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.inline.hpp
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.inline.hpp
@ -41,3 +41,11 @@ inline void ParallelScavengeHeap::invoke_full_gc(bool maximum_compaction)
    PSMarkSweep::invoke(maximum_compaction);
  }
 }
+
+inline bool ParallelScavengeHeap::is_in_young(oop p) {
+  return young_gen()->is_in_reserved(p);
+}
+
+inline bool ParallelScavengeHeap::is_in_old_or_perm(oop p) {
+  return old_gen()->is_in_reserved(p) || perm_gen()->is_in_reserved(p);
+}
--- a/hotspot/src/share/vm/gc_interface/collectedHeap.cpp
+++ b/hotspot/src/share/vm/gc_interface/collectedHeap.cpp
@ -137,6 +137,89 @@ HeapWord* CollectedHeap::allocate_from_tlab_slow(Thread* thread, size_t size) {
  return obj;
 }

+void CollectedHeap::flush_deferred_store_barrier(JavaThread* thread) {
+  MemRegion deferred = thread->deferred_card_mark();
+  if (!deferred.is_empty()) {
+    {
+      // Verify that the storage points to a parsable object in heap
+      DEBUG_ONLY(oop old_obj = oop(deferred.start());)
+      assert(is_in(old_obj), "Not in allocated heap");
+      assert(!can_elide_initializing_store_barrier(old_obj),
+             "Else should have been filtered in defer_store_barrier()");
+      assert(!is_in_permanent(old_obj), "Sanity: not expected");
+      assert(old_obj->is_oop(true), "Not an oop");
+      assert(old_obj->is_parsable(), "Will not be concurrently parsable");
+      assert(deferred.word_size() == (size_t)(old_obj->size()),
+             "Mismatch: multiple objects?");
+    }
+    BarrierSet* bs = barrier_set();
+    assert(bs->has_write_region_opt(), "No write_region() on BarrierSet");
+    bs->write_region(deferred);
+    // "Clear" the deferred_card_mark field
+    thread->set_deferred_card_mark(MemRegion());
+  }
+  assert(thread->deferred_card_mark().is_empty(), "invariant");
+}
+
+// Helper for ReduceInitialCardMarks. For performance,
+// compiled code may elide card-marks for initializing stores
+// to a newly allocated object along the fast-path. We
+// compensate for such elided card-marks as follows:
+// (a) Generational, non-concurrent collectors, such as
+//     GenCollectedHeap(ParNew,DefNew,Tenured) and
+//     ParallelScavengeHeap(ParallelGC, ParallelOldGC)
+//     need the card-mark if and only if the region is
+//     in the old gen, and do not care if the card-mark
+//     succeeds or precedes the initializing stores themselves,
+//     so long as the card-mark is completed before the next
+//     scavenge. For all these cases, we can do a card mark
+//     at the point at which we do a slow path allocation
+//     in the old gen. For uniformity, however, we end
+//     up using the same scheme (see below) for all three
+//     cases (deferring the card-mark appropriately).
+// (b) GenCollectedHeap(ConcurrentMarkSweepGeneration) requires
+//     in addition that the card-mark for an old gen allocated
+//     object strictly follow any associated initializing stores.
+//     In these cases, the memRegion remembered below is
+//     used to card-mark the entire region either just before the next
+//     slow-path allocation by this thread or just before the next scavenge or
+//     CMS-associated safepoint, whichever of these events happens first.
+//     (The implicit assumption is that the object has been fully
+//     initialized by this point, a fact that we assert when doing the
+//     card-mark.)
+// (c) G1CollectedHeap(G1) uses two kinds of write barriers. When a
+//     G1 concurrent marking is in progress an SATB (pre-write-)barrier is
+//     is used to remember the pre-value of any store. Initializing
+//     stores will not need this barrier, so we need not worry about
+//     compensating for the missing pre-barrier here. Turning now
+//     to the post-barrier, we note that G1 needs a RS update barrier
+//     which simply enqueues a (sequence of) dirty cards which may
+//     optionally be refined by the concurrent update threads. Note
+//     that this barrier need only be applied to a non-young write,
+//     but, like in CMS, because of the presence of concurrent refinement
+//     (much like CMS' precleaning), must strictly follow the oop-store.
+//     Thus, using the same protocol for maintaining the intended
+//     invariants turns out, serendepitously, to be the same for all
+//     three collectors/heap types above.
+//
+// For each future collector, this should be reexamined with
+// that specific collector in mind.
+oop CollectedHeap::defer_store_barrier(JavaThread* thread, oop new_obj) {
+  // If a previous card-mark was deferred, flush it now.
+  flush_deferred_store_barrier(thread);
+  if (can_elide_initializing_store_barrier(new_obj)) {
+    // The deferred_card_mark region should be empty
+    // following the flush above.
+    assert(thread->deferred_card_mark().is_empty(), "Error");
+  } else {
+    // Remember info for the newly deferred store barrier
+    MemRegion deferred = MemRegion((HeapWord*)new_obj, new_obj->size());
+    assert(!deferred.is_empty(), "Error");
+    thread->set_deferred_card_mark(deferred);
+  }
+  return new_obj;
+}
+
 size_t CollectedHeap::filler_array_hdr_size() {
  return size_t(arrayOopDesc::header_size(T_INT));
 }
@ -225,16 +308,6 @@ void CollectedHeap::fill_with_objects(HeapWord* start, size_t words)
  fill_with_object_impl(start, words);
 }

-oop CollectedHeap::new_store_barrier(oop new_obj) {
-  // %%% This needs refactoring.  (It was imported from the server compiler.)
-  guarantee(can_elide_tlab_store_barriers(), "store barrier elision not supported");
-  BarrierSet* bs = this->barrier_set();
-  assert(bs->has_write_region_opt(), "Barrier set does not have write_region");
-  int new_size = new_obj->size();
-  bs->write_region(MemRegion((HeapWord*)new_obj, new_size));
-  return new_obj;
-}
-
 HeapWord* CollectedHeap::allocate_new_tlab(size_t size) {
  guarantee(false, "thread-local allocation buffers not supported");
  return NULL;
--- a/hotspot/src/share/vm/gc_interface/collectedHeap.hpp
+++ b/hotspot/src/share/vm/gc_interface/collectedHeap.hpp
@ -415,9 +415,14 @@ class CollectedHeap : public CHeapObj {
    guarantee(false, "thread-local allocation buffers not supported");
    return 0;
  }
+
  // Can a compiler initialize a new object without store barriers?
  // This permission only extends from the creation of a new object
-  // via a TLAB up to the first subsequent safepoint.
+  // via a TLAB up to the first subsequent safepoint. If such permission
+  // is granted for this heap type, the compiler promises to call
+  // defer_store_barrier() below on any slow path allocation of
+  // a new object for which such initializing store barriers will
+  // have been elided.
  virtual bool can_elide_tlab_store_barriers() const = 0;

  // If a compiler is eliding store barriers for TLAB-allocated objects,
@ -425,8 +430,19 @@ class CollectedHeap : public CHeapObj {
  // an object allocated anywhere.  The compiler's runtime support
  // promises to call this function on such a slow-path-allocated
  // object before performing initializations that have elided
-  // store barriers.  Returns new_obj, or maybe a safer copy thereof.
-  virtual oop new_store_barrier(oop new_obj);
+  // store barriers. Returns new_obj, or maybe a safer copy thereof.
+  virtual oop defer_store_barrier(JavaThread* thread, oop new_obj);
+
+  // Answers whether an initializing store to a new object currently
+  // allocated at the given address doesn't need a (deferred) store
+  // barrier. Returns "true" if it doesn't need an initializing
+  // store barrier; answers "false" if it does.
+  virtual bool can_elide_initializing_store_barrier(oop new_obj) = 0;
+
+  // If the CollectedHeap was asked to defer a store barrier above,
+  // this informs it to flush such a deferred store barrier to the
+  // remembered set.
+  virtual void flush_deferred_store_barrier(JavaThread* thread);

  // Can a compiler elide a store barrier when it writes
  // a permanent oop into the heap?  Applies when the compiler
--- a/hotspot/src/share/vm/memory/genCollectedHeap.hpp
+++ b/hotspot/src/share/vm/memory/genCollectedHeap.hpp
@ -260,6 +260,20 @@ public:
    return true;
  }

+  // We don't need barriers for stores to objects in the
+  // young gen and, a fortiori, for initializing stores to
+  // objects therein. This applies to {DefNew,ParNew}+{Tenured,CMS}
+  // only and may need to be re-examined in case other
+  // kinds of collectors are implemented in the future.
+  virtual bool can_elide_initializing_store_barrier(oop new_obj) {
+    // We wanted to assert that:-
+    // assert(UseParNewGC || UseSerialGC || UseConcMarkSweepGC,
+    //       "Check can_elide_initializing_store_barrier() for this collector");
+    // but unfortunately the flag UseSerialGC need not necessarily always
+    // be set when DefNew+Tenured are being used.
+    return is_in_youngest((void*)new_obj);
+  }
+
  // Can a compiler elide a store barrier when it writes
  // a permanent oop into the heap?  Applies when the compiler
  // is storing x to the heap, where x->is_perm() is true.
--- a/hotspot/src/share/vm/opto/graphKit.cpp
+++ b/hotspot/src/share/vm/opto/graphKit.cpp
@ -3186,6 +3186,15 @@ void GraphKit::write_barrier_post(Node* oop_store,
      return;
  }

+  if (use_ReduceInitialCardMarks()
+      && obj == just_allocated_object(control())) {
+    // We can skip marks on a freshly-allocated object in Eden.
+    // Keep this code in sync with maybe_defer_card_mark() in runtime.cpp.
+    // That routine informs GC to take appropriate compensating steps
+    // so as to make this card-mark elision safe.
+    return;
+  }
+
  if (!use_precise) {
    // All card marks for a (non-array) instance are in one place:
    adr = obj;
--- a/hotspot/src/share/vm/opto/library_call.cpp
+++ b/hotspot/src/share/vm/opto/library_call.cpp
@ -4160,13 +4160,13 @@ bool LibraryCallKit::inline_native_clone(bool is_virtual) {
          result_mem ->set_req(_objArray_path, reset_memory());
        }
      }
-      // We can dispense with card marks if we know the allocation
-      // comes out of eden (TLAB)...  In fact, ReduceInitialCardMarks
-      // causes the non-eden paths to simulate a fresh allocation,
-      // insofar that no further card marks are required to initialize
-      // the object.
-
      // Otherwise, there are no card marks to worry about.
+      // (We can dispense with card marks if we know the allocation
+      //  comes out of eden (TLAB)...  In fact, ReduceInitialCardMarks
+      //  causes the non-eden paths to take compensating steps to
+      //  simulate a fresh allocation, so that no further
+      //  card marks are required in compiled code to initialize
+      //  the object.)

      if (!stopped()) {
        copy_to_clone(obj, alloc_obj, obj_size, true, false);
--- a/hotspot/src/share/vm/opto/runtime.cpp
+++ b/hotspot/src/share/vm/opto/runtime.cpp
@ -143,18 +143,20 @@ const char* OptoRuntime::stub_name(address entry) {
 // We failed the fast-path allocation.  Now we need to do a scavenge or GC
 // and try allocation again.

-void OptoRuntime::do_eager_card_mark(JavaThread* thread) {
+void OptoRuntime::maybe_defer_card_mark(JavaThread* thread) {
  // After any safepoint, just before going back to compiled code,
-  // we perform a card mark.  This lets the compiled code omit
-  // card marks for initialization of new objects.
-  // Keep this code consistent with GraphKit::store_barrier.
+  // we inform the GC that we will be doing initializing writes to
+  // this object in the future without emitting card-marks, so
+  // GC may take any compensating steps.
+  // NOTE: Keep this code consistent with GraphKit::store_barrier.

  oop new_obj = thread->vm_result();
  if (new_obj == NULL)  return;

  assert(Universe::heap()->can_elide_tlab_store_barriers(),
         "compiler must check this first");
-  new_obj = Universe::heap()->new_store_barrier(new_obj);
+  // GC may decide to give back a safer copy of new_obj.
+  new_obj = Universe::heap()->defer_store_barrier(thread, new_obj);
  thread->set_vm_result(new_obj);
 }

@ -197,8 +199,8 @@ JRT_BLOCK_ENTRY(void, OptoRuntime::new_instance_C(klassOopDesc* klass, JavaThrea
  JRT_BLOCK_END;

  if (GraphKit::use_ReduceInitialCardMarks()) {
-    // do them now so we don't have to do them on the fast path
-    do_eager_card_mark(thread);
+    // inform GC that we won't do card marks for initializing writes.
+    maybe_defer_card_mark(thread);
  }
 JRT_END

@ -236,8 +238,8 @@ JRT_BLOCK_ENTRY(void, OptoRuntime::new_array_C(klassOopDesc* array_type, int len
  JRT_BLOCK_END;

  if (GraphKit::use_ReduceInitialCardMarks()) {
-    // do them now so we don't have to do them on the fast path
-    do_eager_card_mark(thread);
+    // inform GC that we won't do card marks for initializing writes.
+    maybe_defer_card_mark(thread);
  }
 JRT_END

--- a/hotspot/src/share/vm/opto/runtime.hpp
+++ b/hotspot/src/share/vm/opto/runtime.hpp
@ -133,8 +133,8 @@ class OptoRuntime : public AllStatic {
  // Allocate storage for a objArray or typeArray
  static void new_array_C(klassOopDesc* array_klass, int len, JavaThread *thread);

-  // Post-allocation step for implementing ReduceInitialCardMarks:
-  static void do_eager_card_mark(JavaThread* thread);
+  // Post-slow-path-allocation step for implementing ReduceInitialCardMarks:
+  static void maybe_defer_card_mark(JavaThread* thread);

  // Allocate storage for a multi-dimensional arrays
  // Note: needs to be fixed for arbitrary number of dimensions
--- a/hotspot/src/share/vm/runtime/thread.cpp
+++ b/hotspot/src/share/vm/runtime/thread.cpp
@ -1213,6 +1213,7 @@ JavaThread::JavaThread(bool is_attaching) :
 {
  initialize();
  _is_attaching = is_attaching;
+  assert(_deferred_card_mark.is_empty(), "Default MemRegion ctor");
 }

 bool JavaThread::reguard_stack(address cur_sp) {
@ -2318,6 +2319,10 @@ void JavaThread::gc_prologue() {


 void JavaThread::oops_do(OopClosure* f, CodeBlobClosure* cf) {
+  // Flush deferred store-barriers, if any, associated with
+  // initializing stores done by this JavaThread in the current epoch.
+  Universe::heap()->flush_deferred_store_barrier(this);
+
  // The ThreadProfiler oops_do is done from FlatProfiler::oops_do
  // since there may be more than one thread using each ThreadProfiler.

--- a/hotspot/src/share/vm/runtime/thread.hpp
+++ b/hotspot/src/share/vm/runtime/thread.hpp
@ -684,8 +684,13 @@ class JavaThread: public Thread {
  methodOop     _callee_target;

  // Oop results of VM runtime calls
-  oop           _vm_result;                      // Used to pass back an oop result into Java code, GC-preserved
-  oop           _vm_result_2;                    // Used to pass back an oop result into Java code, GC-preserved
+  oop           _vm_result;    // Used to pass back an oop result into Java code, GC-preserved
+  oop           _vm_result_2;  // Used to pass back an oop result into Java code, GC-preserved
+
+  // See ReduceInitialCardMarks: this holds the precise space interval of
+  // the most recent slow path allocation for which compiled code has
+  // elided card-marks for performance along the fast-path.
+  MemRegion     _deferred_card_mark;

  MonitorChunk* _monitor_chunks;                 // Contains the off stack monitors
                                                 // allocated during deoptimization
@ -1082,6 +1087,9 @@ class JavaThread: public Thread {
  oop  vm_result_2() const                       { return _vm_result_2; }
  void set_vm_result_2  (oop x)                  { _vm_result_2   = x; }

+  MemRegion deferred_card_mark() const           { return _deferred_card_mark; }
+  void set_deferred_card_mark(MemRegion mr)      { _deferred_card_mark = mr;   }
+
  // Exception handling for compiled methods
  oop      exception_oop() const                 { return _exception_oop; }
  int      exception_stack_size() const          { return _exception_stack_size; }
--- a/hotspot/src/share/vm/utilities/taskqueue.hpp
+++ b/hotspot/src/share/vm/utilities/taskqueue.hpp
@ -207,7 +207,7 @@ bool GenericTaskQueue<E>::push_slow(E t, uint dirty_n_elems) {
    // Actually means 0, so do the push.
    uint localBot = _bottom;
    _elems[localBot] = t;
-    _bottom = increment_index(localBot);
+    OrderAccess::release_store(&_bottom, increment_index(localBot));
    return true;
  }
  return false;
@ -465,19 +465,7 @@ public:
 #endif
 };

-#define SIMPLE_STACK 0
-
 template<class E> inline bool GenericTaskQueue<E>::push(E t) {
-#if SIMPLE_STACK
-  uint localBot = _bottom;
-  if (_bottom < max_elems()) {
-    _elems[localBot] = t;
-    _bottom = localBot + 1;
-    return true;
-  } else {
-    return false;
-  }
-#else
  uint localBot = _bottom;
  assert((localBot >= 0) && (localBot < N), "_bottom out of range.");
  idx_t top = _age.top();
@ -485,23 +473,14 @@ template<class E> inline bool GenericTaskQueue<E>::push(E t) {
  assert((dirty_n_elems >= 0) && (dirty_n_elems < N), "n_elems out of range.");
  if (dirty_n_elems < max_elems()) {
    _elems[localBot] = t;
-    _bottom = increment_index(localBot);
+    OrderAccess::release_store(&_bottom, increment_index(localBot));
    return true;
  } else {
    return push_slow(t, dirty_n_elems);
  }
-#endif
 }

 template<class E> inline bool GenericTaskQueue<E>::pop_local(E& t) {
-#if SIMPLE_STACK
-  uint localBot = _bottom;
-  assert(localBot > 0, "precondition.");
-  localBot--;
-  t = _elems[localBot];
-  _bottom = localBot;
-  return true;
-#else
  uint localBot = _bottom;
  // This value cannot be N-1.  That can only occur as a result of
  // the assignment to bottom in this method.  If it does, this method
@ -529,7 +508,6 @@ template<class E> inline bool GenericTaskQueue<E>::pop_local(E& t) {
    // path.
    return pop_local_slow(localBot, _age.get());
  }
-#endif
 }

 typedef oop Task;
--- a/hotspot/test/gc/6845368/bigobj.java
+++ b/hotspot/test/gc/6845368/bigobj.java
@ -3,7 +3,7 @@
   @bug 6845368
   @summary ensure gc updates references > 64K bytes from the start of the obj
   @author John Coomes
-   @run main/othervm -Xmx64m bigobj
+   @run main/othervm/timeout=720 -Xmx64m bigobj
 */

 // Allocate an object with a block of reference fields that starts more