Jon Masamitsu 2010-01-04 07:58:42 -08:00
commit 1335d16d6e
26 changed files with 1099 additions and 345 deletions


@ -62,12 +62,13 @@ TreeList* TreeList::as_TreeList(TreeChunk* tc) {
tl->link_head(tc);
tl->link_tail(tc);
tl->set_count(1);
tl->init_statistics();
tl->init_statistics(true /* split_birth */);
tl->setParent(NULL);
tl->setLeft(NULL);
tl->setRight(NULL);
return tl;
}
TreeList* TreeList::as_TreeList(HeapWord* addr, size_t size) {
TreeChunk* tc = (TreeChunk*) addr;
assert(size >= sizeof(TreeChunk), "Chunk is too small for a TreeChunk");
@ -267,6 +268,31 @@ TreeChunk* TreeList::first_available() {
return retTC;
}
// Returns the block with the largest heap address amongst
// those in the list for this size; potentially slow and expensive,
// use with caution!
TreeChunk* TreeList::largest_address() {
guarantee(head() != NULL, "The head of the list cannot be NULL");
FreeChunk* fc = head()->next();
TreeChunk* retTC;
if (fc == NULL) {
retTC = head_as_TreeChunk();
} else {
// walk down the list and return the one with the highest
// heap address among chunks of this size.
FreeChunk* last = fc;
while (fc->next() != NULL) {
if ((HeapWord*)last < (HeapWord*)fc) {
last = fc;
}
fc = fc->next();
}
retTC = TreeChunk::as_TreeChunk(last);
}
assert(retTC->list() == this, "Wrong type of chunk.");
return retTC;
}
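The scan above prefers the chunk with the highest heap address among chunks of one size. A minimal standalone sketch of that idea, using a hypothetical singly linked Chunk type and a plain linear scan (not the HotSpot TreeChunk/FreeChunk classes, and not a claim about the exact loop in the patch):
#include <cassert>
#include <cstddef>
// Hypothetical node type; all chunks on one list have the same size,
// so only their addresses differ.
struct Chunk {
  Chunk* next;
};
// Return the chunk with the highest address on a non-empty list.
Chunk* largest_address(Chunk* head) {
  assert(head != NULL && "list must be non-empty");
  Chunk* best = head;
  for (Chunk* fc = head->next; fc != NULL; fc = fc->next) {
    if (best < fc) {   // compare raw heap addresses
      best = fc;
    }
  }
  return best;
}
int main() {
  Chunk c[3] = { { &c[2] }, { NULL }, { &c[1] } };  // list: c[0] -> c[2] -> c[1]
  assert(largest_address(&c[0]) == &c[2]);          // highest address wins
  return 0;
}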
BinaryTreeDictionary::BinaryTreeDictionary(MemRegion mr, bool splay):
_splay(splay)
{
@ -379,7 +405,7 @@ BinaryTreeDictionary::getChunkFromTree(size_t size, Dither dither, bool splay)
break;
}
// The evm code reset the hint of the candidate as
// at an interrim point. Why? Seems like this leaves
// at an interim point. Why? Seems like this leaves
// the hint pointing to a list that didn't work.
// curTL->set_hint(hintTL->size());
}
@ -436,7 +462,7 @@ FreeChunk* BinaryTreeDictionary::findLargestDict() const {
TreeList *curTL = root();
if (curTL != NULL) {
while(curTL->right() != NULL) curTL = curTL->right();
return curTL->first_available();
return curTL->largest_address();
} else {
return NULL;
}
@ -664,7 +690,7 @@ void BinaryTreeDictionary::insertChunkInTree(FreeChunk* fc) {
}
}
TreeChunk* tc = TreeChunk::as_TreeChunk(fc);
// This chunk is being returned to the binary try. It's embedded
// This chunk is being returned to the binary tree. Its embedded
// TreeList should be unused at this point.
tc->initialize();
if (curTL != NULL) { // exact match
@ -807,6 +833,8 @@ void BinaryTreeDictionary::dictCensusUpdate(size_t size, bool split, bool birth)
}
bool BinaryTreeDictionary::coalDictOverPopulated(size_t size) {
if (FLSAlwaysCoalesceLarge) return true;
TreeList* list_of_size = findList(size);
// None of requested size implies overpopulated.
return list_of_size == NULL || list_of_size->coalDesired() <= 0 ||
@ -854,17 +882,20 @@ class BeginSweepClosure : public AscendTreeCensusClosure {
double _percentage;
float _inter_sweep_current;
float _inter_sweep_estimate;
float _intra_sweep_estimate;
public:
BeginSweepClosure(double p, float inter_sweep_current,
float inter_sweep_estimate) :
float inter_sweep_estimate,
float intra_sweep_estimate) :
_percentage(p),
_inter_sweep_current(inter_sweep_current),
_inter_sweep_estimate(inter_sweep_estimate) { }
_inter_sweep_estimate(inter_sweep_estimate),
_intra_sweep_estimate(intra_sweep_estimate) { }
void do_list(FreeList* fl) {
double coalSurplusPercent = _percentage;
fl->compute_desired(_inter_sweep_current, _inter_sweep_estimate);
fl->compute_desired(_inter_sweep_current, _inter_sweep_estimate, _intra_sweep_estimate);
fl->set_coalDesired((ssize_t)((double)fl->desired() * coalSurplusPercent));
fl->set_beforeSweep(fl->count());
fl->set_bfrSurp(fl->surplus());
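compute_desired() now also folds in an estimate of how long the sweep itself takes. Roughly, the free-list inventory for each size has to cover the expected demand from the end of one sweep to the end of the next. A hedged arithmetic sketch with made-up numbers; the names below are illustrative stand-ins, not the FreeList/AllocationStats fields:
#include <cstdio>
// Project the desired inventory of free blocks of one size.
// 'demand_per_second' would come from a decaying average of observed demand;
// the inventory must last the inter-sweep gap plus the sweep duration.
long desired_count(double demand_per_second,
                   double inter_sweep_estimate,    // seconds between sweeps
                   double intra_sweep_estimate) {  // seconds a sweep takes
  return (long)(demand_per_second *
                (inter_sweep_estimate + intra_sweep_estimate));
}
int main() {
  long desired = desired_count(1200.0 /* blocks/sec */, 4.0, 0.5);     // 5400
  double coal_surplus_percent = 1.05;   // cf. the old CoalSurplusPercent define
  long coal_desired = (long)((double)desired * coal_surplus_percent);  // 5670
  std::printf("desired=%ld coalDesired=%ld\n", desired, coal_desired);
  return 0;
}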
@ -939,9 +970,10 @@ FreeChunk* BinaryTreeDictionary::find_chunk_ends_at(HeapWord* target) const {
}
void BinaryTreeDictionary::beginSweepDictCensus(double coalSurplusPercent,
float inter_sweep_current, float inter_sweep_estimate) {
float inter_sweep_current, float inter_sweep_estimate, float intra_sweep_estimate) {
BeginSweepClosure bsc(coalSurplusPercent, inter_sweep_current,
inter_sweep_estimate);
inter_sweep_estimate,
intra_sweep_estimate);
bsc.do_tree(root());
}
@ -1077,13 +1109,13 @@ void BinaryTreeDictionary::reportStatistics() const {
// Print census information - counts, births, deaths, etc.
// for each list in the tree. Also print some summary
// information.
class printTreeCensusClosure : public AscendTreeCensusClosure {
class PrintTreeCensusClosure : public AscendTreeCensusClosure {
int _print_line;
size_t _totalFree;
FreeList _total;
public:
printTreeCensusClosure() {
PrintTreeCensusClosure() {
_print_line = 0;
_totalFree = 0;
}
@ -1113,7 +1145,7 @@ void BinaryTreeDictionary::printDictCensus(void) const {
gclog_or_tty->print("\nBinaryTree\n");
FreeList::print_labels_on(gclog_or_tty, "size");
printTreeCensusClosure ptc;
PrintTreeCensusClosure ptc;
ptc.do_tree(root());
FreeList* total = ptc.total();
@ -1130,6 +1162,38 @@ void BinaryTreeDictionary::printDictCensus(void) const {
/(total->desired() != 0 ? (double)total->desired() : 1.0));
}
class PrintFreeListsClosure : public AscendTreeCensusClosure {
outputStream* _st;
int _print_line;
public:
PrintFreeListsClosure(outputStream* st) {
_st = st;
_print_line = 0;
}
void do_list(FreeList* fl) {
if (++_print_line >= 40) {
FreeList::print_labels_on(_st, "size");
_print_line = 0;
}
fl->print_on(gclog_or_tty);
size_t sz = fl->size();
for (FreeChunk* fc = fl->head(); fc != NULL;
fc = fc->next()) {
_st->print_cr("\t[" PTR_FORMAT "," PTR_FORMAT ") %s",
fc, (HeapWord*)fc + sz,
fc->cantCoalesce() ? "\t CC" : "");
}
}
};
void BinaryTreeDictionary::print_free_lists(outputStream* st) const {
FreeList::print_labels_on(st, "size");
PrintFreeListsClosure pflc(st);
pflc.do_tree(root());
}
// Verify the following tree invariants:
// . _root has no parent
// . parent and child point to each other


@ -42,9 +42,6 @@ class TreeList: public FreeList {
friend class AscendTreeCensusClosure;
friend class DescendTreeCensusClosure;
friend class DescendTreeSearchClosure;
TreeList* _parent;
TreeList* _left;
TreeList* _right;
protected:
TreeList* parent() const { return _parent; }
@ -82,6 +79,11 @@ class TreeList: public FreeList {
// to a TreeChunk.
TreeChunk* first_available();
// Returns the block with the largest heap address amongst
// those in the list for this size; potentially slow and expensive,
// use with caution!
TreeChunk* largest_address();
// removeChunkReplaceIfNeeded() removes the given "tc" from the TreeList.
// If "tc" is the first chunk in the list, it is also the
// TreeList that is the node in the tree. removeChunkReplaceIfNeeded()
@ -254,8 +256,9 @@ class BinaryTreeDictionary: public FreeBlockDictionary {
// Methods called at the beginning of a sweep to prepare the
// statistics for the sweep.
void beginSweepDictCensus(double coalSurplusPercent,
float sweep_current,
float sweep_estimate);
float inter_sweep_current,
float inter_sweep_estimate,
float intra_sweep_estimate);
// Methods called after the end of a sweep to modify the
// statistics for the sweep.
void endSweepDictCensus(double splitSurplusPercent);
@ -269,6 +272,7 @@ class BinaryTreeDictionary: public FreeBlockDictionary {
// Print the statistcis for all the lists in the tree. Also may
// print out summaries.
void printDictCensus(void) const;
void print_free_lists(outputStream* st) const;
// For debugging. Returns the sum of the _returnedBytes for
// all lists in the tree.


@ -32,7 +32,9 @@
// threads. The second argument is in support of an extra locking
// check for CFL spaces' free list locks.
#ifndef PRODUCT
void CMSLockVerifier::assert_locked(const Mutex* lock, const Mutex* p_lock) {
void CMSLockVerifier::assert_locked(const Mutex* lock,
const Mutex* p_lock1,
const Mutex* p_lock2) {
if (!Universe::is_fully_initialized()) {
return;
}
@ -40,7 +42,7 @@ void CMSLockVerifier::assert_locked(const Mutex* lock, const Mutex* p_lock) {
Thread* myThread = Thread::current();
if (lock == NULL) { // a "lock-free" structure, e.g. MUT, protected by CMS token
assert(p_lock == NULL, "Unexpected state");
assert(p_lock1 == NULL && p_lock2 == NULL, "Unexpected caller error");
if (myThread->is_ConcurrentGC_thread()) {
// This test might have to change in the future, if there can be
// multiple peer CMS threads. But for now, if we're testing the CMS
@ -60,36 +62,39 @@ void CMSLockVerifier::assert_locked(const Mutex* lock, const Mutex* p_lock) {
return;
}
if (ParallelGCThreads == 0) {
if (myThread->is_VM_thread()
|| myThread->is_ConcurrentGC_thread()
|| myThread->is_Java_thread()) {
// Make sure that we are holding the associated lock.
assert_lock_strong(lock);
} else {
if (myThread->is_VM_thread()
|| myThread->is_ConcurrentGC_thread()
|| myThread->is_Java_thread()) {
// Make sure that we are holding the associated lock.
assert_lock_strong(lock);
// The checking of p_lock is a spl case for CFLS' free list
// locks: we make sure that none of the parallel GC work gang
// threads are holding "sub-locks" of freeListLock(). We check only
// the parDictionaryAllocLock because the others are too numerous.
// This spl case code is somewhat ugly and any improvements
// are welcome XXX FIX ME!!
if (p_lock != NULL) {
assert(!p_lock->is_locked() || p_lock->owned_by_self(),
"Possible race between this and parallel GC threads");
}
} else if (myThread->is_GC_task_thread()) {
// Make sure that the VM or CMS thread holds lock on our behalf
// XXX If there were a concept of a gang_master for a (set of)
// gang_workers, we could have used the identity of that thread
// for checking ownership here; for now we just disjunct.
assert(lock->owner() == VMThread::vm_thread() ||
lock->owner() == ConcurrentMarkSweepThread::cmst(),
"Should be locked by VM thread or CMS thread on my behalf");
} else {
// Make sure we didn't miss some obscure corner case
ShouldNotReachHere();
// The checking of p_lock is a spl case for CFLS' free list
// locks: we make sure that none of the parallel GC work gang
// threads are holding "sub-locks" of freeListLock(). We check only
// the parDictionaryAllocLock because the others are too numerous.
// This spl case code is somewhat ugly and any improvements
// are welcome.
assert(p_lock1 == NULL || !p_lock1->is_locked() || p_lock1->owned_by_self(),
"Possible race between this and parallel GC threads");
assert(p_lock2 == NULL || !p_lock2->is_locked() || p_lock2->owned_by_self(),
"Possible race between this and parallel GC threads");
} else if (myThread->is_GC_task_thread()) {
// Make sure that the VM or CMS thread holds lock on our behalf
// XXX If there were a concept of a gang_master for a (set of)
// gang_workers, we could have used the identity of that thread
// for checking ownership here; for now we just disjunct.
assert(lock->owner() == VMThread::vm_thread() ||
lock->owner() == ConcurrentMarkSweepThread::cmst(),
"Should be locked by VM thread or CMS thread on my behalf");
if (p_lock1 != NULL) {
assert_lock_strong(p_lock1);
}
if (p_lock2 != NULL) {
assert_lock_strong(p_lock2);
}
} else {
// Make sure we didn't miss some other thread type calling into here;
// perhaps as a result of future VM evolution.
ShouldNotReachHere();
}
}
#endif


@ -29,8 +29,11 @@
// the parallel threads.
class CMSLockVerifier: AllStatic {
public:
static void assert_locked(const Mutex* lock, const Mutex* p_lock)
static void assert_locked(const Mutex* lock, const Mutex* p_lock1, const Mutex* p_lock2)
PRODUCT_RETURN;
static void assert_locked(const Mutex* lock, const Mutex* p_lock) {
assert_locked(lock, p_lock, NULL);
}
static void assert_locked(const Mutex* lock) {
assert_locked(lock, NULL);
}


@ -62,18 +62,15 @@ CompactibleFreeListSpace::CompactibleFreeListSpace(BlockOffsetSharedArray* bs,
// implementation, namely, the simple binary tree (splaying
// temporarily disabled).
switch (dictionaryChoice) {
case FreeBlockDictionary::dictionaryBinaryTree:
_dictionary = new BinaryTreeDictionary(mr);
break;
case FreeBlockDictionary::dictionarySplayTree:
case FreeBlockDictionary::dictionarySkipList:
default:
warning("dictionaryChoice: selected option not understood; using"
" default BinaryTreeDictionary implementation instead.");
case FreeBlockDictionary::dictionaryBinaryTree:
_dictionary = new BinaryTreeDictionary(mr);
break;
}
splitBirth(mr.word_size());
assert(_dictionary != NULL, "CMS dictionary initialization");
// The indexed free lists are initially all empty and are lazily
// filled in on demand. Initialize the array elements to NULL.
@ -388,6 +385,105 @@ size_t CompactibleFreeListSpace::max_alloc_in_words() const {
return res;
}
void CompactibleFreeListSpace::print_indexed_free_lists(outputStream* st)
const {
reportIndexedFreeListStatistics();
gclog_or_tty->print_cr("Layout of Indexed Freelists");
gclog_or_tty->print_cr("---------------------------");
FreeList::print_labels_on(st, "size");
for (size_t i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
_indexedFreeList[i].print_on(gclog_or_tty);
for (FreeChunk* fc = _indexedFreeList[i].head(); fc != NULL;
fc = fc->next()) {
gclog_or_tty->print_cr("\t[" PTR_FORMAT "," PTR_FORMAT ") %s",
fc, (HeapWord*)fc + i,
fc->cantCoalesce() ? "\t CC" : "");
}
}
}
void CompactibleFreeListSpace::print_promo_info_blocks(outputStream* st)
const {
_promoInfo.print_on(st);
}
void CompactibleFreeListSpace::print_dictionary_free_lists(outputStream* st)
const {
_dictionary->reportStatistics();
st->print_cr("Layout of Freelists in Tree");
st->print_cr("---------------------------");
_dictionary->print_free_lists(st);
}
class BlkPrintingClosure: public BlkClosure {
const CMSCollector* _collector;
const CompactibleFreeListSpace* _sp;
const CMSBitMap* _live_bit_map;
const bool _post_remark;
outputStream* _st;
public:
BlkPrintingClosure(const CMSCollector* collector,
const CompactibleFreeListSpace* sp,
const CMSBitMap* live_bit_map,
outputStream* st):
_collector(collector),
_sp(sp),
_live_bit_map(live_bit_map),
_post_remark(collector->abstract_state() > CMSCollector::FinalMarking),
_st(st) { }
size_t do_blk(HeapWord* addr);
};
size_t BlkPrintingClosure::do_blk(HeapWord* addr) {
size_t sz = _sp->block_size_no_stall(addr, _collector);
assert(sz != 0, "Should always be able to compute a size");
if (_sp->block_is_obj(addr)) {
const bool dead = _post_remark && !_live_bit_map->isMarked(addr);
_st->print_cr(PTR_FORMAT ": %s object of size " SIZE_FORMAT "%s",
addr,
dead ? "dead" : "live",
sz,
(!dead && CMSPrintObjectsInDump) ? ":" : ".");
if (CMSPrintObjectsInDump && !dead) {
oop(addr)->print_on(_st);
_st->print_cr("--------------------------------------");
}
} else { // free block
_st->print_cr(PTR_FORMAT ": free block of size " SIZE_FORMAT "%s",
addr, sz, CMSPrintChunksInDump ? ":" : ".");
if (CMSPrintChunksInDump) {
((FreeChunk*)addr)->print_on(_st);
_st->print_cr("--------------------------------------");
}
}
return sz;
}
void CompactibleFreeListSpace::dump_at_safepoint_with_locks(CMSCollector* c,
outputStream* st) {
st->print_cr("\n=========================");
st->print_cr("Block layout in CMS Heap:");
st->print_cr("=========================");
BlkPrintingClosure bpcl(c, this, c->markBitMap(), st);
blk_iterate(&bpcl);
st->print_cr("\n=======================================");
st->print_cr("Order & Layout of Promotion Info Blocks");
st->print_cr("=======================================");
print_promo_info_blocks(st);
st->print_cr("\n===========================");
st->print_cr("Order of Indexed Free Lists");
st->print_cr("=========================");
print_indexed_free_lists(st);
st->print_cr("\n=================================");
st->print_cr("Order of Free Lists in Dictionary");
st->print_cr("=================================");
print_dictionary_free_lists(st);
}
void CompactibleFreeListSpace::reportFreeListStatistics() const {
assert_lock_strong(&_freelistLock);
assert(PrintFLSStatistics != 0, "Reporting error");
@ -449,37 +545,37 @@ void CompactibleFreeListSpace::set_end(HeapWord* value) {
if (prevEnd != NULL) {
// Resize the underlying block offset table.
_bt.resize(pointer_delta(value, bottom()));
if (value <= prevEnd) {
assert(value >= unallocated_block(), "New end is below unallocated block");
} else {
// Now, take this new chunk and add it to the free blocks.
// Note that the BOT has not yet been updated for this block.
size_t newFcSize = pointer_delta(value, prevEnd);
// XXX This is REALLY UGLY and should be fixed up. XXX
if (!_adaptive_freelists && _smallLinearAllocBlock._ptr == NULL) {
// Mark the boundary of the new block in BOT
_bt.mark_block(prevEnd, value);
// put it all in the linAB
if (ParallelGCThreads == 0) {
_smallLinearAllocBlock._ptr = prevEnd;
_smallLinearAllocBlock._word_size = newFcSize;
repairLinearAllocBlock(&_smallLinearAllocBlock);
} else { // ParallelGCThreads > 0
MutexLockerEx x(parDictionaryAllocLock(),
Mutex::_no_safepoint_check_flag);
_smallLinearAllocBlock._ptr = prevEnd;
_smallLinearAllocBlock._word_size = newFcSize;
repairLinearAllocBlock(&_smallLinearAllocBlock);
}
// Births of chunks put into a LinAB are not recorded. Births
// of chunks as they are allocated out of a LinAB are.
if (value <= prevEnd) {
assert(value >= unallocated_block(), "New end is below unallocated block");
} else {
// Add the block to the free lists, if possible coalescing it
// with the last free block, and update the BOT and census data.
addChunkToFreeListsAtEndRecordingStats(prevEnd, newFcSize);
// Now, take this new chunk and add it to the free blocks.
// Note that the BOT has not yet been updated for this block.
size_t newFcSize = pointer_delta(value, prevEnd);
// XXX This is REALLY UGLY and should be fixed up. XXX
if (!_adaptive_freelists && _smallLinearAllocBlock._ptr == NULL) {
// Mark the boundary of the new block in BOT
_bt.mark_block(prevEnd, value);
// put it all in the linAB
if (ParallelGCThreads == 0) {
_smallLinearAllocBlock._ptr = prevEnd;
_smallLinearAllocBlock._word_size = newFcSize;
repairLinearAllocBlock(&_smallLinearAllocBlock);
} else { // ParallelGCThreads > 0
MutexLockerEx x(parDictionaryAllocLock(),
Mutex::_no_safepoint_check_flag);
_smallLinearAllocBlock._ptr = prevEnd;
_smallLinearAllocBlock._word_size = newFcSize;
repairLinearAllocBlock(&_smallLinearAllocBlock);
}
// Births of chunks put into a LinAB are not recorded. Births
// of chunks as they are allocated out of a LinAB are.
} else {
// Add the block to the free lists, if possible coalescing it
// with the last free block, and update the BOT and census data.
addChunkToFreeListsAtEndRecordingStats(prevEnd, newFcSize);
}
}
}
}
}
class FreeListSpace_DCTOC : public Filtering_DCTOC {
@ -732,7 +828,7 @@ void CompactibleFreeListSpace::safe_object_iterate(ObjectClosure* blk) {
void CompactibleFreeListSpace::object_iterate_mem(MemRegion mr,
UpwardsObjectClosure* cl) {
assert_locked();
assert_locked(freelistLock());
NOT_PRODUCT(verify_objects_initialized());
Space::object_iterate_mem(mr, cl);
}
@ -1212,12 +1308,15 @@ bool CompactibleFreeListSpace::verifyChunkInFreeLists(FreeChunk* fc) const {
void CompactibleFreeListSpace::assert_locked() const {
CMSLockVerifier::assert_locked(freelistLock(), parDictionaryAllocLock());
}
void CompactibleFreeListSpace::assert_locked(const Mutex* lock) const {
CMSLockVerifier::assert_locked(lock);
}
#endif
FreeChunk* CompactibleFreeListSpace::allocateScratch(size_t size) {
// In the parallel case, the main thread holds the free list lock
// on behalf the parallel threads.
assert_locked();
FreeChunk* fc;
{
// If GC is parallel, this might be called by several threads.
@ -1298,17 +1397,18 @@ CompactibleFreeListSpace::getChunkFromLinearAllocBlock(LinearAllocBlock *blk,
res = blk->_ptr;
_bt.allocated(res, blk->_word_size);
} else if (size + MinChunkSize <= blk->_refillSize) {
size_t sz = blk->_word_size;
// Update _unallocated_block if the size is such that chunk would be
// returned to the indexed free list. All other chunks in the indexed
// free lists are allocated from the dictionary so that _unallocated_block
// has already been adjusted for them. Do it here so that the cost
// for all chunks added back to the indexed free lists.
if (blk->_word_size < SmallForDictionary) {
_bt.allocated(blk->_ptr, blk->_word_size);
if (sz < SmallForDictionary) {
_bt.allocated(blk->_ptr, sz);
}
// Return the chunk that isn't big enough, and then refill below.
addChunkToFreeLists(blk->_ptr, blk->_word_size);
_bt.verify_single_block(blk->_ptr, (blk->_ptr + blk->_word_size));
addChunkToFreeLists(blk->_ptr, sz);
splitBirth(sz);
// Don't keep statistics on adding back chunk from a LinAB.
} else {
// A refilled block would not satisfy the request.
@ -1376,11 +1476,13 @@ CompactibleFreeListSpace::getChunkFromIndexedFreeList(size_t size) {
res = getChunkFromIndexedFreeListHelper(size);
}
_bt.verify_not_unallocated((HeapWord*) res, size);
assert(res == NULL || res->size() == size, "Incorrect block size");
return res;
}
FreeChunk*
CompactibleFreeListSpace::getChunkFromIndexedFreeListHelper(size_t size) {
CompactibleFreeListSpace::getChunkFromIndexedFreeListHelper(size_t size,
bool replenish) {
assert_locked();
FreeChunk* fc = NULL;
if (size < SmallForDictionary) {
@ -1398,54 +1500,66 @@ CompactibleFreeListSpace::getChunkFromIndexedFreeListHelper(size_t size) {
// and replenishing indexed lists from the small linAB.
//
FreeChunk* newFc = NULL;
size_t replenish_size = CMSIndexedFreeListReplenish * size;
const size_t replenish_size = CMSIndexedFreeListReplenish * size;
if (replenish_size < SmallForDictionary) {
// Do not replenish from an underpopulated size.
if (_indexedFreeList[replenish_size].surplus() > 0 &&
_indexedFreeList[replenish_size].head() != NULL) {
newFc =
_indexedFreeList[replenish_size].getChunkAtHead();
} else {
newFc = _indexedFreeList[replenish_size].getChunkAtHead();
} else if (bestFitFirst()) {
newFc = bestFitSmall(replenish_size);
}
}
if (newFc != NULL) {
splitDeath(replenish_size);
} else if (replenish_size > size) {
if (newFc == NULL && replenish_size > size) {
assert(CMSIndexedFreeListReplenish > 1, "ctl pt invariant");
newFc =
getChunkFromIndexedFreeListHelper(replenish_size);
newFc = getChunkFromIndexedFreeListHelper(replenish_size, false);
}
// Note: The stats update re split-death of block obtained above
// will be recorded below precisely when we know we are going to
// be actually splitting it into more than one pieces below.
if (newFc != NULL) {
assert(newFc->size() == replenish_size, "Got wrong size");
size_t i;
FreeChunk *curFc, *nextFc;
// carve up and link blocks 0, ..., CMSIndexedFreeListReplenish - 2
// The last chunk is not added to the lists but is returned as the
// free chunk.
for (curFc = newFc, nextFc = (FreeChunk*)((HeapWord*)curFc + size),
i = 0;
i < (CMSIndexedFreeListReplenish - 1);
curFc = nextFc, nextFc = (FreeChunk*)((HeapWord*)nextFc + size),
i++) {
if (replenish || CMSReplenishIntermediate) {
// Replenish this list and return one block to caller.
size_t i;
FreeChunk *curFc, *nextFc;
size_t num_blk = newFc->size() / size;
assert(num_blk >= 1, "Smaller than requested?");
assert(newFc->size() % size == 0, "Should be integral multiple of request");
if (num_blk > 1) {
// we are sure we will be splitting the block just obtained
// into multiple pieces; record the split-death of the original
splitDeath(replenish_size);
}
// carve up and link blocks 0, ..., num_blk - 2
// The last chunk is not added to the lists but is returned as the
// free chunk.
for (curFc = newFc, nextFc = (FreeChunk*)((HeapWord*)curFc + size),
i = 0;
i < (num_blk - 1);
curFc = nextFc, nextFc = (FreeChunk*)((HeapWord*)nextFc + size),
i++) {
curFc->setSize(size);
// Don't record this as a return in order to try and
// determine the "returns" from a GC.
_bt.verify_not_unallocated((HeapWord*) fc, size);
_indexedFreeList[size].returnChunkAtTail(curFc, false);
_bt.mark_block((HeapWord*)curFc, size);
splitBirth(size);
// Don't record the initial population of the indexed list
// as a split birth.
}
// check that the arithmetic was OK above
assert((HeapWord*)nextFc == (HeapWord*)newFc + num_blk*size,
"inconsistency in carving newFc");
curFc->setSize(size);
// Don't record this as a return in order to try and
// determine the "returns" from a GC.
_bt.verify_not_unallocated((HeapWord*) fc, size);
_indexedFreeList[size].returnChunkAtTail(curFc, false);
_bt.mark_block((HeapWord*)curFc, size);
splitBirth(size);
// Don't record the initial population of the indexed list
// as a split birth.
fc = curFc;
} else {
// Return entire block to caller
fc = newFc;
}
// check that the arithmetic was OK above
assert((HeapWord*)nextFc == (HeapWord*)newFc + replenish_size,
"inconsistency in carving newFc");
curFc->setSize(size);
_bt.mark_block((HeapWord*)curFc, size);
splitBirth(size);
return curFc;
}
}
} else {
@ -1453,7 +1567,7 @@ CompactibleFreeListSpace::getChunkFromIndexedFreeListHelper(size_t size) {
// replenish the indexed free list.
fc = getChunkFromDictionaryExact(size);
}
assert(fc == NULL || fc->isFree(), "Should be returning a free chunk");
// assert(fc == NULL || fc->isFree(), "Should be returning a free chunk");
return fc;
}
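The replenish path above takes one block of CMSIndexedFreeListReplenish * size words, carves it into num_blk equal pieces, queues the first num_blk - 1 on the indexed list, and hands the last piece back to the caller. A standalone sketch of just that carving arithmetic, with a plain word buffer and a vector standing in for the free list (none of the FreeChunk/BOT machinery):
#include <cassert>
#include <cstddef>
#include <cstdio>
#include <vector>
typedef unsigned long HeapWordLike;   // stand-in for a heap word
// Carve a contiguous block of num_blk * size words starting at 'base' into
// num_blk chunks of 'size' words each. The first num_blk - 1 chunk addresses
// go onto 'list'; the last chunk's address is returned to the caller.
HeapWordLike* carve(HeapWordLike* base, size_t size, size_t num_blk,
                    std::vector<HeapWordLike*>& list) {
  assert(num_blk >= 1);
  HeapWordLike* cur = base;
  for (size_t i = 0; i < num_blk - 1; i++, cur += size) {
    list.push_back(cur);
  }
  // Arithmetic check mirroring the assert in the code above.
  assert(cur + size == base + num_blk * size);
  return cur;   // last piece is handed back to the requester
}
int main() {
  std::vector<HeapWordLike> block(4 * 16);  // e.g. replenish factor 4, size 16
  std::vector<HeapWordLike*> fl;
  HeapWordLike* mine = carve(&block[0], 16, 4, fl);
  std::printf("listed %zu chunks, kept chunk at offset %td\n",
              fl.size(), mine - &block[0]);  // listed 3 chunks, kept offset 48
  return 0;
}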
@ -1512,6 +1626,11 @@ CompactibleFreeListSpace::returnChunkToDictionary(FreeChunk* chunk) {
// adjust _unallocated_block downward, as necessary
_bt.freed((HeapWord*)chunk, size);
_dictionary->returnChunk(chunk);
#ifndef PRODUCT
if (CMSCollector::abstract_state() != CMSCollector::Sweeping) {
TreeChunk::as_TreeChunk(chunk)->list()->verify_stats();
}
#endif // PRODUCT
}
void
@ -1525,6 +1644,11 @@ CompactibleFreeListSpace::returnChunkToFreeList(FreeChunk* fc) {
} else {
_indexedFreeList[size].returnChunkAtHead(fc);
}
#ifndef PRODUCT
if (CMSCollector::abstract_state() != CMSCollector::Sweeping) {
_indexedFreeList[size].verify_stats();
}
#endif // PRODUCT
}
// Add chunk to end of last block -- if it's the largest
@ -1537,7 +1661,6 @@ CompactibleFreeListSpace::addChunkToFreeListsAtEndRecordingStats(
HeapWord* chunk, size_t size) {
// check that the chunk does lie in this space!
assert(chunk != NULL && is_in_reserved(chunk), "Not in this space!");
assert_locked();
// One of the parallel gc task threads may be here
// whilst others are allocating.
Mutex* lock = NULL;
@ -1991,24 +2114,26 @@ double CompactibleFreeListSpace::flsFrag() const {
return frag;
}
#define CoalSurplusPercent 1.05
#define SplitSurplusPercent 1.10
void CompactibleFreeListSpace::beginSweepFLCensus(
float inter_sweep_current,
float inter_sweep_estimate) {
float inter_sweep_estimate,
float intra_sweep_estimate) {
assert_locked();
size_t i;
for (i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
FreeList* fl = &_indexedFreeList[i];
fl->compute_desired(inter_sweep_current, inter_sweep_estimate);
fl->set_coalDesired((ssize_t)((double)fl->desired() * CoalSurplusPercent));
if (PrintFLSStatistics > 1) {
gclog_or_tty->print("size[%d] : ", i);
}
fl->compute_desired(inter_sweep_current, inter_sweep_estimate, intra_sweep_estimate);
fl->set_coalDesired((ssize_t)((double)fl->desired() * CMSSmallCoalSurplusPercent));
fl->set_beforeSweep(fl->count());
fl->set_bfrSurp(fl->surplus());
}
_dictionary->beginSweepDictCensus(CoalSurplusPercent,
_dictionary->beginSweepDictCensus(CMSLargeCoalSurplusPercent,
inter_sweep_current,
inter_sweep_estimate);
inter_sweep_estimate,
intra_sweep_estimate);
}
void CompactibleFreeListSpace::setFLSurplus() {
@ -2017,7 +2142,7 @@ void CompactibleFreeListSpace::setFLSurplus() {
for (i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
FreeList *fl = &_indexedFreeList[i];
fl->set_surplus(fl->count() -
(ssize_t)((double)fl->desired() * SplitSurplusPercent));
(ssize_t)((double)fl->desired() * CMSSmallSplitSurplusPercent));
}
}
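With the surplus percentages now coming from flags, the per-list surplus is still the current count minus a padded desired value. A worked example with made-up counts, assuming CMSSmallSplitSurplusPercent keeps the 1.10 value of the removed SplitSurplusPercent define:
#include <cstdio>
int main() {
  long count   = 6000;   // blocks currently on one indexed list (made up)
  long desired = 5400;   // from compute_desired above (made up)
  double small_split_surplus_percent = 1.10;  // assumed flag default
  long surplus = count - (long)((double)desired * small_split_surplus_percent);
  std::printf("surplus = %ld\n", surplus);    // 6000 - 5940 = 60
  return 0;
}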
@ -2048,6 +2173,11 @@ void CompactibleFreeListSpace::clearFLCensus() {
}
void CompactibleFreeListSpace::endSweepFLCensus(size_t sweep_count) {
if (PrintFLSStatistics > 0) {
HeapWord* largestAddr = (HeapWord*) dictionary()->findLargestDict();
gclog_or_tty->print_cr("CMS: Large block " PTR_FORMAT,
largestAddr);
}
setFLSurplus();
setFLHints();
if (PrintGC && PrintFLSCensus > 0) {
@ -2055,7 +2185,7 @@ void CompactibleFreeListSpace::endSweepFLCensus(size_t sweep_count) {
}
clearFLCensus();
assert_locked();
_dictionary->endSweepDictCensus(SplitSurplusPercent);
_dictionary->endSweepDictCensus(CMSLargeSplitSurplusPercent);
}
bool CompactibleFreeListSpace::coalOverPopulated(size_t size) {
@ -2312,13 +2442,18 @@ void CompactibleFreeListSpace::verifyIndexedFreeLists() const {
}
void CompactibleFreeListSpace::verifyIndexedFreeList(size_t size) const {
FreeChunk* fc = _indexedFreeList[size].head();
FreeChunk* fc = _indexedFreeList[size].head();
FreeChunk* tail = _indexedFreeList[size].tail();
size_t num = _indexedFreeList[size].count();
size_t n = 0;
guarantee((size % 2 == 0) || fc == NULL, "Odd slots should be empty");
for (; fc != NULL; fc = fc->next()) {
for (; fc != NULL; fc = fc->next(), n++) {
guarantee(fc->size() == size, "Size inconsistency");
guarantee(fc->isFree(), "!free?");
guarantee(fc->next() == NULL || fc->next()->prev() == fc, "Broken list");
guarantee((fc->next() == NULL) == (fc == tail), "Incorrect tail");
}
guarantee(n == num, "Incorrect count");
}
#ifndef PRODUCT
@ -2516,11 +2651,41 @@ void PromotionInfo::startTrackingPromotions() {
_tracking = true;
}
void PromotionInfo::stopTrackingPromotions() {
#define CMSPrintPromoBlockInfo 1
void PromotionInfo::stopTrackingPromotions(uint worker_id) {
assert(_spoolHead == _spoolTail && _firstIndex == _nextIndex,
"spooling inconsistency?");
_firstIndex = _nextIndex = 1;
_tracking = false;
if (CMSPrintPromoBlockInfo > 1) {
print_statistics(worker_id);
}
}
void PromotionInfo::print_statistics(uint worker_id) const {
assert(_spoolHead == _spoolTail && _firstIndex == _nextIndex,
"Else will undercount");
assert(CMSPrintPromoBlockInfo > 0, "Else unnecessary call");
// Count the number of blocks and slots in the free pool
size_t slots = 0;
size_t blocks = 0;
for (SpoolBlock* cur_spool = _spareSpool;
cur_spool != NULL;
cur_spool = cur_spool->nextSpoolBlock) {
// the first entry is just a self-pointer; indices 1 through
// bufferSize - 1 are occupied (thus, bufferSize - 1 slots).
guarantee((void*)cur_spool->displacedHdr == (void*)&cur_spool->displacedHdr,
"first entry of displacedHdr should be self-referential");
slots += cur_spool->bufferSize - 1;
blocks++;
}
if (_spoolHead != NULL) {
slots += _spoolHead->bufferSize - 1;
blocks++;
}
gclog_or_tty->print_cr(" [worker %d] promo_blocks = %d, promo_slots = %d ",
worker_id, blocks, slots);
}
// When _spoolTail is not NULL, then the slot <_spoolTail, _nextIndex>
@ -2584,15 +2749,84 @@ void PromotionInfo::verify() const {
guarantee(numDisplacedHdrs == numObjsWithDisplacedHdrs, "Displaced hdr count");
}
void PromotionInfo::print_on(outputStream* st) const {
SpoolBlock* curSpool = NULL;
size_t i = 0;
st->print_cr("start & end indices: [" SIZE_FORMAT ", " SIZE_FORMAT ")",
_firstIndex, _nextIndex);
for (curSpool = _spoolHead; curSpool != _spoolTail && curSpool != NULL;
curSpool = curSpool->nextSpoolBlock) {
curSpool->print_on(st);
st->print_cr(" active ");
i++;
}
for (curSpool = _spoolTail; curSpool != NULL;
curSpool = curSpool->nextSpoolBlock) {
curSpool->print_on(st);
st->print_cr(" inactive ");
i++;
}
for (curSpool = _spareSpool; curSpool != NULL;
curSpool = curSpool->nextSpoolBlock) {
curSpool->print_on(st);
st->print_cr(" free ");
i++;
}
st->print_cr(SIZE_FORMAT " header spooling blocks", i);
}
void SpoolBlock::print_on(outputStream* st) const {
st->print("[" PTR_FORMAT "," PTR_FORMAT "), " SIZE_FORMAT " HeapWords -> " PTR_FORMAT,
this, (HeapWord*)displacedHdr + bufferSize,
bufferSize, nextSpoolBlock);
}
///////////////////////////////////////////////////////////////////////////
// CFLS_LAB
///////////////////////////////////////////////////////////////////////////
#define VECTOR_257(x) \
/* 1 2 3 4 5 6 7 8 9 1x 11 12 13 14 15 16 17 18 19 2x 21 22 23 24 25 26 27 28 29 3x 31 32 */ \
{ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, \
x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, \
x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, \
x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, \
x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, \
x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, \
x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, \
x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, \
x }
// Initialize with default setting of CMSParPromoteBlocksToClaim, _not_
// OldPLABSize, whose static default is different; if overridden at the
// command-line, this will get reinitialized via a call to
// modify_initialization() below.
AdaptiveWeightedAverage CFLS_LAB::_blocks_to_claim[] =
VECTOR_257(AdaptiveWeightedAverage(OldPLABWeight, (float)CMSParPromoteBlocksToClaim));
size_t CFLS_LAB::_global_num_blocks[] = VECTOR_257(0);
int CFLS_LAB::_global_num_workers[] = VECTOR_257(0);
CFLS_LAB::CFLS_LAB(CompactibleFreeListSpace* cfls) :
_cfls(cfls)
{
_blocks_to_claim = CMSParPromoteBlocksToClaim;
assert(CompactibleFreeListSpace::IndexSetSize == 257, "Modify VECTOR_257() macro above");
for (size_t i = CompactibleFreeListSpace::IndexSetStart;
i < CompactibleFreeListSpace::IndexSetSize;
i += CompactibleFreeListSpace::IndexSetStride) {
_indexedFreeList[i].set_size(i);
_num_blocks[i] = 0;
}
}
static bool _CFLS_LAB_modified = false;
void CFLS_LAB::modify_initialization(size_t n, unsigned wt) {
assert(!_CFLS_LAB_modified, "Call only once");
_CFLS_LAB_modified = true;
for (size_t i = CompactibleFreeListSpace::IndexSetStart;
i < CompactibleFreeListSpace::IndexSetSize;
i += CompactibleFreeListSpace::IndexSetStride) {
_blocks_to_claim[i].modify(n, wt, true /* force */);
}
}
@ -2607,11 +2841,9 @@ HeapWord* CFLS_LAB::alloc(size_t word_sz) {
if (res == NULL) return NULL;
} else {
FreeList* fl = &_indexedFreeList[word_sz];
bool filled = false; //TRAP
if (fl->count() == 0) {
bool filled = true; //TRAP
// Attempt to refill this local free list.
_cfls->par_get_chunk_of_blocks(word_sz, _blocks_to_claim, fl);
get_from_global_pool(word_sz, fl);
// If it didn't work, give up.
if (fl->count() == 0) return NULL;
}
@ -2626,80 +2858,190 @@ HeapWord* CFLS_LAB::alloc(size_t word_sz) {
return (HeapWord*)res;
}
void CFLS_LAB::retire() {
for (size_t i = CompactibleFreeListSpace::IndexSetStart;
// Get a chunk of blocks of the right size and update related
// book-keeping stats
void CFLS_LAB::get_from_global_pool(size_t word_sz, FreeList* fl) {
// Get the #blocks we want to claim
size_t n_blks = (size_t)_blocks_to_claim[word_sz].average();
assert(n_blks > 0, "Error");
assert(ResizePLAB || n_blks == OldPLABSize, "Error");
// In some cases, when the application has a phase change,
// there may be a sudden and sharp shift in the object survival
// profile, and updating the counts at the end of a scavenge
// may not be quick enough, giving rise to large scavenge pauses
// during these phase changes. It is beneficial to detect such
// changes on-the-fly during a scavenge and avoid such a phase-change
// pothole. The following code is a heuristic attempt to do that.
// It is protected by a product flag until we have gained
// enough experience with this heuristic and fine-tuned its behaviour.
// WARNING: This might increase fragmentation if we overreact to
// small spikes, so some kind of historical smoothing based on
// previous experience with the greater reactivity might be useful.
// Lacking sufficient experience, CMSOldPLABResizeQuicker is disabled by
// default.
if (ResizeOldPLAB && CMSOldPLABResizeQuicker) {
size_t multiple = _num_blocks[word_sz]/(CMSOldPLABToleranceFactor*CMSOldPLABNumRefills*n_blks);
n_blks += CMSOldPLABReactivityFactor*multiple*n_blks;
n_blks = MIN2(n_blks, CMSOldPLABMax);
}
assert(n_blks > 0, "Error");
_cfls->par_get_chunk_of_blocks(word_sz, n_blks, fl);
// Update stats table entry for this block size
_num_blocks[word_sz] += fl->count();
}
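The resize-quicker heuristic above boosts the number of blocks claimed when the blocks already consumed this scavenge dwarf what the current claim size anticipated. A worked numeric sketch of that arithmetic; the flag values below are assumptions for illustration, not the actual CMS defaults:
#include <algorithm>
#include <cstdio>
int main() {
  // Illustrative stand-ins for the CMS flags used above (values made up).
  const size_t tolerance_factor  = 4;     // cf. CMSOldPLABToleranceFactor
  const size_t num_refills       = 4;     // cf. CMSOldPLABNumRefills
  const size_t reactivity_factor = 2;     // cf. CMSOldPLABReactivityFactor
  const size_t plab_max          = 1024;  // cf. CMSOldPLABMax
  size_t n_blks        = 16;     // current average #blocks to claim for this size
  size_t blocks_so_far = 2048;   // _num_blocks[word_sz]: consumed this scavenge
  // multiple > 0 means demand has outrun tolerance * refills * claim-size.
  size_t multiple = blocks_so_far /
                    (tolerance_factor * num_refills * n_blks);  // 2048/256 = 8
  n_blks += reactivity_factor * multiple * n_blks;              // 16 + 2*8*16 = 272
  n_blks = std::min(n_blks, plab_max);                          // still 272
  std::printf("boosted claim size: %zu blocks\n", n_blks);
  return 0;
}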
void CFLS_LAB::compute_desired_plab_size() {
for (size_t i = CompactibleFreeListSpace::IndexSetStart;
i < CompactibleFreeListSpace::IndexSetSize;
i += CompactibleFreeListSpace::IndexSetStride) {
if (_indexedFreeList[i].count() > 0) {
MutexLockerEx x(_cfls->_indexedFreeListParLocks[i],
Mutex::_no_safepoint_check_flag);
_cfls->_indexedFreeList[i].prepend(&_indexedFreeList[i]);
// Reset this list.
_indexedFreeList[i] = FreeList();
_indexedFreeList[i].set_size(i);
assert((_global_num_workers[i] == 0) == (_global_num_blocks[i] == 0),
"Counter inconsistency");
if (_global_num_workers[i] > 0) {
// Need to smooth wrt historical average
if (ResizeOldPLAB) {
_blocks_to_claim[i].sample(
MAX2((size_t)CMSOldPLABMin,
MIN2((size_t)CMSOldPLABMax,
_global_num_blocks[i]/(_global_num_workers[i]*CMSOldPLABNumRefills))));
}
// Reset counters for next round
_global_num_workers[i] = 0;
_global_num_blocks[i] = 0;
if (PrintOldPLAB) {
gclog_or_tty->print_cr("[%d]: %d", i, (size_t)_blocks_to_claim[i].average());
}
}
}
}
void
CompactibleFreeListSpace::
par_get_chunk_of_blocks(size_t word_sz, size_t n, FreeList* fl) {
void CFLS_LAB::retire(int tid) {
// We run this single threaded with the world stopped;
// so no need for locks and such.
#define CFLS_LAB_PARALLEL_ACCESS 0
NOT_PRODUCT(Thread* t = Thread::current();)
assert(Thread::current()->is_VM_thread(), "Error");
assert(CompactibleFreeListSpace::IndexSetStart == CompactibleFreeListSpace::IndexSetStride,
"Will access to uninitialized slot below");
#if CFLS_LAB_PARALLEL_ACCESS
for (size_t i = CompactibleFreeListSpace::IndexSetSize - 1;
i > 0;
i -= CompactibleFreeListSpace::IndexSetStride) {
#else // CFLS_LAB_PARALLEL_ACCESS
for (size_t i = CompactibleFreeListSpace::IndexSetStart;
i < CompactibleFreeListSpace::IndexSetSize;
i += CompactibleFreeListSpace::IndexSetStride) {
#endif // !CFLS_LAB_PARALLEL_ACCESS
assert(_num_blocks[i] >= (size_t)_indexedFreeList[i].count(),
"Can't retire more than what we obtained");
if (_num_blocks[i] > 0) {
size_t num_retire = _indexedFreeList[i].count();
assert(_num_blocks[i] > num_retire, "Should have used at least one");
{
#if CFLS_LAB_PARALLEL_ACCESS
MutexLockerEx x(_cfls->_indexedFreeListParLocks[i],
Mutex::_no_safepoint_check_flag);
#endif // CFLS_LAB_PARALLEL_ACCESS
// Update globals stats for num_blocks used
_global_num_blocks[i] += (_num_blocks[i] - num_retire);
_global_num_workers[i]++;
assert(_global_num_workers[i] <= (ssize_t)ParallelGCThreads, "Too big");
if (num_retire > 0) {
_cfls->_indexedFreeList[i].prepend(&_indexedFreeList[i]);
// Reset this list.
_indexedFreeList[i] = FreeList();
_indexedFreeList[i].set_size(i);
}
}
if (PrintOldPLAB) {
gclog_or_tty->print_cr("%d[%d]: %d/%d/%d",
tid, i, num_retire, _num_blocks[i], (size_t)_blocks_to_claim[i].average());
}
// Reset stats for next round
_num_blocks[i] = 0;
}
}
}
void CompactibleFreeListSpace:: par_get_chunk_of_blocks(size_t word_sz, size_t n, FreeList* fl) {
assert(fl->count() == 0, "Precondition.");
assert(word_sz < CompactibleFreeListSpace::IndexSetSize,
"Precondition");
// We'll try all multiples of word_sz in the indexed set (starting with
// word_sz itself), then try getting a big chunk and splitting it.
int k = 1;
size_t cur_sz = k * word_sz;
bool found = false;
while (cur_sz < CompactibleFreeListSpace::IndexSetSize && k == 1) {
FreeList* gfl = &_indexedFreeList[cur_sz];
FreeList fl_for_cur_sz; // Empty.
fl_for_cur_sz.set_size(cur_sz);
{
MutexLockerEx x(_indexedFreeListParLocks[cur_sz],
Mutex::_no_safepoint_check_flag);
if (gfl->count() != 0) {
size_t nn = MAX2(n/k, (size_t)1);
gfl->getFirstNChunksFromList(nn, &fl_for_cur_sz);
found = true;
}
}
// Now transfer fl_for_cur_sz to fl. Common case, we hope, is k = 1.
if (found) {
if (k == 1) {
fl->prepend(&fl_for_cur_sz);
} else {
// Divide each block on fl_for_cur_sz up k ways.
FreeChunk* fc;
while ((fc = fl_for_cur_sz.getChunkAtHead()) != NULL) {
// Must do this in reverse order, so that anybody attempting to
// access the main chunk sees it as a single free block until we
// change it.
size_t fc_size = fc->size();
for (int i = k-1; i >= 0; i--) {
FreeChunk* ffc = (FreeChunk*)((HeapWord*)fc + i * word_sz);
ffc->setSize(word_sz);
ffc->linkNext(NULL);
ffc->linkPrev(NULL); // Mark as a free block for other (parallel) GC threads.
// Above must occur before BOT is updated below.
// splitting from the right, fc_size == (k - i + 1) * wordsize
_bt.mark_block((HeapWord*)ffc, word_sz);
fc_size -= word_sz;
_bt.verify_not_unallocated((HeapWord*)ffc, ffc->size());
_bt.verify_single_block((HeapWord*)fc, fc_size);
_bt.verify_single_block((HeapWord*)ffc, ffc->size());
// Push this on "fl".
fl->returnChunkAtHead(ffc);
// We'll try all multiples of word_sz in the indexed set, starting with
// word_sz itself and, if CMSSplitIndexedFreeListBlocks, try larger multiples,
// then try getting a big chunk and splitting it.
{
bool found;
int k;
size_t cur_sz;
for (k = 1, cur_sz = k * word_sz, found = false;
(cur_sz < CompactibleFreeListSpace::IndexSetSize) &&
(CMSSplitIndexedFreeListBlocks || k <= 1);
k++, cur_sz = k * word_sz) {
FreeList* gfl = &_indexedFreeList[cur_sz];
FreeList fl_for_cur_sz; // Empty.
fl_for_cur_sz.set_size(cur_sz);
{
MutexLockerEx x(_indexedFreeListParLocks[cur_sz],
Mutex::_no_safepoint_check_flag);
if (gfl->count() != 0) {
// nn is the number of chunks of size cur_sz that
// we'd need to split k-ways each, in order to create
// "n" chunks of size word_sz each.
const size_t nn = MAX2(n/k, (size_t)1);
gfl->getFirstNChunksFromList(nn, &fl_for_cur_sz);
found = true;
if (k > 1) {
// Update split death stats for the cur_sz-size blocks list:
// we increment the split death count by the number of blocks
// we just took from the cur_sz-size blocks list and which
// we will be splitting below.
ssize_t deaths = _indexedFreeList[cur_sz].splitDeaths() +
fl_for_cur_sz.count();
_indexedFreeList[cur_sz].set_splitDeaths(deaths);
}
// TRAP
assert(fl->tail()->next() == NULL, "List invariant.");
}
}
return;
// Now transfer fl_for_cur_sz to fl. Common case, we hope, is k = 1.
if (found) {
if (k == 1) {
fl->prepend(&fl_for_cur_sz);
} else {
// Divide each block on fl_for_cur_sz up k ways.
FreeChunk* fc;
while ((fc = fl_for_cur_sz.getChunkAtHead()) != NULL) {
// Must do this in reverse order, so that anybody attempting to
// access the main chunk sees it as a single free block until we
// change it.
size_t fc_size = fc->size();
for (int i = k-1; i >= 0; i--) {
FreeChunk* ffc = (FreeChunk*)((HeapWord*)fc + i * word_sz);
ffc->setSize(word_sz);
ffc->linkNext(NULL);
ffc->linkPrev(NULL); // Mark as a free block for other (parallel) GC threads.
// Above must occur before BOT is updated below.
// splitting from the right, fc_size == (k - i + 1) * wordsize
_bt.mark_block((HeapWord*)ffc, word_sz);
fc_size -= word_sz;
_bt.verify_not_unallocated((HeapWord*)ffc, ffc->size());
_bt.verify_single_block((HeapWord*)fc, fc_size);
_bt.verify_single_block((HeapWord*)ffc, ffc->size());
// Push this on "fl".
fl->returnChunkAtHead(ffc);
}
// TRAP
assert(fl->tail()->next() == NULL, "List invariant.");
}
}
// Update birth stats for this block size.
size_t num = fl->count();
MutexLockerEx x(_indexedFreeListParLocks[word_sz],
Mutex::_no_safepoint_check_flag);
ssize_t births = _indexedFreeList[word_sz].splitBirths() + num;
_indexedFreeList[word_sz].set_splitBirths(births);
return;
}
}
k++; cur_sz = k * word_sz;
}
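When k > 1, the loop above charges the blocks it consumes as split deaths at cur_sz and credits the pieces it produces as split births at word_sz. A small worked sketch of that census bookkeeping with plain counters (numbers made up):
#include <cstdio>
int main() {
  const size_t word_sz = 8;    // requested chunk size, in words
  const size_t n       = 100;  // chunks wanted
  const size_t k       = 3;    // trying the list of size 3 * word_sz
  const size_t cur_sz  = k * word_sz;              // 24
  size_t nn = n / k; if (nn == 0) nn = 1;          // blocks taken: 33
  size_t produced = nn * k;                        // chunks produced: 99
  long split_deaths_cur_sz  = (long)nn;        // 33 blocks of size 24 split up
  long split_births_word_sz = (long)produced;  // 99 chunks of size 8 born
  std::printf("size %zu: +%ld split deaths; size %zu: +%ld split births\n",
              cur_sz, split_deaths_cur_sz, word_sz, split_births_word_sz);
  return 0;
}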
// Otherwise, we'll split a block from the dictionary.
FreeChunk* fc = NULL;
@ -2723,17 +3065,20 @@ par_get_chunk_of_blocks(size_t word_sz, size_t n, FreeList* fl) {
}
}
if (fc == NULL) return;
assert((ssize_t)n >= 1, "Control point invariant");
// Otherwise, split up that block.
size_t nn = fc->size() / word_sz;
const size_t nn = fc->size() / word_sz;
n = MIN2(nn, n);
assert((ssize_t)n >= 1, "Control point invariant");
rem = fc->size() - n * word_sz;
// If there is a remainder, and it's too small, allocate one fewer.
if (rem > 0 && rem < MinChunkSize) {
n--; rem += word_sz;
}
assert((ssize_t)n >= 1, "Control point invariant");
// First return the remainder, if any.
// Note that we hold the lock until we decide if we're going to give
// back the remainder to the dictionary, since a contending allocator
// back the remainder to the dictionary, since a concurrent allocation
// may otherwise see the heap as empty. (We're willing to take that
// hit if the block is a small block.)
if (rem > 0) {
@ -2743,18 +3088,16 @@ par_get_chunk_of_blocks(size_t word_sz, size_t n, FreeList* fl) {
rem_fc->linkNext(NULL);
rem_fc->linkPrev(NULL); // Mark as a free block for other (parallel) GC threads.
// Above must occur before BOT is updated below.
assert((ssize_t)n > 0 && prefix_size > 0 && rem_fc > fc, "Error");
_bt.split_block((HeapWord*)fc, fc->size(), prefix_size);
if (rem >= IndexSetSize) {
returnChunkToDictionary(rem_fc);
dictionary()->dictCensusUpdate(fc->size(),
true /*split*/,
true /*birth*/);
dictionary()->dictCensusUpdate(rem, true /*split*/, true /*birth*/);
rem_fc = NULL;
}
// Otherwise, return it to the small list below.
}
}
//
if (rem_fc != NULL) {
MutexLockerEx x(_indexedFreeListParLocks[rem],
Mutex::_no_safepoint_check_flag);
@ -2762,7 +3105,7 @@ par_get_chunk_of_blocks(size_t word_sz, size_t n, FreeList* fl) {
_indexedFreeList[rem].returnChunkAtHead(rem_fc);
smallSplitBirth(rem);
}
assert((ssize_t)n > 0 && fc != NULL, "Consistency");
// Now do the splitting up.
// Must do this in reverse order, so that anybody attempting to
// access the main chunk sees it as a single free block until we
@ -2792,13 +3135,15 @@ par_get_chunk_of_blocks(size_t word_sz, size_t n, FreeList* fl) {
_bt.verify_single_block((HeapWord*)fc, fc->size());
fl->returnChunkAtHead(fc);
assert((ssize_t)n > 0 && (ssize_t)n == fl->count(), "Incorrect number of blocks");
{
// Update the stats for this block size.
MutexLockerEx x(_indexedFreeListParLocks[word_sz],
Mutex::_no_safepoint_check_flag);
ssize_t new_births = _indexedFreeList[word_sz].splitBirths() + n;
_indexedFreeList[word_sz].set_splitBirths(new_births);
ssize_t new_surplus = _indexedFreeList[word_sz].surplus() + n;
_indexedFreeList[word_sz].set_surplus(new_surplus);
const ssize_t births = _indexedFreeList[word_sz].splitBirths() + n;
_indexedFreeList[word_sz].set_splitBirths(births);
// ssize_t new_surplus = _indexedFreeList[word_sz].surplus() + n;
// _indexedFreeList[word_sz].set_surplus(new_surplus);
}
// TRAP


@ -25,8 +25,6 @@
// Classes in support of keeping track of promotions into a non-Contiguous
// space, in this case a CompactibleFreeListSpace.
#define CFLS_LAB_REFILL_STATS 0
// Forward declarations
class CompactibleFreeListSpace;
class BlkClosure;
@ -89,6 +87,9 @@ class SpoolBlock: public FreeChunk {
displacedHdr = (markOop*)&displacedHdr;
nextSpoolBlock = NULL;
}
void print_on(outputStream* st) const;
void print() const { print_on(gclog_or_tty); }
};
class PromotionInfo VALUE_OBJ_CLASS_SPEC {
@ -121,7 +122,7 @@ class PromotionInfo VALUE_OBJ_CLASS_SPEC {
return _promoHead == NULL;
}
void startTrackingPromotions();
void stopTrackingPromotions();
void stopTrackingPromotions(uint worker_id = 0);
bool tracking() const { return _tracking; }
void track(PromotedObject* trackOop); // keep track of a promoted oop
// The following variant must be used when trackOop is not fully
@ -161,6 +162,9 @@ class PromotionInfo VALUE_OBJ_CLASS_SPEC {
_nextIndex = 0;
}
void print_on(outputStream* st) const;
void print_statistics(uint worker_id) const;
};
class LinearAllocBlock VALUE_OBJ_CLASS_SPEC {
@ -243,6 +247,7 @@ class CompactibleFreeListSpace: public CompactibleSpace {
mutable Mutex _freelistLock;
// locking verifier convenience function
void assert_locked() const PRODUCT_RETURN;
void assert_locked(const Mutex* lock) const PRODUCT_RETURN;
// Linear allocation blocks
LinearAllocBlock _smallLinearAllocBlock;
@ -281,13 +286,6 @@ class CompactibleFreeListSpace: public CompactibleSpace {
// Locks protecting the exact lists during par promotion allocation.
Mutex* _indexedFreeListParLocks[IndexSetSize];
#if CFLS_LAB_REFILL_STATS
// Some statistics.
jint _par_get_chunk_from_small;
jint _par_get_chunk_from_large;
#endif
// Attempt to obtain up to "n" blocks of the size "word_sz" (which is
// required to be smaller than "IndexSetSize".) If successful,
// adds them to "fl", which is required to be an empty free list.
@ -320,7 +318,7 @@ class CompactibleFreeListSpace: public CompactibleSpace {
// Helper function for getChunkFromIndexedFreeList.
// Replenish the indexed free list for this "size". Do not take from an
// underpopulated size.
FreeChunk* getChunkFromIndexedFreeListHelper(size_t size);
FreeChunk* getChunkFromIndexedFreeListHelper(size_t size, bool replenish = true);
// Get a chunk from the indexed free list. If the indexed free list
// does not have a free chunk, try to replenish the indexed free list
@ -430,10 +428,6 @@ class CompactibleFreeListSpace: public CompactibleSpace {
void initialize_sequential_subtasks_for_marking(int n_threads,
HeapWord* low = NULL);
#if CFLS_LAB_REFILL_STATS
void print_par_alloc_stats();
#endif
// Space enquiries
size_t used() const;
size_t free() const;
@ -617,6 +611,12 @@ class CompactibleFreeListSpace: public CompactibleSpace {
// Do some basic checks on the the free lists.
void checkFreeListConsistency() const PRODUCT_RETURN;
// Printing support
void dump_at_safepoint_with_locks(CMSCollector* c, outputStream* st);
void print_indexed_free_lists(outputStream* st) const;
void print_dictionary_free_lists(outputStream* st) const;
void print_promo_info_blocks(outputStream* st) const;
NOT_PRODUCT (
void initializeIndexedFreeListArrayReturnedBytes();
size_t sumIndexedFreeListArrayReturnedBytes();
@ -638,8 +638,9 @@ class CompactibleFreeListSpace: public CompactibleSpace {
// Statistics functions
// Initialize census for lists before the sweep.
void beginSweepFLCensus(float sweep_current,
float sweep_estimate);
void beginSweepFLCensus(float inter_sweep_current,
float inter_sweep_estimate,
float intra_sweep_estimate);
// Set the surplus for each of the free lists.
void setFLSurplus();
// Set the hint for each of the free lists.
@ -730,16 +731,17 @@ class CFLS_LAB : public CHeapObj {
FreeList _indexedFreeList[CompactibleFreeListSpace::IndexSetSize];
// Initialized from a command-line arg.
size_t _blocks_to_claim;
#if CFLS_LAB_REFILL_STATS
// Some statistics.
int _refills;
int _blocksTaken;
static int _tot_refills;
static int _tot_blocksTaken;
static int _next_threshold;
#endif
// Allocation statistics in support of dynamic adjustment of
// #blocks to claim per get_from_global_pool() call below.
static AdaptiveWeightedAverage
_blocks_to_claim [CompactibleFreeListSpace::IndexSetSize];
static size_t _global_num_blocks [CompactibleFreeListSpace::IndexSetSize];
static int _global_num_workers[CompactibleFreeListSpace::IndexSetSize];
size_t _num_blocks [CompactibleFreeListSpace::IndexSetSize];
// Internal work method
void get_from_global_pool(size_t word_sz, FreeList* fl);
public:
CFLS_LAB(CompactibleFreeListSpace* cfls);
@ -748,7 +750,12 @@ public:
HeapWord* alloc(size_t word_sz);
// Return any unused portions of the buffer to the global pool.
void retire();
void retire(int tid);
// Dynamic OldPLABSize sizing
static void compute_desired_plab_size();
// When the settings are modified from default static initialization
static void modify_initialization(size_t n, unsigned wt);
};
size_t PromotionInfo::refillSize() const {


@ -253,7 +253,6 @@ void ConcurrentMarkSweepGeneration::init_initiating_occupancy(intx io, intx tr)
}
}
void ConcurrentMarkSweepGeneration::ref_processor_init() {
assert(collector() != NULL, "no collector");
collector()->ref_processor_init();
@ -341,6 +340,14 @@ CMSStats::CMSStats(ConcurrentMarkSweepGeneration* cms_gen, unsigned int alpha):
_icms_duty_cycle = CMSIncrementalDutyCycle;
}
double CMSStats::cms_free_adjustment_factor(size_t free) const {
// TBD: CR 6909490
return 1.0;
}
void CMSStats::adjust_cms_free_adjustment_factor(bool fail, size_t free) {
}
// If promotion failure handling is on use
// the padded average size of the promotion for each
// young generation collection.
@ -361,7 +368,11 @@ double CMSStats::time_until_cms_gen_full() const {
// Adjust by the safety factor.
double cms_free_dbl = (double)cms_free;
cms_free_dbl = cms_free_dbl * (100.0 - CMSIncrementalSafetyFactor) / 100.0;
double cms_adjustment = (100.0 - CMSIncrementalSafetyFactor)/100.0;
// Apply a further correction factor which tries to adjust
// for recent occurance of concurrent mode failures.
cms_adjustment = cms_adjustment * cms_free_adjustment_factor(cms_free);
cms_free_dbl = cms_free_dbl * cms_adjustment;
if (PrintGCDetails && Verbose) {
gclog_or_tty->print_cr("CMSStats::time_until_cms_gen_full: cms_free "
@ -395,6 +406,8 @@ double CMSStats::time_until_cms_start() const {
// late.
double work = cms_duration() + gc0_period();
double deadline = time_until_cms_gen_full();
// If a concurrent mode failure occurred recently, we want to be
// more conservative and halve our expected time_until_cms_gen_full()
if (work > deadline) {
if (Verbose && PrintGCDetails) {
gclog_or_tty->print(
@ -556,7 +569,8 @@ CMSCollector::CMSCollector(ConcurrentMarkSweepGeneration* cmsGen,
_should_unload_classes(false),
_concurrent_cycles_since_last_unload(0),
_roots_scanning_options(0),
_sweep_estimate(CMS_SweepWeight, CMS_SweepPadding)
_inter_sweep_estimate(CMS_SweepWeight, CMS_SweepPadding),
_intra_sweep_estimate(CMS_SweepWeight, CMS_SweepPadding)
{
if (ExplicitGCInvokesConcurrentAndUnloadsClasses) {
ExplicitGCInvokesConcurrent = true;
@ -773,7 +787,7 @@ CMSCollector::CMSCollector(ConcurrentMarkSweepGeneration* cmsGen,
NOT_PRODUCT(_overflow_counter = CMSMarkStackOverflowInterval;)
_gc_counters = new CollectorCounters("CMS", 1);
_completed_initialization = true;
_sweep_timer.start(); // start of time
_inter_sweep_timer.start(); // start of time
}
const char* ConcurrentMarkSweepGeneration::name() const {
@ -900,6 +914,14 @@ bool ConcurrentMarkSweepGeneration::promotion_attempt_is_safe(
return result;
}
// At a promotion failure dump information on block layout in heap
// (cms old generation).
void ConcurrentMarkSweepGeneration::promotion_failure_occurred() {
if (CMSDumpAtPromotionFailure) {
cmsSpace()->dump_at_safepoint_with_locks(collector(), gclog_or_tty);
}
}
CompactibleSpace*
ConcurrentMarkSweepGeneration::first_compaction_space() const {
return _cmsSpace;
@ -1368,12 +1390,7 @@ void
ConcurrentMarkSweepGeneration::
par_promote_alloc_done(int thread_num) {
CMSParGCThreadState* ps = _par_gc_thread_states[thread_num];
ps->lab.retire();
#if CFLS_LAB_REFILL_STATS
if (thread_num == 0) {
_cmsSpace->print_par_alloc_stats();
}
#endif
ps->lab.retire(thread_num);
}
void
@ -1974,11 +1991,14 @@ void CMSCollector::do_compaction_work(bool clear_all_soft_refs) {
// We must adjust the allocation statistics being maintained
// in the free list space. We do so by reading and clearing
// the sweep timer and updating the block flux rate estimates below.
assert(_sweep_timer.is_active(), "We should never see the timer inactive");
_sweep_timer.stop();
// Note that we do not use this sample to update the _sweep_estimate.
_cmsGen->cmsSpace()->beginSweepFLCensus((float)(_sweep_timer.seconds()),
_sweep_estimate.padded_average());
assert(!_intra_sweep_timer.is_active(), "_intra_sweep_timer should be inactive");
if (_inter_sweep_timer.is_active()) {
_inter_sweep_timer.stop();
// Note that we do not use this sample to update the _inter_sweep_estimate.
_cmsGen->cmsSpace()->beginSweepFLCensus((float)(_inter_sweep_timer.seconds()),
_inter_sweep_estimate.padded_average(),
_intra_sweep_estimate.padded_average());
}
GenMarkSweep::invoke_at_safepoint(_cmsGen->level(),
ref_processor(), clear_all_soft_refs);
@ -2015,10 +2035,10 @@ void CMSCollector::do_compaction_work(bool clear_all_soft_refs) {
}
// Adjust the per-size allocation stats for the next epoch.
_cmsGen->cmsSpace()->endSweepFLCensus(sweepCount() /* fake */);
// Restart the "sweep timer" for next epoch.
_sweep_timer.reset();
_sweep_timer.start();
_cmsGen->cmsSpace()->endSweepFLCensus(sweep_count() /* fake */);
// Restart the "inter sweep timer" for the next epoch.
_inter_sweep_timer.reset();
_inter_sweep_timer.start();
// Sample collection pause time and reset for collection interval.
if (UseAdaptiveSizePolicy) {
@ -2676,7 +2696,7 @@ void ConcurrentMarkSweepGeneration::gc_epilogue(bool full) {
// Also reset promotion tracking in par gc thread states.
if (ParallelGCThreads > 0) {
for (uint i = 0; i < ParallelGCThreads; i++) {
_par_gc_thread_states[i]->promo.stopTrackingPromotions();
_par_gc_thread_states[i]->promo.stopTrackingPromotions(i);
}
}
}
@ -2771,7 +2791,7 @@ class VerifyMarkedClosure: public BitMapClosure {
bool do_bit(size_t offset) {
HeapWord* addr = _marks->offsetToHeapWord(offset);
if (!_marks->isMarked(addr)) {
oop(addr)->print();
oop(addr)->print_on(gclog_or_tty);
gclog_or_tty->print_cr(" ("INTPTR_FORMAT" should have been marked)", addr);
_failed = true;
}
@ -2820,7 +2840,7 @@ bool CMSCollector::verify_after_remark() {
// Clear any marks from a previous round
verification_mark_bm()->clear_all();
assert(verification_mark_stack()->isEmpty(), "markStack should be empty");
assert(overflow_list_is_empty(), "overflow list should be empty");
verify_work_stacks_empty();
GenCollectedHeap* gch = GenCollectedHeap::heap();
gch->ensure_parsability(false); // fill TLABs, but no need to retire them
@ -2893,8 +2913,8 @@ void CMSCollector::verify_after_remark_work_1() {
verification_mark_bm()->iterate(&vcl);
if (vcl.failed()) {
gclog_or_tty->print("Verification failed");
Universe::heap()->print();
fatal(" ... aborting");
Universe::heap()->print_on(gclog_or_tty);
fatal("CMS: failed marking verification after remark");
}
}
@ -3314,7 +3334,7 @@ bool ConcurrentMarkSweepGeneration::grow_by(size_t bytes) {
Universe::heap()->barrier_set()->resize_covered_region(mr);
// Hmmmm... why doesn't CFLS::set_end verify locking?
// This is quite ugly; FIX ME XXX
_cmsSpace->assert_locked();
_cmsSpace->assert_locked(freelistLock());
_cmsSpace->set_end((HeapWord*)_virtual_space.high());
// update the space and generation capacity counters
@ -5868,9 +5888,9 @@ void CMSCollector::sweep(bool asynch) {
check_correct_thread_executing();
verify_work_stacks_empty();
verify_overflow_empty();
incrementSweepCount();
_sweep_timer.stop();
_sweep_estimate.sample(_sweep_timer.seconds());
increment_sweep_count();
_inter_sweep_timer.stop();
_inter_sweep_estimate.sample(_inter_sweep_timer.seconds());
size_policy()->avg_cms_free_at_sweep()->sample(_cmsGen->free());
// PermGen verification support: If perm gen sweeping is disabled in
@ -5893,6 +5913,9 @@ void CMSCollector::sweep(bool asynch) {
}
}
assert(!_intra_sweep_timer.is_active(), "Should not be active");
_intra_sweep_timer.reset();
_intra_sweep_timer.start();
if (asynch) {
TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
CMSPhaseAccounting pa(this, "sweep", !PrintGCDetails);
@ -5937,8 +5960,11 @@ void CMSCollector::sweep(bool asynch) {
verify_work_stacks_empty();
verify_overflow_empty();
_sweep_timer.reset();
_sweep_timer.start();
_intra_sweep_timer.stop();
_intra_sweep_estimate.sample(_intra_sweep_timer.seconds());
_inter_sweep_timer.reset();
_inter_sweep_timer.start();
update_time_of_last_gc(os::javaTimeMillis());
@ -5981,11 +6007,11 @@ void CMSCollector::sweep(bool asynch) {
// FIX ME!!! Looks like this belongs in CFLSpace, with
// CMSGen merely delegating to it.
void ConcurrentMarkSweepGeneration::setNearLargestChunk() {
double nearLargestPercent = 0.999;
double nearLargestPercent = FLSLargestBlockCoalesceProximity;
HeapWord* minAddr = _cmsSpace->bottom();
HeapWord* largestAddr =
(HeapWord*) _cmsSpace->dictionary()->findLargestDict();
if (largestAddr == 0) {
if (largestAddr == NULL) {
// The dictionary appears to be empty. In this case
// try to coalesce at the end of the heap.
largestAddr = _cmsSpace->end();
@ -5993,6 +6019,13 @@ void ConcurrentMarkSweepGeneration::setNearLargestChunk() {
size_t largestOffset = pointer_delta(largestAddr, minAddr);
size_t nearLargestOffset =
(size_t)((double)largestOffset * nearLargestPercent) - MinChunkSize;
if (PrintFLSStatistics != 0) {
gclog_or_tty->print_cr(
"CMS: Large Block: " PTR_FORMAT ";"
" Proximity: " PTR_FORMAT " -> " PTR_FORMAT,
largestAddr,
_cmsSpace->nearLargestChunk(), minAddr + nearLargestOffset);
}
_cmsSpace->set_nearLargestChunk(minAddr + nearLargestOffset);
}
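For illustration, a minimal self-contained sketch of the proximity arithmetic above; the sizes below are assumed values, not taken from the change.

#include <cstddef>
#include <cstdio>

// Hypothetical stand-ins for the live values used by setNearLargestChunk();
// the numbers are chosen only to make the arithmetic concrete.
int main() {
  const double proximity   = 0.99;              // FLSLargestBlockCoalesceProximity
  const size_t min_chunk   = 7;                 // stand-in for MinChunkSize (heap words)
  const size_t largest_off = 60u * 1024 * 1024; // largest free block: 60M words past bottom

  // Same computation as the collector: scale the offset of the largest block,
  // then back off one minimum chunk.
  const size_t near_largest_off =
      (size_t)((double)largest_off * proximity) - min_chunk;

  // Blocks swept at or beyond this offset are treated as "near the largest
  // block" and are coalesced aggressively.
  std::printf("coalescing boundary: %zu words past bottom\n", near_largest_off);
  return 0;
}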
@ -6072,9 +6105,11 @@ void CMSCollector::sweepWork(ConcurrentMarkSweepGeneration* gen,
assert_lock_strong(gen->freelistLock());
assert_lock_strong(bitMapLock());
assert(!_sweep_timer.is_active(), "Was switched off in an outer context");
gen->cmsSpace()->beginSweepFLCensus((float)(_sweep_timer.seconds()),
_sweep_estimate.padded_average());
assert(!_inter_sweep_timer.is_active(), "Was switched off in an outer context");
assert(_intra_sweep_timer.is_active(), "Was switched on in an outer context");
gen->cmsSpace()->beginSweepFLCensus((float)(_inter_sweep_timer.seconds()),
_inter_sweep_estimate.padded_average(),
_intra_sweep_estimate.padded_average());
gen->setNearLargestChunk();
{
@ -6087,7 +6122,7 @@ void CMSCollector::sweepWork(ConcurrentMarkSweepGeneration* gen,
// end-of-sweep-census below will be off by a little bit.
}
gen->cmsSpace()->sweep_completed();
gen->cmsSpace()->endSweepFLCensus(sweepCount());
gen->cmsSpace()->endSweepFLCensus(sweep_count());
if (should_unload_classes()) { // unloaded classes this cycle,
_concurrent_cycles_since_last_unload = 0; // ... reset count
} else { // did not unload classes,

View File

@ -355,6 +355,11 @@ class CMSStats VALUE_OBJ_CLASS_SPEC {
unsigned int new_duty_cycle);
unsigned int icms_update_duty_cycle_impl();
// In support of adjusting of cms trigger ratios based on history
// of concurrent mode failure.
double cms_free_adjustment_factor(size_t free) const;
void adjust_cms_free_adjustment_factor(bool fail, size_t free);
public:
CMSStats(ConcurrentMarkSweepGeneration* cms_gen,
unsigned int alpha = CMSExpAvgFactor);
@ -570,8 +575,11 @@ class CMSCollector: public CHeapObj {
// appropriately.
void check_gc_time_limit();
// XXX Move these to CMSStats ??? FIX ME !!!
elapsedTimer _sweep_timer;
AdaptivePaddedAverage _sweep_estimate;
elapsedTimer _inter_sweep_timer; // time between sweeps
elapsedTimer _intra_sweep_timer; // time _in_ sweeps
// padded decaying average estimates of the above
AdaptivePaddedAverage _inter_sweep_estimate;
AdaptivePaddedAverage _intra_sweep_estimate;
protected:
ConcurrentMarkSweepGeneration* _cmsGen; // old gen (CMS)
@ -625,6 +633,7 @@ class CMSCollector: public CHeapObj {
// . _collectorState <= Idling == post-sweep && pre-mark
// . _collectorState in (Idling, Sweeping) == {initial,final}marking ||
// precleaning || abortablePreclean
public:
enum CollectorState {
Resizing = 0,
Resetting = 1,
@ -636,6 +645,7 @@ class CMSCollector: public CHeapObj {
FinalMarking = 7,
Sweeping = 8
};
protected:
static CollectorState _collectorState;
// State related to prologue/epilogue invocation for my generations
@ -655,7 +665,7 @@ class CMSCollector: public CHeapObj {
int _numYields;
size_t _numDirtyCards;
uint _sweepCount;
size_t _sweep_count;
// number of full gc's since the last concurrent gc.
uint _full_gcs_since_conc_gc;
@ -905,7 +915,7 @@ class CMSCollector: public CHeapObj {
// Check that the currently executing thread is the expected
// one (foreground collector or background collector).
void check_correct_thread_executing() PRODUCT_RETURN;
static void check_correct_thread_executing() PRODUCT_RETURN;
// XXXPERM void print_statistics() PRODUCT_RETURN;
bool is_cms_reachable(HeapWord* addr);
@ -930,8 +940,8 @@ class CMSCollector: public CHeapObj {
static void set_foregroundGCShouldWait(bool v) { _foregroundGCShouldWait = v; }
static bool foregroundGCIsActive() { return _foregroundGCIsActive; }
static void set_foregroundGCIsActive(bool v) { _foregroundGCIsActive = v; }
uint sweepCount() const { return _sweepCount; }
void incrementSweepCount() { _sweepCount++; }
size_t sweep_count() const { return _sweep_count; }
void increment_sweep_count() { _sweep_count++; }
// Timers/stats for gc scheduling and incremental mode pacing.
CMSStats& stats() { return _stats; }
@ -1165,6 +1175,11 @@ class ConcurrentMarkSweepGeneration: public CardGeneration {
virtual bool promotion_attempt_is_safe(size_t promotion_in_bytes,
bool younger_handles_promotion_failure) const;
// Inform this (non-young) generation that a promotion failure was
// encountered during a collection of a younger generation that
// promotes into this generation.
virtual void promotion_failure_occurred();
bool should_collect(bool full, size_t size, bool tlab);
virtual bool should_concurrent_collect() const;
virtual bool is_too_full() const;

View File

@ -55,7 +55,8 @@ class FreeBlockDictionary: public CHeapObj {
virtual void dictCensusUpdate(size_t size, bool split, bool birth) = 0;
virtual bool coalDictOverPopulated(size_t size) = 0;
virtual void beginSweepDictCensus(double coalSurplusPercent,
float sweep_current, float sweep_ewstimate) = 0;
float inter_sweep_current, float inter_sweep_estimate,
float intra_sweep_current) = 0;
virtual void endSweepDictCensus(double splitSurplusPercent) = 0;
virtual FreeChunk* findLargestDict() const = 0;
// verify that the given chunk is in the dictionary.
@ -79,6 +80,7 @@ class FreeBlockDictionary: public CHeapObj {
}
virtual void printDictCensus() const = 0;
virtual void print_free_lists(outputStream* st) const = 0;
virtual void verify() const = 0;

View File

@ -67,3 +67,8 @@ void FreeChunk::verifyList() const {
}
}
#endif
void FreeChunk::print_on(outputStream* st) {
st->print_cr("Next: " PTR_FORMAT " Prev: " PTR_FORMAT " %s",
next(), prev(), cantCoalesce() ? "[can't coalesce]" : "");
}

View File

@ -129,6 +129,8 @@ class FreeChunk VALUE_OBJ_CLASS_SPEC {
void verifyList() const PRODUCT_RETURN;
void mangleAllocated(size_t size) PRODUCT_RETURN;
void mangleFreed(size_t size) PRODUCT_RETURN;
void print_on(outputStream* st);
};
// Alignment helpers etc.

View File

@ -81,8 +81,8 @@ void FreeList::reset(size_t hint) {
set_hint(hint);
}
void FreeList::init_statistics() {
_allocation_stats.initialize();
void FreeList::init_statistics(bool split_birth) {
_allocation_stats.initialize(split_birth);
}
FreeChunk* FreeList::getChunkAtHead() {
@ -292,14 +292,31 @@ bool FreeList::verifyChunkInFreeLists(FreeChunk* fc) const {
}
#ifndef PRODUCT
void FreeList::verify_stats() const {
// The +1 of the LH comparand is to allow some "looseness" in
// checking: we usually call this interface when adding a block
// and we'll subsequently update the stats; we cannot update the
// stats beforehand because in the case of the large-block BT
// dictionary for example, this might be the first block and
// in that case there would be no place that we could record
// the stats (which are kept in the block itself).
assert(_allocation_stats.prevSweep() + _allocation_stats.splitBirths() + 1 // Total Stock + 1
>= _allocation_stats.splitDeaths() + (ssize_t)count(), "Conservation Principle");
}
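Purely illustrative: a tiny self-contained check with assumed numbers showing the bookkeeping the assertion above enforces.

#include <cassert>

// Hypothetical census for one block size. Chunks on the list now must be
// covered by the stock recorded at the previous sweep plus births from
// splitting, allowing for one not-yet-recorded addition (the "+ 1" above).
int main() {
  long prev_sweep   = 10;  // stock at the last sweep census
  long split_births = 4;   // chunks of this size created by splitting larger blocks
  long split_deaths = 3;   // chunks of this size lost by being split into smaller ones
  long count        = 11;  // chunks currently on the free list
  assert(prev_sweep + split_births + 1 >= split_deaths + count);
  return 0;
}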
void FreeList::assert_proper_lock_protection_work() const {
#ifdef ASSERT
if (_protecting_lock != NULL &&
SharedHeap::heap()->n_par_threads() > 0) {
// Should become an assert.
guarantee(_protecting_lock->owned_by_self(), "FreeList RACE DETECTED");
assert(_protecting_lock != NULL, "Don't call this directly");
assert(ParallelGCThreads > 0, "Don't call this directly");
Thread* thr = Thread::current();
if (thr->is_VM_thread() || thr->is_ConcurrentGC_thread()) {
// assert that we are holding the freelist lock
} else if (thr->is_GC_task_thread()) {
assert(_protecting_lock->owned_by_self(), "FreeList RACE DETECTED");
} else if (thr->is_Java_thread()) {
assert(!SafepointSynchronize::is_at_safepoint(), "Should not be executing");
} else {
ShouldNotReachHere(); // unaccounted thread type?
}
#endif
}
#endif

View File

@ -35,18 +35,26 @@ class CompactibleFreeListSpace;
// for that implementation.
class Mutex;
class TreeList;
class FreeList VALUE_OBJ_CLASS_SPEC {
friend class CompactibleFreeListSpace;
friend class VMStructs;
friend class printTreeCensusClosure;
FreeChunk* _head; // List of free chunks
friend class PrintTreeCensusClosure;
protected:
TreeList* _parent;
TreeList* _left;
TreeList* _right;
private:
FreeChunk* _head; // Head of list of free chunks
FreeChunk* _tail; // Tail of list of free chunks
size_t _size; // Size in Heap words of each chunks
size_t _size; // Size in Heap words of each chunk
ssize_t _count; // Number of entries in list
size_t _hint; // next larger size list with a positive surplus
AllocationStats _allocation_stats; // statistics for smart allocation
AllocationStats _allocation_stats; // allocation-related statistics
#ifdef ASSERT
Mutex* _protecting_lock;
@ -63,9 +71,12 @@ class FreeList VALUE_OBJ_CLASS_SPEC {
// Initialize the allocation statistics.
protected:
void init_statistics();
void init_statistics(bool split_birth = false);
void set_count(ssize_t v) { _count = v;}
void increment_count() { _count++; }
void increment_count() {
_count++;
}
void decrement_count() {
_count--;
assert(_count >= 0, "Count should not be negative");
@ -167,11 +178,13 @@ class FreeList VALUE_OBJ_CLASS_SPEC {
_allocation_stats.set_desired(v);
}
void compute_desired(float inter_sweep_current,
float inter_sweep_estimate) {
float inter_sweep_estimate,
float intra_sweep_estimate) {
assert_proper_lock_protection();
_allocation_stats.compute_desired(_count,
inter_sweep_current,
inter_sweep_estimate);
inter_sweep_estimate,
intra_sweep_estimate);
}
ssize_t coalDesired() const {
return _allocation_stats.coalDesired();
@ -306,6 +319,9 @@ class FreeList VALUE_OBJ_CLASS_SPEC {
// found. Return NULL if "fc" is not found.
bool verifyChunkInFreeLists(FreeChunk* fc) const;
// Stats verification
void verify_stats() const PRODUCT_RETURN;
// Printing support
static void print_labels_on(outputStream* st, const char* c);
void print_on(outputStream* st, const char* c = NULL) const;

View File

@ -221,6 +221,7 @@ freeList.cpp freeList.hpp
freeList.cpp globals.hpp
freeList.cpp mutex.hpp
freeList.cpp sharedHeap.hpp
freeList.cpp vmThread.hpp
freeList.hpp allocationStats.hpp

View File

@ -71,6 +71,7 @@ gcUtil.cpp gcUtil.hpp
gcUtil.hpp allocation.hpp
gcUtil.hpp debug.hpp
gcUtil.hpp globalDefinitions.hpp
gcUtil.hpp ostream.hpp
gcUtil.hpp timer.hpp
generationCounters.cpp generationCounters.hpp

View File

@ -50,6 +50,7 @@ ParScanThreadState::ParScanThreadState(Space* to_space_,
work_queue_set_, &term_),
_is_alive_closure(gen_), _scan_weak_ref_closure(gen_, this),
_keep_alive_closure(&_scan_weak_ref_closure),
_promotion_failure_size(0),
_pushes(0), _pops(0), _steals(0), _steal_attempts(0), _term_attempts(0),
_strong_roots_time(0.0), _term_time(0.0)
{
@ -249,6 +250,16 @@ void ParScanThreadState::undo_alloc_in_to_space(HeapWord* obj,
}
}
void ParScanThreadState::print_and_clear_promotion_failure_size() {
if (_promotion_failure_size != 0) {
if (PrintPromotionFailure) {
gclog_or_tty->print(" (%d: promotion failure size = " SIZE_FORMAT ") ",
_thread_num, _promotion_failure_size);
}
_promotion_failure_size = 0;
}
}
class ParScanThreadStateSet: private ResourceArray {
public:
// Initializes states for the specified number of threads;
@ -260,11 +271,11 @@ public:
GrowableArray<oop>** overflow_stacks_,
size_t desired_plab_sz,
ParallelTaskTerminator& term);
inline ParScanThreadState& thread_sate(int i);
inline ParScanThreadState& thread_state(int i);
int pushes() { return _pushes; }
int pops() { return _pops; }
int steals() { return _steals; }
void reset();
void reset(bool promotion_failed);
void flush();
private:
ParallelTaskTerminator& _term;
@ -295,22 +306,31 @@ ParScanThreadStateSet::ParScanThreadStateSet(
}
}
inline ParScanThreadState& ParScanThreadStateSet::thread_sate(int i)
inline ParScanThreadState& ParScanThreadStateSet::thread_state(int i)
{
assert(i >= 0 && i < length(), "sanity check!");
return ((ParScanThreadState*)_data)[i];
}
void ParScanThreadStateSet::reset()
void ParScanThreadStateSet::reset(bool promotion_failed)
{
_term.reset_for_reuse();
if (promotion_failed) {
for (int i = 0; i < length(); ++i) {
thread_state(i).print_and_clear_promotion_failure_size();
}
}
}
void ParScanThreadStateSet::flush()
{
// Work in this loop should be kept as lightweight as
// possible since this might otherwise become a bottleneck
// to scaling. Should we add heavy-weight work into this
// loop, consider parallelizing the loop into the worker threads.
for (int i = 0; i < length(); ++i) {
ParScanThreadState& par_scan_state = thread_sate(i);
ParScanThreadState& par_scan_state = thread_state(i);
// Flush stats related to To-space PLAB activity and
// retire the last buffer.
@ -362,6 +382,14 @@ void ParScanThreadStateSet::flush()
}
}
}
if (UseConcMarkSweepGC && ParallelGCThreads > 0) {
// We need to call this even when ResizeOldPLAB is disabled
// so as to avoid breaking some asserts. While we may be able
// to avoid this by reorganizing the code a bit, I am loath
// to do that unless we find cases where ergo leads to bad
// performance.
CFLS_LAB::compute_desired_plab_size();
}
}
ParScanClosure::ParScanClosure(ParNewGeneration* g,
@ -475,7 +503,7 @@ void ParNewGenTask::work(int i) {
Generation* old_gen = gch->next_gen(_gen);
ParScanThreadState& par_scan_state = _state_set->thread_sate(i);
ParScanThreadState& par_scan_state = _state_set->thread_state(i);
par_scan_state.set_young_old_boundary(_young_old_boundary);
par_scan_state.start_strong_roots();
@ -659,7 +687,7 @@ void ParNewRefProcTaskProxy::work(int i)
{
ResourceMark rm;
HandleMark hm;
ParScanThreadState& par_scan_state = _state_set.thread_sate(i);
ParScanThreadState& par_scan_state = _state_set.thread_state(i);
par_scan_state.set_young_old_boundary(_young_old_boundary);
_task.work(i, par_scan_state.is_alive_closure(),
par_scan_state.keep_alive_closure(),
@ -693,7 +721,7 @@ void ParNewRefProcTaskExecutor::execute(ProcessTask& task)
ParNewRefProcTaskProxy rp_task(task, _generation, *_generation.next_gen(),
_generation.reserved().end(), _state_set);
workers->run_task(&rp_task);
_state_set.reset();
_state_set.reset(_generation.promotion_failed());
}
void ParNewRefProcTaskExecutor::execute(EnqueueTask& task)
@ -813,7 +841,7 @@ void ParNewGeneration::collect(bool full,
GenCollectedHeap::StrongRootsScope srs(gch);
tsk.work(0);
}
thread_state_set.reset();
thread_state_set.reset(promotion_failed());
if (PAR_STATS_ENABLED && ParallelGCVerbose) {
gclog_or_tty->print("Thread totals:\n"
@ -882,6 +910,8 @@ void ParNewGeneration::collect(bool full,
swap_spaces(); // Make life simpler for CMS || rescan; see 6483690.
from()->set_next_compaction_space(to());
gch->set_incremental_collection_will_fail();
// Inform the next generation that a promotion failure occurred.
_next_gen->promotion_failure_occurred();
// Reset the PromotionFailureALot counters.
NOT_PRODUCT(Universe::heap()->reset_promotion_should_fail();)
@ -1029,6 +1059,8 @@ oop ParNewGeneration::copy_to_survivor_space_avoiding_promotion_undo(
new_obj = old;
preserve_mark_if_necessary(old, m);
// Log the size of the maiden promotion failure
par_scan_state->log_promotion_failure(sz);
}
old->forward_to(new_obj);
@ -1150,6 +1182,8 @@ oop ParNewGeneration::copy_to_survivor_space_with_undo(
failed_to_promote = true;
preserve_mark_if_necessary(old, m);
// Log the size of the maiden promotion failure
par_scan_state->log_promotion_failure(sz);
}
} else {
// Is in to-space; do copying ourselves.

View File

@ -97,6 +97,9 @@ class ParScanThreadState {
int _pushes, _pops, _steals, _steal_attempts, _term_attempts;
int _overflow_pushes, _overflow_refills, _overflow_refill_objs;
// Stats for promotion failure
size_t _promotion_failure_size;
// Timing numbers.
double _start;
double _start_strong_roots;
@ -169,6 +172,15 @@ class ParScanThreadState {
// Undo the most recent allocation ("obj", of "word_sz").
void undo_alloc_in_to_space(HeapWord* obj, size_t word_sz);
// Promotion failure stats
size_t promotion_failure_size() { return _promotion_failure_size; }
void log_promotion_failure(size_t sz) {
if (_promotion_failure_size == 0) {
_promotion_failure_size = sz;
}
}
void print_and_clear_promotion_failure_size();
int pushes() { return _pushes; }
int pops() { return _pops; }
int steals() { return _steals; }

View File

@ -31,7 +31,7 @@ class AllocationStats VALUE_OBJ_CLASS_SPEC {
// beginning of this sweep:
// Count(end_last_sweep) - Count(start_this_sweep)
// + splitBirths(between) - splitDeaths(between)
// The above number divided by the time since the start [END???] of the
// The above number divided by the time since the end of the
// previous sweep gives us a time rate of demand for blocks
// of this size. We compute a padded average of this rate as
// our current estimate for the time rate of demand for blocks
@ -41,7 +41,7 @@ class AllocationStats VALUE_OBJ_CLASS_SPEC {
// estimates.
AdaptivePaddedAverage _demand_rate_estimate;
ssize_t _desired; // Estimate computed as described above
ssize_t _desired; // Demand estimate computed as described above
ssize_t _coalDesired; // desired +/- small-percent for tuning coalescing
ssize_t _surplus; // count - (desired +/- small-percent),
@ -53,9 +53,9 @@ class AllocationStats VALUE_OBJ_CLASS_SPEC {
ssize_t _coalDeaths; // loss from coalescing
ssize_t _splitBirths; // additional chunks from splitting
ssize_t _splitDeaths; // loss from splitting
size_t _returnedBytes; // number of bytes returned to list.
size_t _returnedBytes; // number of bytes returned to list.
public:
void initialize() {
void initialize(bool split_birth = false) {
AdaptivePaddedAverage* dummy =
new (&_demand_rate_estimate) AdaptivePaddedAverage(CMS_FLSWeight,
CMS_FLSPadding);
@ -67,7 +67,7 @@ class AllocationStats VALUE_OBJ_CLASS_SPEC {
_beforeSweep = 0;
_coalBirths = 0;
_coalDeaths = 0;
_splitBirths = 0;
_splitBirths = split_birth? 1 : 0;
_splitDeaths = 0;
_returnedBytes = 0;
}
@ -75,10 +75,12 @@ class AllocationStats VALUE_OBJ_CLASS_SPEC {
AllocationStats() {
initialize();
}
// The rate estimate is in blocks per second.
void compute_desired(size_t count,
float inter_sweep_current,
float inter_sweep_estimate) {
float inter_sweep_estimate,
float intra_sweep_estimate) {
// If the latest inter-sweep time is below our granularity
// of measurement, we may call in here with
// inter_sweep_current == 0. However, even for suitably small
@ -88,12 +90,31 @@ class AllocationStats VALUE_OBJ_CLASS_SPEC {
// vulnerable to noisy glitches. In such cases, we
// ignore the current sample and use currently available
// historical estimates.
// XXX NEEDS TO BE FIXED
// assert(prevSweep() + splitBirths() >= splitDeaths() + (ssize_t)count, "Conservation Principle");
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
// "Total Stock" "Not used at this block size"
if (inter_sweep_current > _threshold) {
ssize_t demand = prevSweep() - count + splitBirths() - splitDeaths();
ssize_t demand = prevSweep() - (ssize_t)count + splitBirths() - splitDeaths();
// XXX NEEDS TO BE FIXED
// assert(demand >= 0, "Demand should be non-negative");
// Defensive: adjust for imprecision in event counting
if (demand < 0) {
demand = 0;
}
float old_rate = _demand_rate_estimate.padded_average();
float rate = ((float)demand)/inter_sweep_current;
_demand_rate_estimate.sample(rate);
_desired = (ssize_t)(_demand_rate_estimate.padded_average()
*inter_sweep_estimate);
float new_rate = _demand_rate_estimate.padded_average();
ssize_t old_desired = _desired;
_desired = (ssize_t)(new_rate * (inter_sweep_estimate
+ (CMSExtrapolateSweep
? intra_sweep_estimate
: 0.0)));
if (PrintFLSStatistics > 1) {
gclog_or_tty->print_cr("demand: %d, old_rate: %f, current_rate: %f, new_rate: %f, old_desired: %d, new_desired: %d",
demand, old_rate, rate, new_rate, old_desired, _desired);
}
}
}
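A minimal numeric sketch of the demand computation above; plain floats stand in for AdaptivePaddedAverage and every value is assumed.

#include <cstdio>

int main() {
  // Census bookkeeping for one block size (hypothetical numbers).
  long prev_sweep   = 40;   // stock at the end of the last sweep
  long count        = 25;   // stock now, at the start of this sweep
  long split_births = 10;
  long split_deaths = 5;
  long demand = prev_sweep - count + split_births - split_deaths;   // 20 blocks used

  float inter_sweep_current  = 4.0f;  // seconds since the last sweep ended
  float inter_sweep_estimate = 5.0f;  // padded average of past inter-sweep times
  float intra_sweep_estimate = 1.0f;  // padded average of past sweep durations

  float rate = (float)demand / inter_sweep_current;                 // 5 blocks/s
  // With CMSExtrapolateSweep the demand window also covers the sweep itself.
  bool  extrapolate = true;
  float window  = inter_sweep_estimate + (extrapolate ? intra_sweep_estimate : 0.0f);
  long  desired = (long)(rate * window);                            // 30 blocks
  std::printf("rate=%.1f blocks/s, desired=%ld\n", rate, desired);
  return 0;
}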

View File

@ -52,11 +52,35 @@ void AdaptiveWeightedAverage::sample(float new_sample) {
_last_sample = new_sample;
}
void AdaptiveWeightedAverage::print() const {
print_on(tty);
}
void AdaptiveWeightedAverage::print_on(outputStream* st) const {
guarantee(false, "NYI");
}
void AdaptivePaddedAverage::print() const {
print_on(tty);
}
void AdaptivePaddedAverage::print_on(outputStream* st) const {
guarantee(false, "NYI");
}
void AdaptivePaddedNoZeroDevAverage::print() const {
print_on(tty);
}
void AdaptivePaddedNoZeroDevAverage::print_on(outputStream* st) const {
guarantee(false, "NYI");
}
void AdaptivePaddedAverage::sample(float new_sample) {
// Compute our parent classes sample information
// Compute new adaptive weighted average based on new sample.
AdaptiveWeightedAverage::sample(new_sample);
// Now compute the deviation and the new padded sample
// Now update the deviation and the padded average.
float new_avg = average();
float new_dev = compute_adaptive_average(fabsd(new_sample - new_avg),
deviation());
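A hedged sketch of what this style of update amounts to: both the average and the deviation are exponentially weighted, and the padded value adds a few deviations as a cushion. All names and numbers below are assumed.

#include <cmath>
#include <cstdio>

int main() {
  float weight  = 75.0f;  // percentage given to the new sample (e.g. CMS_SweepWeight)
  float padding = 1.0f;   // deviations added on top (e.g. CMS_SweepPadding)
  float avg = 2.0f, dev = 0.5f;

  float sample = 4.0f;
  avg = (weight / 100.0f) * sample + (1.0f - weight / 100.0f) * avg;      // 3.5
  dev = (weight / 100.0f) * std::fabs(sample - avg)
        + (1.0f - weight / 100.0f) * dev;                                 // 0.5
  float padded = avg + padding * dev;                                     // 4.0
  std::printf("average=%.2f padded=%.2f\n", avg, padded);
  return 0;
}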

View File

@ -54,8 +54,8 @@ class AdaptiveWeightedAverage : public CHeapObj {
public:
// Input weight must be between 0 and 100
AdaptiveWeightedAverage(unsigned weight) :
_average(0.0), _sample_count(0), _weight(weight), _last_sample(0.0) {
AdaptiveWeightedAverage(unsigned weight, float avg = 0.0) :
_average(avg), _sample_count(0), _weight(weight), _last_sample(0.0) {
}
void clear() {
@ -64,6 +64,13 @@ class AdaptiveWeightedAverage : public CHeapObj {
_last_sample = 0;
}
// Useful for modifying static structures after startup.
void modify(size_t avg, unsigned wt, bool force = false) {
assert(force, "Are you sure you want to call this?");
_average = (float)avg;
_weight = wt;
}
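A hedged usage sketch for modify(); the variable name and values are hypothetical, but this mirrors how the arguments change further down re-seeds static statistics via CFLS_LAB::modify_initialization.

// Hypothetical: an average statically initialized with built-in defaults is
// re-seeded once argument parsing has settled on different values.
AdaptiveWeightedAverage promo_block_avg(50 /* weight */, 16.0f /* initial avg */);
// ... later, after the command line has been processed:
promo_block_avg.modify(60 /* new avg */, 75 /* new weight */, true /* force */);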
// Accessors
float average() const { return _average; }
unsigned weight() const { return _weight; }
@ -83,6 +90,10 @@ class AdaptiveWeightedAverage : public CHeapObj {
// Convert to float and back to avoid integer overflow.
return (size_t)exp_avg((float)avg, (float)sample, weight);
}
// Printing
void print_on(outputStream* st) const;
void print() const;
};
@ -129,6 +140,10 @@ class AdaptivePaddedAverage : public AdaptiveWeightedAverage {
// Override
void sample(float new_sample);
// Printing
void print_on(outputStream* st) const;
void print() const;
};
// A weighted average that includes a deviation from the average,
@ -146,7 +161,12 @@ public:
AdaptivePaddedAverage(weight, padding) {}
// Override
void sample(float new_sample);
// Printing
void print_on(outputStream* st) const;
void print() const;
};
// Use a least squares fit to a set of data to generate a linear
// equation.
// y = intercept + slope * x

View File

@ -21,6 +21,8 @@
// have any questions.
//
arguments.cpp compactibleFreeListSpace.hpp
assembler_<arch>.cpp g1SATBCardTableModRefBS.hpp
assembler_<arch>.cpp g1CollectedHeap.inline.hpp
assembler_<arch>.cpp heapRegion.hpp

View File

@ -609,7 +609,7 @@ void DefNewGeneration::collect(bool full,
remove_forwarding_pointers();
if (PrintGCDetails) {
gclog_or_tty->print(" (promotion failed)");
gclog_or_tty->print(" (promotion failed) ");
}
// Add to-space to the list of space to compact
// when a promotion failure has occurred. In that
@ -620,6 +620,9 @@ void DefNewGeneration::collect(bool full,
from()->set_next_compaction_space(to());
gch->set_incremental_collection_will_fail();
// Inform the next generation that a promotion failure occurred.
_next_gen->promotion_failure_occurred();
// Reset the PromotionFailureALot counters.
NOT_PRODUCT(Universe::heap()->reset_promotion_should_fail();)
}
@ -679,6 +682,11 @@ void DefNewGeneration::preserve_mark_if_necessary(oop obj, markOop m) {
void DefNewGeneration::handle_promotion_failure(oop old) {
preserve_mark_if_necessary(old, old->mark());
if (!_promotion_failed && PrintPromotionFailure) {
gclog_or_tty->print(" (promotion failure size = " SIZE_FORMAT ") ",
old->size());
}
// forward to self
old->forward_to(old);
_promotion_failed = true;

View File

@ -181,6 +181,12 @@ class Generation: public CHeapObj {
virtual bool promotion_attempt_is_safe(size_t promotion_in_bytes,
bool younger_handles_promotion_failure) const;
// For a non-young generation, this interface can be used to inform a
// generation that a promotion attempt into that generation failed.
// Typically used to enable diagnostic output for post-mortem analysis,
// but other uses of the interface are not ruled out.
virtual void promotion_failure_occurred() { /* does nothing */ }
// Return an estimate of the maximum allocation that could be performed
// in the generation without triggering any collection or expansion
// activity. It is "unsafe" because no locks are taken; the result

View File

@ -948,6 +948,7 @@ static void no_shared_spaces() {
}
}
#ifndef KERNEL
// If the user has chosen ParallelGCThreads > 0, we set UseParNewGC
// if it's not explicitly set or unset. If the user has chosen
// UseParNewGC and not explicitly set ParallelGCThreads we
@ -1177,8 +1178,7 @@ void Arguments::set_cms_and_parnew_gc_flags() {
// the value (either from the command line or ergonomics) of
// OldPLABSize. Following OldPLABSize is an ergonomics decision.
FLAG_SET_ERGO(uintx, CMSParPromoteBlocksToClaim, OldPLABSize);
}
else {
} else {
// OldPLABSize and CMSParPromoteBlocksToClaim are both set.
// CMSParPromoteBlocksToClaim is a collector-specific flag, so
// we'll let it take precedence.
@ -1188,7 +1188,23 @@ void Arguments::set_cms_and_parnew_gc_flags() {
" CMSParPromoteBlocksToClaim will take precedence.\n");
}
}
if (!FLAG_IS_DEFAULT(ResizeOldPLAB) && !ResizeOldPLAB) {
// OldPLAB sizing manually turned off: Use a larger default setting,
// unless it was manually specified. This is because a too-low value
// will slow down scavenges.
if (FLAG_IS_DEFAULT(CMSParPromoteBlocksToClaim)) {
FLAG_SET_ERGO(uintx, CMSParPromoteBlocksToClaim, 50); // default value before 6631166
}
}
// Overwrite OldPLABSize which is the variable we will internally use everywhere.
FLAG_SET_ERGO(uintx, OldPLABSize, CMSParPromoteBlocksToClaim);
// If either of the static initialization defaults has changed, note this
// modification.
if (!FLAG_IS_DEFAULT(CMSParPromoteBlocksToClaim) || !FLAG_IS_DEFAULT(OldPLABWeight)) {
CFLS_LAB::modify_initialization(OldPLABSize, OldPLABWeight);
}
}
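One hedged example of how the branches above compose (hypothetical command line; only flags shown in the code are involved):

// Hypothetical invocation: -XX:+UseConcMarkSweepGC -XX:-ResizeOldPLAB
//  - ResizeOldPLAB is explicitly false and CMSParPromoteBlocksToClaim was left
//    at its default, so the latter is raised ergonomically to 50;
//  - OldPLABSize is then overwritten with CMSParPromoteBlocksToClaim (50);
//  - since CMSParPromoteBlocksToClaim no longer has its default origin,
//    CFLS_LAB::modify_initialization(OldPLABSize, OldPLABWeight) re-seeds the
//    static OldPLAB statistics.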
#endif // KERNEL
inline uintx max_heap_for_compressed_oops() {
LP64_ONLY(return oopDesc::OopEncodingHeapMax - MaxPermSize - os::vm_page_size());
@ -2370,22 +2386,25 @@ SOLARIS_ONLY(
"ExtendedDTraceProbes flag is only applicable on Solaris\n");
return JNI_EINVAL;
#endif // ndef SOLARIS
} else
#ifdef ASSERT
if (match_option(option, "-XX:+FullGCALot", &tail)) {
} else if (match_option(option, "-XX:+FullGCALot", &tail)) {
FLAG_SET_CMDLINE(bool, FullGCALot, true);
// disable scavenge before parallel mark-compact
FLAG_SET_CMDLINE(bool, ScavengeBeforeFullGC, false);
} else
#endif
if (match_option(option, "-XX:ParCMSPromoteBlocksToClaim=", &tail)) {
} else if (match_option(option, "-XX:CMSParPromoteBlocksToClaim=", &tail)) {
julong cms_blocks_to_claim = (julong)atol(tail);
FLAG_SET_CMDLINE(uintx, CMSParPromoteBlocksToClaim, cms_blocks_to_claim);
jio_fprintf(defaultStream::error_stream(),
"Please use -XX:CMSParPromoteBlocksToClaim in place of "
"Please use -XX:OldPLABSize in place of "
"-XX:CMSParPromoteBlocksToClaim in the future\n");
} else if (match_option(option, "-XX:ParCMSPromoteBlocksToClaim=", &tail)) {
julong cms_blocks_to_claim = (julong)atol(tail);
FLAG_SET_CMDLINE(uintx, CMSParPromoteBlocksToClaim, cms_blocks_to_claim);
jio_fprintf(defaultStream::error_stream(),
"Please use -XX:OldPLABSize in place of "
"-XX:ParCMSPromoteBlocksToClaim in the future\n");
} else
if (match_option(option, "-XX:ParallelGCOldGenAllocBufferSize=", &tail)) {
} else if (match_option(option, "-XX:ParallelGCOldGenAllocBufferSize=", &tail)) {
julong old_plab_size = 0;
ArgsRange errcode = parse_memory_size(tail, &old_plab_size, 1);
if (errcode != arg_in_range) {
@ -2398,8 +2417,7 @@ SOLARIS_ONLY(
jio_fprintf(defaultStream::error_stream(),
"Please use -XX:OldPLABSize in place of "
"-XX:ParallelGCOldGenAllocBufferSize in the future\n");
} else
if (match_option(option, "-XX:ParallelGCToSpaceAllocBufferSize=", &tail)) {
} else if (match_option(option, "-XX:ParallelGCToSpaceAllocBufferSize=", &tail)) {
julong young_plab_size = 0;
ArgsRange errcode = parse_memory_size(tail, &young_plab_size, 1);
if (errcode != arg_in_range) {
@ -2412,8 +2430,7 @@ SOLARIS_ONLY(
jio_fprintf(defaultStream::error_stream(),
"Please use -XX:YoungPLABSize in place of "
"-XX:ParallelGCToSpaceAllocBufferSize in the future\n");
} else
if (match_option(option, "-XX:", &tail)) { // -XX:xxxx
} else if (match_option(option, "-XX:", &tail)) { // -XX:xxxx
// Skip -XX:Flags= since that case has already been handled
if (strncmp(tail, "Flags=", strlen("Flags=")) != 0) {
if (!process_argument(tail, args->ignoreUnrecognized, origin)) {
@ -2727,6 +2744,7 @@ jint Arguments::parse(const JavaVMInitArgs* args) {
return JNI_EINVAL;
}
#ifndef KERNEL
if (UseConcMarkSweepGC) {
// Set flags for CMS and ParNew. Check UseConcMarkSweep first
// to ensure that when both UseConcMarkSweepGC and UseParNewGC
@ -2744,6 +2762,7 @@ jint Arguments::parse(const JavaVMInitArgs* args) {
set_g1_gc_flags();
}
}
#endif // KERNEL
#ifdef SERIALGC
assert(verify_serial_gc_flags(), "SerialGC unset");

View File

@ -1355,10 +1355,46 @@ class CommandLineFlags {
product(uintx, ParGCDesiredObjsFromOverflowList, 20, \
"The desired number of objects to claim from the overflow list") \
\
product(uintx, CMSParPromoteBlocksToClaim, 50, \
product(uintx, CMSParPromoteBlocksToClaim, 16, \
"Number of blocks to attempt to claim when refilling CMS LAB for "\
"parallel GC.") \
\
product(uintx, OldPLABWeight, 50, \
"Percentage (0-100) used to weight the current sample when" \
"computing exponentially decaying average for resizing CMSParPromoteBlocksToClaim.") \
\
product(bool, ResizeOldPLAB, true, \
"Dynamically resize (old gen) promotion labs") \
\
product(bool, PrintOldPLAB, false, \
"Print (old gen) promotion labs sizing decisions") \
\
product(uintx, CMSOldPLABMin, 16, \
"Min size of CMS gen promotion lab caches per worker per blksize")\
\
product(uintx, CMSOldPLABMax, 1024, \
"Max size of CMS gen promotion lab caches per worker per blksize")\
\
product(uintx, CMSOldPLABNumRefills, 4, \
"Nominal number of refills of CMS gen promotion lab cache" \
" per worker per block size") \
\
product(bool, CMSOldPLABResizeQuicker, false, \
"Whether to react on-the-fly during a scavenge to a sudden" \
" change in block demand rate") \
\
product(uintx, CMSOldPLABToleranceFactor, 4, \
"The tolerance of the phase-change detector for on-the-fly" \
" PLAB resizing during a scavenge") \
\
product(uintx, CMSOldPLABReactivityFactor, 2, \
"The gain in the feedback loop for on-the-fly PLAB resizing" \
" during a scavenge") \
\
product(uintx, CMSOldPLABReactivityCeiling, 10, \
"The clamping of the gain in the feedback loop for on-the-fly" \
" PLAB resizing during a scavenge") \
\
product(bool, AlwaysPreTouch, false, \
"It forces all freshly committed pages to be pre-touched.") \
\
@ -1400,27 +1436,54 @@ class CommandLineFlags {
"Percentage (0-100) by which the CMS incremental mode duty cycle" \
" is shifted to the right within the period between young GCs") \
\
product(uintx, CMSExpAvgFactor, 25, \
"Percentage (0-100) used to weight the current sample when " \
"computing exponential averages for CMS statistics") \
product(uintx, CMSExpAvgFactor, 50, \
"Percentage (0-100) used to weight the current sample when" \
"computing exponential averages for CMS statistics.") \
\
product(uintx, CMS_FLSWeight, 50, \
"Percentage (0-100) used to weight the current sample when " \
"computing exponentially decating averages for CMS FLS statistics") \
product(uintx, CMS_FLSWeight, 75, \
"Percentage (0-100) used to weight the current sample when" \
"computing exponentially decating averages for CMS FLS statistics.") \
\
product(uintx, CMS_FLSPadding, 2, \
"The multiple of deviation from mean to use for buffering " \
product(uintx, CMS_FLSPadding, 1, \
"The multiple of deviation from mean to use for buffering" \
"against volatility in free list demand.") \
\
product(uintx, FLSCoalescePolicy, 2, \
"CMS: Aggression level for coalescing, increasing from 0 to 4") \
\
product(uintx, CMS_SweepWeight, 50, \
product(bool, FLSAlwaysCoalesceLarge, false, \
"CMS: Larger free blocks are always available for coalescing") \
\
product(double, FLSLargestBlockCoalesceProximity, 0.99, \
"CMS: the smaller the percentage the greater the coalition force")\
\
product(double, CMSSmallCoalSurplusPercent, 1.05, \
"CMS: the factor by which to inflate estimated demand of small" \
" block sizes to prevent coalescing with an adjoining block") \
\
product(double, CMSLargeCoalSurplusPercent, 0.95, \
"CMS: the factor by which to inflate estimated demand of large" \
" block sizes to prevent coalescing with an adjoining block") \
\
product(double, CMSSmallSplitSurplusPercent, 1.10, \
"CMS: the factor by which to inflate estimated demand of small" \
" block sizes to prevent splitting to supply demand for smaller" \
" blocks") \
\
product(double, CMSLargeSplitSurplusPercent, 1.00, \
"CMS: the factor by which to inflate estimated demand of large" \
" block sizes to prevent splitting to supply demand for smaller" \
" blocks") \
\
product(bool, CMSExtrapolateSweep, false, \
"CMS: cushion for block demand during sweep") \
\
product(uintx, CMS_SweepWeight, 75, \
"Percentage (0-100) used to weight the current sample when " \
"computing exponentially decaying average for inter-sweep " \
"duration") \
\
product(uintx, CMS_SweepPadding, 2, \
product(uintx, CMS_SweepPadding, 1, \
"The multiple of deviation from mean to use for buffering " \
"against volatility in inter-sweep duration.") \
\
@ -1459,6 +1522,13 @@ class CommandLineFlags {
product(uintx, CMSIndexedFreeListReplenish, 4, \
"Replenish and indexed free list with this number of chunks") \
\
product(bool, CMSReplenishIntermediate, true, \
"Replenish all intermediate free-list caches") \
\
product(bool, CMSSplitIndexedFreeListBlocks, true, \
"When satisfying batched demand, splot blocks from the " \
"IndexedFreeList whose size is a multiple of requested size") \
\
product(bool, CMSLoopWarn, false, \
"Warn in case of excessive CMS looping") \
\
@ -1593,6 +1663,18 @@ class CommandLineFlags {
"Bitmap operations should process at most this many bits" \
"between yields") \
\
product(bool, CMSDumpAtPromotionFailure, false, \
"Dump useful information about the state of the CMS old " \
" generation upon a promotion failure.") \
\
product(bool, CMSPrintChunksInDump, false, \
"In a dump enabled by CMSDumpAtPromotionFailure, include " \
" more detailed information about the free chunks.") \
\
product(bool, CMSPrintObjectsInDump, false, \
"In a dump enabled by CMSDumpAtPromotionFailure, include " \
" more detailed information about the allocated objects.") \
\
diagnostic(bool, FLSVerifyAllHeapReferences, false, \
"Verify that all refs across the FLS boundary " \
" are to valid objects") \
@ -1677,6 +1759,10 @@ class CommandLineFlags {
"The youngest generation collection does not require " \
"a guarantee of full promotion of all live objects.") \
\
product(bool, PrintPromotionFailure, false, \
"Print additional diagnostic information following " \
" promotion failure") \
\
notproduct(bool, PromotionFailureALot, false, \
"Use promotion failure handling on every youngest generation " \
"collection") \

View File

@ -128,7 +128,7 @@ void ClassLoadingService::notify_class_unloaded(instanceKlass* k) {
if (TraceClassUnloading) {
ResourceMark rm;
tty->print_cr("[Unloading class %s]", k->external_name());
gclog_or_tty->print_cr("[Unloading class %s]", k->external_name());
}
}