diff --git a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/binaryTreeDictionary.cpp b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/binaryTreeDictionary.cpp index eb1a1118d72..ec4caa22fc6 100644 --- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/binaryTreeDictionary.cpp +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/binaryTreeDictionary.cpp @@ -62,12 +62,13 @@ TreeList* TreeList::as_TreeList(TreeChunk* tc) { tl->link_head(tc); tl->link_tail(tc); tl->set_count(1); - tl->init_statistics(); + tl->init_statistics(true /* split_birth */); tl->setParent(NULL); tl->setLeft(NULL); tl->setRight(NULL); return tl; } + TreeList* TreeList::as_TreeList(HeapWord* addr, size_t size) { TreeChunk* tc = (TreeChunk*) addr; assert(size >= sizeof(TreeChunk), "Chunk is too small for a TreeChunk"); @@ -267,6 +268,31 @@ TreeChunk* TreeList::first_available() { return retTC; } +// Returns the block with the largest heap address amongst +// those in the list for this size; potentially slow and expensive, +// use with caution! +TreeChunk* TreeList::largest_address() { + guarantee(head() != NULL, "The head of the list cannot be NULL"); + FreeChunk* fc = head()->next(); + TreeChunk* retTC; + if (fc == NULL) { + retTC = head_as_TreeChunk(); + } else { + // walk down the list and return the one with the highest + // heap address among chunks of this size. + FreeChunk* last = fc; + while (fc->next() != NULL) { + if ((HeapWord*)last < (HeapWord*)fc) { + last = fc; + } + fc = fc->next(); + } + retTC = TreeChunk::as_TreeChunk(last); + } + assert(retTC->list() == this, "Wrong type of chunk."); + return retTC; +} + BinaryTreeDictionary::BinaryTreeDictionary(MemRegion mr, bool splay): _splay(splay) { @@ -379,7 +405,7 @@ BinaryTreeDictionary::getChunkFromTree(size_t size, Dither dither, bool splay) break; } // The evm code reset the hint of the candidate as - // at an interrim point. Why? Seems like this leaves + // at an interim point. Why? 
Seems like this leaves // the hint pointing to a list that didn't work. // curTL->set_hint(hintTL->size()); } @@ -436,7 +462,7 @@ FreeChunk* BinaryTreeDictionary::findLargestDict() const { TreeList *curTL = root(); if (curTL != NULL) { while(curTL->right() != NULL) curTL = curTL->right(); - return curTL->first_available(); + return curTL->largest_address(); } else { return NULL; } @@ -664,7 +690,7 @@ void BinaryTreeDictionary::insertChunkInTree(FreeChunk* fc) { } } TreeChunk* tc = TreeChunk::as_TreeChunk(fc); - // This chunk is being returned to the binary try. It's embedded + // This chunk is being returned to the binary tree. Its embedded // TreeList should be unused at this point. tc->initialize(); if (curTL != NULL) { // exact match @@ -807,6 +833,8 @@ void BinaryTreeDictionary::dictCensusUpdate(size_t size, bool split, bool birth) } bool BinaryTreeDictionary::coalDictOverPopulated(size_t size) { + if (FLSAlwaysCoalesceLarge) return true; + TreeList* list_of_size = findList(size); // None of requested size implies overpopulated. 
return list_of_size == NULL || list_of_size->coalDesired() <= 0 || @@ -854,17 +882,20 @@ class BeginSweepClosure : public AscendTreeCensusClosure { double _percentage; float _inter_sweep_current; float _inter_sweep_estimate; + float _intra_sweep_estimate; public: BeginSweepClosure(double p, float inter_sweep_current, - float inter_sweep_estimate) : + float inter_sweep_estimate, + float intra_sweep_estimate) : _percentage(p), _inter_sweep_current(inter_sweep_current), - _inter_sweep_estimate(inter_sweep_estimate) { } + _inter_sweep_estimate(inter_sweep_estimate), + _intra_sweep_estimate(intra_sweep_estimate) { } void do_list(FreeList* fl) { double coalSurplusPercent = _percentage; - fl->compute_desired(_inter_sweep_current, _inter_sweep_estimate); + fl->compute_desired(_inter_sweep_current, _inter_sweep_estimate, _intra_sweep_estimate); fl->set_coalDesired((ssize_t)((double)fl->desired() * coalSurplusPercent)); fl->set_beforeSweep(fl->count()); fl->set_bfrSurp(fl->surplus()); @@ -939,9 +970,10 @@ FreeChunk* BinaryTreeDictionary::find_chunk_ends_at(HeapWord* target) const { } void BinaryTreeDictionary::beginSweepDictCensus(double coalSurplusPercent, - float inter_sweep_current, float inter_sweep_estimate) { + float inter_sweep_current, float inter_sweep_estimate, float intra_sweep_estimate) { BeginSweepClosure bsc(coalSurplusPercent, inter_sweep_current, - inter_sweep_estimate); + inter_sweep_estimate, + intra_sweep_estimate); bsc.do_tree(root()); } @@ -1077,13 +1109,13 @@ void BinaryTreeDictionary::reportStatistics() const { // Print census information - counts, births, deaths, etc. // for each list in the tree. Also print some summary // information. 
-class printTreeCensusClosure : public AscendTreeCensusClosure { +class PrintTreeCensusClosure : public AscendTreeCensusClosure { int _print_line; size_t _totalFree; FreeList _total; public: - printTreeCensusClosure() { + PrintTreeCensusClosure() { _print_line = 0; _totalFree = 0; } @@ -1113,7 +1145,7 @@ void BinaryTreeDictionary::printDictCensus(void) const { gclog_or_tty->print("\nBinaryTree\n"); FreeList::print_labels_on(gclog_or_tty, "size"); - printTreeCensusClosure ptc; + PrintTreeCensusClosure ptc; ptc.do_tree(root()); FreeList* total = ptc.total(); @@ -1130,6 +1162,38 @@ void BinaryTreeDictionary::printDictCensus(void) const { /(total->desired() != 0 ? (double)total->desired() : 1.0)); } +class PrintFreeListsClosure : public AscendTreeCensusClosure { + outputStream* _st; + int _print_line; + + public: + PrintFreeListsClosure(outputStream* st) { + _st = st; + _print_line = 0; + } + void do_list(FreeList* fl) { + if (++_print_line >= 40) { + FreeList::print_labels_on(_st, "size"); + _print_line = 0; + } + fl->print_on(gclog_or_tty); + size_t sz = fl->size(); + for (FreeChunk* fc = fl->head(); fc != NULL; + fc = fc->next()) { + _st->print_cr("\t[" PTR_FORMAT "," PTR_FORMAT ") %s", + fc, (HeapWord*)fc + sz, + fc->cantCoalesce() ? "\t CC" : ""); + } + } +}; + +void BinaryTreeDictionary::print_free_lists(outputStream* st) const { + + FreeList::print_labels_on(st, "size"); + PrintFreeListsClosure pflc(st); + pflc.do_tree(root()); +} + // Verify the following tree invariants: // . _root has no parent // . 
parent and child point to each other diff --git a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/binaryTreeDictionary.hpp b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/binaryTreeDictionary.hpp index d45193be9fb..0a107da91ab 100644 --- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/binaryTreeDictionary.hpp +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/binaryTreeDictionary.hpp @@ -42,9 +42,6 @@ class TreeList: public FreeList { friend class AscendTreeCensusClosure; friend class DescendTreeCensusClosure; friend class DescendTreeSearchClosure; - TreeList* _parent; - TreeList* _left; - TreeList* _right; protected: TreeList* parent() const { return _parent; } @@ -82,6 +79,11 @@ class TreeList: public FreeList { // to a TreeChunk. TreeChunk* first_available(); + // Returns the block with the largest heap address amongst + // those in the list for this size; potentially slow and expensive, + // use with caution! + TreeChunk* largest_address(); + // removeChunkReplaceIfNeeded() removes the given "tc" from the TreeList. // If "tc" is the first chunk in the list, it is also the // TreeList that is the node in the tree. removeChunkReplaceIfNeeded() @@ -254,8 +256,9 @@ class BinaryTreeDictionary: public FreeBlockDictionary { // Methods called at the beginning of a sweep to prepare the // statistics for the sweep. void beginSweepDictCensus(double coalSurplusPercent, - float sweep_current, - float sweep_estimate); + float inter_sweep_current, + float inter_sweep_estimate, + float intra_sweep_estimate); // Methods called after the end of a sweep to modify the // statistics for the sweep. void endSweepDictCensus(double splitSurplusPercent); @@ -269,6 +272,7 @@ class BinaryTreeDictionary: public FreeBlockDictionary { // Print the statistcis for all the lists in the tree. Also may // print out summaries. void printDictCensus(void) const; + void print_free_lists(outputStream* st) const; // For debugging. 
Returns the sum of the _returnedBytes for // all lists in the tree. diff --git a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/cmsLockVerifier.cpp b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/cmsLockVerifier.cpp index 00ef43f6957..b0ee1e8869e 100644 --- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/cmsLockVerifier.cpp +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/cmsLockVerifier.cpp @@ -32,7 +32,9 @@ // threads. The second argument is in support of an extra locking // check for CFL spaces' free list locks. #ifndef PRODUCT -void CMSLockVerifier::assert_locked(const Mutex* lock, const Mutex* p_lock) { +void CMSLockVerifier::assert_locked(const Mutex* lock, + const Mutex* p_lock1, + const Mutex* p_lock2) { if (!Universe::is_fully_initialized()) { return; } @@ -40,7 +42,7 @@ void CMSLockVerifier::assert_locked(const Mutex* lock, const Mutex* p_lock) { Thread* myThread = Thread::current(); if (lock == NULL) { // a "lock-free" structure, e.g. MUT, protected by CMS token - assert(p_lock == NULL, "Unexpected state"); + assert(p_lock1 == NULL && p_lock2 == NULL, "Unexpected caller error"); if (myThread->is_ConcurrentGC_thread()) { // This test might have to change in the future, if there can be // multiple peer CMS threads. But for now, if we're testing the CMS @@ -60,36 +62,39 @@ void CMSLockVerifier::assert_locked(const Mutex* lock, const Mutex* p_lock) { return; } - if (ParallelGCThreads == 0) { + if (myThread->is_VM_thread() + || myThread->is_ConcurrentGC_thread() + || myThread->is_Java_thread()) { + // Make sure that we are holding the associated lock. assert_lock_strong(lock); - } else { - if (myThread->is_VM_thread() - || myThread->is_ConcurrentGC_thread() - || myThread->is_Java_thread()) { - // Make sure that we are holding the associated lock. 
- assert_lock_strong(lock); - // The checking of p_lock is a spl case for CFLS' free list - // locks: we make sure that none of the parallel GC work gang - // threads are holding "sub-locks" of freeListLock(). We check only - // the parDictionaryAllocLock because the others are too numerous. - // This spl case code is somewhat ugly and any improvements - // are welcome XXX FIX ME!! - if (p_lock != NULL) { - assert(!p_lock->is_locked() || p_lock->owned_by_self(), - "Possible race between this and parallel GC threads"); - } - } else if (myThread->is_GC_task_thread()) { - // Make sure that the VM or CMS thread holds lock on our behalf - // XXX If there were a concept of a gang_master for a (set of) - // gang_workers, we could have used the identity of that thread - // for checking ownership here; for now we just disjunct. - assert(lock->owner() == VMThread::vm_thread() || - lock->owner() == ConcurrentMarkSweepThread::cmst(), - "Should be locked by VM thread or CMS thread on my behalf"); - } else { - // Make sure we didn't miss some obscure corner case - ShouldNotReachHere(); + // The checking of p_lock is a spl case for CFLS' free list + // locks: we make sure that none of the parallel GC work gang + // threads are holding "sub-locks" of freeListLock(). We check only + // the parDictionaryAllocLock because the others are too numerous. + // This spl case code is somewhat ugly and any improvements + // are welcome. 
+ assert(p_lock1 == NULL || !p_lock1->is_locked() || p_lock1->owned_by_self(), + "Possible race between this and parallel GC threads"); + assert(p_lock2 == NULL || !p_lock2->is_locked() || p_lock2->owned_by_self(), + "Possible race between this and parallel GC threads"); + } else if (myThread->is_GC_task_thread()) { + // Make sure that the VM or CMS thread holds lock on our behalf + // XXX If there were a concept of a gang_master for a (set of) + // gang_workers, we could have used the identity of that thread + // for checking ownership here; for now we just disjunct. + assert(lock->owner() == VMThread::vm_thread() || + lock->owner() == ConcurrentMarkSweepThread::cmst(), + "Should be locked by VM thread or CMS thread on my behalf"); + if (p_lock1 != NULL) { + assert_lock_strong(p_lock1); } + if (p_lock2 != NULL) { + assert_lock_strong(p_lock2); + } + } else { + // Make sure we didn't miss some other thread type calling into here; + // perhaps as a result of future VM evolution. + ShouldNotReachHere(); } } #endif diff --git a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/cmsLockVerifier.hpp b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/cmsLockVerifier.hpp index f2fe4514061..943eba0374c 100644 --- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/cmsLockVerifier.hpp +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/cmsLockVerifier.hpp @@ -29,8 +29,11 @@ // the parallel threads. 
class CMSLockVerifier: AllStatic { public: - static void assert_locked(const Mutex* lock, const Mutex* p_lock) + static void assert_locked(const Mutex* lock, const Mutex* p_lock1, const Mutex* p_lock2) PRODUCT_RETURN; + static void assert_locked(const Mutex* lock, const Mutex* p_lock) { + assert_locked(lock, p_lock, NULL); + } static void assert_locked(const Mutex* lock) { assert_locked(lock, NULL); } diff --git a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp index 6b4bd36d934..27ac4a84e79 100644 --- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp @@ -62,18 +62,15 @@ CompactibleFreeListSpace::CompactibleFreeListSpace(BlockOffsetSharedArray* bs, // implementation, namely, the simple binary tree (splaying // temporarily disabled). switch (dictionaryChoice) { - case FreeBlockDictionary::dictionaryBinaryTree: - _dictionary = new BinaryTreeDictionary(mr); - break; case FreeBlockDictionary::dictionarySplayTree: case FreeBlockDictionary::dictionarySkipList: default: warning("dictionaryChoice: selected option not understood; using" " default BinaryTreeDictionary implementation instead."); + case FreeBlockDictionary::dictionaryBinaryTree: _dictionary = new BinaryTreeDictionary(mr); break; } - splitBirth(mr.word_size()); assert(_dictionary != NULL, "CMS dictionary initialization"); // The indexed free lists are initially all empty and are lazily // filled in on demand. Initialize the array elements to NULL. 
@@ -388,6 +385,105 @@ size_t CompactibleFreeListSpace::max_alloc_in_words() const { return res; } +void CompactibleFreeListSpace::print_indexed_free_lists(outputStream* st) +const { + reportIndexedFreeListStatistics(); + gclog_or_tty->print_cr("Layout of Indexed Freelists"); + gclog_or_tty->print_cr("---------------------------"); + FreeList::print_labels_on(st, "size"); + for (size_t i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) { + _indexedFreeList[i].print_on(gclog_or_tty); + for (FreeChunk* fc = _indexedFreeList[i].head(); fc != NULL; + fc = fc->next()) { + gclog_or_tty->print_cr("\t[" PTR_FORMAT "," PTR_FORMAT ") %s", + fc, (HeapWord*)fc + i, + fc->cantCoalesce() ? "\t CC" : ""); + } + } +} + +void CompactibleFreeListSpace::print_promo_info_blocks(outputStream* st) +const { + _promoInfo.print_on(st); +} + +void CompactibleFreeListSpace::print_dictionary_free_lists(outputStream* st) +const { + _dictionary->reportStatistics(); + st->print_cr("Layout of Freelists in Tree"); + st->print_cr("---------------------------"); + _dictionary->print_free_lists(st); +} + +class BlkPrintingClosure: public BlkClosure { + const CMSCollector* _collector; + const CompactibleFreeListSpace* _sp; + const CMSBitMap* _live_bit_map; + const bool _post_remark; + outputStream* _st; +public: + BlkPrintingClosure(const CMSCollector* collector, + const CompactibleFreeListSpace* sp, + const CMSBitMap* live_bit_map, + outputStream* st): + _collector(collector), + _sp(sp), + _live_bit_map(live_bit_map), + _post_remark(collector->abstract_state() > CMSCollector::FinalMarking), + _st(st) { } + size_t do_blk(HeapWord* addr); +}; + +size_t BlkPrintingClosure::do_blk(HeapWord* addr) { + size_t sz = _sp->block_size_no_stall(addr, _collector); + assert(sz != 0, "Should always be able to compute a size"); + if (_sp->block_is_obj(addr)) { + const bool dead = _post_remark && !_live_bit_map->isMarked(addr); + _st->print_cr(PTR_FORMAT ": %s object of size " SIZE_FORMAT "%s", + addr, + dead ? 
"dead" : "live", + sz, + (!dead && CMSPrintObjectsInDump) ? ":" : "."); + if (CMSPrintObjectsInDump && !dead) { + oop(addr)->print_on(_st); + _st->print_cr("--------------------------------------"); + } + } else { // free block + _st->print_cr(PTR_FORMAT ": free block of size " SIZE_FORMAT "%s", + addr, sz, CMSPrintChunksInDump ? ":" : "."); + if (CMSPrintChunksInDump) { + ((FreeChunk*)addr)->print_on(_st); + _st->print_cr("--------------------------------------"); + } + } + return sz; +} + +void CompactibleFreeListSpace::dump_at_safepoint_with_locks(CMSCollector* c, + outputStream* st) { + st->print_cr("\n========================="); + st->print_cr("Block layout in CMS Heap:"); + st->print_cr("========================="); + BlkPrintingClosure bpcl(c, this, c->markBitMap(), st); + blk_iterate(&bpcl); + + st->print_cr("\n======================================="); + st->print_cr("Order & Layout of Promotion Info Blocks"); + st->print_cr("======================================="); + print_promo_info_blocks(st); + + st->print_cr("\n==========================="); + st->print_cr("Order of Indexed Free Lists"); + st->print_cr("========================="); + print_indexed_free_lists(st); + + st->print_cr("\n================================="); + st->print_cr("Order of Free Lists in Dictionary"); + st->print_cr("================================="); + print_dictionary_free_lists(st); +} + + void CompactibleFreeListSpace::reportFreeListStatistics() const { assert_lock_strong(&_freelistLock); assert(PrintFLSStatistics != 0, "Reporting error"); @@ -449,37 +545,37 @@ void CompactibleFreeListSpace::set_end(HeapWord* value) { if (prevEnd != NULL) { // Resize the underlying block offset table. _bt.resize(pointer_delta(value, bottom())); - if (value <= prevEnd) { - assert(value >= unallocated_block(), "New end is below unallocated block"); - } else { - // Now, take this new chunk and add it to the free blocks. - // Note that the BOT has not yet been updated for this block. 
- size_t newFcSize = pointer_delta(value, prevEnd); - // XXX This is REALLY UGLY and should be fixed up. XXX - if (!_adaptive_freelists && _smallLinearAllocBlock._ptr == NULL) { - // Mark the boundary of the new block in BOT - _bt.mark_block(prevEnd, value); - // put it all in the linAB - if (ParallelGCThreads == 0) { - _smallLinearAllocBlock._ptr = prevEnd; - _smallLinearAllocBlock._word_size = newFcSize; - repairLinearAllocBlock(&_smallLinearAllocBlock); - } else { // ParallelGCThreads > 0 - MutexLockerEx x(parDictionaryAllocLock(), - Mutex::_no_safepoint_check_flag); - _smallLinearAllocBlock._ptr = prevEnd; - _smallLinearAllocBlock._word_size = newFcSize; - repairLinearAllocBlock(&_smallLinearAllocBlock); - } - // Births of chunks put into a LinAB are not recorded. Births - // of chunks as they are allocated out of a LinAB are. + if (value <= prevEnd) { + assert(value >= unallocated_block(), "New end is below unallocated block"); } else { - // Add the block to the free lists, if possible coalescing it - // with the last free block, and update the BOT and census data. - addChunkToFreeListsAtEndRecordingStats(prevEnd, newFcSize); + // Now, take this new chunk and add it to the free blocks. + // Note that the BOT has not yet been updated for this block. + size_t newFcSize = pointer_delta(value, prevEnd); + // XXX This is REALLY UGLY and should be fixed up. 
XXX + if (!_adaptive_freelists && _smallLinearAllocBlock._ptr == NULL) { + // Mark the boundary of the new block in BOT + _bt.mark_block(prevEnd, value); + // put it all in the linAB + if (ParallelGCThreads == 0) { + _smallLinearAllocBlock._ptr = prevEnd; + _smallLinearAllocBlock._word_size = newFcSize; + repairLinearAllocBlock(&_smallLinearAllocBlock); + } else { // ParallelGCThreads > 0 + MutexLockerEx x(parDictionaryAllocLock(), + Mutex::_no_safepoint_check_flag); + _smallLinearAllocBlock._ptr = prevEnd; + _smallLinearAllocBlock._word_size = newFcSize; + repairLinearAllocBlock(&_smallLinearAllocBlock); + } + // Births of chunks put into a LinAB are not recorded. Births + // of chunks as they are allocated out of a LinAB are. + } else { + // Add the block to the free lists, if possible coalescing it + // with the last free block, and update the BOT and census data. + addChunkToFreeListsAtEndRecordingStats(prevEnd, newFcSize); + } } } - } } class FreeListSpace_DCTOC : public Filtering_DCTOC { @@ -732,7 +828,7 @@ void CompactibleFreeListSpace::safe_object_iterate(ObjectClosure* blk) { void CompactibleFreeListSpace::object_iterate_mem(MemRegion mr, UpwardsObjectClosure* cl) { - assert_locked(); + assert_locked(freelistLock()); NOT_PRODUCT(verify_objects_initialized()); Space::object_iterate_mem(mr, cl); } @@ -1212,12 +1308,15 @@ bool CompactibleFreeListSpace::verifyChunkInFreeLists(FreeChunk* fc) const { void CompactibleFreeListSpace::assert_locked() const { CMSLockVerifier::assert_locked(freelistLock(), parDictionaryAllocLock()); } + +void CompactibleFreeListSpace::assert_locked(const Mutex* lock) const { + CMSLockVerifier::assert_locked(lock); +} #endif FreeChunk* CompactibleFreeListSpace::allocateScratch(size_t size) { // In the parallel case, the main thread holds the free list lock // on behalf the parallel threads. - assert_locked(); FreeChunk* fc; { // If GC is parallel, this might be called by several threads. 
@@ -1298,17 +1397,18 @@ CompactibleFreeListSpace::getChunkFromLinearAllocBlock(LinearAllocBlock *blk, res = blk->_ptr; _bt.allocated(res, blk->_word_size); } else if (size + MinChunkSize <= blk->_refillSize) { + size_t sz = blk->_word_size; // Update _unallocated_block if the size is such that chunk would be // returned to the indexed free list. All other chunks in the indexed // free lists are allocated from the dictionary so that _unallocated_block // has already been adjusted for them. Do it here so that the cost // for all chunks added back to the indexed free lists. - if (blk->_word_size < SmallForDictionary) { - _bt.allocated(blk->_ptr, blk->_word_size); + if (sz < SmallForDictionary) { + _bt.allocated(blk->_ptr, sz); } // Return the chunk that isn't big enough, and then refill below. - addChunkToFreeLists(blk->_ptr, blk->_word_size); - _bt.verify_single_block(blk->_ptr, (blk->_ptr + blk->_word_size)); + addChunkToFreeLists(blk->_ptr, sz); + splitBirth(sz); // Don't keep statistics on adding back chunk from a LinAB. } else { // A refilled block would not satisfy the request. @@ -1376,11 +1476,13 @@ CompactibleFreeListSpace::getChunkFromIndexedFreeList(size_t size) { res = getChunkFromIndexedFreeListHelper(size); } _bt.verify_not_unallocated((HeapWord*) res, size); + assert(res == NULL || res->size() == size, "Incorrect block size"); return res; } FreeChunk* -CompactibleFreeListSpace::getChunkFromIndexedFreeListHelper(size_t size) { +CompactibleFreeListSpace::getChunkFromIndexedFreeListHelper(size_t size, + bool replenish) { assert_locked(); FreeChunk* fc = NULL; if (size < SmallForDictionary) { @@ -1398,54 +1500,66 @@ CompactibleFreeListSpace::getChunkFromIndexedFreeListHelper(size_t size) { // and replenishing indexed lists from the small linAB. 
// FreeChunk* newFc = NULL; - size_t replenish_size = CMSIndexedFreeListReplenish * size; + const size_t replenish_size = CMSIndexedFreeListReplenish * size; if (replenish_size < SmallForDictionary) { // Do not replenish from an underpopulated size. if (_indexedFreeList[replenish_size].surplus() > 0 && _indexedFreeList[replenish_size].head() != NULL) { - newFc = - _indexedFreeList[replenish_size].getChunkAtHead(); - } else { + newFc = _indexedFreeList[replenish_size].getChunkAtHead(); + } else if (bestFitFirst()) { newFc = bestFitSmall(replenish_size); } } - if (newFc != NULL) { - splitDeath(replenish_size); - } else if (replenish_size > size) { + if (newFc == NULL && replenish_size > size) { assert(CMSIndexedFreeListReplenish > 1, "ctl pt invariant"); - newFc = - getChunkFromIndexedFreeListHelper(replenish_size); + newFc = getChunkFromIndexedFreeListHelper(replenish_size, false); } + // Note: The stats update re split-death of block obtained above + // will be recorded below precisely when we know we are going to + // be actually splitting it into more than one pieces below. if (newFc != NULL) { - assert(newFc->size() == replenish_size, "Got wrong size"); - size_t i; - FreeChunk *curFc, *nextFc; - // carve up and link blocks 0, ..., CMSIndexedFreeListReplenish - 2 - // The last chunk is not added to the lists but is returned as the - // free chunk. - for (curFc = newFc, nextFc = (FreeChunk*)((HeapWord*)curFc + size), - i = 0; - i < (CMSIndexedFreeListReplenish - 1); - curFc = nextFc, nextFc = (FreeChunk*)((HeapWord*)nextFc + size), - i++) { + if (replenish || CMSReplenishIntermediate) { + // Replenish this list and return one block to caller. 
+ size_t i; + FreeChunk *curFc, *nextFc; + size_t num_blk = newFc->size() / size; + assert(num_blk >= 1, "Smaller than requested?"); + assert(newFc->size() % size == 0, "Should be integral multiple of request"); + if (num_blk > 1) { + // we are sure we will be splitting the block just obtained + // into multiple pieces; record the split-death of the original + splitDeath(replenish_size); + } + // carve up and link blocks 0, ..., num_blk - 2 + // The last chunk is not added to the lists but is returned as the + // free chunk. + for (curFc = newFc, nextFc = (FreeChunk*)((HeapWord*)curFc + size), + i = 0; + i < (num_blk - 1); + curFc = nextFc, nextFc = (FreeChunk*)((HeapWord*)nextFc + size), + i++) { + curFc->setSize(size); + // Don't record this as a return in order to try and + // determine the "returns" from a GC. + _bt.verify_not_unallocated((HeapWord*) fc, size); + _indexedFreeList[size].returnChunkAtTail(curFc, false); + _bt.mark_block((HeapWord*)curFc, size); + splitBirth(size); + // Don't record the initial population of the indexed list + // as a split birth. + } + + // check that the arithmetic was OK above + assert((HeapWord*)nextFc == (HeapWord*)newFc + num_blk*size, + "inconsistency in carving newFc"); curFc->setSize(size); - // Don't record this as a return in order to try and - // determine the "returns" from a GC. - _bt.verify_not_unallocated((HeapWord*) fc, size); - _indexedFreeList[size].returnChunkAtTail(curFc, false); _bt.mark_block((HeapWord*)curFc, size); splitBirth(size); - // Don't record the initial population of the indexed list - // as a split birth. 
+ fc = curFc; + } else { + // Return entire block to caller + fc = newFc; } - - // check that the arithmetic was OK above - assert((HeapWord*)nextFc == (HeapWord*)newFc + replenish_size, - "inconsistency in carving newFc"); - curFc->setSize(size); - _bt.mark_block((HeapWord*)curFc, size); - splitBirth(size); - return curFc; } } } else { @@ -1453,7 +1567,7 @@ CompactibleFreeListSpace::getChunkFromIndexedFreeListHelper(size_t size) { // replenish the indexed free list. fc = getChunkFromDictionaryExact(size); } - assert(fc == NULL || fc->isFree(), "Should be returning a free chunk"); + // assert(fc == NULL || fc->isFree(), "Should be returning a free chunk"); return fc; } @@ -1512,6 +1626,11 @@ CompactibleFreeListSpace::returnChunkToDictionary(FreeChunk* chunk) { // adjust _unallocated_block downward, as necessary _bt.freed((HeapWord*)chunk, size); _dictionary->returnChunk(chunk); +#ifndef PRODUCT + if (CMSCollector::abstract_state() != CMSCollector::Sweeping) { + TreeChunk::as_TreeChunk(chunk)->list()->verify_stats(); + } +#endif // PRODUCT } void @@ -1525,6 +1644,11 @@ CompactibleFreeListSpace::returnChunkToFreeList(FreeChunk* fc) { } else { _indexedFreeList[size].returnChunkAtHead(fc); } +#ifndef PRODUCT + if (CMSCollector::abstract_state() != CMSCollector::Sweeping) { + _indexedFreeList[size].verify_stats(); + } +#endif // PRODUCT } // Add chunk to end of last block -- if it's the largest @@ -1537,7 +1661,6 @@ CompactibleFreeListSpace::addChunkToFreeListsAtEndRecordingStats( HeapWord* chunk, size_t size) { // check that the chunk does lie in this space! assert(chunk != NULL && is_in_reserved(chunk), "Not in this space!"); - assert_locked(); // One of the parallel gc task threads may be here // whilst others are allocating. 
Mutex* lock = NULL; @@ -1991,24 +2114,26 @@ double CompactibleFreeListSpace::flsFrag() const { return frag; } -#define CoalSurplusPercent 1.05 -#define SplitSurplusPercent 1.10 - void CompactibleFreeListSpace::beginSweepFLCensus( float inter_sweep_current, - float inter_sweep_estimate) { + float inter_sweep_estimate, + float intra_sweep_estimate) { assert_locked(); size_t i; for (i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) { FreeList* fl = &_indexedFreeList[i]; - fl->compute_desired(inter_sweep_current, inter_sweep_estimate); - fl->set_coalDesired((ssize_t)((double)fl->desired() * CoalSurplusPercent)); + if (PrintFLSStatistics > 1) { + gclog_or_tty->print("size[%d] : ", i); + } + fl->compute_desired(inter_sweep_current, inter_sweep_estimate, intra_sweep_estimate); + fl->set_coalDesired((ssize_t)((double)fl->desired() * CMSSmallCoalSurplusPercent)); fl->set_beforeSweep(fl->count()); fl->set_bfrSurp(fl->surplus()); } - _dictionary->beginSweepDictCensus(CoalSurplusPercent, + _dictionary->beginSweepDictCensus(CMSLargeCoalSurplusPercent, inter_sweep_current, - inter_sweep_estimate); + inter_sweep_estimate, + intra_sweep_estimate); } void CompactibleFreeListSpace::setFLSurplus() { @@ -2017,7 +2142,7 @@ void CompactibleFreeListSpace::setFLSurplus() { for (i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) { FreeList *fl = &_indexedFreeList[i]; fl->set_surplus(fl->count() - - (ssize_t)((double)fl->desired() * SplitSurplusPercent)); + (ssize_t)((double)fl->desired() * CMSSmallSplitSurplusPercent)); } } @@ -2048,6 +2173,11 @@ void CompactibleFreeListSpace::clearFLCensus() { } void CompactibleFreeListSpace::endSweepFLCensus(size_t sweep_count) { + if (PrintFLSStatistics > 0) { + HeapWord* largestAddr = (HeapWord*) dictionary()->findLargestDict(); + gclog_or_tty->print_cr("CMS: Large block " PTR_FORMAT, + largestAddr); + } setFLSurplus(); setFLHints(); if (PrintGC && PrintFLSCensus > 0) { @@ -2055,7 +2185,7 @@ void 
CompactibleFreeListSpace::endSweepFLCensus(size_t sweep_count) { } clearFLCensus(); assert_locked(); - _dictionary->endSweepDictCensus(SplitSurplusPercent); + _dictionary->endSweepDictCensus(CMSLargeSplitSurplusPercent); } bool CompactibleFreeListSpace::coalOverPopulated(size_t size) { @@ -2312,13 +2442,18 @@ void CompactibleFreeListSpace::verifyIndexedFreeLists() const { } void CompactibleFreeListSpace::verifyIndexedFreeList(size_t size) const { - FreeChunk* fc = _indexedFreeList[size].head(); + FreeChunk* fc = _indexedFreeList[size].head(); + FreeChunk* tail = _indexedFreeList[size].tail(); + size_t num = _indexedFreeList[size].count(); + size_t n = 0; guarantee((size % 2 == 0) || fc == NULL, "Odd slots should be empty"); - for (; fc != NULL; fc = fc->next()) { + for (; fc != NULL; fc = fc->next(), n++) { guarantee(fc->size() == size, "Size inconsistency"); guarantee(fc->isFree(), "!free?"); guarantee(fc->next() == NULL || fc->next()->prev() == fc, "Broken list"); + guarantee((fc->next() == NULL) == (fc == tail), "Incorrect tail"); } + guarantee(n == num, "Incorrect count"); } #ifndef PRODUCT @@ -2516,11 +2651,41 @@ void PromotionInfo::startTrackingPromotions() { _tracking = true; } -void PromotionInfo::stopTrackingPromotions() { +#define CMSPrintPromoBlockInfo 1 + +void PromotionInfo::stopTrackingPromotions(uint worker_id) { assert(_spoolHead == _spoolTail && _firstIndex == _nextIndex, "spooling inconsistency?"); _firstIndex = _nextIndex = 1; _tracking = false; + if (CMSPrintPromoBlockInfo > 1) { + print_statistics(worker_id); + } +} + +void PromotionInfo::print_statistics(uint worker_id) const { + assert(_spoolHead == _spoolTail && _firstIndex == _nextIndex, + "Else will undercount"); + assert(CMSPrintPromoBlockInfo > 0, "Else unnecessary call"); + // Count the number of blocks and slots in the free pool + size_t slots = 0; + size_t blocks = 0; + for (SpoolBlock* cur_spool = _spareSpool; + cur_spool != NULL; + cur_spool = cur_spool->nextSpoolBlock) { + // the 
first entry is just a self-pointer; indices 1 through + // bufferSize - 1 are occupied (thus, bufferSize - 1 slots). + guarantee((void*)cur_spool->displacedHdr == (void*)&cur_spool->displacedHdr, + "first entry of displacedHdr should be self-referential"); + slots += cur_spool->bufferSize - 1; + blocks++; + } + if (_spoolHead != NULL) { + slots += _spoolHead->bufferSize - 1; + blocks++; + } + gclog_or_tty->print_cr(" [worker %d] promo_blocks = %d, promo_slots = %d ", + worker_id, blocks, slots); } // When _spoolTail is not NULL, then the slot <_spoolTail, _nextIndex> @@ -2584,15 +2749,84 @@ void PromotionInfo::verify() const { guarantee(numDisplacedHdrs == numObjsWithDisplacedHdrs, "Displaced hdr count"); } +void PromotionInfo::print_on(outputStream* st) const { + SpoolBlock* curSpool = NULL; + size_t i = 0; + st->print_cr("start & end indices: [" SIZE_FORMAT ", " SIZE_FORMAT ")", + _firstIndex, _nextIndex); + for (curSpool = _spoolHead; curSpool != _spoolTail && curSpool != NULL; + curSpool = curSpool->nextSpoolBlock) { + curSpool->print_on(st); + st->print_cr(" active "); + i++; + } + for (curSpool = _spoolTail; curSpool != NULL; + curSpool = curSpool->nextSpoolBlock) { + curSpool->print_on(st); + st->print_cr(" inactive "); + i++; + } + for (curSpool = _spareSpool; curSpool != NULL; + curSpool = curSpool->nextSpoolBlock) { + curSpool->print_on(st); + st->print_cr(" free "); + i++; + } + st->print_cr(SIZE_FORMAT " header spooling blocks", i); +} + +void SpoolBlock::print_on(outputStream* st) const { + st->print("[" PTR_FORMAT "," PTR_FORMAT "), " SIZE_FORMAT " HeapWords -> " PTR_FORMAT, + this, (HeapWord*)displacedHdr + bufferSize, + bufferSize, nextSpoolBlock); +} + +/////////////////////////////////////////////////////////////////////////// +// CFLS_LAB +/////////////////////////////////////////////////////////////////////////// + +#define VECTOR_257(x) \ + /* 1 2 3 4 5 6 7 8 9 1x 11 12 13 14 15 16 17 18 19 2x 21 22 23 24 25 26 27 28 29 3x 31 32 */ \ + { x, x, 
x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, \ + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, \ + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, \ + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, \ + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, \ + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, \ + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, \ + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, \ + x } + +// Initialize with default setting of CMSParPromoteBlocksToClaim, _not_ +// OldPLABSize, whose static default is different; if overridden at the +// command-line, this will get reinitialized via a call to +// modify_initialization() below. 
+AdaptiveWeightedAverage CFLS_LAB::_blocks_to_claim[] = + VECTOR_257(AdaptiveWeightedAverage(OldPLABWeight, (float)CMSParPromoteBlocksToClaim)); +size_t CFLS_LAB::_global_num_blocks[] = VECTOR_257(0); +int CFLS_LAB::_global_num_workers[] = VECTOR_257(0); CFLS_LAB::CFLS_LAB(CompactibleFreeListSpace* cfls) : _cfls(cfls) { - _blocks_to_claim = CMSParPromoteBlocksToClaim; + assert(CompactibleFreeListSpace::IndexSetSize == 257, "Modify VECTOR_257() macro above"); for (size_t i = CompactibleFreeListSpace::IndexSetStart; i < CompactibleFreeListSpace::IndexSetSize; i += CompactibleFreeListSpace::IndexSetStride) { _indexedFreeList[i].set_size(i); + _num_blocks[i] = 0; + } +} + +static bool _CFLS_LAB_modified = false; + +void CFLS_LAB::modify_initialization(size_t n, unsigned wt) { + assert(!_CFLS_LAB_modified, "Call only once"); + _CFLS_LAB_modified = true; + for (size_t i = CompactibleFreeListSpace::IndexSetStart; + i < CompactibleFreeListSpace::IndexSetSize; + i += CompactibleFreeListSpace::IndexSetStride) { + _blocks_to_claim[i].modify(n, wt, true /* force */); } } @@ -2607,11 +2841,9 @@ HeapWord* CFLS_LAB::alloc(size_t word_sz) { if (res == NULL) return NULL; } else { FreeList* fl = &_indexedFreeList[word_sz]; - bool filled = false; //TRAP if (fl->count() == 0) { - bool filled = true; //TRAP // Attempt to refill this local free list. - _cfls->par_get_chunk_of_blocks(word_sz, _blocks_to_claim, fl); + get_from_global_pool(word_sz, fl); // If it didn't work, give up. 
if (fl->count() == 0) return NULL; } @@ -2626,80 +2858,190 @@ HeapWord* CFLS_LAB::alloc(size_t word_sz) { return (HeapWord*)res; } -void CFLS_LAB::retire() { - for (size_t i = CompactibleFreeListSpace::IndexSetStart; +// Get a chunk of blocks of the right size and update related +// book-keeping stats +void CFLS_LAB::get_from_global_pool(size_t word_sz, FreeList* fl) { + // Get the #blocks we want to claim + size_t n_blks = (size_t)_blocks_to_claim[word_sz].average(); + assert(n_blks > 0, "Error"); + assert(ResizePLAB || n_blks == OldPLABSize, "Error"); + // In some cases, when the application has a phase change, + // there may be a sudden and sharp shift in the object survival + // profile, and updating the counts at the end of a scavenge + // may not be quick enough, giving rise to large scavenge pauses + // during these phase changes. It is beneficial to detect such + // changes on-the-fly during a scavenge and avoid such a phase-change + // pothole. The following code is a heuristic attempt to do that. + // It is protected by a product flag until we have gained + // enough experience with this heuristic and fine-tuned its behaviour. + // WARNING: This might increase fragmentation if we overreact to + // small spikes, so some kind of historical smoothing based on + // previous experience with the greater reactivity might be useful. + // Lacking sufficient experience, CMSOldPLABResizeQuicker is disabled by + // default. 
+ if (ResizeOldPLAB && CMSOldPLABResizeQuicker) { + size_t multiple = _num_blocks[word_sz]/(CMSOldPLABToleranceFactor*CMSOldPLABNumRefills*n_blks); + n_blks += CMSOldPLABReactivityFactor*multiple*n_blks; + n_blks = MIN2(n_blks, CMSOldPLABMax); + } + assert(n_blks > 0, "Error"); + _cfls->par_get_chunk_of_blocks(word_sz, n_blks, fl); + // Update stats table entry for this block size + _num_blocks[word_sz] += fl->count(); +} + +void CFLS_LAB::compute_desired_plab_size() { + for (size_t i = CompactibleFreeListSpace::IndexSetStart; i < CompactibleFreeListSpace::IndexSetSize; i += CompactibleFreeListSpace::IndexSetStride) { - if (_indexedFreeList[i].count() > 0) { - MutexLockerEx x(_cfls->_indexedFreeListParLocks[i], - Mutex::_no_safepoint_check_flag); - _cfls->_indexedFreeList[i].prepend(&_indexedFreeList[i]); - // Reset this list. - _indexedFreeList[i] = FreeList(); - _indexedFreeList[i].set_size(i); + assert((_global_num_workers[i] == 0) == (_global_num_blocks[i] == 0), + "Counter inconsistency"); + if (_global_num_workers[i] > 0) { + // Need to smooth wrt historical average + if (ResizeOldPLAB) { + _blocks_to_claim[i].sample( + MAX2((size_t)CMSOldPLABMin, + MIN2((size_t)CMSOldPLABMax, + _global_num_blocks[i]/(_global_num_workers[i]*CMSOldPLABNumRefills)))); + } + // Reset counters for next round + _global_num_workers[i] = 0; + _global_num_blocks[i] = 0; + if (PrintOldPLAB) { + gclog_or_tty->print_cr("[%d]: %d", i, (size_t)_blocks_to_claim[i].average()); + } } } } -void -CompactibleFreeListSpace:: -par_get_chunk_of_blocks(size_t word_sz, size_t n, FreeList* fl) { +void CFLS_LAB::retire(int tid) { + // We run this single threaded with the world stopped; + // so no need for locks and such. 
+#define CFLS_LAB_PARALLEL_ACCESS 0 + NOT_PRODUCT(Thread* t = Thread::current();) + assert(Thread::current()->is_VM_thread(), "Error"); + assert(CompactibleFreeListSpace::IndexSetStart == CompactibleFreeListSpace::IndexSetStride, + "Will access to uninitialized slot below"); +#if CFLS_LAB_PARALLEL_ACCESS + for (size_t i = CompactibleFreeListSpace::IndexSetSize - 1; + i > 0; + i -= CompactibleFreeListSpace::IndexSetStride) { +#else // CFLS_LAB_PARALLEL_ACCESS + for (size_t i = CompactibleFreeListSpace::IndexSetStart; + i < CompactibleFreeListSpace::IndexSetSize; + i += CompactibleFreeListSpace::IndexSetStride) { +#endif // !CFLS_LAB_PARALLEL_ACCESS + assert(_num_blocks[i] >= (size_t)_indexedFreeList[i].count(), + "Can't retire more than what we obtained"); + if (_num_blocks[i] > 0) { + size_t num_retire = _indexedFreeList[i].count(); + assert(_num_blocks[i] > num_retire, "Should have used at least one"); + { +#if CFLS_LAB_PARALLEL_ACCESS + MutexLockerEx x(_cfls->_indexedFreeListParLocks[i], + Mutex::_no_safepoint_check_flag); +#endif // CFLS_LAB_PARALLEL_ACCESS + // Update globals stats for num_blocks used + _global_num_blocks[i] += (_num_blocks[i] - num_retire); + _global_num_workers[i]++; + assert(_global_num_workers[i] <= (ssize_t)ParallelGCThreads, "Too big"); + if (num_retire > 0) { + _cfls->_indexedFreeList[i].prepend(&_indexedFreeList[i]); + // Reset this list. 
+ _indexedFreeList[i] = FreeList(); + _indexedFreeList[i].set_size(i); + } + } + if (PrintOldPLAB) { + gclog_or_tty->print_cr("%d[%d]: %d/%d/%d", + tid, i, num_retire, _num_blocks[i], (size_t)_blocks_to_claim[i].average()); + } + // Reset stats for next round + _num_blocks[i] = 0; + } + } +} + +void CompactibleFreeListSpace:: par_get_chunk_of_blocks(size_t word_sz, size_t n, FreeList* fl) { assert(fl->count() == 0, "Precondition."); assert(word_sz < CompactibleFreeListSpace::IndexSetSize, "Precondition"); - // We'll try all multiples of word_sz in the indexed set (starting with - // word_sz itself), then try getting a big chunk and splitting it. - int k = 1; - size_t cur_sz = k * word_sz; - bool found = false; - while (cur_sz < CompactibleFreeListSpace::IndexSetSize && k == 1) { - FreeList* gfl = &_indexedFreeList[cur_sz]; - FreeList fl_for_cur_sz; // Empty. - fl_for_cur_sz.set_size(cur_sz); - { - MutexLockerEx x(_indexedFreeListParLocks[cur_sz], - Mutex::_no_safepoint_check_flag); - if (gfl->count() != 0) { - size_t nn = MAX2(n/k, (size_t)1); - gfl->getFirstNChunksFromList(nn, &fl_for_cur_sz); - found = true; - } - } - // Now transfer fl_for_cur_sz to fl. Common case, we hope, is k = 1. - if (found) { - if (k == 1) { - fl->prepend(&fl_for_cur_sz); - } else { - // Divide each block on fl_for_cur_sz up k ways. - FreeChunk* fc; - while ((fc = fl_for_cur_sz.getChunkAtHead()) != NULL) { - // Must do this in reverse order, so that anybody attempting to - // access the main chunk sees it as a single free block until we - // change it. - size_t fc_size = fc->size(); - for (int i = k-1; i >= 0; i--) { - FreeChunk* ffc = (FreeChunk*)((HeapWord*)fc + i * word_sz); - ffc->setSize(word_sz); - ffc->linkNext(NULL); - ffc->linkPrev(NULL); // Mark as a free block for other (parallel) GC threads. - // Above must occur before BOT is updated below. 
- // splitting from the right, fc_size == (k - i + 1) * wordsize - _bt.mark_block((HeapWord*)ffc, word_sz); - fc_size -= word_sz; - _bt.verify_not_unallocated((HeapWord*)ffc, ffc->size()); - _bt.verify_single_block((HeapWord*)fc, fc_size); - _bt.verify_single_block((HeapWord*)ffc, ffc->size()); - // Push this on "fl". - fl->returnChunkAtHead(ffc); + // We'll try all multiples of word_sz in the indexed set, starting with + // word_sz itself and, if CMSSplitIndexedFreeListBlocks, try larger multiples, + // then try getting a big chunk and splitting it. + { + bool found; + int k; + size_t cur_sz; + for (k = 1, cur_sz = k * word_sz, found = false; + (cur_sz < CompactibleFreeListSpace::IndexSetSize) && + (CMSSplitIndexedFreeListBlocks || k <= 1); + k++, cur_sz = k * word_sz) { + FreeList* gfl = &_indexedFreeList[cur_sz]; + FreeList fl_for_cur_sz; // Empty. + fl_for_cur_sz.set_size(cur_sz); + { + MutexLockerEx x(_indexedFreeListParLocks[cur_sz], + Mutex::_no_safepoint_check_flag); + if (gfl->count() != 0) { + // nn is the number of chunks of size cur_sz that + // we'd need to split k-ways each, in order to create + // "n" chunks of size word_sz each. + const size_t nn = MAX2(n/k, (size_t)1); + gfl->getFirstNChunksFromList(nn, &fl_for_cur_sz); + found = true; + if (k > 1) { + // Update split death stats for the cur_sz-size blocks list: + // we increment the split death count by the number of blocks + // we just took from the cur_sz-size blocks list and which + // we will be splitting below. + ssize_t deaths = _indexedFreeList[cur_sz].splitDeaths() + + fl_for_cur_sz.count(); + _indexedFreeList[cur_sz].set_splitDeaths(deaths); } - // TRAP - assert(fl->tail()->next() == NULL, "List invariant."); } } - return; + // Now transfer fl_for_cur_sz to fl. Common case, we hope, is k = 1. + if (found) { + if (k == 1) { + fl->prepend(&fl_for_cur_sz); + } else { + // Divide each block on fl_for_cur_sz up k ways. 
+ FreeChunk* fc; + while ((fc = fl_for_cur_sz.getChunkAtHead()) != NULL) { + // Must do this in reverse order, so that anybody attempting to + // access the main chunk sees it as a single free block until we + // change it. + size_t fc_size = fc->size(); + for (int i = k-1; i >= 0; i--) { + FreeChunk* ffc = (FreeChunk*)((HeapWord*)fc + i * word_sz); + ffc->setSize(word_sz); + ffc->linkNext(NULL); + ffc->linkPrev(NULL); // Mark as a free block for other (parallel) GC threads. + // Above must occur before BOT is updated below. + // splitting from the right, fc_size == (k - i + 1) * wordsize + _bt.mark_block((HeapWord*)ffc, word_sz); + fc_size -= word_sz; + _bt.verify_not_unallocated((HeapWord*)ffc, ffc->size()); + _bt.verify_single_block((HeapWord*)fc, fc_size); + _bt.verify_single_block((HeapWord*)ffc, ffc->size()); + // Push this on "fl". + fl->returnChunkAtHead(ffc); + } + // TRAP + assert(fl->tail()->next() == NULL, "List invariant."); + } + } + // Update birth stats for this block size. + size_t num = fl->count(); + MutexLockerEx x(_indexedFreeListParLocks[word_sz], + Mutex::_no_safepoint_check_flag); + ssize_t births = _indexedFreeList[word_sz].splitBirths() + num; + _indexedFreeList[word_sz].set_splitBirths(births); + return; + } } - k++; cur_sz = k * word_sz; } // Otherwise, we'll split a block from the dictionary. FreeChunk* fc = NULL; @@ -2723,17 +3065,20 @@ par_get_chunk_of_blocks(size_t word_sz, size_t n, FreeList* fl) { } } if (fc == NULL) return; + assert((ssize_t)n >= 1, "Control point invariant"); // Otherwise, split up that block. - size_t nn = fc->size() / word_sz; + const size_t nn = fc->size() / word_sz; n = MIN2(nn, n); + assert((ssize_t)n >= 1, "Control point invariant"); rem = fc->size() - n * word_sz; // If there is a remainder, and it's too small, allocate one fewer. if (rem > 0 && rem < MinChunkSize) { n--; rem += word_sz; } + assert((ssize_t)n >= 1, "Control point invariant"); // First return the remainder, if any. 
// Note that we hold the lock until we decide if we're going to give - // back the remainder to the dictionary, since a contending allocator + // back the remainder to the dictionary, since a concurrent allocation // may otherwise see the heap as empty. (We're willing to take that // hit if the block is a small block.) if (rem > 0) { @@ -2743,18 +3088,16 @@ par_get_chunk_of_blocks(size_t word_sz, size_t n, FreeList* fl) { rem_fc->linkNext(NULL); rem_fc->linkPrev(NULL); // Mark as a free block for other (parallel) GC threads. // Above must occur before BOT is updated below. + assert((ssize_t)n > 0 && prefix_size > 0 && rem_fc > fc, "Error"); _bt.split_block((HeapWord*)fc, fc->size(), prefix_size); if (rem >= IndexSetSize) { returnChunkToDictionary(rem_fc); - dictionary()->dictCensusUpdate(fc->size(), - true /*split*/, - true /*birth*/); + dictionary()->dictCensusUpdate(rem, true /*split*/, true /*birth*/); rem_fc = NULL; } // Otherwise, return it to the small list below. } } - // if (rem_fc != NULL) { MutexLockerEx x(_indexedFreeListParLocks[rem], Mutex::_no_safepoint_check_flag); @@ -2762,7 +3105,7 @@ par_get_chunk_of_blocks(size_t word_sz, size_t n, FreeList* fl) { _indexedFreeList[rem].returnChunkAtHead(rem_fc); smallSplitBirth(rem); } - + assert((ssize_t)n > 0 && fc != NULL, "Consistency"); // Now do the splitting up. // Must do this in reverse order, so that anybody attempting to // access the main chunk sees it as a single free block until we @@ -2792,13 +3135,15 @@ par_get_chunk_of_blocks(size_t word_sz, size_t n, FreeList* fl) { _bt.verify_single_block((HeapWord*)fc, fc->size()); fl->returnChunkAtHead(fc); + assert((ssize_t)n > 0 && (ssize_t)n == fl->count(), "Incorrect number of blocks"); { + // Update the stats for this block size. 
MutexLockerEx x(_indexedFreeListParLocks[word_sz], Mutex::_no_safepoint_check_flag); - ssize_t new_births = _indexedFreeList[word_sz].splitBirths() + n; - _indexedFreeList[word_sz].set_splitBirths(new_births); - ssize_t new_surplus = _indexedFreeList[word_sz].surplus() + n; - _indexedFreeList[word_sz].set_surplus(new_surplus); + const ssize_t births = _indexedFreeList[word_sz].splitBirths() + n; + _indexedFreeList[word_sz].set_splitBirths(births); + // ssize_t new_surplus = _indexedFreeList[word_sz].surplus() + n; + // _indexedFreeList[word_sz].set_surplus(new_surplus); } // TRAP diff --git a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp index 9f16f8d2eb0..d937de86156 100644 --- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp @@ -25,8 +25,6 @@ // Classes in support of keeping track of promotions into a non-Contiguous // space, in this case a CompactibleFreeListSpace. 
-#define CFLS_LAB_REFILL_STATS 0 - // Forward declarations class CompactibleFreeListSpace; class BlkClosure; @@ -89,6 +87,9 @@ class SpoolBlock: public FreeChunk { displacedHdr = (markOop*)&displacedHdr; nextSpoolBlock = NULL; } + + void print_on(outputStream* st) const; + void print() const { print_on(gclog_or_tty); } }; class PromotionInfo VALUE_OBJ_CLASS_SPEC { @@ -121,7 +122,7 @@ class PromotionInfo VALUE_OBJ_CLASS_SPEC { return _promoHead == NULL; } void startTrackingPromotions(); - void stopTrackingPromotions(); + void stopTrackingPromotions(uint worker_id = 0); bool tracking() const { return _tracking; } void track(PromotedObject* trackOop); // keep track of a promoted oop // The following variant must be used when trackOop is not fully @@ -161,6 +162,9 @@ class PromotionInfo VALUE_OBJ_CLASS_SPEC { _nextIndex = 0; } + + void print_on(outputStream* st) const; + void print_statistics(uint worker_id) const; }; class LinearAllocBlock VALUE_OBJ_CLASS_SPEC { @@ -243,6 +247,7 @@ class CompactibleFreeListSpace: public CompactibleSpace { mutable Mutex _freelistLock; // locking verifier convenience function void assert_locked() const PRODUCT_RETURN; + void assert_locked(const Mutex* lock) const PRODUCT_RETURN; // Linear allocation blocks LinearAllocBlock _smallLinearAllocBlock; @@ -281,13 +286,6 @@ class CompactibleFreeListSpace: public CompactibleSpace { // Locks protecting the exact lists during par promotion allocation. Mutex* _indexedFreeListParLocks[IndexSetSize]; -#if CFLS_LAB_REFILL_STATS - // Some statistics. - jint _par_get_chunk_from_small; - jint _par_get_chunk_from_large; -#endif - - // Attempt to obtain up to "n" blocks of the size "word_sz" (which is // required to be smaller than "IndexSetSize".) If successful, // adds them to "fl", which is required to be an empty free list. @@ -320,7 +318,7 @@ class CompactibleFreeListSpace: public CompactibleSpace { // Helper function for getChunkFromIndexedFreeList. 
// Replenish the indexed free list for this "size". Do not take from an // underpopulated size. - FreeChunk* getChunkFromIndexedFreeListHelper(size_t size); + FreeChunk* getChunkFromIndexedFreeListHelper(size_t size, bool replenish = true); // Get a chunk from the indexed free list. If the indexed free list // does not have a free chunk, try to replenish the indexed free list @@ -430,10 +428,6 @@ class CompactibleFreeListSpace: public CompactibleSpace { void initialize_sequential_subtasks_for_marking(int n_threads, HeapWord* low = NULL); -#if CFLS_LAB_REFILL_STATS - void print_par_alloc_stats(); -#endif - // Space enquiries size_t used() const; size_t free() const; @@ -617,6 +611,12 @@ class CompactibleFreeListSpace: public CompactibleSpace { // Do some basic checks on the the free lists. void checkFreeListConsistency() const PRODUCT_RETURN; + // Printing support + void dump_at_safepoint_with_locks(CMSCollector* c, outputStream* st); + void print_indexed_free_lists(outputStream* st) const; + void print_dictionary_free_lists(outputStream* st) const; + void print_promo_info_blocks(outputStream* st) const; + NOT_PRODUCT ( void initializeIndexedFreeListArrayReturnedBytes(); size_t sumIndexedFreeListArrayReturnedBytes(); @@ -638,8 +638,9 @@ class CompactibleFreeListSpace: public CompactibleSpace { // Statistics functions // Initialize census for lists before the sweep. - void beginSweepFLCensus(float sweep_current, - float sweep_estimate); + void beginSweepFLCensus(float inter_sweep_current, + float inter_sweep_estimate, + float intra_sweep_estimate); // Set the surplus for each of the free lists. void setFLSurplus(); // Set the hint for each of the free lists. @@ -730,16 +731,17 @@ class CFLS_LAB : public CHeapObj { FreeList _indexedFreeList[CompactibleFreeListSpace::IndexSetSize]; // Initialized from a command-line arg. - size_t _blocks_to_claim; -#if CFLS_LAB_REFILL_STATS - // Some statistics. 
- int _refills; - int _blocksTaken; - static int _tot_refills; - static int _tot_blocksTaken; - static int _next_threshold; -#endif + // Allocation statistics in support of dynamic adjustment of + // #blocks to claim per get_from_global_pool() call below. + static AdaptiveWeightedAverage + _blocks_to_claim [CompactibleFreeListSpace::IndexSetSize]; + static size_t _global_num_blocks [CompactibleFreeListSpace::IndexSetSize]; + static int _global_num_workers[CompactibleFreeListSpace::IndexSetSize]; + size_t _num_blocks [CompactibleFreeListSpace::IndexSetSize]; + + // Internal work method + void get_from_global_pool(size_t word_sz, FreeList* fl); public: CFLS_LAB(CompactibleFreeListSpace* cfls); @@ -748,7 +750,12 @@ public: HeapWord* alloc(size_t word_sz); // Return any unused portions of the buffer to the global pool. - void retire(); + void retire(int tid); + + // Dynamic OldPLABSize sizing + static void compute_desired_plab_size(); + // When the settings are modified from default static initialization + static void modify_initialization(size_t n, unsigned wt); }; size_t PromotionInfo::refillSize() const { diff --git a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp index 20bbd7abafd..1ec7696bdf7 100644 --- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp @@ -253,7 +253,6 @@ void ConcurrentMarkSweepGeneration::init_initiating_occupancy(intx io, intx tr) } } - void ConcurrentMarkSweepGeneration::ref_processor_init() { assert(collector() != NULL, "no collector"); collector()->ref_processor_init(); @@ -341,6 +340,14 @@ CMSStats::CMSStats(ConcurrentMarkSweepGeneration* cms_gen, unsigned int alpha): _icms_duty_cycle = CMSIncrementalDutyCycle; } +double 
CMSStats::cms_free_adjustment_factor(size_t free) const { + // TBD: CR 6909490 + return 1.0; +} + +void CMSStats::adjust_cms_free_adjustment_factor(bool fail, size_t free) { +} + // If promotion failure handling is on use // the padded average size of the promotion for each // young generation collection. @@ -361,7 +368,11 @@ double CMSStats::time_until_cms_gen_full() const { // Adjust by the safety factor. double cms_free_dbl = (double)cms_free; - cms_free_dbl = cms_free_dbl * (100.0 - CMSIncrementalSafetyFactor) / 100.0; + double cms_adjustment = (100.0 - CMSIncrementalSafetyFactor)/100.0; + // Apply a further correction factor which tries to adjust + // for recent occurrence of concurrent mode failures. + cms_adjustment = cms_adjustment * cms_free_adjustment_factor(cms_free); + cms_free_dbl = cms_free_dbl * cms_adjustment; if (PrintGCDetails && Verbose) { gclog_or_tty->print_cr("CMSStats::time_until_cms_gen_full: cms_free " @@ -395,6 +406,8 @@ double CMSStats::time_until_cms_start() const { // late. 
double work = cms_duration() + gc0_period(); double deadline = time_until_cms_gen_full(); + // If a concurrent mode failure occurred recently, we want to be + // more conservative and halve our expected time_until_cms_gen_full() if (work > deadline) { if (Verbose && PrintGCDetails) { gclog_or_tty->print( @@ -556,7 +569,8 @@ CMSCollector::CMSCollector(ConcurrentMarkSweepGeneration* cmsGen, _should_unload_classes(false), _concurrent_cycles_since_last_unload(0), _roots_scanning_options(0), - _sweep_estimate(CMS_SweepWeight, CMS_SweepPadding) + _inter_sweep_estimate(CMS_SweepWeight, CMS_SweepPadding), + _intra_sweep_estimate(CMS_SweepWeight, CMS_SweepPadding) { if (ExplicitGCInvokesConcurrentAndUnloadsClasses) { ExplicitGCInvokesConcurrent = true; @@ -773,7 +787,7 @@ CMSCollector::CMSCollector(ConcurrentMarkSweepGeneration* cmsGen, NOT_PRODUCT(_overflow_counter = CMSMarkStackOverflowInterval;) _gc_counters = new CollectorCounters("CMS", 1); _completed_initialization = true; - _sweep_timer.start(); // start of time + _inter_sweep_timer.start(); // start of time } const char* ConcurrentMarkSweepGeneration::name() const { @@ -900,6 +914,14 @@ bool ConcurrentMarkSweepGeneration::promotion_attempt_is_safe( return result; } +// At a promotion failure dump information on block layout in heap +// (cms old generation). 
+void ConcurrentMarkSweepGeneration::promotion_failure_occurred() { + if (CMSDumpAtPromotionFailure) { + cmsSpace()->dump_at_safepoint_with_locks(collector(), gclog_or_tty); + } +} + CompactibleSpace* ConcurrentMarkSweepGeneration::first_compaction_space() const { return _cmsSpace; @@ -1368,12 +1390,7 @@ void ConcurrentMarkSweepGeneration:: par_promote_alloc_done(int thread_num) { CMSParGCThreadState* ps = _par_gc_thread_states[thread_num]; - ps->lab.retire(); -#if CFLS_LAB_REFILL_STATS - if (thread_num == 0) { - _cmsSpace->print_par_alloc_stats(); - } -#endif + ps->lab.retire(thread_num); } void @@ -1974,11 +1991,14 @@ void CMSCollector::do_compaction_work(bool clear_all_soft_refs) { // We must adjust the allocation statistics being maintained // in the free list space. We do so by reading and clearing // the sweep timer and updating the block flux rate estimates below. - assert(_sweep_timer.is_active(), "We should never see the timer inactive"); - _sweep_timer.stop(); - // Note that we do not use this sample to update the _sweep_estimate. - _cmsGen->cmsSpace()->beginSweepFLCensus((float)(_sweep_timer.seconds()), - _sweep_estimate.padded_average()); + assert(!_intra_sweep_timer.is_active(), "_intra_sweep_timer should be inactive"); + if (_inter_sweep_timer.is_active()) { + _inter_sweep_timer.stop(); + // Note that we do not use this sample to update the _inter_sweep_estimate. + _cmsGen->cmsSpace()->beginSweepFLCensus((float)(_inter_sweep_timer.seconds()), + _inter_sweep_estimate.padded_average(), + _intra_sweep_estimate.padded_average()); + } GenMarkSweep::invoke_at_safepoint(_cmsGen->level(), ref_processor(), clear_all_soft_refs); @@ -2015,10 +2035,10 @@ void CMSCollector::do_compaction_work(bool clear_all_soft_refs) { } // Adjust the per-size allocation stats for the next epoch. - _cmsGen->cmsSpace()->endSweepFLCensus(sweepCount() /* fake */); - // Restart the "sweep timer" for next epoch. 
- _sweep_timer.reset(); - _sweep_timer.start(); + _cmsGen->cmsSpace()->endSweepFLCensus(sweep_count() /* fake */); + // Restart the "inter sweep timer" for the next epoch. + _inter_sweep_timer.reset(); + _inter_sweep_timer.start(); // Sample collection pause time and reset for collection interval. if (UseAdaptiveSizePolicy) { @@ -2676,7 +2696,7 @@ void ConcurrentMarkSweepGeneration::gc_epilogue(bool full) { // Also reset promotion tracking in par gc thread states. if (ParallelGCThreads > 0) { for (uint i = 0; i < ParallelGCThreads; i++) { - _par_gc_thread_states[i]->promo.stopTrackingPromotions(); + _par_gc_thread_states[i]->promo.stopTrackingPromotions(i); } } } @@ -2771,7 +2791,7 @@ class VerifyMarkedClosure: public BitMapClosure { bool do_bit(size_t offset) { HeapWord* addr = _marks->offsetToHeapWord(offset); if (!_marks->isMarked(addr)) { - oop(addr)->print(); + oop(addr)->print_on(gclog_or_tty); gclog_or_tty->print_cr(" ("INTPTR_FORMAT" should have been marked)", addr); _failed = true; } @@ -2820,7 +2840,7 @@ bool CMSCollector::verify_after_remark() { // Clear any marks from a previous round verification_mark_bm()->clear_all(); assert(verification_mark_stack()->isEmpty(), "markStack should be empty"); - assert(overflow_list_is_empty(), "overflow list should be empty"); + verify_work_stacks_empty(); GenCollectedHeap* gch = GenCollectedHeap::heap(); gch->ensure_parsability(false); // fill TLABs, but no need to retire them @@ -2893,8 +2913,8 @@ void CMSCollector::verify_after_remark_work_1() { verification_mark_bm()->iterate(&vcl); if (vcl.failed()) { gclog_or_tty->print("Verification failed"); - Universe::heap()->print(); - fatal(" ... aborting"); + Universe::heap()->print_on(gclog_or_tty); + fatal("CMS: failed marking verification after remark"); } } @@ -3314,7 +3334,7 @@ bool ConcurrentMarkSweepGeneration::grow_by(size_t bytes) { Universe::heap()->barrier_set()->resize_covered_region(mr); // Hmmmm... why doesn't CFLS::set_end verify locking? 
// This is quite ugly; FIX ME XXX - _cmsSpace->assert_locked(); + _cmsSpace->assert_locked(freelistLock()); _cmsSpace->set_end((HeapWord*)_virtual_space.high()); // update the space and generation capacity counters @@ -5868,9 +5888,9 @@ void CMSCollector::sweep(bool asynch) { check_correct_thread_executing(); verify_work_stacks_empty(); verify_overflow_empty(); - incrementSweepCount(); - _sweep_timer.stop(); - _sweep_estimate.sample(_sweep_timer.seconds()); + increment_sweep_count(); + _inter_sweep_timer.stop(); + _inter_sweep_estimate.sample(_inter_sweep_timer.seconds()); size_policy()->avg_cms_free_at_sweep()->sample(_cmsGen->free()); // PermGen verification support: If perm gen sweeping is disabled in @@ -5893,6 +5913,9 @@ void CMSCollector::sweep(bool asynch) { } } + assert(!_intra_sweep_timer.is_active(), "Should not be active"); + _intra_sweep_timer.reset(); + _intra_sweep_timer.start(); if (asynch) { TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty); CMSPhaseAccounting pa(this, "sweep", !PrintGCDetails); @@ -5937,8 +5960,11 @@ void CMSCollector::sweep(bool asynch) { verify_work_stacks_empty(); verify_overflow_empty(); - _sweep_timer.reset(); - _sweep_timer.start(); + _intra_sweep_timer.stop(); + _intra_sweep_estimate.sample(_intra_sweep_timer.seconds()); + + _inter_sweep_timer.reset(); + _inter_sweep_timer.start(); update_time_of_last_gc(os::javaTimeMillis()); @@ -5981,11 +6007,11 @@ void CMSCollector::sweep(bool asynch) { // FIX ME!!! Looks like this belongs in CFLSpace, with // CMSGen merely delegating to it. void ConcurrentMarkSweepGeneration::setNearLargestChunk() { - double nearLargestPercent = 0.999; + double nearLargestPercent = FLSLargestBlockCoalesceProximity; HeapWord* minAddr = _cmsSpace->bottom(); HeapWord* largestAddr = (HeapWord*) _cmsSpace->dictionary()->findLargestDict(); - if (largestAddr == 0) { + if (largestAddr == NULL) { // The dictionary appears to be empty. In this case // try to coalesce at the end of the heap. 
largestAddr = _cmsSpace->end(); @@ -5993,6 +6019,13 @@ void ConcurrentMarkSweepGeneration::setNearLargestChunk() { size_t largestOffset = pointer_delta(largestAddr, minAddr); size_t nearLargestOffset = (size_t)((double)largestOffset * nearLargestPercent) - MinChunkSize; + if (PrintFLSStatistics != 0) { + gclog_or_tty->print_cr( + "CMS: Large Block: " PTR_FORMAT ";" + " Proximity: " PTR_FORMAT " -> " PTR_FORMAT, + largestAddr, + _cmsSpace->nearLargestChunk(), minAddr + nearLargestOffset); + } _cmsSpace->set_nearLargestChunk(minAddr + nearLargestOffset); } @@ -6072,9 +6105,11 @@ void CMSCollector::sweepWork(ConcurrentMarkSweepGeneration* gen, assert_lock_strong(gen->freelistLock()); assert_lock_strong(bitMapLock()); - assert(!_sweep_timer.is_active(), "Was switched off in an outer context"); - gen->cmsSpace()->beginSweepFLCensus((float)(_sweep_timer.seconds()), - _sweep_estimate.padded_average()); + assert(!_inter_sweep_timer.is_active(), "Was switched off in an outer context"); + assert(_intra_sweep_timer.is_active(), "Was switched on in an outer context"); + gen->cmsSpace()->beginSweepFLCensus((float)(_inter_sweep_timer.seconds()), + _inter_sweep_estimate.padded_average(), + _intra_sweep_estimate.padded_average()); gen->setNearLargestChunk(); { @@ -6087,7 +6122,7 @@ void CMSCollector::sweepWork(ConcurrentMarkSweepGeneration* gen, // end-of-sweep-census below will be off by a little bit. } gen->cmsSpace()->sweep_completed(); - gen->cmsSpace()->endSweepFLCensus(sweepCount()); + gen->cmsSpace()->endSweepFLCensus(sweep_count()); if (should_unload_classes()) { // unloaded classes this cycle, _concurrent_cycles_since_last_unload = 0; // ... 
reset count } else { // did not unload classes, diff --git a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp index a58217faf93..18164a58b4e 100644 --- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp @@ -355,6 +355,11 @@ class CMSStats VALUE_OBJ_CLASS_SPEC { unsigned int new_duty_cycle); unsigned int icms_update_duty_cycle_impl(); + // In support of adjusting of cms trigger ratios based on history + // of concurrent mode failure. + double cms_free_adjustment_factor(size_t free) const; + void adjust_cms_free_adjustment_factor(bool fail, size_t free); + public: CMSStats(ConcurrentMarkSweepGeneration* cms_gen, unsigned int alpha = CMSExpAvgFactor); @@ -570,8 +575,11 @@ class CMSCollector: public CHeapObj { // appropriately. void check_gc_time_limit(); // XXX Move these to CMSStats ??? FIX ME !!! - elapsedTimer _sweep_timer; - AdaptivePaddedAverage _sweep_estimate; + elapsedTimer _inter_sweep_timer; // time between sweeps + elapsedTimer _intra_sweep_timer; // time _in_ sweeps + // padded decaying average estimates of the above + AdaptivePaddedAverage _inter_sweep_estimate; + AdaptivePaddedAverage _intra_sweep_estimate; protected: ConcurrentMarkSweepGeneration* _cmsGen; // old gen (CMS) @@ -625,6 +633,7 @@ class CMSCollector: public CHeapObj { // . _collectorState <= Idling == post-sweep && pre-mark // . 
_collectorState in (Idling, Sweeping) == {initial,final}marking || // precleaning || abortablePrecleanb + public: enum CollectorState { Resizing = 0, Resetting = 1, @@ -636,6 +645,7 @@ class CMSCollector: public CHeapObj { FinalMarking = 7, Sweeping = 8 }; + protected: static CollectorState _collectorState; // State related to prologue/epilogue invocation for my generations @@ -655,7 +665,7 @@ class CMSCollector: public CHeapObj { int _numYields; size_t _numDirtyCards; - uint _sweepCount; + size_t _sweep_count; // number of full gc's since the last concurrent gc. uint _full_gcs_since_conc_gc; @@ -905,7 +915,7 @@ class CMSCollector: public CHeapObj { // Check that the currently executing thread is the expected // one (foreground collector or background collector). - void check_correct_thread_executing() PRODUCT_RETURN; + static void check_correct_thread_executing() PRODUCT_RETURN; // XXXPERM void print_statistics() PRODUCT_RETURN; bool is_cms_reachable(HeapWord* addr); @@ -930,8 +940,8 @@ class CMSCollector: public CHeapObj { static void set_foregroundGCShouldWait(bool v) { _foregroundGCShouldWait = v; } static bool foregroundGCIsActive() { return _foregroundGCIsActive; } static void set_foregroundGCIsActive(bool v) { _foregroundGCIsActive = v; } - uint sweepCount() const { return _sweepCount; } - void incrementSweepCount() { _sweepCount++; } + size_t sweep_count() const { return _sweep_count; } + void increment_sweep_count() { _sweep_count++; } // Timers/stats for gc scheduling and incremental mode pacing. CMSStats& stats() { return _stats; } @@ -1165,6 +1175,11 @@ class ConcurrentMarkSweepGeneration: public CardGeneration { virtual bool promotion_attempt_is_safe(size_t promotion_in_bytes, bool younger_handles_promotion_failure) const; + // Inform this (non-young) generation that a promotion failure was + // encountered during a collection of a younger generation that + // promotes into this generation. 
+ virtual void promotion_failure_occurred(); + bool should_collect(bool full, size_t size, bool tlab); virtual bool should_concurrent_collect() const; virtual bool is_too_full() const; diff --git a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/freeBlockDictionary.hpp b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/freeBlockDictionary.hpp index 5f9c4f22632..1a454fe68f4 100644 --- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/freeBlockDictionary.hpp +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/freeBlockDictionary.hpp @@ -55,7 +55,8 @@ class FreeBlockDictionary: public CHeapObj { virtual void dictCensusUpdate(size_t size, bool split, bool birth) = 0; virtual bool coalDictOverPopulated(size_t size) = 0; virtual void beginSweepDictCensus(double coalSurplusPercent, - float sweep_current, float sweep_ewstimate) = 0; + float inter_sweep_current, float inter_sweep_estimate, + float intra_sweep_estimate) = 0; virtual void endSweepDictCensus(double splitSurplusPercent) = 0; virtual FreeChunk* findLargestDict() const = 0; // verify that the given chunk is in the dictionary. @@ -79,6 +80,7 @@ class FreeBlockDictionary: public CHeapObj { } virtual void printDictCensus() const = 0; + virtual void print_free_lists(outputStream* st) const = 0; virtual void verify() const = 0; diff --git a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.cpp b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.cpp index 494c090c6fb..e709c3af6ca 100644 --- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.cpp +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.cpp @@ -67,3 +67,8 @@ void FreeChunk::verifyList() const { } } #endif + +void FreeChunk::print_on(outputStream* st) { + st->print_cr("Next: " PTR_FORMAT " Prev: " PTR_FORMAT " %s", + next(), prev(), cantCoalesce() ? 
"[can't coalesce]" : ""); +} diff --git a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp index 768614d7e2b..9e731e7ca35 100644 --- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp @@ -129,6 +129,8 @@ class FreeChunk VALUE_OBJ_CLASS_SPEC { void verifyList() const PRODUCT_RETURN; void mangleAllocated(size_t size) PRODUCT_RETURN; void mangleFreed(size_t size) PRODUCT_RETURN; + + void print_on(outputStream* st); }; // Alignment helpers etc. diff --git a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/freeList.cpp b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/freeList.cpp index 79503deb81d..1ca1a4e5396 100644 --- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/freeList.cpp +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/freeList.cpp @@ -81,8 +81,8 @@ void FreeList::reset(size_t hint) { set_hint(hint); } -void FreeList::init_statistics() { - _allocation_stats.initialize(); +void FreeList::init_statistics(bool split_birth) { + _allocation_stats.initialize(split_birth); } FreeChunk* FreeList::getChunkAtHead() { @@ -292,14 +292,31 @@ bool FreeList::verifyChunkInFreeLists(FreeChunk* fc) const { } #ifndef PRODUCT +void FreeList::verify_stats() const { + // The +1 of the LH comparand is to allow some "looseness" in + // checking: we usually call this interface when adding a block + // and we'll subsequently update the stats; we cannot update the + // stats beforehand because in the case of the large-block BT + // dictionary for example, this might be the first block and + // in that case there would be no place that we could record + // the stats (which are kept in the block itself). 
+ assert(_allocation_stats.prevSweep() + _allocation_stats.splitBirths() + 1 // Total Stock + 1 + >= _allocation_stats.splitDeaths() + (ssize_t)count(), "Conservation Principle"); +} + void FreeList::assert_proper_lock_protection_work() const { -#ifdef ASSERT - if (_protecting_lock != NULL && - SharedHeap::heap()->n_par_threads() > 0) { - // Should become an assert. - guarantee(_protecting_lock->owned_by_self(), "FreeList RACE DETECTED"); + assert(_protecting_lock != NULL, "Don't call this directly"); + assert(ParallelGCThreads > 0, "Don't call this directly"); + Thread* thr = Thread::current(); + if (thr->is_VM_thread() || thr->is_ConcurrentGC_thread()) { + // assert that we are holding the freelist lock + } else if (thr->is_GC_task_thread()) { + assert(_protecting_lock->owned_by_self(), "FreeList RACE DETECTED"); + } else if (thr->is_Java_thread()) { + assert(!SafepointSynchronize::is_at_safepoint(), "Should not be executing"); + } else { + ShouldNotReachHere(); // unaccounted thread type? } -#endif } #endif diff --git a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/freeList.hpp b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/freeList.hpp index 581317643c7..8dd1543ab64 100644 --- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/freeList.hpp +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/freeList.hpp @@ -35,18 +35,26 @@ class CompactibleFreeListSpace; // for that implementation. 
class Mutex; +class TreeList; class FreeList VALUE_OBJ_CLASS_SPEC { friend class CompactibleFreeListSpace; friend class VMStructs; - friend class printTreeCensusClosure; - FreeChunk* _head; // List of free chunks + friend class PrintTreeCensusClosure; + + protected: + TreeList* _parent; + TreeList* _left; + TreeList* _right; + + private: + FreeChunk* _head; // Head of list of free chunks FreeChunk* _tail; // Tail of list of free chunks - size_t _size; // Size in Heap words of each chunks + size_t _size; // Size in Heap words of each chunk ssize_t _count; // Number of entries in list size_t _hint; // next larger size list with a positive surplus - AllocationStats _allocation_stats; // statistics for smart allocation + AllocationStats _allocation_stats; // allocation-related statistics #ifdef ASSERT Mutex* _protecting_lock; @@ -63,9 +71,12 @@ class FreeList VALUE_OBJ_CLASS_SPEC { // Initialize the allocation statistics. protected: - void init_statistics(); + void init_statistics(bool split_birth = false); void set_count(ssize_t v) { _count = v;} - void increment_count() { _count++; } + void increment_count() { + _count++; + } + void decrement_count() { _count--; assert(_count >= 0, "Count should not be negative"); @@ -167,11 +178,13 @@ class FreeList VALUE_OBJ_CLASS_SPEC { _allocation_stats.set_desired(v); } void compute_desired(float inter_sweep_current, - float inter_sweep_estimate) { + float inter_sweep_estimate, + float intra_sweep_estimate) { assert_proper_lock_protection(); _allocation_stats.compute_desired(_count, inter_sweep_current, - inter_sweep_estimate); + inter_sweep_estimate, + intra_sweep_estimate); } ssize_t coalDesired() const { return _allocation_stats.coalDesired(); @@ -306,6 +319,9 @@ class FreeList VALUE_OBJ_CLASS_SPEC { // found. Return NULL if "fc" is not found. 
bool verifyChunkInFreeLists(FreeChunk* fc) const; + // Stats verification + void verify_stats() const PRODUCT_RETURN; + // Printing support static void print_labels_on(outputStream* st, const char* c); void print_on(outputStream* st, const char* c = NULL) const; diff --git a/hotspot/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep b/hotspot/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep index 7ae314990c8..c5a7a386b60 100644 --- a/hotspot/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep +++ b/hotspot/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep @@ -221,6 +221,7 @@ freeList.cpp freeList.hpp freeList.cpp globals.hpp freeList.cpp mutex.hpp freeList.cpp sharedHeap.hpp +freeList.cpp vmThread.hpp freeList.hpp allocationStats.hpp diff --git a/hotspot/src/share/vm/gc_implementation/includeDB_gc_serial b/hotspot/src/share/vm/gc_implementation/includeDB_gc_serial index 6fb42f95b6f..60e41874d43 100644 --- a/hotspot/src/share/vm/gc_implementation/includeDB_gc_serial +++ b/hotspot/src/share/vm/gc_implementation/includeDB_gc_serial @@ -71,6 +71,7 @@ gcUtil.cpp gcUtil.hpp gcUtil.hpp allocation.hpp gcUtil.hpp debug.hpp gcUtil.hpp globalDefinitions.hpp +gcUtil.hpp ostream.hpp gcUtil.hpp timer.hpp generationCounters.cpp generationCounters.hpp diff --git a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp index 5acb923a056..07f759c9457 100644 --- a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp +++ b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp @@ -50,6 +50,7 @@ ParScanThreadState::ParScanThreadState(Space* to_space_, work_queue_set_, &term_), _is_alive_closure(gen_), _scan_weak_ref_closure(gen_, this), _keep_alive_closure(&_scan_weak_ref_closure), + _promotion_failure_size(0), _pushes(0), _pops(0), _steals(0), _steal_attempts(0), _term_attempts(0), _strong_roots_time(0.0), _term_time(0.0) { @@ 
-249,6 +250,16 @@ void ParScanThreadState::undo_alloc_in_to_space(HeapWord* obj, } } +void ParScanThreadState::print_and_clear_promotion_failure_size() { + if (_promotion_failure_size != 0) { + if (PrintPromotionFailure) { + gclog_or_tty->print(" (%d: promotion failure size = " SIZE_FORMAT ") ", + _thread_num, _promotion_failure_size); + } + _promotion_failure_size = 0; + } +} + class ParScanThreadStateSet: private ResourceArray { public: // Initializes states for the specified number of threads; @@ -260,11 +271,11 @@ public: GrowableArray** overflow_stacks_, size_t desired_plab_sz, ParallelTaskTerminator& term); - inline ParScanThreadState& thread_sate(int i); + inline ParScanThreadState& thread_state(int i); int pushes() { return _pushes; } int pops() { return _pops; } int steals() { return _steals; } - void reset(); + void reset(bool promotion_failed); void flush(); private: ParallelTaskTerminator& _term; @@ -295,22 +306,31 @@ ParScanThreadStateSet::ParScanThreadStateSet( } } -inline ParScanThreadState& ParScanThreadStateSet::thread_sate(int i) +inline ParScanThreadState& ParScanThreadStateSet::thread_state(int i) { assert(i >= 0 && i < length(), "sanity check!"); return ((ParScanThreadState*)_data)[i]; } -void ParScanThreadStateSet::reset() +void ParScanThreadStateSet::reset(bool promotion_failed) { _term.reset_for_reuse(); + if (promotion_failed) { + for (int i = 0; i < length(); ++i) { + thread_state(i).print_and_clear_promotion_failure_size(); + } + } } void ParScanThreadStateSet::flush() { + // Work in this loop should be kept as lightweight as + // possible since this might otherwise become a bottleneck + // to scaling. Should we add heavy-weight work into this + // loop, consider parallelizing the loop into the worker threads. for (int i = 0; i < length(); ++i) { - ParScanThreadState& par_scan_state = thread_sate(i); + ParScanThreadState& par_scan_state = thread_state(i); // Flush stats related to To-space PLAB activity and // retire the last buffer. 
@@ -362,6 +382,14 @@ void ParScanThreadStateSet::flush() } } } + if (UseConcMarkSweepGC && ParallelGCThreads > 0) { + // We need to call this even when ResizeOldPLAB is disabled + // so as to avoid breaking some asserts. While we may be able + // to avoid this by reorganizing the code a bit, I am loathe + // to do that unless we find cases where ergo leads to bad + // performance. + CFLS_LAB::compute_desired_plab_size(); + } } ParScanClosure::ParScanClosure(ParNewGeneration* g, @@ -475,7 +503,7 @@ void ParNewGenTask::work(int i) { Generation* old_gen = gch->next_gen(_gen); - ParScanThreadState& par_scan_state = _state_set->thread_sate(i); + ParScanThreadState& par_scan_state = _state_set->thread_state(i); par_scan_state.set_young_old_boundary(_young_old_boundary); par_scan_state.start_strong_roots(); @@ -659,7 +687,7 @@ void ParNewRefProcTaskProxy::work(int i) { ResourceMark rm; HandleMark hm; - ParScanThreadState& par_scan_state = _state_set.thread_sate(i); + ParScanThreadState& par_scan_state = _state_set.thread_state(i); par_scan_state.set_young_old_boundary(_young_old_boundary); _task.work(i, par_scan_state.is_alive_closure(), par_scan_state.keep_alive_closure(), @@ -693,7 +721,7 @@ void ParNewRefProcTaskExecutor::execute(ProcessTask& task) ParNewRefProcTaskProxy rp_task(task, _generation, *_generation.next_gen(), _generation.reserved().end(), _state_set); workers->run_task(&rp_task); - _state_set.reset(); + _state_set.reset(_generation.promotion_failed()); } void ParNewRefProcTaskExecutor::execute(EnqueueTask& task) @@ -813,7 +841,7 @@ void ParNewGeneration::collect(bool full, GenCollectedHeap::StrongRootsScope srs(gch); tsk.work(0); } - thread_state_set.reset(); + thread_state_set.reset(promotion_failed()); if (PAR_STATS_ENABLED && ParallelGCVerbose) { gclog_or_tty->print("Thread totals:\n" @@ -882,6 +910,8 @@ void ParNewGeneration::collect(bool full, swap_spaces(); // Make life simpler for CMS || rescan; see 6483690. 
from()->set_next_compaction_space(to()); gch->set_incremental_collection_will_fail(); + // Inform the next generation that a promotion failure occurred. + _next_gen->promotion_failure_occurred(); // Reset the PromotionFailureALot counters. NOT_PRODUCT(Universe::heap()->reset_promotion_should_fail();) @@ -1029,6 +1059,8 @@ oop ParNewGeneration::copy_to_survivor_space_avoiding_promotion_undo( new_obj = old; preserve_mark_if_necessary(old, m); + // Log the size of the maiden promotion failure + par_scan_state->log_promotion_failure(sz); } old->forward_to(new_obj); @@ -1150,6 +1182,8 @@ oop ParNewGeneration::copy_to_survivor_space_with_undo( failed_to_promote = true; preserve_mark_if_necessary(old, m); + // Log the size of the maiden promotion failure + par_scan_state->log_promotion_failure(sz); } } else { // Is in to-space; do copying ourselves. diff --git a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp index 3e2ab80af2e..a8dee0bbca9 100644 --- a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp +++ b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp @@ -97,6 +97,9 @@ class ParScanThreadState { int _pushes, _pops, _steals, _steal_attempts, _term_attempts; int _overflow_pushes, _overflow_refills, _overflow_refill_objs; + // Stats for promotion failure + size_t _promotion_failure_size; + // Timing numbers. double _start; double _start_strong_roots; @@ -169,6 +172,15 @@ class ParScanThreadState { // Undo the most recent allocation ("obj", of "word_sz"). 
void undo_alloc_in_to_space(HeapWord* obj, size_t word_sz); + // Promotion failure stats + size_t promotion_failure_size() { return _promotion_failure_size; } + void log_promotion_failure(size_t sz) { + if (_promotion_failure_size == 0) { + _promotion_failure_size = sz; + } + } + void print_and_clear_promotion_failure_size(); + int pushes() { return _pushes; } int pops() { return _pops; } int steals() { return _steals; } diff --git a/hotspot/src/share/vm/gc_implementation/shared/allocationStats.hpp b/hotspot/src/share/vm/gc_implementation/shared/allocationStats.hpp index 4772f7c45bc..9358688a4c9 100644 --- a/hotspot/src/share/vm/gc_implementation/shared/allocationStats.hpp +++ b/hotspot/src/share/vm/gc_implementation/shared/allocationStats.hpp @@ -31,7 +31,7 @@ class AllocationStats VALUE_OBJ_CLASS_SPEC { // beginning of this sweep: // Count(end_last_sweep) - Count(start_this_sweep) // + splitBirths(between) - splitDeaths(between) - // The above number divided by the time since the start [END???] of the + // The above number divided by the time since the end of the // previous sweep gives us a time rate of demand for blocks // of this size. We compute a padded average of this rate as // our current estimate for the time rate of demand for blocks @@ -41,7 +41,7 @@ class AllocationStats VALUE_OBJ_CLASS_SPEC { // estimates. AdaptivePaddedAverage _demand_rate_estimate; - ssize_t _desired; // Estimate computed as described above + ssize_t _desired; // Demand estimate computed as described above ssize_t _coalDesired; // desired +/- small-percent for tuning coalescing ssize_t _surplus; // count - (desired +/- small-percent), @@ -53,9 +53,9 @@ class AllocationStats VALUE_OBJ_CLASS_SPEC { ssize_t _coalDeaths; // loss from coalescing ssize_t _splitBirths; // additional chunks from splitting ssize_t _splitDeaths; // loss from splitting - size_t _returnedBytes; // number of bytes returned to list. + size_t _returnedBytes; // number of bytes returned to list. 
public: - void initialize() { + void initialize(bool split_birth = false) { AdaptivePaddedAverage* dummy = new (&_demand_rate_estimate) AdaptivePaddedAverage(CMS_FLSWeight, CMS_FLSPadding); @@ -67,7 +67,7 @@ class AllocationStats VALUE_OBJ_CLASS_SPEC { _beforeSweep = 0; _coalBirths = 0; _coalDeaths = 0; - _splitBirths = 0; + _splitBirths = split_birth? 1 : 0; _splitDeaths = 0; _returnedBytes = 0; } @@ -75,10 +75,12 @@ class AllocationStats VALUE_OBJ_CLASS_SPEC { AllocationStats() { initialize(); } + // The rate estimate is in blocks per second. void compute_desired(size_t count, float inter_sweep_current, - float inter_sweep_estimate) { + float inter_sweep_estimate, + float intra_sweep_estimate) { // If the latest inter-sweep time is below our granularity // of measurement, we may call in here with // inter_sweep_current == 0. However, even for suitably small @@ -88,12 +90,31 @@ class AllocationStats VALUE_OBJ_CLASS_SPEC { // vulnerable to noisy glitches. In such cases, we // ignore the current sample and use currently available // historical estimates. 
+ // XXX NEEDS TO BE FIXED + // assert(prevSweep() + splitBirths() >= splitDeaths() + (ssize_t)count, "Conservation Principle"); + // ^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + // "Total Stock" "Not used at this block size" if (inter_sweep_current > _threshold) { - ssize_t demand = prevSweep() - count + splitBirths() - splitDeaths(); + ssize_t demand = prevSweep() - (ssize_t)count + splitBirths() - splitDeaths(); + // XXX NEEDS TO BE FIXED + // assert(demand >= 0, "Demand should be non-negative"); + // Defensive: adjust for imprecision in event counting + if (demand < 0) { + demand = 0; + } + float old_rate = _demand_rate_estimate.padded_average(); float rate = ((float)demand)/inter_sweep_current; _demand_rate_estimate.sample(rate); - _desired = (ssize_t)(_demand_rate_estimate.padded_average() - *inter_sweep_estimate); + float new_rate = _demand_rate_estimate.padded_average(); + ssize_t old_desired = _desired; + _desired = (ssize_t)(new_rate * (inter_sweep_estimate + + (CMSExtrapolateSweep + ? 
intra_sweep_estimate + : 0.0))); // parenthesized: ?: binds looser than + + if (PrintFLSStatistics > 1) { + gclog_or_tty->print_cr("demand: %d, old_rate: %f, current_rate: %f, new_rate: %f, old_desired: %d, new_desired: %d", + demand, old_rate, rate, new_rate, old_desired, _desired); + } } } diff --git a/hotspot/src/share/vm/gc_implementation/shared/gcUtil.cpp b/hotspot/src/share/vm/gc_implementation/shared/gcUtil.cpp index 9ae5e4a0d29..e18782aac55 100644 --- a/hotspot/src/share/vm/gc_implementation/shared/gcUtil.cpp +++ b/hotspot/src/share/vm/gc_implementation/shared/gcUtil.cpp @@ -52,11 +52,35 @@ void AdaptiveWeightedAverage::sample(float new_sample) { _last_sample = new_sample; } +void AdaptiveWeightedAverage::print() const { + print_on(tty); +} + +void AdaptiveWeightedAverage::print_on(outputStream* st) const { + guarantee(false, "NYI"); +} + +void AdaptivePaddedAverage::print() const { + print_on(tty); +} + +void AdaptivePaddedAverage::print_on(outputStream* st) const { + guarantee(false, "NYI"); +} + +void AdaptivePaddedNoZeroDevAverage::print() const { + print_on(tty); +} + +void AdaptivePaddedNoZeroDevAverage::print_on(outputStream* st) const { + guarantee(false, "NYI"); +} + void AdaptivePaddedAverage::sample(float new_sample) { - // Compute our parent classes sample information + // Compute new adaptive weighted average based on new sample. AdaptiveWeightedAverage::sample(new_sample); - // Now compute the deviation and the new padded sample + // Now update the deviation and the padded average. 
float new_avg = average(); float new_dev = compute_adaptive_average(fabsd(new_sample - new_avg), deviation()); diff --git a/hotspot/src/share/vm/gc_implementation/shared/gcUtil.hpp b/hotspot/src/share/vm/gc_implementation/shared/gcUtil.hpp index affc3e44597..1bb4fc9f852 100644 --- a/hotspot/src/share/vm/gc_implementation/shared/gcUtil.hpp +++ b/hotspot/src/share/vm/gc_implementation/shared/gcUtil.hpp @@ -54,8 +54,8 @@ class AdaptiveWeightedAverage : public CHeapObj { public: // Input weight must be between 0 and 100 - AdaptiveWeightedAverage(unsigned weight) : - _average(0.0), _sample_count(0), _weight(weight), _last_sample(0.0) { + AdaptiveWeightedAverage(unsigned weight, float avg = 0.0) : + _average(avg), _sample_count(0), _weight(weight), _last_sample(0.0) { } void clear() { @@ -64,6 +64,13 @@ class AdaptiveWeightedAverage : public CHeapObj { _last_sample = 0; } + // Useful for modifying static structures after startup. + void modify(size_t avg, unsigned wt, bool force = false) { + assert(force, "Are you sure you want to call this?"); + _average = (float)avg; + _weight = wt; + } + // Accessors float average() const { return _average; } unsigned weight() const { return _weight; } @@ -83,6 +90,10 @@ class AdaptiveWeightedAverage : public CHeapObj { // Convert to float and back to avoid integer overflow. 
return (size_t)exp_avg((float)avg, (float)sample, weight); } + + // Printing + void print_on(outputStream* st) const; + void print() const; }; @@ -129,6 +140,10 @@ class AdaptivePaddedAverage : public AdaptiveWeightedAverage { // Override void sample(float new_sample); + + // Printing + void print_on(outputStream* st) const; + void print() const; }; // A weighted average that includes a deviation from the average, @@ -146,7 +161,12 @@ public: AdaptivePaddedAverage(weight, padding) {} // Override void sample(float new_sample); + + // Printing + void print_on(outputStream* st) const; + void print() const; }; + // Use a least squares fit to a set of data to generate a linear // equation. // y = intercept + slope * x diff --git a/hotspot/src/share/vm/includeDB_gc_parallel b/hotspot/src/share/vm/includeDB_gc_parallel index 5f089b7d7f1..2d1c45a0c9b 100644 --- a/hotspot/src/share/vm/includeDB_gc_parallel +++ b/hotspot/src/share/vm/includeDB_gc_parallel @@ -21,6 +21,8 @@ // have any questions. // +arguments.cpp compactibleFreeListSpace.hpp + assembler_.cpp g1SATBCardTableModRefBS.hpp assembler_.cpp g1CollectedHeap.inline.hpp assembler_.cpp heapRegion.hpp diff --git a/hotspot/src/share/vm/memory/defNewGeneration.cpp b/hotspot/src/share/vm/memory/defNewGeneration.cpp index 7db8b9dea97..875cf00817b 100644 --- a/hotspot/src/share/vm/memory/defNewGeneration.cpp +++ b/hotspot/src/share/vm/memory/defNewGeneration.cpp @@ -609,7 +609,7 @@ void DefNewGeneration::collect(bool full, remove_forwarding_pointers(); if (PrintGCDetails) { - gclog_or_tty->print(" (promotion failed)"); + gclog_or_tty->print(" (promotion failed) "); } // Add to-space to the list of space to compact // when a promotion failure has occurred. In that @@ -620,6 +620,9 @@ void DefNewGeneration::collect(bool full, from()->set_next_compaction_space(to()); gch->set_incremental_collection_will_fail(); + // Inform the next generation that a promotion failure occurred. 
+ _next_gen->promotion_failure_occurred(); + // Reset the PromotionFailureALot counters. NOT_PRODUCT(Universe::heap()->reset_promotion_should_fail();) } @@ -679,6 +682,11 @@ void DefNewGeneration::preserve_mark_if_necessary(oop obj, markOop m) { void DefNewGeneration::handle_promotion_failure(oop old) { preserve_mark_if_necessary(old, old->mark()); + if (!_promotion_failed && PrintPromotionFailure) { + gclog_or_tty->print(" (promotion failure size = " SIZE_FORMAT ") ", + old->size()); + } + // forward to self old->forward_to(old); _promotion_failed = true; diff --git a/hotspot/src/share/vm/memory/generation.hpp b/hotspot/src/share/vm/memory/generation.hpp index 985e9db1028..e39be059506 100644 --- a/hotspot/src/share/vm/memory/generation.hpp +++ b/hotspot/src/share/vm/memory/generation.hpp @@ -181,6 +181,12 @@ class Generation: public CHeapObj { virtual bool promotion_attempt_is_safe(size_t promotion_in_bytes, bool younger_handles_promotion_failure) const; + // For a non-young generation, this interface can be used to inform a + // generation that a promotion attempt into that generation failed. + // Typically used to enable diagnostic output for post-mortem analysis, + // but other uses of the interface are not ruled out. + virtual void promotion_failure_occurred() { /* does nothing */ } + // Return an estimate of the maximum allocation that could be performed // in the generation without triggering any collection or expansion // activity. It is "unsafe" because no locks are taken; the result diff --git a/hotspot/src/share/vm/runtime/arguments.cpp b/hotspot/src/share/vm/runtime/arguments.cpp index ae82b8def40..8defca2d547 100644 --- a/hotspot/src/share/vm/runtime/arguments.cpp +++ b/hotspot/src/share/vm/runtime/arguments.cpp @@ -948,6 +948,7 @@ static void no_shared_spaces() { } } +#ifndef KERNEL // If the user has chosen ParallelGCThreads > 0, we set UseParNewGC // if it's not explictly set or unset. 
If the user has chosen // UseParNewGC and not explicitly set ParallelGCThreads we @@ -1177,8 +1178,7 @@ void Arguments::set_cms_and_parnew_gc_flags() { // the value (either from the command line or ergonomics) of // OldPLABSize. Following OldPLABSize is an ergonomics decision. FLAG_SET_ERGO(uintx, CMSParPromoteBlocksToClaim, OldPLABSize); - } - else { + } else { // OldPLABSize and CMSParPromoteBlocksToClaim are both set. // CMSParPromoteBlocksToClaim is a collector-specific flag, so // we'll let it to take precedence. @@ -1188,7 +1188,23 @@ void Arguments::set_cms_and_parnew_gc_flags() { " CMSParPromoteBlocksToClaim will take precedence.\n"); } } + if (!FLAG_IS_DEFAULT(ResizeOldPLAB) && !ResizeOldPLAB) { + // OldPLAB sizing manually turned off: Use a larger default setting, + // unless it was manually specified. This is because a too-low value + // will slow down scavenges. + if (FLAG_IS_DEFAULT(CMSParPromoteBlocksToClaim)) { + FLAG_SET_ERGO(uintx, CMSParPromoteBlocksToClaim, 50); // default value before 6631166 + } + } + // Overwrite OldPLABSize which is the variable we will internally use everywhere. + FLAG_SET_ERGO(uintx, OldPLABSize, CMSParPromoteBlocksToClaim); + // If either of the static initialization defaults have changed, note this + // modification. 
+ if (!FLAG_IS_DEFAULT(CMSParPromoteBlocksToClaim) || !FLAG_IS_DEFAULT(OldPLABWeight)) { + CFLS_LAB::modify_initialization(OldPLABSize, OldPLABWeight); + } } +#endif // KERNEL inline uintx max_heap_for_compressed_oops() { LP64_ONLY(return oopDesc::OopEncodingHeapMax - MaxPermSize - os::vm_page_size()); @@ -2370,22 +2386,25 @@ SOLARIS_ONLY( "ExtendedDTraceProbes flag is only applicable on Solaris\n"); return JNI_EINVAL; #endif // ndef SOLARIS - } else #ifdef ASSERT - if (match_option(option, "-XX:+FullGCALot", &tail)) { + } else if (match_option(option, "-XX:+FullGCALot", &tail)) { FLAG_SET_CMDLINE(bool, FullGCALot, true); // disable scavenge before parallel mark-compact FLAG_SET_CMDLINE(bool, ScavengeBeforeFullGC, false); - } else #endif - if (match_option(option, "-XX:ParCMSPromoteBlocksToClaim=", &tail)) { + } else if (match_option(option, "-XX:CMSParPromoteBlocksToClaim=", &tail)) { julong cms_blocks_to_claim = (julong)atol(tail); FLAG_SET_CMDLINE(uintx, CMSParPromoteBlocksToClaim, cms_blocks_to_claim); jio_fprintf(defaultStream::error_stream(), - "Please use -XX:CMSParPromoteBlocksToClaim in place of " + "Please use -XX:OldPLABSize in place of " + "-XX:CMSParPromoteBlocksToClaim in the future\n"); + } else if (match_option(option, "-XX:ParCMSPromoteBlocksToClaim=", &tail)) { + julong cms_blocks_to_claim = (julong)atol(tail); + FLAG_SET_CMDLINE(uintx, CMSParPromoteBlocksToClaim, cms_blocks_to_claim); + jio_fprintf(defaultStream::error_stream(), + "Please use -XX:OldPLABSize in place of " "-XX:ParCMSPromoteBlocksToClaim in the future\n"); - } else - if (match_option(option, "-XX:ParallelGCOldGenAllocBufferSize=", &tail)) { + } else if (match_option(option, "-XX:ParallelGCOldGenAllocBufferSize=", &tail)) { julong old_plab_size = 0; ArgsRange errcode = parse_memory_size(tail, &old_plab_size, 1); if (errcode != arg_in_range) { @@ -2398,8 +2417,7 @@ SOLARIS_ONLY( jio_fprintf(defaultStream::error_stream(), "Please use -XX:OldPLABSize in place of " 
"-XX:ParallelGCOldGenAllocBufferSize in the future\n"); - } else - if (match_option(option, "-XX:ParallelGCToSpaceAllocBufferSize=", &tail)) { + } else if (match_option(option, "-XX:ParallelGCToSpaceAllocBufferSize=", &tail)) { julong young_plab_size = 0; ArgsRange errcode = parse_memory_size(tail, &young_plab_size, 1); if (errcode != arg_in_range) { @@ -2412,8 +2430,7 @@ SOLARIS_ONLY( jio_fprintf(defaultStream::error_stream(), "Please use -XX:YoungPLABSize in place of " "-XX:ParallelGCToSpaceAllocBufferSize in the future\n"); - } else - if (match_option(option, "-XX:", &tail)) { // -XX:xxxx + } else if (match_option(option, "-XX:", &tail)) { // -XX:xxxx // Skip -XX:Flags= since that case has already been handled if (strncmp(tail, "Flags=", strlen("Flags=")) != 0) { if (!process_argument(tail, args->ignoreUnrecognized, origin)) { @@ -2727,6 +2744,7 @@ jint Arguments::parse(const JavaVMInitArgs* args) { return JNI_EINVAL; } +#ifndef KERNEL if (UseConcMarkSweepGC) { // Set flags for CMS and ParNew. 
Check UseConcMarkSweep first // to ensure that when both UseConcMarkSweepGC and UseParNewGC @@ -2744,6 +2762,7 @@ jint Arguments::parse(const JavaVMInitArgs* args) { set_g1_gc_flags(); } } +#endif // KERNEL #ifdef SERIALGC assert(verify_serial_gc_flags(), "SerialGC unset"); diff --git a/hotspot/src/share/vm/runtime/globals.hpp b/hotspot/src/share/vm/runtime/globals.hpp index 62dceadc215..536429bdd75 100644 --- a/hotspot/src/share/vm/runtime/globals.hpp +++ b/hotspot/src/share/vm/runtime/globals.hpp @@ -1355,10 +1355,46 @@ class CommandLineFlags { product(uintx, ParGCDesiredObjsFromOverflowList, 20, \ "The desired number of objects to claim from the overflow list") \ \ - product(uintx, CMSParPromoteBlocksToClaim, 50, \ + product(uintx, CMSParPromoteBlocksToClaim, 16, \ "Number of blocks to attempt to claim when refilling CMS LAB for "\ "parallel GC.") \ \ + product(uintx, OldPLABWeight, 50, \ + "Percentage (0-100) used to weight the current sample when " \ + "computing exponentially decaying average for resizing CMSParPromoteBlocksToClaim.") \ + \ + product(bool, ResizeOldPLAB, true, \ + "Dynamically resize (old gen) promotion labs") \ + \ + product(bool, PrintOldPLAB, false, \ + "Print (old gen) promotion labs sizing decisions") \ + \ + product(uintx, CMSOldPLABMin, 16, \ + "Min size of CMS gen promotion lab caches per worker per blksize")\ + \ + product(uintx, CMSOldPLABMax, 1024, \ + "Max size of CMS gen promotion lab caches per worker per blksize")\ + \ + product(uintx, CMSOldPLABNumRefills, 4, \ + "Nominal number of refills of CMS gen promotion lab cache" \ + " per worker per block size") \ + \ + product(bool, CMSOldPLABResizeQuicker, false, \ + "Whether to react on-the-fly during a scavenge to a sudden" \ + " change in block demand rate") \ + \ + product(uintx, CMSOldPLABToleranceFactor, 4, \ + "The tolerance of the phase-change detector for on-the-fly" \ + " PLAB resizing during a scavenge") \ + \ + product(uintx, CMSOldPLABReactivityFactor, 2, \ + "The gain 
in the feedback loop for on-the-fly PLAB resizing" \ + " during a scavenge") \ + \ + product(uintx, CMSOldPLABReactivityCeiling, 10, \ + "The clamping of the gain in the feedback loop for on-the-fly" \ + " PLAB resizing during a scavenge") \ + \ product(bool, AlwaysPreTouch, false, \ "It forces all freshly committed pages to be pre-touched.") \ \ @@ -1400,27 +1436,54 @@ class CommandLineFlags { "Percentage (0-100) by which the CMS incremental mode duty cycle" \ " is shifted to the right within the period between young GCs") \ \ - product(uintx, CMSExpAvgFactor, 25, \ - "Percentage (0-100) used to weight the current sample when " \ - "computing exponential averages for CMS statistics") \ + product(uintx, CMSExpAvgFactor, 50, \ + "Percentage (0-100) used to weight the current sample when " \ + "computing exponential averages for CMS statistics.") \ \ - product(uintx, CMS_FLSWeight, 50, \ - "Percentage (0-100) used to weight the current sample when " \ - "computing exponentially decating averages for CMS FLS statistics") \ + product(uintx, CMS_FLSWeight, 75, \ + "Percentage (0-100) used to weight the current sample when " \ + "computing exponentially decaying averages for CMS FLS statistics.") \ \ - product(uintx, CMS_FLSPadding, 2, \ - "The multiple of deviation from mean to use for buffering " \ + product(uintx, CMS_FLSPadding, 1, \ + "The multiple of deviation from mean to use for buffering " \ "against volatility in free list demand.") \ \ product(uintx, FLSCoalescePolicy, 2, \ "CMS: Aggression level for coalescing, increasing from 0 to 4") \ \ - product(uintx, CMS_SweepWeight, 50, \ + product(bool, FLSAlwaysCoalesceLarge, false, \ + "CMS: Larger free blocks are always available for coalescing") \ + \ + product(double, FLSLargestBlockCoalesceProximity, 0.99, \ + "CMS: the smaller the percentage the greater the coalition force")\ + \ + product(double, CMSSmallCoalSurplusPercent, 1.05, \ + "CMS: the factor by which to inflate estimated demand of small" \ + " block 
sizes to prevent coalescing with an adjoining block") \ + \ + product(double, CMSLargeCoalSurplusPercent, 0.95, \ + "CMS: the factor by which to inflate estimated demand of large" \ + " block sizes to prevent coalescing with an adjoining block") \ + \ + product(double, CMSSmallSplitSurplusPercent, 1.10, \ + "CMS: the factor by which to inflate estimated demand of small" \ + " block sizes to prevent splitting to supply demand for smaller" \ + " blocks") \ + \ + product(double, CMSLargeSplitSurplusPercent, 1.00, \ + "CMS: the factor by which to inflate estimated demand of large" \ + " block sizes to prevent splitting to supply demand for smaller" \ + " blocks") \ + \ + product(bool, CMSExtrapolateSweep, false, \ + "CMS: cushion for block demand during sweep") \ + \ + product(uintx, CMS_SweepWeight, 75, \ "Percentage (0-100) used to weight the current sample when " \ "computing exponentially decaying average for inter-sweep " \ "duration") \ \ - product(uintx, CMS_SweepPadding, 2, \ + product(uintx, CMS_SweepPadding, 1, \ "The multiple of deviation from mean to use for buffering " \ "against volatility in inter-sweep duration.") \ \ @@ -1459,6 +1522,13 @@ class CommandLineFlags { product(uintx, CMSIndexedFreeListReplenish, 4, \ "Replenish and indexed free list with this number of chunks") \ \ + product(bool, CMSReplenishIntermediate, true, \ + "Replenish all intermediate free-list caches") \ + \ + product(bool, CMSSplitIndexedFreeListBlocks, true, \ + "When satisfying batched demand, split blocks from the " \ + "IndexedFreeList whose size is a multiple of requested size") \ + \ product(bool, CMSLoopWarn, false, \ "Warn in case of excessive CMS looping") \ \ @@ -1593,6 +1663,18 @@ class CommandLineFlags { "Bitmap operations should process at most this many bits" \ "between yields") \ \ + product(bool, CMSDumpAtPromotionFailure, false, \ + "Dump useful information about the state of the CMS old " \ + " generation upon a promotion failure.") \ + \ + product(bool, 
CMSPrintChunksInDump, false, \ + "In a dump enabled by CMSDumpAtPromotionFailure, include " \ + " more detailed information about the free chunks.") \ + \ + product(bool, CMSPrintObjectsInDump, false, \ + "In a dump enabled by CMSDumpAtPromotionFailure, include " \ + " more detailed information about the allocated objects.") \ + \ diagnostic(bool, FLSVerifyAllHeapReferences, false, \ "Verify that all refs across the FLS boundary " \ " are to valid objects") \ @@ -1677,6 +1759,10 @@ class CommandLineFlags { "The youngest generation collection does not require " \ "a guarantee of full promotion of all live objects.") \ \ + product(bool, PrintPromotionFailure, false, \ + "Print additional diagnostic information following " \ + " promotion failure") \ + \ notproduct(bool, PromotionFailureALot, false, \ "Use promotion failure handling on every youngest generation " \ "collection") \ diff --git a/hotspot/src/share/vm/services/classLoadingService.cpp b/hotspot/src/share/vm/services/classLoadingService.cpp index d8f10757907..9581f319942 100644 --- a/hotspot/src/share/vm/services/classLoadingService.cpp +++ b/hotspot/src/share/vm/services/classLoadingService.cpp @@ -128,7 +128,7 @@ void ClassLoadingService::notify_class_unloaded(instanceKlass* k) { if (TraceClassUnloading) { ResourceMark rm; - tty->print_cr("[Unloading class %s]", k->external_name()); + gclog_or_tty->print_cr("[Unloading class %s]", k->external_name()); } }