/* * Copyright 2001-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, * CA 95054 USA or visit www.sun.com if you need additional information or * have any questions. * */ #include "incls/_precompiled.incl" #include "incls/_concurrentG1Refine.cpp.incl" // Possible sizes for the card counts cache: odd primes that roughly double in size. // (See jvmtiTagMap.cpp). int ConcurrentG1Refine::_cc_cache_sizes[] = { 16381, 32771, 76831, 150001, 307261, 614563, 1228891, 2457733, 4915219, 9830479, 19660831, 39321619, 78643219, 157286461, -1 }; ConcurrentG1Refine::ConcurrentG1Refine() : _card_counts(NULL), _card_epochs(NULL), _n_card_counts(0), _max_n_card_counts(0), _cache_size_index(0), _expand_card_counts(false), _hot_cache(NULL), _def_use_cache(false), _use_cache(false), _n_periods(0), _threads(NULL), _n_threads(0) { // Ergomonically select initial concurrent refinement parameters if (FLAG_IS_DEFAULT(G1ConcRefineGreenZone)) { FLAG_SET_DEFAULT(G1ConcRefineGreenZone, MAX2(ParallelGCThreads, 1)); } set_green_zone(G1ConcRefineGreenZone); if (FLAG_IS_DEFAULT(G1ConcRefineYellowZone)) { FLAG_SET_DEFAULT(G1ConcRefineYellowZone, green_zone() * 3); } set_yellow_zone(MAX2(G1ConcRefineYellowZone, green_zone())); if (FLAG_IS_DEFAULT(G1ConcRefineRedZone)) { FLAG_SET_DEFAULT(G1ConcRefineRedZone, yellow_zone() * 2); } set_red_zone(MAX2(G1ConcRefineRedZone, yellow_zone())); _n_worker_threads = thread_num(); // We need one extra thread to do the young gen rset size sampling. _n_threads = _n_worker_threads + 1; reset_threshold_step(); _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads); int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids(); ConcurrentG1RefineThread *next = NULL; for (int i = _n_threads - 1; i >= 0; i--) { ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i); assert(t != NULL, "Conc refine should have been created"); assert(t->cg1r() == this, "Conc refine thread should refer to this"); _threads[i] = t; next = t; } } void ConcurrentG1Refine::reset_threshold_step() { if (FLAG_IS_DEFAULT(G1ConcRefineThresholdStep)) { _thread_threshold_step = (yellow_zone() - green_zone()) / (worker_thread_num() + 1); } else { _thread_threshold_step = G1ConcRefineThresholdStep; } } int ConcurrentG1Refine::thread_num() { return MAX2((G1ParallelRSetThreads > 0) ? G1ParallelRSetThreads : ParallelGCThreads, 1); } void ConcurrentG1Refine::init() { if (G1ConcRSLogCacheSize > 0) { _g1h = G1CollectedHeap::heap(); _max_n_card_counts = (unsigned) (_g1h->g1_reserved_obj_bytes() >> CardTableModRefBS::card_shift); size_t max_card_num = ((size_t)1 << (sizeof(unsigned)*BitsPerByte-1)) - 1; guarantee(_max_n_card_counts < max_card_num, "card_num representation"); int desired = _max_n_card_counts / InitialCacheFraction; for (_cache_size_index = 0; _cc_cache_sizes[_cache_size_index] >= 0; _cache_size_index++) { if (_cc_cache_sizes[_cache_size_index] >= desired) break; } _cache_size_index = MAX2(0, (_cache_size_index - 1)); int initial_size = _cc_cache_sizes[_cache_size_index]; if (initial_size < 0) initial_size = _max_n_card_counts; // Make sure we don't go bigger than we will ever need _n_card_counts = MIN2((unsigned) initial_size, _max_n_card_counts); _card_counts = NEW_C_HEAP_ARRAY(CardCountCacheEntry, _n_card_counts); _card_epochs = NEW_C_HEAP_ARRAY(CardEpochCacheEntry, _n_card_counts); Copy::fill_to_bytes(&_card_counts[0], _n_card_counts * sizeof(CardCountCacheEntry)); Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry)); ModRefBarrierSet* bs = _g1h->mr_bs(); guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition"); _ct_bs = (CardTableModRefBS*)bs; _ct_bot = _ct_bs->byte_for_const(_g1h->reserved_region().start()); _def_use_cache = true; _use_cache = true; _hot_cache_size = (1 << G1ConcRSLogCacheSize); _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size); _n_hot = 0; _hot_cache_idx = 0; // For refining the cards in the hot cache in parallel int n_workers = (ParallelGCThreads > 0 ? _g1h->workers()->total_workers() : 1); _hot_cache_par_chunk_size = MAX2(1, _hot_cache_size / n_workers); _hot_cache_par_claimed_idx = 0; } } void ConcurrentG1Refine::stop() { if (_threads != NULL) { for (int i = 0; i < _n_threads; i++) { _threads[i]->stop(); } } } void ConcurrentG1Refine::reinitialize_threads() { reset_threshold_step(); if (_threads != NULL) { for (int i = 0; i < _n_threads; i++) { _threads[i]->initialize(); } } } ConcurrentG1Refine::~ConcurrentG1Refine() { if (G1ConcRSLogCacheSize > 0) { assert(_card_counts != NULL, "Logic"); FREE_C_HEAP_ARRAY(CardCountCacheEntry, _card_counts); assert(_card_epochs != NULL, "Logic"); FREE_C_HEAP_ARRAY(CardEpochCacheEntry, _card_epochs); assert(_hot_cache != NULL, "Logic"); FREE_C_HEAP_ARRAY(jbyte*, _hot_cache); } if (_threads != NULL) { for (int i = 0; i < _n_threads; i++) { delete _threads[i]; } FREE_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _threads); } } void ConcurrentG1Refine::threads_do(ThreadClosure *tc) { if (_threads != NULL) { for (int i = 0; i < _n_threads; i++) { tc->do_thread(_threads[i]); } } } bool ConcurrentG1Refine::is_young_card(jbyte* card_ptr) { HeapWord* start = _ct_bs->addr_for(card_ptr); HeapRegion* r = _g1h->heap_region_containing(start); if (r != NULL && r->is_young()) { return true; } // This card is not associated with a heap region // so can't be young. return false; } jbyte* ConcurrentG1Refine::add_card_count(jbyte* card_ptr, int* count, bool* defer) { unsigned new_card_num = ptr_2_card_num(card_ptr); unsigned bucket = hash(new_card_num); assert(0 <= bucket && bucket < _n_card_counts, "Bounds"); CardCountCacheEntry* count_ptr = &_card_counts[bucket]; CardEpochCacheEntry* epoch_ptr = &_card_epochs[bucket]; // We have to construct a new entry if we haven't updated the counts // during the current period, or if the count was updated for a // different card number. unsigned int new_epoch = (unsigned int) _n_periods; julong new_epoch_entry = make_epoch_entry(new_card_num, new_epoch); while (true) { // Fetch the previous epoch value julong prev_epoch_entry = epoch_ptr->_value; julong cas_res; if (extract_epoch(prev_epoch_entry) != new_epoch) { // This entry has not yet been updated during this period. // Note: we update the epoch value atomically to ensure // that there is only one winner that updates the cached // card_ptr value even though all the refine threads share // the same epoch value. cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry, (volatile jlong*)&epoch_ptr->_value, (jlong) prev_epoch_entry); if (cas_res == prev_epoch_entry) { // We have successfully won the race to update the // epoch and card_num value. Make it look like the // count and eviction count were previously cleared. count_ptr->_count = 1; count_ptr->_evict_count = 0; *count = 0; // We can defer the processing of card_ptr *defer = true; return card_ptr; } // We did not win the race to update the epoch field, so some other // thread must have done it. The value that gets returned by CAS // should be the new epoch value. assert(extract_epoch(cas_res) == new_epoch, "unexpected epoch"); // We could 'continue' here or just re-read the previous epoch value prev_epoch_entry = epoch_ptr->_value; } // The epoch entry for card_ptr has been updated during this period. unsigned old_card_num = extract_card_num(prev_epoch_entry); // The card count that will be returned to caller *count = count_ptr->_count; // Are we updating the count for the same card? if (new_card_num == old_card_num) { // Same card - just update the count. We could have more than one // thread racing to update count for the current card. It should be // OK not to use a CAS as the only penalty should be some missed // increments of the count which delays identifying the card as "hot". if (*count < max_jubyte) count_ptr->_count++; // We can defer the processing of card_ptr *defer = true; return card_ptr; } // Different card - evict old card info if (count_ptr->_evict_count < max_jubyte) count_ptr->_evict_count++; if (count_ptr->_evict_count > G1CardCountCacheExpandThreshold) { // Trigger a resize the next time we clear _expand_card_counts = true; } cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry, (volatile jlong*)&epoch_ptr->_value, (jlong) prev_epoch_entry); if (cas_res == prev_epoch_entry) { // We successfully updated the card num value in the epoch entry count_ptr->_count = 0; // initialize counter for new card num // Even though the region containg the card at old_card_num was not // in the young list when old_card_num was recorded in the epoch // cache it could have been added to the free list and subsequently // added to the young list in the intervening time. If the evicted // card is in a young region just return the card_ptr and the evicted // card will not be cleaned. See CR 6817995. jbyte* old_card_ptr = card_num_2_ptr(old_card_num); if (is_young_card(old_card_ptr)) { *count = 0; // We can defer the processing of card_ptr *defer = true; return card_ptr; } // We do not want to defer processing of card_ptr in this case // (we need to refine old_card_ptr and card_ptr) *defer = false; return old_card_ptr; } // Someone else beat us - try again. } } jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr, bool* defer) { int count; jbyte* cached_ptr = add_card_count(card_ptr, &count, defer); assert(cached_ptr != NULL, "bad cached card ptr"); if (is_young_card(cached_ptr)) { // The region containing cached_ptr has been freed during a clean up // pause, reallocated, and tagged as young. assert(cached_ptr != card_ptr, "shouldn't be"); // We've just inserted a new old-gen card pointer into the card count // cache and evicted the previous contents of that count slot. // The evicted card pointer has been determined to be in a young region // and so cannot be the newly inserted card pointer (that will be // in an old region). // The count for newly inserted card will be set to zero during the // insertion, so we don't want to defer the cleaning of the newly // inserted card pointer. assert(*defer == false, "deferring non-hot card"); return NULL; } // The card pointer we obtained from card count cache is not hot // so do not store it in the cache; return it for immediate // refining. if (count < G1ConcRSHotCardLimit) { return cached_ptr; } // Otherwise, the pointer we got from the _card_counts is hot. jbyte* res = NULL; MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag); if (_n_hot == _hot_cache_size) { res = _hot_cache[_hot_cache_idx]; _n_hot--; } // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx. _hot_cache[_hot_cache_idx] = cached_ptr; _hot_cache_idx++; if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0; _n_hot++; if (res != NULL) { // Even though the region containg res was not in the young list // when it was recorded in the hot cache it could have been added // to the free list and subsequently added to the young list in // the intervening time. If res is in a young region, return NULL // so that res is not cleaned. See CR 6817995. if (is_young_card(res)) { res = NULL; } } return res; } void ConcurrentG1Refine::clean_up_cache(int worker_i, G1RemSet* g1rs) { assert(!use_cache(), "cache should be disabled"); int start_idx; while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once int end_idx = start_idx + _hot_cache_par_chunk_size; if (start_idx == Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) { // The current worker has successfully claimed the chunk [start_idx..end_idx) end_idx = MIN2(end_idx, _n_hot); for (int i = start_idx; i < end_idx; i++) { jbyte* entry = _hot_cache[i]; if (entry != NULL) { g1rs->concurrentRefineOneCard(entry, worker_i); } } } } } void ConcurrentG1Refine::expand_card_count_cache() { if (_n_card_counts < _max_n_card_counts) { int new_idx = _cache_size_index+1; int new_size = _cc_cache_sizes[new_idx]; if (new_size < 0) new_size = _max_n_card_counts; // Make sure we don't go bigger than we will ever need new_size = MIN2((unsigned) new_size, _max_n_card_counts); // Expand the card count and card epoch tables if (new_size > (int)_n_card_counts) { // We can just free and allocate a new array as we're // not interested in preserving the contents assert(_card_counts != NULL, "Logic!"); assert(_card_epochs != NULL, "Logic!"); FREE_C_HEAP_ARRAY(CardCountCacheEntry, _card_counts); FREE_C_HEAP_ARRAY(CardEpochCacheEntry, _card_epochs); _n_card_counts = new_size; _card_counts = NEW_C_HEAP_ARRAY(CardCountCacheEntry, _n_card_counts); _card_epochs = NEW_C_HEAP_ARRAY(CardEpochCacheEntry, _n_card_counts); _cache_size_index = new_idx; } } } void ConcurrentG1Refine::clear_and_record_card_counts() { if (G1ConcRSLogCacheSize == 0) return; #ifndef PRODUCT double start = os::elapsedTime(); #endif if (_expand_card_counts) { expand_card_count_cache(); _expand_card_counts = false; // Only need to clear the epochs. Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry)); } int this_epoch = (int) _n_periods; assert((this_epoch+1) <= max_jint, "to many periods"); // Update epoch _n_periods++; #ifndef PRODUCT double elapsed = os::elapsedTime() - start; _g1h->g1_policy()->record_cc_clear_time(elapsed * 1000.0); #endif } void ConcurrentG1Refine::print_worker_threads_on(outputStream* st) const { for (int i = 0; i < _n_threads; ++i) { _threads[i]->print_on(st); st->cr(); } }