From e13e75547c10d085d70f13722a676cc408b9da88 Mon Sep 17 00:00:00 2001 From: Stefan Karlsson Date: Thu, 21 May 2015 09:10:47 +0200 Subject: [PATCH 01/26] 8080109: Use single-threaded code in Threads::possibly_parallel_oops_do when running with only one worker thread Reviewed-by: jmasa, kbarrett --- hotspot/src/share/vm/gc/g1/g1RootProcessor.cpp | 2 +- hotspot/src/share/vm/gc/shared/genCollectedHeap.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hotspot/src/share/vm/gc/g1/g1RootProcessor.cpp b/hotspot/src/share/vm/gc/g1/g1RootProcessor.cpp index 2c2c68be549..48f9b094adf 100644 --- a/hotspot/src/share/vm/gc/g1/g1RootProcessor.cpp +++ b/hotspot/src/share/vm/gc/g1/g1RootProcessor.cpp @@ -253,7 +253,7 @@ void G1RootProcessor::process_java_roots(OopClosure* strong_roots, { G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::ThreadRoots, worker_i); - bool is_par = _g1h->n_par_threads() > 0; + bool is_par = _g1h->n_par_threads() > 1; Threads::possibly_parallel_oops_do(is_par, strong_roots, thread_stack_clds, strong_code); } } diff --git a/hotspot/src/share/vm/gc/shared/genCollectedHeap.cpp b/hotspot/src/share/vm/gc/shared/genCollectedHeap.cpp index 28a276d335b..bd64b4b70f3 100644 --- a/hotspot/src/share/vm/gc/shared/genCollectedHeap.cpp +++ b/hotspot/src/share/vm/gc/shared/genCollectedHeap.cpp @@ -609,7 +609,7 @@ void GenCollectedHeap::process_roots(bool activate_scope, // Only process code roots from thread stacks if we aren't visiting the entire CodeCache anyway CodeBlobClosure* roots_from_code_p = (so & SO_AllCodeCache) ? 
NULL : code_roots; - bool is_par = n_par_threads() > 0; + bool is_par = n_par_threads() > 1; Threads::possibly_parallel_oops_do(is_par, strong_roots, roots_from_clds_p, roots_from_code_p); if (!_process_strong_tasks->is_task_claimed(GCH_PS_Universe_oops_do)) { From c13872f88d535dab4f15c67019e01441aa95778d Mon Sep 17 00:00:00 2001 From: Stefan Karlsson Date: Thu, 21 May 2015 09:23:00 +0200 Subject: [PATCH 02/26] 8080110: Remove usage of CollectedHeap::n_par_threads() from root processing Reviewed-by: jmasa, kbarrett --- .../gc/cms/concurrentMarkSweepGeneration.cpp | 90 +++++++++++-------- .../src/share/vm/gc/cms/parNewGeneration.cpp | 39 ++++---- .../src/share/vm/gc/cms/parNewGeneration.hpp | 5 +- hotspot/src/share/vm/gc/g1/concurrentMark.cpp | 19 ++-- .../src/share/vm/gc/g1/g1CollectedHeap.cpp | 4 +- hotspot/src/share/vm/gc/g1/g1MarkSweep.cpp | 4 +- .../src/share/vm/gc/g1/g1RootProcessor.cpp | 22 +++-- .../src/share/vm/gc/g1/g1RootProcessor.hpp | 7 +- .../share/vm/gc/serial/defNewGeneration.cpp | 24 +++-- .../src/share/vm/gc/serial/genMarkSweep.cpp | 41 +++++---- .../share/vm/gc/shared/genCollectedHeap.cpp | 12 ++- .../share/vm/gc/shared/genCollectedHeap.hpp | 20 ++--- .../share/vm/gc/shared/strongRootsScope.cpp | 20 ++--- .../share/vm/gc/shared/strongRootsScope.hpp | 11 ++- 14 files changed, 176 insertions(+), 142 deletions(-) diff --git a/hotspot/src/share/vm/gc/cms/concurrentMarkSweepGeneration.cpp b/hotspot/src/share/vm/gc/cms/concurrentMarkSweepGeneration.cpp index cdb21c69435..9b4e024d20a 100644 --- a/hotspot/src/share/vm/gc/cms/concurrentMarkSweepGeneration.cpp +++ b/hotspot/src/share/vm/gc/cms/concurrentMarkSweepGeneration.cpp @@ -2428,14 +2428,18 @@ void CMSCollector::verify_after_remark_work_1() { MarkRefsIntoClosure notOlder(_span, verification_mark_bm()); gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel. 
- gch->gen_process_roots(_cmsGen->level(), - true, // younger gens are roots - true, // activate StrongRootsScope - GenCollectedHeap::ScanningOption(roots_scanning_options()), - should_unload_classes(), - &notOlder, - NULL, - NULL); // SSS: Provide correct closure + { + StrongRootsScope srs(1); + + gch->gen_process_roots(&srs, + _cmsGen->level(), + true, // younger gens are roots + GenCollectedHeap::ScanningOption(roots_scanning_options()), + should_unload_classes(), + &notOlder, + NULL, + NULL); + } // Now mark from the roots MarkFromRootsClosure markFromRootsClosure(this, _span, @@ -2496,14 +2500,18 @@ void CMSCollector::verify_after_remark_work_2() { gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel. - gch->gen_process_roots(_cmsGen->level(), - true, // younger gens are roots - true, // activate StrongRootsScope - GenCollectedHeap::ScanningOption(roots_scanning_options()), - should_unload_classes(), - &notOlder, - NULL, - &cld_closure); + { + StrongRootsScope srs(1); + + gch->gen_process_roots(&srs, + _cmsGen->level(), + true, // younger gens are roots + GenCollectedHeap::ScanningOption(roots_scanning_options()), + should_unload_classes(), + &notOlder, + NULL, + &cld_closure); + } // Now mark from the roots MarkFromRootsVerifyClosure markFromRootsClosure(this, _span, @@ -2913,10 +2921,11 @@ class CMSParMarkTask : public AbstractGangTask { // Parallel initial mark task class CMSParInitialMarkTask: public CMSParMarkTask { + StrongRootsScope* _strong_roots_scope; public: - CMSParInitialMarkTask(CMSCollector* collector, uint n_workers) : - CMSParMarkTask("Scan roots and young gen for initial mark in parallel", - collector, n_workers) {} + CMSParInitialMarkTask(CMSCollector* collector, StrongRootsScope* strong_roots_scope, uint n_workers) : + CMSParMarkTask("Scan roots and young gen for initial mark in parallel", collector, n_workers), + _strong_roots_scope(strong_roots_scope) {} void work(uint worker_id); }; @@ -3004,14 +3013,15 @@ void
CMSCollector::checkpointRootsInitialWork() { FlexibleWorkGang* workers = gch->workers(); assert(workers != NULL, "Need parallel worker threads."); uint n_workers = workers->active_workers(); - CMSParInitialMarkTask tsk(this, n_workers); + + StrongRootsScope srs(n_workers); + + CMSParInitialMarkTask tsk(this, &srs, n_workers); gch->set_par_threads(n_workers); initialize_sequential_subtasks_for_young_gen_rescan(n_workers); if (n_workers > 1) { - StrongRootsScope srs; workers->run_task(&tsk); } else { - StrongRootsScope srs; tsk.work(0); } gch->set_par_threads(0); @@ -3019,9 +3029,12 @@ void CMSCollector::checkpointRootsInitialWork() { // The serial version. CLDToOopClosure cld_closure(&notOlder, true); gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel. - gch->gen_process_roots(_cmsGen->level(), + + StrongRootsScope srs(1); + + gch->gen_process_roots(&srs, + _cmsGen->level(), true, // younger gens are roots - true, // activate StrongRootsScope GenCollectedHeap::ScanningOption(roots_scanning_options()), should_unload_classes(), &notOlder, @@ -4452,9 +4465,9 @@ void CMSParInitialMarkTask::work(uint worker_id) { CLDToOopClosure cld_closure(&par_mri_cl, true); - gch->gen_process_roots(_collector->_cmsGen->level(), + gch->gen_process_roots(_strong_roots_scope, + _collector->_cmsGen->level(), false, // yg was scanned above - false, // this is parallel code GenCollectedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()), _collector->should_unload_classes(), &par_mri_cl, @@ -4478,6 +4491,7 @@ class CMSParRemarkTask: public CMSParMarkTask { // The per-thread work queues, available here for stealing.
OopTaskQueueSet* _task_queues; ParallelTaskTerminator _term; + StrongRootsScope* _strong_roots_scope; public: // A value of 0 passed to n_workers will cause the number of @@ -4485,12 +4499,14 @@ class CMSParRemarkTask: public CMSParMarkTask { CMSParRemarkTask(CMSCollector* collector, CompactibleFreeListSpace* cms_space, uint n_workers, FlexibleWorkGang* workers, - OopTaskQueueSet* task_queues): + OopTaskQueueSet* task_queues, + StrongRootsScope* strong_roots_scope): CMSParMarkTask("Rescan roots and grey objects in parallel", collector, n_workers), _cms_space(cms_space), _task_queues(task_queues), - _term(n_workers, task_queues) { } + _term(n_workers, task_queues), + _strong_roots_scope(strong_roots_scope) { } OopTaskQueueSet* task_queues() { return _task_queues; } @@ -4588,9 +4604,9 @@ void CMSParRemarkTask::work(uint worker_id) { // ---------- remaining roots -------------- _timer.reset(); _timer.start(); - gch->gen_process_roots(_collector->_cmsGen->level(), + gch->gen_process_roots(_strong_roots_scope, + _collector->_cmsGen->level(), false, // yg was scanned above - false, // this is parallel code GenCollectedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()), _collector->should_unload_classes(), &par_mrias_cl, @@ -5068,9 +5084,9 @@ void CMSCollector::do_remark_parallel() { } CompactibleFreeListSpace* cms_space = _cmsGen->cmsSpace(); - CMSParRemarkTask tsk(this, - cms_space, - n_workers, workers, task_queues()); + StrongRootsScope srs(n_workers); + + CMSParRemarkTask tsk(this, cms_space, n_workers, workers, task_queues(), &srs); // Set up for parallel process_roots work. gch->set_par_threads(n_workers); @@ -5105,11 +5121,9 @@ void CMSCollector::do_remark_parallel() { // necessarily be so, since it's possible that we are doing // ST marking. 
ReferenceProcessorMTDiscoveryMutator mt(ref_processor(), true); - StrongRootsScope srs; workers->run_task(&tsk); } else { ReferenceProcessorMTDiscoveryMutator mt(ref_processor(), false); - StrongRootsScope srs; tsk.work(0); } @@ -5177,11 +5191,11 @@ void CMSCollector::do_remark_non_parallel() { verify_work_stacks_empty(); gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel. - StrongRootsScope srs; + StrongRootsScope srs(1); - gch->gen_process_roots(_cmsGen->level(), + gch->gen_process_roots(&srs, + _cmsGen->level(), true, // younger gens as roots - false, // use the local StrongRootsScope GenCollectedHeap::ScanningOption(roots_scanning_options()), should_unload_classes(), &mrias_cl, diff --git a/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp b/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp index 3bed483338b..c29ae02e33b 100644 --- a/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp +++ b/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp @@ -567,11 +567,13 @@ void ParEvacuateFollowersClosure::do_void() { } ParNewGenTask::ParNewGenTask(ParNewGeneration* gen, Generation* old_gen, - HeapWord* young_old_boundary, ParScanThreadStateSet* state_set) : + HeapWord* young_old_boundary, ParScanThreadStateSet* state_set, + StrongRootsScope* strong_roots_scope) : AbstractGangTask("ParNewGeneration collection"), _gen(gen), _old_gen(old_gen), _young_old_boundary(young_old_boundary), - _state_set(state_set) + _state_set(state_set), + _strong_roots_scope(strong_roots_scope) {} // Reset the terminator for the given number of @@ -603,10 +605,10 @@ void ParNewGenTask::work(uint worker_id) { false); par_scan_state.start_strong_roots(); - gch->gen_process_roots(_gen->level(), + gch->gen_process_roots(_strong_roots_scope, + _gen->level(), true, // Process younger gens, if any, // as strong roots. 
- false, // no scope; this is parallel code GenCollectedHeap::SO_ScavengeCodeCache, GenCollectedHeap::StrongAndWeakRoots, &par_scan_state.to_space_root_closure(), @@ -952,20 +954,23 @@ void ParNewGeneration::collect(bool full, *to(), *this, *_old_gen, *task_queues(), _overflow_stacks, desired_plab_sz(), _term); - ParNewGenTask tsk(this, _old_gen, reserved().end(), &thread_state_set); - gch->set_par_threads(n_workers); - gch->rem_set()->prepare_for_younger_refs_iterate(true); - // It turns out that even when we're using 1 thread, doing the work in a - // separate thread causes wide variance in run times. We can't help this - // in the multi-threaded case, but we special-case n=1 here to get - // repeatable measurements of the 1-thread overhead of the parallel code. - if (n_workers > 1) { - StrongRootsScope srs; - workers->run_task(&tsk); - } else { - StrongRootsScope srs; - tsk.work(0); + { + StrongRootsScope srs(n_workers); + + ParNewGenTask tsk(this, _old_gen, reserved().end(), &thread_state_set, &srs); + gch->set_par_threads(n_workers); + gch->rem_set()->prepare_for_younger_refs_iterate(true); + // It turns out that even when we're using 1 thread, doing the work in a + // separate thread causes wide variance in run times. We can't help this + // in the multi-threaded case, but we special-case n=1 here to get + // repeatable measurements of the 1-thread overhead of the parallel code. 
+ if (n_workers > 1) { + workers->run_task(&tsk); + } else { + tsk.work(0); + } } + thread_state_set.reset(0 /* Bad value in debug if not reset */, promotion_failed()); diff --git a/hotspot/src/share/vm/gc/cms/parNewGeneration.hpp b/hotspot/src/share/vm/gc/cms/parNewGeneration.hpp index 560d91f7c85..f4f91aa4223 100644 --- a/hotspot/src/share/vm/gc/cms/parNewGeneration.hpp +++ b/hotspot/src/share/vm/gc/cms/parNewGeneration.hpp @@ -39,6 +39,7 @@ class ParScanWithBarrierClosure; class ParRootScanWithoutBarrierClosure; class ParRootScanWithBarrierTwoGensClosure; class ParEvacuateFollowersClosure; +class StrongRootsScope; // It would be better if these types could be kept local to the .cpp file, // but they must be here to allow ParScanClosure::do_oop_work to be defined @@ -237,12 +238,14 @@ class ParNewGenTask: public AbstractGangTask { Generation* _old_gen; HeapWord* _young_old_boundary; class ParScanThreadStateSet* _state_set; + StrongRootsScope* _strong_roots_scope; public: ParNewGenTask(ParNewGeneration* gen, Generation* old_gen, HeapWord* young_old_boundary, - ParScanThreadStateSet* state_set); + ParScanThreadStateSet* state_set, + StrongRootsScope* strong_roots_scope); HeapWord* young_old_boundary() { return _young_old_boundary; } diff --git a/hotspot/src/share/vm/gc/g1/concurrentMark.cpp b/hotspot/src/share/vm/gc/g1/concurrentMark.cpp index 58933476c61..03d589ef4cd 100644 --- a/hotspot/src/share/vm/gc/g1/concurrentMark.cpp +++ b/hotspot/src/share/vm/gc/g1/concurrentMark.cpp @@ -2608,7 +2608,6 @@ void ConcurrentMark::checkpointRootsFinalWork() { g1h->ensure_parsability(false); - StrongRootsScope srs; // this is remark, so we'll use up all active threads uint active_workers = g1h->workers()->active_workers(); if (active_workers == 0) { @@ -2622,13 +2621,17 @@ void ConcurrentMark::checkpointRootsFinalWork() { // constructor and pass values of the active workers // through the gang in the task. 
- CMRemarkTask remarkTask(this, active_workers); - // We will start all available threads, even if we decide that the - // active_workers will be fewer. The extra ones will just bail out - // immediately. - g1h->set_par_threads(active_workers); - g1h->workers()->run_task(&remarkTask); - g1h->set_par_threads(0); + { + StrongRootsScope srs(active_workers); + + CMRemarkTask remarkTask(this, active_workers); + // We will start all available threads, even if we decide that the + // active_workers will be fewer. The extra ones will just bail out + // immediately. + g1h->set_par_threads(active_workers); + g1h->workers()->run_task(&remarkTask); + g1h->set_par_threads(0); + } SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); guarantee(has_overflown() || diff --git a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp index d18eb149490..9a0b55c86ce 100644 --- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp +++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp @@ -3021,7 +3021,7 @@ void G1CollectedHeap::verify(bool silent, VerifyOption vo) { G1VerifyCodeRootBlobClosure blobsCl(&codeRootsCl); { - G1RootProcessor root_processor(this); + G1RootProcessor root_processor(this, 1); root_processor.process_all_roots(&rootsCl, &cldCl, &blobsCl); @@ -5393,7 +5393,7 @@ void G1CollectedHeap::evacuate_collection_set(EvacuationInfo& evacuation_info) { double end_par_time_sec; { - G1RootProcessor root_processor(this); + G1RootProcessor root_processor(this, n_workers); G1ParTask g1_par_task(this, _task_queues, &root_processor); // InitialMark needs claim bits to keep track of the marked-through CLDs. 
if (g1_policy()->during_initial_mark_pause()) { diff --git a/hotspot/src/share/vm/gc/g1/g1MarkSweep.cpp b/hotspot/src/share/vm/gc/g1/g1MarkSweep.cpp index 22662210e29..7b18002d744 100644 --- a/hotspot/src/share/vm/gc/g1/g1MarkSweep.cpp +++ b/hotspot/src/share/vm/gc/g1/g1MarkSweep.cpp @@ -127,7 +127,7 @@ void G1MarkSweep::mark_sweep_phase1(bool& marked_for_unloading, MarkingCodeBlobClosure follow_code_closure(&GenMarkSweep::follow_root_closure, !CodeBlobToOopClosure::FixRelocations); { - G1RootProcessor root_processor(g1h); + G1RootProcessor root_processor(g1h, 1); root_processor.process_strong_roots(&GenMarkSweep::follow_root_closure, &GenMarkSweep::follow_cld_closure, &follow_code_closure); @@ -237,7 +237,7 @@ void G1MarkSweep::mark_sweep_phase3() { CodeBlobToOopClosure adjust_code_closure(&GenMarkSweep::adjust_pointer_closure, CodeBlobToOopClosure::FixRelocations); { - G1RootProcessor root_processor(g1h); + G1RootProcessor root_processor(g1h, 1); root_processor.process_all_roots(&GenMarkSweep::adjust_pointer_closure, &GenMarkSweep::adjust_cld_closure, &adjust_code_closure); diff --git a/hotspot/src/share/vm/gc/g1/g1RootProcessor.cpp b/hotspot/src/share/vm/gc/g1/g1RootProcessor.cpp index 48f9b094adf..b6cb0f1f5eb 100644 --- a/hotspot/src/share/vm/gc/g1/g1RootProcessor.cpp +++ b/hotspot/src/share/vm/gc/g1/g1RootProcessor.cpp @@ -90,11 +90,10 @@ public: void G1RootProcessor::worker_has_discovered_all_strong_classes() { - uint n_workers = _g1h->n_par_threads(); assert(ClassUnloadingWithConcurrentMark, "Currently only needed when doing G1 Class Unloading"); uint new_value = (uint)Atomic::add(1, &_n_workers_discovered_strong_classes); - if (new_value == n_workers) { + if (new_value == n_workers()) { // This thread is last. Notify the others. 
MonitorLockerEx ml(&_lock, Mutex::_no_safepoint_check_flag); _lock.notify_all(); @@ -102,21 +101,20 @@ void G1RootProcessor::worker_has_discovered_all_strong_classes() { } void G1RootProcessor::wait_until_all_strong_classes_discovered() { - uint n_workers = _g1h->n_par_threads(); assert(ClassUnloadingWithConcurrentMark, "Currently only needed when doing G1 Class Unloading"); - if ((uint)_n_workers_discovered_strong_classes != n_workers) { + if ((uint)_n_workers_discovered_strong_classes != n_workers()) { MonitorLockerEx ml(&_lock, Mutex::_no_safepoint_check_flag); - while ((uint)_n_workers_discovered_strong_classes != n_workers) { + while ((uint)_n_workers_discovered_strong_classes != n_workers()) { _lock.wait(Mutex::_no_safepoint_check_flag, 0, false); } } } -G1RootProcessor::G1RootProcessor(G1CollectedHeap* g1h) : +G1RootProcessor::G1RootProcessor(G1CollectedHeap* g1h, uint n_workers) : _g1h(g1h), _process_strong_tasks(new SubTasksDone(G1RP_PS_NumElements)), - _srs(), + _srs(n_workers), _lock(Mutex::leaf, "G1 Root Scanning barrier lock", false, Monitor::_safepoint_check_never), _n_workers_discovered_strong_classes(0) {} @@ -253,7 +251,7 @@ void G1RootProcessor::process_java_roots(OopClosure* strong_roots, { G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::ThreadRoots, worker_i); - bool is_par = _g1h->n_par_threads() > 1; + bool is_par = n_workers() > 1; Threads::possibly_parallel_oops_do(is_par, strong_roots, thread_stack_clds, strong_code); } } @@ -330,5 +328,13 @@ void G1RootProcessor::scan_remembered_sets(G1ParPushHeapRSClosure* scan_rs, } void G1RootProcessor::set_num_workers(uint active_workers) { + assert(active_workers == _srs.n_threads(), + err_msg("Mismatch between number of worker threads. 
active_workers: %u and n_workers(): %u", + active_workers, + _srs.n_threads())); _process_strong_tasks->set_n_threads(active_workers); } + +uint G1RootProcessor::n_workers() const { + return _srs.n_threads(); +} diff --git a/hotspot/src/share/vm/gc/g1/g1RootProcessor.hpp b/hotspot/src/share/vm/gc/g1/g1RootProcessor.hpp index 9be26d5259b..582b1109806 100644 --- a/hotspot/src/share/vm/gc/g1/g1RootProcessor.hpp +++ b/hotspot/src/share/vm/gc/g1/g1RootProcessor.hpp @@ -85,7 +85,7 @@ class G1RootProcessor : public StackObj { uint worker_i); public: - G1RootProcessor(G1CollectedHeap* g1h); + G1RootProcessor(G1CollectedHeap* g1h, uint n_workers); // Apply closures to the strongly and weakly reachable roots in the system // in a single pass. @@ -114,8 +114,11 @@ public: OopClosure* scan_non_heap_weak_roots, uint worker_i); - // Inform the root processor about the number of worker threads + // Inform SubTaskDone about the number of worker threads. void set_num_workers(uint active_workers); + + // Number of worker threads used by the root processor. + uint n_workers() const; }; #endif // SHARE_VM_GC_G1_G1ROOTPROCESSOR_HPP diff --git a/hotspot/src/share/vm/gc/serial/defNewGeneration.cpp b/hotspot/src/share/vm/gc/serial/defNewGeneration.cpp index 65ac2fd911e..bd3cbfe6a4f 100644 --- a/hotspot/src/share/vm/gc/serial/defNewGeneration.cpp +++ b/hotspot/src/share/vm/gc/serial/defNewGeneration.cpp @@ -38,6 +38,7 @@ #include "gc/shared/referencePolicy.hpp" #include "gc/shared/space.inline.hpp" #include "gc/shared/spaceDecorator.hpp" +#include "gc/shared/strongRootsScope.hpp" #include "memory/iterator.hpp" #include "oops/instanceRefKlass.hpp" #include "oops/oop.inline.hpp" @@ -625,15 +626,20 @@ void DefNewGeneration::collect(bool full, assert(gch->no_allocs_since_save_marks(0), "save marks have not been newly set."); - gch->gen_process_roots(_level, - true, // Process younger gens, if any, - // as strong roots. 
- true, // activate StrongRootsScope - GenCollectedHeap::SO_ScavengeCodeCache, - GenCollectedHeap::StrongAndWeakRoots, - &fsc_with_no_gc_barrier, - &fsc_with_gc_barrier, - &cld_scan_closure); + { + // SerialGC runs with n_workers == 0. + StrongRootsScope srs(0); + + gch->gen_process_roots(&srs, + _level, + true, // Process younger gens, if any, + // as strong roots. + GenCollectedHeap::SO_ScavengeCodeCache, + GenCollectedHeap::StrongAndWeakRoots, + &fsc_with_no_gc_barrier, + &fsc_with_gc_barrier, + &cld_scan_closure); + } // "evacuate followers". evacuate_followers.do_void(); diff --git a/hotspot/src/share/vm/gc/serial/genMarkSweep.cpp b/hotspot/src/share/vm/gc/serial/genMarkSweep.cpp index d355e9ac7aa..2dad400e004 100644 --- a/hotspot/src/share/vm/gc/serial/genMarkSweep.cpp +++ b/hotspot/src/share/vm/gc/serial/genMarkSweep.cpp @@ -40,6 +40,7 @@ #include "gc/shared/modRefBarrierSet.hpp" #include "gc/shared/referencePolicy.hpp" #include "gc/shared/space.hpp" +#include "gc/shared/strongRootsScope.hpp" #include "oops/instanceRefKlass.hpp" #include "oops/oop.inline.hpp" #include "prims/jvmtiExport.hpp" @@ -200,14 +201,18 @@ void GenMarkSweep::mark_sweep_phase1(int level, // Need new claim bits before marking starts. ClassLoaderDataGraph::clear_claimed_marks(); - gch->gen_process_roots(level, - false, // Younger gens are not roots. - true, // activate StrongRootsScope - GenCollectedHeap::SO_None, - GenCollectedHeap::StrongRootsOnly, - &follow_root_closure, - &follow_root_closure, - &follow_cld_closure); + { + StrongRootsScope srs(1); + + gch->gen_process_roots(&srs, + level, + false, // Younger gens are not roots. 
+ GenCollectedHeap::SO_None, + GenCollectedHeap::StrongRootsOnly, + &follow_root_closure, + &follow_root_closure, + &follow_cld_closure); + } // Process reference objects found during marking { @@ -284,14 +289,18 @@ void GenMarkSweep::mark_sweep_phase3(int level) { assert(level == 1, "We don't use mark-sweep on young generations."); adjust_pointer_closure.set_orig_generation(gch->old_gen()); - gch->gen_process_roots(level, - false, // Younger gens are not roots. - true, // activate StrongRootsScope - GenCollectedHeap::SO_AllCodeCache, - GenCollectedHeap::StrongAndWeakRoots, - &adjust_pointer_closure, - &adjust_pointer_closure, - &adjust_cld_closure); + { + StrongRootsScope srs(1); + + gch->gen_process_roots(&srs, + level, + false, // Younger gens are not roots. + GenCollectedHeap::SO_AllCodeCache, + GenCollectedHeap::StrongAndWeakRoots, + &adjust_pointer_closure, + &adjust_pointer_closure, + &adjust_cld_closure); + } gch->gen_process_weak_roots(&adjust_pointer_closure); diff --git a/hotspot/src/share/vm/gc/shared/genCollectedHeap.cpp b/hotspot/src/share/vm/gc/shared/genCollectedHeap.cpp index bd64b4b70f3..ff68fba58ed 100644 --- a/hotspot/src/share/vm/gc/shared/genCollectedHeap.cpp +++ b/hotspot/src/share/vm/gc/shared/genCollectedHeap.cpp @@ -582,15 +582,13 @@ public: static AssertNonScavengableClosure assert_is_non_scavengable_closure; #endif -void GenCollectedHeap::process_roots(bool activate_scope, +void GenCollectedHeap::process_roots(StrongRootsScope* scope, ScanningOption so, OopClosure* strong_roots, OopClosure* weak_roots, CLDClosure* strong_cld_closure, CLDClosure* weak_cld_closure, CodeBlobClosure* code_roots) { - StrongRootsScope srs(activate_scope); - // General roots. 
assert(Threads::thread_claim_parity() != 0, "must have called prologue code"); assert(code_roots != NULL, "code root closure should always be set"); @@ -609,7 +607,7 @@ void GenCollectedHeap::process_roots(bool activate_scope, // Only process code roots from thread stacks if we aren't visiting the entire CodeCache anyway CodeBlobClosure* roots_from_code_p = (so & SO_AllCodeCache) ? NULL : code_roots; - bool is_par = n_par_threads() > 1; + bool is_par = scope->n_threads() > 1; Threads::possibly_parallel_oops_do(is_par, strong_roots, roots_from_clds_p, roots_from_code_p); if (!_process_strong_tasks->is_task_claimed(GCH_PS_Universe_oops_do)) { @@ -669,9 +667,9 @@ void GenCollectedHeap::process_roots(bool activate_scope, } -void GenCollectedHeap::gen_process_roots(int level, +void GenCollectedHeap::gen_process_roots(StrongRootsScope* scope, + int level, bool younger_gens_as_roots, - bool activate_scope, ScanningOption so, bool only_strong_roots, OopsInGenClosure* not_older_gens, @@ -689,7 +687,7 @@ void GenCollectedHeap::gen_process_roots(int level, OopsInGenClosure* weak_roots = only_strong_roots ? NULL : not_older_gens; CLDClosure* weak_cld_closure = only_strong_roots ? NULL : cld_closure; - process_roots(activate_scope, so, + process_roots(scope, so, not_older_gens, weak_roots, cld_closure, weak_cld_closure, &mark_code_closure); diff --git a/hotspot/src/share/vm/gc/shared/genCollectedHeap.hpp b/hotspot/src/share/vm/gc/shared/genCollectedHeap.hpp index 2caa66f5615..72a051d9fdd 100644 --- a/hotspot/src/share/vm/gc/shared/genCollectedHeap.hpp +++ b/hotspot/src/share/vm/gc/shared/genCollectedHeap.hpp @@ -30,8 +30,9 @@ #include "gc/shared/collectorPolicy.hpp" #include "gc/shared/generation.hpp" -class SubTasksDone; class FlexibleWorkGang; +class StrongRootsScope; +class SubTasksDone; // A "GenCollectedHeap" is a CollectedHeap that uses generational // collection. It has two generations, young and old. 
@@ -385,7 +386,7 @@ public: }; private: - void process_roots(bool activate_scope, + void process_roots(StrongRootsScope* scope, ScanningOption so, OopClosure* strong_roots, OopClosure* weak_roots, @@ -393,24 +394,13 @@ public: CLDClosure* weak_cld_closure, CodeBlobClosure* code_roots); - void gen_process_roots(int level, - bool younger_gens_as_roots, - bool activate_scope, - ScanningOption so, - OopsInGenClosure* not_older_gens, - OopsInGenClosure* weak_roots, - OopsInGenClosure* older_gens, - CLDClosure* cld_closure, - CLDClosure* weak_cld_closure, - CodeBlobClosure* code_closure); - public: static const bool StrongAndWeakRoots = false; static const bool StrongRootsOnly = true; - void gen_process_roots(int level, + void gen_process_roots(StrongRootsScope* scope, + int level, bool younger_gens_as_roots, - bool activate_scope, ScanningOption so, bool only_strong_roots, OopsInGenClosure* not_older_gens, diff --git a/hotspot/src/share/vm/gc/shared/strongRootsScope.cpp b/hotspot/src/share/vm/gc/shared/strongRootsScope.cpp index 82989c3e342..43a697f8cda 100644 --- a/hotspot/src/share/vm/gc/shared/strongRootsScope.cpp +++ b/hotspot/src/share/vm/gc/shared/strongRootsScope.cpp @@ -28,24 +28,18 @@ #include "gc/shared/strongRootsScope.hpp" #include "runtime/thread.hpp" -MarkScope::MarkScope(bool activate) : _active(activate) { - if (_active) { - nmethod::oops_do_marking_prologue(); - } +MarkScope::MarkScope() { + nmethod::oops_do_marking_prologue(); } MarkScope::~MarkScope() { - if (_active) { - nmethod::oops_do_marking_epilogue(); - } + nmethod::oops_do_marking_epilogue(); } -StrongRootsScope::StrongRootsScope(bool activate) : MarkScope(activate) { - if (_active) { - Threads::change_thread_claim_parity(); - // Zero the claimed high water mark in the StringTable - StringTable::clear_parallel_claimed_index(); - } +StrongRootsScope::StrongRootsScope(uint n_threads) : _n_threads(n_threads) { + Threads::change_thread_claim_parity(); + // Zero the claimed high water mark in the 
StringTable + StringTable::clear_parallel_claimed_index(); } StrongRootsScope::~StrongRootsScope() { diff --git a/hotspot/src/share/vm/gc/shared/strongRootsScope.hpp b/hotspot/src/share/vm/gc/shared/strongRootsScope.hpp index 3ec8fa6deca..2ed55c4cf12 100644 --- a/hotspot/src/share/vm/gc/shared/strongRootsScope.hpp +++ b/hotspot/src/share/vm/gc/shared/strongRootsScope.hpp @@ -29,18 +29,21 @@ class MarkScope : public StackObj { protected: - bool _active; - public: - MarkScope(bool activate = true); + MarkScope(); ~MarkScope(); }; // Sets up and tears down the required state for parallel root processing. class StrongRootsScope : public MarkScope { + // Number of threads participating in the roots processing. + const uint _n_threads; + public: - StrongRootsScope(bool activate = true); + StrongRootsScope(uint n_threads); ~StrongRootsScope(); + + uint n_threads() const { return _n_threads; } }; #endif // SHARE_VM_GC_SHARED_STRONGROOTSSCOPE_HPP From b77b3ec014d673cc71c2be29899f7f2feb309741 Mon Sep 17 00:00:00 2001 From: Stefan Karlsson Date: Thu, 21 May 2015 09:23:46 +0200 Subject: [PATCH 03/26] 8080111: Remove SubTaskDone::_n_threads Reviewed-by: jmasa, kbarrett --- hotspot/src/share/vm/gc/cms/parNewGeneration.cpp | 4 ---- hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp | 1 - hotspot/src/share/vm/gc/g1/g1RootProcessor.cpp | 14 +++----------- hotspot/src/share/vm/gc/g1/g1RootProcessor.hpp | 3 --- .../src/share/vm/gc/shared/genCollectedHeap.cpp | 7 +------ .../src/share/vm/gc/shared/genCollectedHeap.hpp | 1 - hotspot/src/share/vm/gc/shared/workgroup.cpp | 15 ++++++--------- hotspot/src/share/vm/gc/shared/workgroup.hpp | 15 +++------------ 8 files changed, 13 insertions(+), 47 deletions(-) diff --git a/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp b/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp index c29ae02e33b..7827e5ee313 100644 --- a/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp +++ b/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp @@ -580,10 +580,6 @@ 
ParNewGenTask::ParNewGenTask(ParNewGeneration* gen, Generation* old_gen, // active threads. void ParNewGenTask::set_for_termination(uint active_workers) { _state_set->reset(active_workers, _gen->promotion_failed()); - // Should the heap be passed in? There's only 1 for now so - // grab it instead. - GenCollectedHeap* gch = GenCollectedHeap::heap(); - gch->set_n_termination(active_workers); } void ParNewGenTask::work(uint worker_id) { diff --git a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp index 9a0b55c86ce..084f81ffe36 100644 --- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp +++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp @@ -4326,7 +4326,6 @@ public: ParallelTaskTerminator* terminator() { return &_terminator; } virtual void set_for_termination(uint active_workers) { - _root_processor->set_num_workers(active_workers); terminator()->reset_for_reuse(active_workers); _n_workers = active_workers; } diff --git a/hotspot/src/share/vm/gc/g1/g1RootProcessor.cpp b/hotspot/src/share/vm/gc/g1/g1RootProcessor.cpp index b6cb0f1f5eb..f7c6aeb8538 100644 --- a/hotspot/src/share/vm/gc/g1/g1RootProcessor.cpp +++ b/hotspot/src/share/vm/gc/g1/g1RootProcessor.cpp @@ -204,7 +204,7 @@ void G1RootProcessor::evacuate_roots(OopClosure* scan_non_heap_roots, } } - _process_strong_tasks->all_tasks_completed(); + _process_strong_tasks->all_tasks_completed(n_workers()); } void G1RootProcessor::process_strong_roots(OopClosure* oops, @@ -214,7 +214,7 @@ void G1RootProcessor::process_strong_roots(OopClosure* oops, process_java_roots(oops, clds, clds, NULL, blobs, NULL, 0); process_vm_roots(oops, NULL, NULL, 0); - _process_strong_tasks->all_tasks_completed(); + _process_strong_tasks->all_tasks_completed(n_workers()); } void G1RootProcessor::process_all_roots(OopClosure* oops, @@ -228,7 +228,7 @@ void G1RootProcessor::process_all_roots(OopClosure* oops, CodeCache::blobs_do(blobs); } - _process_strong_tasks->all_tasks_completed(); + 
_process_strong_tasks->all_tasks_completed(n_workers()); } void G1RootProcessor::process_java_roots(OopClosure* strong_roots, @@ -327,14 +327,6 @@ void G1RootProcessor::scan_remembered_sets(G1ParPushHeapRSClosure* scan_rs, _g1h->g1_rem_set()->oops_into_collection_set_do(scan_rs, &scavenge_cs_nmethods, worker_i); } -void G1RootProcessor::set_num_workers(uint active_workers) { - assert(active_workers == _srs.n_threads(), - err_msg("Mismatch between number of worker threads. active_workers: %u and n_workers(): %u", - active_workers, - _srs.n_threads())); - _process_strong_tasks->set_n_threads(active_workers); -} - uint G1RootProcessor::n_workers() const { return _srs.n_threads(); } diff --git a/hotspot/src/share/vm/gc/g1/g1RootProcessor.hpp b/hotspot/src/share/vm/gc/g1/g1RootProcessor.hpp index 582b1109806..fb7b4014def 100644 --- a/hotspot/src/share/vm/gc/g1/g1RootProcessor.hpp +++ b/hotspot/src/share/vm/gc/g1/g1RootProcessor.hpp @@ -114,9 +114,6 @@ public: OopClosure* scan_non_heap_weak_roots, uint worker_i); - // Inform SubTaskDone about the number of worker threads. - void set_num_workers(uint active_workers); - // Number of worker threads used by the root processor. 
uint n_workers() const; }; diff --git a/hotspot/src/share/vm/gc/shared/genCollectedHeap.cpp b/hotspot/src/share/vm/gc/shared/genCollectedHeap.cpp index ff68fba58ed..5c59c117eea 100644 --- a/hotspot/src/share/vm/gc/shared/genCollectedHeap.cpp +++ b/hotspot/src/share/vm/gc/shared/genCollectedHeap.cpp @@ -564,11 +564,6 @@ HeapWord* GenCollectedHeap::satisfy_failed_allocation(size_t size, bool is_tlab) void GenCollectedHeap::set_par_threads(uint t) { assert(t == 0 || !UseSerialGC, "Cannot have parallel threads"); CollectedHeap::set_par_threads(t); - set_n_termination(t); -} - -void GenCollectedHeap::set_n_termination(uint t) { - _process_strong_tasks->set_n_threads(t); } #ifdef ASSERT @@ -709,7 +704,7 @@ void GenCollectedHeap::gen_process_roots(StrongRootsScope* scope, older_gens->reset_generation(); } - _process_strong_tasks->all_tasks_completed(); + _process_strong_tasks->all_tasks_completed(scope->n_threads()); } diff --git a/hotspot/src/share/vm/gc/shared/genCollectedHeap.hpp b/hotspot/src/share/vm/gc/shared/genCollectedHeap.hpp index 72a051d9fdd..adfedac6841 100644 --- a/hotspot/src/share/vm/gc/shared/genCollectedHeap.hpp +++ b/hotspot/src/share/vm/gc/shared/genCollectedHeap.hpp @@ -365,7 +365,6 @@ public: static GenCollectedHeap* heap(); void set_par_threads(uint t); - void set_n_termination(uint t); // Invoke the "do_oop" method of one of the closures "not_older_gens" // or "older_gens" on root locations for the generation at diff --git a/hotspot/src/share/vm/gc/shared/workgroup.cpp b/hotspot/src/share/vm/gc/shared/workgroup.cpp index 571a615c3c2..ccf16394c7b 100644 --- a/hotspot/src/share/vm/gc/shared/workgroup.cpp +++ b/hotspot/src/share/vm/gc/shared/workgroup.cpp @@ -434,7 +434,7 @@ void WorkGangBarrierSync::abort() { // SubTasksDone functions. 
SubTasksDone::SubTasksDone(uint n) : - _n_tasks(n), _n_threads(1), _tasks(NULL) { + _n_tasks(n), _tasks(NULL) { _tasks = NEW_C_HEAP_ARRAY(uint, n, mtInternal); guarantee(_tasks != NULL, "alloc failure"); clear(); @@ -444,12 +444,6 @@ bool SubTasksDone::valid() { return _tasks != NULL; } -void SubTasksDone::set_n_threads(uint t) { - assert(_claimed == 0 || _threads_completed == _n_threads, - "should not be called while tasks are being processed!"); - _n_threads = (t == 0 ? 1 : t); -} - void SubTasksDone::clear() { for (uint i = 0; i < _n_tasks; i++) { _tasks[i] = 0; @@ -477,7 +471,7 @@ bool SubTasksDone::is_task_claimed(uint t) { return res; } -void SubTasksDone::all_tasks_completed() { +void SubTasksDone::all_tasks_completed(uint n_threads) { jint observed = _threads_completed; jint old; do { @@ -485,7 +479,10 @@ void SubTasksDone::all_tasks_completed() { observed = Atomic::cmpxchg(old+1, &_threads_completed, old); } while (observed != old); // If this was the last thread checking in, clear the tasks. - if (observed+1 == (jint)_n_threads) clear(); + uint adjusted_thread_count = (n_threads == 0 ? 1 : n_threads); + if (observed + 1 == (jint)adjusted_thread_count) { + clear(); + } } diff --git a/hotspot/src/share/vm/gc/shared/workgroup.hpp b/hotspot/src/share/vm/gc/shared/workgroup.hpp index 1f1d82c72f6..be70044ed2d 100644 --- a/hotspot/src/share/vm/gc/shared/workgroup.hpp +++ b/hotspot/src/share/vm/gc/shared/workgroup.hpp @@ -390,12 +390,6 @@ public: class SubTasksDone: public CHeapObj { uint* _tasks; uint _n_tasks; - // _n_threads is used to determine when a sub task is done. - // It does not control how many threads will execute the subtask - // but must be initialized to the number that do execute the task - // in order to correctly decide when the subtask is done (all the - // threads working on the task have finished). 
- uint _n_threads; uint _threads_completed; #ifdef ASSERT volatile uint _claimed; @@ -413,11 +407,6 @@ public: // True iff the object is in a valid state. bool valid(); - // Get/set the number of parallel threads doing the tasks to "t". Can only - // be called before tasks start or after they are complete. - uint n_threads() { return _n_threads; } - void set_n_threads(uint t); - // Returns "false" if the task "t" is unclaimed, and ensures that task is // claimed. The task "t" is required to be within the range of "this". bool is_task_claimed(uint t); @@ -426,7 +415,9 @@ public: // tasks that it will try to claim. Every thread in the parallel task // must execute this. (When the last thread does so, the task array is // cleared.) - void all_tasks_completed(); + // + // n_threads - Number of threads executing the sub-tasks. + void all_tasks_completed(uint n_threads); // Destructor. ~SubTasksDone(); From 8d0f1a65288bfcc387a770bb3ed9895f7b10b62c Mon Sep 17 00:00:00 2001 From: Stefan Karlsson Date: Thu, 21 May 2015 09:35:38 +0200 Subject: [PATCH 04/26] 8080112: Replace and remove the last usages of CollectedHeap::n_par_threads() Reviewed-by: jmasa, kbarrett --- .../vm/gc/cms/compactibleFreeListSpace.cpp | 26 ++++++--------- .../vm/gc/cms/compactibleFreeListSpace.hpp | 3 +- .../share/vm/gc/cms/parCardTableModRefBS.cpp | 26 +++++++-------- hotspot/src/share/vm/gc/g1/concurrentMark.cpp | 6 +--- .../share/vm/gc/serial/defNewGeneration.cpp | 2 +- .../share/vm/gc/serial/defNewGeneration.hpp | 2 +- .../src/share/vm/gc/shared/cardGeneration.cpp | 4 +-- .../src/share/vm/gc/shared/cardGeneration.hpp | 2 +- .../share/vm/gc/shared/cardTableModRefBS.cpp | 33 +++++-------------- .../share/vm/gc/shared/cardTableModRefBS.hpp | 5 +-- .../src/share/vm/gc/shared/cardTableRS.cpp | 21 +++++------- .../src/share/vm/gc/shared/cardTableRS.hpp | 6 ++-- .../src/share/vm/gc/shared/collectedHeap.cpp | 3 +- .../src/share/vm/gc/shared/collectedHeap.hpp | 6 +--- 
.../share/vm/gc/shared/genCollectedHeap.cpp | 2 +- hotspot/src/share/vm/gc/shared/genRemSet.hpp | 5 +-- hotspot/src/share/vm/gc/shared/generation.cpp | 5 +-- hotspot/src/share/vm/gc/shared/generation.hpp | 4 +-- hotspot/src/share/vm/gc/shared/space.cpp | 6 ++-- hotspot/src/share/vm/gc/shared/space.hpp | 6 ++-- 20 files changed, 72 insertions(+), 101 deletions(-) diff --git a/hotspot/src/share/vm/gc/cms/compactibleFreeListSpace.cpp b/hotspot/src/share/vm/gc/cms/compactibleFreeListSpace.cpp index 628feb99d64..bc3680a5569 100644 --- a/hotspot/src/share/vm/gc/cms/compactibleFreeListSpace.cpp +++ b/hotspot/src/share/vm/gc/cms/compactibleFreeListSpace.cpp @@ -641,6 +641,7 @@ void CompactibleFreeListSpace::set_end(HeapWord* value) { class FreeListSpace_DCTOC : public Filtering_DCTOC { CompactibleFreeListSpace* _cfls; CMSCollector* _collector; + bool _parallel; protected: // Override. #define walk_mem_region_with_cl_DECL(ClosureType) \ @@ -661,9 +662,10 @@ public: CMSCollector* collector, ExtendedOopClosure* cl, CardTableModRefBS::PrecisionStyle precision, - HeapWord* boundary) : + HeapWord* boundary, + bool parallel) : Filtering_DCTOC(sp, cl, precision, boundary), - _cfls(sp), _collector(collector) {} + _cfls(sp), _collector(collector), _parallel(parallel) {} }; // We de-virtualize the block-related calls below, since we know that our @@ -674,10 +676,7 @@ void FreeListSpace_DCTOC::walk_mem_region_with_cl(MemRegion mr, HeapWord* bottom, \ HeapWord* top, \ ClosureType* cl) { \ - bool is_par = GenCollectedHeap::heap()->n_par_threads() > 0; \ - if (is_par) { \ - assert(GenCollectedHeap::heap()->n_par_threads() == \ - GenCollectedHeap::heap()->workers()->active_workers(), "Mismatch"); \ + if (_parallel) { \ walk_mem_region_with_cl_par(mr, bottom, top, cl); \ } else { \ walk_mem_region_with_cl_nopar(mr, bottom, top, cl); \ @@ -747,8 +746,9 @@ FreeListSpace_DCTOC__walk_mem_region_with_cl_DEFN(FilteringClosure) DirtyCardToOopClosure* 
CompactibleFreeListSpace::new_dcto_cl(ExtendedOopClosure* cl, CardTableModRefBS::PrecisionStyle precision, - HeapWord* boundary) { - return new FreeListSpace_DCTOC(this, _collector, cl, precision, boundary); + HeapWord* boundary, + bool parallel) { + return new FreeListSpace_DCTOC(this, _collector, cl, precision, boundary, parallel); } @@ -1897,11 +1897,9 @@ CompactibleFreeListSpace::splitChunkAndReturnRemainder(FreeChunk* chunk, assert(chunk->is_free() && ffc->is_free(), "Error"); _bt.split_block((HeapWord*)chunk, chunk->size(), new_size); if (rem_sz < SmallForDictionary) { - bool is_par = (GenCollectedHeap::heap()->n_par_threads() > 0); + // The freeList lock is held, but multiple GC task threads might be executing in parallel. + bool is_par = Thread::current()->is_GC_task_thread(); if (is_par) _indexedFreeListParLocks[rem_sz]->lock(); - assert(!is_par || - (GenCollectedHeap::heap()->n_par_threads() == - GenCollectedHeap::heap()->workers()->active_workers()), "Mismatch"); returnChunkToFreeList(ffc); split(size, rem_sz); if (is_par) _indexedFreeListParLocks[rem_sz]->unlock(); @@ -1972,8 +1970,6 @@ void CompactibleFreeListSpace::save_marks() { bool CompactibleFreeListSpace::no_allocs_since_save_marks() { assert(_promoInfo.tracking(), "No preceding save_marks?"); - assert(GenCollectedHeap::heap()->n_par_threads() == 0, - "Shouldn't be called if using parallel gc."); return _promoInfo.noPromotions(); } @@ -1981,8 +1977,6 @@ bool CompactibleFreeListSpace::no_allocs_since_save_marks() { \ void CompactibleFreeListSpace:: \ oop_since_save_marks_iterate##nv_suffix(OopClosureType* blk) { \ - assert(GenCollectedHeap::heap()->n_par_threads() == 0, \ - "Shouldn't be called (yet) during parallel part of gc."); \ _promoInfo.promoted_oops_iterate##nv_suffix(blk); \ /* \ * This also restores any displaced headers and removes the elements from \ diff --git a/hotspot/src/share/vm/gc/cms/compactibleFreeListSpace.hpp b/hotspot/src/share/vm/gc/cms/compactibleFreeListSpace.hpp index 
0e4bf630e65..07bc88f3530 100644 --- a/hotspot/src/share/vm/gc/cms/compactibleFreeListSpace.hpp +++ b/hotspot/src/share/vm/gc/cms/compactibleFreeListSpace.hpp @@ -438,7 +438,8 @@ class CompactibleFreeListSpace: public CompactibleSpace { // Override: provides a DCTO_CL specific to this kind of space. DirtyCardToOopClosure* new_dcto_cl(ExtendedOopClosure* cl, CardTableModRefBS::PrecisionStyle precision, - HeapWord* boundary); + HeapWord* boundary, + bool parallel); void blk_iterate(BlkClosure* cl); void blk_iterate_careful(BlkClosureCareful* cl); diff --git a/hotspot/src/share/vm/gc/cms/parCardTableModRefBS.cpp b/hotspot/src/share/vm/gc/cms/parCardTableModRefBS.cpp index 6d838b28816..bdf029d28c4 100644 --- a/hotspot/src/share/vm/gc/cms/parCardTableModRefBS.cpp +++ b/hotspot/src/share/vm/gc/cms/parCardTableModRefBS.cpp @@ -39,16 +39,11 @@ void CardTableModRefBS::non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr, OopsInGenClosure* cl, CardTableRS* ct, - int n_threads) { - assert(n_threads > 0, "Error: expected n_threads > 0"); - assert((n_threads == 1 && ParallelGCThreads == 0) || - n_threads <= (int)ParallelGCThreads, - "# worker threads != # requested!"); - assert(!Thread::current()->is_VM_thread() || (n_threads == 1), "There is only 1 VM thread"); - assert(UseDynamicNumberOfGCThreads || - !FLAG_IS_DEFAULT(ParallelGCThreads) || - n_threads == (int)ParallelGCThreads, - "# worker threads != # requested!"); + uint n_threads) { + assert(n_threads > 0, "expected n_threads > 0"); + assert(n_threads <= ParallelGCThreads, + err_msg("n_threads: %u > ParallelGCThreads: " UINTX_FORMAT, n_threads, ParallelGCThreads)); + // Make sure the LNC array is valid for the space. 
jbyte** lowest_non_clean; uintptr_t lowest_non_clean_base_chunk_index; @@ -66,7 +61,8 @@ void CardTableModRefBS::non_clean_card_iterate_parallel_work(Space* sp, MemRegio uint stride = 0; while (!pst->is_task_claimed(/* reference */ stride)) { - process_stride(sp, mr, stride, n_strides, cl, ct, + process_stride(sp, mr, stride, n_strides, + cl, ct, lowest_non_clean, lowest_non_clean_base_chunk_index, lowest_non_clean_chunk_size); @@ -132,9 +128,13 @@ process_stride(Space* sp, assert(chunk_mr.word_size() > 0, "[chunk_card_start > used_end)"); assert(used.contains(chunk_mr), "chunk_mr should be subset of used"); + // This function is used by the parallel card table iteration. + const bool parallel = true; + DirtyCardToOopClosure* dcto_cl = sp->new_dcto_cl(cl, precision(), - cl->gen_boundary()); - ClearNoncleanCardWrapper clear_cl(dcto_cl, ct); + cl->gen_boundary(), + parallel); + ClearNoncleanCardWrapper clear_cl(dcto_cl, ct, parallel); // Process the chunk. diff --git a/hotspot/src/share/vm/gc/g1/concurrentMark.cpp b/hotspot/src/share/vm/gc/g1/concurrentMark.cpp index 03d589ef4cd..0662baf52d1 100644 --- a/hotspot/src/share/vm/gc/g1/concurrentMark.cpp +++ b/hotspot/src/share/vm/gc/g1/concurrentMark.cpp @@ -1938,15 +1938,11 @@ void ConcurrentMark::cleanup() { HeapRegionRemSet::reset_for_cleanup_tasks(); - uint n_workers; - // Do counting once more with the world stopped for good measure. G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); g1h->set_par_threads(); - n_workers = g1h->n_par_threads(); - assert(g1h->n_par_threads() == n_workers, - "Should not have been reset"); + uint n_workers = _g1h->workers()->active_workers(); g1h->workers()->run_task(&g1_par_count_task); // Done with the parallel phase so reset to 0. 
g1h->set_par_threads(0); diff --git a/hotspot/src/share/vm/gc/serial/defNewGeneration.cpp b/hotspot/src/share/vm/gc/serial/defNewGeneration.cpp index bd3cbfe6a4f..ea5a04f1c48 100644 --- a/hotspot/src/share/vm/gc/serial/defNewGeneration.cpp +++ b/hotspot/src/share/vm/gc/serial/defNewGeneration.cpp @@ -455,7 +455,7 @@ void DefNewGeneration::compute_new_size() { } } -void DefNewGeneration::younger_refs_iterate(OopsInGenClosure* cl) { +void DefNewGeneration::younger_refs_iterate(OopsInGenClosure* cl, uint n_threads) { assert(false, "NYI -- are you sure you want to call this?"); } diff --git a/hotspot/src/share/vm/gc/serial/defNewGeneration.hpp b/hotspot/src/share/vm/gc/serial/defNewGeneration.hpp index 1975114aefb..47f9a0c4612 100644 --- a/hotspot/src/share/vm/gc/serial/defNewGeneration.hpp +++ b/hotspot/src/share/vm/gc/serial/defNewGeneration.hpp @@ -255,7 +255,7 @@ protected: // Iteration void object_iterate(ObjectClosure* blk); - void younger_refs_iterate(OopsInGenClosure* cl); + void younger_refs_iterate(OopsInGenClosure* cl, uint n_threads); void space_iterate(SpaceClosure* blk, bool usedOnly = false); diff --git a/hotspot/src/share/vm/gc/shared/cardGeneration.cpp b/hotspot/src/share/vm/gc/shared/cardGeneration.cpp index 09285cf466f..38eb081d0c8 100644 --- a/hotspot/src/share/vm/gc/shared/cardGeneration.cpp +++ b/hotspot/src/share/vm/gc/shared/cardGeneration.cpp @@ -353,8 +353,8 @@ void CardGeneration::space_iterate(SpaceClosure* blk, blk->do_space(space()); } -void CardGeneration::younger_refs_iterate(OopsInGenClosure* blk) { +void CardGeneration::younger_refs_iterate(OopsInGenClosure* blk, uint n_threads) { blk->set_generation(this); - younger_refs_in_space_iterate(space(), blk); + younger_refs_in_space_iterate(space(), blk, n_threads); blk->reset_generation(); } diff --git a/hotspot/src/share/vm/gc/shared/cardGeneration.hpp b/hotspot/src/share/vm/gc/shared/cardGeneration.hpp index efc2656d4f1..ce0c1daa3d7 100644 --- 
a/hotspot/src/share/vm/gc/shared/cardGeneration.hpp +++ b/hotspot/src/share/vm/gc/shared/cardGeneration.hpp @@ -89,7 +89,7 @@ class CardGeneration: public Generation { void space_iterate(SpaceClosure* blk, bool usedOnly = false); - void younger_refs_iterate(OopsInGenClosure* blk); + void younger_refs_iterate(OopsInGenClosure* blk, uint n_threads); bool is_in(const void* p) const; diff --git a/hotspot/src/share/vm/gc/shared/cardTableModRefBS.cpp b/hotspot/src/share/vm/gc/shared/cardTableModRefBS.cpp index 61d2df3862c..a0dfc7dbd27 100644 --- a/hotspot/src/share/vm/gc/shared/cardTableModRefBS.cpp +++ b/hotspot/src/share/vm/gc/shared/cardTableModRefBS.cpp @@ -440,31 +440,11 @@ void CardTableModRefBS::write_ref_field_work(void* field, oop newVal, bool relea void CardTableModRefBS::non_clean_card_iterate_possibly_parallel(Space* sp, MemRegion mr, OopsInGenClosure* cl, - CardTableRS* ct) { + CardTableRS* ct, + uint n_threads) { if (!mr.is_empty()) { - // Caller (process_roots()) claims that all GC threads - // execute this call. With UseDynamicNumberOfGCThreads now all - // active GC threads execute this call. The number of active GC - // threads needs to be passed to par_non_clean_card_iterate_work() - // to get proper partitioning and termination. - // - // This is an example of where n_par_threads() is used instead - // of workers()->active_workers(). n_par_threads can be set to 0 to - // turn off parallelism. For example when this code is called as - // part of verification during root processing then n_par_threads() - // may have been set to 0. active_workers is not overloaded with - // the meaning that it is a switch to disable parallelism and so keeps - // the meaning of the number of active gc workers. If parallelism has - // not been shut off by setting n_par_threads to 0, then n_par_threads - // should be equal to active_workers. When a different mechanism for - // shutting off parallelism is used, then active_workers can be used in - // place of n_par_threads. 
- int n_threads = GenCollectedHeap::heap()->n_par_threads(); - bool is_par = n_threads > 0; - if (is_par) { + if (n_threads > 0) { #if INCLUDE_ALL_GCS - assert(GenCollectedHeap::heap()->n_par_threads() == - GenCollectedHeap::heap()->workers()->active_workers(), "Mismatch"); non_clean_card_iterate_parallel_work(sp, mr, cl, ct, n_threads); #else // INCLUDE_ALL_GCS fatal("Parallel gc not supported here."); @@ -472,8 +452,11 @@ void CardTableModRefBS::non_clean_card_iterate_possibly_parallel(Space* sp, } else { // clear_cl finds contiguous dirty ranges of cards to process and clear. - DirtyCardToOopClosure* dcto_cl = sp->new_dcto_cl(cl, precision(), cl->gen_boundary()); - ClearNoncleanCardWrapper clear_cl(dcto_cl, ct); + // This is the single-threaded version. + const bool parallel = false; + + DirtyCardToOopClosure* dcto_cl = sp->new_dcto_cl(cl, precision(), cl->gen_boundary(), parallel); + ClearNoncleanCardWrapper clear_cl(dcto_cl, ct, parallel); clear_cl.do_MemRegion(mr); } diff --git a/hotspot/src/share/vm/gc/shared/cardTableModRefBS.hpp b/hotspot/src/share/vm/gc/shared/cardTableModRefBS.hpp index abba92ca9e3..cac16cae00a 100644 --- a/hotspot/src/share/vm/gc/shared/cardTableModRefBS.hpp +++ b/hotspot/src/share/vm/gc/shared/cardTableModRefBS.hpp @@ -178,14 +178,15 @@ class CardTableModRefBS: public ModRefBarrierSet { // region mr in the given space and apply cl to any dirty sub-regions // of mr. Clears the dirty cards as they are processed. void non_clean_card_iterate_possibly_parallel(Space* sp, MemRegion mr, - OopsInGenClosure* cl, CardTableRS* ct); + OopsInGenClosure* cl, CardTableRS* ct, + uint n_threads); private: // Work method used to implement non_clean_card_iterate_possibly_parallel() // above in the parallel case. 
void non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr, OopsInGenClosure* cl, CardTableRS* ct, - int n_threads); + uint n_threads); protected: // Dirty the bytes corresponding to "mr" (not all of which must be diff --git a/hotspot/src/share/vm/gc/shared/cardTableRS.cpp b/hotspot/src/share/vm/gc/shared/cardTableRS.cpp index 880effd5b6d..185d4ceeaa1 100644 --- a/hotspot/src/share/vm/gc/shared/cardTableRS.cpp +++ b/hotspot/src/share/vm/gc/shared/cardTableRS.cpp @@ -102,9 +102,10 @@ void CardTableRS::prepare_for_younger_refs_iterate(bool parallel) { } void CardTableRS::younger_refs_iterate(Generation* g, - OopsInGenClosure* blk) { + OopsInGenClosure* blk, + uint n_threads) { _last_cur_val_in_gen[g->level()+1] = cur_youngergen_card_val(); - g->younger_refs_iterate(blk); + g->younger_refs_iterate(blk, n_threads); } inline bool ClearNoncleanCardWrapper::clear_card(jbyte* entry) { @@ -164,15 +165,8 @@ inline bool ClearNoncleanCardWrapper::clear_card_serial(jbyte* entry) { } ClearNoncleanCardWrapper::ClearNoncleanCardWrapper( - DirtyCardToOopClosure* dirty_card_closure, CardTableRS* ct) : - _dirty_card_closure(dirty_card_closure), _ct(ct) { - // Cannot yet substitute active_workers for n_par_threads - // in the case where parallelism is being turned off by - // setting n_par_threads to 0. 
- _is_par = (GenCollectedHeap::heap()->n_par_threads() > 0); - assert(!_is_par || - (GenCollectedHeap::heap()->n_par_threads() == - GenCollectedHeap::heap()->workers()->active_workers()), "Mismatch"); + DirtyCardToOopClosure* dirty_card_closure, CardTableRS* ct, bool is_par) : + _dirty_card_closure(dirty_card_closure), _ct(ct), _is_par(is_par) { } bool ClearNoncleanCardWrapper::is_word_aligned(jbyte* entry) { @@ -272,7 +266,8 @@ void CardTableRS::write_ref_field_gc_par(void* field, oop new_val) { } void CardTableRS::younger_refs_in_space_iterate(Space* sp, - OopsInGenClosure* cl) { + OopsInGenClosure* cl, + uint n_threads) { const MemRegion urasm = sp->used_region_at_save_marks(); #ifdef ASSERT // Convert the assertion check to a warning if we are running @@ -301,7 +296,7 @@ void CardTableRS::younger_refs_in_space_iterate(Space* sp, ShouldNotReachHere(); } #endif - _ct_bs->non_clean_card_iterate_possibly_parallel(sp, urasm, cl, this); + _ct_bs->non_clean_card_iterate_possibly_parallel(sp, urasm, cl, this, n_threads); } void CardTableRS::clear_into_younger(Generation* old_gen) { diff --git a/hotspot/src/share/vm/gc/shared/cardTableRS.hpp b/hotspot/src/share/vm/gc/shared/cardTableRS.hpp index 98a1fca47fc..076aebba850 100644 --- a/hotspot/src/share/vm/gc/shared/cardTableRS.hpp +++ b/hotspot/src/share/vm/gc/shared/cardTableRS.hpp @@ -56,7 +56,7 @@ class CardTableRS: public GenRemSet { CardTableModRefBSForCTRS* _ct_bs; - virtual void younger_refs_in_space_iterate(Space* sp, OopsInGenClosure* cl); + virtual void younger_refs_in_space_iterate(Space* sp, OopsInGenClosure* cl, uint n_threads); void verify_space(Space* s, HeapWord* gen_start); @@ -116,7 +116,7 @@ public: // Card table entries are cleared before application; "blk" is // responsible for dirtying if the oop is still older-to-younger after // closure application. 
- void younger_refs_iterate(Generation* g, OopsInGenClosure* blk); + void younger_refs_iterate(Generation* g, OopsInGenClosure* blk, uint n_threads); void inline_write_ref_field_gc(void* field, oop new_val) { jbyte* byte = _ct_bs->byte_for(field); @@ -183,7 +183,7 @@ private: bool is_word_aligned(jbyte* entry); public: - ClearNoncleanCardWrapper(DirtyCardToOopClosure* dirty_card_closure, CardTableRS* ct); + ClearNoncleanCardWrapper(DirtyCardToOopClosure* dirty_card_closure, CardTableRS* ct, bool is_par); void do_MemRegion(MemRegion mr); }; diff --git a/hotspot/src/share/vm/gc/shared/collectedHeap.cpp b/hotspot/src/share/vm/gc/shared/collectedHeap.cpp index 663588051de..7008a7a8074 100644 --- a/hotspot/src/share/vm/gc/shared/collectedHeap.cpp +++ b/hotspot/src/share/vm/gc/shared/collectedHeap.cpp @@ -160,8 +160,7 @@ void CollectedHeap::trace_heap_after_gc(const GCTracer* gc_tracer) { // Memory state functions. -CollectedHeap::CollectedHeap() : _n_par_threads(0) -{ +CollectedHeap::CollectedHeap() { const size_t max_len = size_t(arrayOopDesc::max_array_length(T_INT)); const size_t elements_per_word = HeapWordSize / sizeof(jint); _filler_array_max_size = align_object_size(filler_array_hdr_size() + diff --git a/hotspot/src/share/vm/gc/shared/collectedHeap.hpp b/hotspot/src/share/vm/gc/shared/collectedHeap.hpp index 076fcadf868..66b9bffb611 100644 --- a/hotspot/src/share/vm/gc/shared/collectedHeap.hpp +++ b/hotspot/src/share/vm/gc/shared/collectedHeap.hpp @@ -101,7 +101,6 @@ class CollectedHeap : public CHeapObj { protected: BarrierSet* _barrier_set; bool _is_gc_active; - uint _n_par_threads; unsigned int _total_collections; // ... started unsigned int _total_full_collections; // ... started @@ -291,11 +290,8 @@ class CollectedHeap : public CHeapObj { } GCCause::Cause gc_cause() { return _gc_cause; } - // Number of threads currently working on GC tasks. - uint n_par_threads() { return _n_par_threads; } - // May be overridden to set additional parallelism. 
- virtual void set_par_threads(uint t) { _n_par_threads = t; }; + virtual void set_par_threads(uint t) { (void)t; }; // General obj/array allocation facilities. inline static oop obj_allocate(KlassHandle klass, int size, TRAPS); diff --git a/hotspot/src/share/vm/gc/shared/genCollectedHeap.cpp b/hotspot/src/share/vm/gc/shared/genCollectedHeap.cpp index 5c59c117eea..3e1e3b9c112 100644 --- a/hotspot/src/share/vm/gc/shared/genCollectedHeap.cpp +++ b/hotspot/src/share/vm/gc/shared/genCollectedHeap.cpp @@ -700,7 +700,7 @@ void GenCollectedHeap::gen_process_roots(StrongRootsScope* scope, // older-gen scanning. if (level == 0) { older_gens->set_generation(_old_gen); - rem_set()->younger_refs_iterate(_old_gen, older_gens); + rem_set()->younger_refs_iterate(_old_gen, older_gens, scope->n_threads()); older_gens->reset_generation(); } diff --git a/hotspot/src/share/vm/gc/shared/genRemSet.hpp b/hotspot/src/share/vm/gc/shared/genRemSet.hpp index 9a5db641cb4..c8ec2a90c81 100644 --- a/hotspot/src/share/vm/gc/shared/genRemSet.hpp +++ b/hotspot/src/share/vm/gc/shared/genRemSet.hpp @@ -77,10 +77,11 @@ public: // 1) that are in objects allocated in "g" at the time of the last call // to "save_Marks", and // 2) that point to objects in younger generations. - virtual void younger_refs_iterate(Generation* g, OopsInGenClosure* blk) = 0; + virtual void younger_refs_iterate(Generation* g, OopsInGenClosure* blk, uint n_threads) = 0; virtual void younger_refs_in_space_iterate(Space* sp, - OopsInGenClosure* cl) = 0; + OopsInGenClosure* cl, + uint n_threads) = 0; // This method is used to notify the remembered set that "new_val" has // been written into "field" by the garbage collector. 
diff --git a/hotspot/src/share/vm/gc/shared/generation.cpp b/hotspot/src/share/vm/gc/shared/generation.cpp index 5210a44517f..e5f7ede190f 100644 --- a/hotspot/src/share/vm/gc/shared/generation.cpp +++ b/hotspot/src/share/vm/gc/shared/generation.cpp @@ -293,9 +293,10 @@ void Generation::oop_iterate(ExtendedOopClosure* cl) { } void Generation::younger_refs_in_space_iterate(Space* sp, - OopsInGenClosure* cl) { + OopsInGenClosure* cl, + uint n_threads) { GenRemSet* rs = GenCollectedHeap::heap()->rem_set(); - rs->younger_refs_in_space_iterate(sp, cl); + rs->younger_refs_in_space_iterate(sp, cl, n_threads); } class GenerationObjIterateClosure : public SpaceClosure { diff --git a/hotspot/src/share/vm/gc/shared/generation.hpp b/hotspot/src/share/vm/gc/shared/generation.hpp index 6d4f840681c..91df7c0507c 100644 --- a/hotspot/src/share/vm/gc/shared/generation.hpp +++ b/hotspot/src/share/vm/gc/shared/generation.hpp @@ -122,7 +122,7 @@ class Generation: public CHeapObj { // The iteration is only over objects allocated at the start of the // iterations; objects allocated as a result of applying the closure are // not included. - void younger_refs_in_space_iterate(Space* sp, OopsInGenClosure* cl); + void younger_refs_in_space_iterate(Space* sp, OopsInGenClosure* cl, uint n_threads); public: // The set of possible generation kinds. @@ -526,7 +526,7 @@ class Generation: public CHeapObj { // in the current generation that contain pointers to objects in younger // generations. Objects allocated since the last "save_marks" call are // excluded. - virtual void younger_refs_iterate(OopsInGenClosure* cl) = 0; + virtual void younger_refs_iterate(OopsInGenClosure* cl, uint n_threads) = 0; // Inform a generation that it longer contains references to objects // in any younger generation. [e.g. 
Because younger gens are empty, diff --git a/hotspot/src/share/vm/gc/shared/space.cpp b/hotspot/src/share/vm/gc/shared/space.cpp index b57e0a6936a..c0ce365a779 100644 --- a/hotspot/src/share/vm/gc/shared/space.cpp +++ b/hotspot/src/share/vm/gc/shared/space.cpp @@ -181,7 +181,8 @@ void DirtyCardToOopClosure::do_MemRegion(MemRegion mr) { DirtyCardToOopClosure* Space::new_dcto_cl(ExtendedOopClosure* cl, CardTableModRefBS::PrecisionStyle precision, - HeapWord* boundary) { + HeapWord* boundary, + bool parallel) { return new DirtyCardToOopClosure(this, cl, precision, boundary); } @@ -260,7 +261,8 @@ ContiguousSpaceDCTOC__walk_mem_region_with_cl_DEFN(FilteringClosure) DirtyCardToOopClosure* ContiguousSpace::new_dcto_cl(ExtendedOopClosure* cl, CardTableModRefBS::PrecisionStyle precision, - HeapWord* boundary) { + HeapWord* boundary, + bool parallel) { return new ContiguousSpaceDCTOC(this, cl, precision, boundary); } diff --git a/hotspot/src/share/vm/gc/shared/space.hpp b/hotspot/src/share/vm/gc/shared/space.hpp index ca0941e37a6..1665380969a 100644 --- a/hotspot/src/share/vm/gc/shared/space.hpp +++ b/hotspot/src/share/vm/gc/shared/space.hpp @@ -183,7 +183,8 @@ class Space: public CHeapObj { // operate. ResourceArea allocated. virtual DirtyCardToOopClosure* new_dcto_cl(ExtendedOopClosure* cl, CardTableModRefBS::PrecisionStyle precision, - HeapWord* boundary = NULL); + HeapWord* boundary, + bool parallel); // If "p" is in the space, returns the address of the start of the // "block" that contains "p". We say "block" instead of "object" since @@ -629,7 +630,8 @@ class ContiguousSpace: public CompactibleSpace { // Override. 
DirtyCardToOopClosure* new_dcto_cl(ExtendedOopClosure* cl, CardTableModRefBS::PrecisionStyle precision, - HeapWord* boundary = NULL); + HeapWord* boundary, + bool parallel); // Apply "blk->do_oop" to the addresses of all reference fields in objects // starting with the _saved_mark_word, which was noted during a generation's From 5dc3521a8087af053ece20c3daec46208b4fead5 Mon Sep 17 00:00:00 2001 From: Stefan Karlsson Date: Thu, 21 May 2015 09:35:59 +0200 Subject: [PATCH 05/26] 8080113: Remove CollectedHeap::set_par_threads() Reviewed-by: jmasa, kbarrett --- .../gc/cms/concurrentMarkSweepGeneration.cpp | 5 -- .../src/share/vm/gc/cms/parNewGeneration.cpp | 3 -- hotspot/src/share/vm/gc/g1/concurrentMark.cpp | 21 +------- .../src/share/vm/gc/g1/g1CollectedHeap.cpp | 53 ------------------- .../src/share/vm/gc/g1/g1CollectedHeap.hpp | 5 -- hotspot/src/share/vm/gc/g1/g1StringDedup.cpp | 2 - .../vm/gc/parallel/psParallelCompact.cpp | 1 - .../src/share/vm/gc/parallel/psScavenge.cpp | 1 - .../share/vm/gc/serial/defNewGeneration.cpp | 4 +- .../share/vm/gc/shared/cardTableModRefBS.cpp | 2 +- .../src/share/vm/gc/shared/collectedHeap.hpp | 3 -- .../share/vm/gc/shared/genCollectedHeap.cpp | 5 -- .../share/vm/gc/shared/genCollectedHeap.hpp | 2 - 13 files changed, 6 insertions(+), 101 deletions(-) diff --git a/hotspot/src/share/vm/gc/cms/concurrentMarkSweepGeneration.cpp b/hotspot/src/share/vm/gc/cms/concurrentMarkSweepGeneration.cpp index 9b4e024d20a..163a4eb7e03 100644 --- a/hotspot/src/share/vm/gc/cms/concurrentMarkSweepGeneration.cpp +++ b/hotspot/src/share/vm/gc/cms/concurrentMarkSweepGeneration.cpp @@ -3017,14 +3017,12 @@ void CMSCollector::checkpointRootsInitialWork() { StrongRootsScope srs(n_workers); CMSParInitialMarkTask tsk(this, &srs, n_workers); - gch->set_par_threads(n_workers); initialize_sequential_subtasks_for_young_gen_rescan(n_workers); if (n_workers > 1) { workers->run_task(&tsk); } else { tsk.work(0); } - gch->set_par_threads(0); } else { // The serial version. 
CLDToOopClosure cld_closure(&notOlder, true); @@ -5088,8 +5086,6 @@ void CMSCollector::do_remark_parallel() { CMSParRemarkTask tsk(this, cms_space, n_workers, workers, task_queues(), &srs); - // Set up for parallel process_roots work. - gch->set_par_threads(n_workers); // We won't be iterating over the cards in the card table updating // the younger_gen cards, so we shouldn't call the following else // the verification code as well as subsequent younger_refs_iterate @@ -5127,7 +5123,6 @@ void CMSCollector::do_remark_parallel() { tsk.work(0); } - gch->set_par_threads(0); // 0 ==> non-parallel. // restore, single-threaded for now, any preserved marks // as a result of work_q overflow restore_preserved_marks_if_any(); diff --git a/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp b/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp index 7827e5ee313..6b327332418 100644 --- a/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp +++ b/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp @@ -836,7 +836,6 @@ void ParNewRefProcTaskExecutor::set_single_threaded_mode() { _state_set.flush(); GenCollectedHeap* gch = GenCollectedHeap::heap(); - gch->set_par_threads(0); // 0 ==> non-parallel. gch->save_marks(); } @@ -954,7 +953,6 @@ void ParNewGeneration::collect(bool full, StrongRootsScope srs(n_workers); ParNewGenTask tsk(this, _old_gen, reserved().end(), &thread_state_set, &srs); - gch->set_par_threads(n_workers); gch->rem_set()->prepare_for_younger_refs_iterate(true); // It turns out that even when we're using 1 thread, doing the work in a // separate thread causes wide variance in run times. We can't help this @@ -996,7 +994,6 @@ void ParNewGeneration::collect(bool full, _gc_timer, _gc_tracer.gc_id()); } else { thread_state_set.flush(); - gch->set_par_threads(0); // 0 ==> non-parallel. 
gch->save_marks(); stats = rp->process_discovered_references(&is_alive, &keep_alive, &evacuate_followers, NULL, diff --git a/hotspot/src/share/vm/gc/g1/concurrentMark.cpp b/hotspot/src/share/vm/gc/g1/concurrentMark.cpp index 0662baf52d1..1b6f98325e8 100644 --- a/hotspot/src/share/vm/gc/g1/concurrentMark.cpp +++ b/hotspot/src/share/vm/gc/g1/concurrentMark.cpp @@ -1941,11 +1941,7 @@ void ConcurrentMark::cleanup() { // Do counting once more with the world stopped for good measure. G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); - g1h->set_par_threads(); - uint n_workers = _g1h->workers()->active_workers(); g1h->workers()->run_task(&g1_par_count_task); - // Done with the parallel phase so reset to 0. - g1h->set_par_threads(0); if (VerifyDuringGC) { // Verify that the counting data accumulated during marking matches @@ -1961,10 +1957,7 @@ void ConcurrentMark::cleanup() { &expected_region_bm, &expected_card_bm); - g1h->set_par_threads((int)n_workers); g1h->workers()->run_task(&g1_par_verify_task); - // Done with the parallel phase so reset to 0. - g1h->set_par_threads(0); guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures"); } @@ -1986,11 +1979,11 @@ void ConcurrentMark::cleanup() { g1h->reset_gc_time_stamp(); + uint n_workers = _g1h->workers()->active_workers(); + // Note end of marking in all heap regions. 
G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers); - g1h->set_par_threads((int)n_workers); g1h->workers()->run_task(&g1_par_note_end_task); - g1h->set_par_threads(0); g1h->check_gc_time_stamps(); if (!cleanup_list_is_empty()) { @@ -2005,9 +1998,7 @@ void ConcurrentMark::cleanup() { if (G1ScrubRemSets) { double rs_scrub_start = os::elapsedTime(); G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers); - g1h->set_par_threads((int)n_workers); g1h->workers()->run_task(&g1_par_scrub_rs_task); - g1h->set_par_threads(0); double rs_scrub_end = os::elapsedTime(); double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start); @@ -2308,9 +2299,7 @@ void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { // and overflow handling in CMTask::do_marking_step() knows // how many workers to wait for. _cm->set_concurrency(_active_workers); - _g1h->set_par_threads(_active_workers); _workers->run_task(&proc_task_proxy); - _g1h->set_par_threads(0); } class G1CMRefEnqueueTaskProxy: public AbstractGangTask { @@ -2340,9 +2329,7 @@ void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { // and overflow handling in CMTask::do_marking_step() knows // how many workers to wait for. _cm->set_concurrency(_active_workers); - _g1h->set_par_threads(_active_workers); _workers->run_task(&enq_task_proxy); - _g1h->set_par_threads(0); } void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) { @@ -2624,9 +2611,7 @@ void ConcurrentMark::checkpointRootsFinalWork() { // We will start all available threads, even if we decide that the // active_workers will be fewer. The extra ones will just bail out // immediately. 
- g1h->set_par_threads(active_workers); g1h->workers()->run_task(&remarkTask); - g1h->set_par_threads(0); } SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); @@ -3000,9 +2985,7 @@ void ConcurrentMark::aggregate_count_data() { G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, _max_worker_id, n_workers); - _g1h->set_par_threads(n_workers); _g1h->workers()->run_task(&g1_par_agg_task); - _g1h->set_par_threads(0); } // Clear the per-worker arrays used to store the per-region counting data diff --git a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp index 084f81ffe36..5ffac644050 100644 --- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp +++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp @@ -1330,23 +1330,12 @@ bool G1CollectedHeap::do_collection(bool explicit_gc, n_workers == workers()->total_workers(), "If not dynamic should be using all the workers"); workers()->set_active_workers(n_workers); - // Set parallel threads in the heap (_n_par_threads) only - // before a parallel phase and always reset it to 0 after - // the phase so that the number of parallel threads does - // no get carried forward to a serial phase where there - // may be code that is "possibly_parallel". 
- set_par_threads(n_workers); ParRebuildRSTask rebuild_rs_task(this); assert(UseDynamicNumberOfGCThreads || workers()->active_workers() == workers()->total_workers(), "Unless dynamic should use total workers"); - // Use the most recent number of active workers - assert(workers()->active_workers() > 0, - "Active workers not properly set"); - set_par_threads(workers()->active_workers()); workers()->run_task(&rebuild_rs_task); - set_par_threads(0); // Rebuild the strong code root lists for each region rebuild_strong_code_roots(); @@ -3045,10 +3034,7 @@ void G1CollectedHeap::verify(bool silent, VerifyOption vo) { assert(UseDynamicNumberOfGCThreads || workers()->active_workers() == workers()->total_workers(), "If not dynamic should be using all the workers"); - uint n_workers = workers()->active_workers(); - set_par_threads(n_workers); workers()->run_task(&task); - set_par_threads(0); if (task.failures()) { failures = true; } @@ -4041,10 +4027,8 @@ void G1CollectedHeap::finalize_for_evac_failure() { void G1CollectedHeap::remove_self_forwarding_pointers() { double remove_self_forwards_start = os::elapsedTime(); - set_par_threads(); G1ParRemoveSelfForwardPtrsTask rsfp_task(this); workers()->run_task(&rsfp_task); - set_par_threads(0); // Now restore saved marks, if any. 
assert(_objs_with_preserved_marks.size() == @@ -4810,19 +4794,14 @@ void G1CollectedHeap::parallel_cleaning(BoolObjectClosure* is_alive, G1ParallelCleaningTask g1_unlink_task(is_alive, process_strings, process_symbols, n_workers, class_unloading_occurred); - set_par_threads(n_workers); workers()->run_task(&g1_unlink_task); - set_par_threads(0); } void G1CollectedHeap::unlink_string_and_symbol_table(BoolObjectClosure* is_alive, bool process_strings, bool process_symbols) { { - uint n_workers = workers()->active_workers(); G1StringSymbolTableUnlinkTask g1_unlink_task(is_alive, process_strings, process_symbols); - set_par_threads(n_workers); workers()->run_task(&g1_unlink_task); - set_par_threads(0); } if (G1StringDedup::is_enabled()) { @@ -4850,13 +4829,9 @@ class G1RedirtyLoggedCardsTask : public AbstractGangTask { void G1CollectedHeap::redirty_logged_cards() { double redirty_logged_cards_start = os::elapsedTime(); - uint n_workers = workers()->active_workers(); - G1RedirtyLoggedCardsTask redirty_task(&dirty_card_queue_set()); dirty_card_queue_set().reset_for_par_iteration(); - set_par_threads(n_workers); workers()->run_task(&redirty_task); - set_par_threads(0); DirtyCardQueueSet& dcq = JavaThread::dirty_card_queue_set(); dcq.merge_bufferlists(&dirty_card_queue_set()); @@ -5092,9 +5067,7 @@ void G1STWRefProcTaskExecutor::execute(ProcessTask& proc_task) { ParallelTaskTerminator terminator(_active_workers, _queues); G1STWRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _queues, &terminator); - _g1h->set_par_threads(_active_workers); _workers->run_task(&proc_task_proxy); - _g1h->set_par_threads(0); } // Gang task for parallel reference enqueueing. 
@@ -5123,9 +5096,7 @@ void G1STWRefProcTaskExecutor::execute(EnqueueTask& enq_task) { G1STWRefEnqueueTaskProxy enq_task_proxy(enq_task); - _g1h->set_par_threads(_active_workers); _workers->run_task(&enq_task_proxy); - _g1h->set_par_threads(0); } // End of weak reference support closures @@ -5247,15 +5218,12 @@ void G1CollectedHeap::process_discovered_references(uint no_of_gc_workers) { assert(no_of_gc_workers == workers()->active_workers(), "Need to reset active GC workers"); - set_par_threads(no_of_gc_workers); G1ParPreserveCMReferentsTask keep_cm_referents(this, no_of_gc_workers, _task_queues); workers()->run_task(&keep_cm_referents); - set_par_threads(0); - // Closure to test whether a referent is alive. G1STWIsAliveClosure is_alive(this); @@ -5382,8 +5350,6 @@ void G1CollectedHeap::evacuate_collection_set(EvacuationInfo& evacuation_info) { assert(UseDynamicNumberOfGCThreads || n_workers == workers()->total_workers(), "If not dynamic should be using all the workers"); - set_par_threads(n_workers); - init_for_evac_failure(NULL); @@ -5424,8 +5390,6 @@ void G1CollectedHeap::evacuate_collection_set(EvacuationInfo& evacuation_info) { (os::elapsedTime() - end_par_time_sec) * 1000.0; phase_times->record_code_root_fixup_time(code_root_fixup_time_ms); - set_par_threads(0); - // Process any discovered reference objects - we have // to do this _before_ we retire the GC alloc regions // as we may have to copy some 'reachable' referent @@ -5778,9 +5742,7 @@ void G1CollectedHeap::cleanUpCardTable() { // Iterate over the dirty cards region list. 
G1ParCleanupCTTask cleanup_task(ct_bs, this); - set_par_threads(); workers()->run_task(&cleanup_task); - set_par_threads(0); #ifndef PRODUCT if (G1VerifyCTCleanup || VerifyAfterGC) { G1VerifyCardTableCleanup cleanup_verifier(this, ct_bs); @@ -6313,21 +6275,6 @@ void G1CollectedHeap::retire_mutator_alloc_region(HeapRegion* alloc_region, g1mm()->update_eden_size(); } -void G1CollectedHeap::set_par_threads() { - // Don't change the number of workers. Use the value previously set - // in the workgroup. - uint n_workers = workers()->active_workers(); - assert(UseDynamicNumberOfGCThreads || - n_workers == workers()->total_workers(), - "Otherwise should be using the total number of workers"); - if (n_workers == 0) { - assert(false, "Should have been set in prior evacuation pause."); - n_workers = ParallelGCThreads; - workers()->set_active_workers(n_workers); - } - set_par_threads(n_workers); -} - // Methods for the GC alloc regions HeapRegion* G1CollectedHeap::new_gc_alloc_region(size_t word_size, diff --git a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp index fdd0f83c48e..7f79f521682 100644 --- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp +++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp @@ -1012,11 +1012,6 @@ public: // Initialize weak reference processing. void ref_processing_init(); - // Explicitly import set_par_threads into this scope - using CollectedHeap::set_par_threads; - // Set _n_par_threads according to a policy TBD. 
- void set_par_threads(); - virtual Name kind() const { return CollectedHeap::G1CollectedHeap; } diff --git a/hotspot/src/share/vm/gc/g1/g1StringDedup.cpp b/hotspot/src/share/vm/gc/g1/g1StringDedup.cpp index fa99207ff35..9a19c9f9431 100644 --- a/hotspot/src/share/vm/gc/g1/g1StringDedup.cpp +++ b/hotspot/src/share/vm/gc/g1/g1StringDedup.cpp @@ -153,9 +153,7 @@ void G1StringDedup::unlink_or_oops_do(BoolObjectClosure* is_alive, G1StringDedupUnlinkOrOopsDoTask task(is_alive, keep_alive, allow_resize_and_rehash, phase_times); G1CollectedHeap* g1h = G1CollectedHeap::heap(); - g1h->set_par_threads(); g1h->workers()->run_task(&task); - g1h->set_par_threads(0); } void G1StringDedup::threads_do(ThreadClosure* tc) { diff --git a/hotspot/src/share/vm/gc/parallel/psParallelCompact.cpp b/hotspot/src/share/vm/gc/parallel/psParallelCompact.cpp index b928f956e24..91615543387 100644 --- a/hotspot/src/share/vm/gc/parallel/psParallelCompact.cpp +++ b/hotspot/src/share/vm/gc/parallel/psParallelCompact.cpp @@ -2029,7 +2029,6 @@ bool PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) { // Set the number of GC threads to be used in this collection gc_task_manager()->set_active_gang(); gc_task_manager()->task_idle_workers(); - heap->set_par_threads(gc_task_manager()->active_workers()); TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty); GCTraceTime t1(GCCauseString("Full GC", gc_cause), PrintGC, !PrintGCDetails, NULL, _gc_tracer.gc_id()); diff --git a/hotspot/src/share/vm/gc/parallel/psScavenge.cpp b/hotspot/src/share/vm/gc/parallel/psScavenge.cpp index cb75197b321..ff7651c1c69 100644 --- a/hotspot/src/share/vm/gc/parallel/psScavenge.cpp +++ b/hotspot/src/share/vm/gc/parallel/psScavenge.cpp @@ -382,7 +382,6 @@ bool PSScavenge::invoke_no_policy() { // Get the active number of workers here and use that value // throughout the methods. 
uint active_workers = gc_task_manager()->active_workers(); - heap->set_par_threads(active_workers); PSPromotionManager::pre_scavenge(); diff --git a/hotspot/src/share/vm/gc/serial/defNewGeneration.cpp b/hotspot/src/share/vm/gc/serial/defNewGeneration.cpp index ea5a04f1c48..33a40c3744d 100644 --- a/hotspot/src/share/vm/gc/serial/defNewGeneration.cpp +++ b/hotspot/src/share/vm/gc/serial/defNewGeneration.cpp @@ -627,7 +627,9 @@ void DefNewGeneration::collect(bool full, "save marks have not been newly set."); { - // SerialGC runs with n_workers == 0. + // DefNew needs to run with n_threads == 0, to make sure the serial + // version of the card table scanning code is used. + // See: CardTableModRefBS::non_clean_card_iterate_possibly_parallel. StrongRootsScope srs(0); gch->gen_process_roots(&srs, diff --git a/hotspot/src/share/vm/gc/shared/cardTableModRefBS.cpp b/hotspot/src/share/vm/gc/shared/cardTableModRefBS.cpp index a0dfc7dbd27..2c84471bbbb 100644 --- a/hotspot/src/share/vm/gc/shared/cardTableModRefBS.cpp +++ b/hotspot/src/share/vm/gc/shared/cardTableModRefBS.cpp @@ -452,7 +452,7 @@ void CardTableModRefBS::non_clean_card_iterate_possibly_parallel(Space* sp, } else { // clear_cl finds contiguous dirty ranges of cards to process and clear. - // This is the single-threaded version. + // This is the single-threaded version used by DefNew. const bool parallel = false; DirtyCardToOopClosure* dcto_cl = sp->new_dcto_cl(cl, precision(), cl->gen_boundary(), parallel); diff --git a/hotspot/src/share/vm/gc/shared/collectedHeap.hpp b/hotspot/src/share/vm/gc/shared/collectedHeap.hpp index 66b9bffb611..4082df4f4fc 100644 --- a/hotspot/src/share/vm/gc/shared/collectedHeap.hpp +++ b/hotspot/src/share/vm/gc/shared/collectedHeap.hpp @@ -290,9 +290,6 @@ class CollectedHeap : public CHeapObj { } GCCause::Cause gc_cause() { return _gc_cause; } - // May be overridden to set additional parallelism. 
- virtual void set_par_threads(uint t) { (void)t; }; - // General obj/array allocation facilities. inline static oop obj_allocate(KlassHandle klass, int size, TRAPS); inline static oop array_allocate(KlassHandle klass, int size, int length, TRAPS); diff --git a/hotspot/src/share/vm/gc/shared/genCollectedHeap.cpp b/hotspot/src/share/vm/gc/shared/genCollectedHeap.cpp index 3e1e3b9c112..5f490f61a7a 100644 --- a/hotspot/src/share/vm/gc/shared/genCollectedHeap.cpp +++ b/hotspot/src/share/vm/gc/shared/genCollectedHeap.cpp @@ -561,11 +561,6 @@ HeapWord* GenCollectedHeap::satisfy_failed_allocation(size_t size, bool is_tlab) return collector_policy()->satisfy_failed_allocation(size, is_tlab); } -void GenCollectedHeap::set_par_threads(uint t) { - assert(t == 0 || !UseSerialGC, "Cannot have parallel threads"); - CollectedHeap::set_par_threads(t); -} - #ifdef ASSERT class AssertNonScavengableClosure: public OopClosure { public: diff --git a/hotspot/src/share/vm/gc/shared/genCollectedHeap.hpp b/hotspot/src/share/vm/gc/shared/genCollectedHeap.hpp index adfedac6841..3f06cfaed35 100644 --- a/hotspot/src/share/vm/gc/shared/genCollectedHeap.hpp +++ b/hotspot/src/share/vm/gc/shared/genCollectedHeap.hpp @@ -364,8 +364,6 @@ public: // asserted to be this type. static GenCollectedHeap* heap(); - void set_par_threads(uint t); - // Invoke the "do_oop" method of one of the closures "not_older_gens" // or "older_gens" on root locations for the generation at // "level". 
(The "older_gens" closure is used for scanning references From 4e9a9eee847b7a291a5d22fe805e9cf08e1df813 Mon Sep 17 00:00:00 2001 From: Stefan Karlsson Date: Thu, 21 May 2015 14:10:15 +0200 Subject: [PATCH 06/26] 8080869: FlexibleWorkGang initializes _active_workers to more than _total_workers Reviewed-by: kbarrett, jmasa --- hotspot/src/share/vm/gc/shared/workgroup.hpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/hotspot/src/share/vm/gc/shared/workgroup.hpp b/hotspot/src/share/vm/gc/shared/workgroup.hpp index be70044ed2d..e559da25b7b 100644 --- a/hotspot/src/share/vm/gc/shared/workgroup.hpp +++ b/hotspot/src/share/vm/gc/shared/workgroup.hpp @@ -315,15 +315,13 @@ class FlexibleWorkGang: public WorkGang { uint _active_workers; public: // Constructor and destructor. - // Initialize active_workers to a minimum value. Setting it to - // the parameter "workers" will initialize it to a maximum - // value which is not desirable. FlexibleWorkGang(const char* name, uint workers, bool are_GC_task_threads, bool are_ConcurrentGC_threads) : WorkGang(name, workers, are_GC_task_threads, are_ConcurrentGC_threads), - _active_workers(UseDynamicNumberOfGCThreads ? 1U : ParallelGCThreads) {} - // Accessors for fields + _active_workers(UseDynamicNumberOfGCThreads ? 1U : workers) {} + + // Accessors for fields. 
virtual uint active_workers() const { return _active_workers; } void set_active_workers(uint v) { assert(v <= _total_workers, From a6ffb28ff8e9e07258c97586b710e813280976cd Mon Sep 17 00:00:00 2001 From: Katja Kantserova Date: Fri, 22 May 2015 08:47:27 +0200 Subject: [PATCH 07/26] 8080828: Create sanity test for JDK-8080155 Reviewed-by: sla --- .../sa/TestClassLoaderStats.java | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 hotspot/test/serviceability/sa/TestClassLoaderStats.java diff --git a/hotspot/test/serviceability/sa/TestClassLoaderStats.java b/hotspot/test/serviceability/sa/TestClassLoaderStats.java new file mode 100644 index 00000000000..2f35d6a4edd --- /dev/null +++ b/hotspot/test/serviceability/sa/TestClassLoaderStats.java @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +import jdk.test.lib.Platform; +import jdk.test.lib.ProcessTools; +import jdk.test.lib.OutputAnalyzer; + +/* + * @test + * @library /testlibrary + * @build jdk.test.lib.* + * @run main TestClassLoaderStats + */ +public class TestClassLoaderStats { + + public static void main(String[] args) throws Exception { + if (!Platform.shouldSAAttach()) { + System.out.println("SA attach not expected to work - test skipped."); + return; + } + + ProcessBuilder processBuilder = ProcessTools.createJavaProcessBuilder( + "-XX:+UsePerfData", + "sun.jvm.hotspot.tools.ClassLoaderStats", + Integer.toString(ProcessTools.getProcessId())); + OutputAnalyzer output = ProcessTools.executeProcess(processBuilder); + System.out.println(output.getOutput()); + + output.shouldHaveExitValue(0); + output.shouldContain("Debugger attached successfully."); + // The class loader stats header needs to be presented in the output: + output.shouldMatch("class_loader\\W+classes\\W+bytes\\W+parent_loader\\W+alive?\\W+type"); + output.stderrShouldNotMatch("[E|e]xception"); + output.stderrShouldNotMatch("[E|e]rror"); + } + +} From bd1a0cf8eca982bd8dc29157e6e1d14d8f13e8ff Mon Sep 17 00:00:00 2001 From: Katja Kantserova Date: Fri, 22 May 2015 13:52:46 +0200 Subject: [PATCH 08/26] 8080855: Create sanity test for JDK-8080692 Reviewed-by: sla --- .../serviceability/sa/TestStackTrace.java | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 hotspot/test/serviceability/sa/TestStackTrace.java diff --git a/hotspot/test/serviceability/sa/TestStackTrace.java b/hotspot/test/serviceability/sa/TestStackTrace.java new file mode 100644 index 00000000000..ba5a56a031d --- /dev/null +++ b/hotspot/test/serviceability/sa/TestStackTrace.java @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +import jdk.test.lib.OutputAnalyzer; +import jdk.test.lib.Platform; +import jdk.test.lib.ProcessTools; + +/* + * @test + * @library /testlibrary + * @build jdk.test.lib.* + * @run main TestStackTrace + */ +public class TestStackTrace { + + public static void main(String[] args) throws Exception { + if (!Platform.shouldSAAttach()) { + System.out.println("SA attach not expected to work - test skipped."); + return; + } + + ProcessBuilder processBuilder = ProcessTools.createJavaProcessBuilder( + "-XX:+UsePerfData", + "sun.jvm.hotspot.tools.StackTrace", + Integer.toString(ProcessTools.getProcessId())); + OutputAnalyzer output = ProcessTools.executeProcess(processBuilder); + System.out.println(output.getOutput()); + + output.shouldHaveExitValue(0); + output.shouldContain("Debugger attached successfully."); + output.stderrShouldNotMatch("[E|e]xception"); + output.stderrShouldNotMatch("[E|e]rror"); + } + +} From 48e61a6f7e26508563f13b945f31a1e7323a3560 Mon Sep 17 00:00:00 2001 From: Ivan Gerasimov Date: Fri, 22 May 2015 02:38:59 +0300 Subject: [PATCH 09/26] 
8069068: VM warning: WaitForMultipleObjects timed out (0) .. Increase timeout to 5 minutes Reviewed-by: dholmes, dcubed --- hotspot/src/os/windows/vm/os_windows.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hotspot/src/os/windows/vm/os_windows.cpp b/hotspot/src/os/windows/vm/os_windows.cpp index 97c73ac471c..2bae2d0e992 100644 --- a/hotspot/src/os/windows/vm/os_windows.cpp +++ b/hotspot/src/os/windows/vm/os_windows.cpp @@ -3768,7 +3768,7 @@ HINSTANCE os::win32::load_Windows_dll(const char* name, char *ebuf, return NULL; } -#define EXIT_TIMEOUT PRODUCT_ONLY(1000) NOT_PRODUCT(4000) /* 1 sec in product, 4 sec in debug */ +#define EXIT_TIMEOUT 300000 /* 5 minutes */ static BOOL CALLBACK init_crit_sect_call(PINIT_ONCE, PVOID pcrit_sect, PVOID*) { InitializeCriticalSection((CRITICAL_SECTION*)pcrit_sect); From c55c7818a4faf929f78fee63ae0482c495868a94 Mon Sep 17 00:00:00 2001 From: Staffan Larsen Date: Fri, 22 May 2015 09:09:56 +0200 Subject: [PATCH 10/26] 8066757: Can't build 'images' with --disable-zip-debug-info on OS X after jigsaw m2 merge Reviewed-by: erikj, ihse --- make/StripBinaries.gmk | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/make/StripBinaries.gmk b/make/StripBinaries.gmk index 17153130574..c7fb8f105f1 100644 --- a/make/StripBinaries.gmk +++ b/make/StripBinaries.gmk @@ -61,9 +61,12 @@ STRIP_CMDS_SRC := $(filter-out $(COPY_CMDS_SRC), $(ALL_CMDS_SRC)) COPY_LIBS_SRC := \ $(shell $(FIND) $(SUPPORT_OUTPUTDIR)/modules_libs \ \( ! -name '*$(SHARED_LIBRARY_SUFFIX)' -type f \) -o -type l) +# OS X stores symbol information in a .dylib file inside a .dSYM directory - +# that file should not be stripped, so we prune the tree at the .dSYM directory. 
+# Example: support/modules_libs/java.base/libjsig.dylib.dSYM/Contents/Resources/DWARF/libjsig.dylib STRIP_LIBS_SRC := \ $(shell $(FIND) $(SUPPORT_OUTPUTDIR)/modules_libs \ - -name '*$(SHARED_LIBRARY_SUFFIX)' -type f) + -name '*$(SHARED_LIBRARY_SUFFIX)' -type f -print -o -name "*.dSYM" -prune) $(eval $(call SetupCopyFiles,STRIP_MODULES_CMDS, \ SRC := $(SUPPORT_OUTPUTDIR)/modules_cmds, \ From 33461c4c6efaf9c8b93c6c6492f8daeb32c30735 Mon Sep 17 00:00:00 2001 From: Bengt Rutisson Date: Fri, 22 May 2015 10:56:37 +0200 Subject: [PATCH 11/26] 8080627: JavaThread::satb_mark_queue_offset() is too big for an ARM ldrsb instruction Reviewed-by: roland, kbarrett --- hotspot/src/share/vm/c1/c1_LIRGenerator.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp index c8db5f824b0..c30847f15fb 100644 --- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp +++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp @@ -1469,7 +1469,9 @@ void LIRGenerator::G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, LIR_Opr p } else { guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); - flag_type = T_BYTE; + // Use unsigned type T_BOOLEAN here rather than signed T_BYTE since some platforms, eg. ARM, + // need to use unsigned instructions to use the large offset to load the satb_mark_queue. 
+ flag_type = T_BOOLEAN; } LIR_Opr thrd = getThreadPointer(); LIR_Address* mark_active_flag_addr = From 5d81ec20824116e10f0dfe28f7db6220e6dd27d6 Mon Sep 17 00:00:00 2001 From: Stefan Karlsson Date: Fri, 22 May 2015 10:57:53 +0200 Subject: [PATCH 12/26] 8080837: Move number of workers calculation out of CollectionSetChooser::prepare_for_par_region_addition Reviewed-by: kbarrett, mgerdin --- .../src/share/vm/gc/g1/collectionSetChooser.cpp | 14 ++------------ .../src/share/vm/gc/g1/collectionSetChooser.hpp | 2 +- hotspot/src/share/vm/gc/g1/concurrentMark.cpp | 2 +- hotspot/src/share/vm/gc/g1/g1CollectorPolicy.cpp | 9 ++++++--- hotspot/src/share/vm/gc/g1/g1CollectorPolicy.hpp | 2 +- 5 files changed, 11 insertions(+), 18 deletions(-) diff --git a/hotspot/src/share/vm/gc/g1/collectionSetChooser.cpp b/hotspot/src/share/vm/gc/g1/collectionSetChooser.cpp index e9528320b47..03402e4003d 100644 --- a/hotspot/src/share/vm/gc/g1/collectionSetChooser.cpp +++ b/hotspot/src/share/vm/gc/g1/collectionSetChooser.cpp @@ -158,20 +158,10 @@ void CollectionSetChooser::add_region(HeapRegion* hr) { hr->calc_gc_efficiency(); } -void CollectionSetChooser::prepare_for_par_region_addition(uint n_regions, +void CollectionSetChooser::prepare_for_par_region_addition(uint n_threads, + uint n_regions, uint chunk_size) { _first_par_unreserved_idx = 0; - uint n_threads = (uint) ParallelGCThreads; - if (UseDynamicNumberOfGCThreads) { - assert(G1CollectedHeap::heap()->workers()->active_workers() > 0, - "Should have been set earlier"); - // This is defensive code. As the assertion above says, the number - // of active threads should be > 0, but in case there is some path - // or some improperly initialized variable with leads to no - // active threads, protect against that in a product build. 
- n_threads = MAX2(G1CollectedHeap::heap()->workers()->active_workers(), - 1U); - } uint max_waste = n_threads * chunk_size; // it should be aligned with respect to chunk_size uint aligned_n_regions = (n_regions + chunk_size - 1) / chunk_size * chunk_size; diff --git a/hotspot/src/share/vm/gc/g1/collectionSetChooser.hpp b/hotspot/src/share/vm/gc/g1/collectionSetChooser.hpp index 4323652494c..bb895d4ebdc 100644 --- a/hotspot/src/share/vm/gc/g1/collectionSetChooser.hpp +++ b/hotspot/src/share/vm/gc/g1/collectionSetChooser.hpp @@ -121,7 +121,7 @@ public: // Must be called before calls to claim_array_chunk(). // n_regions is the number of regions, chunk_size the chunk size. - void prepare_for_par_region_addition(uint n_regions, uint chunk_size); + void prepare_for_par_region_addition(uint n_threads, uint n_regions, uint chunk_size); // Returns the first index in a contiguous chunk of chunk_size indexes // that the calling thread has reserved. These must be set by the // calling thread using set_region() (to NULL if necessary). diff --git a/hotspot/src/share/vm/gc/g1/concurrentMark.cpp b/hotspot/src/share/vm/gc/g1/concurrentMark.cpp index 1b6f98325e8..c53df6cba87 100644 --- a/hotspot/src/share/vm/gc/g1/concurrentMark.cpp +++ b/hotspot/src/share/vm/gc/g1/concurrentMark.cpp @@ -2007,7 +2007,7 @@ void ConcurrentMark::cleanup() { // this will also free any regions totally full of garbage objects, // and sort the regions. - g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers); + g1h->g1_policy()->record_concurrent_mark_cleanup_end(); // Statistics. 
double end = os::elapsedTime(); diff --git a/hotspot/src/share/vm/gc/g1/g1CollectorPolicy.cpp b/hotspot/src/share/vm/gc/g1/g1CollectorPolicy.cpp index 72fdeb5ee21..0478a072cd9 100644 --- a/hotspot/src/share/vm/gc/g1/g1CollectorPolicy.cpp +++ b/hotspot/src/share/vm/gc/g1/g1CollectorPolicy.cpp @@ -1587,14 +1587,17 @@ uint G1CollectorPolicy::calculate_parallel_work_chunk_size(uint n_workers, uint } void -G1CollectorPolicy::record_concurrent_mark_cleanup_end(uint n_workers) { +G1CollectorPolicy::record_concurrent_mark_cleanup_end() { _collectionSetChooser->clear(); + FlexibleWorkGang* workers = _g1->workers(); + uint n_workers = workers->active_workers(); + uint n_regions = _g1->num_regions(); uint chunk_size = calculate_parallel_work_chunk_size(n_workers, n_regions); - _collectionSetChooser->prepare_for_par_region_addition(n_regions, chunk_size); + _collectionSetChooser->prepare_for_par_region_addition(n_workers, n_regions, chunk_size); ParKnownGarbageTask par_known_garbage_task(_collectionSetChooser, chunk_size, n_workers); - _g1->workers()->run_task(&par_known_garbage_task); + workers->run_task(&par_known_garbage_task); _collectionSetChooser->sort_regions(); diff --git a/hotspot/src/share/vm/gc/g1/g1CollectorPolicy.hpp b/hotspot/src/share/vm/gc/g1/g1CollectorPolicy.hpp index c94ada50fa4..84c5ef3281c 100644 --- a/hotspot/src/share/vm/gc/g1/g1CollectorPolicy.hpp +++ b/hotspot/src/share/vm/gc/g1/g1CollectorPolicy.hpp @@ -692,7 +692,7 @@ public: // Record start, end, and completion of cleanup. 
void record_concurrent_mark_cleanup_start(); - void record_concurrent_mark_cleanup_end(uint n_workers); + void record_concurrent_mark_cleanup_end(); void record_concurrent_mark_cleanup_completed(); // Records the information about the heap size for reporting in From 21bb8edbbad14bbc6f4ce338b391bf0c41220de9 Mon Sep 17 00:00:00 2001 From: Stefan Karlsson Date: Fri, 22 May 2015 10:58:04 +0200 Subject: [PATCH 13/26] 8080840: Clean up active_workers() asserts Reviewed-by: kbarrett, jmasa --- .../gc/cms/concurrentMarkSweepGeneration.cpp | 9 +--- hotspot/src/share/vm/gc/g1/concurrentMark.cpp | 9 +--- .../src/share/vm/gc/g1/g1CollectedHeap.cpp | 47 +++++-------------- .../src/share/vm/gc/g1/g1CollectedHeap.hpp | 4 +- hotspot/src/share/vm/gc/shared/workgroup.hpp | 8 +++- 5 files changed, 25 insertions(+), 52 deletions(-) diff --git a/hotspot/src/share/vm/gc/cms/concurrentMarkSweepGeneration.cpp b/hotspot/src/share/vm/gc/cms/concurrentMarkSweepGeneration.cpp index 163a4eb7e03..724c82029cf 100644 --- a/hotspot/src/share/vm/gc/cms/concurrentMarkSweepGeneration.cpp +++ b/hotspot/src/share/vm/gc/cms/concurrentMarkSweepGeneration.cpp @@ -5072,14 +5072,9 @@ void CMSCollector::do_remark_parallel() { FlexibleWorkGang* workers = gch->workers(); assert(workers != NULL, "Need parallel worker threads."); // Choose to use the number of GC workers most recently set - // into "active_workers". If active_workers is not set, set it - // to ParallelGCThreads. + // into "active_workers". 
uint n_workers = workers->active_workers(); - if (n_workers == 0) { - assert(n_workers > 0, "Should have been set during scavenge"); - n_workers = ParallelGCThreads; - workers->set_active_workers(n_workers); - } + CompactibleFreeListSpace* cms_space = _cmsGen->cmsSpace(); StrongRootsScope srs(n_workers); diff --git a/hotspot/src/share/vm/gc/g1/concurrentMark.cpp b/hotspot/src/share/vm/gc/g1/concurrentMark.cpp index c53df6cba87..18f3e18ffcb 100644 --- a/hotspot/src/share/vm/gc/g1/concurrentMark.cpp +++ b/hotspot/src/share/vm/gc/g1/concurrentMark.cpp @@ -1218,15 +1218,13 @@ void ConcurrentMark::markFromRoots() { "Maximum number of marking threads exceeded"); uint active_workers = MAX2(1U, parallel_marking_threads()); + assert(active_workers > 0, "Should have been set"); // Parallel task terminator is set in "set_concurrency_and_phase()" set_concurrency_and_phase(active_workers, true /* concurrent */); CMConcurrentMarkingTask markingTask(this, cmThread()); _parallel_workers->set_active_workers(active_workers); - // Don't set _n_par_threads because it affects MT in process_roots() - // and the decisions on that MT processing is made elsewhere. 
- assert(_parallel_workers->active_workers() > 0, "Should have been set"); _parallel_workers->run_task(&markingTask); print_stats(); } @@ -2593,11 +2591,6 @@ void ConcurrentMark::checkpointRootsFinalWork() { // this is remark, so we'll use up all active threads uint active_workers = g1h->workers()->active_workers(); - if (active_workers == 0) { - assert(active_workers > 0, "Should have been set earlier"); - active_workers = (uint) ParallelGCThreads; - g1h->workers()->set_active_workers(active_workers); - } set_concurrency_and_phase(active_workers, false /* concurrent */); // Leave _parallel_marking_threads at it's // value originally calculated in the ConcurrentMark diff --git a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp index 5ffac644050..2163f618ffe 100644 --- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp +++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp @@ -1326,15 +1326,9 @@ bool G1CollectedHeap::do_collection(bool explicit_gc, AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(), workers()->active_workers(), Threads::number_of_non_daemon_threads()); - assert(UseDynamicNumberOfGCThreads || - n_workers == workers()->total_workers(), - "If not dynamic should be using all the workers"); workers()->set_active_workers(n_workers); ParRebuildRSTask rebuild_rs_task(this); - assert(UseDynamicNumberOfGCThreads || - workers()->active_workers() == workers()->total_workers(), - "Unless dynamic should use total workers"); workers()->run_task(&rebuild_rs_task); // Rebuild the strong code root lists for each region @@ -2530,9 +2524,6 @@ HeapRegion* G1CollectedHeap::start_cset_region_for_worker(uint worker_i) { result = g1_policy()->collection_set(); uint cs_size = g1_policy()->cset_region_length(); uint active_workers = workers()->active_workers(); - assert(UseDynamicNumberOfGCThreads || - active_workers == workers()->total_workers(), - "Unless dynamic should use total workers"); uint end_ind = (cs_size * 
worker_i) / active_workers; uint start_ind = 0; @@ -3031,9 +3022,6 @@ void G1CollectedHeap::verify(bool silent, VerifyOption vo) { if (GCParallelVerificationEnabled && ParallelGCThreads > 1) { G1ParVerifyTask task(this, vo); - assert(UseDynamicNumberOfGCThreads || - workers()->active_workers() == workers()->total_workers(), - "If not dynamic should be using all the workers"); workers()->run_task(&task); if (task.failures()) { failures = true; @@ -3682,9 +3670,6 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) { uint active_workers = AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(), workers()->active_workers(), Threads::number_of_non_daemon_threads()); - assert(UseDynamicNumberOfGCThreads || - active_workers == workers()->total_workers(), - "If not dynamic should be using all the workers"); workers()->set_active_workers(active_workers); double pause_start_sec = os::elapsedTime(); @@ -5189,7 +5174,7 @@ public: }; // Weak Reference processing during an evacuation pause (part 1). -void G1CollectedHeap::process_discovered_references(uint no_of_gc_workers) { +void G1CollectedHeap::process_discovered_references() { double ref_proc_start = os::elapsedTime(); ReferenceProcessor* rp = _ref_processor_stw; @@ -5216,7 +5201,7 @@ void G1CollectedHeap::process_discovered_references(uint no_of_gc_workers) { // referents points to another object which is also referenced by an // object discovered by the STW ref processor. - assert(no_of_gc_workers == workers()->active_workers(), "Need to reset active GC workers"); + uint no_of_gc_workers = workers()->active_workers(); G1ParPreserveCMReferentsTask keep_cm_referents(this, no_of_gc_workers, @@ -5297,7 +5282,7 @@ void G1CollectedHeap::process_discovered_references(uint no_of_gc_workers) { } // Weak Reference processing during an evacuation pause (part 2). 
-void G1CollectedHeap::enqueue_discovered_references(uint no_of_gc_workers) { +void G1CollectedHeap::enqueue_discovered_references() { double ref_enq_start = os::elapsedTime(); ReferenceProcessor* rp = _ref_processor_stw; @@ -5311,12 +5296,12 @@ void G1CollectedHeap::enqueue_discovered_references(uint no_of_gc_workers) { } else { // Parallel reference enqueueing - assert(no_of_gc_workers == workers()->active_workers(), - "Need to reset active workers"); - assert(rp->num_q() == no_of_gc_workers, "sanity"); - assert(no_of_gc_workers <= rp->max_num_q(), "sanity"); + uint n_workers = workers()->active_workers(); - G1STWRefProcTaskExecutor par_task_executor(this, workers(), _task_queues, no_of_gc_workers); + assert(rp->num_q() == n_workers, "sanity"); + assert(n_workers <= rp->max_num_q(), "sanity"); + + G1STWRefProcTaskExecutor par_task_executor(this, workers(), _task_queues, n_workers); rp->enqueue_discovered_references(&par_task_executor); } @@ -5347,9 +5332,6 @@ void G1CollectedHeap::evacuate_collection_set(EvacuationInfo& evacuation_info) { hot_card_cache->set_use_cache(false); const uint n_workers = workers()->active_workers(); - assert(UseDynamicNumberOfGCThreads || - n_workers == workers()->total_workers(), - "If not dynamic should be using all the workers"); init_for_evac_failure(NULL); @@ -5365,12 +5347,9 @@ void G1CollectedHeap::evacuate_collection_set(EvacuationInfo& evacuation_info) { ClassLoaderDataGraph::clear_claimed_marks(); } - // The individual threads will set their evac-failure closures. - if (PrintTerminationStats) G1ParScanThreadState::print_termination_stats_hdr(); - // These tasks use ShareHeap::_process_strong_tasks - assert(UseDynamicNumberOfGCThreads || - workers()->active_workers() == workers()->total_workers(), - "If not dynamic should be using all the workers"); + // The individual threads will set their evac-failure closures. 
+ if (PrintTerminationStats) G1ParScanThreadState::print_termination_stats_hdr(); + workers()->run_task(&g1_par_task); end_par_time_sec = os::elapsedTime(); @@ -5395,7 +5374,7 @@ void G1CollectedHeap::evacuate_collection_set(EvacuationInfo& evacuation_info) { // as we may have to copy some 'reachable' referent // objects (and their reachable sub-graphs) that were // not copied during the pause. - process_discovered_references(n_workers); + process_discovered_references(); if (G1StringDedup::is_enabled()) { double fixup_start = os::elapsedTime(); @@ -5437,7 +5416,7 @@ void G1CollectedHeap::evacuate_collection_set(EvacuationInfo& evacuation_info) { // will log these updates (and dirty their associated // cards). We need these updates logged to update any // RSets. - enqueue_discovered_references(n_workers); + enqueue_discovered_references(); redirty_logged_cards(); COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); diff --git a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp index 7f79f521682..c16ba569427 100644 --- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp +++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp @@ -606,11 +606,11 @@ protected: // Process any reference objects discovered during // an incremental evacuation pause. - void process_discovered_references(uint no_of_gc_workers); + void process_discovered_references(); // Enqueue any remaining discovered references // after processing. - void enqueue_discovered_references(uint no_of_gc_workers); + void enqueue_discovered_references(); public: FlexibleWorkGang* workers() const { return _workers; } diff --git a/hotspot/src/share/vm/gc/shared/workgroup.hpp b/hotspot/src/share/vm/gc/shared/workgroup.hpp index e559da25b7b..9c9bb65c1a1 100644 --- a/hotspot/src/share/vm/gc/shared/workgroup.hpp +++ b/hotspot/src/share/vm/gc/shared/workgroup.hpp @@ -322,7 +322,13 @@ class FlexibleWorkGang: public WorkGang { _active_workers(UseDynamicNumberOfGCThreads ? 
1U : workers) {} // Accessors for fields. - virtual uint active_workers() const { return _active_workers; } + virtual uint active_workers() const { + assert(_active_workers <= _total_workers, + err_msg("_active_workers: %u > _total_workers: %u", _active_workers, _total_workers)); + assert(UseDynamicNumberOfGCThreads || _active_workers == _total_workers, + "Unless dynamic should use total workers"); + return _active_workers; + } void set_active_workers(uint v) { assert(v <= _total_workers, "Trying to set more workers active than there are"); From 23b343af68259df225b9d49787d524482891838c Mon Sep 17 00:00:00 2001 From: Stefan Karlsson Date: Fri, 22 May 2015 10:58:16 +0200 Subject: [PATCH 14/26] 8080876: Replace unnecessary MAX2(ParallelGCThreads, 1) calls with ParallelGCThreads Reviewed-by: kbarrett, mgerdin --- hotspot/src/share/vm/gc/cms/parNewGeneration.cpp | 4 ++-- .../src/share/vm/gc/g1/concurrentG1Refine.cpp | 2 +- hotspot/src/share/vm/gc/g1/concurrentMark.cpp | 2 +- hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp | 16 +++++++--------- hotspot/src/share/vm/gc/g1/g1OopClosures.cpp | 4 ++-- .../src/share/vm/gc/g1/g1StringDedupQueue.cpp | 2 +- .../src/share/vm/gc/g1/g1StringDedupTable.cpp | 2 +- .../share/vm/gc/parallel/psParallelCompact.cpp | 4 ++-- hotspot/src/share/vm/gc/parallel/psScavenge.cpp | 4 ++-- .../share/vm/gc/shared/adaptiveSizePolicy.cpp | 2 +- hotspot/src/share/vm/runtime/arguments.cpp | 4 +--- 11 files changed, 21 insertions(+), 25 deletions(-) diff --git a/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp b/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp index 6b327332418..227b669faa3 100644 --- a/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp +++ b/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp @@ -1475,9 +1475,9 @@ void ParNewGeneration::ref_processor_init() { _ref_processor = new ReferenceProcessor(_reserved, // span ParallelRefProcEnabled && (ParallelGCThreads > 1), // mt processing - (int) ParallelGCThreads, // mt processing degree + (uint) 
ParallelGCThreads, // mt processing degree refs_discovery_is_mt(), // mt discovery - (int) ParallelGCThreads, // mt discovery degree + (uint) ParallelGCThreads, // mt discovery degree refs_discovery_is_atomic(), // atomic_discovery NULL); // is_alive_non_header } diff --git a/hotspot/src/share/vm/gc/g1/concurrentG1Refine.cpp b/hotspot/src/share/vm/gc/g1/concurrentG1Refine.cpp index aa8174860ba..577a3692e1b 100644 --- a/hotspot/src/share/vm/gc/g1/concurrentG1Refine.cpp +++ b/hotspot/src/share/vm/gc/g1/concurrentG1Refine.cpp @@ -35,7 +35,7 @@ ConcurrentG1Refine::ConcurrentG1Refine(G1CollectedHeap* g1h, CardTableEntryClosu { // Ergonomically select initial concurrent refinement parameters if (FLAG_IS_DEFAULT(G1ConcRefinementGreenZone)) { - FLAG_SET_DEFAULT(G1ConcRefinementGreenZone, MAX2(ParallelGCThreads, 1)); + FLAG_SET_DEFAULT(G1ConcRefinementGreenZone, (intx)ParallelGCThreads); } set_green_zone(G1ConcRefinementGreenZone); diff --git a/hotspot/src/share/vm/gc/g1/concurrentMark.cpp b/hotspot/src/share/vm/gc/g1/concurrentMark.cpp index 18f3e18ffcb..79826904518 100644 --- a/hotspot/src/share/vm/gc/g1/concurrentMark.cpp +++ b/hotspot/src/share/vm/gc/g1/concurrentMark.cpp @@ -518,7 +518,7 @@ ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev _markStack(this), // _finger set in set_non_marking_state - _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)), + _max_worker_id((uint)ParallelGCThreads), // _active_tasks set in set_non_marking_state // _tasks set inside the constructor _task_queues(new CMTaskQueueSet((int) _max_worker_id)), diff --git a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp index 2163f618ffe..57f752e3fdd 100644 --- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp +++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp @@ -1752,7 +1752,7 @@ G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) : _allocator = G1Allocator::create_allocator(this); 
_humongous_object_threshold_in_words = HeapRegion::GrainWords / 2; - int n_queues = MAX2((int)ParallelGCThreads, 1); + int n_queues = (int)ParallelGCThreads; _task_queues = new RefToScanQueueSet(n_queues); uint n_rem_sets = HeapRegionRemSet::num_par_rem_sets(); @@ -2064,11 +2064,11 @@ void G1CollectedHeap::ref_processing_init() { new ReferenceProcessor(mr, // span ParallelRefProcEnabled && (ParallelGCThreads > 1), // mt processing - (int) ParallelGCThreads, + (uint) ParallelGCThreads, // degree of mt processing (ParallelGCThreads > 1) || (ConcGCThreads > 1), // mt discovery - (int) MAX2(ParallelGCThreads, ConcGCThreads), + (uint) MAX2(ParallelGCThreads, ConcGCThreads), // degree of mt discovery false, // Reference discovery is not atomic @@ -2081,11 +2081,11 @@ void G1CollectedHeap::ref_processing_init() { new ReferenceProcessor(mr, // span ParallelRefProcEnabled && (ParallelGCThreads > 1), // mt processing - MAX2((int)ParallelGCThreads, 1), + (uint) ParallelGCThreads, // degree of mt processing (ParallelGCThreads > 1), // mt discovery - MAX2((int)ParallelGCThreads, 1), + (uint) ParallelGCThreads, // degree of mt discovery true, // Reference discovery is atomic @@ -2485,8 +2485,7 @@ void G1CollectedHeap::clear_cset_start_regions() { assert(_worker_cset_start_region != NULL, "sanity"); assert(_worker_cset_start_region_time_stamp != NULL, "sanity"); - int n_queues = MAX2((int)ParallelGCThreads, 1); - for (int i = 0; i < n_queues; i++) { + for (uint i = 0; i < ParallelGCThreads; i++) { _worker_cset_start_region[i] = NULL; _worker_cset_start_region_time_stamp[i] = 0; } @@ -3844,8 +3843,7 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) { if (evacuation_failed()) { _allocator->set_used(recalculate_used()); - uint n_queues = MAX2((int)ParallelGCThreads, 1); - for (uint i = 0; i < n_queues; i++) { + for (uint i = 0; i < ParallelGCThreads; i++) { if (_evacuation_failed_info_array[i].has_failed()) { 
_gc_tracer_stw->report_evacuation_failed(_evacuation_failed_info_array[i]); } diff --git a/hotspot/src/share/vm/gc/g1/g1OopClosures.cpp b/hotspot/src/share/vm/gc/g1/g1OopClosures.cpp index 57ce7a58299..1667002abaf 100644 --- a/hotspot/src/share/vm/gc/g1/g1OopClosures.cpp +++ b/hotspot/src/share/vm/gc/g1/g1OopClosures.cpp @@ -50,8 +50,8 @@ void G1ParClosureSuper::set_par_scan_thread_state(G1ParScanThreadState* par_scan _par_scan_state = par_scan_state; _worker_id = par_scan_state->queue_num(); - assert(_worker_id < MAX2((uint)ParallelGCThreads, 1u), - err_msg("The given worker id %u must be less than the number of threads %u", _worker_id, MAX2((uint)ParallelGCThreads, 1u))); + assert(_worker_id < ParallelGCThreads, + err_msg("The given worker id %u must be less than the number of threads " UINTX_FORMAT, _worker_id, ParallelGCThreads)); } // Generate G1 specialized oop_oop_iterate functions. diff --git a/hotspot/src/share/vm/gc/g1/g1StringDedupQueue.cpp b/hotspot/src/share/vm/gc/g1/g1StringDedupQueue.cpp index 8216889c8af..44376a600b0 100644 --- a/hotspot/src/share/vm/gc/g1/g1StringDedupQueue.cpp +++ b/hotspot/src/share/vm/gc/g1/g1StringDedupQueue.cpp @@ -42,7 +42,7 @@ G1StringDedupQueue::G1StringDedupQueue() : _cancel(false), _empty(true), _dropped(0) { - _nqueues = MAX2(ParallelGCThreads, (size_t)1); + _nqueues = ParallelGCThreads; _queues = NEW_C_HEAP_ARRAY(G1StringDedupWorkerQueue, _nqueues, mtGC); for (size_t i = 0; i < _nqueues; i++) { new (_queues + i) G1StringDedupWorkerQueue(G1StringDedupWorkerQueue::default_segment_size(), _max_cache_size, _max_size); diff --git a/hotspot/src/share/vm/gc/g1/g1StringDedupTable.cpp b/hotspot/src/share/vm/gc/g1/g1StringDedupTable.cpp index 8adb7b96665..30f9843459a 100644 --- a/hotspot/src/share/vm/gc/g1/g1StringDedupTable.cpp +++ b/hotspot/src/share/vm/gc/g1/g1StringDedupTable.cpp @@ -112,7 +112,7 @@ public: }; G1StringDedupEntryCache::G1StringDedupEntryCache() { - _nlists = MAX2(ParallelGCThreads, (size_t)1); + _nlists = 
ParallelGCThreads; _lists = PaddedArray::create_unfreeable((uint)_nlists); } diff --git a/hotspot/src/share/vm/gc/parallel/psParallelCompact.cpp b/hotspot/src/share/vm/gc/parallel/psParallelCompact.cpp index 91615543387..6b2553fb408 100644 --- a/hotspot/src/share/vm/gc/parallel/psParallelCompact.cpp +++ b/hotspot/src/share/vm/gc/parallel/psParallelCompact.cpp @@ -832,9 +832,9 @@ void PSParallelCompact::post_initialize() { _ref_processor = new ReferenceProcessor(mr, // span ParallelRefProcEnabled && (ParallelGCThreads > 1), // mt processing - (int) ParallelGCThreads, // mt processing degree + (uint) ParallelGCThreads, // mt processing degree true, // mt discovery - (int) ParallelGCThreads, // mt discovery degree + (uint) ParallelGCThreads, // mt discovery degree true, // atomic_discovery &_is_alive_closure); // non-header is alive closure _counters = new CollectorCounters("PSParallelCompact", 1); diff --git a/hotspot/src/share/vm/gc/parallel/psScavenge.cpp b/hotspot/src/share/vm/gc/parallel/psScavenge.cpp index ff7651c1c69..8f7b693b252 100644 --- a/hotspot/src/share/vm/gc/parallel/psScavenge.cpp +++ b/hotspot/src/share/vm/gc/parallel/psScavenge.cpp @@ -845,9 +845,9 @@ void PSScavenge::initialize() { _ref_processor = new ReferenceProcessor(mr, // span ParallelRefProcEnabled && (ParallelGCThreads > 1), // mt processing - (int) ParallelGCThreads, // mt processing degree + (uint) ParallelGCThreads, // mt processing degree true, // mt discovery - (int) ParallelGCThreads, // mt discovery degree + (uint) ParallelGCThreads, // mt discovery degree true, // atomic_discovery NULL); // header provides liveness info diff --git a/hotspot/src/share/vm/gc/shared/adaptiveSizePolicy.cpp b/hotspot/src/share/vm/gc/shared/adaptiveSizePolicy.cpp index 78d9a39e8f4..eba54f95aff 100644 --- a/hotspot/src/share/vm/gc/shared/adaptiveSizePolicy.cpp +++ b/hotspot/src/share/vm/gc/shared/adaptiveSizePolicy.cpp @@ -161,7 +161,7 @@ uint AdaptiveSizePolicy::calc_default_active_workers(uintx 
total_workers, } _debug_perturbation = !_debug_perturbation; } - assert((new_active_workers <= (uintx) ParallelGCThreads) && + assert((new_active_workers <= ParallelGCThreads) && (new_active_workers >= min_workers), "Jiggled active workers too much"); } diff --git a/hotspot/src/share/vm/runtime/arguments.cpp b/hotspot/src/share/vm/runtime/arguments.cpp index 8eff73dab06..06af457396f 100644 --- a/hotspot/src/share/vm/runtime/arguments.cpp +++ b/hotspot/src/share/vm/runtime/arguments.cpp @@ -1278,10 +1278,8 @@ void Arguments::set_cms_and_parnew_gc_flags() { // Preferred young gen size for "short" pauses: // upper bound depends on # of threads and NewRatio. - const uintx parallel_gc_threads = - (ParallelGCThreads == 0 ? 1 : ParallelGCThreads); const size_t preferred_max_new_size_unaligned = - MIN2(max_heap/(NewRatio+1), ScaleForWordSize(young_gen_per_worker * parallel_gc_threads)); + MIN2(max_heap/(NewRatio+1), ScaleForWordSize(young_gen_per_worker * ParallelGCThreads)); size_t preferred_max_new_size = align_size_up(preferred_max_new_size_unaligned, os::vm_page_size()); From db20c1bc454450b10092331db062c3fa3f406365 Mon Sep 17 00:00:00 2001 From: Stefan Karlsson Date: Fri, 22 May 2015 13:35:29 +0200 Subject: [PATCH 15/26] 8080877: Don't use workers()->total_workers() when walking G1CollectedHeap::_task_queues Reviewed-by: jmasa, drwhite --- hotspot/src/share/vm/gc/cms/cmsOopClosures.hpp | 1 + hotspot/src/share/vm/gc/cms/parOopClosures.hpp | 1 + hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp | 8 ++++++-- hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp | 2 ++ hotspot/src/share/vm/gc/shared/genOopClosures.hpp | 5 ----- hotspot/src/share/vm/gc/shared/taskqueue.hpp | 2 ++ 6 files changed, 12 insertions(+), 7 deletions(-) diff --git a/hotspot/src/share/vm/gc/cms/cmsOopClosures.hpp b/hotspot/src/share/vm/gc/cms/cmsOopClosures.hpp index 434fc34dd6b..1ee3f47ce49 100644 --- a/hotspot/src/share/vm/gc/cms/cmsOopClosures.hpp +++ b/hotspot/src/share/vm/gc/cms/cmsOopClosures.hpp @@ 
-26,6 +26,7 @@ #define SHARE_VM_GC_CMS_CMSOOPCLOSURES_HPP #include "gc/shared/genOopClosures.hpp" +#include "gc/shared/taskqueue.hpp" #include "memory/iterator.hpp" ///////////////////////////////////////////////////////////////// diff --git a/hotspot/src/share/vm/gc/cms/parOopClosures.hpp b/hotspot/src/share/vm/gc/cms/parOopClosures.hpp index 73132650a5a..daf95f65785 100644 --- a/hotspot/src/share/vm/gc/cms/parOopClosures.hpp +++ b/hotspot/src/share/vm/gc/cms/parOopClosures.hpp @@ -26,6 +26,7 @@ #define SHARE_VM_GC_CMS_PAROOPCLOSURES_HPP #include "gc/shared/genOopClosures.hpp" +#include "gc/shared/taskqueue.hpp" #include "memory/padded.hpp" // Closures for ParNewGeneration diff --git a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp index 57f752e3fdd..e7fff3f4330 100644 --- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp +++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp @@ -3545,6 +3545,10 @@ public: }; #endif // ASSERT +uint G1CollectedHeap::num_task_queues() const { + return _task_queues->size(); +} + #if TASKQUEUE_STATS void G1CollectedHeap::print_taskqueue_stats_hdr(outputStream* const st) { st->print_raw_cr("GC Task Stats"); @@ -3556,7 +3560,7 @@ void G1CollectedHeap::print_taskqueue_stats(outputStream* const st) const { print_taskqueue_stats_hdr(st); TaskQueueStats totals; - const uint n = workers()->total_workers(); + const uint n = num_task_queues(); for (uint i = 0; i < n; ++i) { st->print("%3u ", i); task_queue(i)->stats.print(st); st->cr(); totals += task_queue(i)->stats; @@ -3567,7 +3571,7 @@ void G1CollectedHeap::print_taskqueue_stats(outputStream* const st) const { } void G1CollectedHeap::reset_taskqueue_stats() { - const uint n = workers()->total_workers(); + const uint n = num_task_queues(); for (uint i = 0; i < n; ++i) { task_queue(i)->stats.reset(); } diff --git a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp index c16ba569427..48e8b2a5080 100644 
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp +++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp @@ -981,6 +981,8 @@ public: RefToScanQueue *task_queue(uint i) const; + uint num_task_queues() const; + // A set of cards where updates happened during the GC DirtyCardQueueSet& dirty_card_queue_set() { return _dirty_card_queue_set; } diff --git a/hotspot/src/share/vm/gc/shared/genOopClosures.hpp b/hotspot/src/share/vm/gc/shared/genOopClosures.hpp index 6f49767d89b..4cec47220bb 100644 --- a/hotspot/src/share/vm/gc/shared/genOopClosures.hpp +++ b/hotspot/src/share/vm/gc/shared/genOopClosures.hpp @@ -35,11 +35,6 @@ class CardTableModRefBS; class DefNewGeneration; class KlassRemSet; -template class GenericTaskQueue; -typedef GenericTaskQueue OopTaskQueue; -template class GenericTaskQueueSet; -typedef GenericTaskQueueSet OopTaskQueueSet; - // Closure for iterating roots from a particular generation // Note: all classes deriving from this MUST call this do_barrier // method at the end of their own do_oop method! 
diff --git a/hotspot/src/share/vm/gc/shared/taskqueue.hpp b/hotspot/src/share/vm/gc/shared/taskqueue.hpp index 5b80a9dcc4a..06b416d6397 100644 --- a/hotspot/src/share/vm/gc/shared/taskqueue.hpp +++ b/hotspot/src/share/vm/gc/shared/taskqueue.hpp @@ -382,6 +382,8 @@ public: bool steal(uint queue_num, int* seed, E& t); bool peek(); + + uint size() const { return _n; } }; template void From b04d2bca57267acc5b32cecaa0b243bb7634146e Mon Sep 17 00:00:00 2001 From: Stefan Johansson Date: Mon, 25 May 2015 11:39:43 +0200 Subject: [PATCH 16/26] 8080746: Refactor oop iteration macros to be more general Reviewed-by: stefank, pliden --- hotspot/src/share/vm/memory/iterator.hpp | 5 -- hotspot/src/share/vm/oops/arrayKlass.hpp | 32 +++++++++++ .../vm/oops/instanceClassLoaderKlass.hpp | 15 ++---- .../oops/instanceClassLoaderKlass.inline.hpp | 30 ++--------- hotspot/src/share/vm/oops/instanceKlass.hpp | 15 ++---- .../share/vm/oops/instanceKlass.inline.hpp | 27 ++-------- .../src/share/vm/oops/instanceMirrorKlass.hpp | 15 ++---- .../vm/oops/instanceMirrorKlass.inline.hpp | 31 ++--------- .../src/share/vm/oops/instanceRefKlass.hpp | 15 ++---- .../share/vm/oops/instanceRefKlass.inline.hpp | 31 ++--------- hotspot/src/share/vm/oops/klass.hpp | 54 ++++++++++++++++--- hotspot/src/share/vm/oops/objArrayKlass.hpp | 20 +++---- .../share/vm/oops/objArrayKlass.inline.hpp | 43 +++------------ hotspot/src/share/vm/oops/oop.inline.hpp | 2 +- hotspot/src/share/vm/oops/typeArrayKlass.hpp | 28 +++++----- .../share/vm/oops/typeArrayKlass.inline.hpp | 38 +++++-------- 16 files changed, 149 insertions(+), 252 deletions(-) diff --git a/hotspot/src/share/vm/memory/iterator.hpp b/hotspot/src/share/vm/memory/iterator.hpp index 53335b8d5f1..8652a7ec7af 100644 --- a/hotspot/src/share/vm/memory/iterator.hpp +++ b/hotspot/src/share/vm/memory/iterator.hpp @@ -381,9 +381,4 @@ template <> class Devirtualizer { template static bool do_metadata(OopClosureType* closure); }; -// Helper to convert the oop iterate macro 
suffixes into bool values that can be used by template functions. -#define nvs_nv_to_bool true -#define nvs_v_to_bool false -#define nvs_to_bool(nv_suffix) nvs##nv_suffix##_to_bool - #endif // SHARE_VM_MEMORY_ITERATOR_HPP diff --git a/hotspot/src/share/vm/oops/arrayKlass.hpp b/hotspot/src/share/vm/oops/arrayKlass.hpp index 3ec39b2682b..7d109081b30 100644 --- a/hotspot/src/share/vm/oops/arrayKlass.hpp +++ b/hotspot/src/share/vm/oops/arrayKlass.hpp @@ -144,4 +144,36 @@ class ArrayKlass: public Klass { void oop_verify_on(oop obj, outputStream* st); }; +// Array oop iteration macros for declarations. +// Used to generate the declarations in the *ArrayKlass header files. + +#define OOP_OOP_ITERATE_DECL_RANGE(OopClosureType, nv_suffix) \ + int oop_oop_iterate_range##nv_suffix(oop obj, OopClosureType* closure, int start, int end); + +#if INCLUDE_ALL_GCS +// Named NO_BACKWARDS because the definition used by *ArrayKlass isn't reversed, see below. +#define OOP_OOP_ITERATE_DECL_NO_BACKWARDS(OopClosureType, nv_suffix) \ + int oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure); +#endif // INCLUDE_ALL_GCS + + +// Array oop iteration macros for definitions. +// Used to generate the definitions in the *ArrayKlass.inline.hpp files. + +#define OOP_OOP_ITERATE_DEFN_RANGE(KlassType, OopClosureType, nv_suffix) \ + \ +int KlassType::oop_oop_iterate_range##nv_suffix(oop obj, OopClosureType* closure, int start, int end) { \ + return oop_oop_iterate_range(obj, closure, start, end); \ +} + +#if INCLUDE_ALL_GCS +#define OOP_OOP_ITERATE_DEFN_NO_BACKWARDS(KlassType, OopClosureType, nv_suffix) \ +int KlassType::oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) { \ + /* No reverse implementation ATM. 
*/ \ + return oop_oop_iterate(obj, closure); \ +} +#else +#define OOP_OOP_ITERATE_DEFN_NO_BACKWARDS(KlassType, OopClosureType, nv_suffix) +#endif + #endif // SHARE_VM_OOPS_ARRAYKLASS_HPP diff --git a/hotspot/src/share/vm/oops/instanceClassLoaderKlass.hpp b/hotspot/src/share/vm/oops/instanceClassLoaderKlass.hpp index 18d377b66d8..e77f0315740 100644 --- a/hotspot/src/share/vm/oops/instanceClassLoaderKlass.hpp +++ b/hotspot/src/share/vm/oops/instanceClassLoaderKlass.hpp @@ -87,19 +87,12 @@ public: public: -#define InstanceClassLoaderKlass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix) \ - int oop_oop_iterate##nv_suffix(oop obj, OopClosureType* blk); \ - int oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* blk, MemRegion mr); - - ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceClassLoaderKlass_OOP_OOP_ITERATE_DECL) - ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceClassLoaderKlass_OOP_OOP_ITERATE_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL) #if INCLUDE_ALL_GCS -#define InstanceClassLoaderKlass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix) \ - int oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* blk); - - ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceClassLoaderKlass_OOP_OOP_ITERATE_BACKWARDS_DECL) - ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceClassLoaderKlass_OOP_OOP_ITERATE_BACKWARDS_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL_BACKWARDS) + ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL_BACKWARDS) #endif // INCLUDE_ALL_GCS }; diff --git a/hotspot/src/share/vm/oops/instanceClassLoaderKlass.inline.hpp b/hotspot/src/share/vm/oops/instanceClassLoaderKlass.inline.hpp index f54bde1ca2a..36518bb20de 100644 --- a/hotspot/src/share/vm/oops/instanceClassLoaderKlass.inline.hpp +++ b/hotspot/src/share/vm/oops/instanceClassLoaderKlass.inline.hpp @@ -78,33 +78,9 @@ inline int InstanceClassLoaderKlass::oop_oop_iterate_bounded(oop obj, OopClosure return size; } - -#define 
InstanceClassLoaderKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix) \ - \ -int InstanceClassLoaderKlass::oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) { \ - return oop_oop_iterate(obj, closure); \ -} - -#if INCLUDE_ALL_GCS -#define InstanceClassLoaderKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix) \ - \ -int InstanceClassLoaderKlass::oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) { \ - return oop_oop_iterate_reverse(obj, closure); \ -} -#else -#define InstanceClassLoaderKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix) -#endif - - -#define InstanceClassLoaderKlass_OOP_OOP_ITERATE_DEFN_m(OopClosureType, nv_suffix) \ - \ -int InstanceClassLoaderKlass::oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* closure, MemRegion mr) { \ - return oop_oop_iterate_bounded(obj, closure, mr); \ -} - #define ALL_INSTANCE_CLASS_LOADER_KLASS_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix) \ - InstanceClassLoaderKlass_OOP_OOP_ITERATE_DEFN( OopClosureType, nv_suffix) \ - InstanceClassLoaderKlass_OOP_OOP_ITERATE_DEFN_m( OopClosureType, nv_suffix) \ - InstanceClassLoaderKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix) + OOP_OOP_ITERATE_DEFN( InstanceClassLoaderKlass, OopClosureType, nv_suffix) \ + OOP_OOP_ITERATE_DEFN_BOUNDED( InstanceClassLoaderKlass, OopClosureType, nv_suffix) \ + OOP_OOP_ITERATE_DEFN_BACKWARDS(InstanceClassLoaderKlass, OopClosureType, nv_suffix) #endif // SHARE_VM_OOPS_INSTANCECLASSLOADERKLASS_INLINE_HPP diff --git a/hotspot/src/share/vm/oops/instanceKlass.hpp b/hotspot/src/share/vm/oops/instanceKlass.hpp index 3291e7927ed..b9aaa75480e 100644 --- a/hotspot/src/share/vm/oops/instanceKlass.hpp +++ b/hotspot/src/share/vm/oops/instanceKlass.hpp @@ -1084,19 +1084,12 @@ class InstanceKlass: public Klass { public: -#define InstanceKlass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix) \ - int oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure); \ - int 
oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* closure, MemRegion mr); - - ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceKlass_OOP_OOP_ITERATE_DECL) - ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceKlass_OOP_OOP_ITERATE_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL) #if INCLUDE_ALL_GCS -#define InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix) \ - int oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure); - - ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DECL) - ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL_BACKWARDS) + ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL_BACKWARDS) #endif // INCLUDE_ALL_GCS u2 idnum_allocated_count() const { return _idnum_allocated_count; } diff --git a/hotspot/src/share/vm/oops/instanceKlass.inline.hpp b/hotspot/src/share/vm/oops/instanceKlass.inline.hpp index 5142c6d2e05..4b4352f38dd 100644 --- a/hotspot/src/share/vm/oops/instanceKlass.inline.hpp +++ b/hotspot/src/share/vm/oops/instanceKlass.inline.hpp @@ -27,6 +27,7 @@ #include "memory/iterator.hpp" #include "oops/instanceKlass.hpp" +#include "oops/klass.hpp" #include "oops/oop.inline.hpp" #include "utilities/debug.hpp" #include "utilities/globalDefinitions.hpp" @@ -187,29 +188,9 @@ INLINE int InstanceKlass::oop_oop_iterate_bounded(oop obj, OopClosureType* closu #undef INLINE - -#define InstanceKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix) \ -int InstanceKlass::oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) { \ - return oop_oop_iterate(obj, closure); \ -} - -#if INCLUDE_ALL_GCS -#define InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix) \ -int InstanceKlass::oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) { \ - return oop_oop_iterate_reverse(obj, closure); \ -} -#else -#define 
InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix) -#endif - -#define InstanceKlass_OOP_OOP_ITERATE_DEFN_m(OopClosureType, nv_suffix) \ -int InstanceKlass::oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* closure, MemRegion mr) { \ - return oop_oop_iterate_bounded(obj, closure, mr); \ -} - #define ALL_INSTANCE_KLASS_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix) \ - InstanceKlass_OOP_OOP_ITERATE_DEFN( OopClosureType, nv_suffix) \ - InstanceKlass_OOP_OOP_ITERATE_DEFN_m( OopClosureType, nv_suffix) \ - InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix) + OOP_OOP_ITERATE_DEFN( InstanceKlass, OopClosureType, nv_suffix) \ + OOP_OOP_ITERATE_DEFN_BOUNDED( InstanceKlass, OopClosureType, nv_suffix) \ + OOP_OOP_ITERATE_DEFN_BACKWARDS(InstanceKlass, OopClosureType, nv_suffix) #endif // SHARE_VM_OOPS_INSTANCEKLASS_INLINE_HPP diff --git a/hotspot/src/share/vm/oops/instanceMirrorKlass.hpp b/hotspot/src/share/vm/oops/instanceMirrorKlass.hpp index 30154a9c18f..d50f43a1210 100644 --- a/hotspot/src/share/vm/oops/instanceMirrorKlass.hpp +++ b/hotspot/src/share/vm/oops/instanceMirrorKlass.hpp @@ -149,19 +149,12 @@ class InstanceMirrorKlass: public InstanceKlass { public: -#define InstanceMirrorKlass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix) \ - int oop_oop_iterate##nv_suffix(oop obj, OopClosureType* blk); \ - int oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* blk, MemRegion mr); - - ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceMirrorKlass_OOP_OOP_ITERATE_DECL) - ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceMirrorKlass_OOP_OOP_ITERATE_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL) #if INCLUDE_ALL_GCS -#define InstanceMirrorKlass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix) \ - int oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* blk); - - ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceMirrorKlass_OOP_OOP_ITERATE_BACKWARDS_DECL) - 
ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceMirrorKlass_OOP_OOP_ITERATE_BACKWARDS_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL_BACKWARDS) + ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL_BACKWARDS) #endif // INCLUDE_ALL_GCS }; diff --git a/hotspot/src/share/vm/oops/instanceMirrorKlass.inline.hpp b/hotspot/src/share/vm/oops/instanceMirrorKlass.inline.hpp index c4c2d6c0a70..75f3af92631 100644 --- a/hotspot/src/share/vm/oops/instanceMirrorKlass.inline.hpp +++ b/hotspot/src/share/vm/oops/instanceMirrorKlass.inline.hpp @@ -27,6 +27,7 @@ #include "classfile/javaClasses.hpp" #include "oops/instanceKlass.inline.hpp" #include "oops/instanceMirrorKlass.hpp" +#include "oops/klass.hpp" #include "oops/oop.inline.hpp" #include "utilities/debug.hpp" #include "utilities/globalDefinitions.hpp" @@ -132,33 +133,9 @@ int InstanceMirrorKlass::oop_oop_iterate_bounded(oop obj, OopClosureType* closur return oop_size(obj); } - -#define InstanceMirrorKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix) \ - \ -int InstanceMirrorKlass::oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) { \ - return oop_oop_iterate(obj, closure); \ -} - -#if INCLUDE_ALL_GCS -#define InstanceMirrorKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix) \ - \ -int InstanceMirrorKlass::oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) { \ - return oop_oop_iterate_reverse(obj, closure); \ -} -#else -#define InstanceMirrorKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix) -#endif - - -#define InstanceMirrorKlass_OOP_OOP_ITERATE_DEFN_m(OopClosureType, nv_suffix) \ - \ -int InstanceMirrorKlass::oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* closure, MemRegion mr) { \ - return oop_oop_iterate_bounded(obj, closure, mr); \ -} - #define ALL_INSTANCE_MIRROR_KLASS_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix) \ - InstanceMirrorKlass_OOP_OOP_ITERATE_DEFN( OopClosureType, nv_suffix) \ - InstanceMirrorKlass_OOP_OOP_ITERATE_DEFN_m( OopClosureType, 
nv_suffix) \ - InstanceMirrorKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix) + OOP_OOP_ITERATE_DEFN( InstanceMirrorKlass, OopClosureType, nv_suffix) \ + OOP_OOP_ITERATE_DEFN_BOUNDED( InstanceMirrorKlass, OopClosureType, nv_suffix) \ + OOP_OOP_ITERATE_DEFN_BACKWARDS(InstanceMirrorKlass, OopClosureType, nv_suffix) #endif // SHARE_VM_OOPS_INSTANCEMIRRORKLASS_INLINE_HPP diff --git a/hotspot/src/share/vm/oops/instanceRefKlass.hpp b/hotspot/src/share/vm/oops/instanceRefKlass.hpp index b13dc403beb..8560b4a6a12 100644 --- a/hotspot/src/share/vm/oops/instanceRefKlass.hpp +++ b/hotspot/src/share/vm/oops/instanceRefKlass.hpp @@ -119,19 +119,12 @@ private: public: -#define InstanceRefKlass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix) \ - int oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure); \ - int oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* closure, MemRegion mr); - - ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_DECL) - ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL) #if INCLUDE_ALL_GCS -#define InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix) \ - int oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure); - - ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DECL) - ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL_BACKWARDS) + ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL_BACKWARDS) #endif // INCLUDE_ALL_GCS static void release_and_notify_pending_list_lock(BasicLock *pending_list_basic_lock); diff --git a/hotspot/src/share/vm/oops/instanceRefKlass.inline.hpp b/hotspot/src/share/vm/oops/instanceRefKlass.inline.hpp index 26a2017402a..8cbea359991 100644 --- a/hotspot/src/share/vm/oops/instanceRefKlass.inline.hpp +++ 
b/hotspot/src/share/vm/oops/instanceRefKlass.inline.hpp @@ -141,34 +141,9 @@ int InstanceRefKlass::oop_oop_iterate_bounded(oop obj, OopClosureType* closure, // Macro to define InstanceRefKlass::oop_oop_iterate for virtual/nonvirtual for // all closures. Macros calling macros above for each oop size. - -#define InstanceRefKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix) \ - \ -int InstanceRefKlass::oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) { \ - return oop_oop_iterate(obj, closure); \ -} - -#if INCLUDE_ALL_GCS -#define InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix) \ - \ -int InstanceRefKlass::oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) { \ - return oop_oop_iterate_reverse(obj, closure); \ -} -#else -#define InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix) -#endif - - -#define InstanceRefKlass_OOP_OOP_ITERATE_DEFN_m(OopClosureType, nv_suffix) \ - \ -int InstanceRefKlass::oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* closure, MemRegion mr) { \ - return oop_oop_iterate_bounded(obj, closure, mr); \ -} - #define ALL_INSTANCE_REF_KLASS_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix) \ - InstanceRefKlass_OOP_OOP_ITERATE_DEFN( OopClosureType, nv_suffix) \ - InstanceRefKlass_OOP_OOP_ITERATE_DEFN_m( OopClosureType, nv_suffix) \ - InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix) - + OOP_OOP_ITERATE_DEFN( InstanceRefKlass, OopClosureType, nv_suffix) \ + OOP_OOP_ITERATE_DEFN_BOUNDED( InstanceRefKlass, OopClosureType, nv_suffix) \ + OOP_OOP_ITERATE_DEFN_BACKWARDS(InstanceRefKlass, OopClosureType, nv_suffix) #endif // SHARE_VM_OOPS_INSTANCEREFKLASS_INLINE_HPP diff --git a/hotspot/src/share/vm/oops/klass.hpp b/hotspot/src/share/vm/oops/klass.hpp index d6e0e06c7e9..57648b363b6 100644 --- a/hotspot/src/share/vm/oops/klass.hpp +++ b/hotspot/src/share/vm/oops/klass.hpp @@ -583,20 +583,20 @@ protected: // Iterators specialized to particular 
subtypes // of ExtendedOopClosure, to avoid closure virtual calls. -#define Klass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix) \ - virtual int oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) = 0; \ - /* Iterates "closure" over all the oops in "obj" (of type "this") within "mr". */ \ - virtual int oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* closure, MemRegion mr) = 0; +#define Klass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix) \ + virtual int oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) = 0; \ + /* Iterates "closure" over all the oops in "obj" (of type "this") within "mr". */ \ + virtual int oop_oop_iterate_bounded##nv_suffix(oop obj, OopClosureType* closure, MemRegion mr) = 0; ALL_OOP_OOP_ITERATE_CLOSURES_1(Klass_OOP_OOP_ITERATE_DECL) ALL_OOP_OOP_ITERATE_CLOSURES_2(Klass_OOP_OOP_ITERATE_DECL) #if INCLUDE_ALL_GCS -#define Klass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix) \ +#define Klass_OOP_OOP_ITERATE_DECL_BACKWARDS(OopClosureType, nv_suffix) \ virtual int oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) = 0; - ALL_OOP_OOP_ITERATE_CLOSURES_1(Klass_OOP_OOP_ITERATE_BACKWARDS_DECL) - ALL_OOP_OOP_ITERATE_CLOSURES_2(Klass_OOP_OOP_ITERATE_BACKWARDS_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_1(Klass_OOP_OOP_ITERATE_DECL_BACKWARDS) + ALL_OOP_OOP_ITERATE_CLOSURES_2(Klass_OOP_OOP_ITERATE_DECL_BACKWARDS) #endif // INCLUDE_ALL_GCS virtual void array_klasses_do(void f(Klass* k)) {} @@ -651,4 +651,44 @@ protected: void klass_update_barrier_set_pre(oop* p, oop v); }; +// Helper to convert the oop iterate macro suffixes into bool values that can be used by template functions. +#define nvs_nv_to_bool true +#define nvs_v_to_bool false +#define nvs_to_bool(nv_suffix) nvs##nv_suffix##_to_bool + +// Oop iteration macros for declarations. +// Used to generate declarations in the *Klass header files. 
+ +#define OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix) \ + int oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure); \ + int oop_oop_iterate_bounded##nv_suffix(oop obj, OopClosureType* closure, MemRegion mr); + +#if INCLUDE_ALL_GCS +#define OOP_OOP_ITERATE_DECL_BACKWARDS(OopClosureType, nv_suffix) \ + int oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure); +#endif // INCLUDE_ALL_GCS + + +// Oop iteration macros for definitions. +// Used to generate definitions in the *Klass.inline.hpp files. + +#define OOP_OOP_ITERATE_DEFN(KlassType, OopClosureType, nv_suffix) \ +int KlassType::oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) { \ + return oop_oop_iterate(obj, closure); \ +} + +#if INCLUDE_ALL_GCS +#define OOP_OOP_ITERATE_DEFN_BACKWARDS(KlassType, OopClosureType, nv_suffix) \ +int KlassType::oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) { \ + return oop_oop_iterate_reverse(obj, closure); \ +} +#else +#define OOP_OOP_ITERATE_DEFN_BACKWARDS(KlassType, OopClosureType, nv_suffix) +#endif + +#define OOP_OOP_ITERATE_DEFN_BOUNDED(KlassType, OopClosureType, nv_suffix) \ +int KlassType::oop_oop_iterate_bounded##nv_suffix(oop obj, OopClosureType* closure, MemRegion mr) { \ + return oop_oop_iterate_bounded(obj, closure, mr); \ +} + #endif // SHARE_VM_OOPS_KLASS_HPP diff --git a/hotspot/src/share/vm/oops/objArrayKlass.hpp b/hotspot/src/share/vm/oops/objArrayKlass.hpp index 5c167cb493c..9b593522f3b 100644 --- a/hotspot/src/share/vm/oops/objArrayKlass.hpp +++ b/hotspot/src/share/vm/oops/objArrayKlass.hpp @@ -163,22 +163,14 @@ class ObjArrayKlass : public ArrayKlass { public: -#define ObjArrayKlass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix) \ - int oop_oop_iterate##nv_suffix(oop obj, OopClosureType* blk); \ - int oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* blk, \ - MemRegion mr); \ - int oop_oop_iterate_range##nv_suffix(oop obj, OopClosureType* blk, \ - int start, int end); - - 
ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayKlass_OOP_OOP_ITERATE_DECL) - ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayKlass_OOP_OOP_ITERATE_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL_RANGE) + ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL_RANGE) #if INCLUDE_ALL_GCS -#define ObjArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix) \ - int oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* blk); - - ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DECL) - ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL_NO_BACKWARDS) + ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL_NO_BACKWARDS) #endif // INCLUDE_ALL_GCS // JVM support diff --git a/hotspot/src/share/vm/oops/objArrayKlass.inline.hpp b/hotspot/src/share/vm/oops/objArrayKlass.inline.hpp index d7867ab8ea8..3a5cedf8137 100644 --- a/hotspot/src/share/vm/oops/objArrayKlass.inline.hpp +++ b/hotspot/src/share/vm/oops/objArrayKlass.inline.hpp @@ -27,6 +27,8 @@ #include "memory/memRegion.hpp" #include "memory/iterator.inline.hpp" +#include "oops/arrayKlass.hpp" +#include "oops/klass.hpp" #include "oops/objArrayKlass.hpp" #include "oops/objArrayOop.inline.hpp" #include "oops/oop.inline.hpp" @@ -149,41 +151,10 @@ int ObjArrayKlass::oop_oop_iterate_range(oop obj, OopClosureType* closure, int s return size; } - -#define ObjArrayKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix) \ - \ -int ObjArrayKlass::oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) { \ - return oop_oop_iterate(obj, closure); \ -} - -#if INCLUDE_ALL_GCS -#define ObjArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix) \ -int ObjArrayKlass::oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) { \ - /* No reverse implementation ATM. 
*/ \ - return oop_oop_iterate(obj, closure); \ -} -#else -#define ObjArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix) -#endif - -#define ObjArrayKlass_OOP_OOP_ITERATE_DEFN_m(OopClosureType, nv_suffix) \ - \ -int ObjArrayKlass::oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* closure, MemRegion mr) { \ - return oop_oop_iterate_bounded(obj, closure, mr); \ -} - -#define ObjArrayKlass_OOP_OOP_ITERATE_DEFN_r(OopClosureType, nv_suffix) \ - \ -int ObjArrayKlass::oop_oop_iterate_range##nv_suffix(oop obj, OopClosureType* closure, int start, int end) { \ - return oop_oop_iterate_range(obj, closure, start, end); \ -} - - -#define ALL_OBJ_ARRAY_KLASS_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix) \ - ObjArrayKlass_OOP_OOP_ITERATE_DEFN( OopClosureType, nv_suffix) \ - ObjArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix) \ - ObjArrayKlass_OOP_OOP_ITERATE_DEFN_m( OopClosureType, nv_suffix) \ - ObjArrayKlass_OOP_OOP_ITERATE_DEFN_r( OopClosureType, nv_suffix) - +#define ALL_OBJ_ARRAY_KLASS_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix) \ + OOP_OOP_ITERATE_DEFN( ObjArrayKlass, OopClosureType, nv_suffix) \ + OOP_OOP_ITERATE_DEFN_BOUNDED( ObjArrayKlass, OopClosureType, nv_suffix) \ + OOP_OOP_ITERATE_DEFN_RANGE( ObjArrayKlass, OopClosureType, nv_suffix) \ + OOP_OOP_ITERATE_DEFN_NO_BACKWARDS(ObjArrayKlass, OopClosureType, nv_suffix) #endif // SHARE_VM_OOPS_OBJARRAYKLASS_INLINE_HPP diff --git a/hotspot/src/share/vm/oops/oop.inline.hpp b/hotspot/src/share/vm/oops/oop.inline.hpp index 8e0842110e0..77bef028030 100644 --- a/hotspot/src/share/vm/oops/oop.inline.hpp +++ b/hotspot/src/share/vm/oops/oop.inline.hpp @@ -741,7 +741,7 @@ inline int oopDesc::oop_iterate(OopClosureType* blk) { \ } \ \ inline int oopDesc::oop_iterate(OopClosureType* blk, MemRegion mr) { \ - return klass()->oop_oop_iterate##nv_suffix##_m(this, blk, mr); \ + return klass()->oop_oop_iterate_bounded##nv_suffix(this, blk, mr); \ } diff --git 
a/hotspot/src/share/vm/oops/typeArrayKlass.hpp b/hotspot/src/share/vm/oops/typeArrayKlass.hpp index 36600be0b84..21ccd1f2919 100644 --- a/hotspot/src/share/vm/oops/typeArrayKlass.hpp +++ b/hotspot/src/share/vm/oops/typeArrayKlass.hpp @@ -92,24 +92,24 @@ class TypeArrayKlass : public ArrayKlass { // The implementation used by all oop_oop_iterate functions in TypeArrayKlasses. inline int oop_oop_iterate_impl(oop obj, ExtendedOopClosure* closure); + // Wraps oop_oop_iterate_impl to conform to macros. + template + inline int oop_oop_iterate(oop obj, OopClosureType* closure); + + // Wraps oop_oop_iterate_impl to conform to macros. + template + inline int oop_oop_iterate_bounded(oop obj, OopClosureType* closure, MemRegion mr); + public: -#define TypeArrayKlass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix) \ - int oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure); \ - int oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* closure, \ - MemRegion mr); \ - int oop_oop_iterate_range##nv_suffix(oop obj, OopClosureType* closure, \ - int start, int end); - - ALL_OOP_OOP_ITERATE_CLOSURES_1(TypeArrayKlass_OOP_OOP_ITERATE_DECL) - ALL_OOP_OOP_ITERATE_CLOSURES_2(TypeArrayKlass_OOP_OOP_ITERATE_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL_RANGE) + ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL_RANGE) #if INCLUDE_ALL_GCS -#define TypeArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix) \ - int oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure); - - ALL_OOP_OOP_ITERATE_CLOSURES_1(TypeArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DECL) - ALL_OOP_OOP_ITERATE_CLOSURES_2(TypeArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL_NO_BACKWARDS) + ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL_NO_BACKWARDS) #endif // INCLUDE_ALL_GCS diff --git 
a/hotspot/src/share/vm/oops/typeArrayKlass.inline.hpp b/hotspot/src/share/vm/oops/typeArrayKlass.inline.hpp index 9609972af8b..76d71451607 100644 --- a/hotspot/src/share/vm/oops/typeArrayKlass.inline.hpp +++ b/hotspot/src/share/vm/oops/typeArrayKlass.inline.hpp @@ -25,6 +25,8 @@ #ifndef SHARE_VM_OOPS_TYPEARRAYKLASS_INLINE_HPP #define SHARE_VM_OOPS_TYPEARRAYKLASS_INLINE_HPP +#include "oops/arrayKlass.hpp" +#include "oops/klass.hpp" #include "oops/oop.inline.hpp" #include "oops/typeArrayKlass.hpp" #include "oops/typeArrayOop.hpp" @@ -39,35 +41,19 @@ inline int TypeArrayKlass::oop_oop_iterate_impl(oop obj, ExtendedOopClosure* clo return t->object_size(); } -#define TypeArrayKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix) \ - \ -int TypeArrayKlass:: \ -oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) { \ - return oop_oop_iterate_impl(obj, closure); \ +template +int TypeArrayKlass::oop_oop_iterate(oop obj, OopClosureType* closure) { + return oop_oop_iterate_impl(obj, closure); } -#if INCLUDE_ALL_GCS -#define TypeArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix) \ - \ -int TypeArrayKlass:: \ -oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) { \ - return oop_oop_iterate_impl(obj, closure); \ -} -#else -#define TypeArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix) -#endif - - -#define TypeArrayKlass_OOP_OOP_ITERATE_DEFN_m(OopClosureType, nv_suffix) \ - \ -int TypeArrayKlass:: \ -oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* closure, MemRegion mr) { \ - return oop_oop_iterate_impl(obj, closure); \ +template +int TypeArrayKlass::oop_oop_iterate_bounded(oop obj, OopClosureType* closure, MemRegion mr) { + return oop_oop_iterate_impl(obj, closure); } -#define ALL_TYPE_ARRAY_KLASS_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix) \ - TypeArrayKlass_OOP_OOP_ITERATE_DEFN( OopClosureType, nv_suffix) \ - TypeArrayKlass_OOP_OOP_ITERATE_DEFN_m( OopClosureType, nv_suffix) \ - 
TypeArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix) +#define ALL_TYPE_ARRAY_KLASS_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix) \ + OOP_OOP_ITERATE_DEFN( TypeArrayKlass, OopClosureType, nv_suffix) \ + OOP_OOP_ITERATE_DEFN_BOUNDED( TypeArrayKlass, OopClosureType, nv_suffix) \ + OOP_OOP_ITERATE_DEFN_NO_BACKWARDS(TypeArrayKlass, OopClosureType, nv_suffix) #endif // SHARE_VM_OOPS_TYPEARRAYKLASS_INLINE_HPP From f3f59e37c991a0a544934c4be8440c8736a0adfc Mon Sep 17 00:00:00 2001 From: Stefan Karlsson Date: Mon, 25 May 2015 11:41:34 +0200 Subject: [PATCH 17/26] 8080879: Remove FlexibleWorkGang::set_for_termination Reviewed-by: brutisso, kbarrett, pliden --- .../vm/gc/cms/concurrentMarkSweepGeneration.cpp | 8 ++------ hotspot/src/share/vm/gc/cms/parNewGeneration.cpp | 11 ++--------- hotspot/src/share/vm/gc/cms/parNewGeneration.hpp | 4 ---- hotspot/src/share/vm/gc/cms/yieldingWorkgroup.hpp | 7 +++++++ hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp | 12 ++++-------- hotspot/src/share/vm/gc/shared/workgroup.cpp | 2 -- hotspot/src/share/vm/gc/shared/workgroup.hpp | 14 ++------------ 7 files changed, 17 insertions(+), 41 deletions(-) diff --git a/hotspot/src/share/vm/gc/cms/concurrentMarkSweepGeneration.cpp b/hotspot/src/share/vm/gc/cms/concurrentMarkSweepGeneration.cpp index 724c82029cf..09d3a72e514 100644 --- a/hotspot/src/share/vm/gc/cms/concurrentMarkSweepGeneration.cpp +++ b/hotspot/src/share/vm/gc/cms/concurrentMarkSweepGeneration.cpp @@ -5258,18 +5258,14 @@ public: CMSBitMap* mark_bit_map, AbstractWorkGang* workers, OopTaskQueueSet* task_queues): - // XXX Should superclass AGTWOQ also know about AWG since it knows - // about the task_queues used by the AWG? Then it could initialize - // the terminator() object. See 6984287. The set_for_termination() - // below is a temporary band-aid for the regression in 6984287. 
AbstractGangTaskWOopQueues("Process referents by policy in parallel", - task_queues), + task_queues, + workers->active_workers()), _task(task), _collector(collector), _span(span), _mark_bit_map(mark_bit_map) { assert(_collector->_span.equals(_span) && !_span.is_empty(), "Inconsistency in _span"); - set_for_termination(workers->active_workers()); } OopTaskQueueSet* task_queues() { return queues(); } diff --git a/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp b/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp index 227b669faa3..bcfc66e9dfe 100644 --- a/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp +++ b/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp @@ -576,12 +576,6 @@ ParNewGenTask::ParNewGenTask(ParNewGeneration* gen, Generation* old_gen, _strong_roots_scope(strong_roots_scope) {} -// Reset the terminator for the given number of -// active threads. -void ParNewGenTask::set_for_termination(uint active_workers) { - _state_set->reset(active_workers, _gen->promotion_failed()); -} - void ParNewGenTask::work(uint worker_id) { GenCollectedHeap* gch = GenCollectedHeap::heap(); // Since this is being done in a separate thread, need new resource @@ -757,9 +751,6 @@ public: private: virtual void work(uint worker_id); - virtual void set_for_termination(uint active_workers) { - _state_set.terminator()->reset_for_reuse(active_workers); - } private: ParNewGeneration& _gen; ProcessTask& _task; @@ -949,6 +940,8 @@ void ParNewGeneration::collect(bool full, *to(), *this, *_old_gen, *task_queues(), _overflow_stacks, desired_plab_sz(), _term); + thread_state_set.reset(n_workers, promotion_failed()); + { StrongRootsScope srs(n_workers); diff --git a/hotspot/src/share/vm/gc/cms/parNewGeneration.hpp b/hotspot/src/share/vm/gc/cms/parNewGeneration.hpp index f4f91aa4223..a901b7cf156 100644 --- a/hotspot/src/share/vm/gc/cms/parNewGeneration.hpp +++ b/hotspot/src/share/vm/gc/cms/parNewGeneration.hpp @@ -250,10 +250,6 @@ public: HeapWord* young_old_boundary() { return 
_young_old_boundary; } void work(uint worker_id); - - // Reset the terminator in ParScanThreadStateSet for - // "active_workers" threads. - virtual void set_for_termination(uint active_workers); }; class KeepAliveClosure: public DefNewGeneration::KeepAliveClosure { diff --git a/hotspot/src/share/vm/gc/cms/yieldingWorkgroup.hpp b/hotspot/src/share/vm/gc/cms/yieldingWorkgroup.hpp index 2696ce56867..3400f0fcfaf 100644 --- a/hotspot/src/share/vm/gc/cms/yieldingWorkgroup.hpp +++ b/hotspot/src/share/vm/gc/cms/yieldingWorkgroup.hpp @@ -147,6 +147,13 @@ public: bool completed() const { return _status == COMPLETED; } bool aborted() const { return _status == ABORTED; } bool active() const { return _status == ACTIVE; } + + // This method configures the task for proper termination. + // Some tasks do not have any requirements on termination + // and may inherit this method that does nothing. Some + // tasks do some coordination on termination and override + // this method to implement that coordination. + virtual void set_for_termination(uint active_workers) {} }; // Class YieldingWorkGang: A subclass of WorkGang. 
// In particular, a YieldingWorkGang is made up of diff --git a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp index e7fff3f4330..2ce9501550b 100644 --- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp +++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp @@ -4279,12 +4279,13 @@ protected: Mutex* stats_lock() { return &_stats_lock; } public: - G1ParTask(G1CollectedHeap* g1h, RefToScanQueueSet *task_queues, G1RootProcessor* root_processor) + G1ParTask(G1CollectedHeap* g1h, RefToScanQueueSet *task_queues, G1RootProcessor* root_processor, uint n_workers) : AbstractGangTask("G1 collection"), _g1h(g1h), _queues(task_queues), _root_processor(root_processor), - _terminator(0, _queues), + _terminator(n_workers, _queues), + _n_workers(n_workers), _stats_lock(Mutex::leaf, "parallel G1 stats lock", true) {} @@ -4296,11 +4297,6 @@ public: ParallelTaskTerminator* terminator() { return &_terminator; } - virtual void set_for_termination(uint active_workers) { - terminator()->reset_for_reuse(active_workers); - _n_workers = active_workers; - } - // Helps out with CLD processing. // // During InitialMark we need to: @@ -5343,7 +5339,7 @@ void G1CollectedHeap::evacuate_collection_set(EvacuationInfo& evacuation_info) { { G1RootProcessor root_processor(this, n_workers); - G1ParTask g1_par_task(this, _task_queues, &root_processor); + G1ParTask g1_par_task(this, _task_queues, &root_processor, n_workers); // InitialMark needs claim bits to keep track of the marked-through CLDs. 
if (g1_policy()->during_initial_mark_pause()) { ClassLoaderDataGraph::clear_claimed_marks(); diff --git a/hotspot/src/share/vm/gc/shared/workgroup.cpp b/hotspot/src/share/vm/gc/shared/workgroup.cpp index ccf16394c7b..6d615dd7bcb 100644 --- a/hotspot/src/share/vm/gc/shared/workgroup.cpp +++ b/hotspot/src/share/vm/gc/shared/workgroup.cpp @@ -133,8 +133,6 @@ void WorkGang::run_task(AbstractGangTask* task) { } void WorkGang::run_task(AbstractGangTask* task, uint no_of_parallel_workers) { - task->set_for_termination(no_of_parallel_workers); - // This thread is executed by the VM thread which does not block // on ordinary MutexLocker's. MutexLockerEx ml(monitor(), Mutex::_no_safepoint_check_flag); diff --git a/hotspot/src/share/vm/gc/shared/workgroup.hpp b/hotspot/src/share/vm/gc/shared/workgroup.hpp index 9c9bb65c1a1..1c0aad8cf0e 100644 --- a/hotspot/src/share/vm/gc/shared/workgroup.hpp +++ b/hotspot/src/share/vm/gc/shared/workgroup.hpp @@ -59,13 +59,6 @@ public: // The argument tells you which member of the gang you are. virtual void work(uint worker_id) = 0; - // This method configures the task for proper termination. - // Some tasks do not have any requirements on termination - // and may inherit this method that does nothing. Some - // tasks do some coordination on termination and override - // this method to implement that coordination. - virtual void set_for_termination(uint active_workers) {}; - // Debugging accessor for the name. 
const char* name() const PRODUCT_RETURN_(return NULL;); int counter() { return _counter; } @@ -99,12 +92,9 @@ class AbstractGangTaskWOopQueues : public AbstractGangTask { OopTaskQueueSet* _queues; ParallelTaskTerminator _terminator; public: - AbstractGangTaskWOopQueues(const char* name, OopTaskQueueSet* queues) : - AbstractGangTask(name), _queues(queues), _terminator(0, _queues) {} + AbstractGangTaskWOopQueues(const char* name, OopTaskQueueSet* queues, uint n_threads) : + AbstractGangTask(name), _queues(queues), _terminator(n_threads, _queues) {} ParallelTaskTerminator* terminator() { return &_terminator; } - virtual void set_for_termination(uint active_workers) { - terminator()->reset_for_reuse(active_workers); - } OopTaskQueueSet* queues() { return _queues; } }; From 976d6c1e2a79b3ef55584a51bb172ed7289b3302 Mon Sep 17 00:00:00 2001 From: Stefan Karlsson Date: Tue, 26 May 2015 10:06:03 +0200 Subject: [PATCH 18/26] 8081007: Remove redundant active worker variables and calls in ParNewGeneration::collect Reviewed-by: pliden, kbarrett, ehelin --- hotspot/src/share/vm/gc/cms/parNewGeneration.cpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp b/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp index bcfc66e9dfe..00481a5a07e 100644 --- a/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp +++ b/hotspot/src/share/vm/gc/cms/parNewGeneration.cpp @@ -927,23 +927,21 @@ void ParNewGeneration::collect(bool full, to()->clear(SpaceDecorator::Mangle); gch->save_marks(); - assert(workers != NULL, "Need parallel worker threads."); - uint n_workers = active_workers; // Set the correct parallelism (number of queues) in the reference processor - ref_processor()->set_active_mt_degree(n_workers); + ref_processor()->set_active_mt_degree(active_workers); // Always set the terminator for the active number of workers // because only those workers go through the termination protocol. 
- ParallelTaskTerminator _term(n_workers, task_queues()); - ParScanThreadStateSet thread_state_set(workers->active_workers(), + ParallelTaskTerminator _term(active_workers, task_queues()); + ParScanThreadStateSet thread_state_set(active_workers, *to(), *this, *_old_gen, *task_queues(), _overflow_stacks, desired_plab_sz(), _term); - thread_state_set.reset(n_workers, promotion_failed()); + thread_state_set.reset(active_workers, promotion_failed()); { - StrongRootsScope srs(n_workers); + StrongRootsScope srs(active_workers); ParNewGenTask tsk(this, _old_gen, reserved().end(), &thread_state_set, &srs); gch->rem_set()->prepare_for_younger_refs_iterate(true); @@ -951,7 +949,7 @@ void ParNewGeneration::collect(bool full, // separate thread causes wide variance in run times. We can't help this // in the multi-threaded case, but we special-case n=1 here to get // repeatable measurements of the 1-thread overhead of the parallel code. - if (n_workers > 1) { + if (active_workers > 1) { workers->run_task(&tsk); } else { tsk.work(0); @@ -1024,7 +1022,7 @@ void ParNewGeneration::collect(bool full, to()->set_concurrent_iteration_safe_limit(to()->top()); if (ResizePLAB) { - plab_stats()->adjust_desired_plab_sz(n_workers); + plab_stats()->adjust_desired_plab_sz(active_workers); } if (PrintGC && !PrintGCDetails) { From 77588c66b44f836f4b4e551c05dbdfd327273191 Mon Sep 17 00:00:00 2001 From: Chris Plummer Date: Tue, 26 May 2015 11:26:50 -0700 Subject: [PATCH 19/26] 8051712: regression Test7107135 crashes On AARCH64, make ElfFile::specifies_noexecstack() default to noexecstack Reviewed-by: dholmes, dlong, aph --- hotspot/src/share/vm/utilities/elfFile.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hotspot/src/share/vm/utilities/elfFile.cpp b/hotspot/src/share/vm/utilities/elfFile.cpp index 3d895956315..ac943bd154c 100644 --- a/hotspot/src/share/vm/utilities/elfFile.cpp +++ b/hotspot/src/share/vm/utilities/elfFile.cpp @@ -261,7 +261,12 @@ bool
ElfFile::specifies_noexecstack() { } } } +// AARCH64 defaults to noexecstack. All others default to execstack. +#ifdef AARCH64 + return true; +#else return false; +#endif } #endif From 0ee851e5efaa048cdea9cf8ccdba66c5af654768 Mon Sep 17 00:00:00 2001 From: Bengt Rutisson Date: Wed, 27 May 2015 09:04:14 +0200 Subject: [PATCH 20/26] 8081039: G1: Remove unused statistics code in G1NoteEndOfConcMarkClosure and G1ParNoteEndTask Reviewed-by: jmasa, kbarrett --- hotspot/src/share/vm/gc/g1/concurrentMark.cpp | 29 +------------------ 1 file changed, 1 insertion(+), 28 deletions(-) diff --git a/hotspot/src/share/vm/gc/g1/concurrentMark.cpp b/hotspot/src/share/vm/gc/g1/concurrentMark.cpp index 79826904518..ca2fde278a8 100644 --- a/hotspot/src/share/vm/gc/g1/concurrentMark.cpp +++ b/hotspot/src/share/vm/gc/g1/concurrentMark.cpp @@ -1759,28 +1759,20 @@ public: } }; -class G1ParNoteEndTask; - class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { G1CollectedHeap* _g1; - size_t _max_live_bytes; - uint _regions_claimed; size_t _freed_bytes; FreeRegionList* _local_cleanup_list; HeapRegionSetCount _old_regions_removed; HeapRegionSetCount _humongous_regions_removed; HRRSCleanupTask* _hrrs_cleanup_task; - double _claimed_region_time; - double _max_region_time; public: G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, FreeRegionList* local_cleanup_list, HRRSCleanupTask* hrrs_cleanup_task) : _g1(g1), - _max_live_bytes(0), _regions_claimed(0), _freed_bytes(0), - _claimed_region_time(0.0), _max_region_time(0.0), _local_cleanup_list(local_cleanup_list), _old_regions_removed(), _humongous_regions_removed(), @@ -1797,10 +1789,7 @@ public: // We use a claim value of zero here because all regions // were claimed with value 1 in the FinalCount task. 
_g1->reset_gc_time_stamps(hr); - double start = os::elapsedTime(); - _regions_claimed++; hr->note_end_of_marking(); - _max_live_bytes += hr->max_live_bytes(); if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) { _freed_bytes += hr->used(); @@ -1817,18 +1806,8 @@ public: hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task); } - double region_time = (os::elapsedTime() - start); - _claimed_region_time += region_time; - if (region_time > _max_region_time) { - _max_region_time = region_time; - } return false; } - - size_t max_live_bytes() { return _max_live_bytes; } - uint regions_claimed() { return _regions_claimed; } - double claimed_region_time_sec() { return _claimed_region_time; } - double max_region_time_sec() { return _max_region_time; } }; class G1ParNoteEndTask: public AbstractGangTask { @@ -1836,14 +1815,12 @@ class G1ParNoteEndTask: public AbstractGangTask { protected: G1CollectedHeap* _g1h; - size_t _max_live_bytes; - size_t _freed_bytes; FreeRegionList* _cleanup_list; HeapRegionClaimer _hrclaimer; public: G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) : - AbstractGangTask("G1 note end"), _g1h(g1h), _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list), _hrclaimer(n_workers) { + AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) { } void work(uint worker_id) { @@ -1859,8 +1836,6 @@ public: { MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); _g1h->decrement_summary_bytes(g1_note_end.freed_bytes()); - _max_live_bytes += g1_note_end.max_live_bytes(); - _freed_bytes += g1_note_end.freed_bytes(); // If we iterate over the global cleanup list at the end of // cleanup to do this printing we will not guarantee to only @@ -1885,8 +1860,6 @@ public: HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); } } - size_t max_live_bytes() { return _max_live_bytes; } - size_t freed_bytes() { return _freed_bytes; } }; class G1ParScrubRemSetTask: 
public AbstractGangTask { From a1eb515812094a6258841b5c9df31a9638c98e86 Mon Sep 17 00:00:00 2001 From: Alexander Alexeev Date: Tue, 2 Jun 2015 14:28:08 +0000 Subject: [PATCH 21/26] 8081669: aarch64: JTreg TestStable tests failing Fix TestStable failures Reviewed-by: vlivanov --- .../compiler/stable/StableConfiguration.java | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/hotspot/test/compiler/stable/StableConfiguration.java b/hotspot/test/compiler/stable/StableConfiguration.java index 09e395707e7..be69081640c 100644 --- a/hotspot/test/compiler/stable/StableConfiguration.java +++ b/hotspot/test/compiler/stable/StableConfiguration.java @@ -41,10 +41,32 @@ public class StableConfiguration { System.out.println("Server Compiler: " + get()); } + // The method 'get' below returns true if the method is server compiled + // and is used by the Stable tests to determine whether methods in + // general are being server compiled or not as the -XX:+FoldStableValues + // option is only applicable to -server. + // + // On aarch64 we DeOptimize when patching. This means that when the + // method is compiled as a result of -Xcomp it DeOptimizes immediately. + // The result is that getMethodCompilationLevel returns 0. This means + // the method returns true based on java.vm.name. + // + // However when the tests are run with -XX:+TieredCompilation and + // -XX:TieredStopAtLevel=1 this fails because methods will always + // be client compiled. + // + // Solution is to add a simple method 'get1' which should never be + // DeOpted and use that to determine the compilation level instead. + static void get1() { + } + + + // ::get() is among immediately compiled methods. 
static boolean get() { try { - Method m = StableConfiguration.class.getDeclaredMethod("get"); + get1(); + Method m = StableConfiguration.class.getDeclaredMethod("get1"); int level = WB.getMethodCompilationLevel(m); if (level > 0) { return (level == 4); From 5e3d542763e5275fc0523aa291c69df4e4a01ff8 Mon Sep 17 00:00:00 2001 From: Alexander Alexeev Date: Wed, 27 May 2015 09:02:08 +0000 Subject: [PATCH 22/26] 8081289: aarch64: add support for RewriteFrequentPairs in interpreter Add support for RewriteFrequentPairs Reviewed-by: roland --- .../src/cpu/aarch64/vm/globals_aarch64.hpp | 2 +- .../cpu/aarch64/vm/templateTable_aarch64.cpp | 107 ++++++++++++++++-- 2 files changed, 99 insertions(+), 10 deletions(-) diff --git a/hotspot/src/cpu/aarch64/vm/globals_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/globals_aarch64.hpp index 4c011a2e50b..d8930056f8d 100644 --- a/hotspot/src/cpu/aarch64/vm/globals_aarch64.hpp +++ b/hotspot/src/cpu/aarch64/vm/globals_aarch64.hpp @@ -64,7 +64,7 @@ define_pd_global(intx, StackShadowPages, 4 DEBUG_ONLY(+5)); define_pd_global(intx, PreInflateSpin, 10); define_pd_global(bool, RewriteBytecodes, true); -define_pd_global(bool, RewriteFrequentPairs, false); +define_pd_global(bool, RewriteFrequentPairs, true); define_pd_global(bool, UseMembar, true); diff --git a/hotspot/src/cpu/aarch64/vm/templateTable_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/templateTable_aarch64.cpp index 0d026e07c6b..8bf72155077 100644 --- a/hotspot/src/cpu/aarch64/vm/templateTable_aarch64.cpp +++ b/hotspot/src/cpu/aarch64/vm/templateTable_aarch64.cpp @@ -513,23 +513,61 @@ void TemplateTable::nofast_iload() { void TemplateTable::iload_internal(RewriteControl rc) { transition(vtos, itos); if (RewriteFrequentPairs && rc == may_rewrite) { - // TODO : check x86 code for what to do here - __ call_Unimplemented(); - } else { - locals_index(r1); - __ ldr(r0, iaddress(r1)); + Label rewrite, done; + Register bc = r4; + + // get next bytecode + __ load_unsigned_byte(r1, 
at_bcp(Bytecodes::length_for(Bytecodes::_iload))); + + // if _iload, wait to rewrite to iload2. We only want to rewrite the + // last two iloads in a pair. Comparing against fast_iload means that + // the next bytecode is neither an iload or a caload, and therefore + // an iload pair. + __ cmpw(r1, Bytecodes::_iload); + __ br(Assembler::EQ, done); + + // if _fast_iload rewrite to _fast_iload2 + __ cmpw(r1, Bytecodes::_fast_iload); + __ movw(bc, Bytecodes::_fast_iload2); + __ br(Assembler::EQ, rewrite); + + // if _caload rewrite to _fast_icaload + __ cmpw(r1, Bytecodes::_caload); + __ movw(bc, Bytecodes::_fast_icaload); + __ br(Assembler::EQ, rewrite); + + // else rewrite to _fast_iload + __ movw(bc, Bytecodes::_fast_iload); + + // rewrite + // bc: new bytecode + __ bind(rewrite); + patch_bytecode(Bytecodes::_iload, bc, r1, false); + __ bind(done); + } + // do iload, get the local value into tos + locals_index(r1); + __ ldr(r0, iaddress(r1)); + } void TemplateTable::fast_iload2() { - __ call_Unimplemented(); + transition(vtos, itos); + locals_index(r1); + __ ldr(r0, iaddress(r1)); + __ push(itos); + locals_index(r1, 3); + __ ldr(r0, iaddress(r1)); } void TemplateTable::fast_iload() { - __ call_Unimplemented(); + transition(vtos, itos); + locals_index(r1); + __ ldr(r0, iaddress(r1)); } void TemplateTable::lload() @@ -721,7 +759,18 @@ void TemplateTable::caload() // iload followed by caload frequent pair void TemplateTable::fast_icaload() { - __ call_Unimplemented(); + transition(vtos, itos); + // load index out of locals + locals_index(r2); + __ ldr(r1, iaddress(r2)); + + __ pop_ptr(r0); + + // r0: array + // r1: index + index_check(r0, r1); // leaves index in r1, kills rscratch1 + __ lea(r1, Address(r0, r1, Address::uxtw(1))); + __ load_unsigned_short(r0, Address(r1, arrayOopDesc::base_offset_in_bytes(T_CHAR))); } void TemplateTable::saload() @@ -797,7 +846,47 @@ void TemplateTable::aload_0_internal(RewriteControl rc) { // These bytecodes with a small amount of code 
are most profitable // to rewrite if (RewriteFrequentPairs && rc == may_rewrite) { - __ call_Unimplemented(); + Label rewrite, done; + const Register bc = r4; + + // get next bytecode + __ load_unsigned_byte(r1, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); + + // do actual aload_0 + aload(0); + + // if _getfield then wait with rewrite + __ cmpw(r1, Bytecodes::Bytecodes::_getfield); + __ br(Assembler::EQ, done); + + // if _igetfield then rewrite to _fast_iaccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); + __ cmpw(r1, Bytecodes::_fast_igetfield); + __ movw(bc, Bytecodes::_fast_iaccess_0); + __ br(Assembler::EQ, rewrite); + + // if _agetfield then rewrite to _fast_aaccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); + __ cmpw(r1, Bytecodes::_fast_agetfield); + __ movw(bc, Bytecodes::_fast_aaccess_0); + __ br(Assembler::EQ, rewrite); + + // if _fgetfield then rewrite to _fast_faccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); + __ cmpw(r1, Bytecodes::_fast_fgetfield); + __ movw(bc, Bytecodes::_fast_faccess_0); + __ br(Assembler::EQ, rewrite); + + // else rewrite to _fast_aload0 + assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition"); + __ movw(bc, Bytecodes::Bytecodes::_fast_aload_0); + + // rewrite + // bc: new bytecode + __ bind(rewrite); + patch_bytecode(Bytecodes::_aload_0, bc, r1, false); + + __ bind(done); } else { aload(0); } From 082d1bfa0408570b9af4722bbfd6307eab9db959 Mon Sep 17 00:00:00 2001 From: Magnus Ihse Bursie Date: Wed, 3 Jun 2015 15:20:25 +0200 Subject: [PATCH 23/26] 8081692: Configure should verify that -fstack-protector is valid Reviewed-by: erikj --- common/autoconf/flags.m4 | 12 ++-- common/autoconf/generated-configure.sh | 80 +++++++++++++++++++++++--- 2 files changed, 79
insertions(+), 13 deletions(-) diff --git a/common/autoconf/flags.m4 b/common/autoconf/flags.m4 index 70c8b0d55dc..e61638daa14 100644 --- a/common/autoconf/flags.m4 +++ b/common/autoconf/flags.m4 @@ -338,14 +338,16 @@ AC_DEFUN_ONCE([FLAGS_SETUP_COMPILER_FLAGS_FOR_OPTIMIZATION], # no adjustment ;; slowdebug ) - # Add runtime stack smashing and undefined behavior checks - CFLAGS_DEBUG_OPTIONS="-fstack-protector-all --param ssp-buffer-size=1" - CXXFLAGS_DEBUG_OPTIONS="-fstack-protector-all --param ssp-buffer-size=1" + # Add runtime stack smashing and undefined behavior checks. + # Not all versions of gcc support -fstack-protector + STACK_PROTECTOR_CFLAG="-fstack-protector-all" + FLAGS_COMPILER_CHECK_ARGUMENTS([$STACK_PROTECTOR_CFLAG], [], [STACK_PROTECTOR_CFLAG=""]) + + CFLAGS_DEBUG_OPTIONS="$STACK_PROTECTOR_CFLAG --param ssp-buffer-size=1" + CXXFLAGS_DEBUG_OPTIONS="$STACK_PROTECTOR_CFLAG --param ssp-buffer-size=1" ;; esac fi - AC_SUBST(CFLAGS_DEBUG_OPTIONS) - AC_SUBST(CXXFLAGS_DEBUG_OPTIONS) # Optimization levels if test "x$TOOLCHAIN_TYPE" = xsolstudio; then diff --git a/common/autoconf/generated-configure.sh b/common/autoconf/generated-configure.sh index 08af14dadf5..f262cf48b17 100644 --- a/common/autoconf/generated-configure.sh +++ b/common/autoconf/generated-configure.sh @@ -718,8 +718,6 @@ C_O_FLAG_DEBUG C_O_FLAG_NORM C_O_FLAG_HI C_O_FLAG_HIGHEST -CXXFLAGS_DEBUG_OPTIONS -CFLAGS_DEBUG_OPTIONS CXXFLAGS_DEBUG_SYMBOLS CFLAGS_DEBUG_SYMBOLS CXX_FLAG_DEPS @@ -4366,7 +4364,7 @@ VS_SDK_PLATFORM_NAME_2013= #CUSTOM_AUTOCONF_INCLUDE # Do not change or remove the following line, it is needed for consistency checks: -DATE_WHEN_GENERATED=1432629750 +DATE_WHEN_GENERATED=1433337614 ############################################################################### # @@ -41837,14 +41835,80 @@ $as_echo "$ac_cv_c_bigendian" >&6; } # no adjustment ;; slowdebug ) - # Add runtime stack smashing and undefined behavior checks - CFLAGS_DEBUG_OPTIONS="-fstack-protector-all --param 
ssp-buffer-size=1" - CXXFLAGS_DEBUG_OPTIONS="-fstack-protector-all --param ssp-buffer-size=1" - ;; - esac + # Add runtime stack smashing and undefined behavior checks. + # Not all versions of gcc support -fstack-protector + STACK_PROTECTOR_CFLAG="-fstack-protector-all" + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if compiler supports \"$STACK_PROTECTOR_CFLAG\"" >&5 +$as_echo_n "checking if compiler supports \"$STACK_PROTECTOR_CFLAG\"... " >&6; } + supports=yes + + saved_cflags="$CFLAGS" + CFLAGS="$CFLAGS $STACK_PROTECTOR_CFLAG" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +int i; +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +else + supports=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + CFLAGS="$saved_cflags" + + saved_cxxflags="$CXXFLAGS" + CXXFLAGS="$CXXFLAG $STACK_PROTECTOR_CFLAG" + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +int i; +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + +else + supports=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + CXXFLAGS="$saved_cxxflags" + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $supports" >&5 +$as_echo "$supports" >&6; } + if test "x$supports" = "xyes" ; then + : + else + STACK_PROTECTOR_CFLAG="" fi + CFLAGS_DEBUG_OPTIONS="$STACK_PROTECTOR_CFLAG --param ssp-buffer-size=1" + CXXFLAGS_DEBUG_OPTIONS="$STACK_PROTECTOR_CFLAG --param ssp-buffer-size=1" + ;; + esac + fi # Optimization levels if test "x$TOOLCHAIN_TYPE" = xsolstudio; then From 10741a86f6abbe9cc7fbb2f8c9db4941dbf08407 Mon Sep 17 00:00:00 2001 From: Ed Nevill Date: Thu, 4 Jun 2015 12:04:18 +0000 Subject: [PATCH 24/26] 8079565: aarch64: Add vectorization support for aarch64 Add vectorization support Reviewed-by: roland --- hotspot/src/cpu/aarch64/vm/aarch64.ad | 1391 +++++++++++++++-- .../src/cpu/aarch64/vm/assembler_aarch64.hpp | 156 +- .../cpu/aarch64/vm/macroAssembler_aarch64.cpp | 4 +- .../cpu/aarch64/vm/macroAssembler_aarch64.hpp | 40 + .../src/cpu/aarch64/vm/register_aarch64.hpp | 2 +- 5 files changed, 1461 insertions(+), 132 deletions(-) diff --git a/hotspot/src/cpu/aarch64/vm/aarch64.ad b/hotspot/src/cpu/aarch64/vm/aarch64.ad index a71729c503b..a9cd41a7949 100644 --- a/hotspot/src/cpu/aarch64/vm/aarch64.ad +++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad @@ -161,70 +161,165 @@ reg_def R31_H ( NS, NS, Op_RegI, 31, r31_sp->as_VMReg()->next()); // the platform ABI treats v8-v15 as callee save). 
float registers // v16-v31 are SOC as per the platform spec - reg_def V0 ( SOC, SOC, Op_RegF, 0, v0->as_VMReg() ); - reg_def V0_H ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next() ); - reg_def V1 ( SOC, SOC, Op_RegF, 1, v1->as_VMReg() ); - reg_def V1_H ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next() ); - reg_def V2 ( SOC, SOC, Op_RegF, 2, v2->as_VMReg() ); - reg_def V2_H ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next() ); - reg_def V3 ( SOC, SOC, Op_RegF, 3, v3->as_VMReg() ); - reg_def V3_H ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next() ); - reg_def V4 ( SOC, SOC, Op_RegF, 4, v4->as_VMReg() ); - reg_def V4_H ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next() ); - reg_def V5 ( SOC, SOC, Op_RegF, 5, v5->as_VMReg() ); - reg_def V5_H ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next() ); - reg_def V6 ( SOC, SOC, Op_RegF, 6, v6->as_VMReg() ); - reg_def V6_H ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next() ); - reg_def V7 ( SOC, SOC, Op_RegF, 7, v7->as_VMReg() ); - reg_def V7_H ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next() ); - reg_def V8 ( SOC, SOE, Op_RegF, 8, v8->as_VMReg() ); - reg_def V8_H ( SOC, SOE, Op_RegF, 8, v8->as_VMReg()->next() ); - reg_def V9 ( SOC, SOE, Op_RegF, 9, v9->as_VMReg() ); - reg_def V9_H ( SOC, SOE, Op_RegF, 9, v9->as_VMReg()->next() ); - reg_def V10 ( SOC, SOE, Op_RegF, 10, v10->as_VMReg() ); - reg_def V10_H( SOC, SOE, Op_RegF, 10, v10->as_VMReg()->next()); - reg_def V11 ( SOC, SOE, Op_RegF, 11, v11->as_VMReg() ); - reg_def V11_H( SOC, SOE, Op_RegF, 11, v11->as_VMReg()->next()); - reg_def V12 ( SOC, SOE, Op_RegF, 12, v12->as_VMReg() ); - reg_def V12_H( SOC, SOE, Op_RegF, 12, v12->as_VMReg()->next()); - reg_def V13 ( SOC, SOE, Op_RegF, 13, v13->as_VMReg() ); - reg_def V13_H( SOC, SOE, Op_RegF, 13, v13->as_VMReg()->next()); - reg_def V14 ( SOC, SOE, Op_RegF, 14, v14->as_VMReg() ); - reg_def V14_H( SOC, SOE, Op_RegF, 14, v14->as_VMReg()->next()); - reg_def V15 ( SOC, SOE, Op_RegF, 15, v15->as_VMReg() ); - reg_def V15_H( SOC, SOE, Op_RegF, 15, 
v15->as_VMReg()->next()); - reg_def V16 ( SOC, SOC, Op_RegF, 16, v16->as_VMReg() ); - reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next()); - reg_def V17 ( SOC, SOC, Op_RegF, 17, v17->as_VMReg() ); - reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next()); - reg_def V18 ( SOC, SOC, Op_RegF, 18, v18->as_VMReg() ); - reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next()); - reg_def V19 ( SOC, SOC, Op_RegF, 19, v19->as_VMReg() ); - reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next()); - reg_def V20 ( SOC, SOC, Op_RegF, 20, v20->as_VMReg() ); - reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next()); - reg_def V21 ( SOC, SOC, Op_RegF, 21, v21->as_VMReg() ); - reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next()); - reg_def V22 ( SOC, SOC, Op_RegF, 22, v22->as_VMReg() ); - reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next()); - reg_def V23 ( SOC, SOC, Op_RegF, 23, v23->as_VMReg() ); - reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next()); - reg_def V24 ( SOC, SOC, Op_RegF, 24, v24->as_VMReg() ); - reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next()); - reg_def V25 ( SOC, SOC, Op_RegF, 25, v25->as_VMReg() ); - reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next()); - reg_def V26 ( SOC, SOC, Op_RegF, 26, v26->as_VMReg() ); - reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next()); - reg_def V27 ( SOC, SOC, Op_RegF, 27, v27->as_VMReg() ); - reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next()); - reg_def V28 ( SOC, SOC, Op_RegF, 28, v28->as_VMReg() ); - reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next()); - reg_def V29 ( SOC, SOC, Op_RegF, 29, v29->as_VMReg() ); - reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next()); - reg_def V30 ( SOC, SOC, Op_RegF, 30, v30->as_VMReg() ); - reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next()); - reg_def V31 ( SOC, SOC, Op_RegF, 31, v31->as_VMReg() ); - reg_def V31_H( SOC, SOC, Op_RegF, 31, 
v31->as_VMReg()->next()); + reg_def V0 ( SOC, SOC, Op_RegF, 0, v0->as_VMReg() ); + reg_def V0_H ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next() ); + reg_def V0_J ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(2) ); + reg_def V0_K ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(3) ); + + reg_def V1 ( SOC, SOC, Op_RegF, 1, v1->as_VMReg() ); + reg_def V1_H ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next() ); + reg_def V1_J ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(2) ); + reg_def V1_K ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(3) ); + + reg_def V2 ( SOC, SOC, Op_RegF, 2, v2->as_VMReg() ); + reg_def V2_H ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next() ); + reg_def V2_J ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(2) ); + reg_def V2_K ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(3) ); + + reg_def V3 ( SOC, SOC, Op_RegF, 3, v3->as_VMReg() ); + reg_def V3_H ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next() ); + reg_def V3_J ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(2) ); + reg_def V3_K ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(3) ); + + reg_def V4 ( SOC, SOC, Op_RegF, 4, v4->as_VMReg() ); + reg_def V4_H ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next() ); + reg_def V4_J ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(2) ); + reg_def V4_K ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(3) ); + + reg_def V5 ( SOC, SOC, Op_RegF, 5, v5->as_VMReg() ); + reg_def V5_H ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next() ); + reg_def V5_J ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(2) ); + reg_def V5_K ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(3) ); + + reg_def V6 ( SOC, SOC, Op_RegF, 6, v6->as_VMReg() ); + reg_def V6_H ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next() ); + reg_def V6_J ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(2) ); + reg_def V6_K ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(3) ); + + reg_def V7 ( SOC, SOC, Op_RegF, 7, v7->as_VMReg() ); + reg_def V7_H ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next() ); + reg_def V7_J ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(2) ); + reg_def 
V7_K ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(3) ); + + reg_def V8 ( SOC, SOC, Op_RegF, 8, v8->as_VMReg() ); + reg_def V8_H ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next() ); + reg_def V8_J ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(2) ); + reg_def V8_K ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(3) ); + + reg_def V9 ( SOC, SOC, Op_RegF, 9, v9->as_VMReg() ); + reg_def V9_H ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next() ); + reg_def V9_J ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(2) ); + reg_def V9_K ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(3) ); + + reg_def V10 ( SOC, SOC, Op_RegF, 10, v10->as_VMReg() ); + reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() ); + reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2)); + reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3)); + + reg_def V11 ( SOC, SOC, Op_RegF, 11, v11->as_VMReg() ); + reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() ); + reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2)); + reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3)); + + reg_def V12 ( SOC, SOC, Op_RegF, 12, v12->as_VMReg() ); + reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() ); + reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2)); + reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3)); + + reg_def V13 ( SOC, SOC, Op_RegF, 13, v13->as_VMReg() ); + reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() ); + reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2)); + reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3)); + + reg_def V14 ( SOC, SOC, Op_RegF, 14, v14->as_VMReg() ); + reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() ); + reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2)); + reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3)); + + reg_def V15 ( SOC, SOC, Op_RegF, 15, v15->as_VMReg() ); + reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() ); + 
reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2)); + reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3)); + + reg_def V16 ( SOC, SOC, Op_RegF, 16, v16->as_VMReg() ); + reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() ); + reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2)); + reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3)); + + reg_def V17 ( SOC, SOC, Op_RegF, 17, v17->as_VMReg() ); + reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() ); + reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2)); + reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3)); + + reg_def V18 ( SOC, SOC, Op_RegF, 18, v18->as_VMReg() ); + reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() ); + reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2)); + reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3)); + + reg_def V19 ( SOC, SOC, Op_RegF, 19, v19->as_VMReg() ); + reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() ); + reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2)); + reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3)); + + reg_def V20 ( SOC, SOC, Op_RegF, 20, v20->as_VMReg() ); + reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() ); + reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2)); + reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3)); + + reg_def V21 ( SOC, SOC, Op_RegF, 21, v21->as_VMReg() ); + reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() ); + reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2)); + reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3)); + + reg_def V22 ( SOC, SOC, Op_RegF, 22, v22->as_VMReg() ); + reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() ); + reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2)); + reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3)); + + reg_def V23 ( SOC, SOC, Op_RegF, 23, 
v23->as_VMReg() ); + reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() ); + reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2)); + reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3)); + + reg_def V24 ( SOC, SOC, Op_RegF, 24, v24->as_VMReg() ); + reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() ); + reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2)); + reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3)); + + reg_def V25 ( SOC, SOC, Op_RegF, 25, v25->as_VMReg() ); + reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() ); + reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2)); + reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3)); + + reg_def V26 ( SOC, SOC, Op_RegF, 26, v26->as_VMReg() ); + reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() ); + reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2)); + reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3)); + + reg_def V27 ( SOC, SOC, Op_RegF, 27, v27->as_VMReg() ); + reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() ); + reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2)); + reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3)); + + reg_def V28 ( SOC, SOC, Op_RegF, 28, v28->as_VMReg() ); + reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() ); + reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2)); + reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3)); + + reg_def V29 ( SOC, SOC, Op_RegF, 29, v29->as_VMReg() ); + reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() ); + reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2)); + reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3)); + + reg_def V30 ( SOC, SOC, Op_RegF, 30, v30->as_VMReg() ); + reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() ); + reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2)); + reg_def V30_K( 
SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3)); + + reg_def V31 ( SOC, SOC, Op_RegF, 31, v31->as_VMReg() ); + reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() ); + reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2)); + reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3)); // ---------------------------- // Special Registers @@ -291,42 +386,42 @@ alloc_class chunk0( alloc_class chunk1( // no save - V16, V16_H, - V17, V17_H, - V18, V18_H, - V19, V19_H, - V20, V20_H, - V21, V21_H, - V22, V22_H, - V23, V23_H, - V24, V24_H, - V25, V25_H, - V26, V26_H, - V27, V27_H, - V28, V28_H, - V29, V29_H, - V30, V30_H, - V31, V31_H, + V16, V16_H, V16_J, V16_K, + V17, V17_H, V17_J, V17_K, + V18, V18_H, V18_J, V18_K, + V19, V19_H, V19_J, V19_K, + V20, V20_H, V20_J, V20_K, + V21, V21_H, V21_J, V21_K, + V22, V22_H, V22_J, V22_K, + V23, V23_H, V23_J, V23_K, + V24, V24_H, V24_J, V24_K, + V25, V25_H, V25_J, V25_K, + V26, V26_H, V26_J, V26_K, + V27, V27_H, V27_J, V27_K, + V28, V28_H, V28_J, V28_K, + V29, V29_H, V29_J, V29_K, + V30, V30_H, V30_J, V30_K, + V31, V31_H, V31_J, V31_K, // arg registers - V0, V0_H, - V1, V1_H, - V2, V2_H, - V3, V3_H, - V4, V4_H, - V5, V5_H, - V6, V6_H, - V7, V7_H, + V0, V0_H, V0_J, V0_K, + V1, V1_H, V1_J, V1_K, + V2, V2_H, V2_J, V2_K, + V3, V3_H, V3_J, V3_K, + V4, V4_H, V4_J, V4_K, + V5, V5_H, V5_J, V5_K, + V6, V6_H, V6_J, V6_K, + V7, V7_H, V7_J, V7_K, // non-volatiles - V8, V8_H, - V9, V9_H, - V10, V10_H, - V11, V11_H, - V12, V12_H, - V13, V13_H, - V14, V14_H, - V15, V15_H, + V8, V8_H, V8_J, V8_K, + V9, V9_H, V9_J, V9_K, + V10, V10_H, V10_J, V10_K, + V11, V11_H, V11_J, V11_K, + V12, V12_H, V12_J, V12_K, + V13, V13_H, V13_J, V13_K, + V14, V14_H, V14_J, V14_K, + V15, V15_H, V15_J, V15_K, ); alloc_class chunk2(RFLAGS); @@ -770,6 +865,42 @@ reg_class double_reg( V31, V31_H ); +// Class for all 128bit vector registers +reg_class vectorx_reg( + V0, V0_H, V0_J, V0_K, + V1, V1_H, V1_J, V1_K, + V2, V2_H, V2_J, V2_K, + V3, V3_H, V3_J, 
V3_K, + V4, V4_H, V4_J, V4_K, + V5, V5_H, V5_J, V5_K, + V6, V6_H, V6_J, V6_K, + V7, V7_H, V7_J, V7_K, + V8, V8_H, V8_J, V8_K, + V9, V9_H, V9_J, V9_K, + V10, V10_H, V10_J, V10_K, + V11, V11_H, V11_J, V11_K, + V12, V12_H, V12_J, V12_K, + V13, V13_H, V13_J, V13_K, + V14, V14_H, V14_J, V14_K, + V15, V15_H, V15_J, V15_K, + V16, V16_H, V16_J, V16_K, + V17, V17_H, V17_J, V17_K, + V18, V18_H, V18_J, V18_K, + V19, V19_H, V19_J, V19_K, + V20, V20_H, V20_J, V20_K, + V21, V21_H, V21_J, V21_K, + V22, V22_H, V22_J, V22_K, + V23, V23_H, V23_J, V23_K, + V24, V24_H, V24_J, V24_K, + V25, V25_H, V25_J, V25_K, + V26, V26_H, V26_J, V26_K, + V27, V27_H, V27_J, V27_K, + V28, V28_H, V28_J, V28_K, + V29, V29_H, V29_J, V29_K, + V30, V30_H, V30_J, V30_K, + V31, V31_H, V31_J, V31_K +); + // Class for 128 bit register v0 reg_class v0_reg( V0, V0_H @@ -1964,7 +2095,7 @@ static enum RC rc_class(OptoReg::Name reg) { } // we have 32 float register * 2 halves - if (reg < 60 + 64) { + if (reg < 60 + 128) { return rc_float; } @@ -2000,6 +2131,78 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo return 0; // Self copy, no move. 
} + if (bottom_type()->isa_vect() != NULL) { + uint len = 4; + if (cbuf) { + MacroAssembler _masm(cbuf); + uint ireg = ideal_reg(); + assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity"); + assert(ireg == Op_VecX, "sanity"); + if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { + // stack->stack: the 128-bit slot is copied as two 64-bit halves through rscratch1/rscratch2 + int src_offset = ra_->reg2offset(src_lo); + int dst_offset = ra_->reg2offset(dst_lo); + assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset"); // must fire when an offset is NOT 8-byte aligned + len = 8; + if (src_offset < 512) { + __ ldp(rscratch1, rscratch2, Address(sp, src_offset)); + } else { + __ ldr(rscratch1, Address(sp, src_offset)); + __ ldr(rscratch2, Address(sp, src_offset + 8)); // second half sits 8 bytes up, same layout the ldp path reads + len += 4; + } + if (dst_offset < 512) { + __ stp(rscratch1, rscratch2, Address(sp, dst_offset)); + } else { + __ str(rscratch1, Address(sp, dst_offset)); + __ str(rscratch2, Address(sp, dst_offset + 8)); // second half sits 8 bytes up, same layout the stp path writes + len += 4; + } + } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) { + __ orr(as_FloatRegister(Matcher::_regEncode[dst_lo]), __ T16B, + as_FloatRegister(Matcher::_regEncode[src_lo]), + as_FloatRegister(Matcher::_regEncode[src_lo])); + } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) { + __ str(as_FloatRegister(Matcher::_regEncode[src_lo]), __ Q, + Address(sp, ra_->reg2offset(dst_lo))); + } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) { + __ ldr(as_FloatRegister(Matcher::_regEncode[dst_lo]), __ Q, + Address(sp, ra_->reg2offset(src_lo))); + } else { + ShouldNotReachHere(); + } + } else if (st) { + if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { + // stack->stack: listing mirrors the instruction sequence emitted in the cbuf branch + int src_offset = ra_->reg2offset(src_lo); + int dst_offset = ra_->reg2offset(dst_lo); + if (src_offset < 512) { + st->print("ldp rscratch1, rscratch2, [sp, #%d]", src_offset); + } else { + st->print("ldr rscratch1, [sp, #%d]", src_offset); + st->print("\nldr rscratch2, [sp, #%d]", src_offset + 8); + } + if (dst_offset < 512) { + st->print("\nstp rscratch1, rscratch2, [sp, #%d]", dst_offset); + } else { + 
st->print("\nstr rscratch1, [sp, #%d]", dst_offset); + st->print("\nstr rscratch2, [sp, #%d]", dst_offset + 8); + } + st->print("\t# vector spill, stack to stack"); + } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) { + st->print("mov %s, %s\t# vector spill, reg to reg", + Matcher::regName[dst_lo], Matcher::regName[src_lo]); + } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) { + st->print("str %s, [sp, #%d]\t# vector spill, reg to stack", + Matcher::regName[src_lo], ra_->reg2offset(dst_lo)); + } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) { + st->print("ldr %s, [sp, #%d]\t# vector spill, stack to reg", + Matcher::regName[dst_lo], ra_->reg2offset(src_lo)); + } + } + return len; + } + switch (src_lo_rc) { case rc_int: if (dst_lo_rc == rc_int) { // gpr --> gpr copy @@ -2422,8 +2625,12 @@ const bool Matcher::convL2FSupported(void) { // Vector width in bytes. const int Matcher::vector_width_in_bytes(BasicType bt) { - // TODO fixme - return 0; + int size = MIN2(16,(int)MaxVectorSize); + // Minimum 2 values in vector + if (size < 2*type2aelembytes(bt)) size = 0; + // But never < 4 + if (size < 4) size = 0; + return size; } // Limits on vector size (number of elements) loaded into vector. @@ -2431,22 +2638,19 @@ const int Matcher::max_vector_size(const BasicType bt) { return vector_width_in_bytes(bt)/type2aelembytes(bt); } const int Matcher::min_vector_size(const BasicType bt) { - int max_size = max_vector_size(bt); - // Min size which can be loaded into vector is 4 bytes. - int size = (type2aelembytes(bt) == 1) ? 4 : 2; - return MIN2(size,max_size); + //return (type2aelembytes(bt) == 1) ? 4 : 2; + // For the moment, only support 1 vector size, 128 bits + return max_vector_size(bt); } // Vector ideal reg. const int Matcher::vector_ideal_reg(int len) { - // TODO fixme - return Op_RegD; + return Op_VecX; } // Only lowest bits of xmm reg are used for vector shift count. 
const int Matcher::vector_shift_count_ideal_reg(int size) { - // TODO fixme - return Op_RegL; + return Op_VecX; } // AES support not yet implemented @@ -2657,6 +2861,8 @@ static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype) typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr); typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr); +typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt, + MacroAssembler::SIMD_RegVariant T, const Address &adr); // Used for all non-volatile memory accesses. The use of // $mem->opcode() to discover whether this pattern uses sign-extended @@ -2724,6 +2930,18 @@ typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address } } + static void loadStore(MacroAssembler masm, mem_vector_insn insn, + FloatRegister reg, MacroAssembler::SIMD_RegVariant T, + int opcode, Register base, int index, int size, int disp) + { + if (index == -1) { + (masm.*insn)(reg, T, Address(base, disp)); + } else { + assert(disp == 0, "unsupported address mode"); + (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size))); + } + } + %} @@ -2855,6 +3073,24 @@ encode %{ as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} + enc_class aarch64_enc_ldrvS(vecX dst, memory mem) %{ + FloatRegister dst_reg = as_FloatRegister($dst$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S, + $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_ldrvD(vecX dst, memory mem) %{ + FloatRegister dst_reg = as_FloatRegister($dst$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D, + $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{ + FloatRegister dst_reg = as_FloatRegister($dst$$reg); + loadStore(MacroAssembler(&cbuf), 
&MacroAssembler::ldr, dst_reg, MacroAssembler::Q, + $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + enc_class aarch64_enc_strb(iRegI src, memory mem) %{ Register src_reg = as_Register($src$$reg); loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(), @@ -2923,6 +3159,24 @@ encode %{ as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} + enc_class aarch64_enc_strvS(vecX src, memory mem) %{ + FloatRegister src_reg = as_FloatRegister($src$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S, + $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_strvD(vecX src, memory mem) %{ + FloatRegister src_reg = as_FloatRegister($src$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D, + $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + + enc_class aarch64_enc_strvQ(vecX src, memory mem) %{ + FloatRegister src_reg = as_FloatRegister($src$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q, + $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + // END Non-volatile memory access // volatile loads and stores @@ -4933,6 +5187,16 @@ operand vRegD() interface(REG_INTER); %} +operand vecX() +%{ + constraint(ALLOC_IN_RC(vectorx_reg)); + match(VecX); + + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + operand vRegD_V0() %{ constraint(ALLOC_IN_RC(v0_reg)); @@ -5505,6 +5769,7 @@ operand iRegL2I(iRegL reg) %{ interface(REG_INTER) %} +opclass vmem(indirect, indIndex, indOffI, indOffL); //----------OPERAND CLASSES---------------------------------------------------- // Operand Classes are groups of operands that are used as to simplify @@ -12926,7 +13191,919 @@ instruct tlsLoadP(thread_RegP dst) ins_pipe(pipe_class_empty); %} +// ====================VECTOR 
INSTRUCTIONS===================================== +// Load vector (32 bits) +instruct loadV4(vecX dst, vmem mem) +%{ + predicate(n->as_LoadVector()->memory_size() == 4); + match(Set dst (LoadVector mem)); + ins_cost(4 * INSN_COST); + format %{ "ldrs $dst,$mem\t# vector (32 bits)" %} + ins_encode( aarch64_enc_ldrvS(dst, mem) ); + ins_pipe(pipe_class_memory); +%} + +// Load vector (64 bits) +instruct loadV8(vecX dst, vmem mem) +%{ + predicate(n->as_LoadVector()->memory_size() == 8); + match(Set dst (LoadVector mem)); + ins_cost(4 * INSN_COST); + format %{ "ldrd $dst,$mem\t# vector (64 bits)" %} + ins_encode( aarch64_enc_ldrvD(dst, mem) ); + ins_pipe(pipe_class_memory); +%} + +// Load Vector (128 bits) +instruct loadV16(vecX dst, vmem mem) +%{ + predicate(n->as_LoadVector()->memory_size() == 16); + match(Set dst (LoadVector mem)); + ins_cost(4 * INSN_COST); + format %{ "ldrq $dst,$mem\t# vector (128 bits)" %} + ins_encode( aarch64_enc_ldrvQ(dst, mem) ); + ins_pipe(pipe_class_memory); +%} + +// Store Vector (32 bits) +instruct storeV4(vecX src, vmem mem) +%{ + predicate(n->as_StoreVector()->memory_size() == 4); + match(Set mem (StoreVector mem src)); + ins_cost(4 * INSN_COST); + format %{ "strs $mem,$src\t# vector (32 bits)" %} + ins_encode( aarch64_enc_strvS(src, mem) ); + ins_pipe(pipe_class_memory); +%} + +// Store Vector (64 bits) +instruct storeV8(vecX src, vmem mem) +%{ + predicate(n->as_StoreVector()->memory_size() == 8); + match(Set mem (StoreVector mem src)); + ins_cost(4 * INSN_COST); + format %{ "strd $mem,$src\t# vector (64 bits)" %} + ins_encode( aarch64_enc_strvD(src, mem) ); + ins_pipe(pipe_class_memory); +%} + +// Store Vector (128 bits) +instruct storeV16(vecX src, vmem mem) +%{ + predicate(n->as_StoreVector()->memory_size() == 16); + match(Set mem (StoreVector mem src)); + ins_cost(4 * INSN_COST); + format %{ "strq $mem,$src\t# vector (128 bits)" %} + ins_encode( aarch64_enc_strvQ(src, mem) ); + ins_pipe(pipe_class_memory); +%} + +instruct 
replicate16B(vecX dst, iRegIorL2I src) +%{ + match(Set dst (ReplicateB src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (16B)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct replicate16B_imm(vecX dst, immI con) +%{ + match(Set dst (ReplicateB con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector(16B)" %} + ins_encode %{ + __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff); // pass only the byte that gets replicated; drops sign-extension bits of negative constants + %} + ins_pipe(pipe_class_default); +%} + +instruct replicate8S(vecX dst, iRegIorL2I src) +%{ + match(Set dst (ReplicateS src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (8S)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct replicate8S_imm(vecX dst, immI con) +%{ + match(Set dst (ReplicateS con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector(8H)" %} + ins_encode %{ + __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff); // pass only the halfword that gets replicated; drops sign-extension bits of negative constants + %} + ins_pipe(pipe_class_default); +%} + +instruct replicate4I(vecX dst, iRegIorL2I src) +%{ + match(Set dst (ReplicateI src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (4I)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct replicate4I_imm(vecX dst, immI con) +%{ + match(Set dst (ReplicateI con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector(4I)" %} + ins_encode %{ + __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant); + %} + ins_pipe(pipe_class_default); +%} + +instruct replicate2L(vecX dst, iRegL src) +%{ + match(Set dst (ReplicateL src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (2L)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct 
replicate2L_zero(vecX dst, immI0 zero) +%{ + match(Set dst (ReplicateI zero)); + ins_cost(INSN_COST); + format %{ "movi $dst, $zero\t# vector(4I)" %} + ins_encode %{ + __ eor(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($dst$$reg), + as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct replicate4F(vecX dst, vRegF src) +%{ + match(Set dst (ReplicateF src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (4F)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct replicate2D(vecX dst, vRegD src) +%{ + match(Set dst (ReplicateD src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (2D)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +// ====================REDUCTION ARITHMETIC==================================== + +instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2) +%{ + match(Set dst (AddReductionVI src1 src2)); + ins_cost(INSN_COST); + effect(TEMP tmp, TEMP tmp2); + format %{ "addv $tmp, T4S, $src2\n\t" + "umov $tmp2, $tmp, S, 0\n\t" + "addw $dst, $tmp2, $src1\t add reduction4i" + %} + ins_encode %{ + __ addv(as_FloatRegister($tmp$$reg), __ T4S, + as_FloatRegister($src2$$reg)); + __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0); + __ addw($dst$$Register, $tmp2$$Register, $src1$$Register); + %} + ins_pipe(pipe_class_default); +%} + +instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2) +%{ + match(Set dst (MulReductionVI src1 src2)); + ins_cost(INSN_COST); + effect(TEMP tmp, TEMP tmp2, TEMP dst); + format %{ "ins $tmp, $src2, 0, 1\n\t" + "mul $tmp, $tmp, $src2\n\t" + "umov $tmp2, $tmp, S, 0\n\t" + "mul $dst, $tmp2, $src1\n\t" + "umov $tmp2, $tmp, S, 1\n\t" + "mul $dst, $tmp2, $dst\t mul reduction4i\n\t" + %} + ins_encode %{ + __ 
ins(as_FloatRegister($tmp$$reg), __ D, + as_FloatRegister($src2$$reg), 0, 1); + __ mulv(as_FloatRegister($tmp$$reg), __ T2S, + as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg)); + __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0); + __ mul($dst$$Register, $tmp2$$Register, $src1$$Register); + __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1); + __ mul($dst$$Register, $tmp2$$Register, $dst$$Register); + %} + ins_pipe(pipe_class_default); +%} + +instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp) +%{ + match(Set dst (AddReductionVF src1 src2)); + ins_cost(INSN_COST); + effect(TEMP tmp, TEMP dst); + format %{ "fadds $dst, $src1, $src2\n\t" + "ins $tmp, S, $src2, 0, 1\n\t" + "fadds $dst, $dst, $tmp\n\t" + "ins $tmp, S, $src2, 0, 2\n\t" + "fadds $dst, $dst, $tmp\n\t" + "ins $tmp, S, $src2, 0, 3\n\t" + "fadds $dst, $dst, $tmp\t add reduction4f" + %} + ins_encode %{ + __ fadds(as_FloatRegister($dst$$reg), + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + __ ins(as_FloatRegister($tmp$$reg), __ S, + as_FloatRegister($src2$$reg), 0, 1); + __ fadds(as_FloatRegister($dst$$reg), + as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + __ ins(as_FloatRegister($tmp$$reg), __ S, + as_FloatRegister($src2$$reg), 0, 2); + __ fadds(as_FloatRegister($dst$$reg), + as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + __ ins(as_FloatRegister($tmp$$reg), __ S, + as_FloatRegister($src2$$reg), 0, 3); + __ fadds(as_FloatRegister($dst$$reg), + as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp) +%{ + match(Set dst (MulReductionVF src1 src2)); + ins_cost(INSN_COST); + effect(TEMP tmp, TEMP dst); + format %{ "fmuls $dst, $src1, $src2\n\t" + "ins $tmp, S, $src2, 0, 1\n\t" + "fmuls $dst, $dst, $tmp\n\t" + "ins $tmp, S, $src2, 0, 2\n\t" + "fmuls $dst, $dst, $tmp\n\t" + "ins $tmp, S, $src2, 0, 3\n\t" + "fmuls 
$dst, $dst, $tmp\t mul reduction4f" + %} + ins_encode %{ + __ fmuls(as_FloatRegister($dst$$reg), + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + __ ins(as_FloatRegister($tmp$$reg), __ S, + as_FloatRegister($src2$$reg), 0, 1); + __ fmuls(as_FloatRegister($dst$$reg), + as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + __ ins(as_FloatRegister($tmp$$reg), __ S, + as_FloatRegister($src2$$reg), 0, 2); + __ fmuls(as_FloatRegister($dst$$reg), + as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + __ ins(as_FloatRegister($tmp$$reg), __ S, + as_FloatRegister($src2$$reg), 0, 3); + __ fmuls(as_FloatRegister($dst$$reg), + as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp) +%{ + match(Set dst (AddReductionVD src1 src2)); // scalar src1 plus the double lanes of src2, accumulated in dst + ins_cost(INSN_COST); + effect(TEMP tmp, TEMP dst); + format %{ "faddd $dst, $src1, $src2\n\t" + "ins $tmp, D, $src2, 0, 1\n\t" + "faddd $dst, $dst, $tmp\t add reduction2d" + %} + ins_encode %{ + __ faddd(as_FloatRegister($dst$$reg), + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + __ ins(as_FloatRegister($tmp$$reg), __ D, + as_FloatRegister($src2$$reg), 0, 1); + __ faddd(as_FloatRegister($dst$$reg), + as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp) +%{ + match(Set dst (MulReductionVD src1 src2)); // scalar src1 times the double lanes of src2, accumulated in dst + ins_cost(INSN_COST); + effect(TEMP tmp, TEMP dst); + format %{ "fmuld $dst, $src1, $src2\n\t" + "ins $tmp, D, $src2, 0, 1\n\t" + "fmuld $dst, $dst, $tmp\t mul reduction2d" + %} + ins_encode %{ + __ fmuld(as_FloatRegister($dst$$reg), + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + __ ins(as_FloatRegister($tmp$$reg), __ D, + as_FloatRegister($src2$$reg), 0, 1); + __ fmuld(as_FloatRegister($dst$$reg), + as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + %} 
+ ins_pipe(pipe_class_default); +%} + +// ====================VECTOR ARITHMETIC======================================= + +// --------------------------------- ADD -------------------------------------- + +instruct vadd16B(vecX dst, vecX src1, vecX src2) +%{ + match(Set dst (AddVB src1 src2)); + ins_cost(INSN_COST); + format %{ "addv $dst,$src1,$src2\t# vector (16B)" %} + ins_encode %{ + __ addv(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vadd8S(vecX dst, vecX src1, vecX src2) +%{ + match(Set dst (AddVS src1 src2)); + ins_cost(INSN_COST); + format %{ "addv $dst,$src1,$src2\t# vector (8H)" %} + ins_encode %{ + __ addv(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vadd4I(vecX dst, vecX src1, vecX src2) +%{ + match(Set dst (AddVI src1 src2)); + ins_cost(INSN_COST); + format %{ "addv $dst,$src1,$src2\t# vector (4S)" %} + ins_encode %{ + __ addv(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vadd2L(vecX dst, vecX src1, vecX src2) +%{ + match(Set dst (AddVL src1 src2)); + ins_cost(INSN_COST); + format %{ "addv $dst,$src1,$src2\t# vector (2L)" %} + ins_encode %{ + __ addv(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vadd4F(vecX dst, vecX src1, vecX src2) +%{ + match(Set dst (AddVF src1 src2)); + ins_cost(INSN_COST); + format %{ "fadd $dst,$src1,$src2\t# vector (4S)" %} + ins_encode %{ + __ fadd(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vadd2D(vecX dst, vecX src1, vecX src2) +%{ + match(Set dst (AddVD src1 src2)); + 
ins_cost(INSN_COST); + format %{ "fadd $dst,$src1,$src2\t# vector (2D)" %} + ins_encode %{ + __ fadd(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +// --------------------------------- SUB -------------------------------------- + +instruct vsub16B(vecX dst, vecX src1, vecX src2) +%{ + match(Set dst (SubVB src1 src2)); + ins_cost(INSN_COST); + format %{ "subv $dst,$src1,$src2\t# vector (16B)" %} + ins_encode %{ + __ subv(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsub8S(vecX dst, vecX src1, vecX src2) +%{ + match(Set dst (SubVS src1 src2)); + ins_cost(INSN_COST); + format %{ "subv $dst,$src1,$src2\t# vector (8H)" %} + ins_encode %{ + __ subv(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsub4I(vecX dst, vecX src1, vecX src2) +%{ + match(Set dst (SubVI src1 src2)); + ins_cost(INSN_COST); + format %{ "subv $dst,$src1,$src2\t# vector (4S)" %} + ins_encode %{ + __ subv(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsub2L(vecX dst, vecX src1, vecX src2) +%{ + match(Set dst (SubVL src1 src2)); + ins_cost(INSN_COST); + format %{ "subv $dst,$src1,$src2\t# vector (2L)" %} + ins_encode %{ + __ subv(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsub4F(vecX dst, vecX src1, vecX src2) +%{ + match(Set dst (SubVF src1 src2)); + ins_cost(INSN_COST); + format %{ "fsub $dst,$src1,$src2\t# vector (4S)" %} + ins_encode %{ + __ fsub(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + 
ins_pipe(pipe_class_default); +%} + +instruct vsub2D(vecX dst, vecX src1, vecX src2) +%{ + match(Set dst (SubVD src1 src2)); + ins_cost(INSN_COST); + format %{ "fsub $dst,$src1,$src2\t# vector (2D)" %} + ins_encode %{ + __ fsub(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +// --------------------------------- MUL -------------------------------------- + +instruct vmul8S(vecX dst, vecX src1, vecX src2) +%{ + match(Set dst (MulVS src1 src2)); + ins_cost(INSN_COST); + format %{ "mulv $dst,$src1,$src2\t# vector (8H)" %} + ins_encode %{ + __ mulv(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vmul4I(vecX dst, vecX src1, vecX src2) +%{ + match(Set dst (MulVI src1 src2)); + ins_cost(INSN_COST); + format %{ "mulv $dst,$src1,$src2\t# vector (4S)" %} + ins_encode %{ + __ mulv(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vmul4F(vecX dst, vecX src1, vecX src2) +%{ + match(Set dst (MulVF src1 src2)); + ins_cost(INSN_COST); + format %{ "fmul $dst,$src1,$src2\t# vector (4S)" %} + ins_encode %{ + __ fmul(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vmul2D(vecX dst, vecX src1, vecX src2) +%{ + match(Set dst (MulVD src1 src2)); + ins_cost(INSN_COST); + format %{ "fmul $dst,$src1,$src2\t# vector (2D)" %} + ins_encode %{ + __ fmul(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +// --------------------------------- DIV -------------------------------------- + +instruct vdiv4F(vecX dst, vecX src1, vecX src2) +%{ + match(Set dst (DivVF src1 src2)); + ins_cost(INSN_COST); 
+ format %{ "fdiv $dst,$src1,$src2\t# vector (4S)" %} + ins_encode %{ + __ fdiv(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vdiv2D(vecX dst, vecX src1, vecX src2) +%{ + match(Set dst (DivVD src1 src2)); + ins_cost(INSN_COST); + format %{ "fdiv $dst,$src1,$src2\t# vector (2D)" %} + ins_encode %{ + __ fdiv(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +// --------------------------------- AND -------------------------------------- + +instruct vand16B(vecX dst, vecX src1, vecX src2) +%{ + match(Set dst (AndV src1 src2)); + ins_cost(INSN_COST); + format %{ "and $dst,$src1,$src2\t# vector (16B)" %} + ins_encode %{ + __ andr(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +// --------------------------------- OR --------------------------------------- + +instruct vor16B(vecX dst, vecX src1, vecX src2) +%{ + match(Set dst (OrV src1 src2)); + ins_cost(INSN_COST); + format %{ "orr $dst,$src1,$src2\t# vector (16B)" %} + ins_encode %{ + __ orr(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +// --------------------------------- XOR -------------------------------------- + +instruct vxor16B(vecX dst, vecX src1, vecX src2) +%{ + match(Set dst (XorV src1 src2)); + ins_cost(INSN_COST); + format %{ "xor $dst,$src1,$src2\t# vector (16B)" %} + ins_encode %{ + __ eor(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +// ------------------------------ Shift --------------------------------------- + +instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{ + match(Set dst (LShiftCntV cnt)); + format 
%{ "dup $dst, $cnt\t# shift count (vecX)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +// Right shifts on aarch64 SIMD are implemented as left shift by -ve amount +instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{ + match(Set dst (RShiftCntV cnt)); + format %{ "dup $dst, $cnt\t# shift count (vecX)\n\tneg $dst, $dst\t T16B" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg)); + __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsll16B(vecX dst, vecX src, vecX shift) %{ + match(Set dst (LShiftVB src shift)); + match(Set dst (RShiftVB src shift)); + ins_cost(INSN_COST); + format %{ "sshl $dst,$src,$shift\t# vector (16B)" %} + ins_encode %{ + __ sshl(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsrl16B(vecX dst, vecX src, vecX shift) %{ + match(Set dst (URShiftVB src shift)); + ins_cost(INSN_COST); + format %{ "ushl $dst,$src,$shift\t# vector (16B)" %} + ins_encode %{ + __ ushl(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{ + match(Set dst (LShiftVB src shift)); + ins_cost(INSN_COST); + format %{ "shl $dst, $src, $shift\t# vector (16B)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 8) { + __ eor(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ shl(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), sh); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{ + match(Set dst (RShiftVB src shift)); + ins_cost(INSN_COST); + format %{ "sshr $dst, $src, 
$shift\t# vector (16B)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 8) sh = 7; + sh = -sh & 7; + __ sshr(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), sh); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{ + match(Set dst (URShiftVB src shift)); + ins_cost(INSN_COST); + format %{ "ushr $dst, $src, $shift\t# vector (16B)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 8) { + __ eor(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ ushr(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), -sh & 7); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct vsll8S(vecX dst, vecX src, vecX shift) %{ + match(Set dst (LShiftVS src shift)); + match(Set dst (RShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "sshl $dst,$src,$shift\t# vector (8H)" %} + ins_encode %{ + __ sshl(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsrl8S(vecX dst, vecX src, vecX shift) %{ + match(Set dst (URShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "ushl $dst,$src,$shift\t# vector (8H)" %} + ins_encode %{ + __ ushl(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{ + match(Set dst (LShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "shl $dst, $src, $shift\t# vector (8H)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 16) { + __ eor(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ shl(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src$$reg), sh); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct vsra8S_imm(vecX dst, 
vecX src, immI shift) %{ + match(Set dst (RShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "sshr $dst, $src, $shift\t# vector (8H)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 16) sh = 15; + sh = -sh & 15; + __ sshr(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src$$reg), sh); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{ + match(Set dst (URShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "ushr $dst, $src, $shift\t# vector (8H)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 16) { + __ eor(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ ushr(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src$$reg), -sh & 15); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct vsll4I(vecX dst, vecX src, vecX shift) %{ + match(Set dst (LShiftVI src shift)); + match(Set dst (RShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "sshl $dst,$src,$shift\t# vector (4S)" %} + ins_encode %{ + __ sshl(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsrl4I(vecX dst, vecX src, vecX shift) %{ + match(Set dst (URShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "ushl $dst,$src,$shift\t# vector (4S)" %} + ins_encode %{ + __ ushl(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{ + match(Set dst (LShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "shl $dst, $src, $shift\t# vector (4S)" %} + ins_encode %{ + __ shl(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src$$reg), + (int)$shift$$constant & 31); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{ + match(Set 
dst (RShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "sshr $dst, $src, $shift\t# vector (4S)" %} + ins_encode %{ + __ sshr(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src$$reg), + -(int)$shift$$constant & 31); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{ + match(Set dst (URShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "ushr $dst, $src, $shift\t# vector (4S)" %} + ins_encode %{ + __ ushr(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src$$reg), + -(int)$shift$$constant & 31); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsll2L(vecX dst, vecX src, vecX shift) %{ + match(Set dst (LShiftVL src shift)); + match(Set dst (RShiftVL src shift)); + ins_cost(INSN_COST); + format %{ "sshl $dst,$src,$shift\t# vector (2D)" %} + ins_encode %{ + __ sshl(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsrl2L(vecX dst, vecX src, vecX shift) %{ + match(Set dst (URShiftVL src shift)); + ins_cost(INSN_COST); + format %{ "ushl $dst,$src,$shift\t# vector (2D)" %} + ins_encode %{ + __ ushl(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{ + match(Set dst (LShiftVL src shift)); + ins_cost(INSN_COST); + format %{ "shl $dst, $src, $shift\t# vector (2D)" %} + ins_encode %{ + __ shl(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src$$reg), + (int)$shift$$constant & 63); + %} + ins_pipe(pipe_class_default); +%} + +instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{ + match(Set dst (RShiftVL src shift)); + ins_cost(INSN_COST); + format %{ "sshr $dst, $src, $shift\t# vector (2D)" %} + ins_encode %{ + __ sshr(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src$$reg), + -(int)$shift$$constant & 63); + %} + 
ins_pipe(pipe_class_default);
+%}
+
+instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
+  match(Set dst (URShiftVL src shift));
+  ins_cost(INSN_COST);
+  format %{ "ushr $dst, $src, $shift\t# vector (2D)" %}
+  ins_encode %{
+    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
+            as_FloatRegister($src$$reg),
+            -(int)$shift$$constant & 63);
+  %}
+  ins_pipe(pipe_class_default);
+%}
 
 //----------PEEPHOLE RULES-----------------------------------------------------
 // These must follow all instruction definitions as they use the names
diff --git a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp
index 56c1df1facf..5150a172f48 100644
--- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp
+++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp
@@ -466,6 +466,11 @@ class Address VALUE_OBJ_CLASS_SPEC {
     case base_plus_offset:
       {
         unsigned size = i->get(31, 30);
+        if (i->get(26, 26) && i->get(23, 23)) {
+          // SIMD Q Type - Size = 128 bits
+          assert(size == 0, "bad size");
+          size = 0b100;
+        }
         unsigned mask = (1 << size) - 1;
         if (_offset < 0 || _offset & mask)
           {
@@ -1888,9 +1893,18 @@ public:
   };
 
   enum SIMD_RegVariant {
-    S32, D64, Q128
+    B, H, S, D, Q
   };
 
+#define INSN(NAME, op) \
+  void NAME(FloatRegister Rt, SIMD_RegVariant T, const Address &adr) { \
+    ld_st2((Register)Rt, adr, (int)T & 3, op + ((T==Q) ? 0b10:0b00), 1); \
+  } \
+
+  INSN(ldr, 1);
+  INSN(str, 0);
+
+#undef INSN
 
  private:
 
@@ -1997,27 +2011,90 @@ public:
     rf(Vm, 16), f(0b000111, 15, 10), rf(Vn, 5), rf(Vd, 0); \
   }
 
-  INSN(eor, 0b101110001);
-  INSN(orr, 0b001110101);
+  INSN(eor,  0b101110001);
+  INSN(orr,  0b001110101);
   INSN(andr, 0b001110001);
-  INSN(bic, 0b001110011);
-  INSN(bif, 0b101110111);
-  INSN(bit, 0b101110101);
-  INSN(bsl, 0b101110011);
-  INSN(orn, 0b001110111);
+  INSN(bic,  0b001110011);
+  INSN(bif,  0b101110111);
+  INSN(bit,  0b101110101);
+  INSN(bsl,  0b101110011);
+  INSN(orn,  0b001110111);
 
 #undef INSN
 
-#define INSN(NAME, opc) \
+#define INSN(NAME, opc, opc2) \
   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
     starti; \
     f(0, 31), f((int)T & 1, 30), f(opc, 29), f(0b01110, 28, 24); \
-    f((int)T >> 1, 23, 22), f(1, 21), rf(Vm, 16), f(0b100001, 15, 10); \
+    f((int)T >> 1, 23, 22), f(1, 21), rf(Vm, 16), f(opc2, 15, 10); \
     rf(Vn, 5), rf(Vd, 0); \
   }
 
-  INSN(addv, 0);
-  INSN(subv, 1);
+  INSN(addv, 0, 0b100001);
+  INSN(subv, 1, 0b100001);
+  INSN(mulv, 0, 0b100111);
+  INSN(sshl, 0, 0b010001);
+  INSN(ushl, 1, 0b010001);
+
+#undef INSN
+
+#define INSN(NAME, opc, opc2) \
+  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { \
+    starti; \
+    f(0, 31), f((int)T & 1, 30), f(opc, 29), f(0b01110, 28, 24); \
+    f((int)T >> 1, 23, 22), f(opc2, 21, 10); \
+    rf(Vn, 5), rf(Vd, 0); \
+  }
+
+  INSN(absr, 0, 0b100000101110);
+  INSN(negr, 1, 0b100000101110);
+  INSN(notr, 1, 0b100000010110);
+  INSN(addv, 0, 0b110001101110);
+
+#undef INSN
+
+  // Vector immediate forms: movi, orri, mvni, bici (imm8 shifted left by lsl).
+  // lsl must be 0, or 8 for T4H/T8H, or one of 0/8/16/24 for T2S/T4S;
+  // every other arrangement is accepted for movi only (with lsl == 0).
+#define INSN(NAME, op0, cmode0) \
+  void NAME(FloatRegister Vd, SIMD_Arrangement T, unsigned imm8, unsigned lsl = 0) { \
+    unsigned cmode = cmode0; \
+    unsigned op = op0; \
+    starti; \
+    assert(lsl == 0 || \
+           ((T == T4H || T == T8H) && lsl == 8) || \
+           ((T == T2S || T == T4S) && ((lsl >> 3) < 4) && ((lsl & 7) == 0)), "invalid shift"); \
+    cmode |= lsl >> 2; \
+    if (T == T4H || T == T8H) cmode |= 0b1000; \
+    if (!(T == T4H || T == T8H || T == T2S || T == T4S)) { \
+      assert(op == 0 && cmode0 == 0, "must be MOVI"); \
+      cmode = 0b1110; \
+      if (T == T1D || T == T2D) op = 1; \
+    } \
+    f(0, 31), f((int)T & 1, 30), f(op, 29), f(0b0111100000, 28, 19); \
+    f(imm8 >> 5, 18, 16), f(cmode, 15, 12), f(0x01, 11, 10), f(imm8 & 0b11111, 9, 5); \
+    rf(Vd, 0); \
+  }
+
+  INSN(movi, 0, 0);
+  INSN(orri, 0, 1);
+  INSN(mvni, 1, 0);
+  INSN(bici, 1, 1);
+
+#undef INSN
+
+#define INSN(NAME, op1, op2, op3) \
+  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
+    starti; \
+    assert(T == T2S || T == T4S || T == T2D, "invalid arrangement"); \
+    f(0, 31), f((int)T & 1, 30), f(op1, 29), f(0b01110, 28, 24), f(op2, 23); \
+    f(T==T2D ? 1:0, 22); f(1, 21), rf(Vm, 16), f(op3, 15, 10), rf(Vn, 5), rf(Vd, 0); \
+  }
+
+  INSN(fadd, 0, 0, 0b110101);
+  INSN(fdiv, 1, 0, 0b111111);
+  INSN(fmul, 1, 0, 0b110111);
+  INSN(fsub, 0, 1, 0b110101);
 
 #undef INSN
 
@@ -2064,19 +2141,40 @@ public:
 
 #undef INSN
 
-  void shl(FloatRegister Vd, FloatRegister Vn, SIMD_Arrangement T, int shift){
+  void ins(FloatRegister Vd, SIMD_RegVariant T, FloatRegister Vn, int didx, int sidx) {
     starti;
-    /* The encodings for the immh:immb fields (bits 22:16) are
-     *   0001 xxx       8B/16B, shift = xxx
-     *   001x xxx       4H/8H,  shift = xxxx
-     *   01xx xxx       2S/4S,  shift = xxxxx
-     *   1xxx xxx       1D/2D,  shift = xxxxxx (1D is RESERVED)
-     */
-    assert((1 << ((T>>1)+3)) > shift, "Invalid Shift value");
-    f(0, 31), f(T & 1, 30), f(0b0011110, 29, 23), f((1 << ((T>>1)+3))|shift, 22, 16);
-    f(0b010101, 15, 10), rf(Vn, 5), rf(Vd, 0);
+    assert(T != Q, "invalid register variant");
+    f(0b01101110000, 31, 21), f(((didx<<1)|1)<<(int)T, 20, 16), f(0, 15);
+    f(sidx<<(int)T, 14, 11), f(1, 10), rf(Vn, 5), rf(Vd, 0);
   }
 
+  void umov(Register Rd, FloatRegister Vn, SIMD_RegVariant T, int idx) {
+    starti;
+    f(0, 31), f(T==D ? 1:0, 30), f(0b001110000, 29, 21);
+    f(((idx<<1)|1)<<(int)T, 20, 16), f(0b001111, 15, 10);
+    rf(Vn, 5), rf(Rd, 0);
+  }
+
+#define INSN(NAME, opc, opc2) \
+  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int shift){ \
+    starti; \
+    /* The encodings for the immh:immb fields (bits 22:16) are \
+     *   0001 xxx       8B/16B, shift = xxx \
+     *   001x xxx       4H/8H,  shift = xxxx \
+     *   01xx xxx       2S/4S,  shift = xxxxx \
+     *   1xxx xxx       1D/2D,  shift = xxxxxx (1D is RESERVED) \
+     */ \
+    assert((1 << ((T>>1)+3)) > shift, "Invalid Shift value"); \
+    f(0, 31), f(T & 1, 30), f(opc, 29), f(0b011110, 28, 23), \
+    f((1 << ((T>>1)+3))|shift, 22, 16); f(opc2, 15, 10), rf(Vn, 5), rf(Vd, 0); \
+  }
+
+  INSN(shl,  0, 0b010101);
+  INSN(sshr, 0, 0b000001);
+  INSN(ushr, 1, 0b000001);
+
+#undef INSN
+
   void ushll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
     starti;
     /* The encodings for the immh:immb fields (bits 22:16) are
@@ -2149,6 +2247,23 @@ public:
     rf(Vn, 5), rf(Vd, 0);
   }
 
+  void dup(FloatRegister Vd, SIMD_Arrangement T, Register Xs)
+  {
+    starti;
+    assert(T != T1D, "reserved encoding");
+    f(0,31), f((int)T & 1, 30), f(0b001110000, 29, 21);
+    f((1 << (T >> 1)), 20, 16), f(0b000011, 15, 10), rf(Xs, 5), rf(Vd, 0);
+  }
+
+  void dup(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int index = 0)
+  {
+    starti;
+    assert(T != T1D, "reserved encoding");
+    f(0, 31), f((int)T & 1, 30), f(0b001110000, 29, 21);
+    f(((1 << (T >> 1)) | (index << ((T >> 1) + 1))), 20, 16);
+    f(0b000001, 15, 10), rf(Vn, 5), rf(Vd, 0);
+  }
+
   // CRC32 instructions
 #define INSN(NAME, sf, sz) \
   void NAME(Register Rd, Register Rn, Register Rm) { \
diff --git a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
index 1bc3643e1c4..2440ecb3c7f 100644
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
@@ -2802,8 +2802,8 @@ void
MacroAssembler::kernel_crc32(Register crc, Register buf, Register len,
     uzp2(v21, v20, v16, T2D);
     eor(v20, T16B, v17, v21);
 
-    shl(v16, v28, T2D, 1);
-    shl(v17, v20, T2D, 1);
+    shl(v16, T2D, v28, 1);
+    shl(v17, T2D, v20, 1);
 
     eor(v0, T16B, v0, v16);
     eor(v1, T16B, v1, v17);
diff --git a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
index 9378440d849..6c47590716b 100644
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
@@ -37,6 +37,7 @@ class MacroAssembler: public Assembler {
   friend class LIR_Assembler;
 
   using Assembler::mov;
+  using Assembler::movi;
 
  protected:
 
@@ -464,6 +465,50 @@ public:
 
   void movptr(Register r, uintptr_t imm64);
 
+  // Macro to mov replicated immediate to vector register.
+  //  Where imm32 == hex abcdefgh, Vd will get the following values
+  //  for different arrangements in T
+  //   T8B:  Vd = ghghghghghghghgh
+  //   T16B: Vd = ghghghghghghghghghghghghghghghgh
+  //   T4H:  Vd = efghefghefghefgh
+  //   T8H:  Vd = efghefghefghefghefghefghefghefgh
+  //   T2S:  Vd = abcdefghabcdefgh
+  //   T4S:  Vd = abcdefghabcdefghabcdefghabcdefgh
+  //   T1D/T2D: invalid
+  void mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32) {
+    assert(T != T1D && T != T2D, "invalid arrangement");
+    if (T == T8B || T == T16B) {
+      // Byte arrangements only have the plain movi form: the assembler
+      // asserts "must be MOVI" for them, so never take the inverted path.
+      movi(Vd, T, imm32 & 0xff, 0);
+      return;
+    }
+    u_int32_t nimm32 = ~imm32;
+    if (T == T4H || T == T8H) { imm32 &= 0xffff; nimm32 &= 0xffff; }
+    u_int32_t x = imm32;
+    int movi_cnt = 0;
+    int movn_cnt = 0;
+    while (x) { if (x & 0xff) movi_cnt++; x >>= 8; }
+    x = nimm32;
+    while (x) { if (x & 0xff) movn_cnt++; x >>= 8; }
+    if (movn_cnt < movi_cnt) imm32 = nimm32;
+    unsigned lsl = 0;
+    while (imm32 && (imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
+    if (movn_cnt < movi_cnt)
+      mvni(Vd, T, imm32 & 0xff, lsl);
+    else
+      movi(Vd, T, imm32 & 0xff, lsl);
+    imm32 >>= 8; lsl += 8;
+    while (imm32) {
+      while ((imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
+      if (movn_cnt < movi_cnt)
+        bici(Vd, T, imm32 & 0xff, lsl);
+      else
+        orri(Vd, T, imm32 & 0xff, lsl);
+      lsl += 8; imm32 >>= 8;
+    }
+  }
+
   // macro instructions for accessing and updating floating point
   // status register
   //
diff --git a/hotspot/src/cpu/aarch64/vm/register_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/register_aarch64.hpp
index 1e22e935278..762b8b0f1ce 100644
--- a/hotspot/src/cpu/aarch64/vm/register_aarch64.hpp
+++ b/hotspot/src/cpu/aarch64/vm/register_aarch64.hpp
@@ -186,7 +186,7 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl {
   // it's optoregs.
 
     number_of_registers = (2 * RegisterImpl::number_of_registers +
-                           2 * FloatRegisterImpl::number_of_registers +
+                           4 * FloatRegisterImpl::number_of_registers +
                            1) // flags
   };
 
From f9895116414b192dcd99a0808749f363e6736904 Mon Sep 17 00:00:00 2001
From: David Katleman
Date: Thu, 4 Jun 2015 09:31:45 -0700
Subject: [PATCH 25/26] Added tag jdk9-b67 for changeset b0bcdde43c19

---
 .hgtags-top-repo | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.hgtags-top-repo b/.hgtags-top-repo
index 70c739c4e65..3b7f5316ae8 100644
--- a/.hgtags-top-repo
+++ b/.hgtags-top-repo
@@ -309,3 +309,4 @@ ea38728b4f4bdd8fd0d7a89b18069f521cf05013 jdk9-b61
 82cf9aab9a83e41c8194ba01af9666afdb856cbe jdk9-b64
 7c31f9d7b932f7924f1258d52885b1c7c3e078c2 jdk9-b65
 dc6e8336f51bb6b67b7245766179eab5ca7720b4 jdk9-b66
+f546760134eb861fcfecd4ce611b0040b0d25a6a jdk9-b67
From c051a3bcfb5a17343f41fda8b52ad016484b53f6 Mon Sep 17 00:00:00 2001
From: David Katleman
Date: Thu, 4 Jun 2015 09:31:46 -0700
Subject: [PATCH 26/26] Added tag jdk9-b67 for changeset b83f001a855d

---
 hotspot/.hgtags | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hotspot/.hgtags b/hotspot/.hgtags
index 7ccce8f8bd5..57051d33f64 100644
--- a/hotspot/.hgtags
+++ b/hotspot/.hgtags
@@ -469,3 +469,4 @@ ee878f3d6732856f7725c590312bfbe2ffa52cc7 jdk9-b58
 bf92b8db249cdfa5651ef954b6c0743a7e0ea4cd jdk9-b64
 e7ae94c4f35e940ea423fc1dd260435df34a77c0 jdk9-b65
 197e94e0dacddd16816f101d24fc0442ab518326
jdk9-b66 +d47dfabd16d48eb96a451edd1b61194a39ee0eb5 jdk9-b67