From e77e5da78580751dc093306902165e0607296476 Mon Sep 17 00:00:00 2001 From: Martin Doerr Date: Mon, 12 Aug 2019 19:20:12 +0200 Subject: [PATCH] 8229422: Taskqueue: Outdated selection of weak memory model platforms Reviewed-by: tschatzl, dholmes, drwhite --- src/hotspot/cpu/aarch64/globalDefinitions_aarch64.hpp | 6 ++++++ src/hotspot/cpu/arm/globalDefinitions_arm.hpp | 3 +++ src/hotspot/cpu/ppc/globalDefinitions_ppc.hpp | 4 ++-- src/hotspot/cpu/s390/globalDefinitions_s390.hpp | 2 ++ src/hotspot/cpu/sparc/globalDefinitions_sparc.hpp | 2 ++ src/hotspot/cpu/x86/globalDefinitions_x86.hpp | 2 ++ src/hotspot/share/gc/shared/taskqueue.inline.hpp | 2 +- src/hotspot/share/utilities/globalDefinitions.hpp | 9 ++++++--- 8 files changed, 24 insertions(+), 6 deletions(-) diff --git a/src/hotspot/cpu/aarch64/globalDefinitions_aarch64.hpp b/src/hotspot/cpu/aarch64/globalDefinitions_aarch64.hpp index 1b7e70dc6a2..40f75fef5df 100644 --- a/src/hotspot/cpu/aarch64/globalDefinitions_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/globalDefinitions_aarch64.hpp @@ -34,6 +34,12 @@ const bool CCallingConventionRequiresIntsAsLongs = false; #define SUPPORTS_NATIVE_CX8 +// Aarch64 was not originally defined as multi-copy-atomic, but now is. +// See: "Simplifying ARM Concurrency: Multicopy-atomic Axiomatic and +// Operational Models for ARMv8" +// So we could #define CPU_MULTI_COPY_ATOMIC but historically we have +// not done so. 
+ // According to the ARMv8 ARM, "Concurrent modification and execution // of instructions can lead to the resulting instruction performing // any behavior that can be achieved by executing any sequence of diff --git a/src/hotspot/cpu/arm/globalDefinitions_arm.hpp b/src/hotspot/cpu/arm/globalDefinitions_arm.hpp index 56143c6b9f8..a1179a53866 100644 --- a/src/hotspot/cpu/arm/globalDefinitions_arm.hpp +++ b/src/hotspot/cpu/arm/globalDefinitions_arm.hpp @@ -45,6 +45,9 @@ const bool HaveVFP = true; #define SUPPORTS_NATIVE_CX8 #endif +// arm32 is not specified as multi-copy-atomic +// So we must not #define CPU_MULTI_COPY_ATOMIC + #define STUBROUTINES_MD_HPP "stubRoutines_arm.hpp" #define INTERP_MASM_MD_HPP "interp_masm_arm.hpp" #define TEMPLATETABLE_MD_HPP "templateTable_arm.hpp" diff --git a/src/hotspot/cpu/ppc/globalDefinitions_ppc.hpp b/src/hotspot/cpu/ppc/globalDefinitions_ppc.hpp index 777b3f78685..45b4f1c74b2 100644 --- a/src/hotspot/cpu/ppc/globalDefinitions_ppc.hpp +++ b/src/hotspot/cpu/ppc/globalDefinitions_ppc.hpp @@ -41,8 +41,8 @@ const bool CCallingConventionRequiresIntsAsLongs = true; #define SUPPORTS_NATIVE_CX8 -// The PPC CPUs are NOT multiple-copy-atomic. -#define CPU_NOT_MULTIPLE_COPY_ATOMIC +// PPC64 is not specified as multi-copy-atomic +// So we must not #define CPU_MULTI_COPY_ATOMIC // The expected size in bytes of a cache line, used to pad data structures. #define DEFAULT_CACHE_LINE_SIZE 128 diff --git a/src/hotspot/cpu/s390/globalDefinitions_s390.hpp b/src/hotspot/cpu/s390/globalDefinitions_s390.hpp index 8629e322ff8..2d38f5f8c36 100644 --- a/src/hotspot/cpu/s390/globalDefinitions_s390.hpp +++ b/src/hotspot/cpu/s390/globalDefinitions_s390.hpp @@ -42,6 +42,8 @@ const int StackAlignmentInBytes = 16; #define SUPPORTS_NATIVE_CX8 +#define CPU_MULTI_COPY_ATOMIC + // Indicates whether the C calling conventions require that // 32-bit integer argument values are extended to 64 bits. // This is the case on z/Architecture. 
diff --git a/src/hotspot/cpu/sparc/globalDefinitions_sparc.hpp b/src/hotspot/cpu/sparc/globalDefinitions_sparc.hpp index 2f0bc10a31b..e82b39a165d 100644 --- a/src/hotspot/cpu/sparc/globalDefinitions_sparc.hpp +++ b/src/hotspot/cpu/sparc/globalDefinitions_sparc.hpp @@ -36,6 +36,8 @@ const bool CCallingConventionRequiresIntsAsLongs = true; #define SUPPORTS_NATIVE_CX8 +#define CPU_MULTI_COPY_ATOMIC + // The expected size in bytes of a cache line, used to pad data structures. #if defined(TIERED) // tiered, 64-bit, large machine diff --git a/src/hotspot/cpu/x86/globalDefinitions_x86.hpp b/src/hotspot/cpu/x86/globalDefinitions_x86.hpp index f247d30d079..11e5b34dc83 100644 --- a/src/hotspot/cpu/x86/globalDefinitions_x86.hpp +++ b/src/hotspot/cpu/x86/globalDefinitions_x86.hpp @@ -33,6 +33,8 @@ const bool CCallingConventionRequiresIntsAsLongs = false; #define SUPPORTS_NATIVE_CX8 +#define CPU_MULTI_COPY_ATOMIC + // The expected size in bytes of a cache line, used to pad data structures. #if defined(TIERED) #ifdef _LP64 diff --git a/src/hotspot/share/gc/shared/taskqueue.inline.hpp b/src/hotspot/share/gc/shared/taskqueue.inline.hpp index 15ef2ddfa5a..4f5160ec911 100644 --- a/src/hotspot/share/gc/shared/taskqueue.inline.hpp +++ b/src/hotspot/share/gc/shared/taskqueue.inline.hpp @@ -207,7 +207,7 @@ bool GenericTaskQueue::pop_global(volatile E& t) { // Architectures with weak memory model require a barrier here // to guarantee that bottom is not older than age, // which is crucial for the correctness of the algorithm. 
-#if !(defined SPARC || defined IA32 || defined AMD64) +#ifndef CPU_MULTI_COPY_ATOMIC OrderAccess::fence(); #endif uint localBot = OrderAccess::load_acquire(&_bottom); diff --git a/src/hotspot/share/utilities/globalDefinitions.hpp b/src/hotspot/share/utilities/globalDefinitions.hpp index 8e333f4cbbc..9779d97e731 100644 --- a/src/hotspot/share/utilities/globalDefinitions.hpp +++ b/src/hotspot/share/utilities/globalDefinitions.hpp @@ -481,10 +481,13 @@ const uint64_t KlassEncodingMetaspaceMax = (uint64_t(max_juint) + 1) << LogKlas // assure their ordering, instead of after volatile stores. // (See "A Tutorial Introduction to the ARM and POWER Relaxed Memory Models" // by Luc Maranget, Susmit Sarkar and Peter Sewell, INRIA/Cambridge) -#ifdef CPU_NOT_MULTIPLE_COPY_ATOMIC -const bool support_IRIW_for_not_multiple_copy_atomic_cpu = true; -#else +#ifdef CPU_MULTI_COPY_ATOMIC +// Not needed. const bool support_IRIW_for_not_multiple_copy_atomic_cpu = false; +#else +// Of all non-multi-copy-atomic architectures, only PPC64 supports IRIW at the moment. +// Final decision is subject to JEP 188: Java Memory Model Update. +const bool support_IRIW_for_not_multiple_copy_atomic_cpu = PPC64_ONLY(true) NOT_PPC64(false); #endif // The expected size in bytes of a cache line, used to pad data structures.