From 72726c41829b33fd2baf5b3604cab49d39489dd2 Mon Sep 17 00:00:00 2001 From: Eric Liu Date: Wed, 20 Apr 2022 00:55:56 +0000 Subject: [PATCH] 8284563: AArch64: bitperm feature detection for SVE2 on Linux Reviewed-by: aph, njian --- .../cpu/aarch64/macroAssembler_aarch64.cpp | 14 +++--- .../cpu/aarch64/macroAssembler_aarch64.hpp | 2 +- .../cpu/aarch64/vm_version_aarch64.cpp | 47 +++++++++++-------- .../cpu/aarch64/vm_version_aarch64.hpp | 43 ++++++++++------- .../vm_version_linux_aarch64.cpp | 9 +++- .../src/jdk/vm/ci/aarch64/AArch64.java | 1 + 6 files changed, 69 insertions(+), 47 deletions(-) diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp index 00b893d3a7d..33db31d5705 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp @@ -1575,7 +1575,7 @@ void MacroAssembler::atomic_incw(Register counter_addr, Register tmp, Register t return; } Label retry_load; - if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH)) + if (VM_Version::supports_stxr_prefetch()) prfm(Address(counter_addr), PSTL1STRM); bind(retry_load); // flush and load exclusive from the memory location @@ -2297,7 +2297,7 @@ void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Reg membar(AnyAny); } else { Label retry_load, nope; - if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH)) + if (VM_Version::supports_stxr_prefetch()) prfm(Address(addr), PSTL1STRM); bind(retry_load); // flush and load exclusive from the memory location @@ -2340,7 +2340,7 @@ void MacroAssembler::cmpxchgw(Register oldv, Register newv, Register addr, Regis membar(AnyAny); } else { Label retry_load, nope; - if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH)) + if (VM_Version::supports_stxr_prefetch()) prfm(Address(addr), PSTL1STRM); bind(retry_load); // flush and load exclusive from the memory location @@ -2382,7 +2382,7 @@ void MacroAssembler::cmpxchg(Register addr, Register expected, compare_eq(result, expected, size); } else { Label retry_load, done; - if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH)) + if (VM_Version::supports_stxr_prefetch()) prfm(Address(addr), PSTL1STRM); bind(retry_load); load_exclusive(result, addr, size, acquire); @@ -2441,7 +2441,7 @@ void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Regis result = different(prev, incr, addr) ? prev : rscratch2; \ \ Label retry_load; \ - if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH)) \ + if (VM_Version::supports_stxr_prefetch()) \ prfm(Address(addr), PSTL1STRM); \ bind(retry_load); \ LDXR(result, addr); \ @@ -2472,7 +2472,7 @@ void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { result = different(prev, newv, addr) ? prev : rscratch2; \ \ Label retry_load; \ - if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH)) \ + if (VM_Version::supports_stxr_prefetch()) \ prfm(Address(addr), PSTL1STRM); \ bind(retry_load); \ LDXR(result, addr); \ @@ -5210,7 +5210,7 @@ void MacroAssembler::cache_wb(Address line) { assert(line.offset() == 0, "offset should be 0"); // would like to assert this // assert(line._ext.shift == 0, "shift should be zero"); - if (VM_Version::features() & VM_Version::CPU_DCPOP) { + if (VM_Version::supports_dcpop()) { // writeback using clear virtual address to point of persistence dc(Assembler::CVAP, line.base()); } else { diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp index 2fa2ef61f36..4693e9bdc98 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp @@ -432,7 +432,7 @@ class MacroAssembler: public Assembler { #define WRAP(INSN) \ void INSN(Register Rd, Register Rn, Register Rm, Register Ra) { \ - if ((VM_Version::features() & VM_Version::CPU_A53MAC) && Ra != zr) \ + if (VM_Version::supports_a53mac() && Ra != zr) \ nop(); \ Assembler::INSN(Rd, Rn, Rm, Ra); \ } diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp index d2a573ac63b..07979998ff0 100644 --- a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp @@ -124,7 +124,7 @@ void VM_Version::initialize() { // if dcpop is available publish data cache line flush size via // generic field, otherwise let if default to zero thereby // disabling writeback - if (_features & CPU_DCPOP) { + if (VM_Version::supports_dcpop()) { _data_cache_line_flush_size = dcache_line; } } @@ -226,17 +226,17 @@ void VM_Version::initialize() { char buf[512]; sprintf(buf, "0x%02x:0x%x:0x%03x:%d", _cpu, _variant, _model, _revision); if (_model2) sprintf(buf+strlen(buf), "(0x%03x)", _model2); -#define ADD_FEATURE_IF_SUPPORTED(id, name, bit) if (_features & CPU_##id) strcat(buf, ", " name); +#define ADD_FEATURE_IF_SUPPORTED(id, name, bit) if (VM_Version::supports_##name()) strcat(buf, ", " #name); CPU_FEATURE_FLAGS(ADD_FEATURE_IF_SUPPORTED) #undef ADD_FEATURE_IF_SUPPORTED _features_string = os::strdup(buf); if (FLAG_IS_DEFAULT(UseCRC32)) { - UseCRC32 = (_features & CPU_CRC32) != 0; + UseCRC32 = VM_Version::supports_crc32(); } - if (UseCRC32 && (_features & CPU_CRC32) == 0) { + if (UseCRC32 && !VM_Version::supports_crc32()) { warning("UseCRC32 specified, but not supported on this CPU"); FLAG_SET_DEFAULT(UseCRC32, false); } @@ -250,7 +250,7 @@ void VM_Version::initialize() { FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false); } - if (_features & CPU_LSE) { + if (VM_Version::supports_lse()) { if (FLAG_IS_DEFAULT(UseLSE)) FLAG_SET_DEFAULT(UseLSE, true); } else { @@ -260,7 +260,7 @@ void VM_Version::initialize() { } } - if (_features & CPU_AES) { + if (VM_Version::supports_aes()) { UseAES = UseAES || FLAG_IS_DEFAULT(UseAES); UseAESIntrinsics = UseAESIntrinsics || (UseAES && FLAG_IS_DEFAULT(UseAESIntrinsics)); @@ -291,7 +291,7 @@ void VM_Version::initialize() { UseCRC32Intrinsics = true; } - if (_features & CPU_CRC32) { + if (VM_Version::supports_crc32()) { if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { FLAG_SET_DEFAULT(UseCRC32CIntrinsics, true); } @@ -308,7 +308,8 @@ void VM_Version::initialize() { UseMD5Intrinsics = true; } - if (_features & (CPU_SHA1 | CPU_SHA2 | CPU_SHA3 | CPU_SHA512)) { + if (VM_Version::supports_sha1() || VM_Version::supports_sha2() || + VM_Version::supports_sha3() || VM_Version::supports_sha512()) { if (FLAG_IS_DEFAULT(UseSHA)) { FLAG_SET_DEFAULT(UseSHA, true); } @@ -317,7 +318,7 @@ void VM_Version::initialize() { FLAG_SET_DEFAULT(UseSHA, false); } - if (UseSHA && (_features & CPU_SHA1)) { + if (UseSHA && VM_Version::supports_sha1()) { if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) { FLAG_SET_DEFAULT(UseSHA1Intrinsics, true); } @@ -326,7 +327,7 @@ void VM_Version::initialize() { FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); } - if (UseSHA && (_features & CPU_SHA2)) { + if (UseSHA && VM_Version::supports_sha2()) { if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); } @@ -335,7 +336,7 @@ void VM_Version::initialize() { FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); } - if (UseSHA && (_features & CPU_SHA3)) { + if (UseSHA && VM_Version::supports_sha3()) { // Do not auto-enable UseSHA3Intrinsics until it has been fully tested on hardware // if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) { // FLAG_SET_DEFAULT(UseSHA3Intrinsics, true); @@ -345,7 +346,7 @@ void VM_Version::initialize() { FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); } - if (UseSHA && (_features & CPU_SHA512)) { + if (UseSHA && VM_Version::supports_sha512()) { // Do not auto-enable UseSHA512Intrinsics until it has been fully tested on hardware // if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) { // FLAG_SET_DEFAULT(UseSHA512Intrinsics, true); @@ -359,7 +360,7 @@ void VM_Version::initialize() { FLAG_SET_DEFAULT(UseSHA, false); } - if (_features & CPU_PMULL) { + if (VM_Version::supports_pmull()) { if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) { FLAG_SET_DEFAULT(UseGHASHIntrinsics, true); } @@ -384,18 +385,26 @@ void VM_Version::initialize() { FLAG_SET_DEFAULT(UseBlockZeroing, false); } - if (_features & CPU_SVE) { + if (VM_Version::supports_sve2()) { if (FLAG_IS_DEFAULT(UseSVE)) { - FLAG_SET_DEFAULT(UseSVE, (_features & CPU_SVE2) ? 2 : 1); + FLAG_SET_DEFAULT(UseSVE, 2); } - if (UseSVE > 0) { - _initial_sve_vector_length = get_current_sve_vector_length(); + } else if (VM_Version::supports_sve()) { + if (FLAG_IS_DEFAULT(UseSVE)) { + FLAG_SET_DEFAULT(UseSVE, 1); + } else if (UseSVE > 1) { + warning("SVE2 specified, but not supported on current CPU. Using SVE."); + FLAG_SET_DEFAULT(UseSVE, 1); } } else if (UseSVE > 0) { warning("UseSVE specified, but not supported on current CPU. Disabling SVE."); FLAG_SET_DEFAULT(UseSVE, 0); } + if (UseSVE > 0) { + _initial_sve_vector_length = get_current_sve_vector_length(); + } + // This machine allows unaligned memory accesses if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { FLAG_SET_DEFAULT(UseUnalignedAccesses, true); @@ -416,14 +425,14 @@ void VM_Version::initialize() { _rop_protection = false; // Enable PAC if this code has been built with branch-protection and the CPU/OS supports it. #ifdef __ARM_FEATURE_PAC_DEFAULT - if ((_features & CPU_PACA) != 0) { + if (VM_Version::supports_paca()) { _rop_protection = true; } #endif } else if (strcmp(UseBranchProtection, "pac-ret") == 0) { _rop_protection = true; #ifdef __ARM_FEATURE_PAC_DEFAULT - if ((_features & CPU_PACA) == 0) { + if (!VM_Version::supports_paca()) { warning("ROP-protection specified, but not supported on this CPU."); // Disable PAC to prevent illegal instruction crashes. _rop_protection = false; diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp index e979f62b926..f5f400c39ac 100644 --- a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp @@ -100,32 +100,39 @@ public: CPU_APPLE = 'a', }; - enum Feature_Flag { #define CPU_FEATURE_FLAGS(decl) \ - decl(FP, "fp", 0) \ - decl(ASIMD, "simd", 1) \ - decl(EVTSTRM, "evtstrm", 2) \ - decl(AES, "aes", 3) \ - decl(PMULL, "pmull", 4) \ - decl(SHA1, "sha1", 5) \ - decl(SHA2, "sha256", 6) \ - decl(CRC32, "crc", 7) \ - decl(LSE, "lse", 8) \ - decl(DCPOP, "dcpop", 16) \ - decl(SHA3, "sha3", 17) \ - decl(SHA512, "sha512", 21) \ - decl(SVE, "sve", 22) \ - decl(PACA, "paca", 30) \ + decl(FP, fp, 0) \ + decl(ASIMD, simd, 1) \ + decl(EVTSTRM, evtstrm, 2) \ + decl(AES, aes, 3) \ + decl(PMULL, pmull, 4) \ + decl(SHA1, sha1, 5) \ + decl(SHA2, sha2, 6) \ + decl(CRC32, crc32, 7) \ + decl(LSE, lse, 8) \ + decl(DCPOP, dcpop, 16) \ + decl(SHA3, sha3, 17) \ + decl(SHA512, sha512, 21) \ + decl(SVE, sve, 22) \ + decl(PACA, paca, 30) \ /* flags above must follow Linux HWCAP */ \ - decl(SVE2, "sve2", 28) \ - decl(STXR_PREFETCH, "stxr_prefetch", 29) \ - decl(A53MAC, "a53mac", 31) + decl(SVEBITPERM, svebitperm, 27) \ + decl(SVE2, sve2, 28) \ + decl(STXR_PREFETCH, stxr_prefetch, 29) \ + decl(A53MAC, a53mac, 31) + enum Feature_Flag { #define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1 << bit), CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG) #undef DECLARE_CPU_FEATURE_FLAG }; + // Feature identification +#define CPU_FEATURE_DETECTION(id, name, bit) \ + static bool supports_##name() { return (_features & CPU_##id) != 0; }; + CPU_FEATURE_FLAGS(CPU_FEATURE_DETECTION) +#undef CPU_FEATURE_DETECTION + static int cpu_family() { return _cpu; } static int cpu_model() { return _model; } static int cpu_model2() { return _model2; } diff --git a/src/hotspot/os_cpu/linux_aarch64/vm_version_linux_aarch64.cpp b/src/hotspot/os_cpu/linux_aarch64/vm_version_linux_aarch64.cpp index b1080e77c90..e1ace9cd07b 100644 --- a/src/hotspot/os_cpu/linux_aarch64/vm_version_linux_aarch64.cpp +++ b/src/hotspot/os_cpu/linux_aarch64/vm_version_linux_aarch64.cpp @@ -80,6 +80,10 @@ #define HWCAP2_SVE2 (1 << 1) #endif +#ifndef HWCAP2_SVEBITPERM +#define HWCAP2_SVEBITPERM (1 << 4) +#endif + #ifndef PR_SVE_GET_VL // For old toolchains which do not have SVE related macros defined. #define PR_SVE_SET_VL 50 @@ -87,12 +91,12 @@ #endif int VM_Version::get_current_sve_vector_length() { - assert(_features & CPU_SVE, "should not call this"); + assert(VM_Version::supports_sve(), "should not call this"); return prctl(PR_SVE_GET_VL); } int VM_Version::set_and_get_current_sve_vector_length(int length) { - assert(_features & CPU_SVE, "should not call this"); + assert(VM_Version::supports_sve(), "should not call this"); int new_length = prctl(PR_SVE_SET_VL, length); return new_length; } @@ -133,6 +137,7 @@ void VM_Version::get_os_cpu_info() { HWCAP_PACA); if (auxv2 & HWCAP2_SVE2) _features |= CPU_SVE2; + if (auxv2 & HWCAP2_SVEBITPERM) _features |= CPU_SVEBITPERM; uint64_t ctr_el0; uint64_t dczid_el0; diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.aarch64/src/jdk/vm/ci/aarch64/AArch64.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.aarch64/src/jdk/vm/ci/aarch64/AArch64.java index a4b5ba3ffa5..e8009fa3e33 100644 --- a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.aarch64/src/jdk/vm/ci/aarch64/AArch64.java +++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.aarch64/src/jdk/vm/ci/aarch64/AArch64.java @@ -178,6 +178,7 @@ public class AArch64 extends Architecture { SHA512, SVE, PACA, + SVEBITPERM, SVE2, STXR_PREFETCH, A53MAC,