8329031: CPUID feature detection for Advanced Performance Extensions (Intel® APX)

Reviewed-by: sviswanathan, kvn
This commit is contained in:
Jatin Bhateja 2024-06-09 00:47:23 +00:00
parent 8d2f9e57c3
commit a941397327
8 changed files with 185 additions and 26 deletions

View File

@ -115,6 +115,10 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
"Highest supported AVX instructions set on x86/x64") \
range(0, 3) \
\
\
product(bool, UseAPX, false, EXPERIMENTAL, \
"Use Intel Advanced Performance Extensions") \
\
product(bool, UseKNLSetting, false, DIAGNOSTIC, \
"Control whether Knights platform setting should be used") \
\
@ -234,8 +238,6 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
"Turn off JVM mitigations related to Intel micro code " \
"mitigations for the Intel JCC erratum") \
\
product(bool, UseAPX, false, EXPERIMENTAL, \
"Use Advanced Performance Extensions on x86") \
// end of ARCH_FLAGS
#endif // CPU_X86_GLOBALS_X86_HPP

View File

@ -56,6 +56,10 @@ const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEAT
address VM_Version::_cpuinfo_segv_addr = 0;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = 0;
// Address of instruction which causes APX specific SEGV
address VM_Version::_cpuinfo_segv_addr_apx = 0;
// Address of instruction after the one which causes APX specific SEGV
address VM_Version::_cpuinfo_cont_addr_apx = 0;
static BufferBlob* stub_blob;
static const int stub_size = 2000;
@ -63,9 +67,11 @@ static const int stub_size = 2000;
extern "C" {
typedef void (*get_cpu_info_stub_t)(void*);
typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
typedef void (*clear_apx_test_state_t)(void);
}
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
static detect_virt_stub_t detect_virt_stub = nullptr;
static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;
#ifdef _LP64
@ -102,6 +108,27 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
// Emits the stub that VM_Version::clear_apx_test_state() invokes from the
// signal/exception handlers while the APX probe fault is being handled.
// Returns the entry address of the generated code. With the EGPR clearing
// below still disabled, the stub consists of a single return instruction.
address clear_apx_test_state() {
#define __ _masm->
  address stub_entry = __ pc();
  // EGPRs are call-clobbered registers. Explicitly clearing r16 and r31 while
  // the signal is being handled guarantees that the values found in them after
  // the handler returns were restored by the operating system, rather than
  // simply being left over because nothing modified them in the meantime.
  /* FIXME: Uncomment the following code after OS enablement
     (presumably of APX/EGPR state save-restore -- the original note is truncated; confirm).
  bool save_apx = UseAPX;
  VM_Version::set_apx_cpuFeatures();
  UseAPX = true;
  // EGPR state save/restoration.
  __ mov64(r16, 0L);
  __ mov64(r31, 0L);
  UseAPX = save_apx;
  VM_Version::clean_cpuFeatures();
  */
  __ ret(0);
  return stub_entry;
}
address generate_get_cpu_info() {
// Flags to test CPU type.
const uint32_t HS_EFL_AC = 0x40000;
@ -113,7 +140,8 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup;
Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
@ -288,7 +316,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ movl(Address(rsi, 4), rdx);
//
// cpuid(0x7) Structured Extended Features
// cpuid(0x7) Structured Extended Features Enumeration Leaf.
//
__ bind(sef_cpuid);
__ movl(rax, 7);
@ -303,12 +331,16 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ movl(Address(rsi, 8), rcx);
__ movl(Address(rsi, 12), rdx);
// ECX = 1
//
// cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
//
__ bind(sefsl1_cpuid);
__ movl(rax, 7);
__ movl(rcx, 1);
__ cpuid();
__ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_ecx1_offset())));
__ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
__ movl(Address(rsi, 0), rax);
__ movl(Address(rsi, 4), rdx);
//
// Extended cpuid(0x80000000)
@ -387,6 +419,46 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ movl(Address(rsi, 8), rcx);
__ movl(Address(rsi,12), rdx);
#ifndef PRODUCT
//
// Check if OS has enabled XGETBV instruction to access XCR0
// (OSXSAVE feature flag) and CPU supports APX
//
// To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
// and XCR0[19] bit for OS support to save/restore extended GPR state.
__ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
__ movl(rax, 0x200000);
__ andl(rax, Address(rsi, 4));
__ cmpl(rax, 0x200000);
__ jcc(Assembler::notEqual, vector_save_restore);
// check _cpuid_info.xem_xcr0_eax.bits.apx_f
__ movl(rax, 0x80000);
__ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
__ cmpl(rax, 0x80000);
__ jcc(Assembler::notEqual, vector_save_restore);
/* FIXME: Uncomment while integrating JDK-8329032
bool save_apx = UseAPX;
VM_Version::set_apx_cpuFeatures();
UseAPX = true;
__ mov64(r16, VM_Version::egpr_test_value());
__ mov64(r31, VM_Version::egpr_test_value());
*/
__ xorl(rsi, rsi);
VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
// Generate SEGV
__ movl(rax, Address(rsi, 0));
VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
/* FIXME: Uncomment after integration of JDK-8329032
__ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
__ movq(Address(rsi, 0), r16);
__ movq(Address(rsi, 8), r31);
UseAPX = save_apx;
*/
#endif
__ bind(vector_save_restore);
//
// Check if OS has enabled XGETBV instruction to access XCR0
// (OSXSAVE feature flag) and CPU supports AVX
@ -580,6 +652,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ vmovdqu(xmm7, Address(rsp, 0));
__ addptr(rsp, 32);
#endif // _WINDOWS
generate_vzeroupper(wrapup);
VM_Version::clean_cpuFeatures();
UseAVX = saved_useavx;
@ -940,6 +1013,7 @@ void VM_Version::get_processor_features() {
FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
}
}
if (UseAVX > use_avx_limit) {
if (UseSSE < 4) {
warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
@ -963,6 +1037,16 @@ void VM_Version::get_processor_features() {
_features &= ~CPU_AVX512_VBMI2;
_features &= ~CPU_AVX512_BITALG;
_features &= ~CPU_AVX512_IFMA;
_features &= ~CPU_APX_F;
}
// Currently APX support is only enabled for targets supporting AVX512VL feature.
bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
if (UseAPX && !apx_supported) {
warning("UseAPX is not supported on this CPU, setting it to false");
FLAG_SET_DEFAULT(UseAPX, false);
} else if (FLAG_IS_DEFAULT(UseAPX)) {
FLAG_SET_DEFAULT(UseAPX, apx_supported ? true : false);
}
if (UseAVX < 2) {
@ -1002,14 +1086,6 @@ void VM_Version::get_processor_features() {
}
}
// APX support not enabled yet
if (UseAPX) {
if (!FLAG_IS_DEFAULT(UseAPX)) {
warning("APX is not supported on this CPU.");
}
FLAG_SET_DEFAULT(UseAPX, false);
}
if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
_has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
} else {
@ -2143,6 +2219,10 @@ int VM_Version::avx3_threshold() {
FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
}
// Runs the generated clear_apx_test_state stub. The stub pointer starts out as
// nullptr and is installed by VM_Version::initialize(), so this must not be
// reached before initialization. It is called by the platform signal/exception
// handlers when the fault raised by the APX probe instruction is recognized.
void VM_Version::clear_apx_test_state() {
  (*clear_apx_test_state_stub)();
}
static bool _vm_version_initialized = false;
void VM_Version::initialize() {
@ -2160,6 +2240,8 @@ void VM_Version::initialize() {
detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
g.generate_detect_virt());
clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
g.clear_apx_test_state());
get_processor_features();
LP64_ONLY(Assembler::precompute_instructions();)
@ -2958,6 +3040,10 @@ uint64_t VM_Version::CpuidInfo::feature_flags() const {
result |= CPU_SSE4_2;
if (std_cpuid1_ecx.bits.popcnt != 0)
result |= CPU_POPCNT;
if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
xem_xcr0_eax.bits.apx_f != 0) {
result |= CPU_APX_F;
}
if (std_cpuid1_ecx.bits.avx != 0 &&
std_cpuid1_ecx.bits.osxsave != 0 &&
xem_xcr0_eax.bits.sse != 0 &&
@ -2968,7 +3054,7 @@ uint64_t VM_Version::CpuidInfo::feature_flags() const {
result |= CPU_F16C;
if (sef_cpuid7_ebx.bits.avx2 != 0) {
result |= CPU_AVX2;
if (sef_cpuid7_ecx1_eax.bits.avx_ifma != 0)
if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
result |= CPU_AVX_IFMA;
}
if (sef_cpuid7_ecx.bits.gfni != 0)
@ -3142,6 +3228,17 @@ bool VM_Version::os_supports_avx_vectors() {
return retVal;
}
bool VM_Version::os_supports_apx_egprs() {
if (!supports_apx_f()) {
return false;
}
if (_cpuid_info.apx_save[0] != egpr_test_value() ||
_cpuid_info.apx_save[1] != egpr_test_value()) {
return false;
}
return true;
}
uint VM_Version::cores_per_cpu() {
uint result = 1;
if (is_intel()) {

View File

@ -26,6 +26,7 @@
#define CPU_X86_VM_VERSION_X86_HPP
#include "runtime/abstract_vm_version.hpp"
#include "utilities/debug.hpp"
#include "utilities/macros.hpp"
#include "utilities/sizes.hpp"
@ -279,7 +280,7 @@ class VM_Version : public Abstract_VM_Version {
} bits;
};
union SefCpuid7Ecx1Eax {
union SefCpuid7SubLeaf1Eax {
uint32_t value;
struct {
uint32_t : 23,
@ -288,6 +289,15 @@ class VM_Version : public Abstract_VM_Version {
} bits;
};
// EDX result of cpuid with eax = 7, ecx = 1 (structured extended features,
// sub-leaf 1). Only bit 21, the APX_F hardware-support bit, is decoded; the
// remaining bits are left unnamed.
union SefCpuid7SubLeaf1Edx {
  uint32_t value;
  struct {
    uint32_t       : 21;  // bits 0..20: not decoded
    uint32_t apx_f : 1;   // bit 21: APX_F
    uint32_t       : 10;  // bits 22..31: not decoded
  } bits;
};
union ExtCpuid1EEbx {
uint32_t value;
struct {
@ -308,7 +318,9 @@ class VM_Version : public Abstract_VM_Version {
opmask : 1,
zmm512 : 1,
zmm32 : 1,
: 24;
: 11,
apx_f : 1,
: 12;
} bits;
};
@ -319,8 +331,10 @@ protected:
static bool _has_intel_jcc_erratum;
static address _cpuinfo_segv_addr; // address of instruction which causes SEGV
static address _cpuinfo_cont_addr; // address of instruction after the one which causes SEGV
static address _cpuinfo_segv_addr; // address of instruction which causes SEGV
static address _cpuinfo_cont_addr; // address of instruction after the one which causes SEGV
static address _cpuinfo_segv_addr_apx; // address of instruction which causes APX specific SEGV
static address _cpuinfo_cont_addr_apx; // address of instruction after the one which causes APX specific SEGV
/*
* Update following files when declaring new flags:
@ -400,7 +414,8 @@ protected:
decl(CET_IBT, "cet_ibt", 56) /* Control Flow Enforcement - Indirect Branch Tracking */ \
decl(CET_SS, "cet_ss", 57) /* Control Flow Enforcement - Shadow Stack */ \
decl(AVX512_IFMA, "avx512_ifma", 58) /* Integer Vector FMA instructions*/ \
decl(AVX_IFMA, "avx_ifma", 59) /* 256-bit VEX-coded variant of AVX512-IFMA*/
decl(AVX_IFMA, "avx_ifma", 59) /* 256-bit VEX-coded variant of AVX512-IFMA*/ \
decl(APX_F, "apx_f", 60) /* Intel Advanced Performance Extensions*/
#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1ULL << bit),
CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG)
@ -458,14 +473,17 @@ protected:
uint32_t dcp_cpuid4_ecx; // unused currently
uint32_t dcp_cpuid4_edx; // unused currently
// cpuid function 7 (structured extended features)
// ECX = 0 before calling cpuid()
// cpuid function 7 (structured extended features enumeration leaf)
// eax = 7, ecx = 0
SefCpuid7Eax sef_cpuid7_eax;
SefCpuid7Ebx sef_cpuid7_ebx;
SefCpuid7Ecx sef_cpuid7_ecx;
SefCpuid7Edx sef_cpuid7_edx;
// ECX = 1 before calling cpuid()
SefCpuid7Ecx1Eax sef_cpuid7_ecx1_eax;
// cpuid function 7 (structured extended features enumeration sub-leaf 1)
// eax = 7, ecx = 1
SefCpuid7SubLeaf1Eax sefsl1_cpuid7_eax;
SefCpuid7SubLeaf1Edx sefsl1_cpuid7_edx;
// cpuid function 0xB (processor topology)
// ecx = 0
@ -537,6 +555,9 @@ protected:
// Space to save zmm registers after signal handle
int zmm_save[16*4]; // Save zmm0, zmm7, zmm8, zmm31
// Space to save APX extended general purpose registers after signal handling
jlong apx_save[2]; // Save r16 and r31
uint64_t feature_flags() const;
// Asserts
@ -576,6 +597,7 @@ private:
static bool compute_has_intel_jcc_erratum();
static bool os_supports_avx_vectors();
static bool os_supports_apx_egprs();
static void get_processor_features();
public:
@ -584,7 +606,7 @@ public:
static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); }
static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); }
static ByteSize sef_cpuid7_offset() { return byte_offset_of(CpuidInfo, sef_cpuid7_eax); }
static ByteSize sef_cpuid7_ecx1_offset() { return byte_offset_of(CpuidInfo, sef_cpuid7_ecx1_eax); }
static ByteSize sefsl1_cpuid7_offset() { return byte_offset_of(CpuidInfo, sefsl1_cpuid7_eax); }
static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); }
static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
static ByteSize ext_cpuid7_offset() { return byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
@ -596,9 +618,11 @@ public:
static ByteSize xem_xcr0_offset() { return byte_offset_of(CpuidInfo, xem_xcr0_eax); }
static ByteSize ymm_save_offset() { return byte_offset_of(CpuidInfo, ymm_save); }
static ByteSize zmm_save_offset() { return byte_offset_of(CpuidInfo, zmm_save); }
static ByteSize apx_save_offset() { return byte_offset_of(CpuidInfo, apx_save); }
// Sentinel patterns used to check register contents after signal handling:
// ymm_test_value() is the pattern for the ymm register check, and
// egpr_test_value() is the 64-bit pattern that os_supports_apx_egprs()
// expects to find in both CpuidInfo::apx_save slots (r16 and r31).
static int ymm_test_value() { return 0xCAFEBABE; }
static jlong egpr_test_value() { return 0xCAFEBABECAFEBABELL; }
static void get_cpu_info_wrapper();
static void set_cpuinfo_segv_addr(address pc) { _cpuinfo_segv_addr = pc; }
@ -606,9 +630,17 @@ public:
static void set_cpuinfo_cont_addr(address pc) { _cpuinfo_cont_addr = pc; }
static address cpuinfo_cont_addr() { return _cpuinfo_cont_addr; }
// APX probe fault bookkeeping. The stub generator records the pc of the
// instruction that deliberately faults (segv addr) and the pc immediately
// after it (cont addr). The signal/exception handlers compare the faulting pc
// against the former and resume execution at the latter.
static void set_cpuinfo_segv_addr_apx(address pc) { _cpuinfo_segv_addr_apx = pc; }
// True if pc is the recorded APX probe faulting instruction.
static bool is_cpuinfo_segv_addr_apx(address pc) { return _cpuinfo_segv_addr_apx == pc; }
static void set_cpuinfo_cont_addr_apx(address pc) { _cpuinfo_cont_addr_apx = pc; }
static address cpuinfo_cont_addr_apx() { return _cpuinfo_cont_addr_apx; }
// Invokes the generated clear_apx_test_state stub; called from the handlers
// that recognize the APX probe fault.
static void clear_apx_test_state();
// Direct manipulation of the _features mask.
// clean_cpuFeatures() clears every feature bit.
static void clean_cpuFeatures() { _features = 0; }
// The AVX and EVEX setters overwrite the whole mask with a fixed feature set.
static void set_avx_cpuFeatures() { _features = (CPU_SSE | CPU_SSE2 | CPU_AVX | CPU_VZEROUPPER ); }
static void set_evex_cpuFeatures() { _features = (CPU_AVX512F | CPU_SSE | CPU_SSE2 | CPU_VZEROUPPER ); }
// Unlike the two setters above, this ORs CPU_APX_F into the current mask and
// leaves all other feature bits untouched.
static void set_apx_cpuFeatures() { _features |= CPU_APX_F; }
// Initialization
static void initialize();
@ -705,6 +737,7 @@ public:
static bool supports_avx512novl() { return (supports_evex() && !supports_avx512vl()); }
static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); }
static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); }
static bool supports_apx_f() { return (_features & CPU_APX_F) != 0; }
static bool supports_avxonly() { return ((supports_avx2() || supports_avx()) && !supports_evex()); }
static bool supports_sha() { return (_features & CPU_SHA) != 0; }
static bool supports_fma() { return (_features & CPU_FMA) != 0 && supports_avx(); }

View File

@ -2744,6 +2744,15 @@ LONG WINAPI topLevelExceptionFilter(struct _EXCEPTION_POINTERS* exceptionInfo) {
// Verify that OS save/restore AVX registers.
return Handle_Exception(exceptionInfo, VM_Version::cpuinfo_cont_addr());
}
#ifndef PRODUCT
if ((exception_code == EXCEPTION_ACCESS_VIOLATION) &&
VM_Version::is_cpuinfo_segv_addr_apx(pc)) {
// Verify that OS save/restore APX registers.
VM_Version::clear_apx_test_state();
return Handle_Exception(exceptionInfo, VM_Version::cpuinfo_cont_addr_apx());
}
#endif
#endif
if (t != nullptr && t->is_Java_thread()) {

View File

@ -416,6 +416,14 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info,
stub = VM_Version::cpuinfo_cont_addr();
}
#ifndef PRODUCT
if ((sig == SIGSEGV || sig == SIGBUS) && VM_Version::is_cpuinfo_segv_addr_apx(pc)) {
// Verify that OS save/restore APX registers.
stub = VM_Version::cpuinfo_cont_addr_apx();
VM_Version::clear_apx_test_state();
}
#endif
// We test if stub is already set (by the stack overflow code
// above) so it is not overwritten by the code that follows. This
// check is not required on other platforms, because on other

View File

@ -248,6 +248,14 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info,
stub = VM_Version::cpuinfo_cont_addr();
}
#ifndef PRODUCT
if ((sig == SIGSEGV) && VM_Version::is_cpuinfo_segv_addr_apx(pc)) {
// Verify that OS save/restore APX registers.
stub = VM_Version::cpuinfo_cont_addr_apx();
VM_Version::clear_apx_test_state();
}
#endif
if (thread->thread_state() == _thread_in_Java) {
// Java thread running in Java code => find exception handler if any
// a fault inside compiled code, the interpreter, or a stub

View File

@ -233,6 +233,7 @@ public class AMD64 extends Architecture {
CET_SS,
AVX512_IFMA,
AVX_IFMA,
APX_F,
}
private final EnumSet<CPUFeature> features;

View File

@ -65,7 +65,8 @@ public class CPUInfoTest {
"avx512_vbmi2", "avx512_vbmi", "rdtscp", "rdpid",
"hv", "fsrm", "avx512_bitalg", "gfni",
"f16c", "pku", "ospke", "cet_ibt",
"cet_ss", "avx512_ifma", "serialize", "avx_ifma"
"cet_ss", "avx512_ifma", "serialize", "avx_ifma",
"apx_f"
);
// @formatter:on
// Checkstyle: resume