8290027: Move inline functions from vm_version_x86.hpp to cpp
Reviewed-by: kbarrett, dholmes
parent 87aa3ce03e, commit e9d9cc6d0a
vm_version_x86.cpp
@@ -2843,3 +2843,321 @@ int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
  return _max_qualified_cpu_frequency;
}

uint64_t VM_Version::feature_flags() {
  uint64_t result = 0;
  if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0)
    result |= CPU_CX8;
  if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0)
    result |= CPU_CMOV;
  if (_cpuid_info.std_cpuid1_edx.bits.clflush != 0)
    result |= CPU_FLUSH;
#ifdef _LP64
  // clflush should always be available on x86_64
  // if not we are in real trouble because we rely on it
  // to flush the code cache.
  assert ((result & CPU_FLUSH) != 0, "clflush should be available");
#endif
  if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
      _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0))
    result |= CPU_FXSR;
  // HT flag is set for multi-core processors also.
  if (threads_per_core() > 1)
    result |= CPU_HT;
  if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
      _cpuid_info.ext_cpuid1_edx.bits.mmx != 0))
    result |= CPU_MMX;
  if (_cpuid_info.std_cpuid1_edx.bits.sse != 0)
    result |= CPU_SSE;
  if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0)
    result |= CPU_SSE2;
  if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0)
    result |= CPU_SSE3;
  if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0)
    result |= CPU_SSSE3;
  if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0)
    result |= CPU_SSE4_1;
  if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
    result |= CPU_SSE4_2;
  if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
    result |= CPU_POPCNT;
  if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 &&
      _cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 &&
      _cpuid_info.xem_xcr0_eax.bits.sse != 0 &&
      _cpuid_info.xem_xcr0_eax.bits.ymm != 0) {
    result |= CPU_AVX;
    result |= CPU_VZEROUPPER;
    if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
      result |= CPU_AVX2;
    if (_cpuid_info.sef_cpuid7_ebx.bits.avx512f != 0 &&
        _cpuid_info.xem_xcr0_eax.bits.opmask != 0 &&
        _cpuid_info.xem_xcr0_eax.bits.zmm512 != 0 &&
        _cpuid_info.xem_xcr0_eax.bits.zmm32 != 0) {
      result |= CPU_AVX512F;
      if (_cpuid_info.sef_cpuid7_ebx.bits.avx512cd != 0)
        result |= CPU_AVX512CD;
      if (_cpuid_info.sef_cpuid7_ebx.bits.avx512dq != 0)
        result |= CPU_AVX512DQ;
      if (_cpuid_info.sef_cpuid7_ebx.bits.avx512pf != 0)
        result |= CPU_AVX512PF;
      if (_cpuid_info.sef_cpuid7_ebx.bits.avx512er != 0)
        result |= CPU_AVX512ER;
      if (_cpuid_info.sef_cpuid7_ebx.bits.avx512bw != 0)
        result |= CPU_AVX512BW;
      if (_cpuid_info.sef_cpuid7_ebx.bits.avx512vl != 0)
        result |= CPU_AVX512VL;
      if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
        result |= CPU_AVX512_VPOPCNTDQ;
      if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
        result |= CPU_AVX512_VPCLMULQDQ;
      if (_cpuid_info.sef_cpuid7_ecx.bits.vaes != 0)
        result |= CPU_AVX512_VAES;
      if (_cpuid_info.sef_cpuid7_ecx.bits.gfni != 0)
        result |= CPU_GFNI;
      if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vnni != 0)
        result |= CPU_AVX512_VNNI;
      if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_bitalg != 0)
        result |= CPU_AVX512_BITALG;
      if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi != 0)
        result |= CPU_AVX512_VBMI;
      if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
        result |= CPU_AVX512_VBMI2;
    }
  }
  if (_cpuid_info.std_cpuid1_ecx.bits.hv != 0)
    result |= CPU_HV;
  if (_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
    result |= CPU_BMI1;
  if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
    result |= CPU_TSC;
  if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
    result |= CPU_TSCINV_BIT;
  if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
    result |= CPU_AES;
  if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
    result |= CPU_ERMS;
  if (_cpuid_info.sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
    result |= CPU_FSRM;
  if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
    result |= CPU_CLMUL;
  if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
    result |= CPU_RTM;
  if (_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
    result |= CPU_ADX;
  if (_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
    result |= CPU_BMI2;
  if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
    result |= CPU_SHA;
  if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
    result |= CPU_FMA;
  if (_cpuid_info.sef_cpuid7_ebx.bits.clflushopt != 0)
    result |= CPU_FLUSHOPT;
  if (_cpuid_info.ext_cpuid1_edx.bits.rdtscp != 0)
    result |= CPU_RDTSCP;
  if (_cpuid_info.sef_cpuid7_ecx.bits.rdpid != 0)
    result |= CPU_RDPID;

  // AMD|Hygon features.
  if (is_amd_family()) {
    if ((_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) ||
        (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0))
      result |= CPU_3DNOW_PREFETCH;
    if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0)
      result |= CPU_LZCNT;
    if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
      result |= CPU_SSE4A;
  }

  // Intel features.
  if (is_intel()) {
    if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) {
      result |= CPU_LZCNT;
    }
    if (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0) {
      result |= CPU_3DNOW_PREFETCH;
    }
    if (_cpuid_info.sef_cpuid7_ebx.bits.clwb != 0) {
      result |= CPU_CLWB;
    }
    if (_cpuid_info.sef_cpuid7_edx.bits.serialize != 0)
      result |= CPU_SERIALIZE;
  }

  // ZX features.
  if (is_zx()) {
    if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) {
      result |= CPU_LZCNT;
    }
    if (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0) {
      result |= CPU_3DNOW_PREFETCH;
    }
  }

  // Composite features.
  if (supports_tscinv_bit() &&
      ((is_amd_family() && !is_amd_Barcelona()) ||
       is_intel_tsc_synched_at_init())) {
    result |= CPU_TSCINV;
  }

  return result;
}

bool VM_Version::os_supports_avx_vectors() {
  bool retVal = false;
  int nreg = 2 LP64_ONLY(+2);
  if (supports_evex()) {
    // Verify that OS save/restore all bits of EVEX registers
    // during signal processing.
    retVal = true;
    for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
      if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
        retVal = false;
        break;
      }
    }
  } else if (supports_avx()) {
    // Verify that OS save/restore all bits of AVX registers
    // during signal processing.
    retVal = true;
    for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
      if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
        retVal = false;
        break;
      }
    }
    // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
    if (retVal == false) {
      // Verify that OS save/restore all bits of EVEX registers
      // during signal processing.
      retVal = true;
      for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
        if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
          retVal = false;
          break;
        }
      }
    }
  }
  return retVal;
}

uint VM_Version::cores_per_cpu() {
  uint result = 1;
  if (is_intel()) {
    bool supports_topology = supports_processor_topology();
    if (supports_topology) {
      result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
               _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
    }
    if (!supports_topology || result == 0) {
      result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
    }
  } else if (is_amd_family()) {
    result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
  } else if (is_zx()) {
    bool supports_topology = supports_processor_topology();
    if (supports_topology) {
      result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
               _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
    }
    if (!supports_topology || result == 0) {
      result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
    }
  }
  return result;
}

uint VM_Version::threads_per_core() {
  uint result = 1;
  if (is_intel() && supports_processor_topology()) {
    result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
  } else if (is_zx() && supports_processor_topology()) {
    result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
  } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
    if (cpu_family() >= 0x17) {
      result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
    } else {
      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
               cores_per_cpu();
    }
  }
  return (result == 0 ? 1 : result);
}

intx VM_Version::L1_line_size() {
  intx result = 0;
  if (is_intel()) {
    result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
  } else if (is_amd_family()) {
    result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
  } else if (is_zx()) {
    result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
  }
  if (result < 32) // not defined ?
    result = 32;   // 32 bytes by default on x86 and other x64
  return result;
}

bool VM_Version::is_intel_tsc_synched_at_init() {
  if (is_intel_family_core()) {
    uint32_t ext_model = extended_cpu_model();
    if (ext_model == CPU_MODEL_NEHALEM_EP ||
        ext_model == CPU_MODEL_WESTMERE_EP ||
        ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
        ext_model == CPU_MODEL_IVYBRIDGE_EP) {
      // <= 2-socket invariant tsc support. EX versions are usually used
      // in > 2-socket systems and likely don't synchronize tscs at
      // initialization.
      // Code that uses tsc values must be prepared for them to arbitrarily
      // jump forward or backward.
      return true;
    }
  }
  return false;
}

intx VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
  // Hardware prefetching (distance/size in bytes):
  // Pentium 3 - 64 / 32
  // Pentium 4 - 256 / 128
  // Athlon - 64 / 32 ????
  // Opteron - 128 / 64 only when 2 sequential cache lines accessed
  // Core - 128 / 64
  //
  // Software prefetching (distance in bytes / instruction with best score):
  // Pentium 3 - 128 / prefetchnta
  // Pentium 4 - 512 / prefetchnta
  // Athlon - 128 / prefetchnta
  // Opteron - 256 / prefetchnta
  // Core - 256 / prefetchnta
  // It will be used only when AllocatePrefetchStyle > 0

  if (is_amd_family()) { // AMD | Hygon
    if (supports_sse2()) {
      return 256; // Opteron
    } else {
      return 128; // Athlon
    }
  } else { // Intel
    if (supports_sse3() && cpu_family() == 6) {
      if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
        return 192;
      } else if (use_watermark_prefetch) { // watermark prefetching on Core
#ifdef _LP64
        return 384;
#else
        return 320;
#endif
      }
    }
    if (supports_sse2()) {
      if (cpu_family() == 6) {
        return 256; // Pentium M, Core, Core2
      } else {
        return 512; // Pentium 4
      }
    } else {
      return 128; // Pentium 3 (and all other old CPUs)
    }
  }
}
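For orientation: the mask built by feature_flags() ends up in the _features word that the inline supports_*() accessors in the header test, as in supports_clflushopt() further down. A minimal, self-contained sketch of that flag-word pattern follows; the bit values and names here are invented for illustration and are not the real VM_Version constants.

// Sketch only: made-up bit values and a free-standing _features word,
// not the real VM_Version class or its CPU_* constants.
#include <cstdint>
#include <cstdio>

static const uint64_t CPU_SSE2_SKETCH     = 1ULL << 0;
static const uint64_t CPU_AVX_SKETCH      = 1ULL << 1;
static const uint64_t CPU_FLUSHOPT_SKETCH = 1ULL << 2;

static uint64_t _features = 0;

// Stands in for feature_flags(): OR together one bit per detected capability.
static uint64_t feature_flags_sketch(bool has_sse2, bool has_avx) {
  uint64_t result = 0;
  if (has_sse2) result |= CPU_SSE2_SKETCH;
  if (has_avx)  result |= CPU_AVX_SKETCH;
  return result;
}

// Stands in for the supports_*() accessors: a single bit test against _features.
static bool supports_avx_sketch() { return (_features & CPU_AVX_SKETCH) != 0; }

int main() {
  _features = feature_flags_sketch(true, true);
  printf("avx supported: %d\n", supports_avx_sketch() ? 1 : 0);
  return 0;
}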

vm_version_x86.hpp
@@ -508,6 +508,7 @@ protected:
    int zmm_save[16*4]; // Save zmm0, zmm7, zmm8, zmm31
  };

private:
  // The actual cpuid info block
  static CpuidInfo _cpuid_info;

@@ -536,204 +537,8 @@ protected:

  static bool compute_has_intel_jcc_erratum();

  static uint64_t feature_flags() {
    uint64_t result = 0;
    if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0)
      result |= CPU_CX8;
    if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0)
      result |= CPU_CMOV;
    if (_cpuid_info.std_cpuid1_edx.bits.clflush != 0)
      result |= CPU_FLUSH;
#ifdef _LP64
    // clflush should always be available on x86_64
    // if not we are in real trouble because we rely on it
    // to flush the code cache.
    assert ((result & CPU_FLUSH) != 0, "clflush should be available");
#endif
    if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
        _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0))
      result |= CPU_FXSR;
    // HT flag is set for multi-core processors also.
    if (threads_per_core() > 1)
      result |= CPU_HT;
    if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
        _cpuid_info.ext_cpuid1_edx.bits.mmx != 0))
      result |= CPU_MMX;
    if (_cpuid_info.std_cpuid1_edx.bits.sse != 0)
      result |= CPU_SSE;
    if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0)
      result |= CPU_SSE2;
    if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0)
      result |= CPU_SSE3;
    if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0)
      result |= CPU_SSSE3;
    if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0)
      result |= CPU_SSE4_1;
    if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
      result |= CPU_SSE4_2;
    if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
      result |= CPU_POPCNT;
    if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 &&
        _cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 &&
        _cpuid_info.xem_xcr0_eax.bits.sse != 0 &&
        _cpuid_info.xem_xcr0_eax.bits.ymm != 0) {
      result |= CPU_AVX;
      result |= CPU_VZEROUPPER;
      if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
        result |= CPU_AVX2;
      if (_cpuid_info.sef_cpuid7_ebx.bits.avx512f != 0 &&
          _cpuid_info.xem_xcr0_eax.bits.opmask != 0 &&
          _cpuid_info.xem_xcr0_eax.bits.zmm512 != 0 &&
          _cpuid_info.xem_xcr0_eax.bits.zmm32 != 0) {
        result |= CPU_AVX512F;
        if (_cpuid_info.sef_cpuid7_ebx.bits.avx512cd != 0)
          result |= CPU_AVX512CD;
        if (_cpuid_info.sef_cpuid7_ebx.bits.avx512dq != 0)
          result |= CPU_AVX512DQ;
        if (_cpuid_info.sef_cpuid7_ebx.bits.avx512pf != 0)
          result |= CPU_AVX512PF;
        if (_cpuid_info.sef_cpuid7_ebx.bits.avx512er != 0)
          result |= CPU_AVX512ER;
        if (_cpuid_info.sef_cpuid7_ebx.bits.avx512bw != 0)
          result |= CPU_AVX512BW;
        if (_cpuid_info.sef_cpuid7_ebx.bits.avx512vl != 0)
          result |= CPU_AVX512VL;
        if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
          result |= CPU_AVX512_VPOPCNTDQ;
        if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
          result |= CPU_AVX512_VPCLMULQDQ;
        if (_cpuid_info.sef_cpuid7_ecx.bits.vaes != 0)
          result |= CPU_AVX512_VAES;
        if (_cpuid_info.sef_cpuid7_ecx.bits.gfni != 0)
          result |= CPU_GFNI;
        if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vnni != 0)
          result |= CPU_AVX512_VNNI;
        if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_bitalg != 0)
          result |= CPU_AVX512_BITALG;
        if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi != 0)
          result |= CPU_AVX512_VBMI;
        if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
          result |= CPU_AVX512_VBMI2;
      }
    }
    if (_cpuid_info.std_cpuid1_ecx.bits.hv != 0)
      result |= CPU_HV;
    if (_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
      result |= CPU_BMI1;
    if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
      result |= CPU_TSC;
    if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
      result |= CPU_TSCINV_BIT;
    if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
      result |= CPU_AES;
    if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
      result |= CPU_ERMS;
    if (_cpuid_info.sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
      result |= CPU_FSRM;
    if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
      result |= CPU_CLMUL;
    if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
      result |= CPU_RTM;
    if (_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
      result |= CPU_ADX;
    if (_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
      result |= CPU_BMI2;
    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
      result |= CPU_SHA;
    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
      result |= CPU_FMA;
    if (_cpuid_info.sef_cpuid7_ebx.bits.clflushopt != 0)
      result |= CPU_FLUSHOPT;
    if (_cpuid_info.ext_cpuid1_edx.bits.rdtscp != 0)
      result |= CPU_RDTSCP;
    if (_cpuid_info.sef_cpuid7_ecx.bits.rdpid != 0)
      result |= CPU_RDPID;

    // AMD|Hygon features.
    if (is_amd_family()) {
      if ((_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) ||
          (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0))
        result |= CPU_3DNOW_PREFETCH;
      if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0)
        result |= CPU_LZCNT;
      if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
        result |= CPU_SSE4A;
    }

    // Intel features.
    if (is_intel()) {
      if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) {
        result |= CPU_LZCNT;
      }
      if (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0) {
        result |= CPU_3DNOW_PREFETCH;
      }
      if (_cpuid_info.sef_cpuid7_ebx.bits.clwb != 0) {
        result |= CPU_CLWB;
      }
      if (_cpuid_info.sef_cpuid7_edx.bits.serialize != 0)
        result |= CPU_SERIALIZE;
    }

    // ZX features.
    if (is_zx()) {
      if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) {
        result |= CPU_LZCNT;
      }
      if (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0) {
        result |= CPU_3DNOW_PREFETCH;
      }
    }

    // Composite features.
    if (supports_tscinv_bit() &&
        ((is_amd_family() && !is_amd_Barcelona()) ||
         is_intel_tsc_synched_at_init())) {
      result |= CPU_TSCINV;
    }

    return result;
  }

  static bool os_supports_avx_vectors() {
    bool retVal = false;
    int nreg = 2 LP64_ONLY(+2);
    if (supports_evex()) {
      // Verify that OS save/restore all bits of EVEX registers
      // during signal processing.
      retVal = true;
      for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
        if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
          retVal = false;
          break;
        }
      }
    } else if (supports_avx()) {
      // Verify that OS save/restore all bits of AVX registers
      // during signal processing.
      retVal = true;
      for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
        if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
          retVal = false;
          break;
        }
      }
      // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
      if (retVal == false) {
        // Verify that OS save/restore all bits of EVEX registers
        // during signal processing.
        retVal = true;
        for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
          if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
            retVal = false;
            break;
          }
        }
      }
    }
    return retVal;
  }

  static uint64_t feature_flags();
  static bool os_supports_avx_vectors();
  static void get_processor_features();

public:

@@ -767,7 +572,6 @@ public:
  static void set_avx_cpuFeatures() { _features = (CPU_SSE | CPU_SSE2 | CPU_AVX | CPU_VZEROUPPER ); }
  static void set_evex_cpuFeatures() { _features = (CPU_AVX512F | CPU_SSE | CPU_SSE2 | CPU_VZEROUPPER ); }


  // Initialization
  static void initialize();

@@ -811,62 +615,9 @@ public:
           (((_cpuid_info.tpl_cpuidB0_eax & 0x1f) | _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus) != 0);
  }

  static uint cores_per_cpu() {
    uint result = 1;
    if (is_intel()) {
      bool supports_topology = supports_processor_topology();
      if (supports_topology) {
        result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
                 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
      }
      if (!supports_topology || result == 0) {
        result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
      }
    } else if (is_amd_family()) {
      result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
    } else if (is_zx()) {
      bool supports_topology = supports_processor_topology();
      if (supports_topology) {
        result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
                 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
      }
      if (!supports_topology || result == 0) {
        result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
      }
    }
    return result;
  }

  static uint threads_per_core() {
    uint result = 1;
    if (is_intel() && supports_processor_topology()) {
      result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
    } else if (is_zx() && supports_processor_topology()) {
      result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
    } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
      if (cpu_family() >= 0x17) {
        result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
      } else {
        result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
                 cores_per_cpu();
      }
    }
    return (result == 0 ? 1 : result);
  }

  static intx L1_line_size() {
    intx result = 0;
    if (is_intel()) {
      result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
    } else if (is_amd_family()) {
      result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
    } else if (is_zx()) {
      result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
    }
    if (result < 32) // not defined ?
      result = 32;   // 32 bytes by default on x86 and other x64
    return result;
  }
  static uint cores_per_cpu();
  static uint threads_per_core();
  static intx L1_line_size();

  static intx prefetch_data_size() {
    return L1_line_size();

@@ -940,23 +691,7 @@ public:

  static int avx3_threshold();

  static bool is_intel_tsc_synched_at_init() {
    if (is_intel_family_core()) {
      uint32_t ext_model = extended_cpu_model();
      if (ext_model == CPU_MODEL_NEHALEM_EP ||
          ext_model == CPU_MODEL_WESTMERE_EP ||
          ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
          ext_model == CPU_MODEL_IVYBRIDGE_EP) {
        // <= 2-socket invariant tsc support. EX versions are usually used
        // in > 2-socket systems and likely don't synchronize tscs at
        // initialization.
        // Code that uses tsc values must be prepared for them to arbitrarily
        // jump forward or backward.
        return true;
      }
    }
    return false;
  }
  static bool is_intel_tsc_synched_at_init();

  // This checks if the JVM is potentially affected by an erratum on Intel CPUs (SKX102)
  // that causes unpredictable behaviour when jcc crosses 64 byte boundaries. Its microcode

@@ -986,51 +721,7 @@ public:

  static bool supports_compare_and_exchange() { return true; }

  static intx allocate_prefetch_distance(bool use_watermark_prefetch) {
    // Hardware prefetching (distance/size in bytes):
    // Pentium 3 - 64 / 32
    // Pentium 4 - 256 / 128
    // Athlon - 64 / 32 ????
    // Opteron - 128 / 64 only when 2 sequential cache lines accessed
    // Core - 128 / 64
    //
    // Software prefetching (distance in bytes / instruction with best score):
    // Pentium 3 - 128 / prefetchnta
    // Pentium 4 - 512 / prefetchnta
    // Athlon - 128 / prefetchnta
    // Opteron - 256 / prefetchnta
    // Core - 256 / prefetchnta
    // It will be used only when AllocatePrefetchStyle > 0

    if (is_amd_family()) { // AMD | Hygon
      if (supports_sse2()) {
        return 256; // Opteron
      } else {
        return 128; // Athlon
      }
    } else { // Intel
      if (supports_sse3() && cpu_family() == 6) {
        if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
          return 192;
        } else if (use_watermark_prefetch) { // watermark prefetching on Core
#ifdef _LP64
          return 384;
#else
          return 320;
#endif
        }
      }
      if (supports_sse2()) {
        if (cpu_family() == 6) {
          return 256; // Pentium M, Core, Core2
        } else {
          return 512; // Pentium 4
        }
      } else {
        return 128; // Pentium 3 (and all other old CPUs)
      }
    }
  }
  static intx allocate_prefetch_distance(bool use_watermark_prefetch);

  // SSE2 and later processors implement a 'pause' instruction
  // that can be used for efficient implementation of

@@ -1063,11 +754,11 @@ public:
  // and trailing StoreStore fences.

#ifdef _LP64

  static bool supports_clflush(); // Can't inline due to header file conflict
#else
  static bool supports_clflush() { return ((_features & CPU_FLUSH) != 0); }
#endif // _LP64

  // Note: CPU_FLUSHOPT and CPU_CLWB bits should always be zero for 32-bit
  static bool supports_clflushopt() { return ((_features & CPU_FLUSHOPT) != 0); }
  static bool supports_clwb() { return ((_features & CPU_CLWB) != 0); }
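Taken together, the header hunks replace each inline body with a bare declaration, and the first hunk re-adds the same body to the .cpp under the VM_Version:: qualifier. A minimal sketch of that declaration/definition split, using an invented Example class in a single translation unit rather than the real files:

#include <cstdint>

// What stays in the header after the move: declarations only.
class Example {
  static uint64_t _features;                // state stays where it was
 public:
  static uint64_t feature_flags();          // body no longer defined inline here
};

// What moves to the .cpp: the same body, now qualified with the class name.
uint64_t Example::_features = 0;

uint64_t Example::feature_flags() {
  uint64_t result = 0;
  // ... bits derived from cpuid would be OR'ed into result here ...
  return result;
}

int main() {
  return (int)Example::feature_flags();     // 0 in this sketch
}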