8194279: support zhaoxin x86 cpu vendor ids CentaurHauls and Shanghai
Reviewed-by: dholmes, kvn
This commit is contained in:
parent
9e5bf18428
commit
9c8adb8718
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -3167,6 +3167,89 @@ void Assembler::nop(int i) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (UseAddressNop && VM_Version::is_zx()) {
|
||||
//
|
||||
// Using multi-bytes nops "0x0F 0x1F [address]" for ZX
|
||||
// 1: 0x90
|
||||
// 2: 0x66 0x90
|
||||
// 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
|
||||
// 4: 0x0F 0x1F 0x40 0x00
|
||||
// 5: 0x0F 0x1F 0x44 0x00 0x00
|
||||
// 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
|
||||
// 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
|
||||
// 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
|
||||
// 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
|
||||
// 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
|
||||
// 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
|
||||
|
||||
// The remaining encodings are ZX specific - don't use consecutive address nops
|
||||
|
||||
// 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
|
||||
// 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
|
||||
// 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
|
||||
// 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
|
||||
|
||||
while (i >= 15) {
|
||||
// For ZX don't generate consecutive address nops (mix with regular nops)
|
||||
i -= 15;
|
||||
emit_int8(0x66); // size prefix
|
||||
emit_int8(0x66); // size prefix
|
||||
emit_int8(0x66); // size prefix
|
||||
addr_nop_8();
|
||||
emit_int8(0x66); // size prefix
|
||||
emit_int8(0x66); // size prefix
|
||||
emit_int8(0x66); // size prefix
|
||||
emit_int8((unsigned char)0x90);
|
||||
// nop
|
||||
}
|
||||
switch (i) {
|
||||
case 14:
|
||||
emit_int8(0x66); // size prefix
|
||||
case 13:
|
||||
emit_int8(0x66); // size prefix
|
||||
case 12:
|
||||
addr_nop_8();
|
||||
emit_int8(0x66); // size prefix
|
||||
emit_int8(0x66); // size prefix
|
||||
emit_int8(0x66); // size prefix
|
||||
emit_int8((unsigned char)0x90);
|
||||
// nop
|
||||
break;
|
||||
case 11:
|
||||
emit_int8(0x66); // size prefix
|
||||
case 10:
|
||||
emit_int8(0x66); // size prefix
|
||||
case 9:
|
||||
emit_int8(0x66); // size prefix
|
||||
case 8:
|
||||
addr_nop_8();
|
||||
break;
|
||||
case 7:
|
||||
addr_nop_7();
|
||||
break;
|
||||
case 6:
|
||||
emit_int8(0x66); // size prefix
|
||||
case 5:
|
||||
addr_nop_5();
|
||||
break;
|
||||
case 4:
|
||||
addr_nop_4();
|
||||
break;
|
||||
case 3:
|
||||
// Don't use "0x0F 0x1F 0x00" - need patching safe padding
|
||||
emit_int8(0x66); // size prefix
|
||||
case 2:
|
||||
emit_int8(0x66); // size prefix
|
||||
case 1:
|
||||
emit_int8((unsigned char)0x90);
|
||||
// nop
|
||||
break;
|
||||
default:
|
||||
assert(i == 0, " ");
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Using nops with size prefixes "0x66 0x90".
|
||||
// From AMD Optimization Guide:
|
||||
// 1: 0x90
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -628,6 +628,11 @@ void VM_Version::get_processor_features() {
|
||||
if (UseSSE < 1)
|
||||
_features &= ~CPU_SSE;
|
||||
|
||||
// Since AVX instructions are slower than SSE on some ZX cpus, force UseAVX=0.
|
||||
if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
|
||||
UseAVX = 0;
|
||||
}
|
||||
|
||||
// first try initial setting and detect what we can support
|
||||
int use_avx_limit = 0;
|
||||
if (UseAVX > 0) {
|
||||
@ -1078,6 +1083,66 @@ void VM_Version::get_processor_features() {
|
||||
// UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm).
|
||||
// UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm).
|
||||
|
||||
|
||||
if (is_zx()) { // ZX cpus specific settings
|
||||
if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
|
||||
UseStoreImmI16 = false; // don't use it on ZX cpus
|
||||
}
|
||||
if ((cpu_family() == 6) || (cpu_family() == 7)) {
|
||||
if (FLAG_IS_DEFAULT(UseAddressNop)) {
|
||||
// Use it on all ZX cpus
|
||||
UseAddressNop = true;
|
||||
}
|
||||
}
|
||||
if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
|
||||
UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
|
||||
}
|
||||
if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
|
||||
if (supports_sse3()) {
|
||||
UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
|
||||
} else {
|
||||
UseXmmRegToRegMoveAll = false;
|
||||
}
|
||||
}
|
||||
if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
|
||||
#ifdef COMPILER2
|
||||
if (FLAG_IS_DEFAULT(MaxLoopPad)) {
|
||||
// For new ZX cpus do the next optimization:
|
||||
// don't align the beginning of a loop if there are enough instructions
|
||||
// left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
|
||||
// in current fetch line (OptoLoopAlignment) or the padding
|
||||
// is big (> MaxLoopPad).
|
||||
// Set MaxLoopPad to 11 for new ZX cpus to reduce number of
|
||||
// generated NOP instructions. 11 is the largest size of one
|
||||
// address NOP instruction '0F 1F' (see Assembler::nop(i)).
|
||||
MaxLoopPad = 11;
|
||||
}
|
||||
#endif // COMPILER2
|
||||
if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
|
||||
UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
|
||||
}
|
||||
if (supports_sse4_2()) { // new ZX cpus
|
||||
if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
|
||||
UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
|
||||
}
|
||||
}
|
||||
if (supports_sse4_2()) {
|
||||
if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
|
||||
FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
|
||||
}
|
||||
} else {
|
||||
if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
|
||||
warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
|
||||
}
|
||||
FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
|
||||
}
|
||||
}
|
||||
|
||||
if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
|
||||
FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
|
||||
}
|
||||
}
|
||||
|
||||
if( is_amd() ) { // AMD cpus specific settings
|
||||
if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) {
|
||||
// Use it on new AMD cpus starting from Opteron.
|
||||
@ -1374,6 +1439,14 @@ void VM_Version::get_processor_features() {
|
||||
#endif
|
||||
}
|
||||
|
||||
if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
|
||||
#ifdef COMPILER2
|
||||
if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
|
||||
FLAG_SET_DEFAULT(UseFPUForSpilling, true);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef _LP64
|
||||
// Prefetch settings
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -305,6 +305,9 @@ protected:
|
||||
enum Extended_Family {
|
||||
// AMD
|
||||
CPU_FAMILY_AMD_11H = 0x11,
|
||||
// ZX
|
||||
CPU_FAMILY_ZX_CORE_F6 = 6,
|
||||
CPU_FAMILY_ZX_CORE_F7 = 7,
|
||||
// Intel
|
||||
CPU_FAMILY_INTEL_CORE = 6,
|
||||
CPU_MODEL_NEHALEM = 0x1e,
|
||||
@ -549,6 +552,16 @@ protected:
|
||||
}
|
||||
}
|
||||
|
||||
// ZX features.
|
||||
if (is_zx()) {
|
||||
if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
|
||||
result |= CPU_LZCNT;
|
||||
// for ZX, ecx.bits.misalignsse bit (bit 8) indicates support for prefetchw
|
||||
if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
|
||||
result |= CPU_3DNOW_PREFETCH;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -657,6 +670,7 @@ public:
|
||||
// Reports whether cpu_family() is 6 or greater.
// NOTE(review): the name suggests "P6-class or newer"; confirm against callers.
static bool is_P6() {
  const bool family_at_least_6 = (cpu_family() >= 6);
  return family_at_least_6;
}
|
||||
// Vendor check for AMD: compares the first 32-bit word of the stored vendor
// name against 0x68747541, whose bytes (low to high) spell "Auth".
static bool is_amd() {
  assert_is_initialized();
  const bool vendor_word_matches = (_cpuid_info.std_vendor_name_0 == 0x68747541); // 'htuA'
  return vendor_word_matches;
}
|
||||
// Vendor check for Intel: compares the first 32-bit word of the stored vendor
// name against 0x756e6547, whose bytes (low to high) spell "Genu".
static bool is_intel() {
  assert_is_initialized();
  const bool vendor_word_matches = (_cpuid_info.std_vendor_name_0 == 0x756e6547); // 'uneG'
  return vendor_word_matches;
}
|
||||
// Vendor check for ZX (Zhaoxin) cpus. Only the first 32-bit word of the stored
// vendor name is inspected; either of two values is accepted:
//   0x746e6543 - bytes (low to high) "Cent"
//   0x68532020 - bytes (low to high) "  Sh" (two leading spaces)
static bool is_zx() {
  assert_is_initialized();
  if (_cpuid_info.std_vendor_name_0 == 0x746e6543) { // 'tneC'
    return true;
  }
  return _cpuid_info.std_vendor_name_0 == 0x68532020; // 'hS  '
}
|
||||
// True when cpu_family() is 6 and extended_cpu_model() is one of
// 0x36, 0x37 or 0x4D (Silvermont and Centerton, per the original comment).
static bool is_atom_family() {
  if (cpu_family() != 0x06) {
    return false;
  }
  return (extended_cpu_model() == 0x36) ||
         (extended_cpu_model() == 0x37) ||
         (extended_cpu_model() == 0x4D);
}
|
||||
// True when cpu_family() is 6 and extended_cpu_model() is 0x57 or 0x85
// (Xeon Phi 3200/5200/7200 and Future Xeon Phi, per the original comment).
static bool is_knights_family() {
  if (cpu_family() != 0x06) {
    return false;
  }
  return (extended_cpu_model() == 0x57) ||
         (extended_cpu_model() == 0x85);
}
|
||||
|
||||
@ -680,6 +694,15 @@ public:
|
||||
}
|
||||
} else if (is_amd()) {
|
||||
result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
|
||||
} else if (is_zx()) {
|
||||
bool supports_topology = supports_processor_topology();
|
||||
if (supports_topology) {
|
||||
result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
|
||||
_cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
|
||||
}
|
||||
if (!supports_topology || result == 0) {
|
||||
result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@ -688,6 +711,8 @@ public:
|
||||
uint result = 1;
|
||||
if (is_intel() && supports_processor_topology()) {
|
||||
result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
|
||||
} else if (is_zx() && supports_processor_topology()) {
|
||||
result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
|
||||
} else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
|
||||
if (cpu_family() >= 0x17) {
|
||||
result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
|
||||
@ -705,6 +730,8 @@ public:
|
||||
result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
|
||||
} else if (is_amd()) {
|
||||
result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
|
||||
} else if (is_zx()) {
|
||||
result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
|
||||
}
|
||||
if (result < 32) // not defined ?
|
||||
result = 32; // 32 bytes by default on x86 and other x64
|
||||
|
Loading…
x
Reference in New Issue
Block a user