8194279: support zhaoxin x86 cpu vendor ids CentaurHauls and Shanghai

Reviewed-by: dholmes, kvn
This commit is contained in:
Vic Wang 2018-01-04 22:54:40 -05:00 committed by David Holmes
parent 9e5bf18428
commit 9c8adb8718
3 changed files with 186 additions and 3 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -3167,6 +3167,89 @@ void Assembler::nop(int i) {
return;
}
if (UseAddressNop && VM_Version::is_zx()) {
//
// Using multi-bytes nops "0x0F 0x1F [address]" for ZX
// 1: 0x90
// 2: 0x66 0x90
// 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
// 4: 0x0F 0x1F 0x40 0x00
// 5: 0x0F 0x1F 0x44 0x00 0x00
// 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
// 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
// 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
// 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
// 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
// 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
// The rest coding is ZX specific - don't use consecutive address nops
// 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
// 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
// 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
// 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
while (i >= 15) {
// For ZX don't generate consecutive addess nops (mix with regular nops)
i -= 15;
emit_int8(0x66); // size prefix
emit_int8(0x66); // size prefix
emit_int8(0x66); // size prefix
addr_nop_8();
emit_int8(0x66); // size prefix
emit_int8(0x66); // size prefix
emit_int8(0x66); // size prefix
emit_int8((unsigned char)0x90);
// nop
}
switch (i) {
case 14:
emit_int8(0x66); // size prefix
case 13:
emit_int8(0x66); // size prefix
case 12:
addr_nop_8();
emit_int8(0x66); // size prefix
emit_int8(0x66); // size prefix
emit_int8(0x66); // size prefix
emit_int8((unsigned char)0x90);
// nop
break;
case 11:
emit_int8(0x66); // size prefix
case 10:
emit_int8(0x66); // size prefix
case 9:
emit_int8(0x66); // size prefix
case 8:
addr_nop_8();
break;
case 7:
addr_nop_7();
break;
case 6:
emit_int8(0x66); // size prefix
case 5:
addr_nop_5();
break;
case 4:
addr_nop_4();
break;
case 3:
// Don't use "0x0F 0x1F 0x00" - need patching safe padding
emit_int8(0x66); // size prefix
case 2:
emit_int8(0x66); // size prefix
case 1:
emit_int8((unsigned char)0x90);
// nop
break;
default:
assert(i == 0, " ");
}
return;
}
// Using nops with size prefixes "0x66 0x90".
// From AMD Optimization Guide:
// 1: 0x90

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -628,6 +628,11 @@ void VM_Version::get_processor_features() {
if (UseSSE < 1)
_features &= ~CPU_SSE;
//since AVX instructions is slower than SSE in some ZX cpus, force USEAVX=0.
if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
UseAVX = 0;
}
// first try initial setting and detect what we can support
int use_avx_limit = 0;
if (UseAVX > 0) {
@ -1078,6 +1083,66 @@ void VM_Version::get_processor_features() {
// UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm).
// UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm).
if (is_zx()) { // ZX cpus specific settings
if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
UseStoreImmI16 = false; // don't use it on ZX cpus
}
if ((cpu_family() == 6) || (cpu_family() == 7)) {
if (FLAG_IS_DEFAULT(UseAddressNop)) {
// Use it on all ZX cpus
UseAddressNop = true;
}
}
if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
}
if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
if (supports_sse3()) {
UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
} else {
UseXmmRegToRegMoveAll = false;
}
}
if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
#ifdef COMPILER2
if (FLAG_IS_DEFAULT(MaxLoopPad)) {
// For new ZX cpus do the next optimization:
// don't align the beginning of a loop if there are enough instructions
// left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
// in current fetch line (OptoLoopAlignment) or the padding
// is big (> MaxLoopPad).
// Set MaxLoopPad to 11 for new ZX cpus to reduce number of
// generated NOP instructions. 11 is the largest size of one
// address NOP instruction '0F 1F' (see Assembler::nop(i)).
MaxLoopPad = 11;
}
#endif // COMPILER2
if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
}
if (supports_sse4_2()) { // new ZX cpus
if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
}
}
if (supports_sse4_2()) {
if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
}
} else {
if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
}
FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
}
}
if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
}
}
if( is_amd() ) { // AMD cpus specific settings
if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) {
// Use it on new AMD cpus starting from Opteron.
@ -1374,6 +1439,14 @@ void VM_Version::get_processor_features() {
#endif
}
if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
#ifdef COMPILER2
if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
FLAG_SET_DEFAULT(UseFPUForSpilling, true);
}
#endif
}
#ifdef _LP64
// Prefetch settings

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -305,6 +305,9 @@ protected:
enum Extended_Family {
// AMD
CPU_FAMILY_AMD_11H = 0x11,
// ZX
CPU_FAMILY_ZX_CORE_F6 = 6,
CPU_FAMILY_ZX_CORE_F7 = 7,
// Intel
CPU_FAMILY_INTEL_CORE = 6,
CPU_MODEL_NEHALEM = 0x1e,
@ -549,6 +552,16 @@ protected:
}
}
// ZX features.
if (is_zx()) {
if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
result |= CPU_LZCNT;
// for ZX, ecx.bits.misalignsse bit (bit 8) indicates support for prefetchw
if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
result |= CPU_3DNOW_PREFETCH;
}
}
return result;
}
@ -657,6 +670,7 @@ public:
static bool is_P6() { return cpu_family() >= 6; }
static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA'
static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG'
static bool is_zx() { assert_is_initialized(); return (_cpuid_info.std_vendor_name_0 == 0x746e6543) || (_cpuid_info.std_vendor_name_0 == 0x68532020); } // 'tneC'||'hS '
static bool is_atom_family() { return ((cpu_family() == 0x06) && ((extended_cpu_model() == 0x36) || (extended_cpu_model() == 0x37) || (extended_cpu_model() == 0x4D))); } //Silvermont and Centerton
static bool is_knights_family() { return ((cpu_family() == 0x06) && ((extended_cpu_model() == 0x57) || (extended_cpu_model() == 0x85))); } // Xeon Phi 3200/5200/7200 and Future Xeon Phi
@ -680,6 +694,15 @@ public:
}
} else if (is_amd()) {
result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
} else if (is_zx()) {
bool supports_topology = supports_processor_topology();
if (supports_topology) {
result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
_cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
}
if (!supports_topology || result == 0) {
result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
}
}
return result;
}
@ -688,6 +711,8 @@ public:
uint result = 1;
if (is_intel() && supports_processor_topology()) {
result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
} else if (is_zx() && supports_processor_topology()) {
result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
} else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
if (cpu_family() >= 0x17) {
result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
@ -705,6 +730,8 @@ public:
result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
} else if (is_amd()) {
result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
} else if (is_zx()) {
result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
}
if (result < 32) // not defined ?
result = 32; // 32 bytes by default on x86 and other x64