From 9c8adb87183abcc54fb33e3b0049714ddbfb5755 Mon Sep 17 00:00:00 2001 From: Vic Wang Date: Thu, 4 Jan 2018 22:54:40 -0500 Subject: [PATCH] 8194279: support zhaoxin x86 cpu vendor ids CentaurHauls and Shanghai Reviewed-by: dholmes, kvn --- src/hotspot/cpu/x86/assembler_x86.cpp | 85 +++++++++++++++++++++++++- src/hotspot/cpu/x86/vm_version_x86.cpp | 75 ++++++++++++++++++++++- src/hotspot/cpu/x86/vm_version_x86.hpp | 29 ++++++++- 3 files changed, 186 insertions(+), 3 deletions(-) diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp index b13a911398b..a9ca31ee7ef 100644 --- a/src/hotspot/cpu/x86/assembler_x86.cpp +++ b/src/hotspot/cpu/x86/assembler_x86.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -3167,6 +3167,89 @@ void Assembler::nop(int i) { return; } + if (UseAddressNop && VM_Version::is_zx()) { + // + // Using multi-byte nops "0x0F 0x1F [address]" for ZX + // 1: 0x90 + // 2: 0x66 0x90 + // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding) + // 4: 0x0F 0x1F 0x40 0x00 + // 5: 0x0F 0x1F 0x44 0x00 0x00 + // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 + // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 + // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 + // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 + // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 + // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 + + // The remaining encodings are ZX specific - don't use consecutive address nops + + // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 + // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 + // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 + // 15: 0x66 0x66 0x66 0x0F 0x1F 
0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 + + while (i >= 15) { + // For ZX don't generate consecutive address nops (mix with regular nops) + i -= 15; + emit_int8(0x66); // size prefix + emit_int8(0x66); // size prefix + emit_int8(0x66); // size prefix + addr_nop_8(); + emit_int8(0x66); // size prefix + emit_int8(0x66); // size prefix + emit_int8(0x66); // size prefix + emit_int8((unsigned char)0x90); + // nop + } + switch (i) { + case 14: + emit_int8(0x66); // size prefix + case 13: + emit_int8(0x66); // size prefix + case 12: + addr_nop_8(); + emit_int8(0x66); // size prefix + emit_int8(0x66); // size prefix + emit_int8(0x66); // size prefix + emit_int8((unsigned char)0x90); + // nop + break; + case 11: + emit_int8(0x66); // size prefix + case 10: + emit_int8(0x66); // size prefix + case 9: + emit_int8(0x66); // size prefix + case 8: + addr_nop_8(); + break; + case 7: + addr_nop_7(); + break; + case 6: + emit_int8(0x66); // size prefix + case 5: + addr_nop_5(); + break; + case 4: + addr_nop_4(); + break; + case 3: + // Don't use "0x0F 0x1F 0x00" - need patching safe padding + emit_int8(0x66); // size prefix + case 2: + emit_int8(0x66); // size prefix + case 1: + emit_int8((unsigned char)0x90); + // nop + break; + default: + assert(i == 0, " "); + } + return; + } + // Using nops with size prefixes "0x66 0x90". // From AMD Optimization Guide: // 1: 0x90 diff --git a/src/hotspot/cpu/x86/vm_version_x86.cpp b/src/hotspot/cpu/x86/vm_version_x86.cpp index 563ff1f111b..dcb90772aae 100644 --- a/src/hotspot/cpu/x86/vm_version_x86.cpp +++ b/src/hotspot/cpu/x86/vm_version_x86.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -628,6 +628,11 @@ void VM_Version::get_processor_features() { if (UseSSE < 1) _features &= ~CPU_SSE; + // Since AVX instructions are slower than SSE on some ZX cpus, force UseAVX=0. + if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) { + UseAVX = 0; + } + // first try initial setting and detect what we can support int use_avx_limit = 0; if (UseAVX > 0) { @@ -1078,6 +1083,66 @@ void VM_Version::get_processor_features() { // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm). // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm). + + if (is_zx()) { // ZX cpus specific settings + if (FLAG_IS_DEFAULT(UseStoreImmI16)) { + UseStoreImmI16 = false; // don't use it on ZX cpus + } + if ((cpu_family() == 6) || (cpu_family() == 7)) { + if (FLAG_IS_DEFAULT(UseAddressNop)) { + // Use it on all ZX cpus + UseAddressNop = true; + } + } + if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) { + UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus + } + if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) { + if (supports_sse3()) { + UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus + } else { + UseXmmRegToRegMoveAll = false; + } + } + if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus +#ifdef COMPILER2 + if (FLAG_IS_DEFAULT(MaxLoopPad)) { + // For new ZX cpus do the next optimization: + // don't align the beginning of a loop if there are enough instructions + // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) + // in current fetch line (OptoLoopAlignment) or the padding + // is big (> MaxLoopPad). + // Set MaxLoopPad to 11 for new ZX cpus to reduce number of + // generated NOP instructions. 11 is the largest size of one + // address NOP instruction '0F 1F' (see Assembler::nop(i)). 
+ MaxLoopPad = 11; + } +#endif // COMPILER2 + if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { + UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus + } + if (supports_sse4_2()) { // new ZX cpus + if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { + UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus + } + } + if (supports_sse4_2()) { + if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { + FLAG_SET_DEFAULT(UseSSE42Intrinsics, true); + } + } else { + if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { // warn only if the user explicitly enabled the flag + warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled."); + } + FLAG_SET_DEFAULT(UseSSE42Intrinsics, false); + } + } + + if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) { + FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); + } + } + if( is_amd() ) { // AMD cpus specific settings if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) { // Use it on new AMD cpus starting from Opteron. @@ -1374,6 +1439,14 @@ void VM_Version::get_processor_features() { #endif } + if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) { +#ifdef COMPILER2 + if (FLAG_IS_DEFAULT(UseFPUForSpilling)) { + FLAG_SET_DEFAULT(UseFPUForSpilling, true); + } +#endif + } + #ifdef _LP64 // Prefetch settings diff --git a/src/hotspot/cpu/x86/vm_version_x86.hpp b/src/hotspot/cpu/x86/vm_version_x86.hpp index 0a3b53a5271..4eb4d65c72a 100644 --- a/src/hotspot/cpu/x86/vm_version_x86.hpp +++ b/src/hotspot/cpu/x86/vm_version_x86.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -305,6 +305,9 @@ protected: enum Extended_Family { // AMD CPU_FAMILY_AMD_11H = 0x11, + // ZX + CPU_FAMILY_ZX_CORE_F6 = 6, + CPU_FAMILY_ZX_CORE_F7 = 7, // Intel CPU_FAMILY_INTEL_CORE = 6, CPU_MODEL_NEHALEM = 0x1e, @@ -549,6 +552,16 @@ protected: } } + // ZX features. + if (is_zx()) { + if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0) + result |= CPU_LZCNT; + // for ZX, ecx.bits.misalignsse bit (bit 8) indicates support for prefetchw + if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) { + result |= CPU_3DNOW_PREFETCH; + } + } + return result; } @@ -657,6 +670,7 @@ public: static bool is_P6() { return cpu_family() >= 6; } static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA' static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG' + static bool is_zx() { assert_is_initialized(); return (_cpuid_info.std_vendor_name_0 == 0x746e6543) || (_cpuid_info.std_vendor_name_0 == 0x68532020); } // 'tneC'||'hS ' static bool is_atom_family() { return ((cpu_family() == 0x06) && ((extended_cpu_model() == 0x36) || (extended_cpu_model() == 0x37) || (extended_cpu_model() == 0x4D))); } //Silvermont and Centerton static bool is_knights_family() { return ((cpu_family() == 0x06) && ((extended_cpu_model() == 0x57) || (extended_cpu_model() == 0x85))); } // Xeon Phi 3200/5200/7200 and Future Xeon Phi @@ -680,6 +694,15 @@ public: } } else if (is_amd()) { result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); + } else if (is_zx()) { + bool supports_topology = supports_processor_topology(); + if (supports_topology) { + result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / + _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; + } + if (!supports_topology || result == 0) { + result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); + } } return result; } @@ -688,6 +711,8 @@ public: uint result 
= 1; if (is_intel() && supports_processor_topology()) { result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; + } else if (is_zx() && supports_processor_topology()) { + result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { if (cpu_family() >= 0x17) { result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1; @@ -705,6 +730,8 @@ public: result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); } else if (is_amd()) { result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size; + } else if (is_zx()) { + result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); } if (result < 32) // not defined ? result = 32; // 32 bytes by default on x86 and other x64