8235510: java.util.zip.CRC32 performance drop after 8200067

Back out the 8200067 optimization, which caused the java.util.zip.CRC32 performance drop.

Reviewed-by: kvn
This commit is contained in:
Sandhya Viswanathan 2019-12-06 16:03:44 -08:00
parent e7fa637179
commit 6a547f9574
4 changed files with 7 additions and 35 deletions

View File

@ -7232,7 +7232,7 @@ void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, in
}
void Assembler::evpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask, int vector_len) {
assert(VM_Version::supports_vpclmulqdq(), "Requires vector carryless multiplication support");
assert(VM_Version::supports_avx512_vpclmulqdq(), "Requires vector carryless multiplication support");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);

View File

@ -8945,34 +8945,6 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Regi
shrl(len, 4);
jcc(Assembler::zero, L_tail_restore);
// Fold total 512 bits of polynomial on each iteration
if (VM_Version::supports_vpclmulqdq()) {
Label Parallel_loop, L_No_Parallel;
cmpl(len, 8);
jccb(Assembler::less, L_No_Parallel);
movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 32));
evmovdquq(xmm1, Address(buf, 0), Assembler::AVX_512bit);
movdl(xmm5, crc);
evpxorq(xmm1, xmm1, xmm5, Assembler::AVX_512bit);
addptr(buf, 64);
subl(len, 7);
evshufi64x2(xmm0, xmm0, xmm0, 0x00, Assembler::AVX_512bit); //propagate the mask from 128 bits to 512 bits
BIND(Parallel_loop);
fold_128bit_crc32_avx512(xmm1, xmm0, xmm5, buf, 0);
addptr(buf, 64);
subl(len, 4);
jcc(Assembler::greater, Parallel_loop);
vextracti64x2(xmm2, xmm1, 0x01);
vextracti64x2(xmm3, xmm1, 0x02);
vextracti64x2(xmm4, xmm1, 0x03);
jmp(L_fold_512b);
BIND(L_No_Parallel);
}
// Fold crc into first bytes of vector
movdqa(xmm1, Address(buf, 0));
movdl(rax, xmm1);

View File

@ -691,7 +691,7 @@ void VM_Version::get_processor_features() {
_features &= ~CPU_AVX512BW;
_features &= ~CPU_AVX512VL;
_features &= ~CPU_AVX512_VPOPCNTDQ;
_features &= ~CPU_VPCLMULQDQ;
_features &= ~CPU_AVX512_VPCLMULQDQ;
_features &= ~CPU_VAES;
}

View File

@ -245,7 +245,7 @@ class VM_Version : public Abstract_VM_Version {
: 1,
gfni : 1,
vaes : 1,
vpclmulqdq : 1,
avx512_vpclmulqdq : 1,
avx512_vnni : 1,
avx512_bitalg : 1,
: 1,
@ -338,7 +338,7 @@ protected:
#define CPU_FMA ((uint64_t)UCONST64(0x800000000)) // FMA instructions
#define CPU_VZEROUPPER ((uint64_t)UCONST64(0x1000000000)) // Vzeroupper instruction
#define CPU_AVX512_VPOPCNTDQ ((uint64_t)UCONST64(0x2000000000)) // Vector popcount
#define CPU_VPCLMULQDQ ((uint64_t)UCONST64(0x4000000000)) //Vector carryless multiplication
#define CPU_AVX512_VPCLMULQDQ ((uint64_t)UCONST64(0x4000000000)) //Vector carryless multiplication
#define CPU_VAES ((uint64_t)UCONST64(0x8000000000)) // Vector AES instructions
#define CPU_VNNI ((uint64_t)UCONST64(0x10000000000)) // Vector Neural Network Instructions
@ -561,8 +561,8 @@ enum Extended_Family {
result |= CPU_AVX512VL;
if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
result |= CPU_AVX512_VPOPCNTDQ;
if (_cpuid_info.sef_cpuid7_ecx.bits.vpclmulqdq != 0)
result |= CPU_VPCLMULQDQ;
if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
result |= CPU_AVX512_VPCLMULQDQ;
if (_cpuid_info.sef_cpuid7_ecx.bits.vaes != 0)
result |= CPU_VAES;
if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vnni != 0)
@ -855,7 +855,7 @@ public:
static bool supports_fma() { return (_features & CPU_FMA) != 0 && supports_avx(); }
static bool supports_vzeroupper() { return (_features & CPU_VZEROUPPER) != 0; }
static bool supports_vpopcntdq() { return (_features & CPU_AVX512_VPOPCNTDQ) != 0; }
static bool supports_vpclmulqdq() { return (_features & CPU_VPCLMULQDQ) != 0; }
static bool supports_avx512_vpclmulqdq() { return (_features & CPU_AVX512_VPCLMULQDQ) != 0; }
static bool supports_vaes() { return (_features & CPU_VAES) != 0; }
static bool supports_vnni() { return (_features & CPU_VNNI) != 0; }