8235510: java.util.zip.CRC32 performance drop after 8200067
Backout 8200067 optimization
Reviewed-by: kvn
This commit is contained in:
parent
e7fa637179
commit
6a547f9574
@ -7232,7 +7232,7 @@ void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, in
|
||||
}
|
||||
|
||||
void Assembler::evpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask, int vector_len) {
|
||||
assert(VM_Version::supports_vpclmulqdq(), "Requires vector carryless multiplication support");
|
||||
assert(VM_Version::supports_avx512_vpclmulqdq(), "Requires vector carryless multiplication support");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
attributes.set_is_evex_instruction();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
|
@ -8945,34 +8945,6 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Regi
|
||||
shrl(len, 4);
|
||||
jcc(Assembler::zero, L_tail_restore);
|
||||
|
||||
// Fold total 512 bits of polynomial on each iteration
|
||||
if (VM_Version::supports_vpclmulqdq()) {
|
||||
Label Parallel_loop, L_No_Parallel;
|
||||
|
||||
cmpl(len, 8);
|
||||
jccb(Assembler::less, L_No_Parallel);
|
||||
|
||||
movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 32));
|
||||
evmovdquq(xmm1, Address(buf, 0), Assembler::AVX_512bit);
|
||||
movdl(xmm5, crc);
|
||||
evpxorq(xmm1, xmm1, xmm5, Assembler::AVX_512bit);
|
||||
addptr(buf, 64);
|
||||
subl(len, 7);
|
||||
evshufi64x2(xmm0, xmm0, xmm0, 0x00, Assembler::AVX_512bit); //propagate the mask from 128 bits to 512 bits
|
||||
|
||||
BIND(Parallel_loop);
|
||||
fold_128bit_crc32_avx512(xmm1, xmm0, xmm5, buf, 0);
|
||||
addptr(buf, 64);
|
||||
subl(len, 4);
|
||||
jcc(Assembler::greater, Parallel_loop);
|
||||
|
||||
vextracti64x2(xmm2, xmm1, 0x01);
|
||||
vextracti64x2(xmm3, xmm1, 0x02);
|
||||
vextracti64x2(xmm4, xmm1, 0x03);
|
||||
jmp(L_fold_512b);
|
||||
|
||||
BIND(L_No_Parallel);
|
||||
}
|
||||
// Fold crc into first bytes of vector
|
||||
movdqa(xmm1, Address(buf, 0));
|
||||
movdl(rax, xmm1);
|
||||
|
@ -691,7 +691,7 @@ void VM_Version::get_processor_features() {
|
||||
_features &= ~CPU_AVX512BW;
|
||||
_features &= ~CPU_AVX512VL;
|
||||
_features &= ~CPU_AVX512_VPOPCNTDQ;
|
||||
_features &= ~CPU_VPCLMULQDQ;
|
||||
_features &= ~CPU_AVX512_VPCLMULQDQ;
|
||||
_features &= ~CPU_VAES;
|
||||
}
|
||||
|
||||
|
@ -245,7 +245,7 @@ class VM_Version : public Abstract_VM_Version {
|
||||
: 1,
|
||||
gfni : 1,
|
||||
vaes : 1,
|
||||
vpclmulqdq : 1,
|
||||
avx512_vpclmulqdq : 1,
|
||||
avx512_vnni : 1,
|
||||
avx512_bitalg : 1,
|
||||
: 1,
|
||||
@ -338,7 +338,7 @@ protected:
|
||||
#define CPU_FMA ((uint64_t)UCONST64(0x800000000)) // FMA instructions
|
||||
#define CPU_VZEROUPPER ((uint64_t)UCONST64(0x1000000000)) // Vzeroupper instruction
|
||||
#define CPU_AVX512_VPOPCNTDQ ((uint64_t)UCONST64(0x2000000000)) // Vector popcount
|
||||
#define CPU_VPCLMULQDQ ((uint64_t)UCONST64(0x4000000000)) //Vector carryless multiplication
|
||||
#define CPU_AVX512_VPCLMULQDQ ((uint64_t)UCONST64(0x4000000000)) //Vector carryless multiplication
|
||||
#define CPU_VAES ((uint64_t)UCONST64(0x8000000000)) // Vector AES instructions
|
||||
#define CPU_VNNI ((uint64_t)UCONST64(0x10000000000)) // Vector Neural Network Instructions
|
||||
|
||||
@ -561,8 +561,8 @@ enum Extended_Family {
|
||||
result |= CPU_AVX512VL;
|
||||
if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
|
||||
result |= CPU_AVX512_VPOPCNTDQ;
|
||||
if (_cpuid_info.sef_cpuid7_ecx.bits.vpclmulqdq != 0)
|
||||
result |= CPU_VPCLMULQDQ;
|
||||
if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
|
||||
result |= CPU_AVX512_VPCLMULQDQ;
|
||||
if (_cpuid_info.sef_cpuid7_ecx.bits.vaes != 0)
|
||||
result |= CPU_VAES;
|
||||
if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vnni != 0)
|
||||
@ -855,7 +855,7 @@ public:
|
||||
static bool supports_fma() { return (_features & CPU_FMA) != 0 && supports_avx(); }
|
||||
static bool supports_vzeroupper() { return (_features & CPU_VZEROUPPER) != 0; }
|
||||
static bool supports_vpopcntdq() { return (_features & CPU_AVX512_VPOPCNTDQ) != 0; }
|
||||
static bool supports_vpclmulqdq() { return (_features & CPU_VPCLMULQDQ) != 0; }
|
||||
static bool supports_avx512_vpclmulqdq() { return (_features & CPU_AVX512_VPCLMULQDQ) != 0; }
|
||||
static bool supports_vaes() { return (_features & CPU_VAES) != 0; }
|
||||
static bool supports_vnni() { return (_features & CPU_VNNI) != 0; }
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user