8266332: Adler32 intrinsic for x86 64-bit platforms
Co-authored-by: Xubo Zhang <xubo.zhang@intel.com>
Co-authored-by: Greg B Tucker <greg.b.tucker@intel.com>
Co-authored-by: Pengfei Li <pli@openjdk.org>
Reviewed-by: sviswanathan, jbhateja, kvn, neliasso
This commit is contained in:
parent
b961f2535c
commit
8e3549fc73
@ -8030,6 +8030,18 @@ void Assembler::vbroadcastsd(XMMRegister dst, Address src, int vector_len) {
|
|||||||
emit_operand(dst, src);
|
emit_operand(dst, src);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits the memory-source form of VBROADCASTF128 (opcode 0x1A in the 0F 38 map
// with a 66 prefix): the 128-bit value at 'src' is replicated into the 256-bit
// register 'dst'.
//
//   dst        - destination vector register; must not be xnoreg
//   src        - memory operand supplying the 128-bit value
//   vector_len - must be AVX_256bit (asserted)
void Assembler::vbroadcastf128(XMMRegister dst, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  assert(vector_len == AVX_256bit, "");
  assert(dst != xnoreg, "sanity");
  InstructionMark mark(this);
  InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
  // The prefix helper takes the memory operand first, so src and dst are
  // swapped relative to the instruction's operand order.
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  emit_int8(0x1A);
  emit_operand(dst, src);
}
|
||||||
|
|
||||||
// gpr source broadcast forms
|
// gpr source broadcast forms
|
||||||
|
|
||||||
|
@ -2442,11 +2442,12 @@ private:
|
|||||||
void evbroadcasti64x2(XMMRegister dst, XMMRegister src, int vector_len);
|
void evbroadcasti64x2(XMMRegister dst, XMMRegister src, int vector_len);
|
||||||
void evbroadcasti64x2(XMMRegister dst, Address src, int vector_len);
|
void evbroadcasti64x2(XMMRegister dst, Address src, int vector_len);
|
||||||
|
|
||||||
// scalar single/double precision replicate
|
// scalar single/double/128bit precision replicate
|
||||||
void vbroadcastss(XMMRegister dst, XMMRegister src, int vector_len);
|
void vbroadcastss(XMMRegister dst, XMMRegister src, int vector_len);
|
||||||
void vbroadcastss(XMMRegister dst, Address src, int vector_len);
|
void vbroadcastss(XMMRegister dst, Address src, int vector_len);
|
||||||
void vbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len);
|
void vbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len);
|
||||||
void vbroadcastsd(XMMRegister dst, Address src, int vector_len);
|
void vbroadcastsd(XMMRegister dst, Address src, int vector_len);
|
||||||
|
void vbroadcastf128(XMMRegister dst, Address src, int vector_len);
|
||||||
|
|
||||||
// gpr sourced byte/word/dword/qword replicate
|
// gpr sourced byte/word/dword/qword replicate
|
||||||
void evpbroadcastb(XMMRegister dst, Register src, int vector_len);
|
void evpbroadcastb(XMMRegister dst, Register src, int vector_len);
|
||||||
|
@ -3231,6 +3231,16 @@ void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int
|
|||||||
Assembler::vpmullw(dst, nds, src, vector_len);
|
Assembler::vpmullw(dst, nds, src, vector_len);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// vpmulld with an AddressLiteral source operand.
//
// When the literal can be addressed directly it is used as the memory operand.
// Otherwise its address is first materialized in 'scratch_reg' (which is
// clobbered in that case) and the multiply reads through that register.
void MacroAssembler::vpmulld(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
  assert((UseAVX > 0), "AVX support is needed");
  if (!reachable(src)) {
    // Not directly addressable: go through the scratch register.
    lea(scratch_reg, src);
    Assembler::vpmulld(dst, nds, Address(scratch_reg, 0), vector_len);
    return;
  }
  Assembler::vpmulld(dst, nds, as_Address(src), vector_len);
}
|
||||||
|
|
||||||
void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||||
assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
|
assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
|
||||||
Assembler::vpsubb(dst, nds, src, vector_len);
|
Assembler::vpsubb(dst, nds, src, vector_len);
|
||||||
|
@ -1307,6 +1307,13 @@ public:
|
|||||||
|
|
||||||
void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||||
void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
|
void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
|
||||||
|
// Memory-source form: forwards unchanged to the Assembler encoder.
// (No trailing ';' after the body, matching the register-source overload.)
void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  Assembler::vpmulld(dst, nds, src, vector_len);
}
|
||||||
|
// Register-source form: forwards unchanged to the Assembler encoder.
void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpmulld(dst, nds, src, vector_len); }
|
||||||
|
void vpmulld(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg);
|
||||||
|
|
||||||
void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||||
void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
|
void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
|
||||||
@ -1764,6 +1771,7 @@ public:
|
|||||||
void kernel_crc32_avx512_256B(Register crc, Register buf, Register len, Register key, Register pos,
|
void kernel_crc32_avx512_256B(Register crc, Register buf, Register len, Register key, Register pos,
|
||||||
Register tmp1, Register tmp2, Label& L_barrett, Label& L_16B_reduction_loop,
|
Register tmp1, Register tmp2, Label& L_barrett, Label& L_16B_reduction_loop,
|
||||||
Label& L_get_last_two_xmms, Label& L_128_done, Label& L_cleanup);
|
Label& L_get_last_two_xmms, Label& L_128_done, Label& L_cleanup);
|
||||||
|
void updateBytesAdler32(Register adler32, Register buf, Register length, XMMRegister shuf0, XMMRegister shuf1, ExternalAddress scale);
|
||||||
#endif // _LP64
|
#endif // _LP64
|
||||||
|
|
||||||
// CRC32C code for java.util.zip.CRC32C::updateBytes() intrinsic
|
// CRC32C code for java.util.zip.CRC32C::updateBytes() intrinsic
|
||||||
|
211
src/hotspot/cpu/x86/macroAssembler_x86_adler.cpp
Normal file
211
src/hotspot/cpu/x86/macroAssembler_x86_adler.cpp
Normal file
@ -0,0 +1,211 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2021, Intel Corporation.
|
||||||
|
*
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "precompiled.hpp"
|
||||||
|
#include "asm/assembler.hpp"
|
||||||
|
#include "asm/assembler.inline.hpp"
|
||||||
|
#include "runtime/stubRoutines.hpp"
|
||||||
|
#include "macroAssembler_x86.hpp"
|
||||||
|
|
||||||
|
#ifdef _LP64
|
||||||
|
// Emits the AVX2 body of the Adler32 updateBytes intrinsic.
//
// Parameters:
//   init_d    - incoming checksum; high 16 bits are 'b', low 16 bits are 'a'.
//               Masked to its low 16 bits by this code.
//   data      - address of the first byte to process; advanced as bytes are consumed.
//   size      - number of bytes (32-bit); decremented as blocks are consumed.
//   yshuf0    - vpshufb mask applied first to every 16-byte chunk.
//   yshuf1    - vpshufb mask applied second to every 16-byte chunk.
//   ascaletab - address of the eight dword multipliers applied to the lanes of
//               the 'a' accumulator during reduction.
//
// Result: the updated checksum ((b << 16) | a) is left in rax.
//
// Registers: r12, r13 and r14 are saved on entry and restored on exit.
// rax, rcx, rdx, r8, r11 and xmm0-xmm5 are overwritten (rcx/rdx by the divl
// sequences). The upper YMM halves are cleared before returning.
//
// Structure:
//   * Inputs shorter than 32 bytes go straight to the byte-at-a-time loop.
//   * Otherwise the data is consumed in blocks of at most LIMIT bytes. Within
//     a block, 16-byte chunks are accumulated into eight dword lanes (ya/yb)
//     without any modulo; the lanes are reduced and taken modulo BASE once per
//     block.
//   * A block whose length is not a multiple of 16 is necessarily the last
//     one; its 1..15 trailing bytes are handled by the byte loop.
void MacroAssembler::updateBytesAdler32(Register init_d, Register data, Register size, XMMRegister yshuf0, XMMRegister yshuf1, ExternalAddress ascaletab)
{
  const int LIMIT = 5552;          // max bytes accumulated between two modulo reductions
                                   // NOTE(review): same value as zlib's NMAX, presumably chosen
                                   // so the 32-bit lane sums cannot overflow - confirm.
  const int BASE = 65521;          // modulus used for both halves of the checksum
  const int CHUNKSIZE = 16;        // bytes consumed per vector loop iteration
  const int CHUNKSIZE_M1 = CHUNKSIZE - 1;

  const Register s = r11;          // length of the current block: min(size, LIMIT)
  const Register a_d = r12; //r12d // scalar 'a'
  const Register b_d = r8; //r8d   // scalar 'b'
  const Register end = r13;        // loop bound / end of the current block

  const XMMRegister ya = xmm0;     // per-lane running byte sums
  const XMMRegister yb = xmm1;     // per-lane running sums of ya
  const XMMRegister ydata0 = xmm2; // chunk after shuffle with yshuf0
  const XMMRegister ydata1 = xmm3; // chunk after shuffle with yshuf1
  const XMMRegister ysa = xmm4;    // ya multiplied lane-wise by ascaletab
  const XMMRegister ydata = ysa;   // raw chunk; shares xmm4 with ysa (never live together)
  // 128-bit views of the registers above, used during the horizontal reduction
  const XMMRegister xa = xmm0;
  const XMMRegister xb = xmm1;
  const XMMRegister xtmp0 = xmm2;
  const XMMRegister xtmp1 = xmm3;
  const XMMRegister xsa = xmm4;
  const XMMRegister xtmp2 = xmm5;
  assert_different_registers(init_d, data, size, s, a_d, b_d, end, rax);

  Label SLOOP1, SLOOP1A, SKIP_LOOP_1A, FINISH, LT64, DO_FINAL, FINAL_LOOP, ZERO_SIZE, END;

  push(r12);
  push(r13);
  push(r14);                       // r14 is the scratch register for the ascaletab access
  movl(b_d, init_d); //adler
  shrl(b_d, 16);                   // b = adler >> 16
  andl(init_d, 0xFFFF);            // a = adler & 0xFFFF
  cmpl(size, 32);
  jcc(Assembler::below, LT64);     // short input: scalar path only
  movdl(xa, init_d); //vmovd - 32bit
  vpxor(yb, yb, yb, Assembler::AVX_256bit);

  // ---- start of a block of at most LIMIT bytes ----
  bind(SLOOP1);
  movl(s, LIMIT);
  cmpl(s, size);
  cmovl(Assembler::above, s, size); // s = min(size, LIMIT)
  lea(end, Address(s, data, Address::times_1, -CHUNKSIZE_M1)); // end = data + s - 15
  cmpptr(data, end);
  jcc(Assembler::aboveEqual, SKIP_LOOP_1A); // fewer than 16 bytes in this block

  // ---- vector loop: one 16-byte chunk per iteration ----
  align(32);
  bind(SLOOP1A);
  vbroadcastf128(ydata, Address(data, 0), Assembler::AVX_256bit);
  addptr(data, CHUNKSIZE);
  vpshufb(ydata0, ydata, yshuf0, Assembler::AVX_256bit);
  vpaddd(ya, ya, ydata0, Assembler::AVX_256bit);
  vpaddd(yb, yb, ya, Assembler::AVX_256bit);
  vpshufb(ydata1, ydata, yshuf1, Assembler::AVX_256bit);
  vpaddd(ya, ya, ydata1, Assembler::AVX_256bit);
  vpaddd(yb, yb, ya, Assembler::AVX_256bit);
  cmpptr(data, end);
  jcc(Assembler::below, SLOOP1A);

  bind(SKIP_LOOP_1A);
  addptr(end, CHUNKSIZE_M1);       // end = first byte past the current block
  testl(s, CHUNKSIZE_M1);
  jcc(Assembler::notEqual, DO_FINAL); // block length not a multiple of 16: tail bytes remain

  // either we're done, or we just did LIMIT
  subl(size, s);

  // reduce
  vpslld(yb, yb, 3, Assembler::AVX_256bit); //b is scaled by 8
  vpmulld(ysa, ya, ascaletab, Assembler::AVX_256bit, r14);

  // compute horizontal sums of ya, yb, ysa
  vextracti128(xtmp0, ya, 1);
  vextracti128(xtmp1, yb, 1);
  vextracti128(xtmp2, ysa, 1);
  vpaddd(xa, xa, xtmp0, Assembler::AVX_128bit);
  vpaddd(xb, xb, xtmp1, Assembler::AVX_128bit);
  vpaddd(xsa, xsa, xtmp2, Assembler::AVX_128bit);
  vphaddd(xa, xa, xa, Assembler::AVX_128bit);
  vphaddd(xb, xb, xb, Assembler::AVX_128bit);
  vphaddd(xsa, xsa, xsa, Assembler::AVX_128bit);
  vphaddd(xa, xa, xa, Assembler::AVX_128bit);
  vphaddd(xb, xb, xb, Assembler::AVX_128bit);
  vphaddd(xsa, xsa, xsa, Assembler::AVX_128bit);

  // a = sum(ya) mod BASE
  movdl(rax, xa);
  xorl(rdx, rdx);
  movl(rcx, BASE);
  divl(rcx); // divide edx:eax by ecx, quot->eax, rem->edx
  movl(a_d, rdx);

  // b = (b + 8 * sum(yb) - sum(ysa)) mod BASE
  vpsubd(xb, xb, xsa, Assembler::AVX_128bit);
  movdl(rax, xb);
  addl(rax, b_d);
  xorl(rdx, rdx);
  movl(rcx, BASE);
  divl(rcx); // divide edx:eax by ecx, quot->eax, rem->edx
  movl(b_d, rdx);

  testl(size, size);
  jcc(Assembler::zero, FINISH);

  // continue loop: restart the lane accumulators from the reduced 'a'
  movdl(xa, a_d);
  vpxor(yb, yb, yb, Assembler::AVX_256bit);
  jmp(SLOOP1);

  // ---- all data consumed on a block boundary; a and b are already reduced ----
  bind(FINISH);
  movl(rax, b_d);
  shll(rax, 16);
  orl(rax, a_d);
  jmp(END);

  // ---- whole input is shorter than 32 bytes: scalar loop over all of it ----
  bind(LT64);
  movl(a_d, init_d);
  lea(end, Address(data, size, Address::times_1));
  testl(size, size);
  jcc(Assembler::notZero, FINAL_LOOP);
  jmp(ZERO_SIZE);

  // ---- last block has 1...15 bytes left after the vector loop ----
  bind(DO_FINAL);
  // reduce the lanes to scalars, but defer the modulo until after the byte loop
  vpslld(yb, yb, 3, Assembler::AVX_256bit); //b is scaled by 8
  vpmulld(ysa, ya, ascaletab, Assembler::AVX_256bit, r14); //scaled a

  vextracti128(xtmp0, ya, 1);
  vextracti128(xtmp1, yb, 1);
  vextracti128(xtmp2, ysa, 1);
  vpaddd(xa, xa, xtmp0, Assembler::AVX_128bit);
  vpaddd(xb, xb, xtmp1, Assembler::AVX_128bit);
  vpaddd(xsa, xsa, xtmp2, Assembler::AVX_128bit);
  vphaddd(xa, xa, xa, Assembler::AVX_128bit);
  vphaddd(xb, xb, xb, Assembler::AVX_128bit);
  vphaddd(xsa, xsa, xsa, Assembler::AVX_128bit);
  vphaddd(xa, xa, xa, Assembler::AVX_128bit);
  vphaddd(xb, xb, xb, Assembler::AVX_128bit);
  vphaddd(xsa, xsa, xsa, Assembler::AVX_128bit);
  vpsubd(xb, xb, xsa, Assembler::AVX_128bit);

  movdl(a_d, xa);
  movdl(rax, xb);
  addl(b_d, rax);

  // ---- scalar loop: a += byte; b += a ----
  align(32);
  bind(FINAL_LOOP);
  movzbl(rax, Address(data, 0)); //movzx   eax, byte[data]
  addl(a_d, rax);
  addptr(data, 1);
  addl(b_d, a_d);
  cmpptr(data, end);
  jcc(Assembler::below, FINAL_LOOP);

  // ---- final modulo of both halves and packing of the result ----
  bind(ZERO_SIZE);

  movl(rax, a_d);
  xorl(rdx, rdx);
  movl(rcx, BASE);
  divl(rcx); // div ecx -- divide edx:eax by ecx, quot->eax, rem->edx
  movl(a_d, rdx);

  movl(rax, b_d);
  xorl(rdx, rdx);
  movl(rcx, BASE);
  divl(rcx); // divide edx:eax by ecx, quot->eax, rem->edx
  shll(rdx, 16);
  orl(rdx, a_d);
  movl(rax, rdx);

  bind(END);
  // 256-bit AVX registers were used on the way here; clear their upper halves
  // so the code we return to does not pay AVX/SSE transition penalties.
  vzeroupper();
  pop(r14);
  pop(r13);
  pop(r12);
}
|
||||||
|
#endif
|
@ -5790,6 +5790,47 @@ address generate_avx_ghash_processBlocks() {
|
|||||||
return start;
|
return start;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/***
|
||||||
|
* Arguments:
|
||||||
|
*
|
||||||
|
* Inputs:
|
||||||
|
* c_rarg0 - int adler
|
||||||
|
* c_rarg1 - byte* buff
|
||||||
|
* c_rarg2 - int len
|
||||||
|
*
|
||||||
|
* Output:
|
||||||
|
* rax - int adler result
|
||||||
|
*/
|
||||||
|
|
||||||
|
address generate_updateBytesAdler32() {
|
||||||
|
assert(UseAdler32Intrinsics, "need AVX2");
|
||||||
|
|
||||||
|
__ align(CodeEntryAlignment);
|
||||||
|
StubCodeMark mark(this, "StubRoutines", "updateBytesAdler32");
|
||||||
|
|
||||||
|
address start = __ pc();
|
||||||
|
|
||||||
|
const Register data = r9;
|
||||||
|
const Register size = r10;
|
||||||
|
|
||||||
|
const XMMRegister yshuf0 = xmm6;
|
||||||
|
const XMMRegister yshuf1 = xmm7;
|
||||||
|
assert_different_registers(c_rarg0, c_rarg1, c_rarg2, data, size);
|
||||||
|
|
||||||
|
BLOCK_COMMENT("Entry:");
|
||||||
|
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||||
|
|
||||||
|
__ vmovdqu(yshuf0, ExternalAddress((address) StubRoutines::x86::_adler32_shuf0_table), r9);
|
||||||
|
__ vmovdqu(yshuf1, ExternalAddress((address) StubRoutines::x86::_adler32_shuf1_table), r9);
|
||||||
|
__ movptr(data, c_rarg1); //data
|
||||||
|
__ movl(size, c_rarg2); //length
|
||||||
|
__ updateBytesAdler32(c_rarg0, data, size, yshuf0, yshuf1, ExternalAddress((address) StubRoutines::x86::_adler32_ascale_table));
|
||||||
|
__ leave();
|
||||||
|
__ ret(0);
|
||||||
|
return start;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Arguments:
|
* Arguments:
|
||||||
*
|
*
|
||||||
@ -6754,6 +6795,11 @@ address generate_avx_ghash_processBlocks() {
|
|||||||
StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
|
StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
|
||||||
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
|
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (UseAdler32Intrinsics) {
|
||||||
|
StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32();
|
||||||
|
}
|
||||||
|
|
||||||
if (UseLibmIntrinsic && InlineIntrinsics) {
|
if (UseLibmIntrinsic && InlineIntrinsics) {
|
||||||
if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin) ||
|
if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin) ||
|
||||||
vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos) ||
|
vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos) ||
|
||||||
|
@ -224,6 +224,25 @@ juint StubRoutines::x86::_shuf_table_crc32_avx512[] =
|
|||||||
0x83828100UL, 0x87868584UL, 0x8b8a8988UL, 0x8f8e8d8cUL,
|
0x83828100UL, 0x87868584UL, 0x8b8a8988UL, 0x8f8e8d8cUL,
|
||||||
0x03020100UL, 0x07060504UL, 0x0b0a0908UL, 0x000e0d0cUL
|
0x03020100UL, 0x07060504UL, 0x0b0a0908UL, 0x000e0d0cUL
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Eight dword multipliers (0..7), one per 32-bit lane of a 256-bit vector.
// updateBytesAdler32 multiplies its per-lane 'a' accumulator by this table
// (vpmulld) when reducing the lanes to scalars.
juint StubRoutines::x86::_adler32_ascale_table[] =
{
    0x00000000UL,
    0x00000001UL,
    0x00000002UL,
    0x00000003UL,
    0x00000004UL,
    0x00000005UL,
    0x00000006UL,
    0x00000007UL
};
|
||||||
|
|
||||||
|
// vpshufb control mask applied first to each 16-byte chunk in
// updateBytesAdler32. In every dword the low control byte is a source byte
// index (0x00..0x07) and the other three control bytes are 0xFF; vpshufb
// writes zero for a control byte with its high bit set, so each dword lane
// ends up holding one zero-extended input byte.
juint StubRoutines::x86::_adler32_shuf0_table[] =
{
    0xFFFFFF00UL,
    0xFFFFFF01UL,
    0xFFFFFF02UL,
    0xFFFFFF03UL,
    0xFFFFFF04UL,
    0xFFFFFF05UL,
    0xFFFFFF06UL,
    0xFFFFFF07UL
};
|
||||||
|
|
||||||
|
// vpshufb control mask applied second to each 16-byte chunk in
// updateBytesAdler32. Same layout as the first mask, but the source byte
// indices are 0x08..0x0F: each dword lane receives one zero-extended byte
// from the upper half of the chunk (0xFF control bytes produce zero).
// All entries carry the UL suffix, consistent with the neighbouring tables.
juint StubRoutines::x86::_adler32_shuf1_table[] =
{
    0xFFFFFF08UL, 0xFFFFFF09UL, 0xFFFFFF0AUL, 0xFFFFFF0BUL,
    0xFFFFFF0CUL, 0xFFFFFF0DUL, 0xFFFFFF0EUL, 0xFFFFFF0FUL
};
|
||||||
|
|
||||||
#endif // _LP64
|
#endif // _LP64
|
||||||
|
|
||||||
#define D 32
|
#define D 32
|
||||||
|
@ -119,6 +119,9 @@ class x86 {
|
|||||||
static juint _crc_by128_masks_avx512[];
|
static juint _crc_by128_masks_avx512[];
|
||||||
static juint _crc_table_avx512[];
|
static juint _crc_table_avx512[];
|
||||||
static juint _shuf_table_crc32_avx512[];
|
static juint _shuf_table_crc32_avx512[];
|
||||||
|
static juint _adler32_shuf0_table[];
|
||||||
|
static juint _adler32_shuf1_table[];
|
||||||
|
static juint _adler32_ascale_table[];
|
||||||
#endif // _LP64
|
#endif // _LP64
|
||||||
// table for CRC32C
|
// table for CRC32C
|
||||||
static juint* _crc32c_table;
|
static juint* _crc32c_table;
|
||||||
|
@ -898,6 +898,24 @@ void VM_Version::get_processor_features() {
|
|||||||
FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
|
FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef _LP64
|
||||||
|
if (supports_avx2()) {
|
||||||
|
if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
|
||||||
|
UseAdler32Intrinsics = true;
|
||||||
|
}
|
||||||
|
} else if (UseAdler32Intrinsics) {
|
||||||
|
if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
|
||||||
|
warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
|
||||||
|
}
|
||||||
|
FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
if (UseAdler32Intrinsics) {
|
||||||
|
warning("Adler32Intrinsics not available on this CPU.");
|
||||||
|
FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (supports_sse4_2() && supports_clmul()) {
|
if (supports_sse4_2() && supports_clmul()) {
|
||||||
if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
|
if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
|
||||||
UseCRC32CIntrinsics = true;
|
UseCRC32CIntrinsics = true;
|
||||||
@ -993,11 +1011,6 @@ void VM_Version::get_processor_features() {
|
|||||||
FLAG_SET_DEFAULT(UseSHA, false);
|
FLAG_SET_DEFAULT(UseSHA, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (UseAdler32Intrinsics) {
|
|
||||||
warning("Adler32Intrinsics not available on this CPU.");
|
|
||||||
FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!supports_rtm() && UseRTMLocking) {
|
if (!supports_rtm() && UseRTMLocking) {
|
||||||
// Can't continue because UseRTMLocking affects UseBiasedLocking flag
|
// Can't continue because UseRTMLocking affects UseBiasedLocking flag
|
||||||
// setting during arguments processing. See use_biased_locking().
|
// setting during arguments processing. See use_biased_locking().
|
||||||
|
@ -93,6 +93,7 @@ bool vmIntrinsics::preserves_state(vmIntrinsics::ID id) {
|
|||||||
case vmIntrinsics::_updateCRC32:
|
case vmIntrinsics::_updateCRC32:
|
||||||
case vmIntrinsics::_updateBytesCRC32:
|
case vmIntrinsics::_updateBytesCRC32:
|
||||||
case vmIntrinsics::_updateByteBufferCRC32:
|
case vmIntrinsics::_updateByteBufferCRC32:
|
||||||
|
case vmIntrinsics::_updateBytesAdler32:
|
||||||
case vmIntrinsics::_vectorizedMismatch:
|
case vmIntrinsics::_vectorizedMismatch:
|
||||||
case vmIntrinsics::_fmaD:
|
case vmIntrinsics::_fmaD:
|
||||||
case vmIntrinsics::_fmaF:
|
case vmIntrinsics::_fmaF:
|
||||||
|
@ -231,6 +231,7 @@ JVMCIObjectArray CompilerToVM::initialize_intrinsics(JVMCI_TRAPS) {
|
|||||||
X86_ONLY(do_intx_flag(UseAVX)) \
|
X86_ONLY(do_intx_flag(UseAVX)) \
|
||||||
do_bool_flag(UseBiasedLocking) \
|
do_bool_flag(UseBiasedLocking) \
|
||||||
do_bool_flag(UseCRC32Intrinsics) \
|
do_bool_flag(UseCRC32Intrinsics) \
|
||||||
|
do_bool_flag(UseAdler32Intrinsics) \
|
||||||
do_bool_flag(UseCompressedClassPointers) \
|
do_bool_flag(UseCompressedClassPointers) \
|
||||||
do_bool_flag(UseCompressedOops) \
|
do_bool_flag(UseCompressedOops) \
|
||||||
X86_ONLY(do_bool_flag(UseCountLeadingZerosInstruction)) \
|
X86_ONLY(do_bool_flag(UseCountLeadingZerosInstruction)) \
|
||||||
|
@ -574,6 +574,7 @@ typedef HashtableEntry<InstanceKlass*, mtClass> KlassHashtableEntry;
|
|||||||
static_field(StubRoutines, _crc_table_adr, address) \
|
static_field(StubRoutines, _crc_table_adr, address) \
|
||||||
static_field(StubRoutines, _crc32c_table_addr, address) \
|
static_field(StubRoutines, _crc32c_table_addr, address) \
|
||||||
static_field(StubRoutines, _updateBytesCRC32C, address) \
|
static_field(StubRoutines, _updateBytesCRC32C, address) \
|
||||||
|
static_field(StubRoutines, _updateBytesAdler32, address) \
|
||||||
static_field(StubRoutines, _multiplyToLen, address) \
|
static_field(StubRoutines, _multiplyToLen, address) \
|
||||||
static_field(StubRoutines, _squareToLen, address) \
|
static_field(StubRoutines, _squareToLen, address) \
|
||||||
static_field(StubRoutines, _bigIntegerRightShiftWorker, address) \
|
static_field(StubRoutines, _bigIntegerRightShiftWorker, address) \
|
||||||
|
62
test/micro/org/openjdk/bench/java/util/TestAdler32.java
Normal file
62
test/micro/org/openjdk/bench/java/util/TestAdler32.java
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*/
|
||||||
|
package org.openjdk.bench.java.util;
|
||||||
|
|
||||||
|
import java.util.Random;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.zip.Adler32;
|
||||||
|
import org.openjdk.jmh.annotations.*;
|
||||||
|
|
||||||
|
@BenchmarkMode(Mode.AverageTime)
|
||||||
|
@OutputTimeUnit(TimeUnit.MICROSECONDS)
|
||||||
|
@State(Scope.Benchmark)
|
||||||
|
@Fork(value = 2)
|
||||||
|
@Warmup(iterations = 2, time = 30, timeUnit = TimeUnit.SECONDS)
|
||||||
|
@Measurement(iterations = 3, time = 60, timeUnit = TimeUnit.SECONDS)
|
||||||
|
|
||||||
|
public class TestAdler32 {
|
||||||
|
|
||||||
|
private Adler32 adler32;
|
||||||
|
private Random random;
|
||||||
|
private byte[] bytes;
|
||||||
|
|
||||||
|
@Param({"64", "128", "256", "512", "1024", "2048", "4096", "8192", "16384", "32768", "65536"})
|
||||||
|
private int count;
|
||||||
|
|
||||||
|
public TestAdler32() {
|
||||||
|
adler32 = new Adler32();
|
||||||
|
random = new Random(2147483648L);
|
||||||
|
bytes = new byte[1000000];
|
||||||
|
random.nextBytes(bytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Setup(Level.Iteration)
|
||||||
|
public void setupBytes() {
|
||||||
|
adler32.reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void testAdler32Update() {
|
||||||
|
adler32.update(bytes, 0, count);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user