8134553: CRC32C implementations for x86/x64 targets
Reviewed-by: kvn
This commit is contained in:
parent
d49d1ea740
commit
61b77b8590
@ -48,6 +48,7 @@ private:
|
||||
address generate_Reference_get_entry();
|
||||
address generate_CRC32_update_entry();
|
||||
address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind);
|
||||
// CRC32C updateBytes interpreter entry: this generator provides no specialized
// entry and always returns NULL.
address generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
|
||||
void lock_method(void);
|
||||
void generate_stack_overflow_check(void);
|
||||
|
||||
|
@ -38,5 +38,6 @@
|
||||
|
||||
address generate_CRC32_update_entry();
|
||||
address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind);
|
||||
// CRC32C updateBytes interpreter entry: this generator provides no specialized
// entry and always returns NULL.
address generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
|
||||
|
||||
#endif // CPU_PPC_VM_INTERPRETERGENERATOR_PPC_HPP
|
||||
|
@ -48,4 +48,5 @@
|
||||
// Not supported
|
||||
address generate_CRC32_update_entry() { return NULL; }
|
||||
address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
|
||||
address generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
|
||||
#endif // CPU_SPARC_VM_INTERPRETERGENERATOR_SPARC_HPP
|
||||
|
@ -1604,6 +1604,85 @@ void Assembler::cpuid() {
|
||||
emit_int8((unsigned char)0xA2);
|
||||
}
|
||||
|
||||
// Opcode / Instruction Op / En 64 - Bit Mode Compat / Leg Mode Description Implemented
|
||||
// F2 0F 38 F0 / r CRC32 r32, r / m8 RM Valid Valid Accumulate CRC32 on r / m8. v
|
||||
// F2 REX 0F 38 F0 / r CRC32 r32, r / m8* RM Valid N.E. Accumulate CRC32 on r / m8. -
|
||||
// F2 REX.W 0F 38 F0 / r CRC32 r64, r / m8 RM Valid N.E. Accumulate CRC32 on r / m8. -
|
||||
//
|
||||
// F2 0F 38 F1 / r CRC32 r32, r / m16 RM Valid Valid Accumulate CRC32 on r / m16. v
|
||||
//
|
||||
// F2 0F 38 F1 / r CRC32 r32, r / m32 RM Valid Valid Accumulate CRC32 on r / m32. v
|
||||
//
|
||||
// F2 REX.W 0F 38 F1 / r CRC32 r64, r / m64 RM Valid N.E. Accumulate CRC32 on r / m64. v
|
||||
// Emits the SSE4.2 CRC32 instruction with a register source operand.
//
//   crc          - accumulator register (encoded in ModRM.reg)
//   v            - source register      (encoded in ModRM.rm)
//   sizeInBytes  - width of the source operand: 1, 2, 4 or (64-bit only) 8
//
// Encoding: [66] F2 [REX] 0F 38 F0|w /r
//   w == 0 selects the byte form (opcode F0), w == 1 the word/dword/qword
//   form (opcode F1).
void Assembler::crc32(Register crc, Register v, int8_t sizeInBytes) {
  assert(VM_Version::supports_sse4_2(), "");
  int8_t w = 0x01;
  Prefix p = Prefix_EMPTY;

  if (sizeInBytes == 2) {
    // The r/m16 and r/m32 forms share opcode F2 0F 38 F1; the 16-bit form is
    // selected by the operand-size override prefix. Without it the
    // instruction would consume 32 bits of the source.
    emit_int8((int8_t)0x66);
  }
  emit_int8((int8_t)0xF2);
  switch (sizeInBytes) {
  case 1:
    w = 0;
    break;
  case 2:
  case 4:
    break;
  LP64_ONLY(case 8:)
    // This instruction is not valid in 32 bits
    // Note:
    // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
    //
    // Page B - 72   Vol. 2C says
    // qwreg2 to qwreg            1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : 11 qwreg1 qwreg2
    // mem64 to qwreg             1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : mod qwreg r / m
    //                                                                            F0!!!
    // while 3 - 208 Vol. 2A
    // F2 REX.W 0F 38 F1 / r       CRC32 r64, r / m64             RM         Valid      N.E.Accumulate CRC32 on r / m64.
    //
    // the 0 on a last bit is reserved for a different flavor of this instruction :
    // F2 REX.W 0F 38 F0 / r       CRC32 r64, r / m8              RM         Valid      N.E.Accumulate CRC32 on r / m8.
    p = REX_W;
    break;
  default:
    assert(0, "Unsupported value for a sizeInBytes argument");
    break;
  }
  // REX (if any) must directly precede the 0F 38 opcode bytes.
  LP64_ONLY(prefix(crc, v, p);)
  emit_int8((int8_t)0x0F);
  emit_int8(0x38);
  emit_int8((int8_t)(0xF0 | w));
  // ModRM: mod = 11 (register direct), reg = crc, rm = v.
  // NOTE(review): for sizeInBytes == 1 only the low 3 bits of v are encoded
  // and no bare REX is forced; confirm callers pass a byte-addressable register.
  emit_int8(0xC0 | ((crc->encoding() & 0x7) << 3) | (v->encoding() & 7));
}
|
||||
|
||||
// Emits the SSE4.2 CRC32 instruction with a memory source operand.
//
//   crc          - accumulator register
//   adr          - address of the source operand
//   sizeInBytes  - width of the memory operand: 1, 2, 4 or (64-bit only) 8
//
// Encoding: [66] F2 [REX] 0F 38 F0|w /r
void Assembler::crc32(Register crc, Address adr, int8_t sizeInBytes) {
  assert(VM_Version::supports_sse4_2(), "");
  InstructionMark im(this);
  int8_t w = 0x01;
  Prefix p = Prefix_EMPTY;

  if (sizeInBytes == 2) {
    // The m16 and m32 forms share opcode F2 0F 38 F1; the 16-bit form is
    // selected by the operand-size override prefix. Without it the
    // instruction would read 4 bytes from memory instead of 2.
    emit_int8((int8_t)0x66);
  }
  emit_int8((int8_t)0xF2);
  switch (sizeInBytes) {
  case 1:
    w = 0;  // byte form uses opcode F0
    break;
  case 2:
  case 4:
    break;
  LP64_ONLY(case 8:)
    // This instruction is not valid in 32 bits
    p = REX_W;
    break;
  default:
    assert(0, "Unsupported value for a sizeInBytes argument");
    break;
  }
  // REX (if any) must directly precede the 0F 38 opcode bytes.
  LP64_ONLY(prefix(crc, adr, p);)
  emit_int8((int8_t)0x0F);
  emit_int8(0x38);
  emit_int8((int8_t)(0xF0 | w));
  emit_operand(crc, adr);
}
|
||||
|
||||
void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3, /* no_mask_reg */ false, /* legacy_mode */ true);
|
||||
@ -6223,6 +6302,14 @@ void Assembler::shldl(Register dst, Register src) {
|
||||
emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
|
||||
}
|
||||
|
||||
// 0F A4 / r ib
|
||||
void Assembler::shldl(Register dst, Register src, int8_t imm8) {
|
||||
emit_int8(0x0F);
|
||||
emit_int8((unsigned char)0xA4);
|
||||
emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
|
||||
emit_int8(imm8);
|
||||
}
|
||||
|
||||
void Assembler::shrdl(Register dst, Register src) {
|
||||
emit_int8(0x0F);
|
||||
emit_int8((unsigned char)0xAD);
|
||||
@ -6408,6 +6495,40 @@ void Assembler::prefix(Register reg) {
|
||||
}
|
||||
}
|
||||
|
||||
// Emits a single REX prefix for a reg/reg instruction, starting from the
// caller-supplied bits in p and adding REX.B for an extended src (encoding >= 8)
// and REX.R for an extended dst. Nothing is emitted when no bit is needed.
void Assembler::prefix(Register dst, Register src, Prefix p) {
  int rex = p;
  if (src->encoding() >= 8) {
    rex |= REX_B;
  }
  if (dst->encoding() >= 8) {
    rex |= REX_R;
  }
  if (rex == Prefix_EMPTY) {
    return;  // do not generate an empty prefix
  }
  prefix((Prefix)rex);
}
|
||||
|
||||
// Emits a single REX prefix for a reg/mem instruction.
//
//   dst - register operand; adds REX.R when its encoding is >= 8
//   adr - memory operand;   adds REX.B when its base register needs REX
//   p   - REX bits requested by the caller (e.g. REX_W), or Prefix_EMPTY
//
// An extended index register (REX.X) is not supported by this helper.
// All required bits are merged into ONE prefix byte: emitting REX.B on its
// own and then a second REX for W/R would leave only the last REX in effect
// and silently drop the base-register extension.
void Assembler::prefix(Register dst, Address adr, Prefix p) {
  if (adr.index_needs_rex()) {
    assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
  }
  if (adr.base_needs_rex()) {
    p = (Prefix)(p | REX_B);
  }
  if (dst->encoding() >= 8) {
    p = (Prefix)(p | REX_R);
  }
  if (p != Prefix_EMPTY) {
    // do not generate an empty prefix
    prefix(p);
  }
}
|
||||
|
||||
void Assembler::prefix(Address adr) {
|
||||
if (adr.base_needs_rex()) {
|
||||
if (adr.index_needs_rex()) {
|
||||
|
@ -506,7 +506,8 @@ class Assembler : public AbstractAssembler {
|
||||
|
||||
VEX_3bytes = 0xC4,
|
||||
VEX_2bytes = 0xC5,
|
||||
EVEX_4bytes = 0x62
|
||||
EVEX_4bytes = 0x62,
|
||||
Prefix_EMPTY = 0x0
|
||||
};
|
||||
|
||||
enum VexPrefix {
|
||||
@ -615,6 +616,8 @@ private:
|
||||
int prefixq_and_encode(int dst_enc, int src_enc);
|
||||
|
||||
void prefix(Register reg);
|
||||
void prefix(Register dst, Register src, Prefix p);
|
||||
void prefix(Register dst, Address adr, Prefix p);
|
||||
void prefix(Address adr);
|
||||
void prefixq(Address adr);
|
||||
|
||||
@ -1177,6 +1180,10 @@ private:
|
||||
// Identify processor type and features
|
||||
void cpuid();
|
||||
|
||||
// CRC32C
|
||||
void crc32(Register crc, Register v, int8_t sizeInBytes);
|
||||
void crc32(Register crc, Address adr, int8_t sizeInBytes);
|
||||
|
||||
// Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
|
||||
void cvtsd2ss(XMMRegister dst, XMMRegister src);
|
||||
void cvtsd2ss(XMMRegister dst, Address src);
|
||||
@ -1783,6 +1790,7 @@ private:
|
||||
void setb(Condition cc, Register dst);
|
||||
|
||||
void shldl(Register dst, Register src);
|
||||
void shldl(Register dst, Register src, int8_t imm8);
|
||||
|
||||
void shll(Register dst, int imm8);
|
||||
void shll(Register dst);
|
||||
|
@ -37,6 +37,8 @@ inline int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst)
|
||||
inline int Assembler::prefixq_and_encode(int dst_enc, int src_enc) { return dst_enc << 3 | src_enc; }
|
||||
|
||||
inline void Assembler::prefix(Register reg) {}
|
||||
// No-op variant: emits nothing (presumably the 32-bit build, where no REX prefix exists -- confirm).
inline void Assembler::prefix(Register dst, Register src, Prefix p) {}
|
||||
// No-op variant: emits nothing (presumably the 32-bit build, where no REX prefix exists -- confirm).
inline void Assembler::prefix(Register dst, Address adr, Prefix p) {}
|
||||
inline void Assembler::prefix(Address adr) {}
|
||||
inline void Assembler::prefixq(Address adr) {}
|
||||
|
||||
|
66
hotspot/src/cpu/x86/vm/crc32c.h
Normal file
66
hotspot/src/cpu/x86/vm/crc32c.h
Normal file
@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
enum {
|
||||
// S. Gueron / Information Processing Letters 112 (2012) 184
|
||||
// shows that anything above 6K and below 32K is a good choice
|
||||
// 32K does not deliver any further performance gains
|
||||
// 6K=8*256 (*3 as we compute 3 blocks together)
|
||||
//
|
||||
// Thus selecting the smallest value so it could apply to the largest number
|
||||
// of buffer sizes.
|
||||
CRC32C_HIGH = 8 * 256,
|
||||
|
||||
// empirical
|
||||
// based on ubench study using methodology described in
|
||||
// V. Gopal et al. / Fast CRC Computation for iSCSI Polynomial Using CRC32 Instruction April 2011 8
|
||||
//
|
||||
// arbitrary value between 27 and 256
|
||||
CRC32C_MIDDLE = 8 * 86,
|
||||
|
||||
// V. Gopal et al. / Fast CRC Computation for iSCSI Polynomial Using CRC32 Instruction April 2011 9
|
||||
// shows that 240 and 1024 are equally good choices as the 216==8*27
|
||||
//
|
||||
// Selecting the smallest value which resulted in a significant performance improvement over
|
||||
// sequential version
|
||||
CRC32C_LOW = 8 * 27,
|
||||
|
||||
CRC32C_NUM_ChunkSizeInBytes = 3,
|
||||
|
||||
// We need to compute powers of 64N and 128N for each "chunk" size
|
||||
CRC32C_NUM_PRECOMPUTED_CONSTANTS = ( 2 * CRC32C_NUM_ChunkSizeInBytes )
|
||||
};
|
||||
// Notes:
|
||||
// 1. Why we need to choose a "chunk" approach?
|
||||
// The overhead of computing the required powers for an arbitrary buffer of size N is significant
|
||||
// (implementation approaches a library perf.)
|
||||
// 2. Why only 3 "chunks"?
|
||||
// Performance experiments results showed that a HIGH+LOW was not delivering a stable speedup
|
||||
// curve.
|
||||
//
|
||||
// Disclaimer:
|
||||
// If you ever decide to increase/decrease number of "chunks" be sure to modify
|
||||
// a) constants table generation (hotspot/src/cpu/x86/vm/stubRoutines_x86.cpp)
|
||||
// b) constant fetch from that table (macroAssembler_x86.cpp)
|
||||
// c) unrolled for loop (macroAssembler_x86.cpp)
|
@ -42,6 +42,7 @@
|
||||
address generate_Reference_get_entry();
|
||||
address generate_CRC32_update_entry();
|
||||
address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind);
|
||||
address generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind);
|
||||
#ifndef _LP64
|
||||
address generate_Float_intBitsToFloat_entry();
|
||||
address generate_Float_floatToRawIntBits_entry();
|
||||
|
@ -45,6 +45,7 @@
|
||||
#include "gc/g1/g1SATBCardTableModRefBS.hpp"
|
||||
#include "gc/g1/heapRegion.hpp"
|
||||
#endif // INCLUDE_ALL_GCS
|
||||
#include "crc32c.h"
|
||||
|
||||
#ifdef PRODUCT
|
||||
#define BLOCK_COMMENT(str) /* nothing */
|
||||
@ -8636,6 +8637,471 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Regi
|
||||
notl(crc); // ~c
|
||||
}
|
||||
|
||||
#ifdef _LP64
|
||||
// S. Gueron / Information Processing Letters 112 (2012) 184
|
||||
// Algorithm 4: Computing carry-less multiplication using a precomputed lookup table.
|
||||
// Input: A 32 bit value B = [byte3, byte2, byte1, byte0].
|
||||
// Output: the 64-bit carry-less product of B * CONST
|
||||
void MacroAssembler::crc32c_ipl_alg4(Register in, uint32_t n,
|
||||
Register tmp1, Register tmp2, Register tmp3) {
|
||||
lea(tmp3, ExternalAddress(StubRoutines::crc32c_table_addr()));
|
||||
if (n > 0) {
|
||||
addq(tmp3, n * 256 * 8);
|
||||
}
|
||||
// Q1 = TABLEExt[n][B & 0xFF];
|
||||
movl(tmp1, in);
|
||||
andl(tmp1, 0x000000FF);
|
||||
shll(tmp1, 3);
|
||||
addq(tmp1, tmp3);
|
||||
movq(tmp1, Address(tmp1, 0));
|
||||
|
||||
// Q2 = TABLEExt[n][B >> 8 & 0xFF];
|
||||
movl(tmp2, in);
|
||||
shrl(tmp2, 8);
|
||||
andl(tmp2, 0x000000FF);
|
||||
shll(tmp2, 3);
|
||||
addq(tmp2, tmp3);
|
||||
movq(tmp2, Address(tmp2, 0));
|
||||
|
||||
shlq(tmp2, 8);
|
||||
xorq(tmp1, tmp2);
|
||||
|
||||
// Q3 = TABLEExt[n][B >> 16 & 0xFF];
|
||||
movl(tmp2, in);
|
||||
shrl(tmp2, 16);
|
||||
andl(tmp2, 0x000000FF);
|
||||
shll(tmp2, 3);
|
||||
addq(tmp2, tmp3);
|
||||
movq(tmp2, Address(tmp2, 0));
|
||||
|
||||
shlq(tmp2, 16);
|
||||
xorq(tmp1, tmp2);
|
||||
|
||||
// Q4 = TABLEExt[n][B >> 24 & 0xFF];
|
||||
shrl(in, 24);
|
||||
andl(in, 0x000000FF);
|
||||
shll(in, 3);
|
||||
addq(in, tmp3);
|
||||
movq(in, Address(in, 0));
|
||||
|
||||
shlq(in, 24);
|
||||
xorq(in, tmp1);
|
||||
// return Q1 ^ Q2 << 8 ^ Q3 << 16 ^ Q4 << 24;
|
||||
}
|
||||
|
||||
// Carry-less multiply of in_out by a constant; the 64-bit product is left in in_out.
// With PCLMULQDQ the third argument is the constant itself; otherwise it is
// the index of the lookup table handed to crc32c_ipl_alg4.
void MacroAssembler::crc32c_pclmulqdq(XMMRegister w_xtmp1,
                                      Register in_out,
                                      uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported,
                                      XMMRegister w_xtmp2,
                                      Register tmp1,
                                      Register n_tmp2, Register n_tmp3) {
  if (!is_pclmulqdq_supported) {
    // Table-driven fallback.
    crc32c_ipl_alg4(in_out, const_or_pre_comp_const_index, tmp1, n_tmp2, n_tmp3);
    return;
  }

  movdl(w_xtmp1, in_out); // modified blindly

  // Materialize the constant in an XMM register via a GPR.
  movl(tmp1, const_or_pre_comp_const_index);
  movdl(w_xtmp2, tmp1);
  pclmulqdq(w_xtmp1, w_xtmp2, 0);

  movdq(in_out, w_xtmp1);
}
|
||||
|
||||
// Recombination Alternative 2: No bit-reflections
|
||||
// T1 = (CRC_A * U1) << 1
|
||||
// T2 = (CRC_B * U2) << 1
|
||||
// C1 = T1 >> 32
|
||||
// C2 = T2 >> 32
|
||||
// T1 = T1 & 0xFFFFFFFF
|
||||
// T2 = T2 & 0xFFFFFFFF
|
||||
// T1 = CRC32(0, T1)
|
||||
// T2 = CRC32(0, T2)
|
||||
// C1 = C1 ^ T1
|
||||
// C2 = C2 ^ T2
|
||||
// CRC = C1 ^ C2 ^ CRC_C
|
||||
void MacroAssembler::crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2,
|
||||
XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
|
||||
Register tmp1, Register tmp2,
|
||||
Register n_tmp3) {
|
||||
crc32c_pclmulqdq(w_xtmp1, in_out, const_or_pre_comp_const_index_u1, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3);
|
||||
crc32c_pclmulqdq(w_xtmp2, in1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3);
|
||||
shlq(in_out, 1);
|
||||
movl(tmp1, in_out);
|
||||
shrq(in_out, 32);
|
||||
xorl(tmp2, tmp2);
|
||||
crc32(tmp2, tmp1, 4);
|
||||
xorl(in_out, tmp2); // we don't care about upper 32 bit contents here
|
||||
shlq(in1, 1);
|
||||
movl(tmp1, in1);
|
||||
shrq(in1, 32);
|
||||
xorl(tmp2, tmp2);
|
||||
crc32(tmp2, tmp1, 4);
|
||||
xorl(in1, tmp2);
|
||||
xorl(in_out, in1);
|
||||
xorl(in_out, in2);
|
||||
}
|
||||
|
||||
// Set N to predefined value
|
||||
// Subtract from a lenght of a buffer
|
||||
// execute in a loop:
|
||||
// CRC_A = 0xFFFFFFFF, CRC_B = 0, CRC_C = 0
|
||||
// for i = 1 to N do
|
||||
// CRC_A = CRC32(CRC_A, A[i])
|
||||
// CRC_B = CRC32(CRC_B, B[i])
|
||||
// CRC_C = CRC32(CRC_C, C[i])
|
||||
// end for
|
||||
// Recombine
|
||||
void MacroAssembler::crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported,
|
||||
Register in_out1, Register in_out2, Register in_out3,
|
||||
Register tmp1, Register tmp2, Register tmp3,
|
||||
XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
|
||||
Register tmp4, Register tmp5,
|
||||
Register n_tmp6) {
|
||||
Label L_processPartitions;
|
||||
Label L_processPartition;
|
||||
Label L_exit;
|
||||
|
||||
bind(L_processPartitions);
|
||||
cmpl(in_out1, 3 * size);
|
||||
jcc(Assembler::less, L_exit);
|
||||
xorl(tmp1, tmp1);
|
||||
xorl(tmp2, tmp2);
|
||||
movq(tmp3, in_out2);
|
||||
addq(tmp3, size);
|
||||
|
||||
bind(L_processPartition);
|
||||
crc32(in_out3, Address(in_out2, 0), 8);
|
||||
crc32(tmp1, Address(in_out2, size), 8);
|
||||
crc32(tmp2, Address(in_out2, size * 2), 8);
|
||||
addq(in_out2, 8);
|
||||
cmpq(in_out2, tmp3);
|
||||
jcc(Assembler::less, L_processPartition);
|
||||
crc32c_rec_alt2(const_or_pre_comp_const_index_u1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, in_out3, tmp1, tmp2,
|
||||
w_xtmp1, w_xtmp2, w_xtmp3,
|
||||
tmp4, tmp5,
|
||||
n_tmp6);
|
||||
addq(in_out2, 2 * size);
|
||||
subl(in_out1, 3 * size);
|
||||
jmp(L_processPartitions);
|
||||
|
||||
bind(L_exit);
|
||||
}
|
||||
#else
|
||||
// 32-bit flavor of the table-driven carry-less multiplication:
// the 64-bit result Q1 ^ Q2 << 8 ^ Q3 << 16 ^ Q4 << 24 is accumulated in xtmp1,
// where Qk = TABLEExt[n][byte(k-1) of in_out].
// Clobbers in_out, tmp1, tmp2, tmp3 and xtmp2.
void MacroAssembler::crc32c_ipl_alg4(Register in_out, uint32_t n,
                                     Register tmp1, Register tmp2, Register tmp3,
                                     XMMRegister xtmp1, XMMRegister xtmp2) {
  // tmp3 = &TABLEExt[n][0]
  lea(tmp3, ExternalAddress(StubRoutines::crc32c_table_addr()));
  if (n != 0) {
    addl(tmp3, n * 256 * 8);
  }

  // xtmp1 = Q1 = TABLEExt[n][B & 0xFF]
  movl(tmp1, in_out);
  andl(tmp1, 0x000000FF);
  shll(tmp1, 3);              // index * 8: entries are 8 bytes wide
  addl(tmp1, tmp3);
  movq(xtmp1, Address(tmp1, 0));

  // xtmp1 ^= Q2 << 8, then xtmp1 ^= Q3 << 16
  // (Qk = TABLEExt[n][B >> shift & 0xFF])
  for (int shift = 8; shift <= 16; shift += 8) {
    movl(tmp2, in_out);
    shrl(tmp2, shift);
    andl(tmp2, 0x000000FF);
    shll(tmp2, 3);
    addl(tmp2, tmp3);
    movq(xtmp2, Address(tmp2, 0));

    psllq(xtmp2, shift);
    pxor(xtmp1, xtmp2);
  }

  // Q4 = TABLEExt[n][B >> 24 & 0xFF]; in_out is reused as the table pointer
  shrl(in_out, 24);
  andl(in_out, 0x000000FF);
  shll(in_out, 3);
  addl(in_out, tmp3);
  movq(xtmp2, Address(in_out, 0));

  psllq(xtmp2, 24);
  pxor(xtmp1, xtmp2); // Result in CXMM
  // xtmp1 = Q1 ^ Q2 << 8 ^ Q3 << 16 ^ Q4 << 24
}
|
||||
|
||||
// 32-bit flavor: carry-less multiply of in_out by a constant, with the
// product left in w_xtmp1.
// With PCLMULQDQ the third argument is the constant itself; otherwise it is
// the index of the lookup table handed to crc32c_ipl_alg4.
void MacroAssembler::crc32c_pclmulqdq(XMMRegister w_xtmp1,
                                      Register in_out,
                                      uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported,
                                      XMMRegister w_xtmp2,
                                      Register tmp1,
                                      Register n_tmp2, Register n_tmp3) {
  if (!is_pclmulqdq_supported) {
    // Table-driven fallback; it also leaves its result in w_xtmp1.
    crc32c_ipl_alg4(in_out, const_or_pre_comp_const_index, tmp1, n_tmp2, n_tmp3, w_xtmp1, w_xtmp2);
    return;
  }

  movdl(w_xtmp1, in_out);

  // Materialize the constant in an XMM register via a GPR.
  movl(tmp1, const_or_pre_comp_const_index);
  movdl(w_xtmp2, tmp1);
  pclmulqdq(w_xtmp1, w_xtmp2, 0);
  // Keep result in XMM since GPR is 32 bit in length
}
|
||||
|
||||
// 32-bit flavor of the recombination step. The two products are produced in
// w_xtmp1 / w_xtmp2, shifted and split there, and folded back into in_out / in1:
//   T = product << 1;  C = T >> 32;  C ^= CRC32(0, low 32 bits of T)
// Result: in_out = C1 ^ C2 ^ in2. in1, tmp1 and tmp2 are clobbered.
void MacroAssembler::crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2,
                                     XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
                                     Register tmp1, Register tmp2,
                                     Register n_tmp3) {
  crc32c_pclmulqdq(w_xtmp1, in_out, const_or_pre_comp_const_index_u1, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3);
  crc32c_pclmulqdq(w_xtmp2, in1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3);

  XMMRegister products[2] = { w_xtmp1, w_xtmp2 };
  Register    folded[2]   = { in_out, in1 };
  for (int i = 0; i < 2; i++) {
    XMMRegister t = products[i];
    Register    c = folded[i];

    psllq(t, 1);
    movdl(tmp1, t);         // low 32 bits of T
    psrlq(t, 32);
    movdl(c, t);            // C = T >> 32

    xorl(tmp2, tmp2);
    crc32(tmp2, tmp1, 4);   // CRC32(0, low 32 bits of T)
    xorl(c, tmp2);
  }

  xorl(in_out, in1);
  xorl(in_out, in2);
}
|
||||
|
||||
// 32-bit flavor: processes the buffer in groups of three adjacent partitions of
// 'size' bytes for as long as at least 3 * size bytes remain, then recombines
// the three partial CRCs.
//
//   in_out1 - remaining length in bytes (decremented by 3 * size per group)
//   in_out2 - current buffer position   (advanced by 3 * size per group)
//   in_out3 - running crc
//
// The incoming tmp4 / tmp5 / n_tmp6 arguments are ignored: tmp3, in_out1 and
// in_out2 are saved on the stack and lent to the recombination step instead.
void MacroAssembler::crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported,
                                       Register in_out1, Register in_out2, Register in_out3,
                                       Register tmp1, Register tmp2, Register tmp3,
                                       XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
                                       Register tmp4, Register tmp5,
                                       Register n_tmp6) {
  Label L_processPartitions;
  Label L_processPartition;
  Label L_exit;

  bind(L_processPartitions);
  cmpl(in_out1, 3 * size);
  jcc(Assembler::less, L_exit);
  // Fresh accumulators for partitions B and C; tmp3 marks the end of partition A.
  xorl(tmp1, tmp1);
  xorl(tmp2, tmp2);
  movl(tmp3, in_out2);
  addl(tmp3, size);

  bind(L_processPartition);
  // 8 bytes of each partition per iteration, as two 4-byte steps.
  for (int half = 0; half < 8; half += 4) {
    crc32(in_out3, Address(in_out2, 0 + half), 4);
    crc32(tmp1, Address(in_out2, size + half), 4);
    crc32(tmp2, Address(in_out2, size * 2 + half), 4);
  }
  addl(in_out2, 8);
  cmpl(in_out2, tmp3);
  jcc(Assembler::less, L_processPartition);

  // Free three registers for use as scratch during recombination.
  push(tmp3);
  push(in_out1);
  push(in_out2);
  tmp4 = tmp3;
  tmp5 = in_out1;
  n_tmp6 = in_out2;

  crc32c_rec_alt2(const_or_pre_comp_const_index_u1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, in_out3, tmp1, tmp2,
                  w_xtmp1, w_xtmp2, w_xtmp3,
                  tmp4, tmp5,
                  n_tmp6);

  // Restore in reverse order of the pushes.
  pop(in_out2);
  pop(in_out1);
  pop(tmp3);

  // in_out2 already advanced by 'size'; skip the other two partitions.
  addl(in_out2, 2 * size);
  subl(in_out1, 3 * size);
  jmp(L_processPartitions);

  bind(L_exit);
}
|
||||
#endif //LP64
|
||||
|
||||
#ifdef _LP64
|
||||
// Algorithm 2: Pipelined usage of the CRC32 instruction.
|
||||
// Input: A buffer I of L bytes.
|
||||
// Output: the CRC32C value of the buffer.
|
||||
// Notations:
|
||||
// Write L = 24N + r, with N = floor (L/24).
|
||||
// r = L mod 24 (0 <= r < 24).
|
||||
// Consider I as the concatenation of A|B|C|R, where A, B, C, each,
|
||||
// N quadwords, and R consists of r bytes.
|
||||
// A[j] = I [8j+7:8j], j= 0, 1, ..., N-1
|
||||
// B[j] = I [N + 8j+7:N + 8j], j= 0, 1, ..., N-1
|
||||
// C[j] = I [2N + 8j+7:2N + 8j], j= 0, 1, ..., N-1
|
||||
// if r > 0 R[j] = I [3N +j], j= 0, 1, ...,r-1
|
||||
void MacroAssembler::crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2,
|
||||
Register tmp1, Register tmp2, Register tmp3,
|
||||
Register tmp4, Register tmp5, Register tmp6,
|
||||
XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
|
||||
bool is_pclmulqdq_supported) {
|
||||
uint32_t const_or_pre_comp_const_index[CRC32C_NUM_PRECOMPUTED_CONSTANTS];
|
||||
Label L_wordByWord;
|
||||
Label L_byteByByteProlog;
|
||||
Label L_byteByByte;
|
||||
Label L_exit;
|
||||
|
||||
if (is_pclmulqdq_supported ) {
|
||||
const_or_pre_comp_const_index[1] = *(uint32_t *)StubRoutines::_crc32c_table_addr;
|
||||
const_or_pre_comp_const_index[0] = *((uint32_t *)StubRoutines::_crc32c_table_addr+1);
|
||||
|
||||
const_or_pre_comp_const_index[3] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 2);
|
||||
const_or_pre_comp_const_index[2] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 3);
|
||||
|
||||
const_or_pre_comp_const_index[5] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 4);
|
||||
const_or_pre_comp_const_index[4] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 5);
|
||||
assert((CRC32C_NUM_PRECOMPUTED_CONSTANTS - 1 ) == 5, "Checking whether you declared all of the constants based on the number of \"chunks\"");
|
||||
} else {
|
||||
const_or_pre_comp_const_index[0] = 1;
|
||||
const_or_pre_comp_const_index[1] = 0;
|
||||
|
||||
const_or_pre_comp_const_index[2] = 3;
|
||||
const_or_pre_comp_const_index[3] = 2;
|
||||
|
||||
const_or_pre_comp_const_index[4] = 5;
|
||||
const_or_pre_comp_const_index[5] = 4;
|
||||
}
|
||||
crc32c_proc_chunk(CRC32C_HIGH, const_or_pre_comp_const_index[0], const_or_pre_comp_const_index[1], is_pclmulqdq_supported,
|
||||
in2, in1, in_out,
|
||||
tmp1, tmp2, tmp3,
|
||||
w_xtmp1, w_xtmp2, w_xtmp3,
|
||||
tmp4, tmp5,
|
||||
tmp6);
|
||||
crc32c_proc_chunk(CRC32C_MIDDLE, const_or_pre_comp_const_index[2], const_or_pre_comp_const_index[3], is_pclmulqdq_supported,
|
||||
in2, in1, in_out,
|
||||
tmp1, tmp2, tmp3,
|
||||
w_xtmp1, w_xtmp2, w_xtmp3,
|
||||
tmp4, tmp5,
|
||||
tmp6);
|
||||
crc32c_proc_chunk(CRC32C_LOW, const_or_pre_comp_const_index[4], const_or_pre_comp_const_index[5], is_pclmulqdq_supported,
|
||||
in2, in1, in_out,
|
||||
tmp1, tmp2, tmp3,
|
||||
w_xtmp1, w_xtmp2, w_xtmp3,
|
||||
tmp4, tmp5,
|
||||
tmp6);
|
||||
movl(tmp1, in2);
|
||||
andl(tmp1, 0x00000007);
|
||||
negl(tmp1);
|
||||
addl(tmp1, in2);
|
||||
addq(tmp1, in1);
|
||||
|
||||
BIND(L_wordByWord);
|
||||
cmpq(in1, tmp1);
|
||||
jcc(Assembler::greaterEqual, L_byteByByteProlog);
|
||||
crc32(in_out, Address(in1, 0), 4);
|
||||
addq(in1, 4);
|
||||
jmp(L_wordByWord);
|
||||
|
||||
BIND(L_byteByByteProlog);
|
||||
andl(in2, 0x00000007);
|
||||
movl(tmp2, 1);
|
||||
|
||||
BIND(L_byteByByte);
|
||||
cmpl(tmp2, in2);
|
||||
jccb(Assembler::greater, L_exit);
|
||||
crc32(in_out, Address(in1, 0), 1);
|
||||
incq(in1);
|
||||
incl(tmp2);
|
||||
jmp(L_byteByByte);
|
||||
|
||||
BIND(L_exit);
|
||||
}
|
||||
#else
|
||||
// 32-bit flavor of the pipelined CRC32C computation.
//   in_out - running crc (input and result)
//   in1    - buffer pointer
//   in2    - remaining length in bytes
// The buffer is consumed by three passes of decreasing chunk size, then
// 4 bytes at a time, then byte by byte.
void MacroAssembler::crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2,
                                          Register tmp1, Register tmp2, Register tmp3,
                                          Register tmp4, Register tmp5, Register tmp6,
                                          XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
                                          bool is_pclmulqdq_supported) {
  uint32_t const_or_pre_comp_const_index[CRC32C_NUM_PRECOMPUTED_CONSTANTS];
  Label L_wordByWord;
  Label L_byteByByteProlog;
  Label L_byteByByte;
  Label L_exit;

  // Per chunk size a pair (u1, u2) is needed. With PCLMULQDQ these are the
  // constants read from the head of the table; otherwise they are lookup
  // table indices. In both cases the two entries of a pair are swapped.
  if (is_pclmulqdq_supported) {
    const uint32_t* constants = (uint32_t *)StubRoutines::_crc32c_table_addr;
    for (int pair = 0; pair < CRC32C_NUM_ChunkSizeInBytes; pair++) {
      const_or_pre_comp_const_index[2 * pair + 1] = constants[2 * pair];
      const_or_pre_comp_const_index[2 * pair]     = constants[2 * pair + 1];
    }
  } else {
    for (int pair = 0; pair < CRC32C_NUM_ChunkSizeInBytes; pair++) {
      const_or_pre_comp_const_index[2 * pair]     = 2 * pair + 1;
      const_or_pre_comp_const_index[2 * pair + 1] = 2 * pair;
    }
  }

  // Largest chunk size first; each pass consumes as many groups as still fit.
  const uint32_t chunk_sizes[CRC32C_NUM_ChunkSizeInBytes] = { CRC32C_HIGH, CRC32C_MIDDLE, CRC32C_LOW };
  for (int chunk = 0; chunk < CRC32C_NUM_ChunkSizeInBytes; chunk++) {
    crc32c_proc_chunk(chunk_sizes[chunk],
                      const_or_pre_comp_const_index[2 * chunk], const_or_pre_comp_const_index[2 * chunk + 1],
                      is_pclmulqdq_supported,
                      in2, in1, in_out,
                      tmp1, tmp2, tmp3,
                      w_xtmp1, w_xtmp2, w_xtmp3,
                      tmp4, tmp5,
                      tmp6);
  }

  // tmp1 = in1 + (in2 - (in2 & 7)): end of the part handled 4 bytes at a time
  movl(tmp1, in2);
  andl(tmp1, 0x00000007);
  negl(tmp1);
  addl(tmp1, in2);
  addl(tmp1, in1);

  BIND(L_wordByWord);
  cmpl(in1, tmp1);
  jcc(Assembler::greaterEqual, L_byteByByteProlog);
  crc32(in_out, Address(in1,0), 4);
  addl(in1, 4);
  jmp(L_wordByWord);

  // Remaining (in2 & 7) bytes, counted by tmp2 = 1 .. in2
  BIND(L_byteByByteProlog);
  andl(in2, 0x00000007);
  movl(tmp2, 1);

  BIND(L_byteByByte);
  cmpl(tmp2, in2);
  jccb(Assembler::greater, L_exit);
  // The byte is loaded into a register first, then fed to crc32.
  movb(tmp1, Address(in1, 0));
  crc32(in_out, tmp1, 1);
  incl(in1);
  incl(tmp2);
  jmp(L_byteByByte);

  BIND(L_exit);
}
|
||||
#endif // LP64
|
||||
#undef BIND
|
||||
#undef BLOCK_COMMENT
|
||||
|
||||
|
@ -1278,9 +1278,42 @@ public:
|
||||
Register raxReg);
|
||||
#endif
|
||||
|
||||
// CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic.
|
||||
// CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
|
||||
void update_byte_crc32(Register crc, Register val, Register table);
|
||||
void kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp);
|
||||
// CRC32C code for java.util.zip.CRC32C::updateBytes() intrinsic
|
||||
// Note on a naming convention:
|
||||
// Prefix w = register only used on a Westmere+ architecture
|
||||
// Prefix n = register only used on a Nehalem architecture
|
||||
#ifdef _LP64
|
||||
void crc32c_ipl_alg4(Register in_out, uint32_t n,
|
||||
Register tmp1, Register tmp2, Register tmp3);
|
||||
#else
|
||||
void crc32c_ipl_alg4(Register in_out, uint32_t n,
|
||||
Register tmp1, Register tmp2, Register tmp3,
|
||||
XMMRegister xtmp1, XMMRegister xtmp2);
|
||||
#endif
|
||||
void crc32c_pclmulqdq(XMMRegister w_xtmp1,
|
||||
Register in_out,
|
||||
uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported,
|
||||
XMMRegister w_xtmp2,
|
||||
Register tmp1,
|
||||
Register n_tmp2, Register n_tmp3);
|
||||
void crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2,
|
||||
XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
|
||||
Register tmp1, Register tmp2,
|
||||
Register n_tmp3);
|
||||
void crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported,
|
||||
Register in_out1, Register in_out2, Register in_out3,
|
||||
Register tmp1, Register tmp2, Register tmp3,
|
||||
XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
|
||||
Register tmp4, Register tmp5,
|
||||
Register n_tmp6);
|
||||
void crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2,
|
||||
Register tmp1, Register tmp2, Register tmp3,
|
||||
Register tmp4, Register tmp5, Register tmp6,
|
||||
XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
|
||||
bool is_pclmulqdq_supported);
|
||||
// Fold 128-bit data chunk
|
||||
void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset);
|
||||
void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf);
|
||||
|
@ -2991,6 +2991,63 @@ class StubGenerator: public StubCodeGenerator {
|
||||
return start;
|
||||
}
|
||||
|
||||
/**
|
||||
* Arguments:
|
||||
*
|
||||
* Inputs:
|
||||
* rsp(4) - int crc
|
||||
* rsp(8) - byte* buf
|
||||
* rsp(12) - int length
|
||||
* rsp(16) - table_start - optional (present only when doing a library_calll,
|
||||
* not used by x86 algorithm)
|
||||
*
|
||||
* Ouput:
|
||||
* rax - int crc result
|
||||
*/
|
||||
address generate_updateBytesCRC32C(bool is_pclmulqdq_supported) {
|
||||
assert(UseCRC32CIntrinsics, "need SSE4_2");
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32C");
|
||||
address start = __ pc();
|
||||
const Register crc = rax; // crc
|
||||
const Register buf = rcx; // source java byte array address
|
||||
const Register len = rdx; // length
|
||||
const Register d = rbx;
|
||||
const Register g = rsi;
|
||||
const Register h = rdi;
|
||||
const Register empty = 0; // will never be used, in order not
|
||||
// to change a signature for crc32c_IPL_Alg2_Alt2
|
||||
// between 64/32 I'm just keeping it here
|
||||
assert_different_registers(crc, buf, len, d, g, h);
|
||||
|
||||
BLOCK_COMMENT("Entry:");
|
||||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||
Address crc_arg(rsp, 4 + 4 + 0); // ESP+4 +
|
||||
// we need to add additional 4 because __ enter
|
||||
// have just pushed ebp on a stack
|
||||
Address buf_arg(rsp, 4 + 4 + 4);
|
||||
Address len_arg(rsp, 4 + 4 + 8);
|
||||
// Load up:
|
||||
__ movl(crc, crc_arg);
|
||||
__ movl(buf, buf_arg);
|
||||
__ movl(len, len_arg);
|
||||
__ push(d);
|
||||
__ push(g);
|
||||
__ push(h);
|
||||
__ crc32c_ipl_alg2_alt2(crc, buf, len,
|
||||
d, g, h,
|
||||
empty, empty, empty,
|
||||
xmm0, xmm1, xmm2,
|
||||
is_pclmulqdq_supported);
|
||||
__ pop(h);
|
||||
__ pop(g);
|
||||
__ pop(d);
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ ret(0);
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
// Safefetch stubs.
|
||||
void generate_safefetch(const char* name, int size, address* entry,
|
||||
address* fault_pc, address* continuation_pc) {
|
||||
@ -3204,6 +3261,13 @@ class StubGenerator: public StubCodeGenerator {
|
||||
StubRoutines::_crc_table_adr = (address)StubRoutines::x86::_crc_table;
|
||||
StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
|
||||
}
|
||||
|
||||
if (UseCRC32CIntrinsics) {
|
||||
bool supports_clmul = VM_Version::supports_clmul();
|
||||
StubRoutines::x86::generate_CRC32C_table(supports_clmul);
|
||||
StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
|
||||
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -3958,6 +3958,64 @@ class StubGenerator: public StubCodeGenerator {
|
||||
return start;
|
||||
}
|
||||
|
||||
/**
|
||||
* Arguments:
|
||||
*
|
||||
* Inputs:
|
||||
* c_rarg0 - int crc
|
||||
* c_rarg1 - byte* buf
|
||||
* c_rarg2 - long length
|
||||
* c_rarg3 - table_start - optional (present only when doing a library_calll,
|
||||
* not used by x86 algorithm)
|
||||
*
|
||||
* Ouput:
|
||||
* rax - int crc result
|
||||
*/
|
||||
address generate_updateBytesCRC32C(bool is_pclmulqdq_supported) {
|
||||
assert(UseCRC32CIntrinsics, "need SSE4_2");
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32C");
|
||||
address start = __ pc();
|
||||
//reg.arg int#0 int#1 int#2 int#3 int#4 int#5 float regs
|
||||
//Windows RCX RDX R8 R9 none none XMM0..XMM3
|
||||
//Lin / Sol RDI RSI RDX RCX R8 R9 XMM0..XMM7
|
||||
const Register crc = c_rarg0; // crc
|
||||
const Register buf = c_rarg1; // source java byte array address
|
||||
const Register len = c_rarg2; // length
|
||||
const Register a = rax;
|
||||
const Register j = r9;
|
||||
const Register k = r10;
|
||||
const Register l = r11;
|
||||
#ifdef _WIN64
|
||||
const Register y = rdi;
|
||||
const Register z = rsi;
|
||||
#else
|
||||
const Register y = rcx;
|
||||
const Register z = r8;
|
||||
#endif
|
||||
assert_different_registers(crc, buf, len, a, j, k, l, y, z);
|
||||
|
||||
BLOCK_COMMENT("Entry:");
|
||||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||
#ifdef _WIN64
|
||||
__ push(y);
|
||||
__ push(z);
|
||||
#endif
|
||||
__ crc32c_ipl_alg2_alt2(crc, buf, len,
|
||||
a, j, k,
|
||||
l, y, z,
|
||||
c_farg0, c_farg1, c_farg2,
|
||||
is_pclmulqdq_supported);
|
||||
__ movl(rax, crc);
|
||||
#ifdef _WIN64
|
||||
__ pop(z);
|
||||
__ pop(y);
|
||||
#endif
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ ret(0);
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
/**
|
||||
* Arguments:
|
||||
@ -4302,6 +4360,13 @@ class StubGenerator: public StubCodeGenerator {
|
||||
StubRoutines::_crc_table_adr = (address)StubRoutines::x86::_crc_table;
|
||||
StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
|
||||
}
|
||||
|
||||
if (UseCRC32CIntrinsics) {
|
||||
bool supports_clmul = VM_Version::supports_clmul();
|
||||
StubRoutines::x86::generate_CRC32C_table(supports_clmul);
|
||||
StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
|
||||
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
|
||||
}
|
||||
}
|
||||
|
||||
void generate_all() {
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include "runtime/frame.inline.hpp"
|
||||
#include "runtime/stubRoutines.hpp"
|
||||
#include "runtime/thread.inline.hpp"
|
||||
#include "crc32c.h"
|
||||
|
||||
// Implementation of the platform-specific part of StubRoutines - for
|
||||
// a description of how to extend it, see the stubRoutines.hpp file.
|
||||
@ -130,3 +131,107 @@ juint StubRoutines::x86::_crc_table[] =
|
||||
0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
|
||||
0x2d02ef8dUL
|
||||
};
|
||||
|
||||
#define D 32
#define P 0x82F63B78 // Reflection of Castagnoli (0x11EDC6F41)

#define TILL_CYCLE 31
// Filled by crc32c_init_pow_2k(). Only TILL_CYCLE entries are stored
// because _crc32c_pow_2k_table[TILL_CYCLE == 31] == _crc32c_pow_2k_table[0]
uint32_t _crc32c_pow_2k_table[TILL_CYCLE];

// A. Kadatch and B. Jenkins / Everything we know about CRC but afraid to forget September 3, 2010 8
// Listing 1: Multiplication of normalized polynomials
// "a" and "b" occupy D least significant bits.
//
// Returns (a * b) mod P. Operands and result are in the reflected form used
// throughout this file: the coefficient of x**k is bit (D - 1 - k), so
// 0x80000000 is the polynomial "1" and 0x40000000 is "x".
uint32_t crc32c_multiply(uint32_t a, uint32_t b) {
  uint32_t product = 0;
  uint32_t b_times_x_pow_k = b; // (b * x**k) mod P, starting at k == 0

  for (int k = 0; k < D; ++k) {
    // The mask must be built from an unsigned 32-bit one: shifting a signed
    // int left by 31 (the k == 0 case) overflows.
    const uint32_t coefficient_mask = ((uint32_t)1) << (D - 1 - k);

    // If "a" has non-zero coefficient at x**k, add ((b * x**k) mod P) to the result.
    if ((a & coefficient_mask) != 0) {
      product ^= b_times_x_pow_k;
    }

    // Advance to (b * x**(k+1)) mod P. If the lowest bit is set, multiplying
    // by x raises the degree to D, and subtracting (xor-ing) P brings it
    // back below D.
    const bool degree_reaches_d = (b_times_x_pow_k & 1) != 0;
    b_times_x_pow_k >>= 1;
    if (degree_reaches_d) {
      b_times_x_pow_k ^= P;
    }
  }
  return product;
}
#undef D
#undef P
|
||||
|
||||
// A. Kadatch and B. Jenkins / Everything we know about CRC but afraid to forget September 3, 2010 9
|
||||
void crc32c_init_pow_2k(void) {
|
||||
// _crc32c_pow_2k_table(0) =
|
||||
// x^(2^k) mod P(x) = x mod P(x) = x
|
||||
// Since we are operating on a reflected values
|
||||
// x = 10b, reflect(x) = 0x40000000
|
||||
_crc32c_pow_2k_table[0] = 0x40000000;
|
||||
|
||||
for (int k = 1; k < TILL_CYCLE; k++) {
|
||||
// _crc32c_pow_2k_table(k+1) = _crc32c_pow_2k_table(k-1)^2 mod P(x)
|
||||
uint32_t tmp = _crc32c_pow_2k_table[k - 1];
|
||||
_crc32c_pow_2k_table[k] = crc32c_multiply(tmp, tmp);
|
||||
}
|
||||
}
|
||||
|
||||
// x^N mod P(x)
|
||||
uint32_t crc32c_f_pow_n(uint32_t n) {
|
||||
// result = 1 (polynomial)
|
||||
uint32_t one, result = 0x80000000, i = 0;
|
||||
|
||||
while (one = (n & 1), (n == 1 || n - one > 0)) {
|
||||
if (one) {
|
||||
result = crc32c_multiply(result, _crc32c_pow_2k_table[i]);
|
||||
}
|
||||
n >>= 1;
|
||||
i++;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
juint *StubRoutines::x86::_crc32c_table;
|
||||
|
||||
void StubRoutines::x86::generate_CRC32C_table(bool is_pclmulqdq_table_supported) {
|
||||
|
||||
static juint pow_n[CRC32C_NUM_PRECOMPUTED_CONSTANTS];
|
||||
|
||||
crc32c_init_pow_2k();
|
||||
|
||||
pow_n[0] = crc32c_f_pow_n(CRC32C_HIGH * 8); // 8N * 8 = 64N
|
||||
pow_n[1] = crc32c_f_pow_n(CRC32C_HIGH * 8 * 2); // 128N
|
||||
|
||||
pow_n[2] = crc32c_f_pow_n(CRC32C_MIDDLE * 8);
|
||||
pow_n[3] = crc32c_f_pow_n(CRC32C_MIDDLE * 8 * 2);
|
||||
|
||||
pow_n[4] = crc32c_f_pow_n(CRC32C_LOW * 8);
|
||||
pow_n[CRC32C_NUM_PRECOMPUTED_CONSTANTS - 1] =
|
||||
crc32c_f_pow_n(CRC32C_LOW * 8 * 2);
|
||||
|
||||
if (is_pclmulqdq_table_supported) {
|
||||
_crc32c_table = pow_n;
|
||||
} else {
|
||||
static julong pclmulqdq_table[CRC32C_NUM_PRECOMPUTED_CONSTANTS * 256];
|
||||
|
||||
for (int j = 0; j < CRC32C_NUM_PRECOMPUTED_CONSTANTS; j++) {
|
||||
static juint X_CONST = pow_n[j];
|
||||
for (int64_t i = 0; i < 256; i++) { // to force 64 bit wide computations
|
||||
// S. Gueron / Information Processing Letters 112 (2012) 184
|
||||
// Algorithm 3: Generating a carry-less multiplication lookup table.
|
||||
// Input: A 32-bit constant, X_CONST.
|
||||
// Output: A table of 256 entries, each one is a 64-bit quadword,
|
||||
// that can be used for computing "byte" * X_CONST, for a given byte.
|
||||
pclmulqdq_table[j * 256 + i] =
|
||||
((i & 1) * X_CONST) ^ ((i & 2) * X_CONST) ^ ((i & 4) * X_CONST) ^
|
||||
((i & 8) * X_CONST) ^ ((i & 16) * X_CONST) ^ ((i & 32) * X_CONST) ^
|
||||
((i & 64) * X_CONST) ^ ((i & 128) * X_CONST);
|
||||
}
|
||||
}
|
||||
_crc32c_table = (juint*)pclmulqdq_table;
|
||||
}
|
||||
}
|
||||
|
@ -36,6 +36,8 @@
|
||||
// masks and table for CRC32
|
||||
static uint64_t _crc_by128_masks[];
|
||||
static juint _crc_table[];
|
||||
// table for CRC32C
|
||||
static juint* _crc32c_table;
|
||||
// swap mask for ghash
|
||||
static address _ghash_long_swap_mask_addr;
|
||||
static address _ghash_byte_swap_mask_addr;
|
||||
@ -46,5 +48,6 @@
|
||||
static address crc_by128_masks_addr() { return (address)_crc_by128_masks; }
|
||||
static address ghash_long_swap_mask_addr() { return _ghash_long_swap_mask_addr; }
|
||||
static address ghash_byte_swap_mask_addr() { return _ghash_byte_swap_mask_addr; }
|
||||
static void generate_CRC32C_table(bool is_pclmulqdq_supported);
|
||||
|
||||
#endif // CPU_X86_VM_STUBROUTINES_X86_32_HPP
|
||||
|
@ -790,18 +790,25 @@ address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpret
|
||||
const Register buf = rdx; // source java byte array address
|
||||
const Register len = rdi; // length
|
||||
|
||||
// value x86_32
|
||||
// interp. arg ptr ESP + 4
|
||||
// int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len)
|
||||
// 3 2 1 0
|
||||
// int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
|
||||
// 4 2,3 1 0
|
||||
|
||||
// Arguments are reversed on java expression stack
|
||||
__ movl(len, Address(rsp, wordSize)); // Length
|
||||
__ movl(len, Address(rsp, 4 + 0)); // Length
|
||||
// Calculate address of start element
|
||||
if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) {
|
||||
__ movptr(buf, Address(rsp, 3*wordSize)); // long buf
|
||||
__ addptr(buf, Address(rsp, 2*wordSize)); // + offset
|
||||
__ movl(crc, Address(rsp, 5*wordSize)); // Initial CRC
|
||||
__ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // long buf
|
||||
__ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset
|
||||
__ movl(crc, Address(rsp, 4 + 4 * wordSize)); // Initial CRC
|
||||
} else {
|
||||
__ movptr(buf, Address(rsp, 3*wordSize)); // byte[] array
|
||||
__ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // byte[] array
|
||||
__ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
|
||||
__ addptr(buf, Address(rsp, 2*wordSize)); // + offset
|
||||
__ movl(crc, Address(rsp, 4*wordSize)); // Initial CRC
|
||||
__ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset
|
||||
__ movl(crc, Address(rsp, 4 + 3 * wordSize)); // Initial CRC
|
||||
}
|
||||
|
||||
__ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len);
|
||||
@ -822,6 +829,53 @@ address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpret
|
||||
return generate_native_entry(false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Method entry for static native methods:
|
||||
* int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end)
|
||||
* int java.util.zip.CRC32C.updateByteBuffer(int crc, long address, int off, int end)
|
||||
*/
|
||||
address InterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
|
||||
if (UseCRC32CIntrinsics) {
|
||||
address entry = __ pc();
|
||||
// Load parameters
|
||||
const Register crc = rax; // crc
|
||||
const Register buf = rcx; // source java byte array address
|
||||
const Register len = rdx; // length
|
||||
const Register end = len;
|
||||
|
||||
// value x86_32
|
||||
// interp. arg ptr ESP + 4
|
||||
// int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int end)
|
||||
// 3 2 1 0
|
||||
// int java.util.zip.CRC32.updateByteBuffer(int crc, long address, int off, int end)
|
||||
// 4 2,3 1 0
|
||||
|
||||
// Arguments are reversed on java expression stack
|
||||
__ movl(end, Address(rsp, 4 + 0)); // end
|
||||
__ subl(len, Address(rsp, 4 + 1 * wordSize)); // end - offset == length
|
||||
// Calculate address of start element
|
||||
if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) {
|
||||
__ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // long address
|
||||
__ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset
|
||||
__ movl(crc, Address(rsp, 4 + 4 * wordSize)); // Initial CRC
|
||||
} else {
|
||||
__ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // byte[] array
|
||||
__ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
|
||||
__ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset
|
||||
__ movl(crc, Address(rsp, 4 + 3 * wordSize)); // Initial CRC
|
||||
}
|
||||
__ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32C()), crc, buf, len);
|
||||
// result in rax
|
||||
// _areturn
|
||||
__ pop(rdi); // get return address
|
||||
__ mov(rsp, rsi); // set sp to sender sp
|
||||
__ jmp(rdi);
|
||||
|
||||
return entry;
|
||||
}
|
||||
return generate_native_entry(false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Method entry for static native method:
|
||||
* java.lang.Float.intBitsToFloat(int bits)
|
||||
|
@ -804,6 +804,57 @@ address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpret
|
||||
return generate_native_entry(false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Method entry for static native methods:
|
||||
* int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end)
|
||||
* int java.util.zip.CRC32C.updateByteBuffer(int crc, long address, int off, int end)
|
||||
*/
|
||||
address InterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
|
||||
if (UseCRC32CIntrinsics) {
|
||||
address entry = __ pc();
|
||||
// Load parameters
|
||||
const Register crc = c_rarg0; // crc
|
||||
const Register buf = c_rarg1; // source java byte array address
|
||||
const Register len = c_rarg2;
|
||||
const Register off = c_rarg3; // offset
|
||||
const Register end = len;
|
||||
|
||||
// Arguments are reversed on java expression stack
|
||||
// Calculate address of start element
|
||||
if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) {
|
||||
__ movptr(buf, Address(rsp, 3 * wordSize)); // long buf
|
||||
__ movl2ptr(off, Address(rsp, 2 * wordSize)); // offset
|
||||
__ addq(buf, off); // + offset
|
||||
__ movl(crc, Address(rsp, 5 * wordSize)); // Initial CRC
|
||||
// Note on 5 * wordSize vs. 4 * wordSize:
|
||||
// * int java.util.zip.CRC32C.updateByteBuffer(int crc, long address, int off, int end)
|
||||
// 4 2,3 1 0
|
||||
// end starts at SP + 8
|
||||
// The Java(R) Virtual Machine Specification Java SE 7 Edition
|
||||
// 4.10.2.3. Values of Types long and double
|
||||
// "When calculating operand stack length, values of type long and double have length two."
|
||||
} else {
|
||||
__ movptr(buf, Address(rsp, 3 * wordSize)); // byte[] array
|
||||
__ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
|
||||
__ movl2ptr(off, Address(rsp, 2 * wordSize)); // offset
|
||||
__ addq(buf, off); // + offset
|
||||
__ movl(crc, Address(rsp, 4 * wordSize)); // Initial CRC
|
||||
}
|
||||
__ movl(end, Address(rsp, wordSize)); // end
|
||||
__ subl(end, off); // end - off
|
||||
__ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32C()), crc, buf, len);
|
||||
// result in rax
|
||||
// _areturn
|
||||
__ pop(rdi); // get return address
|
||||
__ mov(rsp, r13); // set sp to sender sp
|
||||
__ jmp(rdi);
|
||||
|
||||
return entry;
|
||||
}
|
||||
|
||||
return generate_native_entry(false);
|
||||
}
|
||||
|
||||
// Interpreter stub for calling a native method. (asm interpreter)
|
||||
// This sets up a somewhat different looking stack for calling the
|
||||
// native method than the typical interpreter frame setup.
|
||||
|
@ -661,6 +661,18 @@ void VM_Version::get_processor_features() {
|
||||
FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
|
||||
}
|
||||
|
||||
if (supports_sse4_2()) {
|
||||
if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
|
||||
UseCRC32CIntrinsics = true;
|
||||
}
|
||||
}
|
||||
else if (UseCRC32CIntrinsics) {
|
||||
if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
|
||||
warning("CRC32C intrinsics are not available on this CPU");
|
||||
}
|
||||
FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
|
||||
}
|
||||
|
||||
// The AES intrinsic stubs require AES instruction support (of course)
|
||||
// but also require sse3 mode for instructions it use.
|
||||
if (UseAES && (UseSSE > 2)) {
|
||||
@ -704,12 +716,6 @@ void VM_Version::get_processor_features() {
|
||||
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
|
||||
}
|
||||
|
||||
if (UseCRC32CIntrinsics) {
|
||||
if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics))
|
||||
warning("CRC32C intrinsics are not available on this CPU");
|
||||
FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
|
||||
}
|
||||
|
||||
if (UseAdler32Intrinsics) {
|
||||
warning("Adler32Intrinsics not available on this CPU.");
|
||||
FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
|
||||
|
@ -42,4 +42,5 @@
|
||||
// Not supported
|
||||
address generate_CRC32_update_entry() { return NULL; }
|
||||
address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
|
||||
address generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
|
||||
#endif // CPU_ZERO_VM_INTERPRETERGENERATOR_ZERO_HPP
|
||||
|
@ -90,6 +90,8 @@ class AbstractInterpreter: AllStatic {
|
||||
java_util_zip_CRC32_update, // implementation of java.util.zip.CRC32.update()
|
||||
java_util_zip_CRC32_updateBytes, // implementation of java.util.zip.CRC32.updateBytes()
|
||||
java_util_zip_CRC32_updateByteBuffer, // implementation of java.util.zip.CRC32.updateByteBuffer()
|
||||
java_util_zip_CRC32C_updateBytes, // implementation of java.util.zip.CRC32C.updateBytes(crc, b[], off, end)
|
||||
java_util_zip_CRC32C_updateDirectByteBuffer, // implementation of java.util.zip.CRC32C.updateDirectByteBuffer(crc, address, off, end)
|
||||
java_lang_Float_intBitsToFloat, // implementation of java.lang.Float.intBitsToFloat()
|
||||
java_lang_Float_floatToRawIntBits, // implementation of java.lang.Float.floatToRawIntBits()
|
||||
java_lang_Double_longBitsToDouble, // implementation of java.lang.Double.longBitsToDouble()
|
||||
|
@ -234,6 +234,13 @@ AbstractInterpreter::MethodKind AbstractInterpreter::method_kind(methodHandle m)
|
||||
case vmIntrinsics::_updateByteBufferCRC32 : return java_util_zip_CRC32_updateByteBuffer;
|
||||
}
|
||||
}
|
||||
if (UseCRC32CIntrinsics) {
|
||||
// Use optimized stub code for CRC32C methods.
|
||||
switch (m->intrinsic_id()) {
|
||||
case vmIntrinsics::_updateBytesCRC32C : return java_util_zip_CRC32C_updateBytes;
|
||||
case vmIntrinsics::_updateDirectByteBufferCRC32C : return java_util_zip_CRC32C_updateDirectByteBuffer;
|
||||
}
|
||||
}
|
||||
|
||||
switch(m->intrinsic_id()) {
|
||||
case vmIntrinsics::_intBitsToFloat: return java_lang_Float_intBitsToFloat;
|
||||
@ -349,6 +356,8 @@ void AbstractInterpreter::print_method_kind(MethodKind kind) {
|
||||
case java_util_zip_CRC32_update : tty->print("java_util_zip_CRC32_update"); break;
|
||||
case java_util_zip_CRC32_updateBytes : tty->print("java_util_zip_CRC32_updateBytes"); break;
|
||||
case java_util_zip_CRC32_updateByteBuffer : tty->print("java_util_zip_CRC32_updateByteBuffer"); break;
|
||||
case java_util_zip_CRC32C_updateBytes : tty->print("java_util_zip_CRC32C_updateBytes"); break;
|
||||
case java_util_zip_CRC32C_updateDirectByteBuffer: tty->print("java_util_zip_CRC32C_updateDirectByteByffer"); break;
|
||||
default:
|
||||
if (kind >= method_handle_invoke_FIRST &&
|
||||
kind <= method_handle_invoke_LAST) {
|
||||
@ -567,6 +576,10 @@ address InterpreterGenerator::generate_method_entry(
|
||||
: // fall thru
|
||||
case Interpreter::java_util_zip_CRC32_updateByteBuffer
|
||||
: entry_point = generate_CRC32_updateBytes_entry(kind); break;
|
||||
case Interpreter::java_util_zip_CRC32C_updateBytes
|
||||
: // fall thru
|
||||
case Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer
|
||||
: entry_point = generate_CRC32C_updateBytes_entry(kind); break;
|
||||
#if defined(TARGET_ARCH_x86) && !defined(_LP64)
|
||||
// On x86_32 platforms, a special entry is generated for the following four methods.
|
||||
// On other platforms the normal entry is used to enter these methods.
|
||||
@ -582,9 +595,9 @@ address InterpreterGenerator::generate_method_entry(
|
||||
case Interpreter::java_lang_Float_intBitsToFloat:
|
||||
case Interpreter::java_lang_Float_floatToRawIntBits:
|
||||
case Interpreter::java_lang_Double_longBitsToDouble:
|
||||
case Interpreter::java_lang_Double_doubleToRawLongBits:
|
||||
entry_point = generate_native_entry(false);
|
||||
break;
|
||||
case Interpreter::java_lang_Double_doubleToRawLongBits:
|
||||
entry_point = generate_native_entry(false);
|
||||
break;
|
||||
#endif // defined(TARGET_ARCH_x86) && !defined(_LP64)
|
||||
#endif // CC_INTERP
|
||||
default:
|
||||
|
@ -418,6 +418,11 @@ void TemplateInterpreterGenerator::generate_all() {
|
||||
method_entry(java_util_zip_CRC32_updateByteBuffer)
|
||||
}
|
||||
|
||||
if (UseCRC32CIntrinsics) {
|
||||
method_entry(java_util_zip_CRC32C_updateBytes)
|
||||
method_entry(java_util_zip_CRC32C_updateDirectByteBuffer)
|
||||
}
|
||||
|
||||
method_entry(java_lang_Float_intBitsToFloat);
|
||||
method_entry(java_lang_Float_floatToRawIntBits);
|
||||
method_entry(java_lang_Double_longBitsToDouble);
|
||||
|
@ -136,8 +136,9 @@ address StubRoutines::_sha512_implCompress = NULL;
|
||||
address StubRoutines::_sha512_implCompressMB = NULL;
|
||||
|
||||
address StubRoutines::_updateBytesCRC32 = NULL;
|
||||
address StubRoutines::_crc_table_adr = NULL;
|
||||
address StubRoutines::_crc_table_adr = NULL;
|
||||
|
||||
address StubRoutines::_crc32c_table_addr = NULL;
|
||||
address StubRoutines::_updateBytesCRC32C = NULL;
|
||||
address StubRoutines::_updateBytesAdler32 = NULL;
|
||||
|
||||
|
@ -197,6 +197,7 @@ class StubRoutines: AllStatic {
|
||||
static address _updateBytesCRC32;
|
||||
static address _crc_table_adr;
|
||||
|
||||
static address _crc32c_table_addr;
|
||||
static address _updateBytesCRC32C;
|
||||
static address _updateBytesAdler32;
|
||||
|
||||
@ -364,6 +365,7 @@ class StubRoutines: AllStatic {
|
||||
static address updateBytesCRC32() { return _updateBytesCRC32; }
|
||||
static address crc_table_addr() { return _crc_table_adr; }
|
||||
|
||||
static address crc32c_table_addr() { return _crc32c_table_addr; }
|
||||
static address updateBytesCRC32C() { return _updateBytesCRC32C; }
|
||||
static address updateBytesAdler32() { return _updateBytesAdler32; }
|
||||
|
||||
|
@ -832,6 +832,7 @@ typedef CompactHashtable<Symbol*, char> SymbolCompactHashTable;
|
||||
static_field(StubRoutines, _ghash_processBlocks, address) \
|
||||
static_field(StubRoutines, _updateBytesCRC32, address) \
|
||||
static_field(StubRoutines, _crc_table_adr, address) \
|
||||
static_field(StubRoutines, _crc32c_table_addr, address) \
|
||||
static_field(StubRoutines, _updateBytesCRC32C, address) \
|
||||
static_field(StubRoutines, _multiplyToLen, address) \
|
||||
static_field(StubRoutines, _squareToLen, address) \
|
||||
|
Loading…
x
Reference in New Issue
Block a user