8073583: C2 support for CRC32C on SPARC
Reviewed-by: jrose, kvn
This commit is contained in:
parent
4a826139e3
commit
e2533553f6
@ -199,6 +199,12 @@ void VM_Version::get_processor_features() {
|
||||
UseCRC32Intrinsics = true;
|
||||
}
|
||||
|
||||
if (UseCRC32CIntrinsics) {
|
||||
if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics))
|
||||
warning("CRC32C intrinsics are not available on this CPU");
|
||||
FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
|
||||
}
|
||||
|
||||
if (auxv & (HWCAP_SHA1 | HWCAP_SHA2)) {
|
||||
if (FLAG_IS_DEFAULT(UseSHA)) {
|
||||
FLAG_SET_DEFAULT(UseSHA, true);
|
||||
|
@ -191,6 +191,13 @@ void VM_Version::initialize() {
|
||||
FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
|
||||
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
|
||||
}
|
||||
|
||||
if (UseCRC32CIntrinsics) {
|
||||
if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics))
|
||||
warning("CRC32C intrinsics are not available on this CPU");
|
||||
FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
|
||||
}
|
||||
|
||||
// Adjust RTM (Restricted Transactional Memory) flags.
|
||||
if (!has_tcheck() && UseRTMLocking) {
|
||||
// Can't continue because UseRTMLocking affects UseBiasedLocking flag
|
||||
|
@ -128,8 +128,11 @@ class Assembler : public AbstractAssembler {
|
||||
faligndata_op3 = 0x36,
|
||||
flog3_op3 = 0x36,
|
||||
edge_op3 = 0x36,
|
||||
fzero_op3 = 0x36,
|
||||
fsrc_op3 = 0x36,
|
||||
fnot_op3 = 0x36,
|
||||
xmulx_op3 = 0x36,
|
||||
crc32c_op3 = 0x36,
|
||||
impdep2_op3 = 0x37,
|
||||
stpartialf_op3 = 0x37,
|
||||
jmpl_op3 = 0x38,
|
||||
@ -231,7 +234,9 @@ class Assembler : public AbstractAssembler {
|
||||
|
||||
sha1_opf = 0x141,
|
||||
sha256_opf = 0x142,
|
||||
sha512_opf = 0x143
|
||||
sha512_opf = 0x143,
|
||||
|
||||
crc32c_opf = 0x147
|
||||
};
|
||||
|
||||
enum op5s {
|
||||
@ -600,6 +605,11 @@ class Assembler : public AbstractAssembler {
|
||||
return x & ((1 << 10) - 1);
|
||||
}
|
||||
|
||||
// Create a low12 __value__ (not a field) for a given 32-bit constant:
// only the 12 least-significant bits of x are kept (mask 0xFFF).
|
||||
static int low12( int x ) {
|
||||
return x & ((1 << 12) - 1);
|
||||
}
|
||||
|
||||
// AES crypto instructions supported only on certain processors
|
||||
static void aes_only() { assert( VM_Version::has_aes(), "This instruction only works on SPARC with AES instructions support"); }
|
||||
|
||||
@ -608,6 +618,9 @@ class Assembler : public AbstractAssembler {
|
||||
static void sha256_only() { assert( VM_Version::has_sha256(), "This instruction only works on SPARC with SHA256"); }
|
||||
static void sha512_only() { assert( VM_Version::has_sha512(), "This instruction only works on SPARC with SHA512"); }
|
||||
|
||||
// CRC32C instruction supported only on certain processors
|
||||
static void crc32c_only() { assert( VM_Version::has_crc32c(), "This instruction only works on SPARC with CRC32C"); }
|
||||
|
||||
// instruction only in VIS1
|
||||
static void vis1_only() { assert( VM_Version::has_vis1(), "This instruction only works on SPARC with VIS1"); }
|
||||
|
||||
@ -1022,6 +1035,7 @@ public:
|
||||
|
||||
void nop() { emit_int32( op(branch_op) | op2(sethi_op2) ); }
|
||||
|
||||
void sw_count() { emit_int32( op(branch_op) | op2(sethi_op2) | 0x3f0 ); }
|
||||
|
||||
// pp 202
|
||||
|
||||
@ -1198,8 +1212,14 @@ public:
|
||||
|
||||
void faligndata( FloatRegister s1, FloatRegister s2, FloatRegister d ) { vis1_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(faligndata_op3) | fs1(s1, FloatRegisterImpl::D) | opf(faligndata_opf) | fs2(s2, FloatRegisterImpl::D)); }
|
||||
|
||||
void fzero( FloatRegisterImpl::Width w, FloatRegister d ) { vis1_only(); emit_int32( op(arith_op) | fd(d, w) | op3(fzero_op3) | opf(0x62 - w)); }
|
||||
|
||||
void fsrc2( FloatRegisterImpl::Width w, FloatRegister s2, FloatRegister d ) { vis1_only(); emit_int32( op(arith_op) | fd(d, w) | op3(fsrc_op3) | opf(0x7A - w) | fs2(s2, w)); }
|
||||
|
||||
void fnot1( FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister d ) { vis1_only(); emit_int32( op(arith_op) | fd(d, w) | op3(fnot_op3) | fs1(s1, w) | opf(0x6C - w)); }
|
||||
|
||||
void fpmerge( FloatRegister s1, FloatRegister s2, FloatRegister d ) { vis1_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(0x36) | fs1(s1, FloatRegisterImpl::S) | opf(0x4b) | fs2(s2, FloatRegisterImpl::S)); }
|
||||
|
||||
void stpartialf( Register s1, Register s2, FloatRegister d, int ia = -1 ) { vis1_only(); emit_int32( op(ldst_op) | fd(d, FloatRegisterImpl::D) | op3(stpartialf_op3) | rs1(s1) | imm_asi(ia) | rs2(s2)); }
|
||||
|
||||
// VIS2 instructions
|
||||
@ -1224,6 +1244,10 @@ public:
|
||||
void sha256() { sha256_only(); emit_int32( op(arith_op) | op3(sha_op3) | opf(sha256_opf)); }
|
||||
void sha512() { sha512_only(); emit_int32( op(arith_op) | op3(sha_op3) | opf(sha512_opf)); }
|
||||
|
||||
// CRC32C instruction
// Emits one 32-bit instruction word composed of arith_op, crc32c_op3 and
// crc32c_opf; s1, s2 and d are all encoded as double-width (D) FP registers.
// crc32c_only() runs first as the availability guard.
|
||||
|
||||
void crc32c( FloatRegister s1, FloatRegister s2, FloatRegister d ) { crc32c_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(crc32c_op3) | fs1(s1, FloatRegisterImpl::D) | opf(crc32c_opf) | fs2(s2, FloatRegisterImpl::D)); }
|
||||
|
||||
// Creation
|
||||
Assembler(CodeBuffer* code) : AbstractAssembler(code) {
|
||||
#ifdef CHECK_DELAY
|
||||
|
@ -956,6 +956,7 @@ void MacroAssembler::set64(jlong value, Register d, Register tmp) {
|
||||
|
||||
int hi = (int)(value >> 32);
|
||||
int lo = (int)(value & ~0);
|
||||
int bits_33to2 = (int)((value >> 2) & ~0);
|
||||
// (Matcher::isSimpleConstant64 knows about the following optimizations.)
|
||||
if (Assembler::is_simm13(lo) && value == lo) {
|
||||
or3(G0, lo, d);
|
||||
@ -964,6 +965,12 @@ void MacroAssembler::set64(jlong value, Register d, Register tmp) {
|
||||
if (low10(lo) != 0)
|
||||
or3(d, low10(lo), d);
|
||||
}
|
||||
else if ((hi >> 2) == 0) {
|
||||
Assembler::sethi(bits_33to2, d); // hardware version zero-extends to upper 32
|
||||
sllx(d, 2, d);
|
||||
if (low12(lo) != 0)
|
||||
or3(d, low12(lo), d);
|
||||
}
|
||||
else if (hi == -1) {
|
||||
Assembler::sethi(~lo, d); // hardware version zero-extends to upper 32
|
||||
xor3(d, low10(lo) ^ ~low10(~0), d);
|
||||
@ -4351,3 +4358,52 @@ void MacroAssembler::bis_zeroing(Register to, Register count, Register temp, Lab
|
||||
cmp_and_brx_short(to, end, Assembler::lessUnsigned, Assembler::pt, small_loop);
|
||||
nop(); // Separate short branches
|
||||
}
|
||||
|
||||
/**
|
||||
* Update CRC-32[C] with a byte value according to constants in table
|
||||
*
|
||||
* @param [in,out]crc Register containing the crc.
|
||||
* @param [in]val Register containing the byte to fold into the CRC.
|
||||
* @param [in]table Register containing the table of crc constants.
|
||||
*
|
||||
* uint32_t crc;
|
||||
* val = crc_table[(val ^ crc) & 0xFF];
|
||||
* crc = val ^ (crc >> 8);
* 
* Note: val is overwritten; it is reused first as the table index and then
* as the loaded table entry. table is only read.
|
||||
*/
|
||||
void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) {
|
||||
// val = val ^ crc
xor3(val, crc, val);
|
||||
// keep only the low byte: table index in 0..255
and3(val, 0xFF, val);
|
||||
// scale the index by 4 to get a byte offset into the table
sllx(val, 2, val);
|
||||
// val = unsigned word loaded from table + offset
lduw(table, val, val);
|
||||
// crc = crc >> 8 (logical shift)
srlx(crc, 8, crc);
|
||||
// crc = table entry ^ shifted crc
xor3(val, crc, crc);
|
||||
}
|
||||
|
||||
// Reverse byte order of lower 32 bits, assuming upper 32 bits all zeros
// The result is built up in dst; tmp is scratch.
// Note: dst is written by the first instruction while src is still read
// afterwards, and tmp is overwritten between reads of src, so src must not be
// the same register as dst or tmp (and dst must differ from tmp).
|
||||
void MacroAssembler::reverse_bytes_32(Register src, Register dst, Register tmp) {
|
||||
// dst = src >> 24: source byte 3 lands in result byte 0
// (this is where the "upper 32 bits are zero" assumption is needed)
srlx(src, 24, dst);
|
||||
|
||||
// isolate source byte 2 (shift left 40, then right 56) and place it at bits 15..8
sllx(src, 32+8, tmp);
|
||||
srlx(tmp, 32+24, tmp);
|
||||
sllx(tmp, 8, tmp);
|
||||
or3(dst, tmp, dst);
|
||||
|
||||
// isolate source byte 1 (shift left 48, then right 56) and place it at bits 23..16
sllx(src, 32+16, tmp);
|
||||
srlx(tmp, 32+24, tmp);
|
||||
sllx(tmp, 16, tmp);
|
||||
or3(dst, tmp, dst);
|
||||
|
||||
// source byte 0: shift left 56, then right 32, which leaves it at bits 31..24
sllx(src, 32+24, tmp);
|
||||
srlx(tmp, 32, tmp);
|
||||
or3(dst, tmp, dst);
|
||||
}
|
||||
|
||||
// Byte-reverse the lower 32 bits of integer register src and move the result
// into floating-point register dst.
// tmp1 receives the reversed value and tmp2 is the scratch register passed on
// to reverse_bytes_32; both are overwritten.
void MacroAssembler::movitof_revbytes(Register src, FloatRegister dst, Register tmp1, Register tmp2) {
|
||||
reverse_bytes_32(src, tmp1, tmp2);
|
||||
// integer register -> FP register
movxtod(tmp1, dst);
|
||||
}
|
||||
|
||||
// Move floating-point register src to the integer side and byte-reverse the
// lower 32 bits into dst.
// tmp1 holds the raw value moved from src and tmp2 is the scratch register
// passed on to reverse_bytes_32; both are overwritten.
void MacroAssembler::movftoi_revbytes(FloatRegister src, Register dst, Register tmp1, Register tmp2) {
|
||||
// FP register -> integer register
movdtox(src, tmp1);
|
||||
reverse_bytes_32(tmp1, dst, tmp2);
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -903,6 +903,10 @@ public:
|
||||
inline void ldf(FloatRegisterImpl::Width w, Register s1, RegisterOrConstant s2, FloatRegister d);
|
||||
inline void ldf(FloatRegisterImpl::Width w, const Address& a, FloatRegister d, int offset = 0);
|
||||
|
||||
// little-endian
|
||||
inline void ldxl(Register s1, Register s2, Register d) { ldxa(s1, s2, ASI_PRIMARY_LITTLE, d); }
|
||||
inline void ldfl(FloatRegisterImpl::Width w, Register s1, Register s2, FloatRegister d) { ldfa(w, s1, s2, ASI_PRIMARY_LITTLE, d); }
|
||||
|
||||
// membar pseudo instruction. Takes into account the target memory model.
|
||||
inline void membar( Assembler::Membar_mask_bits const7a );
|
||||
|
||||
@ -1436,6 +1440,14 @@ public:
|
||||
// Use BIS for zeroing
|
||||
void bis_zeroing(Register to, Register count, Register temp, Label& Ldone);
|
||||
|
||||
// Update CRC-32[C] with a byte value according to constants in table
|
||||
void update_byte_crc32(Register crc, Register val, Register table);
|
||||
|
||||
// Reverse byte order of lower 32 bits, assuming upper 32 bits all zeros
|
||||
void reverse_bytes_32(Register src, Register dst, Register tmp);
|
||||
void movitof_revbytes(Register src, FloatRegister dst, Register tmp1, Register tmp2);
|
||||
void movftoi_revbytes(FloatRegister src, Register dst, Register tmp1, Register tmp2);
|
||||
|
||||
#undef VIRTUAL
|
||||
};
|
||||
|
||||
|
@ -4910,6 +4910,206 @@ class StubGenerator: public StubCodeGenerator {
|
||||
return start;
|
||||
}
|
||||
|
||||
#define CHUNK_LEN 128 /* 128 x 8B = 1KB */
|
||||
#define CHUNK_K1 0x1307a0206 /* reverseBits(pow(x, CHUNK_LEN*8*8*3 - 32) mod P(x)) << 1 */
|
||||
#define CHUNK_K2 0x1a0f717c4 /* reverseBits(pow(x, CHUNK_LEN*8*8*2 - 32) mod P(x)) << 1 */
|
||||
#define CHUNK_K3 0x0170076fa /* reverseBits(pow(x, CHUNK_LEN*8*8*1 - 32) mod P(x)) << 1 */
|
||||
|
||||
/**
|
||||
* Arguments:
|
||||
*
|
||||
* Inputs:
|
||||
* O0 - int crc
|
||||
* O1 - byte* buf
|
||||
* O2 - int len
|
||||
* O3 - int* table
|
||||
*
|
||||
* Output:
|
||||
* O0 - int crc result
*
* Layout of the emitted code:
*  - return immediately when len <= 0;
*  - head: bytes are folded one at a time through the table until buf is
*    8-byte aligned (or len reaches 0);
*  - body: 8-byte words are folded with the crc32c instruction on the FP
*    side. While len >= CHUNK_LEN*8*4, four consecutive chunks are run as
*    independent streams and merged using xmulx with CHUNK_K1..CHUNK_K3;
*    32-byte and 8-byte loops then consume what is left;
*  - tail: the remaining bytes (fewer than 8) go through the table again.
*
* Registers written besides O0..O2: G1, G3, G4, G5, O4, O5 and the
* even-numbered FP registers F0..F14. The parallel path also stores one
* 8-byte temporary at the 8-byte-aligned address computed from SP - 8.
|
||||
*/
|
||||
address generate_updateBytesCRC32C() {
|
||||
assert(UseCRC32CIntrinsics, "need CRC32C instruction");
|
||||
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32C");
|
||||
address start = __ pc();
|
||||
|
||||
const Register crc = O0; // crc
|
||||
const Register buf = O1; // source java byte array address
|
||||
const Register len = O2; // number of bytes
|
||||
const Register table = O3; // byteTable
|
||||
|
||||
Label L_crc32c_head, L_crc32c_aligned;
|
||||
Label L_crc32c_parallel, L_crc32c_parallel_loop;
|
||||
Label L_crc32c_serial, L_crc32c_x32_loop, L_crc32c_x8, L_crc32c_x8_loop;
|
||||
Label L_crc32c_done, L_crc32c_tail, L_crc32c_return;
|
||||
|
||||
// nothing to do for an empty (or negative) length: crc is returned unchanged
__ cmp_and_br_short(len, 0, Assembler::lessEqual, Assembler::pn, L_crc32c_return);
|
||||
|
||||
// clear upper 32 bits of crc
|
||||
__ clruwu(crc);
|
||||
|
||||
// G4 = buf & 7; zero means buf is already 8-byte aligned
__ and3(buf, 7, G4);
|
||||
__ cmp_and_brx_short(G4, 0, Assembler::equal, Assembler::pt, L_crc32c_aligned);
|
||||
|
||||
// G4 = 8 - (buf & 7): number of head bytes needed to reach alignment
__ mov(8, G1);
|
||||
__ sub(G1, G4, G4);
|
||||
|
||||
// ------ process the misaligned head (7 bytes or less) ------
|
||||
__ BIND(L_crc32c_head);
|
||||
|
||||
// crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF];
|
||||
__ ldub(buf, 0, G1);
|
||||
__ update_byte_crc32(crc, G1, table);
|
||||
|
||||
__ inc(buf);
|
||||
__ dec(len);
|
||||
// input exhausted before reaching alignment: crc is already in O0
__ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pn, L_crc32c_return);
|
||||
__ dec(G4);
|
||||
__ cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_head);
|
||||
|
||||
// ------ process the 8-byte-aligned body ------
|
||||
__ BIND(L_crc32c_aligned);
|
||||
__ nop();
|
||||
// fewer than 8 bytes left: skip the FP path, crc stays on the integer side
__ cmp_and_br_short(len, 8, Assembler::less, Assembler::pn, L_crc32c_tail);
|
||||
|
||||
// reverse the byte order of lower 32 bits to big endian, and move to FP side
|
||||
__ movitof_revbytes(crc, F0, G1, G3);
|
||||
|
||||
// G4 = number of bytes consumed by one parallel round (four chunks)
__ set(CHUNK_LEN*8*4, G4);
|
||||
__ cmp_and_br_short(len, G4, Assembler::less, Assembler::pt, L_crc32c_serial);
|
||||
|
||||
// ------ process four 1KB chunks in parallel ------
|
||||
__ BIND(L_crc32c_parallel);
|
||||
|
||||
// F0 carries the running crc; the other three streams start from zero
__ fzero(FloatRegisterImpl::D, F2);
|
||||
__ fzero(FloatRegisterImpl::D, F4);
|
||||
__ fzero(FloatRegisterImpl::D, F6);
|
||||
|
||||
// the loop covers CHUNK_LEN - 1 words per chunk; the last word of each
// chunk is handled separately after the loop
__ mov(CHUNK_LEN - 1, G4);
|
||||
__ BIND(L_crc32c_parallel_loop);
|
||||
// schedule ldf's ahead of crc32c's to hide the load-use latency
|
||||
__ ldf(FloatRegisterImpl::D, buf, 0, F8);
|
||||
__ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8, F10);
|
||||
__ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12);
|
||||
__ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*24, F14);
|
||||
__ crc32c(F0, F8, F0);
|
||||
__ crc32c(F2, F10, F2);
|
||||
__ crc32c(F4, F12, F4);
|
||||
__ crc32c(F6, F14, F6);
|
||||
__ inc(buf, 8);
|
||||
__ dec(G4);
|
||||
__ cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_parallel_loop);
|
||||
|
||||
// last word of chunks 0..2 goes through crc32c as in the loop
__ ldf(FloatRegisterImpl::D, buf, 0, F8);
|
||||
__ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8, F10);
|
||||
__ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12);
|
||||
__ crc32c(F0, F8, F0);
|
||||
__ crc32c(F2, F10, F2);
|
||||
__ crc32c(F4, F12, F4);
|
||||
|
||||
// last word of chunk 3 is only loaded here (into F14); it is xor'ed into
// the combined value further down
__ inc(buf, CHUNK_LEN*24);
|
||||
__ ldfl(FloatRegisterImpl::D, buf, G0, F14); // load in little endian
|
||||
__ inc(buf, 8);
|
||||
|
||||
// buf now points just past the four chunks; prefetch at the four chunk
// offsets from the new position
__ prefetch(buf, 0, Assembler::severalReads);
|
||||
__ prefetch(buf, CHUNK_LEN*8, Assembler::severalReads);
|
||||
__ prefetch(buf, CHUNK_LEN*16, Assembler::severalReads);
|
||||
__ prefetch(buf, CHUNK_LEN*24, Assembler::severalReads);
|
||||
|
||||
// move to INT side, and reverse the byte order of lower 32 bits to little endian
|
||||
__ movftoi_revbytes(F0, O4, G1, G4);
|
||||
__ movftoi_revbytes(F2, O5, G1, G4);
|
||||
__ movftoi_revbytes(F4, G5, G1, G4);
|
||||
|
||||
// combine the results of 4 chunks
|
||||
__ set64(CHUNK_K1, G3, G1);
|
||||
__ xmulx(O4, G3, O4);
|
||||
__ set64(CHUNK_K2, G3, G1);
|
||||
__ xmulx(O5, G3, O5);
|
||||
__ set64(CHUNK_K3, G3, G1);
|
||||
__ xmulx(G5, G3, G5);
|
||||
|
||||
// O5 = O4 ^ O5 ^ G5 ^ (last word of chunk 3, moved from F14 into G4)
__ movdtox(F14, G4);
|
||||
__ xor3(O4, O5, O5);
|
||||
__ xor3(G5, O5, O5);
|
||||
__ xor3(G4, O5, O5);
|
||||
|
||||
// reverse the byte order to big endian, via stack, and move to FP side
|
||||
// G1 = (SP - 8) rounded down to a multiple of 8
__ add(SP, -8, G1);
|
||||
__ srlx(G1, 3, G1);
|
||||
__ sllx(G1, 3, G1);
|
||||
__ stx(O5, G1, G0);
|
||||
__ ldfl(FloatRegisterImpl::D, G1, G0, F2); // load in little endian
|
||||
|
||||
// fold the combined word into the fourth stream (F6); the result becomes
// the running crc in F0
__ crc32c(F6, F2, F0);
|
||||
|
||||
// account for the bytes just consumed and repeat while a full round fits
__ set(CHUNK_LEN*8*4, G4);
|
||||
__ sub(len, G4, len);
|
||||
__ cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_parallel);
|
||||
__ nop();
|
||||
__ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_done);
|
||||
|
||||
__ BIND(L_crc32c_serial);
|
||||
|
||||
// G4 = 32: loop bound for the 32-byte loop
__ mov(32, G4);
|
||||
__ cmp_and_br_short(len, G4, Assembler::less, Assembler::pn, L_crc32c_x8);
|
||||
|
||||
// ------ process 32B chunks ------
|
||||
__ BIND(L_crc32c_x32_loop);
|
||||
__ ldf(FloatRegisterImpl::D, buf, 0, F2);
|
||||
__ inc(buf, 8);
|
||||
__ crc32c(F0, F2, F0);
|
||||
__ ldf(FloatRegisterImpl::D, buf, 0, F2);
|
||||
__ inc(buf, 8);
|
||||
__ crc32c(F0, F2, F0);
|
||||
__ ldf(FloatRegisterImpl::D, buf, 0, F2);
|
||||
__ inc(buf, 8);
|
||||
__ crc32c(F0, F2, F0);
|
||||
__ ldf(FloatRegisterImpl::D, buf, 0, F2);
|
||||
__ inc(buf, 8);
|
||||
__ crc32c(F0, F2, F0);
|
||||
__ dec(len, 32);
|
||||
__ cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_x32_loop);
|
||||
|
||||
__ BIND(L_crc32c_x8);
|
||||
__ nop();
|
||||
__ cmp_and_br_short(len, 8, Assembler::less, Assembler::pt, L_crc32c_done);
|
||||
|
||||
// ------ process 8B chunks ------
|
||||
__ BIND(L_crc32c_x8_loop);
|
||||
__ ldf(FloatRegisterImpl::D, buf, 0, F2);
|
||||
__ inc(buf, 8);
|
||||
__ crc32c(F0, F2, F0);
|
||||
__ dec(len, 8);
|
||||
__ cmp_and_br_short(len, 8, Assembler::greaterEqual, Assembler::pt, L_crc32c_x8_loop);
|
||||
|
||||
__ BIND(L_crc32c_done);
|
||||
|
||||
// move to INT side, and reverse the byte order of lower 32 bits to little endian
|
||||
__ movftoi_revbytes(F0, crc, G1, G3);
|
||||
|
||||
__ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_return);
|
||||
|
||||
// ------ process the misaligned tail (7 bytes or less) ------
|
||||
__ BIND(L_crc32c_tail);
|
||||
|
||||
// crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF];
|
||||
__ ldub(buf, 0, G1);
|
||||
__ update_byte_crc32(crc, G1, table);
|
||||
|
||||
__ inc(buf);
|
||||
__ dec(len);
|
||||
__ cmp_and_br_short(len, 0, Assembler::greater, Assembler::pt, L_crc32c_tail);
|
||||
|
||||
__ BIND(L_crc32c_return);
|
||||
__ nop();
|
||||
// leaf return; the delay slot is filled with a nop
__ retl();
|
||||
__ delayed()->nop();
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
void generate_initial() {
|
||||
// Generates all stubs and initializes the entry points
|
||||
|
||||
@ -5001,6 +5201,11 @@ class StubGenerator: public StubCodeGenerator {
|
||||
StubRoutines::_sha512_implCompress = generate_sha512_implCompress(false, "sha512_implCompress");
|
||||
StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB");
|
||||
}
|
||||
|
||||
// generate CRC32C intrinsic code
|
||||
if (UseCRC32CIntrinsics) {
|
||||
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -41,7 +41,7 @@ static bool returns_to_call_stub(address return_pc) {
|
||||
enum /* platform_dependent_constants */ {
|
||||
// %%%%%%%% May be able to shrink this a lot
|
||||
code_size1 = 20000, // simply increase if too small (assembler will crash if too small)
|
||||
code_size2 = 23000 // simply increase if too small (assembler will crash if too small)
|
||||
code_size2 = 24000 // simply increase if too small (assembler will crash if too small)
|
||||
};
|
||||
|
||||
class Sparc {
|
||||
|
@ -230,7 +230,7 @@ void VM_Version::initialize() {
|
||||
assert((OptoLoopAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size");
|
||||
|
||||
char buf[512];
|
||||
jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
(has_v9() ? ", v9" : (has_v8() ? ", v8" : "")),
|
||||
(has_hardware_popc() ? ", popc" : ""),
|
||||
(has_vis1() ? ", vis1" : ""),
|
||||
@ -242,6 +242,7 @@ void VM_Version::initialize() {
|
||||
(has_sha1() ? ", sha1" : ""),
|
||||
(has_sha256() ? ", sha256" : ""),
|
||||
(has_sha512() ? ", sha512" : ""),
|
||||
(has_crc32c() ? ", crc32c" : ""),
|
||||
(is_ultra3() ? ", ultra3" : ""),
|
||||
(is_sun4v() ? ", sun4v" : ""),
|
||||
(is_niagara_plus() ? ", niagara_plus" : (is_niagara() ? ", niagara" : "")),
|
||||
@ -363,6 +364,23 @@ void VM_Version::initialize() {
|
||||
}
|
||||
}
|
||||
|
||||
// SPARC T4 and above should have support for CRC32C instruction
|
||||
if (has_crc32c()) {
|
||||
if (UseVIS > 2) { // CRC32C intrinsics use VIS3 instructions
|
||||
if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
|
||||
FLAG_SET_DEFAULT(UseCRC32CIntrinsics, true);
|
||||
}
|
||||
} else {
|
||||
if (UseCRC32CIntrinsics) {
|
||||
warning("SPARC CRC32C intrinsics require VIS3 instruction support. Intrinsics will be disabled.");
|
||||
FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
|
||||
}
|
||||
}
|
||||
} else if (UseCRC32CIntrinsics) {
|
||||
warning("CRC32C instruction is not available on this CPU");
|
||||
FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
|
||||
}
|
||||
|
||||
if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
|
||||
(cache_line_size > ContendedPaddingWidth))
|
||||
ContendedPaddingWidth = cache_line_size;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -53,7 +53,8 @@ protected:
|
||||
aes_instructions = 19,
|
||||
sha1_instruction = 20,
|
||||
sha256_instruction = 21,
|
||||
sha512_instruction = 22
|
||||
sha512_instruction = 22,
|
||||
crc32c_instruction = 23
|
||||
};
|
||||
|
||||
enum Feature_Flag_Set {
|
||||
@ -83,6 +84,7 @@ protected:
|
||||
sha1_instruction_m = 1 << sha1_instruction,
|
||||
sha256_instruction_m = 1 << sha256_instruction,
|
||||
sha512_instruction_m = 1 << sha512_instruction,
|
||||
crc32c_instruction_m = 1 << crc32c_instruction,
|
||||
|
||||
generic_v8_m = v8_instructions_m | hardware_mul32_m | hardware_div32_m | hardware_fsmuld_m,
|
||||
generic_v9_m = generic_v8_m | v9_instructions_m,
|
||||
@ -141,6 +143,7 @@ public:
|
||||
static bool has_sha1() { return (_features & sha1_instruction_m) != 0; }
|
||||
static bool has_sha256() { return (_features & sha256_instruction_m) != 0; }
|
||||
static bool has_sha512() { return (_features & sha512_instruction_m) != 0; }
|
||||
static bool has_crc32c() { return (_features & crc32c_instruction_m) != 0; }
|
||||
|
||||
static bool supports_compare_and_exchange()
|
||||
{ return has_v9(); }
|
||||
|
@ -699,6 +699,12 @@ void VM_Version::get_processor_features() {
|
||||
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
|
||||
}
|
||||
|
||||
if (UseCRC32CIntrinsics) {
|
||||
if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics))
|
||||
warning("CRC32C intrinsics are not available on this CPU");
|
||||
FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
|
||||
}
|
||||
|
||||
// Adjust RTM (Restricted Transactional Memory) flags
|
||||
if (!supports_rtm() && UseRTMLocking) {
|
||||
// Can't continue because UseRTMLocking affects UseBiasedLocking flag
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2006, 2014, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2006, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -363,6 +363,11 @@ int VM_Version::platform_features(int features) {
|
||||
#endif
|
||||
if (av & AV_SPARC_CBCOND) features |= cbcond_instructions_m;
|
||||
|
||||
#ifndef AV_SPARC_CRC32C
|
||||
#define AV_SPARC_CRC32C 0x20000000 /* crc32c instruction supported */
|
||||
#endif
|
||||
if (av & AV_SPARC_CRC32C) features |= crc32c_instruction_m;
|
||||
|
||||
#ifndef AV_SPARC_AES
|
||||
#define AV_SPARC_AES 0x00020000 /* aes instrs supported */
|
||||
#endif
|
||||
|
@ -863,6 +863,12 @@
|
||||
do_name( updateByteBuffer_name, "updateByteBuffer") \
|
||||
do_signature(updateByteBuffer_signature, "(IJII)I") \
|
||||
\
|
||||
/* support for java.util.zip.CRC32C */ \
|
||||
do_class(java_util_zip_CRC32C, "java/util/zip/CRC32C") \
|
||||
do_intrinsic(_updateBytesCRC32C, java_util_zip_CRC32C, updateBytes_name, updateBytes_signature, F_S) \
|
||||
do_intrinsic(_updateDirectByteBufferCRC32C, java_util_zip_CRC32C, updateDirectByteBuffer_name, updateByteBuffer_signature, F_S) \
|
||||
do_name( updateDirectByteBuffer_name, "updateDirectByteBuffer") \
|
||||
\
|
||||
/* support for sun.misc.Unsafe */ \
|
||||
do_class(sun_misc_Unsafe, "sun/misc/Unsafe") \
|
||||
\
|
||||
|
@ -962,6 +962,7 @@ void ConnectionGraph::process_call_arguments(CallNode *call) {
|
||||
(strcmp(call->as_CallLeaf()->_name, "g1_wb_pre") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "updateBytesCRC32") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "updateBytesCRC32C") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "aescrypt_encryptBlock") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "aescrypt_decryptBlock") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_encryptAESCrypt") == 0 ||
|
||||
|
@ -197,7 +197,7 @@ class LibraryCallKit : public GraphKit {
|
||||
CallJavaNode* generate_method_call_virtual(vmIntrinsics::ID method_id) {
|
||||
return generate_method_call(method_id, true, false);
|
||||
}
|
||||
Node * load_field_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString, bool is_exact, bool is_static);
|
||||
Node * load_field_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString, bool is_exact, bool is_static, ciInstanceKlass * fromKls);
|
||||
|
||||
Node* make_string_method_node(int opcode, Node* str1_start, Node* cnt1, Node* str2_start, Node* cnt2);
|
||||
Node* make_string_method_node(int opcode, Node* str1, Node* str2);
|
||||
@ -291,6 +291,9 @@ class LibraryCallKit : public GraphKit {
|
||||
bool inline_updateCRC32();
|
||||
bool inline_updateBytesCRC32();
|
||||
bool inline_updateByteBufferCRC32();
|
||||
Node* get_table_from_crc32c_class(ciInstanceKlass *crc32c_class);
|
||||
bool inline_updateBytesCRC32C();
|
||||
bool inline_updateDirectByteBufferCRC32C();
|
||||
bool inline_multiplyToLen();
|
||||
bool inline_squareToLen();
|
||||
bool inline_mulAdd();
|
||||
@ -539,6 +542,11 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
|
||||
if (!UseCRC32Intrinsics) return NULL;
|
||||
break;
|
||||
|
||||
case vmIntrinsics::_updateBytesCRC32C:
|
||||
case vmIntrinsics::_updateDirectByteBufferCRC32C:
|
||||
if (!UseCRC32CIntrinsics) return NULL;
|
||||
break;
|
||||
|
||||
case vmIntrinsics::_incrementExactI:
|
||||
case vmIntrinsics::_addExactI:
|
||||
if (!Matcher::match_rule_supported(Op_OverflowAddI) || !UseMathExactIntrinsics) return NULL;
|
||||
@ -947,6 +955,11 @@ bool LibraryCallKit::try_to_inline(int predicate) {
|
||||
case vmIntrinsics::_updateByteBufferCRC32:
|
||||
return inline_updateByteBufferCRC32();
|
||||
|
||||
case vmIntrinsics::_updateBytesCRC32C:
|
||||
return inline_updateBytesCRC32C();
|
||||
case vmIntrinsics::_updateDirectByteBufferCRC32C:
|
||||
return inline_updateDirectByteBufferCRC32C();
|
||||
|
||||
case vmIntrinsics::_profileBoolean:
|
||||
return inline_profileBoolean();
|
||||
case vmIntrinsics::_isCompileConstant:
|
||||
@ -5536,6 +5549,106 @@ bool LibraryCallKit::inline_updateByteBufferCRC32() {
|
||||
return true;
|
||||
}
|
||||
|
||||
//------------------------------get_table_from_crc32c_class-----------------------
// Returns the node produced by loading the static field "byteTable"
// (signature "[I") of the given class; the class is passed explicitly as the
// holder and no receiver object is supplied (fromObj is NULL).
|
||||
Node * LibraryCallKit::get_table_from_crc32c_class(ciInstanceKlass *crc32c_class) {
|
||||
Node* table = load_field_from_object(NULL, "byteTable", "[I", /*is_exact*/ false, /*is_static*/ true, crc32c_class);
|
||||
// a NULL result means the field lookup failed
assert (table != NULL, "wrong version of java.util.zip.CRC32C");
|
||||
|
||||
return table;
|
||||
}
|
||||
|
||||
//------------------------------inline_updateBytesCRC32C-----------------------
|
||||
//
|
||||
// Calculate CRC32C for byte[] array.
|
||||
// int java.util.zip.CRC32C.updateBytes(int crc, byte[] buf, int off, int end)
|
||||
//
// Returns true after wiring a leaf call to the updateBytesCRC32C stub and
// setting its result; returns false (no intrinsic) when src is not known to be
// an array with a loaded klass or its element type is not T_BYTE.
|
||||
bool LibraryCallKit::inline_updateBytesCRC32C() {
|
||||
assert(UseCRC32CIntrinsics, "need CRC32C instruction support");
|
||||
assert(callee()->signature()->size() == 4, "updateBytes has 4 parameters");
|
||||
assert(callee()->holder()->is_loaded(), "CRC32C class must be loaded");
|
||||
// no receiver since it is a static method
|
||||
Node* crc = argument(0); // type: int
|
||||
Node* src = argument(1); // type: oop
|
||||
Node* offset = argument(2); // type: int
|
||||
Node* end = argument(3); // type: int
|
||||
|
||||
// the stub takes a byte count, the Java method an end index: length = end - offset
Node* length = _gvn.transform(new SubINode(end, offset));
|
||||
|
||||
const Type* src_type = src->Value(&_gvn);
|
||||
const TypeAryPtr* top_src = src_type->isa_aryptr();
|
||||
if (top_src == NULL || top_src->klass() == NULL) {
|
||||
// failed array check
|
||||
return false;
|
||||
}
|
||||
|
||||
// Figure out the size and type of the elements we will be copying.
|
||||
BasicType src_elem = src_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
|
||||
if (src_elem != T_BYTE) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// 'src_start' points to src array + scaled offset
|
||||
Node* src_start = array_element_address(src, offset, src_elem);
|
||||
|
||||
// static final int[] byteTable in class CRC32C
|
||||
Node* table = get_table_from_crc32c_class(callee()->holder());
|
||||
// address of element 0 of the table
Node* table_start = array_element_address(table, intcon(0), T_INT);
|
||||
|
||||
// We assume that range check is done by caller.
|
||||
// TODO: generate range check (offset+length < src.length) in debug VM.
|
||||
|
||||
// Call the stub.
|
||||
address stubAddr = StubRoutines::updateBytesCRC32C();
|
||||
const char *stubName = "updateBytesCRC32C";
|
||||
|
||||
// arguments in stub order: crc, buf, len, table
Node* call = make_runtime_call(RC_LEAF, OptoRuntime::updateBytesCRC32C_Type(),
|
||||
stubAddr, stubName, TypePtr::BOTTOM,
|
||||
crc, src_start, length, table_start);
|
||||
// the projection at TypeFunc::Parms is the call's return value
Node* result = _gvn.transform(new ProjNode(call, TypeFunc::Parms));
|
||||
set_result(result);
|
||||
return true;
|
||||
}
|
||||
|
||||
//------------------------------inline_updateDirectByteBufferCRC32C-----------------------
|
||||
//
|
||||
// Calculate CRC32C for DirectByteBuffer.
|
||||
// int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long buf, int off, int end)
|
||||
//
// Wires a leaf call to the same updateBytesCRC32C stub used for byte[] input,
// passing a raw address computed from the long buffer address plus offset.
// Always returns true.
|
||||
bool LibraryCallKit::inline_updateDirectByteBufferCRC32C() {
|
||||
assert(UseCRC32CIntrinsics, "need CRC32C instruction support");
|
||||
assert(callee()->signature()->size() == 5, "updateDirectByteBuffer has 4 parameters and one is long");
|
||||
assert(callee()->holder()->is_loaded(), "CRC32C class must be loaded");
|
||||
// no receiver since it is a static method
|
||||
// the long argument takes two slots (1 and 2), so the ints follow at 3 and 4
Node* crc = argument(0); // type: int
|
||||
Node* src = argument(1); // type: long
|
||||
Node* offset = argument(3); // type: int
|
||||
Node* end = argument(4); // type: int
|
||||
|
||||
// the stub takes a byte count, the Java method an end index: length = end - offset
Node* length = _gvn.transform(new SubINode(end, offset));
|
||||
|
||||
src = ConvL2X(src); // adjust Java long to machine word
|
||||
Node* base = _gvn.transform(new CastX2PNode(src));
|
||||
offset = ConvI2X(offset);
|
||||
|
||||
// 'src_start' points to src array + scaled offset
|
||||
Node* src_start = basic_plus_adr(top(), base, offset);
|
||||
|
||||
// static final int[] byteTable in class CRC32C
|
||||
Node* table = get_table_from_crc32c_class(callee()->holder());
|
||||
// address of element 0 of the table
Node* table_start = array_element_address(table, intcon(0), T_INT);
|
||||
|
||||
// Call the stub.
|
||||
address stubAddr = StubRoutines::updateBytesCRC32C();
|
||||
const char *stubName = "updateBytesCRC32C";
|
||||
|
||||
// arguments in stub order: crc, buf, len, table
Node* call = make_runtime_call(RC_LEAF, OptoRuntime::updateBytesCRC32C_Type(),
|
||||
stubAddr, stubName, TypePtr::BOTTOM,
|
||||
crc, src_start, length, table_start);
|
||||
// the projection at TypeFunc::Parms is the call's return value
Node* result = _gvn.transform(new ProjNode(call, TypeFunc::Parms));
|
||||
set_result(result);
|
||||
return true;
|
||||
}
|
||||
|
||||
//----------------------------inline_reference_get----------------------------
|
||||
// public T java.lang.ref.Reference.get();
|
||||
bool LibraryCallKit::inline_reference_get() {
|
||||
@ -5571,18 +5684,28 @@ bool LibraryCallKit::inline_reference_get() {
|
||||
|
||||
|
||||
Node * LibraryCallKit::load_field_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString,
|
||||
bool is_exact=true, bool is_static=false) {
|
||||
bool is_exact=true, bool is_static=false,
|
||||
ciInstanceKlass * fromKls=NULL) {
|
||||
if (fromKls == NULL) {
|
||||
const TypeInstPtr* tinst = _gvn.type(fromObj)->isa_instptr();
|
||||
assert(tinst != NULL, "obj is null");
|
||||
assert(tinst->klass()->is_loaded(), "obj is not loaded");
|
||||
assert(!is_exact || tinst->klass_is_exact(), "klass not exact");
|
||||
fromKls = tinst->klass()->as_instance_klass();
|
||||
} else {
|
||||
assert(is_static, "only for static field access");
|
||||
}
|
||||
ciField* field = fromKls->get_field_by_name(ciSymbol::make(fieldName),
|
||||
ciSymbol::make(fieldTypeString),
|
||||
is_static);
|
||||
|
||||
const TypeInstPtr* tinst = _gvn.type(fromObj)->isa_instptr();
|
||||
assert(tinst != NULL, "obj is null");
|
||||
assert(tinst->klass()->is_loaded(), "obj is not loaded");
|
||||
assert(!is_exact || tinst->klass_is_exact(), "klass not exact");
|
||||
|
||||
ciField* field = tinst->klass()->as_instance_klass()->get_field_by_name(ciSymbol::make(fieldName),
|
||||
ciSymbol::make(fieldTypeString),
|
||||
is_static);
|
||||
if (field == NULL) return (Node *) NULL;
|
||||
assert (field != NULL, "undefined field");
|
||||
if (field == NULL) return (Node *) NULL;
|
||||
|
||||
if (is_static) {
|
||||
const TypeInstPtr* tip = TypeInstPtr::make(fromKls->java_mirror());
|
||||
fromObj = makecon(tip);
|
||||
}
|
||||
|
||||
// Next code copied from Parse::do_get_xxx():
|
||||
|
||||
|
@ -851,6 +851,29 @@ const TypeFunc* OptoRuntime::updateBytesCRC32_Type() {
|
||||
return TypeFunc::make(domain, range);
|
||||
}
|
||||
|
||||
/**
|
||||
* int updateBytesCRC32C(int crc, byte* buf, int len, int* table)
|
||||
*/
|
||||
const TypeFunc* OptoRuntime::updateBytesCRC32C_Type() {
|
||||
// create input type (domain)
|
||||
int num_args = 4;
|
||||
int argcnt = num_args;
|
||||
const Type** fields = TypeTuple::fields(argcnt);
|
||||
int argp = TypeFunc::Parms;
|
||||
fields[argp++] = TypeInt::INT; // crc
|
||||
fields[argp++] = TypePtr::NOTNULL; // buf
|
||||
fields[argp++] = TypeInt::INT; // len
|
||||
fields[argp++] = TypePtr::NOTNULL; // table
|
||||
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
|
||||
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
|
||||
|
||||
// result type needed
|
||||
fields = TypeTuple::fields(1);
|
||||
fields[TypeFunc::Parms+0] = TypeInt::INT; // crc result
|
||||
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+1, fields);
|
||||
return TypeFunc::make(domain, range);
|
||||
}
|
||||
|
||||
// for cipherBlockChaining calls of aescrypt encrypt/decrypt, four pointers and a length, returning int
|
||||
const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() {
|
||||
// create input type (domain)
|
||||
|
@ -319,6 +319,7 @@ private:
|
||||
static const TypeFunc* ghash_processBlocks_Type();
|
||||
|
||||
static const TypeFunc* updateBytesCRC32_Type();
|
||||
static const TypeFunc* updateBytesCRC32C_Type();
|
||||
|
||||
// leaf on stack replacement interpreter accessor types
|
||||
static const TypeFunc* osr_end_Type();
|
||||
|
@ -848,6 +848,9 @@ public:
|
||||
product(bool, UseCRC32Intrinsics, false, \
|
||||
"use intrinsics for java.util.zip.CRC32") \
|
||||
\
|
||||
product(bool, UseCRC32CIntrinsics, false, \
|
||||
"use intrinsics for java.util.zip.CRC32C") \
|
||||
\
|
||||
develop(bool, TraceCallFixup, false, \
|
||||
"Trace all call fixups") \
|
||||
\
|
||||
|
@ -137,6 +137,8 @@ address StubRoutines::_sha512_implCompressMB = NULL;
|
||||
address StubRoutines::_updateBytesCRC32 = NULL;
|
||||
address StubRoutines::_crc_table_adr = NULL;
|
||||
|
||||
address StubRoutines::_updateBytesCRC32C = NULL;
|
||||
|
||||
address StubRoutines::_multiplyToLen = NULL;
|
||||
address StubRoutines::_squareToLen = NULL;
|
||||
address StubRoutines::_mulAdd = NULL;
|
||||
|
@ -197,6 +197,8 @@ class StubRoutines: AllStatic {
|
||||
static address _updateBytesCRC32;
|
||||
static address _crc_table_adr;
|
||||
|
||||
static address _updateBytesCRC32C;
|
||||
|
||||
static address _multiplyToLen;
|
||||
static address _squareToLen;
|
||||
static address _mulAdd;
|
||||
@ -359,6 +361,8 @@ class StubRoutines: AllStatic {
|
||||
static address updateBytesCRC32() { return _updateBytesCRC32; }
|
||||
static address crc_table_addr() { return _crc_table_adr; }
|
||||
|
||||
static address updateBytesCRC32C() { return _updateBytesCRC32C; }
|
||||
|
||||
static address multiplyToLen() {return _multiplyToLen; }
|
||||
static address squareToLen() {return _squareToLen; }
|
||||
static address mulAdd() {return _mulAdd; }
|
||||
|
@ -830,6 +830,7 @@ typedef CompactHashtable<Symbol*, char> SymbolCompactHashTable;
|
||||
static_field(StubRoutines, _ghash_processBlocks, address) \
|
||||
static_field(StubRoutines, _updateBytesCRC32, address) \
|
||||
static_field(StubRoutines, _crc_table_adr, address) \
|
||||
static_field(StubRoutines, _updateBytesCRC32C, address) \
|
||||
static_field(StubRoutines, _multiplyToLen, address) \
|
||||
static_field(StubRoutines, _squareToLen, address) \
|
||||
static_field(StubRoutines, _mulAdd, address) \
|
||||
|
221
hotspot/test/compiler/intrinsics/crc32c/TestCRC32C.java
Normal file
221
hotspot/test/compiler/intrinsics/crc32c/TestCRC32C.java
Normal file
@ -0,0 +1,221 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @test
|
||||
* @bug 8073583
|
||||
* @summary C2 support for CRC32C on SPARC
|
||||
*
|
||||
* @run main/othervm/timeout=600 -Xbatch TestCRC32C -m
|
||||
*/
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.zip.Checksum;
|
||||
import java.util.zip.CRC32C;
|
||||
|
||||
/**
 * Exercises java.util.zip.CRC32C on byte arrays and direct ByteBuffers.
 *
 * The checksum computed by the first call (before any warm-up) is used as the
 * reference value; the same input is then checksummed repeatedly and every
 * result is compared against that reference. Any mismatch makes the test fail
 * by throwing a RuntimeException.
 */
public class TestCRC32C {

    /**
     * Entry point.
     *
     * Arguments:
     *   "-m" [warmupIters]    run the multi-case test (see test_multi)
     *   [iters [warmupIters]] run the single-message benchmark and check
     *
     * The system properties "offset" (default 0) and "msgSize" (default 512)
     * select the slice of the test array used by the single-message mode.
     *
     * @throws RuntimeException if any computed checksum differs from the reference
     */
    public static void main(String[] args) {
        int offset = Integer.getInteger("offset", 0);
        int msgSize = Integer.getInteger("msgSize", 512);
        boolean multi = false;
        int iters = 20000;
        int warmupIters = 20000;

        if (args.length > 0) {
            if (args[0].equals("-m")) {
                multi = true;
            } else {
                iters = Integer.parseInt(args[0]);
            }
            if (args.length > 1) {
                warmupIters = Integer.parseInt(args[1]);
            }
        }

        if (multi) {
            test_multi(warmupIters);
            return;
        }

        System.out.println(" offset = " + offset);
        System.out.println("msgSize = " + msgSize + " bytes");
        System.out.println("  iters = " + iters);

        byte[] b = initializedBytes(msgSize, offset);

        CRC32C crc0 = new CRC32C();
        CRC32C crc1 = new CRC32C();
        CRC32C crc2 = new CRC32C();

        // Reference value, computed once before any warm-up loop runs.
        crc0.update(b, offset, msgSize);

        System.out.println("-------------------------------------------------------");

        /* warm up */
        for (int i = 0; i < warmupIters; i++) {
            crc1.reset();
            crc1.update(b, offset, msgSize);
        }

        /* measure performance */
        long start = System.nanoTime();
        for (int i = 0; i < iters; i++) {
            crc1.reset();
            crc1.update(b, offset, msgSize);
        }
        long end = System.nanoTime();
        double total = (double)(end - start)/1e9;         // in seconds
        double thruput = (double)msgSize*iters/1e6/total; // in MB/s
        System.out.println("CRC32C.update(byte[]) runtime = " + total + " seconds");
        System.out.println("CRC32C.update(byte[]) throughput = " + thruput + " MB/s");

        /* check correctness */
        boolean arrayOk = true;
        for (int i = 0; i < iters && arrayOk; i++) {
            crc1.reset();
            crc1.update(b, offset, msgSize);
            arrayOk = check(crc0, crc1);
        }
        report("CRCs", crc0, crc1);
        if (!arrayOk) {
            // Printing alone would let the test pass; fail it explicitly.
            throw new RuntimeException("CRC32C.update(byte[]) returned a wrong checksum");
        }

        System.out.println("-------------------------------------------------------");

        ByteBuffer buf = ByteBuffer.allocateDirect(msgSize);
        buf.put(b, offset, msgSize);
        buf.flip();

        /* warm up */
        for (int i = 0; i < warmupIters; i++) {
            crc2.reset();
            crc2.update(buf);
            buf.rewind();
        }

        /* measure performance */
        start = System.nanoTime();
        for (int i = 0; i < iters; i++) {
            crc2.reset();
            crc2.update(buf);
            buf.rewind();
        }
        end = System.nanoTime();
        total = (double)(end - start)/1e9;         // in seconds
        thruput = (double)msgSize*iters/1e6/total; // in MB/s
        System.out.println("CRC32C.update(ByteBuffer) runtime = " + total + " seconds");
        System.out.println("CRC32C.update(ByteBuffer) throughput = " + thruput + " MB/s");

        /* check correctness */
        boolean bufferOk = true;
        for (int i = 0; i < iters && bufferOk; i++) {
            crc2.reset();
            crc2.update(buf);
            buf.rewind();
            bufferOk = check(crc0, crc2);
        }
        report("CRCs", crc0, crc2);
        if (!bufferOk) {
            throw new RuntimeException("CRC32C.update(ByteBuffer) returned a wrong checksum");
        }

        System.out.println("-------------------------------------------------------");
    }

    /** Prints both checksum values on one line, prefixed by the given label. */
    private static void report(String s, Checksum crc0, Checksum crc1) {
        System.out.printf("%s: crc0 = %08x, crc1 = %08x\n",
                          s, crc0.getValue(), crc1.getValue());
    }

    /**
     * Compares two checksums and logs an error to System.err when they differ.
     *
     * @return true if both checksums have the same value, false otherwise
     */
    private static boolean check(Checksum crc0, Checksum crc1) {
        if (crc0.getValue() != crc1.getValue()) {
            System.err.printf("ERROR: crc0 = %08x, crc1 = %08x\n",
                              crc0.getValue(), crc1.getValue());
            return false;
        }
        return true;
    }

    /**
     * Builds a test array of M + offset bytes. The first 'offset' bytes hold
     * their own index, the remaining M bytes hold their index relative to
     * 'offset' (both truncated to a byte).
     */
    private static byte[] initializedBytes(int M, int offset) {
        byte[] bytes = new byte[M + offset];
        for (int i = 0; i < offset; i++) {
            bytes[i] = (byte) i;
        }
        for (int i = offset; i < bytes.length; i++) {
            bytes[i] = (byte) (i - offset);
        }
        return bytes;
    }

    /**
     * Checks CRC32C.update(byte[], int, int) for every combination of the
     * start offsets and lengths listed below. Each combination is computed
     * once up front as the reference, then recomputed 'iters' times, and the
     * final value is compared with the reference.
     *
     * @throws RuntimeException after all combinations have been checked,
     *         if at least one of them mismatched
     */
    private static void test_multi(int iters) {
        int len1 = 8;    // the  8B/iteration loop
        int len2 = 32;   // the 32B/iteration loop
        int len3 = 4096; // the 4KB/iteration loop

        byte[] b = initializedBytes(len3*16, 0);
        int[] offsets = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 16, 32, 64, 128, 256, 512 };
        int[] sizes = { 0, 1, 2, 3, 4, 5, 6, 7,
                        len1, len1+1, len1+2, len1+3, len1+4, len1+5, len1+6, len1+7,
                        len1*2, len1*2+1, len1*2+3, len1*2+5, len1*2+7,
                        len2, len2+1, len2+3, len2+5, len2+7,
                        len2*2, len2*4, len2*8, len2*16, len2*32, len2*64,
                        len3, len3+1, len3+3, len3+5, len3+7,
                        len3*2, len3*4, len3*8,
                        len1+len2, len1+len2+1, len1+len2+3, len1+len2+5, len1+len2+7,
                        len1+len3, len1+len3+1, len1+len3+3, len1+len3+5, len1+len3+7,
                        len2+len3, len2+len3+1, len2+len3+3, len2+len3+5, len2+len3+7,
                        len1+len2+len3, len1+len2+len3+1, len1+len2+len3+3,
                        len1+len2+len3+5, len1+len2+len3+7,
                        (len1+len2+len3)*2, (len1+len2+len3)*2+1, (len1+len2+len3)*2+3,
                        (len1+len2+len3)*2+5, (len1+len2+len3)*2+7,
                        (len1+len2+len3)*3, (len1+len2+len3)*3-1, (len1+len2+len3)*3-3,
                        (len1+len2+len3)*3-5, (len1+len2+len3)*3-7 };
        int cases = offsets.length*sizes.length;
        CRC32C[] crc0 = new CRC32C[cases];
        CRC32C[] crc1 = new CRC32C[cases];

        System.out.printf("testing %d cases ...\n", cases);

        /* set the result from interpreter as reference */
        for (int i = 0; i < offsets.length; i++) {
            for (int j = 0; j < sizes.length; j++) {
                int idx = i*sizes.length + j;
                crc0[idx] = new CRC32C();
                crc1[idx] = new CRC32C();
                crc0[idx].update(b, offsets[i], sizes[j]);
            }
        }

        /* warm up the JIT compiler and get result */
        for (int k = 0; k < iters; k++) {
            for (int i = 0; i < offsets.length; i++) {
                for (int j = 0; j < sizes.length; j++) {
                    int idx = i*sizes.length + j;
                    crc1[idx].reset();
                    crc1[idx].update(b, offsets[i], sizes[j]);
                }
            }
        }

        /* check correctness */
        int failures = 0;
        for (int i = 0; i < offsets.length; i++) {
            for (int j = 0; j < sizes.length; j++) {
                int idx = i*sizes.length + j;
                if (!check(crc0[idx], crc1[idx])) {
                    System.out.printf("offsets[%d] = %d", i, offsets[i]);
                    System.out.printf("\tsizes[%d] = %d\n", j, sizes[j]);
                    failures++;
                }
            }
        }

        // Report every failing combination first, then fail the test once.
        if (failures != 0) {
            throw new RuntimeException(failures + " of " + cases
                                       + " CRC32C cases returned a wrong checksum");
        }
    }
}
|
Loading…
x
Reference in New Issue
Block a user