8155162: java.util.zip.CRC32C Interpreter/C1 intrinsics support on SPARC

Reviewed-by: kvn
This commit is contained in:
Ahmed Khawaja 2016-05-04 15:30:21 -07:00 committed by Vladimir Kozlov
parent 160798c7e7
commit 7057abda29
14 changed files with 414 additions and 192 deletions

View File

@ -1029,6 +1029,10 @@ void LIRGenerator::do_update_CRC32(Intrinsic* x) {
}
}
// C1 LIR generation hook for the java.util.zip.CRC32C intrinsics.
// This definition provides no implementation: the body only calls
// Unimplemented(), so reaching it is treated as an error.
void LIRGenerator::do_update_CRC32C(Intrinsic* x) {
Unimplemented();
}
// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
// _i2b, _i2c, _i2s
void LIRGenerator::do_Convert(Convert* x) {

View File

@ -1427,3 +1427,7 @@ void LIRGenerator::do_update_CRC32(Intrinsic* x) {
}
}
}
// C1 LIR generation hook for the java.util.zip.CRC32C intrinsics.
// This definition provides no implementation: the body only calls
// Unimplemented(), so reaching it is treated as an error.
void LIRGenerator::do_update_CRC32C(Intrinsic* x) {
Unimplemented();
}

View File

@ -868,6 +868,90 @@ void LIRGenerator::do_update_CRC32(Intrinsic* x) {
}
}
// Emits LIR for the java.util.zip.CRC32C intrinsics
//   _updateBytesCRC32C              (crc, byte[] buf, off, end)
//   _updateDirectByteBufferCRC32C   (crc, long   buf, off, end)
// The generated code computes the address of the first byte and the byte
// count (end - off), then calls the StubRoutines::updateBytesCRC32C() leaf
// stub with the C signature (int crc, address data, int len) and moves the
// stub's result into the intrinsic's result operand.
// Any other intrinsic id is a programming error (ShouldNotReachHere).
void LIRGenerator::do_update_CRC32C(Intrinsic* x) {
  // Make all state_for calls early since they can emit code
  LIR_Opr result = rlock_result(x);

  switch (x->id()) {
    case vmIntrinsics::_updateBytesCRC32C:
    case vmIntrinsics::_updateDirectByteBufferCRC32C: {
      bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32C);
      // For a byte[] the data starts after the array header; for the
      // direct-buffer variant buf is used as-is with no extra displacement.
      int array_offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0;

      LIRItem crc(x->argument_at(0), this);
      LIRItem buf(x->argument_at(1), this);
      LIRItem off(x->argument_at(2), this);
      LIRItem end(x->argument_at(3), this);

      buf.load_item();
      off.load_nonconstant();
      end.load_nonconstant();

      // len = end - off, computed in fresh registers so that the argument
      // operands themselves are left untouched.
      LIR_Opr len = new_register(T_INT);
      LIR_Opr off_copy = new_register(T_INT);
      __ move(end.result(), len);
      __ move(off.result(), off_copy);
      __ sub(len, off_copy, len);

      // A constant offset is folded into the address displacement;
      // otherwise it has to be added to the base at run time.
      LIR_Opr index = off.result();
      if (off.result()->is_constant()) {
        index = LIR_OprFact::illegalOpr;
        array_offset += off.result()->as_jint();
      }

      LIR_Opr base_op = buf.result();
      if (index->is_valid()) {
        // Widen the int offset to 64 bits before the address arithmetic.
        // The widened value lives in a freshly allocated register, so no
        // constant/non-register case needs to be handled here.
        LIR_Opr index64 = new_register(T_LONG);
        __ convert(Bytecodes::_i2l, index, index64);

        LIR_Opr base64 = new_register(T_LONG);
        LIR_Opr index_copy = new_register(T_LONG);
        __ move(base_op, base64);
        __ move(index64, index_copy);
        __ add(base64, index_copy, base64);
        base_op = base64;
      }

      LIR_Address* a = new LIR_Address(base_op, array_offset, T_BYTE);

      // C calling convention of the stub: (int crc, address data, int len)
      BasicTypeList signature(3);
      signature.append(T_INT);
      signature.append(T_ADDRESS);
      signature.append(T_INT);
      CallingConvention* cc = frame_map()->c_calling_convention(&signature);
      const LIR_Opr result_reg = result_register_for(x->type());

      LIR_Opr addr = new_pointer_register();
      __ leal(LIR_OprFact::address(a), addr);

      crc.load_item_force(cc->at(0));
      __ move(addr, cc->at(1));
      __ move(len, cc->at(2));

      __ call_runtime_leaf(StubRoutines::updateBytesCRC32C(), getThreadTemp(), result_reg, cc->args());
      __ move(result_reg, result);
      break;
    }
    default: {
      ShouldNotReachHere();
    }
  }
}
// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
// _i2b, _i2c, _i2s
void LIRGenerator::do_Convert(Convert* x) {

View File

@ -4837,21 +4837,21 @@ void MacroAssembler::update_byte_crc32(Register crc, Register val, Register tabl
// Reverse byte order of lower 32 bits, assuming upper 32 bits all zeros.
//   src - value whose low 32 bits are to be byte-swapped (not modified)
//   dst - receives the byte-swapped value
//   tmp - scratch register, overwritten
// src is read again after dst and tmp have been written, so it must not be
// the same register as dst or tmp.
void MacroAssembler::reverse_bytes_32(Register src, Register dst, Register tmp) {
  // byte 3 -> byte 0 (relies on the upper 32 bits of src being zero)
  srlx(src, 24, dst);

  // byte 2 -> byte 1
  sllx(src, 32+8, tmp);
  srlx(tmp, 32+24, tmp);
  sllx(tmp, 8, tmp);
  or3(dst, tmp, dst);

  // byte 1 -> byte 2
  sllx(src, 32+16, tmp);
  srlx(tmp, 32+24, tmp);
  sllx(tmp, 16, tmp);
  or3(dst, tmp, dst);

  // byte 0 -> byte 3
  sllx(src, 32+24, tmp);
  srlx(tmp, 32, tmp);
  or3(dst, tmp, dst);
}
void MacroAssembler::movitof_revbytes(Register src, FloatRegister dst, Register tmp1, Register tmp2) {
@ -5103,3 +5103,176 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Regi
not1(crc);
}
// Parameters of the 4-way parallel CRC32C loop: each of the four chunks is
// CHUNK_LEN doublewords long, and CHUNK_K1..K3 are the carry-less multipliers
// used to fold the first three chunk results into the fourth.
#define CHUNK_LEN 128 /* 128 x 8B = 1KB */
#define CHUNK_K1 0x1307a0206 /* reverseBits(pow(x, CHUNK_LEN*8*8*3 - 32) mod P(x)) << 1 */
#define CHUNK_K2 0x1a0f717c4 /* reverseBits(pow(x, CHUNK_LEN*8*8*2 - 32) mod P(x)) << 1 */
#define CHUNK_K3 0x0170076fa /* reverseBits(pow(x, CHUNK_LEN*8*8*1 - 32) mod P(x)) << 1 */
// Emits the CRC32C update kernel shared by the stub and the interpreter entry.
//   crc   - in: current crc value; out: updated crc value
//   buf   - in: address of the first byte; advanced as bytes are consumed
//   len   - in: number of bytes; counted down as bytes are consumed
//   table - overwritten here with StubRoutines::crc32c_table_addr()
// If len <= 0 the code branches straight to the end and crc is left untouched.
// Scratch registers written by this code: G1, G3, G4, G5, O4, O5 and the
// floating point registers F0 .. F14. One doubleword just below SP is used as
// a temporary when combining the parallel chunk results.
// Structure: byte-wise head until buf is 8-byte aligned, then 4 x 1KB chunks
// in parallel while at least 4KB remain, then 32-byte and 8-byte steps, then
// a byte-wise tail.
void MacroAssembler::kernel_crc32c(Register crc, Register buf, Register len, Register table) {
Label L_crc32c_head, L_crc32c_aligned;
Label L_crc32c_parallel, L_crc32c_parallel_loop;
Label L_crc32c_serial, L_crc32c_x32_loop, L_crc32c_x8, L_crc32c_x8_loop;
Label L_crc32c_done, L_crc32c_tail, L_crc32c_return;
// table = address of the byte-wise lookup table used by update_byte_crc32
set(ExternalAddress(StubRoutines::crc32c_table_addr()), table);
// nothing to do for an empty (or negative) length
cmp_and_br_short(len, 0, Assembler::lessEqual, Assembler::pn, L_crc32c_return);
// clear upper 32 bits of crc
clruwu(crc);
// G4 = buf & 7; zero means buf is already 8-byte aligned
and3(buf, 7, G4);
cmp_and_brx_short(G4, 0, Assembler::equal, Assembler::pt, L_crc32c_aligned);
// G4 = 8 - (buf & 7) = number of bytes up to the next 8-byte boundary
mov(8, G1);
sub(G1, G4, G4);
// ------ process the misaligned head (7 bytes or less) ------
bind(L_crc32c_head);
// crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF];
ldub(buf, 0, G1);
update_byte_crc32(crc, G1, table);
inc(buf);
dec(len);
// input exhausted before reaching alignment
cmp_and_br_short(len, 0, Assembler::equal, Assembler::pn, L_crc32c_return);
dec(G4);
cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_head);
// ------ process the 8-byte-aligned body ------
bind(L_crc32c_aligned);
nop();
// fewer than 8 bytes left: only the byte-wise tail applies
cmp_and_br_short(len, 8, Assembler::less, Assembler::pn, L_crc32c_tail);
// reverse the byte order of lower 32 bits to big endian, and move to FP side
movitof_revbytes(crc, F0, G1, G3);
// the parallel path needs at least 4 chunks (CHUNK_LEN*8*4 bytes)
set(CHUNK_LEN*8*4, G4);
cmp_and_br_short(len, G4, Assembler::less, Assembler::pt, L_crc32c_serial);
// ------ process four 1KB chunks in parallel ------
bind(L_crc32c_parallel);
// F0 carries the running crc for chunk 0; F2/F4/F6 start at zero for chunks 1..3
fzero(FloatRegisterImpl::D, F2);
fzero(FloatRegisterImpl::D, F4);
fzero(FloatRegisterImpl::D, F6);
// the loop handles CHUNK_LEN - 1 doublewords per chunk; the last one is peeled below
mov(CHUNK_LEN - 1, G4);
bind(L_crc32c_parallel_loop);
// schedule ldf's ahead of crc32c's to hide the load-use latency
ldf(FloatRegisterImpl::D, buf, 0, F8);
ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8, F10);
ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12);
ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*24, F14);
crc32c(F0, F8, F0);
crc32c(F2, F10, F2);
crc32c(F4, F12, F4);
crc32c(F6, F14, F6);
inc(buf, 8);
dec(G4);
cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_parallel_loop);
// peeled final doubleword of chunks 0..2
ldf(FloatRegisterImpl::D, buf, 0, F8);
ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8, F10);
ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12);
crc32c(F0, F8, F0);
crc32c(F2, F10, F2);
crc32c(F4, F12, F4);
// final doubleword of chunk 3 is only loaded here; it is folded in together
// with the combined results of chunks 0..2 further down
inc(buf, CHUNK_LEN*24);
ldfl(FloatRegisterImpl::D, buf, G0, F14); // load in little endian
// buf now points just past the four chunks
inc(buf, 8);
prefetch(buf, 0, Assembler::severalReads);
prefetch(buf, CHUNK_LEN*8, Assembler::severalReads);
prefetch(buf, CHUNK_LEN*16, Assembler::severalReads);
prefetch(buf, CHUNK_LEN*24, Assembler::severalReads);
// move to INT side, and reverse the byte order of lower 32 bits to little endian
movftoi_revbytes(F0, O4, G1, G4);
movftoi_revbytes(F2, O5, G1, G4);
movftoi_revbytes(F4, G5, G1, G4);
// combine the results of 4 chunks
set64(CHUNK_K1, G3, G1);
xmulx(O4, G3, O4);
set64(CHUNK_K2, G3, G1);
xmulx(O5, G3, O5);
set64(CHUNK_K3, G3, G1);
xmulx(G5, G3, G5);
// O5 = O4 ^ O5 ^ G5 ^ (last doubleword of chunk 3)
movdtox(F14, G4);
xor3(O4, O5, O5);
xor3(G5, O5, O5);
xor3(G4, O5, O5);
// reverse the byte order to big endian, via stack, and move to FP side
// TODO: use new revb instruction
// G1 = (SP - 8) rounded down to a multiple of 8: scratch slot for the swap
add(SP, -8, G1);
srlx(G1, 3, G1);
sllx(G1, 3, G1);
stx(O5, G1, G0);
ldfl(FloatRegisterImpl::D, G1, G0, F2); // load in little endian
// fold the combined value into chunk 3's accumulator; result becomes the running crc in F0
crc32c(F6, F2, F0);
// account for the 4 chunks just processed and repeat while another full set remains
set(CHUNK_LEN*8*4, G4);
sub(len, G4, len);
cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_parallel);
nop();
cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_done);
bind(L_crc32c_serial);
mov(32, G4);
cmp_and_br_short(len, G4, Assembler::less, Assembler::pn, L_crc32c_x8);
// ------ process 32B chunks ------
bind(L_crc32c_x32_loop);
ldf(FloatRegisterImpl::D, buf, 0, F2);
crc32c(F0, F2, F0);
ldf(FloatRegisterImpl::D, buf, 8, F2);
crc32c(F0, F2, F0);
ldf(FloatRegisterImpl::D, buf, 16, F2);
crc32c(F0, F2, F0);
ldf(FloatRegisterImpl::D, buf, 24, F2);
inc(buf, 32);
crc32c(F0, F2, F0);
dec(len, 32);
cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_x32_loop);
bind(L_crc32c_x8);
nop();
cmp_and_br_short(len, 8, Assembler::less, Assembler::pt, L_crc32c_done);
// ------ process 8B chunks ------
bind(L_crc32c_x8_loop);
ldf(FloatRegisterImpl::D, buf, 0, F2);
inc(buf, 8);
crc32c(F0, F2, F0);
dec(len, 8);
cmp_and_br_short(len, 8, Assembler::greaterEqual, Assembler::pt, L_crc32c_x8_loop);
bind(L_crc32c_done);
// move to INT side, and reverse the byte order of lower 32 bits to little endian
movftoi_revbytes(F0, crc, G1, G3);
// no leftover bytes: finished
cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_return);
// ------ process the misaligned tail (7 bytes or less) ------
bind(L_crc32c_tail);
// crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF];
ldub(buf, 0, G1);
update_byte_crc32(crc, G1, table);
inc(buf);
dec(len);
cmp_and_br_short(len, 0, Assembler::greater, Assembler::pt, L_crc32c_tail);
bind(L_crc32c_return);
nop();
}

View File

@ -1418,6 +1418,8 @@ public:
// Fold 8-bit data
void fold_8bit_crc32(Register xcrc, Register table, Register xtmp, Register tmp);
void fold_8bit_crc32(Register crc, Register table, Register tmp);
// CRC32C code for java.util.zip.CRC32C::updateBytes/updateDirectByteBuffer intrinsic.
void kernel_crc32c(Register crc, Register buf, Register len, Register table);
};

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -4909,11 +4909,6 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
#define CHUNK_LEN 128 /* 128 x 8B = 1KB */
#define CHUNK_K1 0x1307a0206 /* reverseBits(pow(x, CHUNK_LEN*8*8*3 - 32) mod P(x)) << 1 */
#define CHUNK_K2 0x1a0f717c4 /* reverseBits(pow(x, CHUNK_LEN*8*8*2 - 32) mod P(x)) << 1 */
#define CHUNK_K3 0x0170076fa /* reverseBits(pow(x, CHUNK_LEN*8*8*1 - 32) mod P(x)) << 1 */
/**
* Arguments:
*
@ -4938,171 +4933,8 @@ class StubGenerator: public StubCodeGenerator {
const Register len = O2; // number of bytes
const Register table = O3; // byteTable
Label L_crc32c_head, L_crc32c_aligned;
Label L_crc32c_parallel, L_crc32c_parallel_loop;
Label L_crc32c_serial, L_crc32c_x32_loop, L_crc32c_x8, L_crc32c_x8_loop;
Label L_crc32c_done, L_crc32c_tail, L_crc32c_return;
__ kernel_crc32c(crc, buf, len, table);
__ cmp_and_br_short(len, 0, Assembler::lessEqual, Assembler::pn, L_crc32c_return);
// clear upper 32 bits of crc
__ clruwu(crc);
__ and3(buf, 7, G4);
__ cmp_and_brx_short(G4, 0, Assembler::equal, Assembler::pt, L_crc32c_aligned);
__ mov(8, G1);
__ sub(G1, G4, G4);
// ------ process the misaligned head (7 bytes or less) ------
__ BIND(L_crc32c_head);
// crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF];
__ ldub(buf, 0, G1);
__ update_byte_crc32(crc, G1, table);
__ inc(buf);
__ dec(len);
__ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pn, L_crc32c_return);
__ dec(G4);
__ cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_head);
// ------ process the 8-byte-aligned body ------
__ BIND(L_crc32c_aligned);
__ nop();
__ cmp_and_br_short(len, 8, Assembler::less, Assembler::pn, L_crc32c_tail);
// reverse the byte order of lower 32 bits to big endian, and move to FP side
__ movitof_revbytes(crc, F0, G1, G3);
__ set(CHUNK_LEN*8*4, G4);
__ cmp_and_br_short(len, G4, Assembler::less, Assembler::pt, L_crc32c_serial);
// ------ process four 1KB chunks in parallel ------
__ BIND(L_crc32c_parallel);
__ fzero(FloatRegisterImpl::D, F2);
__ fzero(FloatRegisterImpl::D, F4);
__ fzero(FloatRegisterImpl::D, F6);
__ mov(CHUNK_LEN - 1, G4);
__ BIND(L_crc32c_parallel_loop);
// schedule ldf's ahead of crc32c's to hide the load-use latency
__ ldf(FloatRegisterImpl::D, buf, 0, F8);
__ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8, F10);
__ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12);
__ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*24, F14);
__ crc32c(F0, F8, F0);
__ crc32c(F2, F10, F2);
__ crc32c(F4, F12, F4);
__ crc32c(F6, F14, F6);
__ inc(buf, 8);
__ dec(G4);
__ cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_parallel_loop);
__ ldf(FloatRegisterImpl::D, buf, 0, F8);
__ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8, F10);
__ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12);
__ crc32c(F0, F8, F0);
__ crc32c(F2, F10, F2);
__ crc32c(F4, F12, F4);
__ inc(buf, CHUNK_LEN*24);
__ ldfl(FloatRegisterImpl::D, buf, G0, F14); // load in little endian
__ inc(buf, 8);
__ prefetch(buf, 0, Assembler::severalReads);
__ prefetch(buf, CHUNK_LEN*8, Assembler::severalReads);
__ prefetch(buf, CHUNK_LEN*16, Assembler::severalReads);
__ prefetch(buf, CHUNK_LEN*24, Assembler::severalReads);
// move to INT side, and reverse the byte order of lower 32 bits to little endian
__ movftoi_revbytes(F0, O4, G1, G4);
__ movftoi_revbytes(F2, O5, G1, G4);
__ movftoi_revbytes(F4, G5, G1, G4);
// combine the results of 4 chunks
__ set64(CHUNK_K1, G3, G1);
__ xmulx(O4, G3, O4);
__ set64(CHUNK_K2, G3, G1);
__ xmulx(O5, G3, O5);
__ set64(CHUNK_K3, G3, G1);
__ xmulx(G5, G3, G5);
__ movdtox(F14, G4);
__ xor3(O4, O5, O5);
__ xor3(G5, O5, O5);
__ xor3(G4, O5, O5);
// reverse the byte order to big endian, via stack, and move to FP side
__ add(SP, -8, G1);
__ srlx(G1, 3, G1);
__ sllx(G1, 3, G1);
__ stx(O5, G1, G0);
__ ldfl(FloatRegisterImpl::D, G1, G0, F2); // load in little endian
__ crc32c(F6, F2, F0);
__ set(CHUNK_LEN*8*4, G4);
__ sub(len, G4, len);
__ cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_parallel);
__ nop();
__ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_done);
__ BIND(L_crc32c_serial);
__ mov(32, G4);
__ cmp_and_br_short(len, G4, Assembler::less, Assembler::pn, L_crc32c_x8);
// ------ process 32B chunks ------
__ BIND(L_crc32c_x32_loop);
__ ldf(FloatRegisterImpl::D, buf, 0, F2);
__ inc(buf, 8);
__ crc32c(F0, F2, F0);
__ ldf(FloatRegisterImpl::D, buf, 0, F2);
__ inc(buf, 8);
__ crc32c(F0, F2, F0);
__ ldf(FloatRegisterImpl::D, buf, 0, F2);
__ inc(buf, 8);
__ crc32c(F0, F2, F0);
__ ldf(FloatRegisterImpl::D, buf, 0, F2);
__ inc(buf, 8);
__ crc32c(F0, F2, F0);
__ dec(len, 32);
__ cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_x32_loop);
__ BIND(L_crc32c_x8);
__ nop();
__ cmp_and_br_short(len, 8, Assembler::less, Assembler::pt, L_crc32c_done);
// ------ process 8B chunks ------
__ BIND(L_crc32c_x8_loop);
__ ldf(FloatRegisterImpl::D, buf, 0, F2);
__ inc(buf, 8);
__ crc32c(F0, F2, F0);
__ dec(len, 8);
__ cmp_and_br_short(len, 8, Assembler::greaterEqual, Assembler::pt, L_crc32c_x8_loop);
__ BIND(L_crc32c_done);
// move to INT side, and reverse the byte order of lower 32 bits to little endian
__ movftoi_revbytes(F0, crc, G1, G3);
__ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_return);
// ------ process the misaligned tail (7 bytes or less) ------
__ BIND(L_crc32c_tail);
// crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF];
__ ldub(buf, 0, G1);
__ update_byte_crc32(crc, G1, table);
__ inc(buf);
__ dec(len);
__ cmp_and_br_short(len, 0, Assembler::greater, Assembler::pt, L_crc32c_tail);
__ BIND(L_crc32c_return);
__ nop();
__ retl();
__ delayed()->nop();
@ -5366,6 +5198,12 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_crc_table_adr = (address)StubRoutines::Sparc::_crc_table;
StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
}
if (UseCRC32CIntrinsics) {
// set the table address before generating the stub that uses it
StubRoutines::_crc32c_table_addr = (address)StubRoutines::Sparc::_crc32c_table;
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C();
}
}
@ -5425,12 +5263,6 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_sha512_implCompress = generate_sha512_implCompress(false, "sha512_implCompress");
StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB");
}
// generate CRC32C intrinsic code
if (UseCRC32CIntrinsics) {
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C();
}
// generate Adler32 intrinsics code
if (UseAdler32Intrinsics) {
StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32();

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -147,3 +147,62 @@ juint StubRoutines::Sparc::_crc_table[] =
0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
0x2d02ef8dUL
};
/**
* CRC32C constants lookup table
*
* 256 entries, indexed by a byte value. Entry 0x80 is 0x82F63B78, the
* bit-reflected CRC-32C (Castagnoli) polynomial, i.e. this is the usual
* byte-at-a-time table for the reflected algorithm
*   crc = (crc >>> 8) ^ table[(crc ^ b) & 0xFF]
* Its address is published as StubRoutines::_crc32c_table_addr for the
* generated CRC32C code.
*/
juint StubRoutines::Sparc::_crc32c_table[] =
{
0x00000000UL, 0xF26B8303UL, 0xE13B70F7UL, 0x1350F3F4UL, 0xC79A971FUL,
0x35F1141CUL, 0x26A1E7E8UL, 0xD4CA64EBUL, 0x8AD958CFUL, 0x78B2DBCCUL,
0x6BE22838UL, 0x9989AB3BUL, 0x4D43CFD0UL, 0xBF284CD3UL, 0xAC78BF27UL,
0x5E133C24UL, 0x105EC76FUL, 0xE235446CUL, 0xF165B798UL, 0x030E349BUL,
0xD7C45070UL, 0x25AFD373UL, 0x36FF2087UL, 0xC494A384UL, 0x9A879FA0UL,
0x68EC1CA3UL, 0x7BBCEF57UL, 0x89D76C54UL, 0x5D1D08BFUL, 0xAF768BBCUL,
0xBC267848UL, 0x4E4DFB4BUL, 0x20BD8EDEUL, 0xD2D60DDDUL, 0xC186FE29UL,
0x33ED7D2AUL, 0xE72719C1UL, 0x154C9AC2UL, 0x061C6936UL, 0xF477EA35UL,
0xAA64D611UL, 0x580F5512UL, 0x4B5FA6E6UL, 0xB93425E5UL, 0x6DFE410EUL,
0x9F95C20DUL, 0x8CC531F9UL, 0x7EAEB2FAUL, 0x30E349B1UL, 0xC288CAB2UL,
0xD1D83946UL, 0x23B3BA45UL, 0xF779DEAEUL, 0x05125DADUL, 0x1642AE59UL,
0xE4292D5AUL, 0xBA3A117EUL, 0x4851927DUL, 0x5B016189UL, 0xA96AE28AUL,
0x7DA08661UL, 0x8FCB0562UL, 0x9C9BF696UL, 0x6EF07595UL, 0x417B1DBCUL,
0xB3109EBFUL, 0xA0406D4BUL, 0x522BEE48UL, 0x86E18AA3UL, 0x748A09A0UL,
0x67DAFA54UL, 0x95B17957UL, 0xCBA24573UL, 0x39C9C670UL, 0x2A993584UL,
0xD8F2B687UL, 0x0C38D26CUL, 0xFE53516FUL, 0xED03A29BUL, 0x1F682198UL,
0x5125DAD3UL, 0xA34E59D0UL, 0xB01EAA24UL, 0x42752927UL, 0x96BF4DCCUL,
0x64D4CECFUL, 0x77843D3BUL, 0x85EFBE38UL, 0xDBFC821CUL, 0x2997011FUL,
0x3AC7F2EBUL, 0xC8AC71E8UL, 0x1C661503UL, 0xEE0D9600UL, 0xFD5D65F4UL,
0x0F36E6F7UL, 0x61C69362UL, 0x93AD1061UL, 0x80FDE395UL, 0x72966096UL,
0xA65C047DUL, 0x5437877EUL, 0x4767748AUL, 0xB50CF789UL, 0xEB1FCBADUL,
0x197448AEUL, 0x0A24BB5AUL, 0xF84F3859UL, 0x2C855CB2UL, 0xDEEEDFB1UL,
0xCDBE2C45UL, 0x3FD5AF46UL, 0x7198540DUL, 0x83F3D70EUL, 0x90A324FAUL,
0x62C8A7F9UL, 0xB602C312UL, 0x44694011UL, 0x5739B3E5UL, 0xA55230E6UL,
0xFB410CC2UL, 0x092A8FC1UL, 0x1A7A7C35UL, 0xE811FF36UL, 0x3CDB9BDDUL,
0xCEB018DEUL, 0xDDE0EB2AUL, 0x2F8B6829UL, 0x82F63B78UL, 0x709DB87BUL,
0x63CD4B8FUL, 0x91A6C88CUL, 0x456CAC67UL, 0xB7072F64UL, 0xA457DC90UL,
0x563C5F93UL, 0x082F63B7UL, 0xFA44E0B4UL, 0xE9141340UL, 0x1B7F9043UL,
0xCFB5F4A8UL, 0x3DDE77ABUL, 0x2E8E845FUL, 0xDCE5075CUL, 0x92A8FC17UL,
0x60C37F14UL, 0x73938CE0UL, 0x81F80FE3UL, 0x55326B08UL, 0xA759E80BUL,
0xB4091BFFUL, 0x466298FCUL, 0x1871A4D8UL, 0xEA1A27DBUL, 0xF94AD42FUL,
0x0B21572CUL, 0xDFEB33C7UL, 0x2D80B0C4UL, 0x3ED04330UL, 0xCCBBC033UL,
0xA24BB5A6UL, 0x502036A5UL, 0x4370C551UL, 0xB11B4652UL, 0x65D122B9UL,
0x97BAA1BAUL, 0x84EA524EUL, 0x7681D14DUL, 0x2892ED69UL, 0xDAF96E6AUL,
0xC9A99D9EUL, 0x3BC21E9DUL, 0xEF087A76UL, 0x1D63F975UL, 0x0E330A81UL,
0xFC588982UL, 0xB21572C9UL, 0x407EF1CAUL, 0x532E023EUL, 0xA145813DUL,
0x758FE5D6UL, 0x87E466D5UL, 0x94B49521UL, 0x66DF1622UL, 0x38CC2A06UL,
0xCAA7A905UL, 0xD9F75AF1UL, 0x2B9CD9F2UL, 0xFF56BD19UL, 0x0D3D3E1AUL,
0x1E6DCDEEUL, 0xEC064EEDUL, 0xC38D26C4UL, 0x31E6A5C7UL, 0x22B65633UL,
0xD0DDD530UL, 0x0417B1DBUL, 0xF67C32D8UL, 0xE52CC12CUL, 0x1747422FUL,
0x49547E0BUL, 0xBB3FFD08UL, 0xA86F0EFCUL, 0x5A048DFFUL, 0x8ECEE914UL,
0x7CA56A17UL, 0x6FF599E3UL, 0x9D9E1AE0UL, 0xD3D3E1ABUL, 0x21B862A8UL,
0x32E8915CUL, 0xC083125FUL, 0x144976B4UL, 0xE622F5B7UL, 0xF5720643UL,
0x07198540UL, 0x590AB964UL, 0xAB613A67UL, 0xB831C993UL, 0x4A5A4A90UL,
0x9E902E7BUL, 0x6CFBAD78UL, 0x7FAB5E8CUL, 0x8DC0DD8FUL, 0xE330A81AUL,
0x115B2B19UL, 0x020BD8EDUL, 0xF0605BEEUL, 0x24AA3F05UL, 0xD6C1BC06UL,
0xC5914FF2UL, 0x37FACCF1UL, 0x69E9F0D5UL, 0x9B8273D6UL, 0x88D28022UL,
0x7AB90321UL, 0xAE7367CAUL, 0x5C18E4C9UL, 0x4F48173DUL, 0xBD23943EUL,
0xF36E6F75UL, 0x0105EC76UL, 0x12551F82UL, 0xE03E9C81UL, 0x34F4F86AUL,
0xC69F7B69UL, 0xD5CF889DUL, 0x27A40B9EUL, 0x79B737BAUL, 0x8BDCB4B9UL,
0x988C474DUL, 0x6AE7C44EUL, 0xBE2DA0A5UL, 0x4C4623A6UL, 0x5F16D052UL,
0xAD7D5351UL
};

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -56,6 +56,7 @@ class Sparc {
// masks and table for CRC32
static uint64_t _crc_by128_masks[];
static juint _crc_table[];
static juint _crc32c_table[];
public:
// test assembler stop routine by setting registers

View File

@ -1082,8 +1082,56 @@ address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractI
return NULL;
}
// Not supported
/**
 * Interpreter entry for the intrinsic-candidate (non-native) methods
 *   int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end)
 *   int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long buf, int off, int end)
 *
 * Differences from the CRC32 entries: no CRC32C method is native, and the
 * last argument is an end index rather than a length; the byte count handed
 * to the kernel is computed here as end - off.
 *
 * Returns the entry address, or NULL when UseCRC32CIntrinsics is off.
 */
address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
  if (!UseCRC32CIntrinsics) {
    return NULL;
  }

  address entry = __ pc();

  // Register assignment. O2 and O3 are deliberately reused: off/end are dead
  // once the start address and length have been computed.
  const Register crc_reg   = O0;  // initial crc, also holds the result
  const Register buf_reg   = O1;  // byte[] oop or raw buffer address, then start address
  const Register off_reg   = O2;  // off argument
  const Register end_reg   = O3;  // end argument (length is end - off)
  const Register len_reg   = O2;  // byte count passed to the kernel
  const Register table_reg = O3;  // crc32c lookup table address (set by the kernel)

  const bool direct_buffer =
      (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer);
  // Arguments are reversed on the java expression stack. The crc slot sits one
  // slot further away for the direct-buffer variant (presumably because the
  // long address argument takes two slots).
  const int crc_slot = direct_buffer ? 32 : 24;

  __ lduw(Gargs, 0,  end_reg);
  __ lduw(Gargs, 8,  off_reg);
  __ ldx( Gargs, 16, buf_reg);
  __ lduw(Gargs, crc_slot, crc_reg);

  if (!direct_buffer) {
    // byte[]: step over the array header to reach element 0
    __ add(buf_reg, arrayOopDesc::base_offset_in_bytes(T_BYTE), buf_reg);
  }
  // start address = base + off, length = end - off
  __ add(buf_reg, off_reg, buf_reg);
  __ sub(end_reg, off_reg, len_reg);

  // Run the crc32c kernel with the thread register preserved around it
  __ MacroAssembler::save_thread(L7_thread_cache);
  __ kernel_crc32c(crc_reg, buf_reg, len_reg, table_reg);
  __ MacroAssembler::restore_thread(L7_thread_cache);

  // result in O0
  __ retl();
  __ delayed()->nop();
  return entry;
}

View File

@ -1108,6 +1108,10 @@ void LIRGenerator::do_update_CRC32(Intrinsic* x) {
}
}
// C1 LIR generation hook for the java.util.zip.CRC32C intrinsics.
// This definition provides no implementation: the body only calls
// Unimplemented(), so reaching it is treated as an error.
void LIRGenerator::do_update_CRC32C(Intrinsic* x) {
Unimplemented();
}
// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
// _i2b, _i2c, _i2s
LIR_Opr fixed_register_for(BasicType type) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -224,6 +224,10 @@ bool Compiler::is_intrinsic_supported(const methodHandle& method) {
case vmIntrinsics::_updateCRC32:
case vmIntrinsics::_updateBytesCRC32:
case vmIntrinsics::_updateByteBufferCRC32:
#ifdef SPARC
case vmIntrinsics::_updateBytesCRC32C:
case vmIntrinsics::_updateDirectByteBufferCRC32C:
#endif
case vmIntrinsics::_compareAndSwapInt:
case vmIntrinsics::_compareAndSwapObject:
case vmIntrinsics::_getCharStringU:

View File

@ -3174,6 +3174,11 @@ void LIRGenerator::do_Intrinsic(Intrinsic* x) {
do_update_CRC32(x);
break;
case vmIntrinsics::_updateBytesCRC32C:
case vmIntrinsics::_updateDirectByteBufferCRC32C:
do_update_CRC32C(x);
break;
default: ShouldNotReachHere(); break;
}
}

View File

@ -253,6 +253,7 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure {
void do_FPIntrinsics(Intrinsic* x);
void do_Reference_get(Intrinsic* x);
void do_update_CRC32(Intrinsic* x);
void do_update_CRC32C(Intrinsic* x);
LIR_Opr call_runtime(BasicTypeArray* signature, LIRItemList* args, address entry, ValueType* result_type, CodeEmitInfo* info);
LIR_Opr call_runtime(BasicTypeArray* signature, LIR_OprList* args, address entry, ValueType* result_type, CodeEmitInfo* info);

View File

@ -318,6 +318,7 @@ const char* Runtime1::name_for_address(address entry) {
FUNCTION_CASE(entry, TRACE_TIME_METHOD);
#endif
FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32());
FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32C());
FUNCTION_CASE(entry, StubRoutines::dexp());
FUNCTION_CASE(entry, StubRoutines::dlog());
FUNCTION_CASE(entry, StubRoutines::dlog10());