8155162: java.util.zip.CRC32C Interpreter/C1 intrinsics support on SPARC
Reviewed-by: kvn
This commit is contained in:
parent
160798c7e7
commit
7057abda29
@ -1029,6 +1029,10 @@ void LIRGenerator::do_update_CRC32(Intrinsic* x) {
|
||||
}
|
||||
}
|
||||
|
||||
void LIRGenerator::do_update_CRC32C(Intrinsic* x) {
|
||||
Unimplemented();
|
||||
}
|
||||
|
||||
// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
|
||||
// _i2b, _i2c, _i2s
|
||||
void LIRGenerator::do_Convert(Convert* x) {
|
||||
|
@ -1427,3 +1427,7 @@ void LIRGenerator::do_update_CRC32(Intrinsic* x) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void LIRGenerator::do_update_CRC32C(Intrinsic* x) {
|
||||
Unimplemented();
|
||||
}
|
||||
|
@ -868,6 +868,90 @@ void LIRGenerator::do_update_CRC32(Intrinsic* x) {
|
||||
}
|
||||
}
|
||||
|
||||
void LIRGenerator::do_update_CRC32C(Intrinsic* x) {
|
||||
// Make all state_for calls early since they can emit code
|
||||
LIR_Opr result = rlock_result(x);
|
||||
int flags = 0;
|
||||
switch (x->id()) {
|
||||
case vmIntrinsics::_updateBytesCRC32C:
|
||||
case vmIntrinsics::_updateDirectByteBufferCRC32C: {
|
||||
|
||||
bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32C);
|
||||
int array_offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0;
|
||||
|
||||
LIRItem crc(x->argument_at(0), this);
|
||||
LIRItem buf(x->argument_at(1), this);
|
||||
LIRItem off(x->argument_at(2), this);
|
||||
LIRItem end(x->argument_at(3), this);
|
||||
|
||||
buf.load_item();
|
||||
off.load_nonconstant();
|
||||
end.load_nonconstant();
|
||||
|
||||
// len = end - off
|
||||
LIR_Opr len = end.result();
|
||||
LIR_Opr tmpA = new_register(T_INT);
|
||||
LIR_Opr tmpB = new_register(T_INT);
|
||||
__ move(end.result(), tmpA);
|
||||
__ move(off.result(), tmpB);
|
||||
__ sub(tmpA, tmpB, tmpA);
|
||||
len = tmpA;
|
||||
|
||||
LIR_Opr index = off.result();
|
||||
|
||||
if(off.result()->is_constant()) {
|
||||
index = LIR_OprFact::illegalOpr;
|
||||
array_offset += off.result()->as_jint();
|
||||
}
|
||||
|
||||
LIR_Opr base_op = buf.result();
|
||||
|
||||
if (index->is_valid()) {
|
||||
LIR_Opr tmp = new_register(T_LONG);
|
||||
__ convert(Bytecodes::_i2l, index, tmp);
|
||||
index = tmp;
|
||||
if (index->is_constant()) {
|
||||
array_offset += index->as_constant_ptr()->as_jint();
|
||||
index = LIR_OprFact::illegalOpr;
|
||||
} else if (index->is_register()) {
|
||||
LIR_Opr tmp2 = new_register(T_LONG);
|
||||
LIR_Opr tmp3 = new_register(T_LONG);
|
||||
__ move(base_op, tmp2);
|
||||
__ move(index, tmp3);
|
||||
__ add(tmp2, tmp3, tmp2);
|
||||
base_op = tmp2;
|
||||
} else {
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
}
|
||||
|
||||
LIR_Address* a = new LIR_Address(base_op, array_offset, T_BYTE);
|
||||
|
||||
BasicTypeList signature(3);
|
||||
signature.append(T_INT);
|
||||
signature.append(T_ADDRESS);
|
||||
signature.append(T_INT);
|
||||
CallingConvention* cc = frame_map()->c_calling_convention(&signature);
|
||||
const LIR_Opr result_reg = result_register_for(x->type());
|
||||
|
||||
LIR_Opr addr = new_pointer_register();
|
||||
__ leal(LIR_OprFact::address(a), addr);
|
||||
|
||||
crc.load_item_force(cc->at(0));
|
||||
__ move(addr, cc->at(1));
|
||||
__ move(len, cc->at(2));
|
||||
|
||||
__ call_runtime_leaf(StubRoutines::updateBytesCRC32C(), getThreadTemp(), result_reg, cc->args());
|
||||
__ move(result_reg, result);
|
||||
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
|
||||
// _i2b, _i2c, _i2s
|
||||
void LIRGenerator::do_Convert(Convert* x) {
|
||||
|
@ -4837,21 +4837,21 @@ void MacroAssembler::update_byte_crc32(Register crc, Register val, Register tabl
|
||||
|
||||
// Reverse byte order of lower 32 bits, assuming upper 32 bits all zeros
|
||||
void MacroAssembler::reverse_bytes_32(Register src, Register dst, Register tmp) {
|
||||
srlx(src, 24, dst);
|
||||
srlx(src, 24, dst);
|
||||
|
||||
sllx(src, 32+8, tmp);
|
||||
srlx(tmp, 32+24, tmp);
|
||||
sllx(tmp, 8, tmp);
|
||||
or3(dst, tmp, dst);
|
||||
sllx(src, 32+8, tmp);
|
||||
srlx(tmp, 32+24, tmp);
|
||||
sllx(tmp, 8, tmp);
|
||||
or3(dst, tmp, dst);
|
||||
|
||||
sllx(src, 32+16, tmp);
|
||||
srlx(tmp, 32+24, tmp);
|
||||
sllx(tmp, 16, tmp);
|
||||
or3(dst, tmp, dst);
|
||||
sllx(src, 32+16, tmp);
|
||||
srlx(tmp, 32+24, tmp);
|
||||
sllx(tmp, 16, tmp);
|
||||
or3(dst, tmp, dst);
|
||||
|
||||
sllx(src, 32+24, tmp);
|
||||
srlx(tmp, 32, tmp);
|
||||
or3(dst, tmp, dst);
|
||||
sllx(src, 32+24, tmp);
|
||||
srlx(tmp, 32, tmp);
|
||||
or3(dst, tmp, dst);
|
||||
}
|
||||
|
||||
void MacroAssembler::movitof_revbytes(Register src, FloatRegister dst, Register tmp1, Register tmp2) {
|
||||
@ -5103,3 +5103,176 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Regi
|
||||
not1(crc);
|
||||
}
|
||||
|
||||
#define CHUNK_LEN 128 /* 128 x 8B = 1KB */
|
||||
#define CHUNK_K1 0x1307a0206 /* reverseBits(pow(x, CHUNK_LEN*8*8*3 - 32) mod P(x)) << 1 */
|
||||
#define CHUNK_K2 0x1a0f717c4 /* reverseBits(pow(x, CHUNK_LEN*8*8*2 - 32) mod P(x)) << 1 */
|
||||
#define CHUNK_K3 0x0170076fa /* reverseBits(pow(x, CHUNK_LEN*8*8*1 - 32) mod P(x)) << 1 */
|
||||
|
||||
void MacroAssembler::kernel_crc32c(Register crc, Register buf, Register len, Register table) {
|
||||
|
||||
Label L_crc32c_head, L_crc32c_aligned;
|
||||
Label L_crc32c_parallel, L_crc32c_parallel_loop;
|
||||
Label L_crc32c_serial, L_crc32c_x32_loop, L_crc32c_x8, L_crc32c_x8_loop;
|
||||
Label L_crc32c_done, L_crc32c_tail, L_crc32c_return;
|
||||
|
||||
set(ExternalAddress(StubRoutines::crc32c_table_addr()), table);
|
||||
|
||||
cmp_and_br_short(len, 0, Assembler::lessEqual, Assembler::pn, L_crc32c_return);
|
||||
|
||||
// clear upper 32 bits of crc
|
||||
clruwu(crc);
|
||||
|
||||
and3(buf, 7, G4);
|
||||
cmp_and_brx_short(G4, 0, Assembler::equal, Assembler::pt, L_crc32c_aligned);
|
||||
|
||||
mov(8, G1);
|
||||
sub(G1, G4, G4);
|
||||
|
||||
// ------ process the misaligned head (7 bytes or less) ------
|
||||
bind(L_crc32c_head);
|
||||
|
||||
// crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF];
|
||||
ldub(buf, 0, G1);
|
||||
update_byte_crc32(crc, G1, table);
|
||||
|
||||
inc(buf);
|
||||
dec(len);
|
||||
cmp_and_br_short(len, 0, Assembler::equal, Assembler::pn, L_crc32c_return);
|
||||
dec(G4);
|
||||
cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_head);
|
||||
|
||||
// ------ process the 8-byte-aligned body ------
|
||||
bind(L_crc32c_aligned);
|
||||
nop();
|
||||
cmp_and_br_short(len, 8, Assembler::less, Assembler::pn, L_crc32c_tail);
|
||||
|
||||
// reverse the byte order of lower 32 bits to big endian, and move to FP side
|
||||
movitof_revbytes(crc, F0, G1, G3);
|
||||
|
||||
set(CHUNK_LEN*8*4, G4);
|
||||
cmp_and_br_short(len, G4, Assembler::less, Assembler::pt, L_crc32c_serial);
|
||||
|
||||
// ------ process four 1KB chunks in parallel ------
|
||||
bind(L_crc32c_parallel);
|
||||
|
||||
fzero(FloatRegisterImpl::D, F2);
|
||||
fzero(FloatRegisterImpl::D, F4);
|
||||
fzero(FloatRegisterImpl::D, F6);
|
||||
|
||||
mov(CHUNK_LEN - 1, G4);
|
||||
bind(L_crc32c_parallel_loop);
|
||||
// schedule ldf's ahead of crc32c's to hide the load-use latency
|
||||
ldf(FloatRegisterImpl::D, buf, 0, F8);
|
||||
ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8, F10);
|
||||
ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12);
|
||||
ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*24, F14);
|
||||
crc32c(F0, F8, F0);
|
||||
crc32c(F2, F10, F2);
|
||||
crc32c(F4, F12, F4);
|
||||
crc32c(F6, F14, F6);
|
||||
inc(buf, 8);
|
||||
dec(G4);
|
||||
cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_parallel_loop);
|
||||
|
||||
ldf(FloatRegisterImpl::D, buf, 0, F8);
|
||||
ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8, F10);
|
||||
ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12);
|
||||
crc32c(F0, F8, F0);
|
||||
crc32c(F2, F10, F2);
|
||||
crc32c(F4, F12, F4);
|
||||
|
||||
inc(buf, CHUNK_LEN*24);
|
||||
ldfl(FloatRegisterImpl::D, buf, G0, F14); // load in little endian
|
||||
inc(buf, 8);
|
||||
|
||||
prefetch(buf, 0, Assembler::severalReads);
|
||||
prefetch(buf, CHUNK_LEN*8, Assembler::severalReads);
|
||||
prefetch(buf, CHUNK_LEN*16, Assembler::severalReads);
|
||||
prefetch(buf, CHUNK_LEN*24, Assembler::severalReads);
|
||||
|
||||
// move to INT side, and reverse the byte order of lower 32 bits to little endian
|
||||
movftoi_revbytes(F0, O4, G1, G4);
|
||||
movftoi_revbytes(F2, O5, G1, G4);
|
||||
movftoi_revbytes(F4, G5, G1, G4);
|
||||
|
||||
// combine the results of 4 chunks
|
||||
set64(CHUNK_K1, G3, G1);
|
||||
xmulx(O4, G3, O4);
|
||||
set64(CHUNK_K2, G3, G1);
|
||||
xmulx(O5, G3, O5);
|
||||
set64(CHUNK_K3, G3, G1);
|
||||
xmulx(G5, G3, G5);
|
||||
|
||||
movdtox(F14, G4);
|
||||
xor3(O4, O5, O5);
|
||||
xor3(G5, O5, O5);
|
||||
xor3(G4, O5, O5);
|
||||
|
||||
// reverse the byte order to big endian, via stack, and move to FP side
|
||||
// TODO: use new revb instruction
|
||||
add(SP, -8, G1);
|
||||
srlx(G1, 3, G1);
|
||||
sllx(G1, 3, G1);
|
||||
stx(O5, G1, G0);
|
||||
ldfl(FloatRegisterImpl::D, G1, G0, F2); // load in little endian
|
||||
|
||||
crc32c(F6, F2, F0);
|
||||
|
||||
set(CHUNK_LEN*8*4, G4);
|
||||
sub(len, G4, len);
|
||||
cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_parallel);
|
||||
nop();
|
||||
cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_done);
|
||||
|
||||
bind(L_crc32c_serial);
|
||||
|
||||
mov(32, G4);
|
||||
cmp_and_br_short(len, G4, Assembler::less, Assembler::pn, L_crc32c_x8);
|
||||
|
||||
// ------ process 32B chunks ------
|
||||
bind(L_crc32c_x32_loop);
|
||||
ldf(FloatRegisterImpl::D, buf, 0, F2);
|
||||
crc32c(F0, F2, F0);
|
||||
ldf(FloatRegisterImpl::D, buf, 8, F2);
|
||||
crc32c(F0, F2, F0);
|
||||
ldf(FloatRegisterImpl::D, buf, 16, F2);
|
||||
crc32c(F0, F2, F0);
|
||||
ldf(FloatRegisterImpl::D, buf, 24, F2);
|
||||
inc(buf, 32);
|
||||
crc32c(F0, F2, F0);
|
||||
dec(len, 32);
|
||||
cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_x32_loop);
|
||||
|
||||
bind(L_crc32c_x8);
|
||||
nop();
|
||||
cmp_and_br_short(len, 8, Assembler::less, Assembler::pt, L_crc32c_done);
|
||||
|
||||
// ------ process 8B chunks ------
|
||||
bind(L_crc32c_x8_loop);
|
||||
ldf(FloatRegisterImpl::D, buf, 0, F2);
|
||||
inc(buf, 8);
|
||||
crc32c(F0, F2, F0);
|
||||
dec(len, 8);
|
||||
cmp_and_br_short(len, 8, Assembler::greaterEqual, Assembler::pt, L_crc32c_x8_loop);
|
||||
|
||||
bind(L_crc32c_done);
|
||||
|
||||
// move to INT side, and reverse the byte order of lower 32 bits to little endian
|
||||
movftoi_revbytes(F0, crc, G1, G3);
|
||||
|
||||
cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_return);
|
||||
|
||||
// ------ process the misaligned tail (7 bytes or less) ------
|
||||
bind(L_crc32c_tail);
|
||||
|
||||
// crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF];
|
||||
ldub(buf, 0, G1);
|
||||
update_byte_crc32(crc, G1, table);
|
||||
|
||||
inc(buf);
|
||||
dec(len);
|
||||
cmp_and_br_short(len, 0, Assembler::greater, Assembler::pt, L_crc32c_tail);
|
||||
|
||||
bind(L_crc32c_return);
|
||||
nop();
|
||||
}
|
||||
|
@ -1418,6 +1418,8 @@ public:
|
||||
// Fold 8-bit data
|
||||
void fold_8bit_crc32(Register xcrc, Register table, Register xtmp, Register tmp);
|
||||
void fold_8bit_crc32(Register crc, Register table, Register tmp);
|
||||
// CRC32C code for java.util.zip.CRC32C::updateBytes/updateDirectByteBuffer instrinsic.
|
||||
void kernel_crc32c(Register crc, Register buf, Register len, Register table);
|
||||
|
||||
};
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -4909,11 +4909,6 @@ class StubGenerator: public StubCodeGenerator {
|
||||
return start;
|
||||
}
|
||||
|
||||
#define CHUNK_LEN 128 /* 128 x 8B = 1KB */
|
||||
#define CHUNK_K1 0x1307a0206 /* reverseBits(pow(x, CHUNK_LEN*8*8*3 - 32) mod P(x)) << 1 */
|
||||
#define CHUNK_K2 0x1a0f717c4 /* reverseBits(pow(x, CHUNK_LEN*8*8*2 - 32) mod P(x)) << 1 */
|
||||
#define CHUNK_K3 0x0170076fa /* reverseBits(pow(x, CHUNK_LEN*8*8*1 - 32) mod P(x)) << 1 */
|
||||
|
||||
/**
|
||||
* Arguments:
|
||||
*
|
||||
@ -4938,171 +4933,8 @@ class StubGenerator: public StubCodeGenerator {
|
||||
const Register len = O2; // number of bytes
|
||||
const Register table = O3; // byteTable
|
||||
|
||||
Label L_crc32c_head, L_crc32c_aligned;
|
||||
Label L_crc32c_parallel, L_crc32c_parallel_loop;
|
||||
Label L_crc32c_serial, L_crc32c_x32_loop, L_crc32c_x8, L_crc32c_x8_loop;
|
||||
Label L_crc32c_done, L_crc32c_tail, L_crc32c_return;
|
||||
__ kernel_crc32c(crc, buf, len, table);
|
||||
|
||||
__ cmp_and_br_short(len, 0, Assembler::lessEqual, Assembler::pn, L_crc32c_return);
|
||||
|
||||
// clear upper 32 bits of crc
|
||||
__ clruwu(crc);
|
||||
|
||||
__ and3(buf, 7, G4);
|
||||
__ cmp_and_brx_short(G4, 0, Assembler::equal, Assembler::pt, L_crc32c_aligned);
|
||||
|
||||
__ mov(8, G1);
|
||||
__ sub(G1, G4, G4);
|
||||
|
||||
// ------ process the misaligned head (7 bytes or less) ------
|
||||
__ BIND(L_crc32c_head);
|
||||
|
||||
// crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF];
|
||||
__ ldub(buf, 0, G1);
|
||||
__ update_byte_crc32(crc, G1, table);
|
||||
|
||||
__ inc(buf);
|
||||
__ dec(len);
|
||||
__ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pn, L_crc32c_return);
|
||||
__ dec(G4);
|
||||
__ cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_head);
|
||||
|
||||
// ------ process the 8-byte-aligned body ------
|
||||
__ BIND(L_crc32c_aligned);
|
||||
__ nop();
|
||||
__ cmp_and_br_short(len, 8, Assembler::less, Assembler::pn, L_crc32c_tail);
|
||||
|
||||
// reverse the byte order of lower 32 bits to big endian, and move to FP side
|
||||
__ movitof_revbytes(crc, F0, G1, G3);
|
||||
|
||||
__ set(CHUNK_LEN*8*4, G4);
|
||||
__ cmp_and_br_short(len, G4, Assembler::less, Assembler::pt, L_crc32c_serial);
|
||||
|
||||
// ------ process four 1KB chunks in parallel ------
|
||||
__ BIND(L_crc32c_parallel);
|
||||
|
||||
__ fzero(FloatRegisterImpl::D, F2);
|
||||
__ fzero(FloatRegisterImpl::D, F4);
|
||||
__ fzero(FloatRegisterImpl::D, F6);
|
||||
|
||||
__ mov(CHUNK_LEN - 1, G4);
|
||||
__ BIND(L_crc32c_parallel_loop);
|
||||
// schedule ldf's ahead of crc32c's to hide the load-use latency
|
||||
__ ldf(FloatRegisterImpl::D, buf, 0, F8);
|
||||
__ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8, F10);
|
||||
__ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12);
|
||||
__ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*24, F14);
|
||||
__ crc32c(F0, F8, F0);
|
||||
__ crc32c(F2, F10, F2);
|
||||
__ crc32c(F4, F12, F4);
|
||||
__ crc32c(F6, F14, F6);
|
||||
__ inc(buf, 8);
|
||||
__ dec(G4);
|
||||
__ cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_parallel_loop);
|
||||
|
||||
__ ldf(FloatRegisterImpl::D, buf, 0, F8);
|
||||
__ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8, F10);
|
||||
__ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12);
|
||||
__ crc32c(F0, F8, F0);
|
||||
__ crc32c(F2, F10, F2);
|
||||
__ crc32c(F4, F12, F4);
|
||||
|
||||
__ inc(buf, CHUNK_LEN*24);
|
||||
__ ldfl(FloatRegisterImpl::D, buf, G0, F14); // load in little endian
|
||||
__ inc(buf, 8);
|
||||
|
||||
__ prefetch(buf, 0, Assembler::severalReads);
|
||||
__ prefetch(buf, CHUNK_LEN*8, Assembler::severalReads);
|
||||
__ prefetch(buf, CHUNK_LEN*16, Assembler::severalReads);
|
||||
__ prefetch(buf, CHUNK_LEN*24, Assembler::severalReads);
|
||||
|
||||
// move to INT side, and reverse the byte order of lower 32 bits to little endian
|
||||
__ movftoi_revbytes(F0, O4, G1, G4);
|
||||
__ movftoi_revbytes(F2, O5, G1, G4);
|
||||
__ movftoi_revbytes(F4, G5, G1, G4);
|
||||
|
||||
// combine the results of 4 chunks
|
||||
__ set64(CHUNK_K1, G3, G1);
|
||||
__ xmulx(O4, G3, O4);
|
||||
__ set64(CHUNK_K2, G3, G1);
|
||||
__ xmulx(O5, G3, O5);
|
||||
__ set64(CHUNK_K3, G3, G1);
|
||||
__ xmulx(G5, G3, G5);
|
||||
|
||||
__ movdtox(F14, G4);
|
||||
__ xor3(O4, O5, O5);
|
||||
__ xor3(G5, O5, O5);
|
||||
__ xor3(G4, O5, O5);
|
||||
|
||||
// reverse the byte order to big endian, via stack, and move to FP side
|
||||
__ add(SP, -8, G1);
|
||||
__ srlx(G1, 3, G1);
|
||||
__ sllx(G1, 3, G1);
|
||||
__ stx(O5, G1, G0);
|
||||
__ ldfl(FloatRegisterImpl::D, G1, G0, F2); // load in little endian
|
||||
|
||||
__ crc32c(F6, F2, F0);
|
||||
|
||||
__ set(CHUNK_LEN*8*4, G4);
|
||||
__ sub(len, G4, len);
|
||||
__ cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_parallel);
|
||||
__ nop();
|
||||
__ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_done);
|
||||
|
||||
__ BIND(L_crc32c_serial);
|
||||
|
||||
__ mov(32, G4);
|
||||
__ cmp_and_br_short(len, G4, Assembler::less, Assembler::pn, L_crc32c_x8);
|
||||
|
||||
// ------ process 32B chunks ------
|
||||
__ BIND(L_crc32c_x32_loop);
|
||||
__ ldf(FloatRegisterImpl::D, buf, 0, F2);
|
||||
__ inc(buf, 8);
|
||||
__ crc32c(F0, F2, F0);
|
||||
__ ldf(FloatRegisterImpl::D, buf, 0, F2);
|
||||
__ inc(buf, 8);
|
||||
__ crc32c(F0, F2, F0);
|
||||
__ ldf(FloatRegisterImpl::D, buf, 0, F2);
|
||||
__ inc(buf, 8);
|
||||
__ crc32c(F0, F2, F0);
|
||||
__ ldf(FloatRegisterImpl::D, buf, 0, F2);
|
||||
__ inc(buf, 8);
|
||||
__ crc32c(F0, F2, F0);
|
||||
__ dec(len, 32);
|
||||
__ cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_x32_loop);
|
||||
|
||||
__ BIND(L_crc32c_x8);
|
||||
__ nop();
|
||||
__ cmp_and_br_short(len, 8, Assembler::less, Assembler::pt, L_crc32c_done);
|
||||
|
||||
// ------ process 8B chunks ------
|
||||
__ BIND(L_crc32c_x8_loop);
|
||||
__ ldf(FloatRegisterImpl::D, buf, 0, F2);
|
||||
__ inc(buf, 8);
|
||||
__ crc32c(F0, F2, F0);
|
||||
__ dec(len, 8);
|
||||
__ cmp_and_br_short(len, 8, Assembler::greaterEqual, Assembler::pt, L_crc32c_x8_loop);
|
||||
|
||||
__ BIND(L_crc32c_done);
|
||||
|
||||
// move to INT side, and reverse the byte order of lower 32 bits to little endian
|
||||
__ movftoi_revbytes(F0, crc, G1, G3);
|
||||
|
||||
__ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_return);
|
||||
|
||||
// ------ process the misaligned tail (7 bytes or less) ------
|
||||
__ BIND(L_crc32c_tail);
|
||||
|
||||
// crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF];
|
||||
__ ldub(buf, 0, G1);
|
||||
__ update_byte_crc32(crc, G1, table);
|
||||
|
||||
__ inc(buf);
|
||||
__ dec(len);
|
||||
__ cmp_and_br_short(len, 0, Assembler::greater, Assembler::pt, L_crc32c_tail);
|
||||
|
||||
__ BIND(L_crc32c_return);
|
||||
__ nop();
|
||||
__ retl();
|
||||
__ delayed()->nop();
|
||||
|
||||
@ -5366,6 +5198,12 @@ class StubGenerator: public StubCodeGenerator {
|
||||
StubRoutines::_crc_table_adr = (address)StubRoutines::Sparc::_crc_table;
|
||||
StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
|
||||
}
|
||||
|
||||
if (UseCRC32CIntrinsics) {
|
||||
// set table address before stub generation which use it
|
||||
StubRoutines::_crc32c_table_addr = (address)StubRoutines::Sparc::_crc32c_table;
|
||||
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -5425,12 +5263,6 @@ class StubGenerator: public StubCodeGenerator {
|
||||
StubRoutines::_sha512_implCompress = generate_sha512_implCompress(false, "sha512_implCompress");
|
||||
StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB");
|
||||
}
|
||||
|
||||
// generate CRC32C intrinsic code
|
||||
if (UseCRC32CIntrinsics) {
|
||||
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C();
|
||||
}
|
||||
|
||||
// generate Adler32 intrinsics code
|
||||
if (UseAdler32Intrinsics) {
|
||||
StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32();
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -147,3 +147,62 @@ juint StubRoutines::Sparc::_crc_table[] =
|
||||
0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
|
||||
0x2d02ef8dUL
|
||||
};
|
||||
|
||||
/**
|
||||
* CRC32C constants lookup table
|
||||
*/
|
||||
juint StubRoutines::Sparc::_crc32c_table[] =
|
||||
{
|
||||
0x00000000UL, 0xF26B8303UL, 0xE13B70F7UL, 0x1350F3F4UL, 0xC79A971FUL,
|
||||
0x35F1141CUL, 0x26A1E7E8UL, 0xD4CA64EBUL, 0x8AD958CFUL, 0x78B2DBCCUL,
|
||||
0x6BE22838UL, 0x9989AB3BUL, 0x4D43CFD0UL, 0xBF284CD3UL, 0xAC78BF27UL,
|
||||
0x5E133C24UL, 0x105EC76FUL, 0xE235446CUL, 0xF165B798UL, 0x030E349BUL,
|
||||
0xD7C45070UL, 0x25AFD373UL, 0x36FF2087UL, 0xC494A384UL, 0x9A879FA0UL,
|
||||
0x68EC1CA3UL, 0x7BBCEF57UL, 0x89D76C54UL, 0x5D1D08BFUL, 0xAF768BBCUL,
|
||||
0xBC267848UL, 0x4E4DFB4BUL, 0x20BD8EDEUL, 0xD2D60DDDUL, 0xC186FE29UL,
|
||||
0x33ED7D2AUL, 0xE72719C1UL, 0x154C9AC2UL, 0x061C6936UL, 0xF477EA35UL,
|
||||
0xAA64D611UL, 0x580F5512UL, 0x4B5FA6E6UL, 0xB93425E5UL, 0x6DFE410EUL,
|
||||
0x9F95C20DUL, 0x8CC531F9UL, 0x7EAEB2FAUL, 0x30E349B1UL, 0xC288CAB2UL,
|
||||
0xD1D83946UL, 0x23B3BA45UL, 0xF779DEAEUL, 0x05125DADUL, 0x1642AE59UL,
|
||||
0xE4292D5AUL, 0xBA3A117EUL, 0x4851927DUL, 0x5B016189UL, 0xA96AE28AUL,
|
||||
0x7DA08661UL, 0x8FCB0562UL, 0x9C9BF696UL, 0x6EF07595UL, 0x417B1DBCUL,
|
||||
0xB3109EBFUL, 0xA0406D4BUL, 0x522BEE48UL, 0x86E18AA3UL, 0x748A09A0UL,
|
||||
0x67DAFA54UL, 0x95B17957UL, 0xCBA24573UL, 0x39C9C670UL, 0x2A993584UL,
|
||||
0xD8F2B687UL, 0x0C38D26CUL, 0xFE53516FUL, 0xED03A29BUL, 0x1F682198UL,
|
||||
0x5125DAD3UL, 0xA34E59D0UL, 0xB01EAA24UL, 0x42752927UL, 0x96BF4DCCUL,
|
||||
0x64D4CECFUL, 0x77843D3BUL, 0x85EFBE38UL, 0xDBFC821CUL, 0x2997011FUL,
|
||||
0x3AC7F2EBUL, 0xC8AC71E8UL, 0x1C661503UL, 0xEE0D9600UL, 0xFD5D65F4UL,
|
||||
0x0F36E6F7UL, 0x61C69362UL, 0x93AD1061UL, 0x80FDE395UL, 0x72966096UL,
|
||||
0xA65C047DUL, 0x5437877EUL, 0x4767748AUL, 0xB50CF789UL, 0xEB1FCBADUL,
|
||||
0x197448AEUL, 0x0A24BB5AUL, 0xF84F3859UL, 0x2C855CB2UL, 0xDEEEDFB1UL,
|
||||
0xCDBE2C45UL, 0x3FD5AF46UL, 0x7198540DUL, 0x83F3D70EUL, 0x90A324FAUL,
|
||||
0x62C8A7F9UL, 0xB602C312UL, 0x44694011UL, 0x5739B3E5UL, 0xA55230E6UL,
|
||||
0xFB410CC2UL, 0x092A8FC1UL, 0x1A7A7C35UL, 0xE811FF36UL, 0x3CDB9BDDUL,
|
||||
0xCEB018DEUL, 0xDDE0EB2AUL, 0x2F8B6829UL, 0x82F63B78UL, 0x709DB87BUL,
|
||||
0x63CD4B8FUL, 0x91A6C88CUL, 0x456CAC67UL, 0xB7072F64UL, 0xA457DC90UL,
|
||||
0x563C5F93UL, 0x082F63B7UL, 0xFA44E0B4UL, 0xE9141340UL, 0x1B7F9043UL,
|
||||
0xCFB5F4A8UL, 0x3DDE77ABUL, 0x2E8E845FUL, 0xDCE5075CUL, 0x92A8FC17UL,
|
||||
0x60C37F14UL, 0x73938CE0UL, 0x81F80FE3UL, 0x55326B08UL, 0xA759E80BUL,
|
||||
0xB4091BFFUL, 0x466298FCUL, 0x1871A4D8UL, 0xEA1A27DBUL, 0xF94AD42FUL,
|
||||
0x0B21572CUL, 0xDFEB33C7UL, 0x2D80B0C4UL, 0x3ED04330UL, 0xCCBBC033UL,
|
||||
0xA24BB5A6UL, 0x502036A5UL, 0x4370C551UL, 0xB11B4652UL, 0x65D122B9UL,
|
||||
0x97BAA1BAUL, 0x84EA524EUL, 0x7681D14DUL, 0x2892ED69UL, 0xDAF96E6AUL,
|
||||
0xC9A99D9EUL, 0x3BC21E9DUL, 0xEF087A76UL, 0x1D63F975UL, 0x0E330A81UL,
|
||||
0xFC588982UL, 0xB21572C9UL, 0x407EF1CAUL, 0x532E023EUL, 0xA145813DUL,
|
||||
0x758FE5D6UL, 0x87E466D5UL, 0x94B49521UL, 0x66DF1622UL, 0x38CC2A06UL,
|
||||
0xCAA7A905UL, 0xD9F75AF1UL, 0x2B9CD9F2UL, 0xFF56BD19UL, 0x0D3D3E1AUL,
|
||||
0x1E6DCDEEUL, 0xEC064EEDUL, 0xC38D26C4UL, 0x31E6A5C7UL, 0x22B65633UL,
|
||||
0xD0DDD530UL, 0x0417B1DBUL, 0xF67C32D8UL, 0xE52CC12CUL, 0x1747422FUL,
|
||||
0x49547E0BUL, 0xBB3FFD08UL, 0xA86F0EFCUL, 0x5A048DFFUL, 0x8ECEE914UL,
|
||||
0x7CA56A17UL, 0x6FF599E3UL, 0x9D9E1AE0UL, 0xD3D3E1ABUL, 0x21B862A8UL,
|
||||
0x32E8915CUL, 0xC083125FUL, 0x144976B4UL, 0xE622F5B7UL, 0xF5720643UL,
|
||||
0x07198540UL, 0x590AB964UL, 0xAB613A67UL, 0xB831C993UL, 0x4A5A4A90UL,
|
||||
0x9E902E7BUL, 0x6CFBAD78UL, 0x7FAB5E8CUL, 0x8DC0DD8FUL, 0xE330A81AUL,
|
||||
0x115B2B19UL, 0x020BD8EDUL, 0xF0605BEEUL, 0x24AA3F05UL, 0xD6C1BC06UL,
|
||||
0xC5914FF2UL, 0x37FACCF1UL, 0x69E9F0D5UL, 0x9B8273D6UL, 0x88D28022UL,
|
||||
0x7AB90321UL, 0xAE7367CAUL, 0x5C18E4C9UL, 0x4F48173DUL, 0xBD23943EUL,
|
||||
0xF36E6F75UL, 0x0105EC76UL, 0x12551F82UL, 0xE03E9C81UL, 0x34F4F86AUL,
|
||||
0xC69F7B69UL, 0xD5CF889DUL, 0x27A40B9EUL, 0x79B737BAUL, 0x8BDCB4B9UL,
|
||||
0x988C474DUL, 0x6AE7C44EUL, 0xBE2DA0A5UL, 0x4C4623A6UL, 0x5F16D052UL,
|
||||
0xAD7D5351UL
|
||||
};
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -56,6 +56,7 @@ class Sparc {
|
||||
// masks and table for CRC32
|
||||
static uint64_t _crc_by128_masks[];
|
||||
static juint _crc_table[];
|
||||
static juint _crc32c_table[];
|
||||
|
||||
public:
|
||||
// test assembler stop routine by setting registers
|
||||
|
@ -1082,8 +1082,56 @@ address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractI
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Not supported
|
||||
/**
|
||||
* Method entry for intrinsic-candidate (non-native) methods:
|
||||
* int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end)
|
||||
* int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long buf, int off, int end)
|
||||
* Unlike CRC32, CRC32C does not have any methods marked as native
|
||||
* CRC32C also uses an "end" variable instead of the length variable CRC32 uses
|
||||
*/
|
||||
address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
|
||||
|
||||
if (UseCRC32CIntrinsics) {
|
||||
address entry = __ pc();
|
||||
|
||||
// Load parameters from the stack
|
||||
const Register crc = O0; // initial crc
|
||||
const Register buf = O1; // source java byte array address
|
||||
const Register offset = O2; // offset
|
||||
const Register end = O3; // index of last element to process
|
||||
const Register len = O2; // len argument to the kernel
|
||||
const Register table = O3; // crc32c lookup table address
|
||||
|
||||
// Arguments are reversed on java expression stack
|
||||
// Calculate address of start element
|
||||
if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) {
|
||||
__ lduw(Gargs, 0, end);
|
||||
__ lduw(Gargs, 8, offset);
|
||||
__ ldx( Gargs, 16, buf);
|
||||
__ lduw(Gargs, 32, crc);
|
||||
__ add(buf, offset, buf);
|
||||
__ sub(end, offset, len);
|
||||
} else {
|
||||
__ lduw(Gargs, 0, end);
|
||||
__ lduw(Gargs, 8, offset);
|
||||
__ ldx( Gargs, 16, buf);
|
||||
__ lduw(Gargs, 24, crc);
|
||||
__ add(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE), buf); // account for the header size
|
||||
__ add(buf, offset, buf);
|
||||
__ sub(end, offset, len);
|
||||
}
|
||||
|
||||
// Call the crc32c kernel
|
||||
__ MacroAssembler::save_thread(L7_thread_cache);
|
||||
__ kernel_crc32c(crc, buf, len, table);
|
||||
__ MacroAssembler::restore_thread(L7_thread_cache);
|
||||
|
||||
// result in O0
|
||||
__ retl();
|
||||
__ delayed()->nop();
|
||||
|
||||
return entry;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -1108,6 +1108,10 @@ void LIRGenerator::do_update_CRC32(Intrinsic* x) {
|
||||
}
|
||||
}
|
||||
|
||||
void LIRGenerator::do_update_CRC32C(Intrinsic* x) {
|
||||
Unimplemented();
|
||||
}
|
||||
|
||||
// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
|
||||
// _i2b, _i2c, _i2s
|
||||
LIR_Opr fixed_register_for(BasicType type) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -224,6 +224,10 @@ bool Compiler::is_intrinsic_supported(const methodHandle& method) {
|
||||
case vmIntrinsics::_updateCRC32:
|
||||
case vmIntrinsics::_updateBytesCRC32:
|
||||
case vmIntrinsics::_updateByteBufferCRC32:
|
||||
#ifdef SPARC
|
||||
case vmIntrinsics::_updateBytesCRC32C:
|
||||
case vmIntrinsics::_updateDirectByteBufferCRC32C:
|
||||
#endif
|
||||
case vmIntrinsics::_compareAndSwapInt:
|
||||
case vmIntrinsics::_compareAndSwapObject:
|
||||
case vmIntrinsics::_getCharStringU:
|
||||
|
@ -3174,6 +3174,11 @@ void LIRGenerator::do_Intrinsic(Intrinsic* x) {
|
||||
do_update_CRC32(x);
|
||||
break;
|
||||
|
||||
case vmIntrinsics::_updateBytesCRC32C:
|
||||
case vmIntrinsics::_updateDirectByteBufferCRC32C:
|
||||
do_update_CRC32C(x);
|
||||
break;
|
||||
|
||||
default: ShouldNotReachHere(); break;
|
||||
}
|
||||
}
|
||||
|
@ -253,6 +253,7 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure {
|
||||
void do_FPIntrinsics(Intrinsic* x);
|
||||
void do_Reference_get(Intrinsic* x);
|
||||
void do_update_CRC32(Intrinsic* x);
|
||||
void do_update_CRC32C(Intrinsic* x);
|
||||
|
||||
LIR_Opr call_runtime(BasicTypeArray* signature, LIRItemList* args, address entry, ValueType* result_type, CodeEmitInfo* info);
|
||||
LIR_Opr call_runtime(BasicTypeArray* signature, LIR_OprList* args, address entry, ValueType* result_type, CodeEmitInfo* info);
|
||||
|
@ -318,6 +318,7 @@ const char* Runtime1::name_for_address(address entry) {
|
||||
FUNCTION_CASE(entry, TRACE_TIME_METHOD);
|
||||
#endif
|
||||
FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32());
|
||||
FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32C());
|
||||
FUNCTION_CASE(entry, StubRoutines::dexp());
|
||||
FUNCTION_CASE(entry, StubRoutines::dlog());
|
||||
FUNCTION_CASE(entry, StubRoutines::dlog10());
|
||||
|
Loading…
x
Reference in New Issue
Block a user