8143012: CRC32 Intrinsics support on SPARC
Reviewed-by: kvn, roland
This commit is contained in:
parent
ea052022fa
commit
e28d9ba105
@ -2812,7 +2812,23 @@ void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst_opr) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Emits the code for a single-byte CRC32 update (LIR_OpUpdateCRC32).
//
// op->crc() holds the running CRC, op->val() the byte to fold in, and
// op->result_opr() receives the updated CRC. All three operands must be
// single CPU registers. The result register doubles as the scratch register
// that holds the CRC table address while the update is computed.
void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) {
  assert(op->crc()->is_single_cpu(), "crc must be register");
  assert(op->val()->is_single_cpu(), "byte value must be register");
  assert(op->result_opr()->is_single_cpu(), "result must be register");

  const Register crc_reg    = op->crc()->as_register();
  const Register byte_reg   = op->val()->as_register();
  const Register result_reg = op->result_opr()->as_register();
  // The table pointer lives in the result register until the final move.
  const Register table_reg  = result_reg;

  assert_different_registers(byte_reg, crc_reg, table_reg);

  // Load the lookup table address.
  __ set(ExternalAddress(StubRoutines::crc_table_addr()), table_reg);

  // The update runs on the complemented CRC with the upper 32 bits cleared,
  // and the value is complemented again afterwards.
  __ not1(crc_reg);
  __ clruwu(crc_reg);
  __ update_byte_crc32(crc_reg, byte_reg, table_reg);
  __ not1(crc_reg);

  // Publish the result (overwrites the table pointer).
  __ mov(crc_reg, result_reg);
}
|
||||||
|
|
||||||
void LIR_Assembler::emit_lock(LIR_OpLock* op) {
|
void LIR_Assembler::emit_lock(LIR_OpLock* op) {
|
||||||
|
@ -786,7 +786,86 @@ void LIRGenerator::do_ArrayCopy(Intrinsic* x) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Generates LIR for the java.util.zip.CRC32 intrinsics.
//
//   _updateCRC32            - emits a single update_crc32 LIR operation on
//                             (crc, byte value).
//   _updateBytesCRC32,
//   _updateByteBufferCRC32  - computes the address of the first byte to
//                             process and calls the updateBytesCRC32 stub as
//                             a leaf runtime call with (crc, address, len).
//
// Any other intrinsic id is a programming error (ShouldNotReachHere).
void LIRGenerator::do_update_CRC32(Intrinsic* x) {
  // Reserve the result operand before any argument is loaded.
  LIR_Opr result = rlock_result(x);
  switch (x->id()) {
    case vmIntrinsics::_updateCRC32: {
      LIRItem crc(x->argument_at(0), this);
      LIRItem val(x->argument_at(1), this);
      // val is destroyed by update_crc32
      val.set_destroys_register();
      crc.load_item();
      val.load_item();
      __ update_crc32(crc.result(), val.result(), result);
      break;
    }
    case vmIntrinsics::_updateBytesCRC32:
    case vmIntrinsics::_updateByteBufferCRC32: {

      bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32);

      LIRItem crc(x->argument_at(0), this);
      LIRItem buf(x->argument_at(1), this);
      LIRItem off(x->argument_at(2), this);
      LIRItem len(x->argument_at(3), this);

      buf.load_item();
      off.load_nonconstant();

      // For the byte[] variant the data starts after the array header;
      // for the ByteBuffer variant buf is used as the base as-is.
      LIR_Opr index = off.result();
      int offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0;
      if (off.result()->is_constant()) {
        // A constant offset is folded into the displacement.
        index = LIR_OprFact::illegalOpr;
        offset += off.result()->as_jint();
      }

      LIR_Opr base_op = buf.result();

      if (index->is_valid()) {
        // Widen the int offset to 64 bits and add it to the base.
        LIR_Opr tmp = new_register(T_LONG);
        __ convert(Bytecodes::_i2l, index, tmp);
        index = tmp;
        // NOTE(review): index was just replaced by the freshly allocated tmp,
        // so the constant branch below is presumably never taken - confirm.
        if (index->is_constant()) {
          offset += index->as_constant_ptr()->as_jint();
          index = LIR_OprFact::illegalOpr;
        } else if (index->is_register()) {
          LIR_Opr tmp2 = new_register(T_LONG);
          LIR_Opr tmp3 = new_register(T_LONG);
          __ move(base_op, tmp2);
          __ move(index, tmp3);
          __ add(tmp2, tmp3, tmp2);
          base_op = tmp2;
        } else {
          ShouldNotReachHere();
        }
      }

      LIR_Address* a = new LIR_Address(base_op, offset, T_BYTE);

      // C signature of the stub call: (int crc, address buf, int len).
      BasicTypeList signature(3);
      signature.append(T_INT);
      signature.append(T_ADDRESS);
      signature.append(T_INT);
      CallingConvention* cc = frame_map()->c_calling_convention(&signature);
      const LIR_Opr result_reg = result_register_for(x->type());

      // Materialize the start address of the data.
      LIR_Opr addr = new_pointer_register();
      __ leal(LIR_OprFact::address(a), addr);

      // Place the three arguments where the calling convention expects them.
      crc.load_item_force(cc->at(0));
      __ move(addr, cc->at(1));
      len.load_item_force(cc->at(2));

      __ call_runtime_leaf(StubRoutines::updateBytesCRC32(), getThreadTemp(), result_reg, cc->args());
      __ move(result_reg, result);

      break;
    }
    default: {
      ShouldNotReachHere();
    }
  }
}
|
||||||
|
|
||||||
// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
|
// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -43,8 +43,9 @@
|
|||||||
void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue);
|
void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue);
|
||||||
void generate_counter_overflow(Label& Lcontinue);
|
void generate_counter_overflow(Label& Lcontinue);
|
||||||
|
|
||||||
|
address generate_CRC32_update_entry();
|
||||||
|
address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind);
|
||||||
|
|
||||||
// Not supported
|
// Not supported
|
||||||
address generate_CRC32_update_entry() { return NULL; }
|
|
||||||
address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
|
|
||||||
address generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
|
address generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
|
||||||
#endif // CPU_SPARC_VM_INTERPRETERGENERATOR_SPARC_HPP
|
#endif // CPU_SPARC_VM_INTERPRETERGENERATOR_SPARC_HPP
|
||||||
|
@ -4771,3 +4771,243 @@ void MacroAssembler::movftoi_revbytes(FloatRegister src, Register dst, Register
|
|||||||
movdtox(src, tmp1);
|
movdtox(src, tmp1);
|
||||||
reverse_bytes_32(tmp1, dst, tmp2);
|
reverse_bytes_32(tmp1, dst, tmp2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fold 128-bit data chunk, taking the next 16 bytes from memory.
//
// Emits, in this order:
//   1. xmulx/xmulxhi of xcrc_hi by xK_hi into xtmp_lo/xtmp_hi,
//   2. xmulxhi/xmulx of xcrc_lo by xK_lo into xcrc_hi/xcrc_lo,
//   3. an xor of the two products into xcrc_hi:xcrc_lo,
//   4. two 8-byte little-endian loads (ldxl) from buf, each followed by
//      advancing buf by 8, xor-ed into xcrc_lo and xcrc_hi respectively.
//
// On exit buf has been advanced by 16 and xtmp_hi/xtmp_lo are clobbered.
// The 'offset' parameter is not referenced by this body; the data position
// is determined solely by the current value of buf.
void MacroAssembler::fold_128bit_crc32(Register xcrc_hi, Register xcrc_lo, Register xK_hi, Register xK_lo, Register xtmp_hi, Register xtmp_lo, Register buf, int offset) {
  // Products of the current 128-bit state with the two folding constants.
  xmulx(xcrc_hi, xK_hi, xtmp_lo);
  xmulxhi(xcrc_hi, xK_hi, xtmp_hi);
  xmulxhi(xcrc_lo, xK_lo, xcrc_hi);
  xmulx(xcrc_lo, xK_lo, xcrc_lo);

  // Combine both products.
  xor3(xcrc_lo, xtmp_lo, xcrc_lo);
  xor3(xcrc_hi, xtmp_hi, xcrc_hi);

  // Fetch the next 16 bytes of input (little-endian) and step over them.
  ldxl(buf, G0, xtmp_lo);
  inc(buf, 8);
  ldxl(buf, G0, xtmp_hi);
  inc(buf, 8);

  // Mix the new data into the folded state.
  xor3(xcrc_lo, xtmp_lo, xcrc_lo);
  xor3(xcrc_hi, xtmp_hi, xcrc_hi);
}
|
||||||
|
|
||||||
|
// Fold 128-bit data chunk, taking the data to mix in from registers.
//
// Same multiply-and-xor scheme as the buffer-based overload, except that the
// 128 bits folded into xcrc_hi:xcrc_lo come from xbuf_hi:xbuf_lo instead of
// memory. xtmp_hi/xtmp_lo are clobbered; xbuf_hi/xbuf_lo are only read.
void MacroAssembler::fold_128bit_crc32(Register xcrc_hi, Register xcrc_lo, Register xK_hi, Register xK_lo, Register xtmp_hi, Register xtmp_lo, Register xbuf_hi, Register xbuf_lo) {
  // Work on a copy of the high half, because xcrc_hi is overwritten below.
  // (The copy into xtmp_lo is itself overwritten by the first xmulx.)
  mov(xcrc_lo, xtmp_lo);
  mov(xcrc_hi, xtmp_hi);

  // Products of the current 128-bit state with the two folding constants.
  xmulx(xtmp_hi, xK_hi, xtmp_lo);
  xmulxhi(xtmp_hi, xK_hi, xtmp_hi);
  xmulxhi(xcrc_lo, xK_lo, xcrc_hi);
  xmulx(xcrc_lo, xK_lo, xcrc_lo);

  // Mix in the supplied 128 bits of data ...
  xor3(xcrc_lo, xbuf_lo, xcrc_lo);
  xor3(xcrc_hi, xbuf_hi, xcrc_hi);

  // ... and the second product.
  xor3(xcrc_lo, xtmp_lo, xcrc_lo);
  xor3(xcrc_hi, xtmp_hi, xcrc_hi);
}
|
||||||
|
|
||||||
|
// Fold 8-bit data: one table-driven step on the value in xcrc.
//
// The low byte of xcrc selects a 4-byte entry of 'table'; xcrc is shifted
// right by 8 (srlx) and xor-ed with that entry. Both xtmp and tmp are
// clobbered.
void MacroAssembler::fold_8bit_crc32(Register xcrc, Register table, Register xtmp, Register tmp) {
  and3(xcrc, 0xFF, tmp);     // table index = low byte
  sllx(tmp, 2, tmp);         // scale index by the 4-byte entry size
  lduw(table, tmp, xtmp);    // fetch the table entry
  srlx(xcrc, 8, xcrc);       // drop the byte just consumed
  xor3(xtmp, xcrc, xcrc);
}
|
||||||
|
|
||||||
|
// Fold 8-bit data: one table-driven step on the value in crc, using a single
// scratch register.
//
// The low byte of crc selects a 4-byte entry of 'table'; crc is shifted
// right by 8 (srlx) and xor-ed with that entry. tmp is clobbered.
void MacroAssembler::fold_8bit_crc32(Register crc, Register table, Register tmp) {
  and3(crc, 0xFF, tmp);      // table index = low byte
  srlx(crc, 8, crc);         // drop the byte just consumed
  sllx(tmp, 2, tmp);         // scale index by the 4-byte entry size
  lduw(table, tmp, tmp);     // fetch the table entry (reuses tmp)
  xor3(tmp, crc, crc);
}
|
||||||
|
|
||||||
|
// Number of integer scratch registers used by kernel_crc32 (size of its
// tmp[] register array).
#define CRC32_TMP_REG_NUM       18

// Folding constants loaded by kernel_crc32 with set64(). Each value equals
// one of the 32-bit constants listed in StubRoutines::Sparc::_crc_by128_masks
// shifted left by one bit:
//   CRC32_CONST_64  == 0xb1e6b092 << 1
//   CRC32_CONST_96  == 0x6655004f << 1
//   CRC32_CONST_160 == 0xba8ccbe8 << 1
//   CRC32_CONST_480 == 0xe3720acb << 1
//   CRC32_CONST_544 == 0xaa2215ea << 1
#define CRC32_CONST_64          0x163cd6124
#define CRC32_CONST_96          0x0ccaa009e
#define CRC32_CONST_160         0x1751997d0
#define CRC32_CONST_480         0x1c6e41596
#define CRC32_CONST_544         0x154442bd4
|
||||||
|
|
||||||
|
// CRC32 kernel: emits code that updates 'crc' with 'len' bytes starting at
// 'buf'.
//
//   crc   - in/out: running CRC; complemented on entry and again on exit
//   buf   - in: address of the first byte; advanced as data is consumed
//   len   - in: number of bytes; counted down as data is consumed
//   table - scratch: overwritten here with StubRoutines::crc_table_addr()
//
// Structure of the emitted code:
//   * at most 31 bytes: byte-wise table look-up only (cleanup loop);
//   * otherwise the buffer is first aligned to 8 bytes byte-wise, the
//     registers in tmp[] are parked in floating-point registers, and the
//     data is folded 128 bits at a time (four parallel 128-bit streams while
//     at least 128 bytes remain), reduced to 32 bits, the registers are
//     restored, and any remaining bytes go through the cleanup loop.
//
// G4, O4 and O5 are used as scratch before the tmp[] registers are saved.
void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Register table) {

  Label L_cleanup_loop, L_cleanup_check, L_align_loop, L_align_check;
  Label L_main_loop_prologue;
  Label L_fold_512b, L_fold_512b_loop, L_fold_128b;
  Label L_fold_tail, L_fold_tail_loop;
  Label L_8byte_fold_loop, L_8byte_fold_check;

  // Integer registers borrowed as temporaries; they are saved to and restored
  // from FP registers around the folding code.
  const Register tmp[CRC32_TMP_REG_NUM] = {L0, L1, L2, L3, L4, L5, L6, G1, I0, I1, I2, I3, I4, I5, I7, O4, O5, G3};

  // The folding constants share the last two temporaries; only the pair
  // needed by the current phase is loaded at any time.
  Register const_64  = tmp[CRC32_TMP_REG_NUM-1];
  Register const_96  = tmp[CRC32_TMP_REG_NUM-1];
  Register const_160 = tmp[CRC32_TMP_REG_NUM-2];
  Register const_480 = tmp[CRC32_TMP_REG_NUM-1];
  Register const_544 = tmp[CRC32_TMP_REG_NUM-2];

  set(ExternalAddress(StubRoutines::crc_table_addr()), table);

  not1(crc); // ~c
  clruwu(crc); // clear upper 32 bits of crc

  // Check if below cutoff, proceed directly to cleanup code
  mov(31, G4);
  cmp_and_br_short(len, G4, Assembler::lessEqualUnsigned, Assembler::pt, L_cleanup_check);

  // Align buffer to 8 byte boundary: O5 = (8 - (buf & 7)) & 7 bytes are
  // handled one at a time and taken off len up front.
  mov(8, O5);
  and3(buf, 0x7, O4);
  sub(O5, O4, O5);
  and3(O5, 0x7, O5);
  sub(len, O5, len);
  ba(L_align_check);
  delayed()->nop();

  // Alignment loop, table look up method for up to 7 bytes
  bind(L_align_loop);
  ldub(buf, 0, O4);
  inc(buf);
  dec(O5);
  xor3(O4, crc, O4);
  and3(O4, 0xFF, O4);
  sllx(O4, 2, O4);
  lduw(table, O4, O4);
  srlx(crc, 8, crc);
  xor3(O4, crc, crc);
  bind(L_align_check);
  nop();
  cmp_and_br_short(O5, 0, Assembler::notEqual, Assembler::pt, L_align_loop);

  // Aligned on 64-bit (8-byte) boundary at this point
  // Check if still above cutoff (31-bytes)
  mov(31, G4);
  cmp_and_br_short(len, G4, Assembler::lessEqualUnsigned, Assembler::pt, L_cleanup_check);
  // At least 32 bytes left to process

  // Free up registers by storing them to FP registers
  for (int i = 0; i < CRC32_TMP_REG_NUM; i++) {
    movxtod(tmp[i], as_FloatRegister(2*i));
  }

  // Determine which loop to enter
  // Shared prologue: load the first 16 bytes into tmp[0]/tmp[1]
  ldxl(buf, G0, tmp[0]);
  inc(buf, 8);
  ldxl(buf, G0, tmp[1]);
  inc(buf, 8);
  xor3(tmp[0], crc, tmp[0]); // Fold CRC into first few bytes
  and3(crc, 0, crc); // Clear out the crc register
  // Main loop needs 128-bytes at least
  mov(128, G4);
  mov(64, tmp[2]);
  cmp_and_br_short(len, G4, Assembler::greaterEqualUnsigned, Assembler::pt, L_main_loop_prologue);
  // Less than 64 bytes
  nop();
  cmp_and_br_short(len, tmp[2], Assembler::lessUnsigned, Assembler::pt, L_fold_tail);
  // Between 64 and 127 bytes: fold three more 16-byte chunks from memory
  set64(CRC32_CONST_96,  const_96,  tmp[8]);
  set64(CRC32_CONST_160, const_160, tmp[9]);
  fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[2], tmp[3], buf, 0);
  fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[4], tmp[5], buf, 16);
  fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[6], tmp[7], buf, 32);
  dec(len, 48);
  ba(L_fold_tail);
  delayed()->nop();

  // Load the remaining 48 bytes of the first 64-byte group into tmp[2..7]
  bind(L_main_loop_prologue);
  for (int i = 2; i < 8; i++) {
    ldxl(buf, G0, tmp[i]);
    inc(buf, 8);
  }

  // Fold total 512 bits of polynomial on each iteration,
  // 128 bits per each of 4 parallel streams
  set64(CRC32_CONST_480, const_480, tmp[8]);
  set64(CRC32_CONST_544, const_544, tmp[9]);

  mov(128, G4);
  bind(L_fold_512b_loop);
  fold_128bit_crc32(tmp[1], tmp[0], const_480, const_544, tmp[9],  tmp[8],  buf,  0);
  fold_128bit_crc32(tmp[3], tmp[2], const_480, const_544, tmp[11], tmp[10], buf, 16);
  fold_128bit_crc32(tmp[5], tmp[4], const_480, const_544, tmp[13], tmp[12], buf, 32);
  fold_128bit_crc32(tmp[7], tmp[6], const_480, const_544, tmp[15], tmp[14], buf, 64);
  dec(len, 64);
  cmp_and_br_short(len, G4, Assembler::greaterEqualUnsigned, Assembler::pt, L_fold_512b_loop);

  // Fold 512 bits to 128 bits
  bind(L_fold_512b);
  set64(CRC32_CONST_96,  const_96,  tmp[8]);
  set64(CRC32_CONST_160, const_160, tmp[9]);

  fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[8], tmp[9], tmp[3], tmp[2]);
  fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[8], tmp[9], tmp[5], tmp[4]);
  fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[8], tmp[9], tmp[7], tmp[6]);
  dec(len, 48);

  // Fold the rest of 128 bits data chunks
  bind(L_fold_tail);
  mov(32, G4);
  cmp_and_br_short(len, G4, Assembler::lessEqualUnsigned, Assembler::pt, L_fold_128b);

  set64(CRC32_CONST_96,  const_96,  tmp[8]);
  set64(CRC32_CONST_160, const_160, tmp[9]);

  bind(L_fold_tail_loop);
  fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[2], tmp[3], buf, 0);
  sub(len, 16, len);
  cmp_and_br_short(len, G4, Assembler::greaterEqualUnsigned, Assembler::pt, L_fold_tail_loop);

  // Fold the 128 bits in tmps 0 - 1 into tmp 1
  bind(L_fold_128b);

  set64(CRC32_CONST_64,  const_64,  tmp[4]);

  xmulx(const_64, tmp[0], tmp[2]);
  xmulxhi(const_64, tmp[0], tmp[3]);

  srl(tmp[2], G0, tmp[4]);
  xmulx(const_64, tmp[4], tmp[4]);

  srlx(tmp[2], 32, tmp[2]);
  sllx(tmp[3], 32, tmp[3]);
  or3(tmp[2], tmp[3], tmp[2]);

  xor3(tmp[4], tmp[1], tmp[4]);
  xor3(tmp[4], tmp[2], tmp[1]);
  dec(len, 8);

  // Use table lookup for the 8 bytes left in tmp[1]
  dec(len, 8);

  // 8 8-bit folds to compute 32-bit CRC.
  for (int j = 0; j < 4; j++) {
    fold_8bit_crc32(tmp[1], table, tmp[2], tmp[3]);
  }
  srl(tmp[1], G0, crc); // move 32 bits to general register
  for (int j = 0; j < 4; j++) {
    fold_8bit_crc32(crc, table, tmp[3]);
  }

  bind(L_8byte_fold_check);

  // Restore int registers saved in FP registers
  for (int i = 0; i < CRC32_TMP_REG_NUM; i++) {
    movdtox(as_FloatRegister(2*i), tmp[i]);
  }

  ba(L_cleanup_check);
  delayed()->nop();

  // Table look-up method for the remaining few bytes
  bind(L_cleanup_loop);
  ldub(buf, 0, O4);
  inc(buf);
  dec(len);
  xor3(O4, crc, O4);
  and3(O4, 0xFF, O4);
  sllx(O4, 2, O4);
  lduw(table, O4, O4);
  srlx(crc, 8, crc);
  xor3(O4, crc, crc);
  bind(L_cleanup_check);
  nop();
  cmp_and_br_short(len, 0, Assembler::greaterUnsigned, Assembler::pt, L_cleanup_loop);

  not1(crc);
}
|
||||||
|
|
||||||
|
@ -904,7 +904,9 @@ public:
|
|||||||
inline void ldf(FloatRegisterImpl::Width w, const Address& a, FloatRegister d, int offset = 0);
|
inline void ldf(FloatRegisterImpl::Width w, const Address& a, FloatRegister d, int offset = 0);
|
||||||
|
|
||||||
// little-endian
|
// little-endian
|
||||||
inline void ldxl(Register s1, Register s2, Register d) { ldxa(s1, s2, ASI_PRIMARY_LITTLE, d); }
|
inline void lduwl(Register s1, Register s2, Register d) { lduwa(s1, s2, ASI_PRIMARY_LITTLE, d); }
|
||||||
|
inline void ldswl(Register s1, Register s2, Register d) { ldswa(s1, s2, ASI_PRIMARY_LITTLE, d);}
|
||||||
|
inline void ldxl( Register s1, Register s2, Register d) { ldxa(s1, s2, ASI_PRIMARY_LITTLE, d); }
|
||||||
inline void ldfl(FloatRegisterImpl::Width w, Register s1, Register s2, FloatRegister d) { ldfa(w, s1, s2, ASI_PRIMARY_LITTLE, d); }
|
inline void ldfl(FloatRegisterImpl::Width w, Register s1, Register s2, FloatRegister d) { ldfa(w, s1, s2, ASI_PRIMARY_LITTLE, d); }
|
||||||
|
|
||||||
// membar pseudo instruction. takes into account target memory model.
|
// membar pseudo instruction. takes into account target memory model.
|
||||||
@ -1469,6 +1471,15 @@ public:
|
|||||||
void movitof_revbytes(Register src, FloatRegister dst, Register tmp1, Register tmp2);
|
void movitof_revbytes(Register src, FloatRegister dst, Register tmp1, Register tmp2);
|
||||||
void movftoi_revbytes(FloatRegister src, Register dst, Register tmp1, Register tmp2);
|
void movftoi_revbytes(FloatRegister src, Register dst, Register tmp1, Register tmp2);
|
||||||
|
|
||||||
|
// CRC32 code for java.util.zip.CRC32::updateBytes0() intrinsic.
|
||||||
|
void kernel_crc32(Register crc, Register buf, Register len, Register table);
|
||||||
|
// Fold 128-bit data chunk
|
||||||
|
void fold_128bit_crc32(Register xcrc_hi, Register xcrc_lo, Register xK_hi, Register xK_lo, Register xtmp_hi, Register xtmp_lo, Register buf, int offset);
|
||||||
|
void fold_128bit_crc32(Register xcrc_hi, Register xcrc_lo, Register xK_hi, Register xK_lo, Register xtmp_hi, Register xtmp_lo, Register xbuf_hi, Register xbuf_lo);
|
||||||
|
// Fold 8-bit data
|
||||||
|
void fold_8bit_crc32(Register xcrc, Register table, Register xtmp, Register tmp);
|
||||||
|
void fold_8bit_crc32(Register crc, Register table, Register tmp);
|
||||||
|
|
||||||
#undef VIRTUAL
|
#undef VIRTUAL
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -5292,6 +5292,38 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
return start;
|
return start;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Arguments:
|
||||||
|
*
|
||||||
|
* Inputs:
|
||||||
|
* O0 - int crc
|
||||||
|
* O1 - byte* buf
|
||||||
|
* O2 - int len
|
||||||
|
* O3 - int* table
|
||||||
|
*
|
||||||
|
* Output:
|
||||||
|
* O0 - int crc result
|
||||||
|
*/
|
||||||
|
address generate_updateBytesCRC32() {
|
||||||
|
assert(UseCRC32Intrinsics, "need VIS3 instructions");
|
||||||
|
|
||||||
|
__ align(CodeEntryAlignment);
|
||||||
|
StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");
|
||||||
|
address start = __ pc();
|
||||||
|
|
||||||
|
const Register crc = O0; // crc
|
||||||
|
const Register buf = O1; // source java byte array address
|
||||||
|
const Register len = O2; // length
|
||||||
|
const Register table = O3; // crc_table address (reuse register)
|
||||||
|
|
||||||
|
__ kernel_crc32(crc, buf, len, table);
|
||||||
|
|
||||||
|
__ retl();
|
||||||
|
__ delayed()->nop();
|
||||||
|
|
||||||
|
return start;
|
||||||
|
}
|
||||||
|
|
||||||
void generate_initial() {
|
void generate_initial() {
|
||||||
// Generates all stubs and initializes the entry points
|
// Generates all stubs and initializes the entry points
|
||||||
|
|
||||||
@ -5324,6 +5356,12 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
|
|
||||||
// Build this early so it's available for the interpreter.
|
// Build this early so it's available for the interpreter.
|
||||||
StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
|
StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
|
||||||
|
|
||||||
|
if (UseCRC32Intrinsics) {
|
||||||
|
// set table address before stub generation which use it
|
||||||
|
StubRoutines::_crc_table_adr = (address)StubRoutines::Sparc::_crc_table;
|
||||||
|
StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -52,3 +52,98 @@ address StubRoutines::Sparc::_stop_subroutine_entry = NULL;
|
|||||||
address StubRoutines::Sparc::_flush_callers_register_windows_entry = CAST_FROM_FN_PTR(address, bootstrap_flush_windows);
|
address StubRoutines::Sparc::_flush_callers_register_windows_entry = CAST_FROM_FN_PTR(address, bootstrap_flush_windows);
|
||||||
|
|
||||||
address StubRoutines::Sparc::_partial_subtype_check = NULL;
|
address StubRoutines::Sparc::_partial_subtype_check = NULL;
|
||||||
|
|
||||||
|
uint64_t StubRoutines::Sparc::_crc_by128_masks[] =
{
  /* The fields in this structure are arranged so that they can be
   * picked up two at a time with 128-bit loads.
   *
   * Because of the flipped bit order of this CRC polynomial,
   * the constant for X**N is left-shifted by 1. This is because
   * a 64 x 64 polynomial multiply produces a 127-bit result
   * but the highest term is always aligned to bit 0 in the container.
   * Pre-shifting by one fixes this, at the cost of potentially making
   * the 32-bit constant no longer fit in a 32-bit container (thus the
   * use of uint64_t, though this is also the size used by the
   * carry-less multiply instruction).
   *
   * In addition, the flipped bit order and highest-term-at-least-bit
   * multiply change the constants used. The 96-bit result will be
   * aligned to the high-term end of the target 128-bit container,
   * not the low-term end; that is, instead of a 512-bit or 576-bit fold,
   * it is a 480 (=512-32) or 544 (=512+64-32) bit fold.
   *
   * This causes additional problems in the 128-to-64-bit reduction; see the
   * code for details. By storing a mask in the otherwise unused half of
   * a 128-bit constant, bits can be cleared before multiplication without
   * storing and reloading. Note that staying on a 128-bit datapath means
   * that some data is uselessly stored and some unused data is intersected
   * with an irrelevant constant.
   */

  ((uint64_t) 0xffffffffUL),        /* low  of K_M_64    */
  ((uint64_t) 0xb1e6b092U << 1),    /* high of K_M_64    */
  ((uint64_t) 0xba8ccbe8U << 1),    /* low  of K_160_96  */
  ((uint64_t) 0x6655004fU << 1),    /* high of K_160_96  */
  ((uint64_t) 0xaa2215eaU << 1),    /* low  of K_544_480 */
  ((uint64_t) 0xe3720acbU << 1)     /* high of K_544_480 */
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* crc_table[] from jdk/src/java.base/share/native/libzip/zlib-1.2.8/crc32.h
|
||||||
|
*/
|
||||||
|
juint StubRoutines::Sparc::_crc_table[] =
|
||||||
|
{
|
||||||
|
0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
|
||||||
|
0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
|
||||||
|
0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
|
||||||
|
0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
|
||||||
|
0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
|
||||||
|
0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
|
||||||
|
0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
|
||||||
|
0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
|
||||||
|
0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
|
||||||
|
0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
|
||||||
|
0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
|
||||||
|
0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
|
||||||
|
0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
|
||||||
|
0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
|
||||||
|
0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
|
||||||
|
0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
|
||||||
|
0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
|
||||||
|
0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
|
||||||
|
0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
|
||||||
|
0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
|
||||||
|
0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
|
||||||
|
0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
|
||||||
|
0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
|
||||||
|
0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
|
||||||
|
0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
|
||||||
|
0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
|
||||||
|
0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
|
||||||
|
0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
|
||||||
|
0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
|
||||||
|
0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
|
||||||
|
0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
|
||||||
|
0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
|
||||||
|
0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
|
||||||
|
0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
|
||||||
|
0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
|
||||||
|
0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
|
||||||
|
0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
|
||||||
|
0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
|
||||||
|
0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
|
||||||
|
0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
|
||||||
|
0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
|
||||||
|
0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
|
||||||
|
0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
|
||||||
|
0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
|
||||||
|
0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
|
||||||
|
0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
|
||||||
|
0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
|
||||||
|
0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
|
||||||
|
0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
|
||||||
|
0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
|
||||||
|
0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
|
||||||
|
0x2d02ef8dUL
|
||||||
|
};
|
||||||
|
@ -53,6 +53,9 @@ class Sparc {
|
|||||||
static address _flush_callers_register_windows_entry;
|
static address _flush_callers_register_windows_entry;
|
||||||
|
|
||||||
static address _partial_subtype_check;
|
static address _partial_subtype_check;
|
||||||
|
// masks and table for CRC32
|
||||||
|
static uint64_t _crc_by128_masks[];
|
||||||
|
static juint _crc_table[];
|
||||||
|
|
||||||
public:
|
public:
|
||||||
// test assembler stop routine by setting registers
|
// test assembler stop routine by setting registers
|
||||||
@ -65,6 +68,8 @@ class Sparc {
|
|||||||
static intptr_t* (*flush_callers_register_windows_func())() { return CAST_TO_FN_PTR(intptr_t* (*)(void), _flush_callers_register_windows_entry); }
|
static intptr_t* (*flush_callers_register_windows_func())() { return CAST_TO_FN_PTR(intptr_t* (*)(void), _flush_callers_register_windows_entry); }
|
||||||
|
|
||||||
static address partial_subtype_check() { return _partial_subtype_check; }
|
static address partial_subtype_check() { return _partial_subtype_check; }
|
||||||
|
|
||||||
|
static address crc_by128_masks_addr() { return (address)_crc_by128_masks; }
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // CPU_SPARC_VM_STUBROUTINES_SPARC_HPP
|
#endif // CPU_SPARC_VM_STUBROUTINES_SPARC_HPP
|
||||||
|
@ -803,6 +803,106 @@ address InterpreterGenerator::generate_Reference_get_entry(void) {
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Method entry for static native methods:
|
||||||
|
* int java.util.zip.CRC32.update(int crc, int b)
|
||||||
|
*/
|
||||||
|
address InterpreterGenerator::generate_CRC32_update_entry() {
|
||||||
|
|
||||||
|
if (UseCRC32Intrinsics) {
|
||||||
|
address entry = __ pc();
|
||||||
|
|
||||||
|
Label L_slow_path;
|
||||||
|
// If we need a safepoint check, generate full interpreter entry.
|
||||||
|
ExternalAddress state(SafepointSynchronize::address_of_state());
|
||||||
|
__ set(ExternalAddress(SafepointSynchronize::address_of_state()), O2);
|
||||||
|
__ set(SafepointSynchronize::_not_synchronized, O3);
|
||||||
|
__ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pt, L_slow_path);
|
||||||
|
|
||||||
|
// Load parameters
|
||||||
|
const Register crc = O0; // initial crc
|
||||||
|
const Register val = O1; // byte to update with
|
||||||
|
const Register table = O2; // address of 256-entry lookup table
|
||||||
|
|
||||||
|
__ ldub(Gargs, 3, val);
|
||||||
|
__ lduw(Gargs, 8, crc);
|
||||||
|
|
||||||
|
__ set(ExternalAddress(StubRoutines::crc_table_addr()), table);
|
||||||
|
|
||||||
|
__ not1(crc); // ~crc
|
||||||
|
__ clruwu(crc);
|
||||||
|
__ update_byte_crc32(crc, val, table);
|
||||||
|
__ not1(crc); // ~crc
|
||||||
|
|
||||||
|
// result in O0
|
||||||
|
__ retl();
|
||||||
|
__ delayed()->nop();
|
||||||
|
|
||||||
|
// generate a vanilla native entry as the slow path
|
||||||
|
__ bind(L_slow_path);
|
||||||
|
__ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
|
||||||
|
return entry;
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Method entry for static native methods:
|
||||||
|
* int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len)
|
||||||
|
* int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
|
||||||
|
*/
|
||||||
|
address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
|
||||||
|
|
||||||
|
if (UseCRC32Intrinsics) {
|
||||||
|
address entry = __ pc();
|
||||||
|
|
||||||
|
Label L_slow_path;
|
||||||
|
// If we need a safepoint check, generate full interpreter entry.
|
||||||
|
ExternalAddress state(SafepointSynchronize::address_of_state());
|
||||||
|
__ set(ExternalAddress(SafepointSynchronize::address_of_state()), O2);
|
||||||
|
__ set(SafepointSynchronize::_not_synchronized, O3);
|
||||||
|
__ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pt, L_slow_path);
|
||||||
|
|
||||||
|
// Load parameters from the stack
|
||||||
|
const Register crc = O0; // initial crc
|
||||||
|
const Register buf = O1; // source java byte array address
|
||||||
|
const Register len = O2; // len
|
||||||
|
const Register offset = O3; // offset
|
||||||
|
|
||||||
|
// Arguments are reversed on java expression stack
|
||||||
|
// Calculate address of start element
|
||||||
|
if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) {
|
||||||
|
__ lduw(Gargs, 0, len);
|
||||||
|
__ lduw(Gargs, 8, offset);
|
||||||
|
__ ldx( Gargs, 16, buf);
|
||||||
|
__ lduw(Gargs, 32, crc);
|
||||||
|
__ add(buf, offset, buf);
|
||||||
|
} else {
|
||||||
|
__ lduw(Gargs, 0, len);
|
||||||
|
__ lduw(Gargs, 8, offset);
|
||||||
|
__ ldx( Gargs, 16, buf);
|
||||||
|
__ lduw(Gargs, 24, crc);
|
||||||
|
__ add(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE), buf); // account for the header size
|
||||||
|
__ add(buf ,offset, buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Call the crc32 kernel
|
||||||
|
__ MacroAssembler::save_thread(L7_thread_cache);
|
||||||
|
__ kernel_crc32(crc, buf, len, O3);
|
||||||
|
__ MacroAssembler::restore_thread(L7_thread_cache);
|
||||||
|
|
||||||
|
// result in O0
|
||||||
|
__ retl();
|
||||||
|
__ delayed()->nop();
|
||||||
|
|
||||||
|
// generate a vanilla native entry as the slow path
|
||||||
|
__ bind(L_slow_path);
|
||||||
|
__ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
|
||||||
|
return entry;
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
// Interpreter stub for calling a native method. (asm interpreter)
|
// Interpreter stub for calling a native method. (asm interpreter)
|
||||||
// This sets up a somewhat different looking stack for calling the native method
|
// This sets up a somewhat different looking stack for calling the native method
|
||||||
|
@ -347,6 +347,15 @@ void VM_Version::initialize() {
|
|||||||
FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
|
FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CRC32 intrinsics are enabled by default only when UseVIS > 2. If the flag
// is on without that level of VIS support, warn and turn it off.
if (UseVIS > 2) {
  if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
    FLAG_SET_DEFAULT(UseCRC32Intrinsics, true);
  }
} else if (UseCRC32Intrinsics) {
  warning("SPARC CRC32 intrinsics require VIS3 instructions support. Intrinsics will be disabled");
  FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
}
|
||||||
|
|
||||||
if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
|
if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
|
||||||
(cache_line_size > ContendedPaddingWidth))
|
(cache_line_size > ContendedPaddingWidth))
|
||||||
ContendedPaddingWidth = cache_line_size;
|
ContendedPaddingWidth = cache_line_size;
|
||||||
|
221
hotspot/test/compiler/intrinsics/crc32/TestCRC32.java
Normal file
221
hotspot/test/compiler/intrinsics/crc32/TestCRC32.java
Normal file
@ -0,0 +1,221 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @test
|
||||||
|
* @bug 8143012
|
||||||
|
* @summary CRC32 Intrinsics support on SPARC
|
||||||
|
*
|
||||||
|
* @run main/othervm/timeout=720 -Xbatch TestCRC32 -m
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.util.zip.Checksum;
|
||||||
|
import java.util.zip.CRC32;
|
||||||
|
|
||||||
|
/**
 * Functional and micro-benchmark test for java.util.zip.CRC32.
 *
 * Modes (selected by the command line):
 *   -m [warmupIters]       run the multi-case correctness test only
 *   [iters [warmupIters]]  time CRC32.update(byte[]) and
 *                          CRC32.update(ByteBuffer) and compare both against a
 *                          reference value computed once up front
 *
 * The system properties "offset" and "msgSize" control the single-message
 * mode. Any CRC mismatch makes the test fail with a RuntimeException after
 * the diagnostics have been printed.
 */
public class TestCRC32 {
    public static void main(String[] args) {
        int offset = Integer.getInteger("offset", 0);
        int msgSize = Integer.getInteger("msgSize", 512);
        boolean multi = false;
        int iters = 20000;
        int warmupIters = 20000;

        if (args.length > 0) {
            if (args[0].equals("-m")) {
                multi = true;
            } else {
                iters = Integer.parseInt(args[0]);
            }
            if (args.length > 1) {
                warmupIters = Integer.parseInt(args[1]);
            }
        }

        if (multi) {
            test_multi(warmupIters);
            return;
        }

        System.out.println(" offset = " + offset);
        System.out.println("msgSize = " + msgSize + " bytes");
        System.out.println(" iters = " + iters);

        byte[] b = initializedBytes(msgSize, offset);

        CRC32 crc0 = new CRC32();
        CRC32 crc1 = new CRC32();
        CRC32 crc2 = new CRC32();

        // Reference value, computed once before any warm-up loop runs.
        crc0.update(b, offset, msgSize);

        // Remember mismatches so that the test fails instead of merely
        // printing them; the throw is deferred until all output is produced.
        boolean passed = true;

        System.out.println("-------------------------------------------------------");

        /* warm up */
        for (int i = 0; i < warmupIters; i++) {
            crc1.reset();
            crc1.update(b, offset, msgSize);
        }

        /* measure performance */
        long start = System.nanoTime();
        for (int i = 0; i < iters; i++) {
            crc1.reset();
            crc1.update(b, offset, msgSize);
        }
        long end = System.nanoTime();
        double total = (double)(end - start)/1e9;         // in seconds
        double thruput = (double)msgSize*iters/1e6/total; // in MB/s
        System.out.println("CRC32.update(byte[]) runtime = " + total + " seconds");
        System.out.println("CRC32.update(byte[]) throughput = " + thruput + " MB/s");

        /* check correctness */
        for (int i = 0; i < iters; i++) {
            crc1.reset();
            crc1.update(b, offset, msgSize);
            if (!check(crc0, crc1)) {
                passed = false;
                break;
            }
        }
        report("CRCs", crc0, crc1);

        System.out.println("-------------------------------------------------------");

        ByteBuffer buf = ByteBuffer.allocateDirect(msgSize);
        buf.put(b, offset, msgSize);
        buf.flip();

        /* warm up */
        for (int i = 0; i < warmupIters; i++) {
            crc2.reset();
            crc2.update(buf);
            buf.rewind();
        }

        /* measure performance */
        start = System.nanoTime();
        for (int i = 0; i < iters; i++) {
            crc2.reset();
            crc2.update(buf);
            buf.rewind();
        }
        end = System.nanoTime();
        total = (double)(end - start)/1e9;         // in seconds
        thruput = (double)msgSize*iters/1e6/total; // in MB/s
        System.out.println("CRC32.update(ByteBuffer) runtime = " + total + " seconds");
        System.out.println("CRC32.update(ByteBuffer) throughput = " + thruput + " MB/s");

        /* check correctness */
        for (int i = 0; i < iters; i++) {
            crc2.reset();
            crc2.update(buf);
            buf.rewind();
            if (!check(crc0, crc2)) {
                passed = false;
                break;
            }
        }
        report("CRCs", crc0, crc2);

        System.out.println("-------------------------------------------------------");

        if (!passed) {
            throw new RuntimeException("TEST FAILED: CRC32 mismatch");
        }
    }

    /** Prints both checksum values, prefixed with the label s. */
    private static void report(String s, Checksum crc0, Checksum crc1) {
        System.out.printf("%s: crc0 = %08x, crc1 = %08x\n",
                          s, crc0.getValue(), crc1.getValue());
    }

    /**
     * Compares two checksums; prints an error line to stderr on mismatch.
     *
     * @return true when both checksums hold the same value
     */
    private static boolean check(Checksum crc0, Checksum crc1) {
        if (crc0.getValue() != crc1.getValue()) {
            System.err.printf("ERROR: crc0 = %08x, crc1 = %08x\n",
                              crc0.getValue(), crc1.getValue());
            return false;
        }
        return true;
    }

    /**
     * Builds a test message of M + offset bytes: the first offset bytes hold
     * (byte) i, the remaining M bytes hold (byte) (i - offset).
     */
    private static byte[] initializedBytes(int M, int offset) {
        byte[] bytes = new byte[M + offset];
        for (int i = 0; i < offset; i++) {
            bytes[i] = (byte) i;
        }
        for (int i = offset; i < bytes.length; i++) {
            bytes[i] = (byte) (i - offset);
        }
        return bytes;
    }

    /**
     * Runs CRC32.update(byte[], off, len) over every offset/size combination,
     * iters times each, and compares the final values with reference values
     * computed before the repetition loop.
     *
     * @throws RuntimeException if any combination yields a different CRC
     */
    private static void test_multi(int iters) {
        int len1 = 8;    // the  8B/iteration loop
        int len2 = 32;   // the 32B/iteration loop
        int len3 = 4096; // the 4KB/iteration loop

        byte[] b = initializedBytes(len3*16, 0);
        int[] offsets = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 16, 32, 64, 128, 256, 512 };
        int[] sizes = { 0, 1, 2, 3, 4, 5, 6, 7,
                        len1, len1+1, len1+2, len1+3, len1+4, len1+5, len1+6, len1+7,
                        len1*2, len1*2+1, len1*2+3, len1*2+5, len1*2+7,
                        len2, len2+1, len2+3, len2+5, len2+7,
                        len2*2, len2*4, len2*8, len2*16, len2*32, len2*64,
                        len3, len3+1, len3+3, len3+5, len3+7,
                        len3*2, len3*4, len3*8,
                        len1+len2, len1+len2+1, len1+len2+3, len1+len2+5, len1+len2+7,
                        len1+len3, len1+len3+1, len1+len3+3, len1+len3+5, len1+len3+7,
                        len2+len3, len2+len3+1, len2+len3+3, len2+len3+5, len2+len3+7,
                        len1+len2+len3, len1+len2+len3+1, len1+len2+len3+3,
                        len1+len2+len3+5, len1+len2+len3+7,
                        (len1+len2+len3)*2, (len1+len2+len3)*2+1, (len1+len2+len3)*2+3,
                        (len1+len2+len3)*2+5, (len1+len2+len3)*2+7,
                        (len1+len2+len3)*3, (len1+len2+len3)*3-1, (len1+len2+len3)*3-3,
                        (len1+len2+len3)*3-5, (len1+len2+len3)*3-7 };
        CRC32[] crc0 = new CRC32[offsets.length*sizes.length];
        CRC32[] crc1 = new CRC32[offsets.length*sizes.length];
        int i, j, k;

        System.out.printf("testing %d cases ...\n", offsets.length*sizes.length);

        /* compute the reference results once, before the warm-up loop */
        for (i = 0; i < offsets.length; i++) {
            for (j = 0; j < sizes.length; j++) {
                crc0[i*sizes.length + j] = new CRC32();
                crc1[i*sizes.length + j] = new CRC32();
                crc0[i*sizes.length + j].update(b, offsets[i], sizes[j]);
            }
        }

        /* warm up the JIT compiler and get result */
        for (k = 0; k < iters; k++) {
            for (i = 0; i < offsets.length; i++) {
                for (j = 0; j < sizes.length; j++) {
                    crc1[i*sizes.length + j].reset();
                    crc1[i*sizes.length + j].update(b, offsets[i], sizes[j]);
                }
            }
        }

        /* check correctness; report every failing case before failing */
        int failures = 0;
        for (i = 0; i < offsets.length; i++) {
            for (j = 0; j < sizes.length; j++) {
                if (!check(crc0[i*sizes.length + j], crc1[i*sizes.length + j])) {
                    System.out.printf("offsets[%d] = %d", i, offsets[i]);
                    System.out.printf("\tsizes[%d] = %d\n", j, sizes[j]);
                    failures++;
                }
            }
        }
        if (failures > 0) {
            throw new RuntimeException("TEST FAILED: " + failures + " CRC32 mismatch(es)");
        }
    }
}
|
Loading…
x
Reference in New Issue
Block a user