8175369: [ppc] Provide intrinsic implementation for CRC32C
Reviewed-by: mdoerr, simonis, kvn
This commit is contained in:
parent
99a554c5ad
commit
0171aad88e
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2016 SAP SE. All rights reserved.
|
||||
* Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2017, SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -3177,9 +3177,8 @@ void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) {
|
||||
assert_different_registers(val, crc, res);
|
||||
|
||||
__ load_const_optimized(res, StubRoutines::crc_table_addr(), R0);
|
||||
__ nand(crc, crc, crc); // ~crc
|
||||
__ update_byte_crc32(crc, val, res);
|
||||
__ nand(res, crc, crc); // ~crc
|
||||
__ kernel_crc32_singleByteReg(crc, val, res, true);
|
||||
__ mr(res, crc);
|
||||
}
|
||||
|
||||
#undef __
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2015 SAP SE. All rights reserved.
|
||||
* Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2017, SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -63,18 +63,6 @@ void LIRItem::load_nonconstant() {
|
||||
}
|
||||
|
||||
|
||||
inline void load_int_as_long(LIR_List *ll, LIRItem &li, LIR_Opr dst) {
|
||||
LIR_Opr r = li.value()->operand();
|
||||
if (r->is_register()) {
|
||||
LIR_Opr dst_l = FrameMap::as_long_opr(dst->as_register());
|
||||
ll->convert(Bytecodes::_i2l, li.result(), dst_l); // Convert.
|
||||
} else {
|
||||
// Constants or memory get loaded with sign extend on this platform.
|
||||
ll->move(li.result(), dst);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//--------------------------------------------------------------
|
||||
// LIRGenerator
|
||||
//--------------------------------------------------------------
|
||||
@ -1419,10 +1407,9 @@ void LIRGenerator::do_update_CRC32(Intrinsic* x) {
|
||||
arg2 = cc->at(1),
|
||||
arg3 = cc->at(2);
|
||||
|
||||
// CCallingConventionRequiresIntsAsLongs
|
||||
crc.load_item_force(arg1); // We skip int->long conversion here, because CRC32 stub doesn't care about high bits.
|
||||
__ leal(LIR_OprFact::address(a), arg2);
|
||||
load_int_as_long(gen()->lir(), len, arg3);
|
||||
len.load_item_force(arg3); // We skip int->long conversion here, , because CRC32 stub expects int.
|
||||
|
||||
__ call_runtime_leaf(StubRoutines::updateBytesCRC32(), LIR_OprFact::illegalOpr, result_reg, cc->args());
|
||||
__ move(result_reg, result);
|
||||
@ -1434,6 +1421,66 @@ void LIRGenerator::do_update_CRC32(Intrinsic* x) {
|
||||
}
|
||||
}
|
||||
|
||||
void LIRGenerator::do_update_CRC32C(Intrinsic* x) {
|
||||
assert(UseCRC32CIntrinsics, "or should not be here");
|
||||
LIR_Opr result = rlock_result(x);
|
||||
|
||||
switch (x->id()) {
|
||||
case vmIntrinsics::_updateBytesCRC32C:
|
||||
case vmIntrinsics::_updateDirectByteBufferCRC32C: {
|
||||
bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32C);
|
||||
|
||||
LIRItem crc(x->argument_at(0), this);
|
||||
LIRItem buf(x->argument_at(1), this);
|
||||
LIRItem off(x->argument_at(2), this);
|
||||
LIRItem len(x->argument_at(3), this);
|
||||
buf.load_item();
|
||||
off.load_nonconstant();
|
||||
|
||||
LIR_Opr index = off.result();
|
||||
int offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0;
|
||||
if (off.result()->is_constant()) {
|
||||
index = LIR_OprFact::illegalOpr;
|
||||
offset += off.result()->as_jint();
|
||||
}
|
||||
LIR_Opr base_op = buf.result();
|
||||
LIR_Address* a = NULL;
|
||||
|
||||
if (index->is_valid()) {
|
||||
LIR_Opr tmp = new_register(T_LONG);
|
||||
__ convert(Bytecodes::_i2l, index, tmp);
|
||||
index = tmp;
|
||||
__ add(index, LIR_OprFact::intptrConst(offset), index);
|
||||
a = new LIR_Address(base_op, index, T_BYTE);
|
||||
} else {
|
||||
a = new LIR_Address(base_op, offset, T_BYTE);
|
||||
}
|
||||
|
||||
BasicTypeList signature(3);
|
||||
signature.append(T_INT);
|
||||
signature.append(T_ADDRESS);
|
||||
signature.append(T_INT);
|
||||
CallingConvention* cc = frame_map()->c_calling_convention(&signature);
|
||||
const LIR_Opr result_reg = result_register_for(x->type());
|
||||
|
||||
LIR_Opr arg1 = cc->at(0),
|
||||
arg2 = cc->at(1),
|
||||
arg3 = cc->at(2);
|
||||
|
||||
crc.load_item_force(arg1); // We skip int->long conversion here, because CRC32 stub doesn't care about high bits.
|
||||
__ leal(LIR_OprFact::address(a), arg2);
|
||||
len.load_item_force(arg3); // We skip int->long conversion here, , because CRC32 stub expects int.
|
||||
|
||||
__ call_runtime_leaf(StubRoutines::updateBytesCRC32C(), LIR_OprFact::illegalOpr, result_reg, cc->args());
|
||||
__ move(result_reg, result);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) {
|
||||
assert(x->number_of_arguments() == 3, "wrong type");
|
||||
assert(UseFMA, "Needs FMA instructions support.");
|
||||
@ -1460,7 +1507,3 @@ void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) {
|
||||
void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) {
|
||||
fatal("vectorizedMismatch intrinsic is not implemented on this platform");
|
||||
}
|
||||
|
||||
void LIRGenerator::do_update_CRC32C(Intrinsic* x) {
|
||||
Unimplemented();
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2016 SAP SE. All rights reserved.
|
||||
* Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2017, SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -4092,7 +4092,7 @@ void MacroAssembler::update_byte_crc32(Register crc, Register val, Register tabl
|
||||
* @param table register pointing to CRC table
|
||||
*/
|
||||
void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register len, Register table,
|
||||
Register data, bool loopAlignment, bool invertCRC) {
|
||||
Register data, bool loopAlignment) {
|
||||
assert_different_registers(crc, buf, len, table, data);
|
||||
|
||||
Label L_mainLoop, L_done;
|
||||
@ -4103,10 +4103,6 @@ void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register
|
||||
clrldi_(len, len, 32); // Enforce 32 bit. Anything to do?
|
||||
beq(CCR0, L_done);
|
||||
|
||||
if (invertCRC) {
|
||||
nand(crc, crc, crc); // ~c
|
||||
}
|
||||
|
||||
mtctr(len);
|
||||
align(mainLoop_alignment);
|
||||
BIND(L_mainLoop);
|
||||
@ -4115,10 +4111,6 @@ void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register
|
||||
update_byte_crc32(crc, data, table);
|
||||
bdnz(L_mainLoop); // Iterate.
|
||||
|
||||
if (invertCRC) {
|
||||
nand(crc, crc, crc); // ~c
|
||||
}
|
||||
|
||||
bind(L_done);
|
||||
}
|
||||
|
||||
@ -4175,7 +4167,8 @@ void MacroAssembler::update_1word_crc32(Register crc, Register buf, Register tab
|
||||
*/
|
||||
void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
|
||||
Register t0, Register t1, Register t2, Register t3,
|
||||
Register tc0, Register tc1, Register tc2, Register tc3) {
|
||||
Register tc0, Register tc1, Register tc2, Register tc3,
|
||||
bool invertCRC) {
|
||||
assert_different_registers(crc, buf, len, table);
|
||||
|
||||
Label L_mainLoop, L_tail;
|
||||
@ -4189,14 +4182,16 @@ void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len
|
||||
const int complexThreshold = 2*mainLoop_stepping;
|
||||
|
||||
// Don't test for len <= 0 here. This pathological case should not occur anyway.
|
||||
// Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles.
|
||||
// The situation itself is detected and handled correctly by the conditional branches
|
||||
// following aghi(len, -stepping) and aghi(len, +stepping).
|
||||
// Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles
|
||||
// for all well-behaved cases. The situation itself is detected and handled correctly
|
||||
// within update_byteLoop_crc32.
|
||||
assert(tailLoop_stepping == 1, "check tailLoop_stepping!");
|
||||
|
||||
BLOCK_COMMENT("kernel_crc32_2word {");
|
||||
|
||||
nand(crc, crc, crc); // ~c
|
||||
if (invertCRC) {
|
||||
nand(crc, crc, crc); // 1s complement of crc
|
||||
}
|
||||
|
||||
// Check for short (<mainLoop_stepping) buffer.
|
||||
cmpdi(CCR0, len, complexThreshold);
|
||||
@ -4217,7 +4212,7 @@ void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len
|
||||
blt(CCR0, L_tail); // For less than one mainloop_stepping left, do only tail processing
|
||||
mr(len, tmp); // remaining bytes for main loop (>=mainLoop_stepping is guaranteed).
|
||||
}
|
||||
update_byteLoop_crc32(crc, buf, tmp2, table, data, false, false);
|
||||
update_byteLoop_crc32(crc, buf, tmp2, table, data, false);
|
||||
}
|
||||
|
||||
srdi(tmp2, len, log_stepping); // #iterations for mainLoop
|
||||
@ -4253,9 +4248,11 @@ void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len
|
||||
|
||||
// Process last few (<complexThreshold) bytes of buffer.
|
||||
BIND(L_tail);
|
||||
update_byteLoop_crc32(crc, buf, len, table, data, false, false);
|
||||
update_byteLoop_crc32(crc, buf, len, table, data, false);
|
||||
|
||||
nand(crc, crc, crc); // ~c
|
||||
if (invertCRC) {
|
||||
nand(crc, crc, crc); // 1s complement of crc
|
||||
}
|
||||
BLOCK_COMMENT("} kernel_crc32_2word");
|
||||
}
|
||||
|
||||
@ -4269,7 +4266,8 @@ void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len
|
||||
*/
|
||||
void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
|
||||
Register t0, Register t1, Register t2, Register t3,
|
||||
Register tc0, Register tc1, Register tc2, Register tc3) {
|
||||
Register tc0, Register tc1, Register tc2, Register tc3,
|
||||
bool invertCRC) {
|
||||
assert_different_registers(crc, buf, len, table);
|
||||
|
||||
Label L_mainLoop, L_tail;
|
||||
@ -4283,14 +4281,16 @@ void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len
|
||||
const int complexThreshold = 2*mainLoop_stepping;
|
||||
|
||||
// Don't test for len <= 0 here. This pathological case should not occur anyway.
|
||||
// Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles.
|
||||
// The situation itself is detected and handled correctly by the conditional branches
|
||||
// following aghi(len, -stepping) and aghi(len, +stepping).
|
||||
// Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles
|
||||
// for all well-behaved cases. The situation itself is detected and handled correctly
|
||||
// within update_byteLoop_crc32.
|
||||
assert(tailLoop_stepping == 1, "check tailLoop_stepping!");
|
||||
|
||||
BLOCK_COMMENT("kernel_crc32_1word {");
|
||||
|
||||
nand(crc, crc, crc); // ~c
|
||||
if (invertCRC) {
|
||||
nand(crc, crc, crc); // 1s complement of crc
|
||||
}
|
||||
|
||||
// Check for short (<mainLoop_stepping) buffer.
|
||||
cmpdi(CCR0, len, complexThreshold);
|
||||
@ -4311,7 +4311,7 @@ void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len
|
||||
blt(CCR0, L_tail); // For less than one mainloop_stepping left, do only tail processing
|
||||
mr(len, tmp); // remaining bytes for main loop (>=mainLoop_stepping is guaranteed).
|
||||
}
|
||||
update_byteLoop_crc32(crc, buf, tmp2, table, data, false, false);
|
||||
update_byteLoop_crc32(crc, buf, tmp2, table, data, false);
|
||||
}
|
||||
|
||||
srdi(tmp2, len, log_stepping); // #iterations for mainLoop
|
||||
@ -4346,9 +4346,11 @@ void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len
|
||||
|
||||
// Process last few (<complexThreshold) bytes of buffer.
|
||||
BIND(L_tail);
|
||||
update_byteLoop_crc32(crc, buf, len, table, data, false, false);
|
||||
update_byteLoop_crc32(crc, buf, len, table, data, false);
|
||||
|
||||
nand(crc, crc, crc); // ~c
|
||||
if (invertCRC) {
|
||||
nand(crc, crc, crc); // 1s complement of crc
|
||||
}
|
||||
BLOCK_COMMENT("} kernel_crc32_1word");
|
||||
}
|
||||
|
||||
@ -4361,16 +4363,24 @@ void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len
|
||||
* Uses R7_ARG5, R8_ARG6 as work registers.
|
||||
*/
|
||||
void MacroAssembler::kernel_crc32_1byte(Register crc, Register buf, Register len, Register table,
|
||||
Register t0, Register t1, Register t2, Register t3) {
|
||||
Register t0, Register t1, Register t2, Register t3,
|
||||
bool invertCRC) {
|
||||
assert_different_registers(crc, buf, len, table);
|
||||
|
||||
Register data = t0; // Holds the current byte to be folded into crc.
|
||||
|
||||
BLOCK_COMMENT("kernel_crc32_1byte {");
|
||||
|
||||
// Process all bytes in a single-byte loop.
|
||||
update_byteLoop_crc32(crc, buf, len, table, data, true, true);
|
||||
if (invertCRC) {
|
||||
nand(crc, crc, crc); // 1s complement of crc
|
||||
}
|
||||
|
||||
// Process all bytes in a single-byte loop.
|
||||
update_byteLoop_crc32(crc, buf, len, table, data, true);
|
||||
|
||||
if (invertCRC) {
|
||||
nand(crc, crc, crc); // 1s complement of crc
|
||||
}
|
||||
BLOCK_COMMENT("} kernel_crc32_1byte");
|
||||
}
|
||||
|
||||
@ -4388,7 +4398,8 @@ void MacroAssembler::kernel_crc32_1byte(Register crc, Register buf, Register len
|
||||
*/
|
||||
void MacroAssembler::kernel_crc32_1word_vpmsumd(Register crc, Register buf, Register len, Register table,
|
||||
Register constants, Register barretConstants,
|
||||
Register t0, Register t1, Register t2, Register t3, Register t4) {
|
||||
Register t0, Register t1, Register t2, Register t3, Register t4,
|
||||
bool invertCRC) {
|
||||
assert_different_registers(crc, buf, len, table);
|
||||
|
||||
Label L_alignedHead, L_tail, L_alignTail, L_start, L_end;
|
||||
@ -4406,13 +4417,15 @@ void MacroAssembler::kernel_crc32_1word_vpmsumd(Register crc, Register buf, Regi
|
||||
Register tc0 = t4;
|
||||
Register tc1 = constants;
|
||||
Register tc2 = barretConstants;
|
||||
kernel_crc32_1word(crc, buf, len, table,t0, t1, t2, t3, tc0, tc1, tc2, table);
|
||||
kernel_crc32_1word(crc, buf, len, table,t0, t1, t2, t3, tc0, tc1, tc2, table, invertCRC);
|
||||
b(L_end);
|
||||
|
||||
BIND(L_start);
|
||||
|
||||
// 2. ~c
|
||||
nand(crc, crc, crc);
|
||||
if (invertCRC) {
|
||||
nand(crc, crc, crc); // 1s complement of crc
|
||||
}
|
||||
|
||||
// 3. calculate from 0 to first 128bit-aligned address
|
||||
clrldi_(prealign, buf, 57);
|
||||
@ -4421,7 +4434,7 @@ void MacroAssembler::kernel_crc32_1word_vpmsumd(Register crc, Register buf, Regi
|
||||
subfic(prealign, prealign, 128);
|
||||
|
||||
subf(len, prealign, len);
|
||||
update_byteLoop_crc32(crc, buf, prealign, table, t2, false, false);
|
||||
update_byteLoop_crc32(crc, buf, prealign, table, t2, false);
|
||||
|
||||
// 4. calculate from first 128bit-aligned address to last 128bit-aligned address
|
||||
BIND(L_alignedHead);
|
||||
@ -4436,12 +4449,14 @@ void MacroAssembler::kernel_crc32_1word_vpmsumd(Register crc, Register buf, Regi
|
||||
cmpdi(CCR0, postalign, 0);
|
||||
beq(CCR0, L_tail);
|
||||
|
||||
update_byteLoop_crc32(crc, buf, postalign, table, t2, false, false);
|
||||
update_byteLoop_crc32(crc, buf, postalign, table, t2, false);
|
||||
|
||||
BIND(L_tail);
|
||||
|
||||
// 6. ~c
|
||||
nand(crc, crc, crc);
|
||||
if (invertCRC) {
|
||||
nand(crc, crc, crc); // 1s complement of crc
|
||||
}
|
||||
|
||||
BIND(L_end);
|
||||
|
||||
@ -4933,16 +4948,35 @@ void MacroAssembler::kernel_crc32_1word_aligned(Register crc, Register buf, Regi
|
||||
offsetInt -= 8; ld(R31, offsetInt, R1_SP);
|
||||
}
|
||||
|
||||
void MacroAssembler::kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp) {
|
||||
void MacroAssembler::kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp, bool invertCRC) {
|
||||
assert_different_registers(crc, buf, /* len, not used!! */ table, tmp);
|
||||
|
||||
BLOCK_COMMENT("kernel_crc32_singleByte:");
|
||||
nand(crc, crc, crc); // ~c
|
||||
if (invertCRC) {
|
||||
nand(crc, crc, crc); // 1s complement of crc
|
||||
}
|
||||
|
||||
lbz(tmp, 0, buf); // Byte from buffer, zero-extended.
|
||||
lbz(tmp, 0, buf); // Byte from buffer, zero-extended.
|
||||
update_byte_crc32(crc, tmp, table);
|
||||
|
||||
nand(crc, crc, crc); // ~c
|
||||
if (invertCRC) {
|
||||
nand(crc, crc, crc); // 1s complement of crc
|
||||
}
|
||||
}
|
||||
|
||||
void MacroAssembler::kernel_crc32_singleByteReg(Register crc, Register val, Register table, bool invertCRC) {
|
||||
assert_different_registers(crc, val, table);
|
||||
|
||||
BLOCK_COMMENT("kernel_crc32_singleByteReg:");
|
||||
if (invertCRC) {
|
||||
nand(crc, crc, crc); // 1s complement of crc
|
||||
}
|
||||
|
||||
update_byte_crc32(crc, val, table);
|
||||
|
||||
if (invertCRC) {
|
||||
nand(crc, crc, crc); // 1s complement of crc
|
||||
}
|
||||
}
|
||||
|
||||
// dest_lo += src1 + src2
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2016 SAP SE. All rights reserved.
|
||||
* Copyright (c) 2002, 2017, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2017, SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -817,33 +817,47 @@ class MacroAssembler: public Assembler {
|
||||
Register tmp6, Register tmp7, Register tmp8, Register tmp9, Register tmp10,
|
||||
Register tmp11, Register tmp12, Register tmp13);
|
||||
|
||||
// CRC32 Intrinsics.
|
||||
// Emitters for CRC32 calculation.
|
||||
// A note on invertCRC:
|
||||
// Unfortunately, internal representation of crc differs between CRC32 and CRC32C.
|
||||
// CRC32 holds it's current crc value in the externally visible representation.
|
||||
// CRC32C holds it's current crc value in internal format, ready for updating.
|
||||
// Thus, the crc value must be bit-flipped before updating it in the CRC32 case.
|
||||
// In the CRC32C case, it must be bit-flipped when it is given to the outside world (getValue()).
|
||||
// The bool invertCRC parameter indicates whether bit-flipping is required before updates.
|
||||
void load_reverse_32(Register dst, Register src);
|
||||
int crc32_table_columns(Register table, Register tc0, Register tc1, Register tc2, Register tc3);
|
||||
void fold_byte_crc32(Register crc, Register val, Register table, Register tmp);
|
||||
void fold_8bit_crc32(Register crc, Register table, Register tmp);
|
||||
void update_byte_crc32(Register crc, Register val, Register table);
|
||||
void update_byteLoop_crc32(Register crc, Register buf, Register len, Register table,
|
||||
Register data, bool loopAlignment, bool invertCRC);
|
||||
Register data, bool loopAlignment);
|
||||
void update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc,
|
||||
Register t0, Register t1, Register t2, Register t3,
|
||||
Register tc0, Register tc1, Register tc2, Register tc3);
|
||||
void kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
|
||||
Register t0, Register t1, Register t2, Register t3,
|
||||
Register tc0, Register tc1, Register tc2, Register tc3);
|
||||
Register tc0, Register tc1, Register tc2, Register tc3,
|
||||
bool invertCRC);
|
||||
void kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
|
||||
Register t0, Register t1, Register t2, Register t3,
|
||||
Register tc0, Register tc1, Register tc2, Register tc3);
|
||||
Register tc0, Register tc1, Register tc2, Register tc3,
|
||||
bool invertCRC);
|
||||
void kernel_crc32_1byte(Register crc, Register buf, Register len, Register table,
|
||||
Register t0, Register t1, Register t2, Register t3);
|
||||
Register t0, Register t1, Register t2, Register t3,
|
||||
bool invertCRC);
|
||||
void kernel_crc32_1word_vpmsumd(Register crc, Register buf, Register len, Register table,
|
||||
Register constants, Register barretConstants,
|
||||
Register t0, Register t1, Register t2, Register t3, Register t4);
|
||||
Register t0, Register t1, Register t2, Register t3, Register t4,
|
||||
bool invertCRC);
|
||||
void kernel_crc32_1word_aligned(Register crc, Register buf, Register len,
|
||||
Register constants, Register barretConstants,
|
||||
Register t0, Register t1, Register t2);
|
||||
|
||||
void kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp);
|
||||
void kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp,
|
||||
bool invertCRC);
|
||||
void kernel_crc32_singleByteReg(Register crc, Register val, Register table,
|
||||
bool invertCRC);
|
||||
|
||||
//
|
||||
// Debugging
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2016 SAP SE. All rights reserved.
|
||||
* Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2017, SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -3276,6 +3276,36 @@ class StubGenerator: public StubCodeGenerator {
|
||||
return start;
|
||||
}
|
||||
|
||||
|
||||
// Compute CRC32/CRC32C function.
|
||||
void generate_CRC_updateBytes(const char* name, Register table, bool invertCRC) {
|
||||
|
||||
// arguments to kernel_crc32:
|
||||
const Register crc = R3_ARG1; // Current checksum, preset by caller or result from previous call.
|
||||
const Register data = R4_ARG2; // source byte array
|
||||
const Register dataLen = R5_ARG3; // #bytes to process
|
||||
|
||||
const Register t0 = R2;
|
||||
const Register t1 = R7;
|
||||
const Register t2 = R8;
|
||||
const Register t3 = R9;
|
||||
const Register tc0 = R10;
|
||||
const Register tc1 = R11;
|
||||
const Register tc2 = R12;
|
||||
|
||||
BLOCK_COMMENT("Stub body {");
|
||||
assert_different_registers(crc, data, dataLen, table);
|
||||
|
||||
__ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, table, invertCRC);
|
||||
|
||||
BLOCK_COMMENT("return");
|
||||
__ mr_if_needed(R3_RET, crc); // Updated crc is function result. No copying required (R3_ARG1 == R3_RET).
|
||||
__ blr();
|
||||
|
||||
BLOCK_COMMENT("} Stub body");
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Arguments:
|
||||
*
|
||||
@ -3296,14 +3326,14 @@ class StubGenerator: public StubCodeGenerator {
|
||||
StubCodeMark mark(this, "StubRoutines", name);
|
||||
address start = __ function_entry(); // Remember stub start address (is rtn value).
|
||||
|
||||
const Register table = R6; // crc table address
|
||||
|
||||
#ifdef VM_LITTLE_ENDIAN
|
||||
// arguments to kernel_crc32:
|
||||
const Register crc = R3_ARG1; // Current checksum, preset by caller or result from previous call.
|
||||
const Register data = R4_ARG2; // source byte array
|
||||
const Register dataLen = R5_ARG3; // #bytes to process
|
||||
|
||||
const Register table = R6; // crc table address
|
||||
|
||||
#ifdef VM_LITTLE_ENDIAN
|
||||
if (VM_Version::has_vpmsumb()) {
|
||||
const Register constants = R2; // constants address
|
||||
const Register bconstants = R8; // barret table address
|
||||
@ -3321,7 +3351,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
StubRoutines::ppc64::generate_load_crc_constants_addr(_masm, constants);
|
||||
StubRoutines::ppc64::generate_load_crc_barret_constants_addr(_masm, bconstants);
|
||||
|
||||
__ kernel_crc32_1word_vpmsumd(crc, data, dataLen, table, constants, bconstants, t0, t1, t2, t3, t4);
|
||||
__ kernel_crc32_1word_vpmsumd(crc, data, dataLen, table, constants, bconstants, t0, t1, t2, t3, t4, true);
|
||||
|
||||
BLOCK_COMMENT("return");
|
||||
__ mr_if_needed(R3_RET, crc); // Updated crc is function result. No copying required (R3_ARG1 == R3_RET).
|
||||
@ -3331,31 +3361,79 @@ class StubGenerator: public StubCodeGenerator {
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
const Register t0 = R2;
|
||||
const Register t1 = R7;
|
||||
const Register t2 = R8;
|
||||
const Register t3 = R9;
|
||||
const Register tc0 = R10;
|
||||
const Register tc1 = R11;
|
||||
const Register tc2 = R12;
|
||||
StubRoutines::ppc64::generate_load_crc_table_addr(_masm, table);
|
||||
generate_CRC_updateBytes(name, table, true);
|
||||
}
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Arguments:
|
||||
*
|
||||
* Inputs:
|
||||
* R3_ARG1 - int crc
|
||||
* R4_ARG2 - byte* buf
|
||||
* R5_ARG3 - int length (of buffer)
|
||||
*
|
||||
* scratch:
|
||||
* R2, R6-R12
|
||||
*
|
||||
* Ouput:
|
||||
* R3_RET - int crc result
|
||||
*/
|
||||
// Compute CRC32C function.
|
||||
address generate_CRC32C_updateBytes(const char* name) {
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", name);
|
||||
address start = __ function_entry(); // Remember stub start address (is rtn value).
|
||||
|
||||
const Register table = R6; // crc table address
|
||||
|
||||
#if 0 // no vector support yet for CRC32C
|
||||
#ifdef VM_LITTLE_ENDIAN
|
||||
// arguments to kernel_crc32:
|
||||
const Register crc = R3_ARG1; // Current checksum, preset by caller or result from previous call.
|
||||
const Register data = R4_ARG2; // source byte array
|
||||
const Register dataLen = R5_ARG3; // #bytes to process
|
||||
|
||||
if (VM_Version::has_vpmsumb()) {
|
||||
const Register constants = R2; // constants address
|
||||
const Register bconstants = R8; // barret table address
|
||||
|
||||
const Register t0 = R9;
|
||||
const Register t1 = R10;
|
||||
const Register t2 = R11;
|
||||
const Register t3 = R12;
|
||||
const Register t4 = R7;
|
||||
|
||||
BLOCK_COMMENT("Stub body {");
|
||||
assert_different_registers(crc, data, dataLen, table);
|
||||
|
||||
StubRoutines::ppc64::generate_load_crc_table_addr(_masm, table);
|
||||
StubRoutines::ppc64::generate_load_crc32c_table_addr(_masm, table);
|
||||
StubRoutines::ppc64::generate_load_crc32c_constants_addr(_masm, constants);
|
||||
StubRoutines::ppc64::generate_load_crc32c_barret_constants_addr(_masm, bconstants);
|
||||
|
||||
__ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, table);
|
||||
__ kernel_crc32_1word_vpmsumd(crc, data, dataLen, table, constants, bconstants, t0, t1, t2, t3, t4, false);
|
||||
|
||||
BLOCK_COMMENT("return");
|
||||
__ mr_if_needed(R3_RET, crc); // Updated crc is function result. No copying required (R3_ARG1 == R3_RET).
|
||||
__ blr();
|
||||
|
||||
BLOCK_COMMENT("} Stub body");
|
||||
} else
|
||||
#endif
|
||||
#endif
|
||||
{
|
||||
StubRoutines::ppc64::generate_load_crc32c_table_addr(_masm, table);
|
||||
generate_CRC_updateBytes(name, table, false);
|
||||
}
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
|
||||
// Initialization
|
||||
void generate_initial() {
|
||||
// Generates all stubs and initializes the entry points
|
||||
@ -3383,6 +3461,12 @@ class StubGenerator: public StubCodeGenerator {
|
||||
StubRoutines::_crc_table_adr = (address)StubRoutines::ppc64::_crc_table;
|
||||
StubRoutines::_updateBytesCRC32 = generate_CRC32_updateBytes("CRC32_updateBytes");
|
||||
}
|
||||
|
||||
// CRC32C Intrinsics.
|
||||
if (UseCRC32CIntrinsics) {
|
||||
StubRoutines::_crc32c_table_addr = (address)StubRoutines::ppc64::_crc32c_table;
|
||||
StubRoutines::_updateBytesCRC32C = generate_CRC32C_updateBytes("CRC32C_updateBytes");
|
||||
}
|
||||
}
|
||||
|
||||
void generate_all() {
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2016 SAP SE. All rights reserved.
|
||||
* Copyright (c) 2002, 2017, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2017, SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -55,13 +55,16 @@ class ppc64 {
|
||||
|
||||
// CRC32 Intrinsics.
|
||||
static juint _crc_table[CRC32_TABLES][CRC32_COLUMN_SIZE];
|
||||
static juint _crc32c_table[CRC32_TABLES][CRC32_COLUMN_SIZE];
|
||||
static juint* _constants;
|
||||
static juint* _barret_constants;
|
||||
|
||||
public:
|
||||
|
||||
// CRC32 Intrinsics.
|
||||
static void generate_load_table_addr(MacroAssembler* masm, Register table, address table_addr, uint64_t table_contents);
|
||||
static void generate_load_crc_table_addr(MacroAssembler* masm, Register table);
|
||||
static void generate_load_crc32c_table_addr(MacroAssembler* masm, Register table);
|
||||
static void generate_load_crc_constants_addr(MacroAssembler* masm, Register table);
|
||||
static void generate_load_crc_barret_constants_addr(MacroAssembler* masm, Register table);
|
||||
static juint* generate_crc_constants();
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2014, 2017, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2015, 2016 SAP SE. All rights reserved.
|
||||
* Copyright (c) 2015, 2017, SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -1895,7 +1895,7 @@ address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
|
||||
__ lwz(crc, 2*wordSize, argP); // Current crc state, zero extend to 64 bit to have a clean register.
|
||||
|
||||
StubRoutines::ppc64::generate_load_crc_table_addr(_masm, table);
|
||||
__ kernel_crc32_singleByte(crc, data, dataLen, table, tmp);
|
||||
__ kernel_crc32_singleByte(crc, data, dataLen, table, tmp, true);
|
||||
|
||||
// Restore caller sp for c2i case and return.
|
||||
__ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started.
|
||||
@ -1911,6 +1911,10 @@ address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// TODO: generate_CRC32_updateBytes_entry and generate_CRC32C_updateBytes_entry are identical
|
||||
// except for using different crc tables and some block comment strings.
|
||||
// We should provide a common implementation.
|
||||
|
||||
// CRC32 Intrinsics.
|
||||
/**
|
||||
* Method entry for static native methods:
|
||||
@ -1987,7 +1991,7 @@ address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractI
|
||||
// Performance measurements show the 1word and 2word variants to be almost equivalent,
|
||||
// with very light advantages for the 1word variant. We chose the 1word variant for
|
||||
// code compactness.
|
||||
__ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, tc3);
|
||||
__ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, tc3, true);
|
||||
|
||||
// Restore caller sp for c2i case and return.
|
||||
__ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started.
|
||||
@ -2003,8 +2007,84 @@ address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractI
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Not supported
|
||||
// CRC32C Intrinsics.
|
||||
/**
|
||||
* Method entry for static native methods:
|
||||
* int java.util.zip.CRC32C.updateBytes( int crc, byte[] b, int off, int len)
|
||||
* int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long* buf, int off, int len)
|
||||
**/
|
||||
address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
|
||||
if (UseCRC32CIntrinsics) {
|
||||
address start = __ pc(); // Remember stub start address (is rtn value).
|
||||
|
||||
// We don't generate local frame and don't align stack because
|
||||
// we not even call stub code (we generate the code inline)
|
||||
// and there is no safepoint on this path.
|
||||
|
||||
// Load parameters.
|
||||
// Z_esp is callers operand stack pointer, i.e. it points to the parameters.
|
||||
const Register argP = R15_esp;
|
||||
const Register crc = R3_ARG1; // crc value
|
||||
const Register data = R4_ARG2; // address of java byte array
|
||||
const Register dataLen = R5_ARG3; // source data len
|
||||
const Register table = R6_ARG4; // address of crc32c table
|
||||
|
||||
const Register t0 = R9; // scratch registers for crc calculation
|
||||
const Register t1 = R10;
|
||||
const Register t2 = R11;
|
||||
const Register t3 = R12;
|
||||
|
||||
const Register tc0 = R2; // registers to hold pre-calculated column addresses
|
||||
const Register tc1 = R7;
|
||||
const Register tc2 = R8;
|
||||
const Register tc3 = table; // table address is reconstructed at the end of kernel_crc32_* emitters
|
||||
|
||||
const Register tmp = t0; // Only used very locally to calculate byte buffer address.
|
||||
|
||||
// Arguments are reversed on java expression stack.
|
||||
// Calculate address of start element.
|
||||
if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) { // Used for "updateDirectByteBuffer".
|
||||
BLOCK_COMMENT("CRC32C_updateDirectByteBuffer {");
|
||||
// crc @ (SP + 5W) (32bit)
|
||||
// buf @ (SP + 3W) (64bit ptr to long array)
|
||||
// off @ (SP + 2W) (32bit)
|
||||
// dataLen @ (SP + 1W) (32bit)
|
||||
// data = buf + off
|
||||
__ ld( data, 3*wordSize, argP); // start of byte buffer
|
||||
__ lwa( tmp, 2*wordSize, argP); // byte buffer offset
|
||||
__ lwa( dataLen, 1*wordSize, argP); // #bytes to process
|
||||
__ lwz( crc, 5*wordSize, argP); // current crc state
|
||||
__ add( data, data, tmp); // Add byte buffer offset.
|
||||
} else { // Used for "updateBytes update".
|
||||
BLOCK_COMMENT("CRC32C_updateBytes {");
|
||||
// crc @ (SP + 4W) (32bit)
|
||||
// buf @ (SP + 3W) (64bit ptr to byte array)
|
||||
// off @ (SP + 2W) (32bit)
|
||||
// dataLen @ (SP + 1W) (32bit)
|
||||
// data = buf + off + base_offset
|
||||
__ ld( data, 3*wordSize, argP); // start of byte buffer
|
||||
__ lwa( tmp, 2*wordSize, argP); // byte buffer offset
|
||||
__ lwa( dataLen, 1*wordSize, argP); // #bytes to process
|
||||
__ add( data, data, tmp); // add byte buffer offset
|
||||
__ lwz( crc, 4*wordSize, argP); // current crc state
|
||||
__ addi(data, data, arrayOopDesc::base_offset_in_bytes(T_BYTE));
|
||||
}
|
||||
|
||||
StubRoutines::ppc64::generate_load_crc32c_table_addr(_masm, table);
|
||||
|
||||
// Performance measurements show the 1word and 2word variants to be almost equivalent,
|
||||
// with very light advantages for the 1word variant. We chose the 1word variant for
|
||||
// code compactness.
|
||||
__ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, tc3, false);
|
||||
|
||||
// Restore caller sp for c2i case and return.
|
||||
__ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started.
|
||||
__ blr();
|
||||
|
||||
BLOCK_COMMENT("} CRC32C_update{Bytes|DirectByteBuffer}");
|
||||
return start;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2016 SAP SE. All rights reserved.
|
||||
* Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2017, SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -172,18 +172,27 @@ void VM_Version::initialize() {
|
||||
|
||||
assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive");
|
||||
|
||||
// Implementation does not use any of the vector instructions
|
||||
// available with Power8. Their exploitation is still pending.
|
||||
// If defined(VM_LITTLE_ENDIAN) and running on Power8 or newer hardware,
|
||||
// the implementation uses the vector instructions available with Power8.
|
||||
// In all other cases, the implementation uses only generally available instructions.
|
||||
if (!UseCRC32Intrinsics) {
|
||||
if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
|
||||
FLAG_SET_DEFAULT(UseCRC32Intrinsics, true);
|
||||
}
|
||||
}
|
||||
|
||||
if (UseCRC32CIntrinsics) {
|
||||
if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics))
|
||||
warning("CRC32C intrinsics are not available on this CPU");
|
||||
FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
|
||||
// Implementation does not use any of the vector instructions available with Power8.
|
||||
// Their exploitation is still pending (aka "work in progress").
|
||||
if (!UseCRC32CIntrinsics) {
|
||||
if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
|
||||
FLAG_SET_DEFAULT(UseCRC32CIntrinsics, true);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Provide implementation.
|
||||
if (UseAdler32Intrinsics) {
|
||||
warning("Adler32Intrinsics not available on this CPU.");
|
||||
FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
|
||||
}
|
||||
|
||||
// The AES intrinsic stubs require AES instruction support.
|
||||
@ -245,11 +254,6 @@ void VM_Version::initialize() {
|
||||
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
|
||||
}
|
||||
|
||||
if (UseAdler32Intrinsics) {
|
||||
warning("Adler32Intrinsics not available on this CPU.");
|
||||
FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
|
||||
}
|
||||
|
||||
if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
|
||||
UseMultiplyToLenIntrinsic = true;
|
||||
}
|
||||
|
@ -212,7 +212,7 @@ bool Compiler::is_intrinsic_supported(const methodHandle& method) {
|
||||
case vmIntrinsics::_updateCRC32:
|
||||
case vmIntrinsics::_updateBytesCRC32:
|
||||
case vmIntrinsics::_updateByteBufferCRC32:
|
||||
#if defined(SPARC) || defined(S390)
|
||||
#if defined(SPARC) || defined(S390) || defined(PPC64)
|
||||
case vmIntrinsics::_updateBytesCRC32C:
|
||||
case vmIntrinsics::_updateDirectByteBufferCRC32C:
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user