8175369: [ppc] Provide intrinsic implementation for CRC32C

Reviewed-by: mdoerr, simonis, kvn
This commit is contained in:
Lutz Schmidt 2017-03-08 17:01:13 -08:00
parent 99a554c5ad
commit 0171aad88e
10 changed files with 963 additions and 547 deletions

View File

@ -1,6 +1,6 @@
/* /*
* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2016 SAP SE. All rights reserved. * Copyright (c) 2012, 2017, SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -3177,9 +3177,8 @@ void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) {
assert_different_registers(val, crc, res); assert_different_registers(val, crc, res);
__ load_const_optimized(res, StubRoutines::crc_table_addr(), R0); __ load_const_optimized(res, StubRoutines::crc_table_addr(), R0);
__ nand(crc, crc, crc); // ~crc __ kernel_crc32_singleByteReg(crc, val, res, true);
__ update_byte_crc32(crc, val, res); __ mr(res, crc);
__ nand(res, crc, crc); // ~crc
} }
#undef __ #undef __

View File

@ -1,6 +1,6 @@
/* /*
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 SAP SE. All rights reserved. * Copyright (c) 2012, 2017, SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -63,18 +63,6 @@ void LIRItem::load_nonconstant() {
} }
inline void load_int_as_long(LIR_List *ll, LIRItem &li, LIR_Opr dst) {
LIR_Opr r = li.value()->operand();
if (r->is_register()) {
LIR_Opr dst_l = FrameMap::as_long_opr(dst->as_register());
ll->convert(Bytecodes::_i2l, li.result(), dst_l); // Convert.
} else {
// Constants or memory get loaded with sign extend on this platform.
ll->move(li.result(), dst);
}
}
//-------------------------------------------------------------- //--------------------------------------------------------------
// LIRGenerator // LIRGenerator
//-------------------------------------------------------------- //--------------------------------------------------------------
@ -1419,10 +1407,9 @@ void LIRGenerator::do_update_CRC32(Intrinsic* x) {
arg2 = cc->at(1), arg2 = cc->at(1),
arg3 = cc->at(2); arg3 = cc->at(2);
// CCallingConventionRequiresIntsAsLongs
crc.load_item_force(arg1); // We skip int->long conversion here, because CRC32 stub doesn't care about high bits. crc.load_item_force(arg1); // We skip int->long conversion here, because CRC32 stub doesn't care about high bits.
__ leal(LIR_OprFact::address(a), arg2); __ leal(LIR_OprFact::address(a), arg2);
load_int_as_long(gen()->lir(), len, arg3); len.load_item_force(arg3); // We skip int->long conversion here, , because CRC32 stub expects int.
__ call_runtime_leaf(StubRoutines::updateBytesCRC32(), LIR_OprFact::illegalOpr, result_reg, cc->args()); __ call_runtime_leaf(StubRoutines::updateBytesCRC32(), LIR_OprFact::illegalOpr, result_reg, cc->args());
__ move(result_reg, result); __ move(result_reg, result);
@ -1434,6 +1421,66 @@ void LIRGenerator::do_update_CRC32(Intrinsic* x) {
} }
} }
// Generates LIR for the CRC32C update intrinsics (_updateBytesCRC32C and
// _updateDirectByteBufferCRC32C). Both variants compute the address of the
// first byte to process and then call the StubRoutines::updateBytesCRC32C()
// stub as a runtime leaf with the arguments (int crc, address buf, int len).
// Any other intrinsic id is a programming error (ShouldNotReachHere).
void LIRGenerator::do_update_CRC32C(Intrinsic* x) {
assert(UseCRC32CIntrinsics, "or should not be here");
LIR_Opr result = rlock_result(x);
switch (x->id()) {
case vmIntrinsics::_updateBytesCRC32C:
case vmIntrinsics::_updateDirectByteBufferCRC32C: {
bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32C);
// Intrinsic arguments, in order: crc, buffer, offset into buffer, length.
LIRItem crc(x->argument_at(0), this);
LIRItem buf(x->argument_at(1), this);
LIRItem off(x->argument_at(2), this);
LIRItem len(x->argument_at(3), this);
buf.load_item();
off.load_nonconstant();
LIR_Opr index = off.result();
// The byte[] variant has to skip the array header; the direct-buffer variant starts at displacement 0.
int offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0;
if (off.result()->is_constant()) {
// A constant offset is folded into the displacement, so no index register is needed.
index = LIR_OprFact::illegalOpr;
offset += off.result()->as_jint();
}
LIR_Opr base_op = buf.result();
LIR_Address* a = NULL;
if (index->is_valid()) {
// Variable offset: widen the int index to long and add the displacement to it,
// then address the data as base + index.
LIR_Opr tmp = new_register(T_LONG);
__ convert(Bytecodes::_i2l, index, tmp);
index = tmp;
__ add(index, LIR_OprFact::intptrConst(offset), index);
a = new LIR_Address(base_op, index, T_BYTE);
} else {
// Constant offset: address the data as base + displacement.
a = new LIR_Address(base_op, offset, T_BYTE);
}
// C signature of the stub call: (int crc, address buf, int len).
BasicTypeList signature(3);
signature.append(T_INT);
signature.append(T_ADDRESS);
signature.append(T_INT);
CallingConvention* cc = frame_map()->c_calling_convention(&signature);
const LIR_Opr result_reg = result_register_for(x->type());
LIR_Opr arg1 = cc->at(0),
arg2 = cc->at(1),
arg3 = cc->at(2);
crc.load_item_force(arg1); // We skip int->long conversion here, because CRC32C stub doesn't care about high bits.
// Materialize the effective address of the first byte into the second argument.
__ leal(LIR_OprFact::address(a), arg2);
len.load_item_force(arg3); // We skip int->long conversion here, because CRC32C stub expects int.
// Call the stub, then copy its result register into the intrinsic's result operand.
__ call_runtime_leaf(StubRoutines::updateBytesCRC32C(), LIR_OprFact::illegalOpr, result_reg, cc->args());
__ move(result_reg, result);
break;
}
default: {
ShouldNotReachHere();
}
}
}
void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) { void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) {
assert(x->number_of_arguments() == 3, "wrong type"); assert(x->number_of_arguments() == 3, "wrong type");
assert(UseFMA, "Needs FMA instructions support."); assert(UseFMA, "Needs FMA instructions support.");
@ -1460,7 +1507,3 @@ void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) {
void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) { void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) {
fatal("vectorizedMismatch intrinsic is not implemented on this platform"); fatal("vectorizedMismatch intrinsic is not implemented on this platform");
} }
void LIRGenerator::do_update_CRC32C(Intrinsic* x) {
Unimplemented();
}

View File

@ -1,6 +1,6 @@
/* /*
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2016 SAP SE. All rights reserved. * Copyright (c) 2012, 2017, SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -4092,7 +4092,7 @@ void MacroAssembler::update_byte_crc32(Register crc, Register val, Register tabl
* @param table register pointing to CRC table * @param table register pointing to CRC table
*/ */
void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register len, Register table, void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register len, Register table,
Register data, bool loopAlignment, bool invertCRC) { Register data, bool loopAlignment) {
assert_different_registers(crc, buf, len, table, data); assert_different_registers(crc, buf, len, table, data);
Label L_mainLoop, L_done; Label L_mainLoop, L_done;
@ -4103,10 +4103,6 @@ void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register
clrldi_(len, len, 32); // Enforce 32 bit. Anything to do? clrldi_(len, len, 32); // Enforce 32 bit. Anything to do?
beq(CCR0, L_done); beq(CCR0, L_done);
if (invertCRC) {
nand(crc, crc, crc); // ~c
}
mtctr(len); mtctr(len);
align(mainLoop_alignment); align(mainLoop_alignment);
BIND(L_mainLoop); BIND(L_mainLoop);
@ -4115,10 +4111,6 @@ void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register
update_byte_crc32(crc, data, table); update_byte_crc32(crc, data, table);
bdnz(L_mainLoop); // Iterate. bdnz(L_mainLoop); // Iterate.
if (invertCRC) {
nand(crc, crc, crc); // ~c
}
bind(L_done); bind(L_done);
} }
@ -4175,7 +4167,8 @@ void MacroAssembler::update_1word_crc32(Register crc, Register buf, Register tab
*/ */
void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len, Register table, void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
Register t0, Register t1, Register t2, Register t3, Register t0, Register t1, Register t2, Register t3,
Register tc0, Register tc1, Register tc2, Register tc3) { Register tc0, Register tc1, Register tc2, Register tc3,
bool invertCRC) {
assert_different_registers(crc, buf, len, table); assert_different_registers(crc, buf, len, table);
Label L_mainLoop, L_tail; Label L_mainLoop, L_tail;
@ -4189,14 +4182,16 @@ void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len
const int complexThreshold = 2*mainLoop_stepping; const int complexThreshold = 2*mainLoop_stepping;
// Don't test for len <= 0 here. This pathological case should not occur anyway. // Don't test for len <= 0 here. This pathological case should not occur anyway.
// Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles. // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles
// The situation itself is detected and handled correctly by the conditional branches // for all well-behaved cases. The situation itself is detected and handled correctly
// following aghi(len, -stepping) and aghi(len, +stepping). // within update_byteLoop_crc32.
assert(tailLoop_stepping == 1, "check tailLoop_stepping!"); assert(tailLoop_stepping == 1, "check tailLoop_stepping!");
BLOCK_COMMENT("kernel_crc32_2word {"); BLOCK_COMMENT("kernel_crc32_2word {");
nand(crc, crc, crc); // ~c if (invertCRC) {
nand(crc, crc, crc); // 1s complement of crc
}
// Check for short (<mainLoop_stepping) buffer. // Check for short (<mainLoop_stepping) buffer.
cmpdi(CCR0, len, complexThreshold); cmpdi(CCR0, len, complexThreshold);
@ -4217,7 +4212,7 @@ void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len
blt(CCR0, L_tail); // For less than one mainloop_stepping left, do only tail processing blt(CCR0, L_tail); // For less than one mainloop_stepping left, do only tail processing
mr(len, tmp); // remaining bytes for main loop (>=mainLoop_stepping is guaranteed). mr(len, tmp); // remaining bytes for main loop (>=mainLoop_stepping is guaranteed).
} }
update_byteLoop_crc32(crc, buf, tmp2, table, data, false, false); update_byteLoop_crc32(crc, buf, tmp2, table, data, false);
} }
srdi(tmp2, len, log_stepping); // #iterations for mainLoop srdi(tmp2, len, log_stepping); // #iterations for mainLoop
@ -4253,9 +4248,11 @@ void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len
// Process last few (<complexThreshold) bytes of buffer. // Process last few (<complexThreshold) bytes of buffer.
BIND(L_tail); BIND(L_tail);
update_byteLoop_crc32(crc, buf, len, table, data, false, false); update_byteLoop_crc32(crc, buf, len, table, data, false);
nand(crc, crc, crc); // ~c if (invertCRC) {
nand(crc, crc, crc); // 1s complement of crc
}
BLOCK_COMMENT("} kernel_crc32_2word"); BLOCK_COMMENT("} kernel_crc32_2word");
} }
@ -4269,7 +4266,8 @@ void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len
*/ */
void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len, Register table, void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
Register t0, Register t1, Register t2, Register t3, Register t0, Register t1, Register t2, Register t3,
Register tc0, Register tc1, Register tc2, Register tc3) { Register tc0, Register tc1, Register tc2, Register tc3,
bool invertCRC) {
assert_different_registers(crc, buf, len, table); assert_different_registers(crc, buf, len, table);
Label L_mainLoop, L_tail; Label L_mainLoop, L_tail;
@ -4283,14 +4281,16 @@ void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len
const int complexThreshold = 2*mainLoop_stepping; const int complexThreshold = 2*mainLoop_stepping;
// Don't test for len <= 0 here. This pathological case should not occur anyway. // Don't test for len <= 0 here. This pathological case should not occur anyway.
// Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles. // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles
// The situation itself is detected and handled correctly by the conditional branches // for all well-behaved cases. The situation itself is detected and handled correctly
// following aghi(len, -stepping) and aghi(len, +stepping). // within update_byteLoop_crc32.
assert(tailLoop_stepping == 1, "check tailLoop_stepping!"); assert(tailLoop_stepping == 1, "check tailLoop_stepping!");
BLOCK_COMMENT("kernel_crc32_1word {"); BLOCK_COMMENT("kernel_crc32_1word {");
nand(crc, crc, crc); // ~c if (invertCRC) {
nand(crc, crc, crc); // 1s complement of crc
}
// Check for short (<mainLoop_stepping) buffer. // Check for short (<mainLoop_stepping) buffer.
cmpdi(CCR0, len, complexThreshold); cmpdi(CCR0, len, complexThreshold);
@ -4311,7 +4311,7 @@ void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len
blt(CCR0, L_tail); // For less than one mainloop_stepping left, do only tail processing blt(CCR0, L_tail); // For less than one mainloop_stepping left, do only tail processing
mr(len, tmp); // remaining bytes for main loop (>=mainLoop_stepping is guaranteed). mr(len, tmp); // remaining bytes for main loop (>=mainLoop_stepping is guaranteed).
} }
update_byteLoop_crc32(crc, buf, tmp2, table, data, false, false); update_byteLoop_crc32(crc, buf, tmp2, table, data, false);
} }
srdi(tmp2, len, log_stepping); // #iterations for mainLoop srdi(tmp2, len, log_stepping); // #iterations for mainLoop
@ -4346,9 +4346,11 @@ void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len
// Process last few (<complexThreshold) bytes of buffer. // Process last few (<complexThreshold) bytes of buffer.
BIND(L_tail); BIND(L_tail);
update_byteLoop_crc32(crc, buf, len, table, data, false, false); update_byteLoop_crc32(crc, buf, len, table, data, false);
nand(crc, crc, crc); // ~c if (invertCRC) {
nand(crc, crc, crc); // 1s complement of crc
}
BLOCK_COMMENT("} kernel_crc32_1word"); BLOCK_COMMENT("} kernel_crc32_1word");
} }
@ -4361,16 +4363,24 @@ void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len
* Uses R7_ARG5, R8_ARG6 as work registers. * Uses R7_ARG5, R8_ARG6 as work registers.
*/ */
void MacroAssembler::kernel_crc32_1byte(Register crc, Register buf, Register len, Register table, void MacroAssembler::kernel_crc32_1byte(Register crc, Register buf, Register len, Register table,
Register t0, Register t1, Register t2, Register t3) { Register t0, Register t1, Register t2, Register t3,
bool invertCRC) {
assert_different_registers(crc, buf, len, table); assert_different_registers(crc, buf, len, table);
Register data = t0; // Holds the current byte to be folded into crc. Register data = t0; // Holds the current byte to be folded into crc.
BLOCK_COMMENT("kernel_crc32_1byte {"); BLOCK_COMMENT("kernel_crc32_1byte {");
// Process all bytes in a single-byte loop. if (invertCRC) {
update_byteLoop_crc32(crc, buf, len, table, data, true, true); nand(crc, crc, crc); // 1s complement of crc
}
// Process all bytes in a single-byte loop.
update_byteLoop_crc32(crc, buf, len, table, data, true);
if (invertCRC) {
nand(crc, crc, crc); // 1s complement of crc
}
BLOCK_COMMENT("} kernel_crc32_1byte"); BLOCK_COMMENT("} kernel_crc32_1byte");
} }
@ -4388,7 +4398,8 @@ void MacroAssembler::kernel_crc32_1byte(Register crc, Register buf, Register len
*/ */
void MacroAssembler::kernel_crc32_1word_vpmsumd(Register crc, Register buf, Register len, Register table, void MacroAssembler::kernel_crc32_1word_vpmsumd(Register crc, Register buf, Register len, Register table,
Register constants, Register barretConstants, Register constants, Register barretConstants,
Register t0, Register t1, Register t2, Register t3, Register t4) { Register t0, Register t1, Register t2, Register t3, Register t4,
bool invertCRC) {
assert_different_registers(crc, buf, len, table); assert_different_registers(crc, buf, len, table);
Label L_alignedHead, L_tail, L_alignTail, L_start, L_end; Label L_alignedHead, L_tail, L_alignTail, L_start, L_end;
@ -4406,13 +4417,15 @@ void MacroAssembler::kernel_crc32_1word_vpmsumd(Register crc, Register buf, Regi
Register tc0 = t4; Register tc0 = t4;
Register tc1 = constants; Register tc1 = constants;
Register tc2 = barretConstants; Register tc2 = barretConstants;
kernel_crc32_1word(crc, buf, len, table,t0, t1, t2, t3, tc0, tc1, tc2, table); kernel_crc32_1word(crc, buf, len, table,t0, t1, t2, t3, tc0, tc1, tc2, table, invertCRC);
b(L_end); b(L_end);
BIND(L_start); BIND(L_start);
// 2. ~c // 2. ~c
nand(crc, crc, crc); if (invertCRC) {
nand(crc, crc, crc); // 1s complement of crc
}
// 3. calculate from 0 to first 128bit-aligned address // 3. calculate from 0 to first 128bit-aligned address
clrldi_(prealign, buf, 57); clrldi_(prealign, buf, 57);
@ -4421,7 +4434,7 @@ void MacroAssembler::kernel_crc32_1word_vpmsumd(Register crc, Register buf, Regi
subfic(prealign, prealign, 128); subfic(prealign, prealign, 128);
subf(len, prealign, len); subf(len, prealign, len);
update_byteLoop_crc32(crc, buf, prealign, table, t2, false, false); update_byteLoop_crc32(crc, buf, prealign, table, t2, false);
// 4. calculate from first 128bit-aligned address to last 128bit-aligned address // 4. calculate from first 128bit-aligned address to last 128bit-aligned address
BIND(L_alignedHead); BIND(L_alignedHead);
@ -4436,12 +4449,14 @@ void MacroAssembler::kernel_crc32_1word_vpmsumd(Register crc, Register buf, Regi
cmpdi(CCR0, postalign, 0); cmpdi(CCR0, postalign, 0);
beq(CCR0, L_tail); beq(CCR0, L_tail);
update_byteLoop_crc32(crc, buf, postalign, table, t2, false, false); update_byteLoop_crc32(crc, buf, postalign, table, t2, false);
BIND(L_tail); BIND(L_tail);
// 6. ~c // 6. ~c
nand(crc, crc, crc); if (invertCRC) {
nand(crc, crc, crc); // 1s complement of crc
}
BIND(L_end); BIND(L_end);
@ -4933,16 +4948,35 @@ void MacroAssembler::kernel_crc32_1word_aligned(Register crc, Register buf, Regi
offsetInt -= 8; ld(R31, offsetInt, R1_SP); offsetInt -= 8; ld(R31, offsetInt, R1_SP);
} }
void MacroAssembler::kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp) { void MacroAssembler::kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp, bool invertCRC) {
assert_different_registers(crc, buf, /* len, not used!! */ table, tmp); assert_different_registers(crc, buf, /* len, not used!! */ table, tmp);
BLOCK_COMMENT("kernel_crc32_singleByte:"); BLOCK_COMMENT("kernel_crc32_singleByte:");
nand(crc, crc, crc); // ~c if (invertCRC) {
nand(crc, crc, crc); // 1s complement of crc
}
lbz(tmp, 0, buf); // Byte from buffer, zero-extended. lbz(tmp, 0, buf); // Byte from buffer, zero-extended.
update_byte_crc32(crc, tmp, table); update_byte_crc32(crc, tmp, table);
nand(crc, crc, crc); // ~c if (invertCRC) {
nand(crc, crc, crc); // 1s complement of crc
}
}
void MacroAssembler::kernel_crc32_singleByteReg(Register crc, Register val, Register table, bool invertCRC) {
assert_different_registers(crc, val, table);
BLOCK_COMMENT("kernel_crc32_singleByteReg:");
if (invertCRC) {
nand(crc, crc, crc); // 1s complement of crc
}
update_byte_crc32(crc, val, table);
if (invertCRC) {
nand(crc, crc, crc); // 1s complement of crc
}
} }
// dest_lo += src1 + src2 // dest_lo += src1 + src2

View File

@ -1,6 +1,6 @@
/* /*
* Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2002, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2016 SAP SE. All rights reserved. * Copyright (c) 2012, 2017, SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -817,33 +817,47 @@ class MacroAssembler: public Assembler {
Register tmp6, Register tmp7, Register tmp8, Register tmp9, Register tmp10, Register tmp6, Register tmp7, Register tmp8, Register tmp9, Register tmp10,
Register tmp11, Register tmp12, Register tmp13); Register tmp11, Register tmp12, Register tmp13);
// CRC32 Intrinsics. // Emitters for CRC32 calculation.
// A note on invertCRC:
// Unfortunately, internal representation of crc differs between CRC32 and CRC32C.
// CRC32 holds its current crc value in the externally visible representation.
// CRC32C holds its current crc value in internal format, ready for updating.
// Thus, the crc value must be bit-flipped before updating it in the CRC32 case.
// In the CRC32C case, it must be bit-flipped when it is given to the outside world (getValue()).
// The bool invertCRC parameter indicates whether bit-flipping is required before updates.
void load_reverse_32(Register dst, Register src); void load_reverse_32(Register dst, Register src);
int crc32_table_columns(Register table, Register tc0, Register tc1, Register tc2, Register tc3); int crc32_table_columns(Register table, Register tc0, Register tc1, Register tc2, Register tc3);
void fold_byte_crc32(Register crc, Register val, Register table, Register tmp); void fold_byte_crc32(Register crc, Register val, Register table, Register tmp);
void fold_8bit_crc32(Register crc, Register table, Register tmp); void fold_8bit_crc32(Register crc, Register table, Register tmp);
void update_byte_crc32(Register crc, Register val, Register table); void update_byte_crc32(Register crc, Register val, Register table);
void update_byteLoop_crc32(Register crc, Register buf, Register len, Register table, void update_byteLoop_crc32(Register crc, Register buf, Register len, Register table,
Register data, bool loopAlignment, bool invertCRC); Register data, bool loopAlignment);
void update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc, void update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc,
Register t0, Register t1, Register t2, Register t3, Register t0, Register t1, Register t2, Register t3,
Register tc0, Register tc1, Register tc2, Register tc3); Register tc0, Register tc1, Register tc2, Register tc3);
void kernel_crc32_2word(Register crc, Register buf, Register len, Register table, void kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
Register t0, Register t1, Register t2, Register t3, Register t0, Register t1, Register t2, Register t3,
Register tc0, Register tc1, Register tc2, Register tc3); Register tc0, Register tc1, Register tc2, Register tc3,
bool invertCRC);
void kernel_crc32_1word(Register crc, Register buf, Register len, Register table, void kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
Register t0, Register t1, Register t2, Register t3, Register t0, Register t1, Register t2, Register t3,
Register tc0, Register tc1, Register tc2, Register tc3); Register tc0, Register tc1, Register tc2, Register tc3,
bool invertCRC);
void kernel_crc32_1byte(Register crc, Register buf, Register len, Register table, void kernel_crc32_1byte(Register crc, Register buf, Register len, Register table,
Register t0, Register t1, Register t2, Register t3); Register t0, Register t1, Register t2, Register t3,
bool invertCRC);
void kernel_crc32_1word_vpmsumd(Register crc, Register buf, Register len, Register table, void kernel_crc32_1word_vpmsumd(Register crc, Register buf, Register len, Register table,
Register constants, Register barretConstants, Register constants, Register barretConstants,
Register t0, Register t1, Register t2, Register t3, Register t4); Register t0, Register t1, Register t2, Register t3, Register t4,
bool invertCRC);
void kernel_crc32_1word_aligned(Register crc, Register buf, Register len, void kernel_crc32_1word_aligned(Register crc, Register buf, Register len,
Register constants, Register barretConstants, Register constants, Register barretConstants,
Register t0, Register t1, Register t2); Register t0, Register t1, Register t2);
void kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp); void kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp,
bool invertCRC);
void kernel_crc32_singleByteReg(Register crc, Register val, Register table,
bool invertCRC);
// //
// Debugging // Debugging

View File

@ -1,6 +1,6 @@
/* /*
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2016 SAP SE. All rights reserved. * Copyright (c) 2012, 2017, SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -3276,6 +3276,36 @@ class StubGenerator: public StubCodeGenerator {
return start; return start;
} }
// Compute CRC32/CRC32C function.
// Emits the stub body shared by the CRC32 and CRC32C updateBytes stubs:
// a kernel_crc32_1word computation followed by a return (blr).
//
//   name      - stub name; not referenced in this body.
//   table     - register holding the address of the CRC lookup table. The callers
//               in this file load it before calling this emitter.
//   invertCRC - forwarded unchanged to kernel_crc32_1word. The CRC32 stub passes
//               true, the CRC32C stub passes false.
void generate_CRC_updateBytes(const char* name, Register table, bool invertCRC) {
// arguments to kernel_crc32:
const Register crc = R3_ARG1; // Current checksum, preset by caller or result from previous call.
const Register data = R4_ARG2; // source byte array
const Register dataLen = R5_ARG3; // #bytes to process
// Work registers handed to the kernel (t0..t3 and tc0..tc2).
const Register t0 = R2;
const Register t1 = R7;
const Register t2 = R8;
const Register t3 = R9;
const Register tc0 = R10;
const Register tc1 = R11;
const Register tc2 = R12;
BLOCK_COMMENT("Stub body {");
assert_different_registers(crc, data, dataLen, table);
// Note: 'table' is passed twice, as the table argument and again in the tc3 position.
__ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, table, invertCRC);
BLOCK_COMMENT("return");
__ mr_if_needed(R3_RET, crc); // Updated crc is function result. No copying required (R3_ARG1 == R3_RET).
__ blr();
BLOCK_COMMENT("} Stub body");
}
/** /**
* Arguments: * Arguments:
* *
@ -3296,14 +3326,14 @@ class StubGenerator: public StubCodeGenerator {
StubCodeMark mark(this, "StubRoutines", name); StubCodeMark mark(this, "StubRoutines", name);
address start = __ function_entry(); // Remember stub start address (is rtn value). address start = __ function_entry(); // Remember stub start address (is rtn value).
const Register table = R6; // crc table address
#ifdef VM_LITTLE_ENDIAN
// arguments to kernel_crc32: // arguments to kernel_crc32:
const Register crc = R3_ARG1; // Current checksum, preset by caller or result from previous call. const Register crc = R3_ARG1; // Current checksum, preset by caller or result from previous call.
const Register data = R4_ARG2; // source byte array const Register data = R4_ARG2; // source byte array
const Register dataLen = R5_ARG3; // #bytes to process const Register dataLen = R5_ARG3; // #bytes to process
const Register table = R6; // crc table address
#ifdef VM_LITTLE_ENDIAN
if (VM_Version::has_vpmsumb()) { if (VM_Version::has_vpmsumb()) {
const Register constants = R2; // constants address const Register constants = R2; // constants address
const Register bconstants = R8; // barret table address const Register bconstants = R8; // barret table address
@ -3321,7 +3351,7 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::ppc64::generate_load_crc_constants_addr(_masm, constants); StubRoutines::ppc64::generate_load_crc_constants_addr(_masm, constants);
StubRoutines::ppc64::generate_load_crc_barret_constants_addr(_masm, bconstants); StubRoutines::ppc64::generate_load_crc_barret_constants_addr(_masm, bconstants);
__ kernel_crc32_1word_vpmsumd(crc, data, dataLen, table, constants, bconstants, t0, t1, t2, t3, t4); __ kernel_crc32_1word_vpmsumd(crc, data, dataLen, table, constants, bconstants, t0, t1, t2, t3, t4, true);
BLOCK_COMMENT("return"); BLOCK_COMMENT("return");
__ mr_if_needed(R3_RET, crc); // Updated crc is function result. No copying required (R3_ARG1 == R3_RET). __ mr_if_needed(R3_RET, crc); // Updated crc is function result. No copying required (R3_ARG1 == R3_RET).
@ -3331,31 +3361,79 @@ class StubGenerator: public StubCodeGenerator {
} else } else
#endif #endif
{ {
const Register t0 = R2; StubRoutines::ppc64::generate_load_crc_table_addr(_masm, table);
const Register t1 = R7; generate_CRC_updateBytes(name, table, true);
const Register t2 = R8; }
const Register t3 = R9;
const Register tc0 = R10; return start;
const Register tc1 = R11; }
const Register tc2 = R12;
/**
* Arguments:
*
* Inputs:
* R3_ARG1 - int crc
* R4_ARG2 - byte* buf
* R5_ARG3 - int length (of buffer)
*
* scratch:
* R2, R6-R12
*
* Output:
* R3_RET - int crc result
*/
// Compute CRC32C function.
address generate_CRC32C_updateBytes(const char* name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ function_entry(); // Remember stub start address (is rtn value).
const Register table = R6; // crc table address
#if 0 // no vector support yet for CRC32C
#ifdef VM_LITTLE_ENDIAN
// arguments to kernel_crc32:
const Register crc = R3_ARG1; // Current checksum, preset by caller or result from previous call.
const Register data = R4_ARG2; // source byte array
const Register dataLen = R5_ARG3; // #bytes to process
if (VM_Version::has_vpmsumb()) {
const Register constants = R2; // constants address
const Register bconstants = R8; // barret table address
const Register t0 = R9;
const Register t1 = R10;
const Register t2 = R11;
const Register t3 = R12;
const Register t4 = R7;
BLOCK_COMMENT("Stub body {"); BLOCK_COMMENT("Stub body {");
assert_different_registers(crc, data, dataLen, table); assert_different_registers(crc, data, dataLen, table);
StubRoutines::ppc64::generate_load_crc_table_addr(_masm, table); StubRoutines::ppc64::generate_load_crc32c_table_addr(_masm, table);
StubRoutines::ppc64::generate_load_crc32c_constants_addr(_masm, constants);
StubRoutines::ppc64::generate_load_crc32c_barret_constants_addr(_masm, bconstants);
__ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, table); __ kernel_crc32_1word_vpmsumd(crc, data, dataLen, table, constants, bconstants, t0, t1, t2, t3, t4, false);
BLOCK_COMMENT("return"); BLOCK_COMMENT("return");
__ mr_if_needed(R3_RET, crc); // Updated crc is function result. No copying required (R3_ARG1 == R3_RET). __ mr_if_needed(R3_RET, crc); // Updated crc is function result. No copying required (R3_ARG1 == R3_RET).
__ blr(); __ blr();
BLOCK_COMMENT("} Stub body"); BLOCK_COMMENT("} Stub body");
} else
#endif
#endif
{
StubRoutines::ppc64::generate_load_crc32c_table_addr(_masm, table);
generate_CRC_updateBytes(name, table, false);
} }
return start; return start;
} }
// Initialization // Initialization
void generate_initial() { void generate_initial() {
// Generates all stubs and initializes the entry points // Generates all stubs and initializes the entry points
@ -3383,6 +3461,12 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_crc_table_adr = (address)StubRoutines::ppc64::_crc_table; StubRoutines::_crc_table_adr = (address)StubRoutines::ppc64::_crc_table;
StubRoutines::_updateBytesCRC32 = generate_CRC32_updateBytes("CRC32_updateBytes"); StubRoutines::_updateBytesCRC32 = generate_CRC32_updateBytes("CRC32_updateBytes");
} }
// CRC32C Intrinsics.
if (UseCRC32CIntrinsics) {
StubRoutines::_crc32c_table_addr = (address)StubRoutines::ppc64::_crc32c_table;
StubRoutines::_updateBytesCRC32C = generate_CRC32C_updateBytes("CRC32C_updateBytes");
}
} }
void generate_all() { void generate_all() {

View File

@ -1,6 +1,6 @@
/* /*
* Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2002, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2016 SAP SE. All rights reserved. * Copyright (c) 2012, 2017, SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -55,13 +55,16 @@ class ppc64 {
// CRC32 Intrinsics. // CRC32 Intrinsics.
static juint _crc_table[CRC32_TABLES][CRC32_COLUMN_SIZE]; static juint _crc_table[CRC32_TABLES][CRC32_COLUMN_SIZE];
static juint _crc32c_table[CRC32_TABLES][CRC32_COLUMN_SIZE];
static juint* _constants; static juint* _constants;
static juint* _barret_constants; static juint* _barret_constants;
public: public:
// CRC32 Intrinsics. // CRC32 Intrinsics.
static void generate_load_table_addr(MacroAssembler* masm, Register table, address table_addr, uint64_t table_contents);
static void generate_load_crc_table_addr(MacroAssembler* masm, Register table); static void generate_load_crc_table_addr(MacroAssembler* masm, Register table);
static void generate_load_crc32c_table_addr(MacroAssembler* masm, Register table);
static void generate_load_crc_constants_addr(MacroAssembler* masm, Register table); static void generate_load_crc_constants_addr(MacroAssembler* masm, Register table);
static void generate_load_crc_barret_constants_addr(MacroAssembler* masm, Register table); static void generate_load_crc_barret_constants_addr(MacroAssembler* masm, Register table);
static juint* generate_crc_constants(); static juint* generate_crc_constants();

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
/* /*
* Copyright (c) 2014, 2017, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015, 2016 SAP SE. All rights reserved. * Copyright (c) 2015, 2017, SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -1895,7 +1895,7 @@ address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
__ lwz(crc, 2*wordSize, argP); // Current crc state, zero extend to 64 bit to have a clean register. __ lwz(crc, 2*wordSize, argP); // Current crc state, zero extend to 64 bit to have a clean register.
StubRoutines::ppc64::generate_load_crc_table_addr(_masm, table); StubRoutines::ppc64::generate_load_crc_table_addr(_masm, table);
__ kernel_crc32_singleByte(crc, data, dataLen, table, tmp); __ kernel_crc32_singleByte(crc, data, dataLen, table, tmp, true);
// Restore caller sp for c2i case and return. // Restore caller sp for c2i case and return.
__ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started. __ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started.
@ -1911,6 +1911,10 @@ address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
return NULL; return NULL;
} }
// TODO: generate_CRC32_updateBytes_entry and generate_CRC32C_updateBytes_entry are identical
// except for using different crc tables and some block comment strings.
// We should provide a common implementation.
// CRC32 Intrinsics. // CRC32 Intrinsics.
/** /**
* Method entry for static native methods: * Method entry for static native methods:
@ -1987,7 +1991,7 @@ address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractI
// Performance measurements show the 1word and 2word variants to be almost equivalent, // Performance measurements show the 1word and 2word variants to be almost equivalent,
// with very light advantages for the 1word variant. We chose the 1word variant for // with very light advantages for the 1word variant. We chose the 1word variant for
// code compactness. // code compactness.
__ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, tc3); __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, tc3, true);
// Restore caller sp for c2i case and return. // Restore caller sp for c2i case and return.
__ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started. __ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started.
@ -2003,8 +2007,84 @@ address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractI
return NULL; return NULL;
} }
// Not supported // CRC32C Intrinsics.
/**
 * Method entry for static native methods:
 *   int java.util.zip.CRC32C.updateBytes(           int crc, byte[] b,  int off, int end)
 *   int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long* buf, int off, int end)
 *
 * Note: Unlike the CRC32 methods, the last Java argument is the (exclusive)
 * end index of the data range, not its length. The number of bytes to
 * process is therefore (end - off) and is computed here before the
 * checksum kernel is emitted.
 *
 * Returns the start address of the generated entry, or NULL if
 * UseCRC32CIntrinsics is off (no code is generated in that case).
 **/
address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
  if (UseCRC32CIntrinsics) {
    address start = __ pc();  // Remember stub start address (is rtn value).

    // We don't generate local frame and don't align stack because
    // we do not even call stub code (we generate the code inline)
    // and there is no safepoint on this path.

    // Load parameters.
    // R15_esp is callers operand stack pointer, i.e. it points to the parameters.
    const Register argP    = R15_esp;
    const Register crc     = R3_ARG1;  // crc value
    const Register data    = R4_ARG2;  // address of java byte array
    const Register dataLen = R5_ARG3;  // source data len
    const Register table   = R6_ARG4;  // address of crc32c table

    const Register t0      = R9;       // scratch registers for crc calculation
    const Register t1      = R10;
    const Register t2      = R11;
    const Register t3      = R12;

    const Register tc0     = R2;       // registers to hold pre-calculated column addresses
    const Register tc1     = R7;
    const Register tc2     = R8;
    const Register tc3     = table;    // table address is reconstructed at the end of kernel_crc32_* emitters

    const Register tmp     = t0;       // Only used very locally to calculate byte buffer address
                                       // and the number of bytes to process.

    // Arguments are reversed on java expression stack.
    // Calculate address of start element.
    if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) { // Used for "updateDirectByteBuffer".
      BLOCK_COMMENT("CRC32C_updateDirectByteBuffer {");
      // crc     @ (argP + 5W) (32bit)
      // buf     @ (argP + 3W) (64bit ptr to long array)
      // off     @ (argP + 2W) (32bit)
      // end     @ (argP + 1W) (32bit)
      // data    = buf + off
      // dataLen = end - off
      __ ld(  data,    3*wordSize, argP);  // start of byte buffer
      __ lwa( tmp,     2*wordSize, argP);  // byte buffer offset
      __ lwa( dataLen, 1*wordSize, argP);  // end index (exclusive)
      __ lwz( crc,     5*wordSize, argP);  // current crc state
      __ add( data, data, tmp);            // Add byte buffer offset.
      __ sub( dataLen, dataLen, tmp);      // #bytes to process = (end - off)
    } else {                                                         // Used for "updateBytes update".
      BLOCK_COMMENT("CRC32C_updateBytes {");
      // crc     @ (argP + 4W) (32bit)
      // buf     @ (argP + 3W) (64bit ptr to byte array)
      // off     @ (argP + 2W) (32bit)
      // end     @ (argP + 1W) (32bit)
      // data    = buf + off + base_offset
      // dataLen = end - off
      __ ld(  data,    3*wordSize, argP);  // start of byte buffer
      __ lwa( tmp,     2*wordSize, argP);  // byte buffer offset
      __ lwa( dataLen, 1*wordSize, argP);  // end index (exclusive)
      __ add( data, data, tmp);            // add byte buffer offset
      __ sub( dataLen, dataLen, tmp);      // #bytes to process = (end - off)
      __ lwz( crc,     4*wordSize, argP);  // current crc state
      __ addi(data, data, arrayOopDesc::base_offset_in_bytes(T_BYTE));
    }

    // Note: tmp aliases t0, which the kernel below uses as scratch. The offset
    // must therefore be fully consumed (see add/sub above) before this point.
    StubRoutines::ppc64::generate_load_crc32c_table_addr(_masm, table);

    // Performance measurements show the 1word and 2word variants to be almost equivalent,
    // with very light advantages for the 1word variant. We chose the 1word variant for
    // code compactness.
    __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, tc3, false);

    // Restore caller sp for c2i case and return.
    __ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started.
    __ blr();

    BLOCK_COMMENT("} CRC32C_update{Bytes|DirectByteBuffer}");
    return start;
  }

  return NULL;
}

View File

@ -1,6 +1,6 @@
/* /*
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2016 SAP SE. All rights reserved. * Copyright (c) 2012, 2017, SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -172,18 +172,27 @@ void VM_Version::initialize() {
assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive"); assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive");
// Implementation does not use any of the vector instructions // If defined(VM_LITTLE_ENDIAN) and running on Power8 or newer hardware,
// available with Power8. Their exploitation is still pending. // the implementation uses the vector instructions available with Power8.
// In all other cases, the implementation uses only generally available instructions.
if (!UseCRC32Intrinsics) { if (!UseCRC32Intrinsics) {
if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
FLAG_SET_DEFAULT(UseCRC32Intrinsics, true); FLAG_SET_DEFAULT(UseCRC32Intrinsics, true);
} }
} }
if (UseCRC32CIntrinsics) { // Implementation does not use any of the vector instructions available with Power8.
if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) // Their exploitation is still pending (aka "work in progress").
warning("CRC32C intrinsics are not available on this CPU"); if (!UseCRC32CIntrinsics) {
FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
FLAG_SET_DEFAULT(UseCRC32CIntrinsics, true);
}
}
// TODO: Provide implementation.
if (UseAdler32Intrinsics) {
warning("Adler32Intrinsics not available on this CPU.");
FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
} }
// The AES intrinsic stubs require AES instruction support. // The AES intrinsic stubs require AES instruction support.
@ -245,11 +254,6 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
} }
if (UseAdler32Intrinsics) {
warning("Adler32Intrinsics not available on this CPU.");
FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
}
if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
UseMultiplyToLenIntrinsic = true; UseMultiplyToLenIntrinsic = true;
} }

View File

@ -212,7 +212,7 @@ bool Compiler::is_intrinsic_supported(const methodHandle& method) {
case vmIntrinsics::_updateCRC32: case vmIntrinsics::_updateCRC32:
case vmIntrinsics::_updateBytesCRC32: case vmIntrinsics::_updateBytesCRC32:
case vmIntrinsics::_updateByteBufferCRC32: case vmIntrinsics::_updateByteBufferCRC32:
#if defined(SPARC) || defined(S390) #if defined(SPARC) || defined(S390) || defined(PPC64)
case vmIntrinsics::_updateBytesCRC32C: case vmIntrinsics::_updateBytesCRC32C:
case vmIntrinsics::_updateDirectByteBufferCRC32C: case vmIntrinsics::_updateDirectByteBufferCRC32C:
#endif #endif