8214451: PPC64/s390: Clean up unused CRC32 prototype and function
Reviewed-by: mdoerr, lucy
This commit is contained in:
parent
d92085431b
commit
d6b70fa2ab
@ -4013,105 +4013,6 @@ void MacroAssembler::update_1word_crc32(Register crc, Register buf, Register tab
|
||||
xorr(crc, t0, t2); // Now crc contains the final checksum value.
|
||||
}
|
||||
|
||||
/**
 * Compute CRC32 over a byte buffer, consuming two 32-bit words (8 bytes)
 * of input per main-loop iteration via update_1word_crc32; leftover bytes
 * are handled one at a time by update_byteLoop_crc32.
 *
 * @param crc register containing existing CRC (32-bit)
 * @param buf register pointing to input byte buffer (byte*)
 * @param len register containing number of bytes
 * @param table register pointing to CRC table
 * @param invertCRC if true, the CRC is 1s-complemented on entry and again on exit
 *                  (the usual pre/post conditioning of CRC32)
 *
 * Uses R9..R12 as work register. Must be saved/restored by caller!
 */
void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
                                        Register t0, Register t1, Register t2, Register t3,
                                        Register tc0, Register tc1, Register tc2, Register tc3,
                                        bool invertCRC) {
  assert_different_registers(crc, buf, len, table);

  Label L_mainLoop, L_tail;
  // NOTE: tmp and data deliberately alias t0 — they are never live at the same time.
  Register tmp  = t0;
  Register data = t0;
  Register tmp2 = t1;
  const int mainLoop_stepping  = 8;  // Bytes consumed per main-loop iteration (2 words).
  const int tailLoop_stepping  = 1;  // Tail loop consumes one byte at a time.
  const int log_stepping       = exact_log2(mainLoop_stepping);
  const int mainLoop_alignment = 32; // InputForNewCode > 4 ? InputForNewCode : 32;
  const int complexThreshold   = 2*mainLoop_stepping; // Below this, alignment + main loop are not worthwhile.

  // Don't test for len <= 0 here. This pathological case should not occur anyway.
  // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles
  // for all well-behaved cases. The situation itself is detected and handled correctly
  // within update_byteLoop_crc32.
  assert(tailLoop_stepping == 1, "check tailLoop_stepping!");

  BLOCK_COMMENT("kernel_crc32_2word {");

  if (invertCRC) {
    nand(crc, crc, crc);                      // 1s complement of crc
  }

  // Check for short (<complexThreshold) buffer: tail processing only.
  cmpdi(CCR0, len, complexThreshold);
  blt(CCR0, L_tail);

  // Pre-mainLoop alignment did show a slight (1%) positive effect on performance.
  // We leave the code in for reference. Maybe we need alignment when we exploit vector instructions.
  {
    // Align buf addr to mainLoop_stepping boundary.
    neg(tmp2, buf);                         // Calculate # preLoop iterations for alignment.
    rldicl(tmp2, tmp2, 0, 64-log_stepping); // Rotate tmp2 by 0 bits, and with mask keeping only the low
                                            // log_stepping bits (bits 64-log_stepping..63), i.e. tmp2 %= mainLoop_stepping.

    if (complexThreshold > mainLoop_stepping) {
      sub(len, len, tmp2);                  // Remaining bytes for main loop (>=mainLoop_stepping is guaranteed).
    } else {
      sub(tmp, len, tmp2);                  // Remaining bytes for main loop.
      cmpdi(CCR0, tmp, mainLoop_stepping);
      blt(CCR0, L_tail);                    // For less than one mainloop_stepping left, do only tail processing
      mr(len, tmp);                         // remaining bytes for main loop (>=mainLoop_stepping is guaranteed).
    }
    // Consume the (tmp2) alignment bytes byte-by-byte.
    update_byteLoop_crc32(crc, buf, tmp2, table, data, false);
  }

  srdi(tmp2, len, log_stepping);            // #iterations for mainLoop
  andi(len, len, mainLoop_stepping-1);      // remaining bytes for tailLoop
  mtctr(tmp2);                              // Main loop is counted via CTR.

#ifdef VM_LITTLE_ENDIAN
  Register crc_rv = crc;
#else
  Register crc_rv = tmp;                    // Load_reverse needs separate registers to work on.
                                            // Occupies tmp, but frees up crc.
  load_reverse_32(crc_rv, crc);             // Revert byte order because we are dealing with big-endian data.
  tmp = crc;
#endif

  int reconstructTableOffset = crc32_table_columns(table, tc0, tc1, tc2, tc3);

  align(mainLoop_alignment);                // Octoword-aligned loop address. Shows 2% improvement.
  BIND(L_mainLoop);
    // Two words (2 x 4 bytes = mainLoop_stepping) per iteration.
    update_1word_crc32(crc_rv, buf, table, 0, 0, crc_rv, t1, t2, t3, tc0, tc1, tc2, tc3);
    update_1word_crc32(crc_rv, buf, table, 4, mainLoop_stepping, crc_rv, t1, t2, t3, tc0, tc1, tc2, tc3);
    bdnz(L_mainLoop);                       // Decrement CTR, branch while non-zero.

#ifndef VM_LITTLE_ENDIAN
  load_reverse_32(crc, crc_rv);             // Revert byte order because we are dealing with big-endian data.
  tmp = crc_rv;                             // Tmp uses its original register again.
#endif

  // Restore original table address for tailLoop.
  if (reconstructTableOffset != 0) {
    addi(table, table, -reconstructTableOffset);
  }

  // Process last few (<complexThreshold) bytes of buffer.
  BIND(L_tail);
  update_byteLoop_crc32(crc, buf, len, table, data, false);

  if (invertCRC) {
    nand(crc, crc, crc);                    // 1s complement of crc
  }
  BLOCK_COMMENT("} kernel_crc32_2word");
}
|
||||
|
||||
/**
|
||||
* @param crc register containing existing CRC (32-bit)
|
||||
* @param buf register pointing to input byte buffer (byte*)
|
||||
|
@ -835,10 +835,6 @@ class MacroAssembler: public Assembler {
|
||||
void update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc,
|
||||
Register t0, Register t1, Register t2, Register t3,
|
||||
Register tc0, Register tc1, Register tc2, Register tc3);
|
||||
void kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
|
||||
Register t0, Register t1, Register t2, Register t3,
|
||||
Register tc0, Register tc1, Register tc2, Register tc3,
|
||||
bool invertCRC);
|
||||
void kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
|
||||
Register t0, Register t1, Register t2, Register t3,
|
||||
Register tc0, Register tc1, Register tc2, Register tc3,
|
||||
|
@ -62,7 +62,6 @@ class ppc64 {
|
||||
public:
|
||||
|
||||
// CRC32 Intrinsics.
|
||||
static void generate_load_table_addr(MacroAssembler* masm, Register table, address table_addr, uint64_t table_contents);
|
||||
static void generate_load_crc_table_addr(MacroAssembler* masm, Register table);
|
||||
static void generate_load_crc_constants_addr(MacroAssembler* masm, Register table);
|
||||
static void generate_load_crc_barret_constants_addr(MacroAssembler* masm, Register table);
|
||||
|
@ -6325,75 +6325,6 @@ void MacroAssembler::update_1word_crc32(Register crc, Register buf, Register tab
|
||||
lgr_if_needed(crc, t0);
|
||||
}
|
||||
|
||||
/**
 * Compute CRC32 over a byte buffer, consuming two 32-bit words (8 bytes)
 * of input per main-loop iteration via update_1word_crc32; leftover bytes
 * are handled one at a time by update_byteLoop_crc32.
 *
 * @param crc register containing existing CRC (32-bit)
 * @param buf register pointing to input byte buffer (byte*)
 * @param len register containing number of bytes
 * @param table register pointing to CRC table
 * @param invertCRC if true, the CRC is 1s-complemented on entry and again on exit
 *                  (the usual pre/post conditioning of CRC32)
 *
 * uses Z_R10..Z_R13 as work register. Must be saved/restored by caller!
 */
void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
                                        Register t0, Register t1, Register t2, Register t3,
                                        bool invertCRC) {
  assert_different_registers(crc, buf, len, table);

  Label L_mainLoop, L_tail;
  Register data = t0;
  Register ctr  = Z_R0;                // Loop counter for the 8-byte main loop.
  const int mainLoop_stepping = 8;     // Bytes consumed per main-loop iteration (2 words).
  const int tailLoop_stepping = 1;     // Tail loop consumes one byte at a time.
  const int log_stepping      = exact_log2(mainLoop_stepping);

  // Don't test for len <= 0 here. This pathological case should not occur anyway.
  // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles.
  // The situation itself is detected and handled correctly by the conditional branches
  // following aghi(len, -stepping) and aghi(len, +stepping).

  if (invertCRC) {
    not_(crc, noreg, false);           // 1s complement of crc
  }

#if 0
  {
    // Pre-mainLoop alignment did not show any positive effect on performance.
    // We leave the code in for reference. Maybe the vector instructions in z13 depend on alignment.

    z_cghi(len, mainLoop_stepping);    // Alignment is useless for short data streams.
    z_brnh(L_tail);

    // Align buf to word (4-byte) boundary.
    z_lcr(ctr, buf);
    rotate_then_insert(ctr, ctr, 62, 63, 0, true); // TODO: should set cc
    z_sgfr(len, ctr);                  // Remaining len after alignment.

    update_byteLoop_crc32(crc, buf, ctr, table, data);
  }
#endif

  // Check for short (<mainLoop_stepping bytes) buffer.
  z_srag(ctr, len, log_stepping);      // ctr = #main-loop iterations (len / 8); sets condition code.
  z_brnh(L_tail);                      // Not positive -> fewer than 8 bytes, tail processing only.

  z_lrvr(crc, crc);                    // Revert byte order because we are dealing with big-endian data.
  rotate_then_insert(len, len, 64-log_stepping, 63, 0, true); // #bytes for tailLoop (len % 8).

  BIND(L_mainLoop);
    // Two words (2 x 4 bytes = mainLoop_stepping) per iteration.
    update_1word_crc32(crc, buf, table, 0, 0, crc, t1, t2, t3);
    update_1word_crc32(crc, buf, table, 4, mainLoop_stepping, crc, t1, t2, t3);
    z_brct(ctr, L_mainLoop);           // Iterate.

  z_lrvr(crc, crc);                    // Revert byte order back to original.

  // Process last few (<8) bytes of buffer.
  BIND(L_tail);
  update_byteLoop_crc32(crc, buf, len, table, data);

  if (invertCRC) {
    not_(crc, noreg, false);           // 1s complement of crc
  }
}
|
||||
|
||||
/**
|
||||
* @param crc register containing existing CRC (32-bit)
|
||||
* @param buf register pointing to input byte buffer (byte*)
|
||||
|
@ -1056,9 +1056,6 @@ class MacroAssembler: public Assembler {
|
||||
void kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
|
||||
Register t0, Register t1, Register t2, Register t3,
|
||||
bool invertCRC);
|
||||
void kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
|
||||
Register t0, Register t1, Register t2, Register t3,
|
||||
bool invertCRC);
|
||||
|
||||
// Emitters for BigInteger.multiplyToLen intrinsic
|
||||
// note: length of result array (zlen) is passed on the stack
|
||||
|
Loading…
Reference in New Issue
Block a user