8214451: PPC64/s390: Clean up unused CRC32 prototype and function

Reviewed-by: mdoerr, lucy
Author: Gustavo Romero  2018-11-28 13:16:54 -05:00
Parent: d92085431b
Commit: d6b70fa2ab
5 changed files with 0 additions and 176 deletions
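The removed kernel_crc32_2word routines implemented a table-driven CRC32 that folds one 4-byte word into the CRC per step using four precomputed table columns, consuming the buffer in 8-byte chunks plus a byte-wise tail. For orientation, a minimal C++ sketch of that word-at-a-time update follows; the column order and names are illustrative assumptions (the textbook "slicing-by-4" arrangement), not the exact HotSpot table layout, and little-endian word loads are assumed, which is why the big-endian paths below byte-reverse the CRC.

#include <cstddef>
#include <cstdint>
#include <cstring>

// Sketch of the word-at-a-time CRC32 update the removed kernels
// implement in assembly. tc0..tc3 mirror the four table columns held
// in the tc0..tc3 registers of the PPC64 variant; the exact
// column-to-byte assignment in HotSpot may differ.
static uint32_t crc32_update_word(uint32_t crc, uint32_t w,
                                  const uint32_t tc0[256], const uint32_t tc1[256],
                                  const uint32_t tc2[256], const uint32_t tc3[256]) {
  crc ^= w;                           // Fold 4 input bytes into the running CRC.
  return tc3[ crc        & 0xff] ^    // Four independent lookups; the assembly
         tc2[(crc >>  8) & 0xff] ^    // can overlap these loads and xors.
         tc1[(crc >> 16) & 0xff] ^
         tc0[(crc >> 24) & 0xff];
}

// The "2word" flavor applies the update twice per loop iteration,
// i.e. mainLoop_stepping == 8 bytes per pass; leftover bytes go
// through a byte-at-a-time tail (update_byteLoop_crc32 below).
static uint32_t crc32_main_loop(uint32_t crc, const uint8_t* buf, size_t len,
                                const uint32_t tc[4][256]) {
  while (len >= 8) {
    uint32_t w0, w1;
    std::memcpy(&w0, buf, 4);         // Little-endian loads assumed here.
    std::memcpy(&w1, buf + 4, 4);
    crc = crc32_update_word(crc, w0, tc[0], tc[1], tc[2], tc[3]);
    crc = crc32_update_word(crc, w1, tc[0], tc[1], tc[2], tc[3]);
    buf += 8;
    len -= 8;
  }
  return crc;                         // Tail bytes handled separately.
}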


@@ -4013,105 +4013,6 @@ void MacroAssembler::update_1word_crc32(Register crc, Register buf, Register tab
  xorr(crc, t0, t2);                        // Now crc contains the final checksum value.
}

/**
 * @param crc   register containing existing CRC (32-bit)
 * @param buf   register pointing to input byte buffer (byte*)
 * @param len   register containing number of bytes
 * @param table register pointing to CRC table
 *
 * Uses R9..R12 as work registers. Must be saved/restored by caller!
 */
void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
                                        Register t0,  Register t1,  Register t2,  Register t3,
                                        Register tc0, Register tc1, Register tc2, Register tc3,
                                        bool invertCRC) {
  assert_different_registers(crc, buf, len, table);

  Label L_mainLoop, L_tail;
  Register  tmp  = t0;
  Register  data = t0;
  Register  tmp2 = t1;
  const int mainLoop_stepping  = 8;
  const int tailLoop_stepping  = 1;
  const int log_stepping       = exact_log2(mainLoop_stepping);
  const int mainLoop_alignment = 32; // InputForNewCode > 4 ? InputForNewCode : 32;
  const int complexThreshold   = 2*mainLoop_stepping;

  // Don't test for len <= 0 here. This pathological case should not occur anyway.
  // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles
  // for all well-behaved cases. The situation itself is detected and handled correctly
  // within update_byteLoop_crc32.
  assert(tailLoop_stepping == 1, "check tailLoop_stepping!");

  BLOCK_COMMENT("kernel_crc32_2word {");

  if (invertCRC) {
    nand(crc, crc, crc);                    // 1s complement of crc
  }
  // Check for short (<mainLoop_stepping) buffer.
  cmpdi(CCR0, len, complexThreshold);
  blt(CCR0, L_tail);

  // Pre-mainLoop alignment did show a slight (1%) positive effect on performance.
  // We leave the code in for reference. Maybe we need alignment when we exploit vector instructions.
  {
    // Align buf addr to mainLoop_stepping boundary.
    neg(tmp2, buf);                         // Calculate # preLoop iterations for alignment.
    rldicl(tmp2, tmp2, 0, 64-log_stepping); // Keep low log_stepping bits only: tmp2 &= mainLoop_stepping-1.

    if (complexThreshold > mainLoop_stepping) {
      sub(len, len, tmp2);                  // Remaining bytes for main loop (>=mainLoop_stepping is guaranteed).
    } else {
      sub(tmp, len, tmp2);                  // Remaining bytes for main loop.
      cmpdi(CCR0, tmp, mainLoop_stepping);
      blt(CCR0, L_tail);                    // For less than one mainLoop_stepping left, do only tail processing.
      mr(len, tmp);                         // Remaining bytes for main loop (>=mainLoop_stepping is guaranteed).
    }
    update_byteLoop_crc32(crc, buf, tmp2, table, data, false);
  }
  srdi(tmp2, len, log_stepping);            // # iterations for mainLoop
  andi(len, len, mainLoop_stepping-1);      // Remaining bytes for tailLoop.
  mtctr(tmp2);

#ifdef VM_LITTLE_ENDIAN
  Register crc_rv = crc;
#else
  Register crc_rv = tmp;                    // Load_reverse needs separate registers to work on.
                                            // Occupies tmp, but frees up crc.
  load_reverse_32(crc_rv, crc);             // Revert byte order because we are dealing with big-endian data.
  tmp = crc;
#endif

  int reconstructTableOffset = crc32_table_columns(table, tc0, tc1, tc2, tc3);

  align(mainLoop_alignment);                // Octoword-aligned loop address. Shows 2% improvement.
  BIND(L_mainLoop);
    update_1word_crc32(crc_rv, buf, table, 0, 0, crc_rv, t1, t2, t3, tc0, tc1, tc2, tc3);
    update_1word_crc32(crc_rv, buf, table, 4, mainLoop_stepping, crc_rv, t1, t2, t3, tc0, tc1, tc2, tc3);
    bdnz(L_mainLoop);

#ifndef VM_LITTLE_ENDIAN
  load_reverse_32(crc, crc_rv);             // Revert byte order because we are dealing with big-endian data.
  tmp = crc_rv;                             // Tmp uses its original register again.
#endif

  // Restore original table address for tailLoop.
  if (reconstructTableOffset != 0) {
    addi(table, table, -reconstructTableOffset);
  }

  // Process last few (<complexThreshold) bytes of buffer.
  BIND(L_tail);
    update_byteLoop_crc32(crc, buf, len, table, data, false);

  if (invertCRC) {
    nand(crc, crc, crc);                    // 1s complement of crc
  }

  BLOCK_COMMENT("} kernel_crc32_2word");
}
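
The alignment preamble above (neg followed by rldicl) computes the number of leading bytes needed to bring buf up to a mainLoop_stepping boundary: the usual negate-and-mask idiom for power-of-two steppings. A minimal C++ sketch of the same computation, with illustrative names:

#include <cstdint>

// Equivalent of:  neg(tmp2, buf);  rldicl(tmp2, tmp2, 0, 64 - log_stepping);
// i.e. tmp2 = (-buf) & (stepping - 1): the number of bytes to process
// byte-wise so that buf becomes stepping-aligned. stepping must be a
// power of two.
static inline uint64_t bytes_to_alignment(const void* buf, uint64_t stepping) {
  return (0 - (uint64_t)(uintptr_t)buf) & (stepping - 1);
}

With stepping == 8, a buffer address ending in 0x5 yields 3, which is the pre-loop byte count the removed code feeds to update_byteLoop_crc32.
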
/**
 * @param crc   register containing existing CRC (32-bit)
 * @param buf   register pointing to input byte buffer (byte*)


@@ -835,10 +835,6 @@ class MacroAssembler: public Assembler {
  void update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc,
                          Register t0,  Register t1,  Register t2,  Register t3,
                          Register tc0, Register tc1, Register tc2, Register tc3);
  void kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
                          Register t0,  Register t1,  Register t2,  Register t3,
                          Register tc0, Register tc1, Register tc2, Register tc3,
                          bool invertCRC);
  void kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
                          Register t0,  Register t1,  Register t2,  Register t3,
                          Register tc0, Register tc1, Register tc2, Register tc3,


@@ -62,7 +62,6 @@ class ppc64 {
 public:
  // CRC32 Intrinsics.
  static void generate_load_table_addr(MacroAssembler* masm, Register table, address table_addr, uint64_t table_contents);
  static void generate_load_crc_table_addr(MacroAssembler* masm, Register table);
  static void generate_load_crc_constants_addr(MacroAssembler* masm, Register table);
  static void generate_load_crc_barret_constants_addr(MacroAssembler* masm, Register table);


@@ -6325,75 +6325,6 @@ void MacroAssembler::update_1word_crc32(Register crc, Register buf, Register tab
  lgr_if_needed(crc, t0);
}

/**
 * @param crc   register containing existing CRC (32-bit)
 * @param buf   register pointing to input byte buffer (byte*)
 * @param len   register containing number of bytes
 * @param table register pointing to CRC table
 *
 * Uses Z_R10..Z_R13 as work registers. Must be saved/restored by caller!
 */
void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
                                        Register t0,  Register t1,  Register t2,  Register t3,
                                        bool invertCRC) {
  assert_different_registers(crc, buf, len, table);

  Label L_mainLoop, L_tail;
  Register  data = t0;
  Register  ctr  = Z_R0;
  const int mainLoop_stepping = 8;
  const int tailLoop_stepping = 1;
  const int log_stepping      = exact_log2(mainLoop_stepping);

  // Don't test for len <= 0 here. This pathological case should not occur anyway.
  // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles.
  // The situation itself is detected and handled correctly by the conditional branches
  // following aghi(len, -stepping) and aghi(len, +stepping).

  if (invertCRC) {
    not_(crc, noreg, false);               // 1s complement of crc
  }

#if 0
  {
    // Pre-mainLoop alignment did not show any positive effect on performance.
    // We leave the code in for reference. Maybe the vector instructions in z13 depend on alignment.

    z_cghi(len, mainLoop_stepping);        // Alignment is useless for short data streams.
    z_brnh(L_tail);

    // Align buf to word (4-byte) boundary.
    z_lcr(ctr, buf);
    rotate_then_insert(ctr, ctr, 62, 63, 0, true); // TODO: should set cc
    z_sgfr(len, ctr);                      // Remaining len after alignment.

    update_byteLoop_crc32(crc, buf, ctr, table, data);
  }
#endif
  // Check for short (<mainLoop_stepping bytes) buffer.
  z_srag(ctr, len, log_stepping);          // # iterations for mainLoop
  z_brnh(L_tail);

  z_lrvr(crc, crc);                        // Revert byte order because we are dealing with big-endian data.
  rotate_then_insert(len, len, 64-log_stepping, 63, 0, true); // #bytes for tailLoop

  BIND(L_mainLoop);
    update_1word_crc32(crc, buf, table, 0, 0, crc, t1, t2, t3);
    update_1word_crc32(crc, buf, table, 4, mainLoop_stepping, crc, t1, t2, t3);
    z_brct(ctr, L_mainLoop);               // Iterate.

  z_lrvr(crc, crc);                        // Revert byte order back to original.

  // Process last few (<8) bytes of buffer.
  BIND(L_tail);
    update_byteLoop_crc32(crc, buf, len, table, data);

  if (invertCRC) {
    not_(crc, noreg, false);               // 1s complement of crc
  }
}
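
Both removed kernels take an invertCRC flag and complement the CRC register on entry and exit (nand on PPC64, not_ on s390), following the standard CRC-32 convention of pre- and post-inverting the working register. A sketch of that wrapper logic, with update_raw standing in for the table-driven core (names here are illustrative, not HotSpot's):

#include <cstddef>
#include <cstdint>

typedef uint32_t (*crc_fn)(uint32_t crc, const uint8_t* buf, size_t len);

// invertCRC selects whether the kernel itself performs the standard
// pre/post complement or leaves that to the caller (e.g. when the CRC
// is kept in complemented form across multiple chunks).
static uint32_t crc32_with_invert(uint32_t crc, const uint8_t* buf, size_t len,
                                  crc_fn update_raw, bool invertCRC) {
  if (invertCRC) crc = ~crc;   // 1s complement of crc on entry (nand / not_ above).
  crc = update_raw(crc, buf, len);
  if (invertCRC) crc = ~crc;   // 1s complement of crc on exit.
  return crc;
}
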
/**
 * @param crc   register containing existing CRC (32-bit)
 * @param buf   register pointing to input byte buffer (byte*)


@@ -1056,9 +1056,6 @@ class MacroAssembler: public Assembler {
  void kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
                          Register t0, Register t1, Register t2, Register t3,
                          bool invertCRC);
  void kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
                          Register t0, Register t1, Register t2, Register t3,
                          bool invertCRC);

  // Emitters for BigInteger.multiplyToLen intrinsic
  // note: length of result array (zlen) is passed on the stack